1e8d8bef9SDimitry Andric //=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric ///
9e8d8bef9SDimitry Andric /// \file
10e8d8bef9SDimitry Andric /// Post-legalization lowering for instructions.
11e8d8bef9SDimitry Andric ///
12e8d8bef9SDimitry Andric /// This is used to offload pattern matching from the selector.
13e8d8bef9SDimitry Andric ///
14e8d8bef9SDimitry Andric /// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
15e8d8bef9SDimitry Andric /// a G_ZIP, G_UZP, etc.
16e8d8bef9SDimitry Andric ///
17e8d8bef9SDimitry Andric /// General optimization combines should be handled by either the
18e8d8bef9SDimitry Andric /// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
19e8d8bef9SDimitry Andric ///
20e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
21e8d8bef9SDimitry Andric
22*0fca6ea1SDimitry Andric #include "AArch64ExpandImm.h"
23e8d8bef9SDimitry Andric #include "AArch64GlobalISelUtils.h"
24*0fca6ea1SDimitry Andric #include "AArch64PerfectShuffle.h"
25fe6060f1SDimitry Andric #include "AArch64Subtarget.h"
26fe6060f1SDimitry Andric #include "AArch64TargetMachine.h"
27fe6060f1SDimitry Andric #include "GISel/AArch64LegalizerInfo.h"
28e8d8bef9SDimitry Andric #include "MCTargetDesc/AArch64MCTargetDesc.h"
29fe6060f1SDimitry Andric #include "TargetInfo/AArch64TargetInfo.h"
30fe6060f1SDimitry Andric #include "Utils/AArch64BaseInfo.h"
31e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/Combiner.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
33e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
3406c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
35bdd1243dSDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
36bdd1243dSDimitry Andric #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
37bdd1243dSDimitry Andric #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
38e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
39e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
40e8d8bef9SDimitry Andric #include "llvm/CodeGen/GlobalISel/Utils.h"
41*0fca6ea1SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
42e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
43e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
44e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
45e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetOpcodes.h"
46e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
47fe6060f1SDimitry Andric #include "llvm/IR/InstrTypes.h"
48e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
49e8d8bef9SDimitry Andric #include "llvm/Support/Debug.h"
50fe6060f1SDimitry Andric #include "llvm/Support/ErrorHandling.h"
51bdd1243dSDimitry Andric #include <optional>
52e8d8bef9SDimitry Andric
5306c3fb27SDimitry Andric #define GET_GICOMBINER_DEPS
5406c3fb27SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
5506c3fb27SDimitry Andric #undef GET_GICOMBINER_DEPS
5606c3fb27SDimitry Andric
57e8d8bef9SDimitry Andric #define DEBUG_TYPE "aarch64-postlegalizer-lowering"
58e8d8bef9SDimitry Andric
59e8d8bef9SDimitry Andric using namespace llvm;
60e8d8bef9SDimitry Andric using namespace MIPatternMatch;
61e8d8bef9SDimitry Andric using namespace AArch64GISelUtils;
62e8d8bef9SDimitry Andric
6306c3fb27SDimitry Andric namespace {
6406c3fb27SDimitry Andric
6506c3fb27SDimitry Andric #define GET_GICOMBINER_TYPES
6606c3fb27SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
6706c3fb27SDimitry Andric #undef GET_GICOMBINER_TYPES
6806c3fb27SDimitry Andric
69e8d8bef9SDimitry Andric /// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
70e8d8bef9SDimitry Andric ///
71e8d8bef9SDimitry Andric /// Used for matching target-supported shuffles before codegen.
72e8d8bef9SDimitry Andric struct ShuffleVectorPseudo {
73e8d8bef9SDimitry Andric unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
74e8d8bef9SDimitry Andric Register Dst; ///< Destination register.
75e8d8bef9SDimitry Andric SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
ShuffleVectorPseudo__anone40d29560111::ShuffleVectorPseudo76e8d8bef9SDimitry Andric ShuffleVectorPseudo(unsigned Opc, Register Dst,
77e8d8bef9SDimitry Andric std::initializer_list<SrcOp> SrcOps)
78e8d8bef9SDimitry Andric : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
7981ad6265SDimitry Andric ShuffleVectorPseudo() = default;
80e8d8bef9SDimitry Andric };
81e8d8bef9SDimitry Andric
/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
///
/// \returns std::nullopt when \p M is not an EXT mask; otherwise a pair whose
/// first element is true when the two input vectors must be swapped, and whose
/// second element is the starting lane index for the EXT.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return std::nullopt;

  // Use APInt to handle overflow when calculating expected element.
  // MaskBits = log2(2 * NumElts): mask indices address the concatenation of
  // both inputs, and incrementing past 2*NumElts-1 must wrap back to 0.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);

  // The following shuffle indices must be the successive elements after the
  // first real element. Undef indices (Elt < 0) match anything; ExpectedElt
  // is post-incremented (with wrap-around) for every element checked.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element. E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two difference cases requiring to reverse input vectors.
  // For example, for vector <4 x i32> we have the following cases,
  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
  // to reverse two input vectors.
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}
123e8d8bef9SDimitry Andric
124fe6060f1SDimitry Andric /// Helper function for matchINS.
125fe6060f1SDimitry Andric ///
126fe6060f1SDimitry Andric /// \returns a value when \p M is an ins mask for \p NumInputElements.
127fe6060f1SDimitry Andric ///
128fe6060f1SDimitry Andric /// First element of the returned pair is true when the produced
129fe6060f1SDimitry Andric /// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
130fe6060f1SDimitry Andric ///
131fe6060f1SDimitry Andric /// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
isINSMask(ArrayRef<int> M,int NumInputElements)13206c3fb27SDimitry Andric std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
133fe6060f1SDimitry Andric int NumInputElements) {
134fe6060f1SDimitry Andric if (M.size() != static_cast<size_t>(NumInputElements))
135bdd1243dSDimitry Andric return std::nullopt;
136fe6060f1SDimitry Andric int NumLHSMatch = 0, NumRHSMatch = 0;
137fe6060f1SDimitry Andric int LastLHSMismatch = -1, LastRHSMismatch = -1;
138fe6060f1SDimitry Andric for (int Idx = 0; Idx < NumInputElements; ++Idx) {
139fe6060f1SDimitry Andric if (M[Idx] == -1) {
140fe6060f1SDimitry Andric ++NumLHSMatch;
141fe6060f1SDimitry Andric ++NumRHSMatch;
142fe6060f1SDimitry Andric continue;
143fe6060f1SDimitry Andric }
144fe6060f1SDimitry Andric M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
145fe6060f1SDimitry Andric M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
146fe6060f1SDimitry Andric }
147fe6060f1SDimitry Andric const int NumNeededToMatch = NumInputElements - 1;
148fe6060f1SDimitry Andric if (NumLHSMatch == NumNeededToMatch)
149fe6060f1SDimitry Andric return std::make_pair(true, LastLHSMismatch);
150fe6060f1SDimitry Andric if (NumRHSMatch == NumNeededToMatch)
151fe6060f1SDimitry Andric return std::make_pair(false, LastRHSMismatch);
152bdd1243dSDimitry Andric return std::nullopt;
153fe6060f1SDimitry Andric }
154fe6060f1SDimitry Andric
155e8d8bef9SDimitry Andric /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
156e8d8bef9SDimitry Andric /// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
matchREV(MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)15706c3fb27SDimitry Andric bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
158e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
159e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
160e8d8bef9SDimitry Andric ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
161e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg();
162e8d8bef9SDimitry Andric Register Src = MI.getOperand(1).getReg();
163e8d8bef9SDimitry Andric LLT Ty = MRI.getType(Dst);
164e8d8bef9SDimitry Andric unsigned EltSize = Ty.getScalarSizeInBits();
165e8d8bef9SDimitry Andric
166e8d8bef9SDimitry Andric // Element size for a rev cannot be 64.
167e8d8bef9SDimitry Andric if (EltSize == 64)
168e8d8bef9SDimitry Andric return false;
169e8d8bef9SDimitry Andric
170e8d8bef9SDimitry Andric unsigned NumElts = Ty.getNumElements();
171e8d8bef9SDimitry Andric
172*0fca6ea1SDimitry Andric // Try to produce a G_REV instruction
173*0fca6ea1SDimitry Andric for (unsigned LaneSize : {64U, 32U, 16U}) {
174*0fca6ea1SDimitry Andric if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
175*0fca6ea1SDimitry Andric unsigned Opcode;
176*0fca6ea1SDimitry Andric if (LaneSize == 64U)
177*0fca6ea1SDimitry Andric Opcode = AArch64::G_REV64;
178*0fca6ea1SDimitry Andric else if (LaneSize == 32U)
179*0fca6ea1SDimitry Andric Opcode = AArch64::G_REV32;
180*0fca6ea1SDimitry Andric else
181*0fca6ea1SDimitry Andric Opcode = AArch64::G_REV16;
182*0fca6ea1SDimitry Andric
183*0fca6ea1SDimitry Andric MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
184e8d8bef9SDimitry Andric return true;
185e8d8bef9SDimitry Andric }
186*0fca6ea1SDimitry Andric }
187e8d8bef9SDimitry Andric
188e8d8bef9SDimitry Andric return false;
189e8d8bef9SDimitry Andric }
190e8d8bef9SDimitry Andric
191e8d8bef9SDimitry Andric /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
192e8d8bef9SDimitry Andric /// a G_TRN1 or G_TRN2 instruction.
matchTRN(MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)19306c3fb27SDimitry Andric bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
194e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
195e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
196e8d8bef9SDimitry Andric unsigned WhichResult;
197e8d8bef9SDimitry Andric ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
198e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg();
199e8d8bef9SDimitry Andric unsigned NumElts = MRI.getType(Dst).getNumElements();
200e8d8bef9SDimitry Andric if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
201e8d8bef9SDimitry Andric return false;
202e8d8bef9SDimitry Andric unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
203e8d8bef9SDimitry Andric Register V1 = MI.getOperand(1).getReg();
204e8d8bef9SDimitry Andric Register V2 = MI.getOperand(2).getReg();
205e8d8bef9SDimitry Andric MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
206e8d8bef9SDimitry Andric return true;
207e8d8bef9SDimitry Andric }
208e8d8bef9SDimitry Andric
209e8d8bef9SDimitry Andric /// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
210e8d8bef9SDimitry Andric /// a G_UZP1 or G_UZP2 instruction.
211e8d8bef9SDimitry Andric ///
212e8d8bef9SDimitry Andric /// \param [in] MI - The shuffle vector instruction.
213e8d8bef9SDimitry Andric /// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
matchUZP(MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)21406c3fb27SDimitry Andric bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
215e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
216e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
217e8d8bef9SDimitry Andric unsigned WhichResult;
218e8d8bef9SDimitry Andric ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
219e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg();
220e8d8bef9SDimitry Andric unsigned NumElts = MRI.getType(Dst).getNumElements();
221e8d8bef9SDimitry Andric if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
222e8d8bef9SDimitry Andric return false;
223e8d8bef9SDimitry Andric unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
224e8d8bef9SDimitry Andric Register V1 = MI.getOperand(1).getReg();
225e8d8bef9SDimitry Andric Register V2 = MI.getOperand(2).getReg();
226e8d8bef9SDimitry Andric MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
227e8d8bef9SDimitry Andric return true;
228e8d8bef9SDimitry Andric }
229e8d8bef9SDimitry Andric
matchZip(MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)23006c3fb27SDimitry Andric bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
231e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
232e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
233e8d8bef9SDimitry Andric unsigned WhichResult;
234e8d8bef9SDimitry Andric ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
235e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg();
236e8d8bef9SDimitry Andric unsigned NumElts = MRI.getType(Dst).getNumElements();
237*0fca6ea1SDimitry Andric if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
238e8d8bef9SDimitry Andric return false;
239e8d8bef9SDimitry Andric unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
240e8d8bef9SDimitry Andric Register V1 = MI.getOperand(1).getReg();
241e8d8bef9SDimitry Andric Register V2 = MI.getOperand(2).getReg();
242e8d8bef9SDimitry Andric MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
243e8d8bef9SDimitry Andric return true;
244e8d8bef9SDimitry Andric }
245e8d8bef9SDimitry Andric
246e8d8bef9SDimitry Andric /// Helper function for matchDup.
matchDupFromInsertVectorElt(int Lane,MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)24706c3fb27SDimitry Andric bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
248e8d8bef9SDimitry Andric MachineRegisterInfo &MRI,
249e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
250e8d8bef9SDimitry Andric if (Lane != 0)
251e8d8bef9SDimitry Andric return false;
252e8d8bef9SDimitry Andric
253e8d8bef9SDimitry Andric // Try to match a vector splat operation into a dup instruction.
254e8d8bef9SDimitry Andric // We're looking for this pattern:
255e8d8bef9SDimitry Andric //
256e8d8bef9SDimitry Andric // %scalar:gpr(s64) = COPY $x0
257e8d8bef9SDimitry Andric // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
258e8d8bef9SDimitry Andric // %cst0:gpr(s32) = G_CONSTANT i32 0
259e8d8bef9SDimitry Andric // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
260e8d8bef9SDimitry Andric // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
26106c3fb27SDimitry Andric // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
26206c3fb27SDimitry Andric // %zerovec(<2 x s32>)
263e8d8bef9SDimitry Andric //
264e8d8bef9SDimitry Andric // ...into:
265e8d8bef9SDimitry Andric // %splat = G_DUP %scalar
266e8d8bef9SDimitry Andric
267e8d8bef9SDimitry Andric // Begin matching the insert.
268e8d8bef9SDimitry Andric auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
269e8d8bef9SDimitry Andric MI.getOperand(1).getReg(), MRI);
270e8d8bef9SDimitry Andric if (!InsMI)
271e8d8bef9SDimitry Andric return false;
272e8d8bef9SDimitry Andric // Match the undef vector operand.
273e8d8bef9SDimitry Andric if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
274e8d8bef9SDimitry Andric MRI))
275e8d8bef9SDimitry Andric return false;
276e8d8bef9SDimitry Andric
277e8d8bef9SDimitry Andric // Match the index constant 0.
278e8d8bef9SDimitry Andric if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
279e8d8bef9SDimitry Andric return false;
280e8d8bef9SDimitry Andric
281e8d8bef9SDimitry Andric MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
282e8d8bef9SDimitry Andric {InsMI->getOperand(2).getReg()});
283e8d8bef9SDimitry Andric return true;
284e8d8bef9SDimitry Andric }
285e8d8bef9SDimitry Andric
286e8d8bef9SDimitry Andric /// Helper function for matchDup.
matchDupFromBuildVector(int Lane,MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)28706c3fb27SDimitry Andric bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
288e8d8bef9SDimitry Andric MachineRegisterInfo &MRI,
289e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
290e8d8bef9SDimitry Andric assert(Lane >= 0 && "Expected positive lane?");
291e8d8bef9SDimitry Andric // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
292e8d8bef9SDimitry Andric // lane's definition directly.
293e8d8bef9SDimitry Andric auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
294e8d8bef9SDimitry Andric MI.getOperand(1).getReg(), MRI);
295e8d8bef9SDimitry Andric if (!BuildVecMI)
296e8d8bef9SDimitry Andric return false;
297e8d8bef9SDimitry Andric Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
298e8d8bef9SDimitry Andric MatchInfo =
299e8d8bef9SDimitry Andric ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
300e8d8bef9SDimitry Andric return true;
301e8d8bef9SDimitry Andric }
302e8d8bef9SDimitry Andric
matchDup(MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)30306c3fb27SDimitry Andric bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
304e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
305e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
306e8d8bef9SDimitry Andric auto MaybeLane = getSplatIndex(MI);
307e8d8bef9SDimitry Andric if (!MaybeLane)
308e8d8bef9SDimitry Andric return false;
309e8d8bef9SDimitry Andric int Lane = *MaybeLane;
310e8d8bef9SDimitry Andric // If this is undef splat, generate it via "just" vdup, if possible.
311e8d8bef9SDimitry Andric if (Lane < 0)
312e8d8bef9SDimitry Andric Lane = 0;
313e8d8bef9SDimitry Andric if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
314e8d8bef9SDimitry Andric return true;
315e8d8bef9SDimitry Andric if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
316e8d8bef9SDimitry Andric return true;
317e8d8bef9SDimitry Andric return false;
318e8d8bef9SDimitry Andric }
319e8d8bef9SDimitry Andric
320bdd1243dSDimitry Andric // Check if an EXT instruction can handle the shuffle mask when the vector
321bdd1243dSDimitry Andric // sources of the shuffle are the same.
isSingletonExtMask(ArrayRef<int> M,LLT Ty)32206c3fb27SDimitry Andric bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
323bdd1243dSDimitry Andric unsigned NumElts = Ty.getNumElements();
324bdd1243dSDimitry Andric
325bdd1243dSDimitry Andric // Assume that the first shuffle index is not UNDEF. Fail if it is.
326bdd1243dSDimitry Andric if (M[0] < 0)
327bdd1243dSDimitry Andric return false;
328bdd1243dSDimitry Andric
329bdd1243dSDimitry Andric // If this is a VEXT shuffle, the immediate value is the index of the first
330bdd1243dSDimitry Andric // element. The other shuffle indices must be the successive elements after
331bdd1243dSDimitry Andric // the first one.
332bdd1243dSDimitry Andric unsigned ExpectedElt = M[0];
333bdd1243dSDimitry Andric for (unsigned I = 1; I < NumElts; ++I) {
334bdd1243dSDimitry Andric // Increment the expected index. If it wraps around, just follow it
335bdd1243dSDimitry Andric // back to index zero and keep going.
336bdd1243dSDimitry Andric ++ExpectedElt;
337bdd1243dSDimitry Andric if (ExpectedElt == NumElts)
338bdd1243dSDimitry Andric ExpectedElt = 0;
339bdd1243dSDimitry Andric
340bdd1243dSDimitry Andric if (M[I] < 0)
341bdd1243dSDimitry Andric continue; // Ignore UNDEF indices.
342bdd1243dSDimitry Andric if (ExpectedElt != static_cast<unsigned>(M[I]))
343bdd1243dSDimitry Andric return false;
344bdd1243dSDimitry Andric }
345bdd1243dSDimitry Andric
346bdd1243dSDimitry Andric return true;
347bdd1243dSDimitry Andric }
348bdd1243dSDimitry Andric
matchEXT(MachineInstr & MI,MachineRegisterInfo & MRI,ShuffleVectorPseudo & MatchInfo)34906c3fb27SDimitry Andric bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
350e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
351e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
352e8d8bef9SDimitry Andric Register Dst = MI.getOperand(0).getReg();
353bdd1243dSDimitry Andric LLT DstTy = MRI.getType(Dst);
354e8d8bef9SDimitry Andric Register V1 = MI.getOperand(1).getReg();
355e8d8bef9SDimitry Andric Register V2 = MI.getOperand(2).getReg();
356bdd1243dSDimitry Andric auto Mask = MI.getOperand(3).getShuffleMask();
357bdd1243dSDimitry Andric uint64_t Imm;
358bdd1243dSDimitry Andric auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
359bdd1243dSDimitry Andric uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
360bdd1243dSDimitry Andric
361bdd1243dSDimitry Andric if (!ExtInfo) {
362bdd1243dSDimitry Andric if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
363bdd1243dSDimitry Andric !isSingletonExtMask(Mask, DstTy))
364bdd1243dSDimitry Andric return false;
365bdd1243dSDimitry Andric
366bdd1243dSDimitry Andric Imm = Mask[0] * ExtFactor;
367bdd1243dSDimitry Andric MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
368bdd1243dSDimitry Andric return true;
369bdd1243dSDimitry Andric }
370bdd1243dSDimitry Andric bool ReverseExt;
371bdd1243dSDimitry Andric std::tie(ReverseExt, Imm) = *ExtInfo;
372e8d8bef9SDimitry Andric if (ReverseExt)
373e8d8bef9SDimitry Andric std::swap(V1, V2);
374e8d8bef9SDimitry Andric Imm *= ExtFactor;
375e8d8bef9SDimitry Andric MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
376e8d8bef9SDimitry Andric return true;
377e8d8bef9SDimitry Andric }
378e8d8bef9SDimitry Andric
379e8d8bef9SDimitry Andric /// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
380e8d8bef9SDimitry Andric /// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
applyShuffleVectorPseudo(MachineInstr & MI,ShuffleVectorPseudo & MatchInfo)38106c3fb27SDimitry Andric void applyShuffleVectorPseudo(MachineInstr &MI,
382e8d8bef9SDimitry Andric ShuffleVectorPseudo &MatchInfo) {
383e8d8bef9SDimitry Andric MachineIRBuilder MIRBuilder(MI);
384e8d8bef9SDimitry Andric MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
385e8d8bef9SDimitry Andric MI.eraseFromParent();
386e8d8bef9SDimitry Andric }
387e8d8bef9SDimitry Andric
388e8d8bef9SDimitry Andric /// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
389e8d8bef9SDimitry Andric /// Special-cased because the constant operand must be emitted as a G_CONSTANT
390e8d8bef9SDimitry Andric /// for the imported tablegen patterns to work.
applyEXT(MachineInstr & MI,ShuffleVectorPseudo & MatchInfo)39106c3fb27SDimitry Andric void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
392e8d8bef9SDimitry Andric MachineIRBuilder MIRBuilder(MI);
3935f757f3fSDimitry Andric if (MatchInfo.SrcOps[2].getImm() == 0)
3945f757f3fSDimitry Andric MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
3955f757f3fSDimitry Andric else {
396e8d8bef9SDimitry Andric // Tablegen patterns expect an i32 G_CONSTANT as the final op.
397e8d8bef9SDimitry Andric auto Cst =
398e8d8bef9SDimitry Andric MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
399e8d8bef9SDimitry Andric MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
400e8d8bef9SDimitry Andric {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
4015f757f3fSDimitry Andric }
402e8d8bef9SDimitry Andric MI.eraseFromParent();
403e8d8bef9SDimitry Andric }
404e8d8bef9SDimitry Andric
matchNonConstInsert(MachineInstr & MI,MachineRegisterInfo & MRI)405*0fca6ea1SDimitry Andric bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
406*0fca6ea1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
407*0fca6ea1SDimitry Andric
408*0fca6ea1SDimitry Andric auto ValAndVReg =
409*0fca6ea1SDimitry Andric getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
410*0fca6ea1SDimitry Andric return !ValAndVReg;
411*0fca6ea1SDimitry Andric }
412*0fca6ea1SDimitry Andric
/// Lower a G_INSERT_VECTOR_ELT with a non-constant index by spilling the
/// vector to a stack slot, storing the element through a computed pointer,
/// and reloading the whole vector.
void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());

  // Create a stack slot and store the vector into it. Cap the alignment at
  // 16 bytes (the largest useful alignment for a NEON register spill).
  MachineFunction &MF = Builder.getMF();
  Align Alignment(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                     Alignment, false);
  LLT FramePtrTy = LLT::pointer(0, 64);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);

  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds: mask the index down to the vector's lane
  // range (NumElements is a power of two, so NumElements - 1 is a lane mask),
  // then scale by the element size to get a byte offset into the slot.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
  Register EltPtr =
      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
          .getReg(0);

  // Write the inserted element (the element pointer is only guaranteed
  // element-aligned, hence Align(1)).
  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
  // Reload the whole vector.
  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
  Insert.eraseFromParent();
}
453*0fca6ea1SDimitry Andric
454fe6060f1SDimitry Andric /// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
455fe6060f1SDimitry Andric /// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
456fe6060f1SDimitry Andric ///
457fe6060f1SDimitry Andric /// e.g.
458fe6060f1SDimitry Andric /// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
459fe6060f1SDimitry Andric ///
460fe6060f1SDimitry Andric /// Can be represented as
461fe6060f1SDimitry Andric ///
462fe6060f1SDimitry Andric /// %extract = G_EXTRACT_VECTOR_ELT %left, 0
463fe6060f1SDimitry Andric /// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
464fe6060f1SDimitry Andric ///
matchINS(MachineInstr & MI,MachineRegisterInfo & MRI,std::tuple<Register,int,Register,int> & MatchInfo)46506c3fb27SDimitry Andric bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
466fe6060f1SDimitry Andric std::tuple<Register, int, Register, int> &MatchInfo) {
467fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
468fe6060f1SDimitry Andric ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
469fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg();
470fe6060f1SDimitry Andric int NumElts = MRI.getType(Dst).getNumElements();
471fe6060f1SDimitry Andric auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
472fe6060f1SDimitry Andric if (!DstIsLeftAndDstLane)
473fe6060f1SDimitry Andric return false;
474fe6060f1SDimitry Andric bool DstIsLeft;
475fe6060f1SDimitry Andric int DstLane;
476fe6060f1SDimitry Andric std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
477fe6060f1SDimitry Andric Register Left = MI.getOperand(1).getReg();
478fe6060f1SDimitry Andric Register Right = MI.getOperand(2).getReg();
479fe6060f1SDimitry Andric Register DstVec = DstIsLeft ? Left : Right;
480fe6060f1SDimitry Andric Register SrcVec = Left;
481fe6060f1SDimitry Andric
482fe6060f1SDimitry Andric int SrcLane = ShuffleMask[DstLane];
483fe6060f1SDimitry Andric if (SrcLane >= NumElts) {
484fe6060f1SDimitry Andric SrcVec = Right;
485fe6060f1SDimitry Andric SrcLane -= NumElts;
486fe6060f1SDimitry Andric }
487fe6060f1SDimitry Andric
488fe6060f1SDimitry Andric MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
489fe6060f1SDimitry Andric return true;
490fe6060f1SDimitry Andric }
491fe6060f1SDimitry Andric
applyINS(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & Builder,std::tuple<Register,int,Register,int> & MatchInfo)49206c3fb27SDimitry Andric void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
493fe6060f1SDimitry Andric MachineIRBuilder &Builder,
494fe6060f1SDimitry Andric std::tuple<Register, int, Register, int> &MatchInfo) {
495fe6060f1SDimitry Andric Builder.setInstrAndDebugLoc(MI);
496fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg();
497fe6060f1SDimitry Andric auto ScalarTy = MRI.getType(Dst).getElementType();
498fe6060f1SDimitry Andric Register DstVec, SrcVec;
499fe6060f1SDimitry Andric int DstLane, SrcLane;
500fe6060f1SDimitry Andric std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
501fe6060f1SDimitry Andric auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
502fe6060f1SDimitry Andric auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
503fe6060f1SDimitry Andric auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
504fe6060f1SDimitry Andric Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
505fe6060f1SDimitry Andric MI.eraseFromParent();
506fe6060f1SDimitry Andric }
507fe6060f1SDimitry Andric
508e8d8bef9SDimitry Andric /// isVShiftRImm - Check if this is a valid vector for the immediate
509e8d8bef9SDimitry Andric /// operand of a vector shift right operation. The value must be in the range:
510e8d8bef9SDimitry Andric /// 1 <= Value <= ElementBits for a right shift.
isVShiftRImm(Register Reg,MachineRegisterInfo & MRI,LLT Ty,int64_t & Cnt)51106c3fb27SDimitry Andric bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
512e8d8bef9SDimitry Andric int64_t &Cnt) {
513e8d8bef9SDimitry Andric assert(Ty.isVector() && "vector shift count is not a vector type");
514e8d8bef9SDimitry Andric MachineInstr *MI = MRI.getVRegDef(Reg);
515fe6060f1SDimitry Andric auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
516e8d8bef9SDimitry Andric if (!Cst)
517e8d8bef9SDimitry Andric return false;
518e8d8bef9SDimitry Andric Cnt = *Cst;
519e8d8bef9SDimitry Andric int64_t ElementBits = Ty.getScalarSizeInBits();
520e8d8bef9SDimitry Andric return Cnt >= 1 && Cnt <= ElementBits;
521e8d8bef9SDimitry Andric }
522e8d8bef9SDimitry Andric
523e8d8bef9SDimitry Andric /// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
matchVAshrLshrImm(MachineInstr & MI,MachineRegisterInfo & MRI,int64_t & Imm)52406c3fb27SDimitry Andric bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
525e8d8bef9SDimitry Andric int64_t &Imm) {
526e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
527e8d8bef9SDimitry Andric MI.getOpcode() == TargetOpcode::G_LSHR);
528e8d8bef9SDimitry Andric LLT Ty = MRI.getType(MI.getOperand(1).getReg());
529e8d8bef9SDimitry Andric if (!Ty.isVector())
530e8d8bef9SDimitry Andric return false;
531e8d8bef9SDimitry Andric return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
532e8d8bef9SDimitry Andric }
533e8d8bef9SDimitry Andric
applyVAshrLshrImm(MachineInstr & MI,MachineRegisterInfo & MRI,int64_t & Imm)53406c3fb27SDimitry Andric void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
535e8d8bef9SDimitry Andric int64_t &Imm) {
536e8d8bef9SDimitry Andric unsigned Opc = MI.getOpcode();
537e8d8bef9SDimitry Andric assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
538e8d8bef9SDimitry Andric unsigned NewOpc =
539e8d8bef9SDimitry Andric Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
540e8d8bef9SDimitry Andric MachineIRBuilder MIB(MI);
541e8d8bef9SDimitry Andric auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
542e8d8bef9SDimitry Andric MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
543e8d8bef9SDimitry Andric MI.eraseFromParent();
544e8d8bef9SDimitry Andric }
545e8d8bef9SDimitry Andric
546e8d8bef9SDimitry Andric /// Determine if it is possible to modify the \p RHS and predicate \p P of a
547e8d8bef9SDimitry Andric /// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
548e8d8bef9SDimitry Andric ///
549e8d8bef9SDimitry Andric /// \returns A pair containing the updated immediate and predicate which may
550e8d8bef9SDimitry Andric /// be used to optimize the instruction.
551e8d8bef9SDimitry Andric ///
552e8d8bef9SDimitry Andric /// \note This assumes that the comparison has been legalized.
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  // Vector compares cannot use arithmetic immediates at all.
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  // Keep the original value around: even when the adjusted constant is not a
  // legal arithmetic immediate, the adjustment may still make it cheaper to
  // materialize (checked at the bottom).
  uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
  uint64_t C = OriginalC;
  if (isLegalArithImmed(C))
    return std::nullopt;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    if (C == 0)
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => s sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => s uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  // Truncate to the compare width so the legality check sees the value that
  // will actually be encoded.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (isLegalArithImmed(C))
    return {{C, P}};

  // Not a legal arithmetic immediate, but the adjustment can still pay off
  // when the original constant needs multiple MOV-expansion instructions to
  // materialize while the adjusted one needs only a single instruction.
  auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, 32, Insn);
    return Insn.size() == 1;
  };

  if (!IsMaterializableInSingleInstruction(OriginalC) &&
      IsMaterializableInSingleInstruction(C))
    return {{C, P}};

  return std::nullopt;
}
653e8d8bef9SDimitry Andric
654e8d8bef9SDimitry Andric /// Determine whether or not it is possible to update the RHS and predicate of
655e8d8bef9SDimitry Andric /// a G_ICMP instruction such that the RHS will be selected as an arithmetic
656e8d8bef9SDimitry Andric /// immediate.
657e8d8bef9SDimitry Andric ///
658e8d8bef9SDimitry Andric /// \p MI - The G_ICMP instruction
659e8d8bef9SDimitry Andric /// \p MatchInfo - The new RHS immediate and predicate on success
660e8d8bef9SDimitry Andric ///
661e8d8bef9SDimitry Andric /// See tryAdjustICmpImmAndPred for valid transformations.
matchAdjustICmpImmAndPred(MachineInstr & MI,const MachineRegisterInfo & MRI,std::pair<uint64_t,CmpInst::Predicate> & MatchInfo)662e8d8bef9SDimitry Andric bool matchAdjustICmpImmAndPred(
663e8d8bef9SDimitry Andric MachineInstr &MI, const MachineRegisterInfo &MRI,
664e8d8bef9SDimitry Andric std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
665e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_ICMP);
666e8d8bef9SDimitry Andric Register RHS = MI.getOperand(3).getReg();
667e8d8bef9SDimitry Andric auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
668e8d8bef9SDimitry Andric if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
669e8d8bef9SDimitry Andric MatchInfo = *MaybeNewImmAndPred;
670e8d8bef9SDimitry Andric return true;
671e8d8bef9SDimitry Andric }
672e8d8bef9SDimitry Andric return false;
673e8d8bef9SDimitry Andric }
674e8d8bef9SDimitry Andric
applyAdjustICmpImmAndPred(MachineInstr & MI,std::pair<uint64_t,CmpInst::Predicate> & MatchInfo,MachineIRBuilder & MIB,GISelChangeObserver & Observer)67506c3fb27SDimitry Andric void applyAdjustICmpImmAndPred(
676e8d8bef9SDimitry Andric MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
677e8d8bef9SDimitry Andric MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
678e8d8bef9SDimitry Andric MIB.setInstrAndDebugLoc(MI);
679e8d8bef9SDimitry Andric MachineOperand &RHS = MI.getOperand(3);
680e8d8bef9SDimitry Andric MachineRegisterInfo &MRI = *MIB.getMRI();
681e8d8bef9SDimitry Andric auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
682e8d8bef9SDimitry Andric MatchInfo.first);
683e8d8bef9SDimitry Andric Observer.changingInstr(MI);
684e8d8bef9SDimitry Andric RHS.setReg(Cst->getOperand(0).getReg());
685e8d8bef9SDimitry Andric MI.getOperand(1).setPredicate(MatchInfo.second);
686e8d8bef9SDimitry Andric Observer.changedInstr(MI);
687e8d8bef9SDimitry Andric }
688e8d8bef9SDimitry Andric
matchDupLane(MachineInstr & MI,MachineRegisterInfo & MRI,std::pair<unsigned,int> & MatchInfo)689e8d8bef9SDimitry Andric bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
690e8d8bef9SDimitry Andric std::pair<unsigned, int> &MatchInfo) {
691e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
692e8d8bef9SDimitry Andric Register Src1Reg = MI.getOperand(1).getReg();
693e8d8bef9SDimitry Andric const LLT SrcTy = MRI.getType(Src1Reg);
694e8d8bef9SDimitry Andric const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
695e8d8bef9SDimitry Andric
696e8d8bef9SDimitry Andric auto LaneIdx = getSplatIndex(MI);
697e8d8bef9SDimitry Andric if (!LaneIdx)
698e8d8bef9SDimitry Andric return false;
699e8d8bef9SDimitry Andric
700e8d8bef9SDimitry Andric // The lane idx should be within the first source vector.
701e8d8bef9SDimitry Andric if (*LaneIdx >= SrcTy.getNumElements())
702e8d8bef9SDimitry Andric return false;
703e8d8bef9SDimitry Andric
704e8d8bef9SDimitry Andric if (DstTy != SrcTy)
705e8d8bef9SDimitry Andric return false;
706e8d8bef9SDimitry Andric
707e8d8bef9SDimitry Andric LLT ScalarTy = SrcTy.getElementType();
708e8d8bef9SDimitry Andric unsigned ScalarSize = ScalarTy.getSizeInBits();
709e8d8bef9SDimitry Andric
710e8d8bef9SDimitry Andric unsigned Opc = 0;
711e8d8bef9SDimitry Andric switch (SrcTy.getNumElements()) {
712e8d8bef9SDimitry Andric case 2:
713e8d8bef9SDimitry Andric if (ScalarSize == 64)
714e8d8bef9SDimitry Andric Opc = AArch64::G_DUPLANE64;
715fe6060f1SDimitry Andric else if (ScalarSize == 32)
716fe6060f1SDimitry Andric Opc = AArch64::G_DUPLANE32;
717e8d8bef9SDimitry Andric break;
718e8d8bef9SDimitry Andric case 4:
719e8d8bef9SDimitry Andric if (ScalarSize == 32)
720e8d8bef9SDimitry Andric Opc = AArch64::G_DUPLANE32;
7215f757f3fSDimitry Andric else if (ScalarSize == 16)
7225f757f3fSDimitry Andric Opc = AArch64::G_DUPLANE16;
723e8d8bef9SDimitry Andric break;
724e8d8bef9SDimitry Andric case 8:
7255f757f3fSDimitry Andric if (ScalarSize == 8)
7265f757f3fSDimitry Andric Opc = AArch64::G_DUPLANE8;
7275f757f3fSDimitry Andric else if (ScalarSize == 16)
728e8d8bef9SDimitry Andric Opc = AArch64::G_DUPLANE16;
729e8d8bef9SDimitry Andric break;
730e8d8bef9SDimitry Andric case 16:
731e8d8bef9SDimitry Andric if (ScalarSize == 8)
732e8d8bef9SDimitry Andric Opc = AArch64::G_DUPLANE8;
733e8d8bef9SDimitry Andric break;
734e8d8bef9SDimitry Andric default:
735e8d8bef9SDimitry Andric break;
736e8d8bef9SDimitry Andric }
737e8d8bef9SDimitry Andric if (!Opc)
738e8d8bef9SDimitry Andric return false;
739e8d8bef9SDimitry Andric
740e8d8bef9SDimitry Andric MatchInfo.first = Opc;
741e8d8bef9SDimitry Andric MatchInfo.second = *LaneIdx;
742e8d8bef9SDimitry Andric return true;
743e8d8bef9SDimitry Andric }
744e8d8bef9SDimitry Andric
applyDupLane(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B,std::pair<unsigned,int> & MatchInfo)74506c3fb27SDimitry Andric void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
746e8d8bef9SDimitry Andric MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
747e8d8bef9SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
748fe6060f1SDimitry Andric Register Src1Reg = MI.getOperand(1).getReg();
749fe6060f1SDimitry Andric const LLT SrcTy = MRI.getType(Src1Reg);
750fe6060f1SDimitry Andric
751e8d8bef9SDimitry Andric B.setInstrAndDebugLoc(MI);
752e8d8bef9SDimitry Andric auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
753fe6060f1SDimitry Andric
754fe6060f1SDimitry Andric Register DupSrc = MI.getOperand(1).getReg();
755fe6060f1SDimitry Andric // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
756fe6060f1SDimitry Andric // To do this, we can use a G_CONCAT_VECTORS to do the widening.
7575f757f3fSDimitry Andric if (SrcTy.getSizeInBits() == 64) {
758fe6060f1SDimitry Andric auto Undef = B.buildUndef(SrcTy);
7595f757f3fSDimitry Andric DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
760fe6060f1SDimitry Andric {Src1Reg, Undef.getReg(0)})
761fe6060f1SDimitry Andric .getReg(0);
762fe6060f1SDimitry Andric }
763fe6060f1SDimitry Andric B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
764e8d8bef9SDimitry Andric MI.eraseFromParent();
765e8d8bef9SDimitry Andric }
766e8d8bef9SDimitry Andric
matchScalarizeVectorUnmerge(MachineInstr & MI,MachineRegisterInfo & MRI)767cb14a3feSDimitry Andric bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
768cb14a3feSDimitry Andric auto &Unmerge = cast<GUnmerge>(MI);
769cb14a3feSDimitry Andric Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
770cb14a3feSDimitry Andric const LLT SrcTy = MRI.getType(Src1Reg);
771*0fca6ea1SDimitry Andric if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
772*0fca6ea1SDimitry Andric return false;
773cb14a3feSDimitry Andric return SrcTy.isVector() && !SrcTy.isScalable() &&
774cb14a3feSDimitry Andric Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
775cb14a3feSDimitry Andric }
776cb14a3feSDimitry Andric
applyScalarizeVectorUnmerge(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B)777cb14a3feSDimitry Andric void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
778cb14a3feSDimitry Andric MachineIRBuilder &B) {
779cb14a3feSDimitry Andric auto &Unmerge = cast<GUnmerge>(MI);
780cb14a3feSDimitry Andric Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
781cb14a3feSDimitry Andric const LLT SrcTy = MRI.getType(Src1Reg);
782cb14a3feSDimitry Andric assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
783cb14a3feSDimitry Andric "Expected a fixed length vector");
784cb14a3feSDimitry Andric
785cb14a3feSDimitry Andric for (int I = 0; I < SrcTy.getNumElements(); ++I)
786cb14a3feSDimitry Andric B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
787cb14a3feSDimitry Andric MI.eraseFromParent();
788cb14a3feSDimitry Andric }
789cb14a3feSDimitry Andric
matchBuildVectorToDup(MachineInstr & MI,MachineRegisterInfo & MRI)79006c3fb27SDimitry Andric bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
791fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
792fe6060f1SDimitry Andric auto Splat = getAArch64VectorSplat(MI, MRI);
793fe6060f1SDimitry Andric if (!Splat)
794fe6060f1SDimitry Andric return false;
795fe6060f1SDimitry Andric if (Splat->isReg())
796fe6060f1SDimitry Andric return true;
797fe6060f1SDimitry Andric // Later, during selection, we'll try to match imported patterns using
798fe6060f1SDimitry Andric // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
799fe6060f1SDimitry Andric // G_BUILD_VECTORs which could match those patterns.
800fe6060f1SDimitry Andric int64_t Cst = Splat->getCst();
801fe6060f1SDimitry Andric return (Cst != 0 && Cst != -1);
802fe6060f1SDimitry Andric }
803fe6060f1SDimitry Andric
applyBuildVectorToDup(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B)80406c3fb27SDimitry Andric void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
805fe6060f1SDimitry Andric MachineIRBuilder &B) {
806fe6060f1SDimitry Andric B.setInstrAndDebugLoc(MI);
807fe6060f1SDimitry Andric B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
808fe6060f1SDimitry Andric {MI.getOperand(1).getReg()});
809fe6060f1SDimitry Andric MI.eraseFromParent();
810fe6060f1SDimitry Andric }
811fe6060f1SDimitry Andric
812fe6060f1SDimitry Andric /// \returns how many instructions would be saved by folding a G_ICMP's shift
813fe6060f1SDimitry Andric /// and/or extension operations.
unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
  // No instructions to save if there's more than one use or no uses.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
      return true;
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!ValAndVReg)
      return false;
    uint64_t Mask = ValAndVReg->Value.getZExtValue();
    // AND with these masks is a zero-extend of a b/h/w sub-value, which the
    // compare can fold as an extended-register operand.
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  // A bare extend saves one instruction.
  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
    return 1;

  unsigned Opc = Def->getOpcode();
  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
      Opc != TargetOpcode::G_LSHR)
    return 0;

  // Only constant shift amounts can be folded into the compare.
  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
    return 0;
  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);

  // Check if we can fold an extend and a shift.
  // FIXME: This is duplicated with the selector. (See:
  // selectArithExtendedRegister)
  // Extended-register forms only accept shifts up to 4; beyond that, just the
  // shift itself can be folded (one instruction saved instead of two).
  if (IsSupportedExtend(*ShiftLHS))
    return (ShiftAmt <= 4) ? 2 : 1;

  // Otherwise a plain shifted register folds when the amount fits the width.
  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  if (Ty.isVector())
    return 0;
  unsigned ShiftSize = Ty.getSizeInBits();
  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
      (ShiftSize == 64 && ShiftAmt <= 63))
    return 1;
  return 0;
}
865fe6060f1SDimitry Andric
866fe6060f1SDimitry Andric /// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
867fe6060f1SDimitry Andric /// instruction \p MI.
bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Swap the operands if it would introduce a profitable folding opportunity.
  // (e.g. a shift + extend).
  //
  // For example:
  // lsl w13, w11, #1
  // cmp w13, w12
  // can be turned into:
  // cmp w12, w11, lsl #1

  // Don't swap if there's a constant on the RHS, because we know we can fold
  // that.
  Register RHS = MI.getOperand(3).getReg();
  auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  // If an operand would be selected as a CMN, the value whose folding
  // opportunities matter is the negated operand (operand 2 of its def),
  // not the negation itself.
  auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  };

  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
  // compare, would we be able to fold more instructions?
  Register TheLHS = GetRegForProfit(LHS);
  Register TheRHS = GetRegForProfit(RHS);

  // If the LHS is more likely to give us a folding opportunity, then swap the
  // LHS and RHS.
  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
          getCmpOperandFoldingProfit(TheRHS, MRI));
}
903fe6060f1SDimitry Andric
applySwapICmpOperands(MachineInstr & MI,GISelChangeObserver & Observer)90406c3fb27SDimitry Andric void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
905fe6060f1SDimitry Andric auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
906fe6060f1SDimitry Andric Register LHS = MI.getOperand(2).getReg();
907fe6060f1SDimitry Andric Register RHS = MI.getOperand(3).getReg();
908fe6060f1SDimitry Andric Observer.changedInstr(MI);
909fe6060f1SDimitry Andric MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
910fe6060f1SDimitry Andric MI.getOperand(2).setReg(RHS);
911fe6060f1SDimitry Andric MI.getOperand(3).setReg(LHS);
912fe6060f1SDimitry Andric Observer.changedInstr(MI);
913fe6060f1SDimitry Andric }
914fe6060f1SDimitry Andric
915fe6060f1SDimitry Andric /// \returns a function which builds a vector floating point compare instruction
916fe6060f1SDimitry Andric /// for a condition code \p CC.
917fe6060f1SDimitry Andric /// \param [in] IsZero - True if the comparison is against 0.
918fe6060f1SDimitry Andric /// \param [in] NoNans - True if the target has NoNansFPMath.
std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
              bool NoNans, MachineRegisterInfo &MRI) {
  // NOTE(review): NoNans is accepted but not consulted in any of the cases
  // visible here — confirm whether it is still needed by callers.
  LLT DstTy = MRI.getType(LHS);
  assert(DstTy.isVector() && "Expected vector types only?");
  assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    // There is no direct "not equal" compare: build FCMEQ(Z) and invert.
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      auto FCmp = IsZero
                      ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
                      : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
      return MIB.buildNot(DstTy, FCmp).getReg(0);
    };
  case AArch64CC::EQ:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::GE:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::GT:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::LS:
    // a <= b is implemented as b >= a: swap the operands of FCMGE.
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
                       .getReg(0);
    };
  case AArch64CC::MI:
    // a < b is implemented as b > a: swap the operands of FCMGT.
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
                       .getReg(0);
    };
  }
}
972fe6060f1SDimitry Andric
973fe6060f1SDimitry Andric /// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
matchLowerVectorFCMP(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & MIB)97406c3fb27SDimitry Andric bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
975fe6060f1SDimitry Andric MachineIRBuilder &MIB) {
976fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_FCMP);
977fe6060f1SDimitry Andric const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
97806c3fb27SDimitry Andric
979fe6060f1SDimitry Andric Register Dst = MI.getOperand(0).getReg();
980fe6060f1SDimitry Andric LLT DstTy = MRI.getType(Dst);
981fe6060f1SDimitry Andric if (!DstTy.isVector() || !ST.hasNEON())
982fe6060f1SDimitry Andric return false;
983fe6060f1SDimitry Andric Register LHS = MI.getOperand(2).getReg();
984fe6060f1SDimitry Andric unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
98506c3fb27SDimitry Andric if (EltSize == 16 && !ST.hasFullFP16())
986fe6060f1SDimitry Andric return false;
98706c3fb27SDimitry Andric if (EltSize != 16 && EltSize != 32 && EltSize != 64)
98806c3fb27SDimitry Andric return false;
98906c3fb27SDimitry Andric
99006c3fb27SDimitry Andric return true;
99106c3fb27SDimitry Andric }
99206c3fb27SDimitry Andric
/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  const auto &CmpMI = cast<GFCmp>(MI);

  Register Dst = CmpMI.getReg(0);
  CmpInst::Predicate Pred = CmpMI.getCond();
  Register LHS = CmpMI.getLHSReg();
  Register RHS = CmpMI.getRHSReg();

  LLT DstTy = MRI.getType(Dst);

  // Check whether the RHS is a constant splat so the compare-against-zero
  // pseudos can be used.
  auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);

  // Compares against 0 have special target-specific pseudos.
  bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  if ((Pred == CmpInst::Predicate::FCMP_ORD ||
       Pred == CmpInst::Predicate::FCMP_UNO) &&
      IsZero) {
    // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
    // NaN, so equivalent to a == a and doesn't need the two comparisons an
    // "ord" normally would.
    // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
    // thus equivalent to a != a.
    RHS = LHS;
    IsZero = false;
    CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
  } else
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

  // Instead of having an apply function, just build here to simplify things.
  MIB.setInstrAndDebugLoc(MI);

  const bool NoNans =
      ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

  // Some predicates lower to two compares whose results are OR'd together;
  // CC2 is AL when a single compare suffices.
  auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
  Register CmpRes;
  if (CC2 == AArch64CC::AL)
    CmpRes = Cmp(MIB);
  else {
    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
    auto Cmp2Dst = Cmp2(MIB);
    auto Cmp1Dst = Cmp(MIB);
    CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  }
  // Predicates with no direct encoding are built from the inverse compare
  // and NOT'd here.
  if (Invert)
    CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  MRI.replaceRegWith(Dst, CmpRes);
  MI.eraseFromParent();
}
1050fe6060f1SDimitry Andric
matchFormTruncstore(MachineInstr & MI,MachineRegisterInfo & MRI,Register & SrcReg)105106c3fb27SDimitry Andric bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1052fe6060f1SDimitry Andric Register &SrcReg) {
1053fe6060f1SDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_STORE);
1054fe6060f1SDimitry Andric Register DstReg = MI.getOperand(0).getReg();
1055fe6060f1SDimitry Andric if (MRI.getType(DstReg).isVector())
1056fe6060f1SDimitry Andric return false;
1057fe6060f1SDimitry Andric // Match a store of a truncate.
1058fe6060f1SDimitry Andric if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1059fe6060f1SDimitry Andric return false;
1060fe6060f1SDimitry Andric // Only form truncstores for value types of max 64b.
1061fe6060f1SDimitry Andric return MRI.getType(SrcReg).getSizeInBits() <= 64;
1062fe6060f1SDimitry Andric }
1063fe6060f1SDimitry Andric
/// Rewrite a matched G_STORE to store the wide \p SrcReg directly. The memory
/// operand is left unchanged, so the narrower memory size makes this a
/// truncating store.
void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Observer.changingInstr(MI);
  // Point the value operand at the pre-truncate source.
  MI.getOperand(0).setReg(SrcReg);
  Observer.changedInstr(MI);
}
1072fe6060f1SDimitry Andric
1073bdd1243dSDimitry Andric // Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1074bdd1243dSDimitry Andric // form in the first place for combine opportunities, so any remaining ones
1075bdd1243dSDimitry Andric // at this stage need be lowered back.
matchVectorSextInReg(MachineInstr & MI,MachineRegisterInfo & MRI)107606c3fb27SDimitry Andric bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1077bdd1243dSDimitry Andric assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1078bdd1243dSDimitry Andric Register DstReg = MI.getOperand(0).getReg();
1079bdd1243dSDimitry Andric LLT DstTy = MRI.getType(DstReg);
1080bdd1243dSDimitry Andric return DstTy.isVector();
1081bdd1243dSDimitry Andric }
1082bdd1243dSDimitry Andric
/// Lower a vector G_SEXT_INREG via the generic LegalizerHelper lowering
/// (which expands it back to shifts, per the match comment above).
void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  // G_SEXT_INREG lowering ignores the hint type, so pass an empty LLT.
  Helper.lower(MI, 0, /* Unused hint type */ LLT());
}
1090bdd1243dSDimitry Andric
/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
///  => unused, <N x t> = unmerge v
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &Unmerge = cast<GUnmerge>(MI);
  // Only handle the two-result form where the second result is unused
  // (ignoring debug uses).
  if (Unmerge.getNumDefs() != 2)
    return false;
  if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
    return false;

  LLT DstTy = MRI.getType(Unmerge.getReg(0));
  if (!DstTy.isVector())
    return false;

  // The unmerge source must come (possibly through copies) from a G_EXT.
  MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
  if (!Ext)
    return false;

  Register ExtSrc1 = Ext->getOperand(1).getReg();
  Register ExtSrc2 = Ext->getOperand(2).getReg();
  auto LowestVal =
      getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
  // The extract offset (in bytes) must equal one destination vector, i.e. the
  // G_EXT selects exactly the upper half of its first input.
  if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
    return false;

  // The second G_EXT operand must be undef, as in the pattern above.
  if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
    return false;

  // Hand the original (un-shifted) vector to the apply function.
  MatchInfo = ExtSrc1;
  return true;
}
11225f757f3fSDimitry Andric
applyUnmergeExtToUnmerge(MachineInstr & MI,MachineRegisterInfo & MRI,MachineIRBuilder & B,GISelChangeObserver & Observer,Register & SrcReg)11235f757f3fSDimitry Andric void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
11245f757f3fSDimitry Andric MachineIRBuilder &B,
11255f757f3fSDimitry Andric GISelChangeObserver &Observer, Register &SrcReg) {
11265f757f3fSDimitry Andric Observer.changingInstr(MI);
11275f757f3fSDimitry Andric // Swap dst registers.
11285f757f3fSDimitry Andric Register Dst1 = MI.getOperand(0).getReg();
11295f757f3fSDimitry Andric MI.getOperand(0).setReg(MI.getOperand(1).getReg());
11305f757f3fSDimitry Andric MI.getOperand(1).setReg(Dst1);
11315f757f3fSDimitry Andric MI.getOperand(2).setReg(SrcReg);
11325f757f3fSDimitry Andric Observer.changedInstr(MI);
11335f757f3fSDimitry Andric }
11345f757f3fSDimitry Andric
11355f757f3fSDimitry Andric // Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
11365f757f3fSDimitry Andric // Match v2s64 mul instructions, which will then be scalarised later on
11375f757f3fSDimitry Andric // Doing these two matches in one function to ensure that the order of matching
11385f757f3fSDimitry Andric // will always be the same.
11395f757f3fSDimitry Andric // Try lowering MUL to MULL before trying to scalarize if needed.
matchExtMulToMULL(MachineInstr & MI,MachineRegisterInfo & MRI)11405f757f3fSDimitry Andric bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
11415f757f3fSDimitry Andric // Get the instructions that defined the source operand
11425f757f3fSDimitry Andric LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
11435f757f3fSDimitry Andric MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
11445f757f3fSDimitry Andric MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
11455f757f3fSDimitry Andric
11465f757f3fSDimitry Andric if (DstTy.isVector()) {
11475f757f3fSDimitry Andric // If the source operands were EXTENDED before, then {U/S}MULL can be used
11485f757f3fSDimitry Andric unsigned I1Opc = I1->getOpcode();
11495f757f3fSDimitry Andric unsigned I2Opc = I2->getOpcode();
11505f757f3fSDimitry Andric if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
11515f757f3fSDimitry Andric (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
11525f757f3fSDimitry Andric (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
11535f757f3fSDimitry Andric MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
11545f757f3fSDimitry Andric (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
11555f757f3fSDimitry Andric MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
11565f757f3fSDimitry Andric return true;
11575f757f3fSDimitry Andric }
11585f757f3fSDimitry Andric // If result type is v2s64, scalarise the instruction
11595f757f3fSDimitry Andric else if (DstTy == LLT::fixed_vector(2, 64)) {
11605f757f3fSDimitry Andric return true;
11615f757f3fSDimitry Andric }
11625f757f3fSDimitry Andric }
11635f757f3fSDimitry Andric return false;
11645f757f3fSDimitry Andric }
11655f757f3fSDimitry Andric
/// Apply the transform matched by matchExtMulToMULL: prefer forming a
/// {U/S}MULL, otherwise narrow a v2s64 multiply.
void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  // Get the instructions that defined the source operand
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

  // If the source operands were EXTENDED before, then {U/S}MULL can be used.
  // NOTE: this deliberately re-checks the exact conditions of the match
  // function so both cases are applied in the same order they matched.
  unsigned I1Opc = I1->getOpcode();
  unsigned I2Opc = I2->getOpcode();
  if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
       (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
      (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
       MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
      (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
       MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {

    // Build a widening multiply directly on the un-extended sources; zext
    // pairs become G_UMULL, sext pairs become G_SMULL.
    B.setInstrAndDebugLoc(MI);
    B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
                                                         : AArch64::G_SMULL,
                 {MI.getOperand(0).getReg()},
                 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
    MI.eraseFromParent();
  }
  // If result type is v2s64, scalarise the instruction
  else if (DstTy == LLT::fixed_vector(2, 64)) {
    LegalizerHelper Helper(*MI.getMF(), Observer, B);
    B.setInstrAndDebugLoc(MI);
    // Narrow the multiply to half the element count.
    Helper.fewerElementsVector(
        MI, 0,
        DstTy.changeElementCount(
            DstTy.getElementCount().divideCoefficientBy(2)));
  }
}
12035f757f3fSDimitry Andric
12045f757f3fSDimitry Andric class AArch64PostLegalizerLoweringImpl : public Combiner {
120506c3fb27SDimitry Andric protected:
12065f757f3fSDimitry Andric // TODO: Make CombinerHelper methods const.
12075f757f3fSDimitry Andric mutable CombinerHelper Helper;
120806c3fb27SDimitry Andric const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
120906c3fb27SDimitry Andric const AArch64Subtarget &STI;
121006c3fb27SDimitry Andric
121106c3fb27SDimitry Andric public:
121206c3fb27SDimitry Andric AArch64PostLegalizerLoweringImpl(
12135f757f3fSDimitry Andric MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
12145f757f3fSDimitry Andric GISelCSEInfo *CSEInfo,
121506c3fb27SDimitry Andric const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
12165f757f3fSDimitry Andric const AArch64Subtarget &STI);
121706c3fb27SDimitry Andric
getName()121806c3fb27SDimitry Andric static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
121906c3fb27SDimitry Andric
12205f757f3fSDimitry Andric bool tryCombineAll(MachineInstr &I) const override;
122106c3fb27SDimitry Andric
122206c3fb27SDimitry Andric private:
122306c3fb27SDimitry Andric #define GET_GICOMBINER_CLASS_MEMBERS
1224e8d8bef9SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
122506c3fb27SDimitry Andric #undef GET_GICOMBINER_CLASS_MEMBERS
122606c3fb27SDimitry Andric };
122706c3fb27SDimitry Andric
122806c3fb27SDimitry Andric #define GET_GICOMBINER_IMPL
122906c3fb27SDimitry Andric #include "AArch64GenPostLegalizeGILowering.inc"
123006c3fb27SDimitry Andric #undef GET_GICOMBINER_IMPL
123106c3fb27SDimitry Andric
/// Construct the combiner: no known-bits (KB) analysis is needed, the helper
/// runs in pre-legalize mode (illegal ops are allowed by this pass), and the
/// tablegen-erated rule members are initialized from the generated .inc file.
AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI)
    : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}
1245e8d8bef9SDimitry Andric
/// Legacy MachineFunctionPass wrapper that runs the lowering combiner over a
/// legalized function.
class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  // Rule enable/disable state, parsed from the command line in the ctor.
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
1262e8d8bef9SDimitry Andric } // end anonymous namespace
1263e8d8bef9SDimitry Andric
/// Declare analysis dependencies: TargetPassConfig is required (used in
/// runOnMachineFunction) and the CFG is preserved.
void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  // Keep the analyses the SelectionDAG fallback path needs available.
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
1270e8d8bef9SDimitry Andric
AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());

  // Parse the combiner rule enable/disable command-line option once at pass
  // construction; an unknown rule identifier is a fatal error.
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}
1278e8d8bef9SDimitry Andric
/// Run the lowering combiner over \p MF. Requires a legalized function and
/// skips functions where instruction selection already failed.
bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  // This pass lowers generic ops into target-specific pseudos (see file
  // header), so illegal ops are allowed and nothing is re-legalized.
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
                     F.hasOptSize(), F.hasMinSize());
  AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
                                        RuleConfig, ST);
  return Impl.combineMachineInstrs();
}
1297e8d8bef9SDimitry Andric
char AArch64PostLegalizerLowering::ID = 0;
// Register the pass with the legacy PassRegistry, declaring the
// TargetPassConfig dependency required by getAnalysisUsage().
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)
1306e8d8bef9SDimitry Andric
namespace llvm {
/// Factory used by the AArch64 target to add this pass to the pipeline.
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm
1312