xref: /freebsd/contrib/llvm-project/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains some helper functions which try to cleanup artifacts
9 // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make
10 // the types match. This file also contains some combines of merges that happens
11 // at the end of the legalization.
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
15 #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
16 
17 #include "llvm/ADT/SmallBitVector.h"
18 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
21 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/Register.h"
27 #include "llvm/CodeGen/TargetOpcodes.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DebugInfoMetadata.h"
30 #include "llvm/Support/Debug.h"
31 
32 #define DEBUG_TYPE "legalizer"
33 
34 namespace llvm {
35 class LegalizationArtifactCombiner {
36   MachineIRBuilder &Builder;
37   MachineRegisterInfo &MRI;
38   const LegalizerInfo &LI;
39   GISelKnownBits *KB;
40 
isArtifactCast(unsigned Opc)41   static bool isArtifactCast(unsigned Opc) {
42     switch (Opc) {
43     case TargetOpcode::G_TRUNC:
44     case TargetOpcode::G_SEXT:
45     case TargetOpcode::G_ZEXT:
46     case TargetOpcode::G_ANYEXT:
47       return true;
48     default:
49       return false;
50     }
51   }
52 
53 public:
  /// \p KB is optional; when null, the known-bits based eliding of G_AND /
  /// G_SEXT_INREG in the zext/sext combines is simply skipped.
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const LegalizerInfo &LI,
                               GISelKnownBits *KB = nullptr)
      : Builder(B), MRI(MRI), LI(LI), KB(KB) {}
58 
  /// Try to eliminate a G_ANYEXT artifact \p MI by folding it into its source.
  ///
  /// Handled patterns (in order):
  ///   aext(trunc x)    -> aext/copy/trunc of x
  ///   aext([asz]ext x) -> [asz]ext x (the inner extension subsumes the aext)
  ///   aext(constant)   -> wider constant, when legal for the destination type
  ///   aext(implicit_def) via tryFoldImplicitDef.
  ///
  /// Instructions made dead are appended to \p DeadInsts, newly (re)defined
  /// registers to \p UpdatedDefs, and \p Observer is notified of changes.
  /// \returns true if the G_ANYEXT was combined away.
  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    // Look through COPYs so the matchers below see the real defining artifact.
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) - > aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        // Same type: the trunc/aext pair cancels out entirely.
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      // Re-emit the inner extension's opcode directly to the wider DstReg.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        auto *MergedLocation = DILocation::getMergedLocation(
            MI.getDebugLoc().get(), SrcMI->getDebugLoc().get());
        // Set the debug location to the merged location of the SrcMI and the MI
        // if the aext fold is successful.
        Builder.setDebugLoc(MergedLocation);
        // Sign-extending the value is a valid choice for G_ANYEXT, whose
        // high bits are unspecified.
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
117 
  /// Try to eliminate a G_ZEXT artifact \p MI by folding it into its source.
  ///
  /// Handled patterns (in order):
  ///   zext(trunc x) -> and (aext/copy/trunc x), mask
  ///   zext(sext x)  -> and (sext x), mask
  ///   zext(zext x)  -> zext x (inner zext forwarded in place)
  ///   zext(constant) -> wider constant, when legal for the destination type
  ///   zext(implicit_def) via tryFoldImplicitDef.
  ///
  /// Instructions made dead are appended to \p DeadInsts, newly (re)defined
  /// registers to \p UpdatedDefs, and \p Observer is notified of changes.
  /// \returns true if the G_ZEXT was combined away.
  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) - > and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      // The rewrite needs both a G_AND and a G_CONSTANT mask at DstTy.
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Mask of the low SrcTy bits: the bits the zext must preserve.
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      // Bring whichever source matched up (or down) to DstTy first.
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      APInt ExtMaskVal = MaskVal.zext(DstTy.getScalarSizeInBits());
      Register AndSrc = SextSrc ? SextSrc : TruncSrc;
      // Elide G_AND and mask constant if possible.
      // The G_AND would also be removed by the post-legalize redundant_and
      // combine, but in this very common case, eliding early and regardless of
      // OptLevel results in significant compile-time and O0 code-size
      // improvements. Inserting unnecessary instructions between boolean defs
      // and uses hinders a lot of folding during ISel.
      if (KB && (KB->getKnownZeroes(AndSrc) | ExtMaskVal).isAllOnes()) {
        replaceRegOrBuildCopy(DstReg, AndSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      } else {
        auto Mask = Builder.buildConstant(DstTy, ExtMaskVal);
        Builder.buildAnd(DstReg, AndSrc, Mask);
      }
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Rewrite MI in place to read the inner zext's source directly.
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      // MI itself survives; only the inner zext may become dead.
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
192 
  /// Try to eliminate a G_SEXT artifact \p MI by folding it into its source.
  ///
  /// Handled patterns (in order):
  ///   sext(trunc x)  -> sext_inreg (aext/copy/trunc x), c
  ///   sext(zext x)   -> zext x
  ///   sext(sext x)   -> sext x
  ///   sext(constant) -> wider constant, when legal for the destination type
  ///   sext(implicit_def) via tryFoldImplicitDef.
  ///
  /// Instructions made dead are appended to \p DeadInsts, newly (re)defined
  /// registers to \p UpdatedDefs, and \p Observer is notified of changes.
  /// \returns true if the G_SEXT was combined away.
  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Number of low bits whose sign must be replicated upward.
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      // Elide G_SEXT_INREG if possible. This is similar to eliding G_AND in
      // tryCombineZExt. Refer to the comment in tryCombineZExt for rationale.
      if (KB && KB->computeNumSignBits(TruncSrc) >
                    DstTy.getScalarSizeInBits() - SizeInBits)
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // The inner extension already determines the top bits; re-emit it
      // straight to the wider destination.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
257 
  /// Try to eliminate a G_TRUNC artifact \p MI by folding it into its source.
  ///
  /// Handled patterns (in order):
  ///   trunc(constant)     -> narrower constant, when legal
  ///   trunc(merge-like)   -> source reg, narrower trunc, or smaller merge
  ///   trunc(trunc x)      -> trunc x
  ///   trunc(ext/... x)    -> x, via ArtifactValueFinder bit-range search
  ///
  /// Instructions made dead are appended to \p DeadInsts, newly (re)defined
  /// registers to \p UpdatedDefs, and \p Observer is notified of changes.
  /// \returns true if the G_TRUNC was combined away.
  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult to legalize, merges
    if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(0);
      const LLT MergeSrcTy = MRI.getType(MergeSrcReg);

      // We can only fold if the types are scalar
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly
        if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(DstReg, MergeSrcReg);
        UpdatedDefs.push_back(DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match we can simply try to replace the register
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size we can use
        // a smaller merge instead
        if (isInstUnsupported(
                {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        // Only the low NumSrcs inputs of the original merge survive the
        // truncation.
        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require less inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(i);

        Builder.buildMergeValues(DstReg, SrcRegs);
        UpdatedDefs.push_back(DstReg);
      } else {
        // Unable to combine
        return false;
      }

      markInstAndDefDead(MI, *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must be
      // legal anyway as it must be legal for all outputs of the consumer type
      // set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
      return true;
    }

    // trunc(ext x) -> x
    // Search the artifact chain for a register already holding exactly the
    // low DstSize bits of the source value.
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Register FoundReg =
            Finder.findValueFromDef(DstReg, 0, DstTy.getSizeInBits())) {
      LLT FoundRegTy = MRI.getType(FoundReg);
      if (DstTy == FoundRegTy) {
        LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): "
                          << MI;);

        replaceRegOrBuildCopy(DstReg, FoundReg, MRI, Builder, UpdatedDefs,
                              Observer);
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
        return true;
      }
    }

    return false;
  }
375 
  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
  ///
  /// G_ANYEXT of an implicit_def becomes a wider G_IMPLICIT_DEF; G_SEXT /
  /// G_ZEXT of an implicit_def become a zero constant (a valid choice, since
  /// the extended high bits of an undef may be anything consistent).
  /// \returns true if \p MI was folded away; dead instructions go to
  /// \p DeadInsts and the rewritten def to \p UpdatedDefs.
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                           MI.getOperand(1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(0).getReg();
      LLT DstTy = MRI.getType(DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
        UpdatedDefs.push_back(DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(DstReg, 0);
        UpdatedDefs.push_back(DstReg);
      }

      markInstAndDefDead(MI, *DefMI, DeadInsts);
      return true;
    }
    return false;
  }
412 
  /// Try to fold a G_UNMERGE_VALUES \p MI whose source is defined by the
  /// artifact cast \p CastMI, by unmerging the cast's input instead.
  ///
  /// Currently only G_TRUNC casts are handled, in two shapes:
  ///  - vector trunc feeding an unmerge with matching scalar types: unmerge
  ///    the wide source, then trunc each piece;
  ///  - scalar trunc feeding a scalar unmerge: unmerge the wide source into
  ///    more pieces and discard the extras.
  /// \returns true if the unmerge was rewritten; dead instructions go to
  /// \p DeadInsts and new defs to \p UpdatedDefs.
  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    // G_UNMERGE_VALUES has NumDefs results followed by one source operand.
    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        //  %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        //  %2:_(s8) = G_TRUNC %6
        //  %3:_(s8) = G_TRUNC %7
        //  %4:_(s8) = G_TRUNC %8
        //  %5:_(s8) = G_TRUNC %9

        // Elements of the wide source that correspond to one unmerge result.
        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));
        LLT SrcWideTy =
            SrcTy.changeElementCount(ElementCount::getFixed(UnmergeNumElts));

        // Bail out if the new unmerge is unsupported, or if the per-piece
        // trunc would be legalized by widening the element count (which
        // would just recreate the problem).
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}) ||
            LI.getAction({TargetOpcode::G_TRUNC, {SrcWideTy, UnmergeTy}})
                    .Action == LegalizeActions::MoreElements)
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        //  %1:_(s16) = G_TRUNC %0(s32)
        //  %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }
510 
canFoldMergeOpcode(unsigned MergeOp,unsigned ConvertOp,LLT OpTy,LLT DestTy)511   static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
512                                  LLT OpTy, LLT DestTy) {
513     // Check if we found a definition that is like G_MERGE_VALUES.
514     switch (MergeOp) {
515     default:
516       return false;
517     case TargetOpcode::G_BUILD_VECTOR:
518     case TargetOpcode::G_MERGE_VALUES:
519       // The convert operation that we will need to insert is
520       // going to convert the input of that type of instruction (scalar)
521       // to the destination type (DestTy).
522       // The conversion needs to stay in the same domain (scalar to scalar
523       // and vector to vector), so if we were to allow to fold the merge
524       // we would need to insert some bitcasts.
525       // E.g.,
526       // <2 x s16> = build_vector s16, s16
527       // <2 x s32> = zext <2 x s16>
528       // <2 x s16>, <2 x s16> = unmerge <2 x s32>
529       //
530       // As is the folding would produce:
531       // <2 x s16> = zext s16  <-- scalar to vector
532       // <2 x s16> = zext s16  <-- scalar to vector
533       // Which is invalid.
534       // Instead we would want to generate:
535       // s32 = zext s16
536       // <2 x s16> = bitcast s32
537       // s32 = zext s16
538       // <2 x s16> = bitcast s32
539       //
540       // That is not done yet.
541       if (ConvertOp == 0)
542         return true;
543       return !DestTy.isVector() && OpTy.isVector() &&
544              DestTy == OpTy.getElementType();
545     case TargetOpcode::G_CONCAT_VECTORS: {
546       if (ConvertOp == 0)
547         return true;
548       if (!DestTy.isVector())
549         return false;
550 
551       const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
552 
553       // Don't handle scalarization with a cast that isn't in the same
554       // direction as the vector cast. This could be handled, but it would
555       // require more intermediate unmerges.
556       if (ConvertOp == TargetOpcode::G_TRUNC)
557         return DestTy.getSizeInBits() <= OpEltSize;
558       return DestTy.getSizeInBits() >= OpEltSize;
559     }
560     }
561   }
562 
563   /// Try to replace DstReg with SrcReg or build a COPY instruction
564   /// depending on the register constraints.
replaceRegOrBuildCopy(Register DstReg,Register SrcReg,MachineRegisterInfo & MRI,MachineIRBuilder & Builder,SmallVectorImpl<Register> & UpdatedDefs,GISelChangeObserver & Observer)565   static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
566                                     MachineRegisterInfo &MRI,
567                                     MachineIRBuilder &Builder,
568                                     SmallVectorImpl<Register> &UpdatedDefs,
569                                     GISelChangeObserver &Observer) {
570     if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
571       Builder.buildCopy(DstReg, SrcReg);
572       UpdatedDefs.push_back(DstReg);
573       return;
574     }
575     SmallVector<MachineInstr *, 4> UseMIs;
576     // Get the users and notify the observer before replacing.
577     for (auto &UseMI : MRI.use_instructions(DstReg)) {
578       UseMIs.push_back(&UseMI);
579       Observer.changingInstr(UseMI);
580     }
581     // Replace the registers.
582     MRI.replaceRegWith(DstReg, SrcReg);
583     UpdatedDefs.push_back(SrcReg);
584     // Notify the observer that we changed the instructions.
585     for (auto *UseMI : UseMIs)
586       Observer.changedInstr(*UseMI);
587   }
588 
589   /// Return the operand index in \p MI that defines \p Def
getDefIndex(const MachineInstr & MI,Register SearchDef)590   static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
591     unsigned DefIdx = 0;
592     for (const MachineOperand &Def : MI.defs()) {
593       if (Def.getReg() == SearchDef)
594         break;
595       ++DefIdx;
596     }
597 
598     return DefIdx;
599   }
600 
601   /// This class provides utilities for finding source registers of specific
602   /// bit ranges in an artifact. The routines can look through the source
603   /// registers if they're other artifacts to try to find a non-artifact source
604   /// of a value.
605   class ArtifactValueFinder {
606     MachineRegisterInfo &MRI;
607     MachineIRBuilder &MIB;
608     const LegalizerInfo &LI;
609 
610     // Stores the best register found in the current query so far.
611     Register CurrentBest = Register();
612 
    /// Given a concat_vectors op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All concat sources share one type, so source 0's size applies to all.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 of the concat is its def.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        // The range covers this source exactly: remember it as the best
        // answer so far and keep searching through its own definition.
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }
644 
    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All build_vector sources share one scalar type.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 of the build_vector is its def.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        // Synthesize a smaller build_vector from the covered sources.
        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }
698 
    /// Given an G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// Only handles queries that fall entirely inside either the container
    /// operand or the inserted operand; a range straddling the two cannot be
    /// traced to a single register.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      // G_INSERT operands: (0) def, (1) container, (2) inserted value,
      // (3) bit offset of the inserted value within the container.
      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      // Range lies entirely outside the inserted bits (scenarios B and C):
      // continue the search through the container operand at the same offset.
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // Range lies entirely inside the inserted bits (scenarios A and D):
      // rebase the query onto the inserted value.
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        // An exact cover of the inserted value is a usable answer even if the
        // recursive search below finds nothing better.
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }
770 
771     /// Given an G_SEXT, G_ZEXT, G_ANYEXT op \p MI and a start bit and
772     /// size, try to find the origin of the value defined by that start
773     /// position and size.
774     ///
775     /// \returns a register with the requested size, or the current best
776     /// register found during the current query.
findValueFromExt(MachineInstr & MI,unsigned StartBit,unsigned Size)777     Register findValueFromExt(MachineInstr &MI, unsigned StartBit,
778                               unsigned Size) {
779       assert(MI.getOpcode() == TargetOpcode::G_SEXT ||
780              MI.getOpcode() == TargetOpcode::G_ZEXT ||
781              MI.getOpcode() == TargetOpcode::G_ANYEXT);
782       assert(Size > 0);
783 
784       Register SrcReg = MI.getOperand(1).getReg();
785       LLT SrcType = MRI.getType(SrcReg);
786       unsigned SrcSize = SrcType.getSizeInBits();
787 
788       // Currently we don't go into vectors.
789       if (!SrcType.isScalar())
790         return CurrentBest;
791 
792       if (StartBit + Size > SrcSize)
793         return CurrentBest;
794 
795       if (StartBit == 0 && SrcType.getSizeInBits() == Size)
796         CurrentBest = SrcReg;
797       return findValueFromDefImpl(SrcReg, StartBit, Size);
798     }
799 
800     /// Given an G_TRUNC op \p MI and a start bit and size, try to find
801     /// the origin of the value defined by that start position and size.
802     ///
803     /// \returns a register with the requested size, or the current best
804     /// register found during the current query.
findValueFromTrunc(MachineInstr & MI,unsigned StartBit,unsigned Size)805     Register findValueFromTrunc(MachineInstr &MI, unsigned StartBit,
806                                 unsigned Size) {
807       assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
808       assert(Size > 0);
809 
810       Register SrcReg = MI.getOperand(1).getReg();
811       LLT SrcType = MRI.getType(SrcReg);
812 
813       // Currently we don't go into vectors.
814       if (!SrcType.isScalar())
815         return CurrentBest;
816 
817       return findValueFromDefImpl(SrcReg, StartBit, Size);
818     }
819 
    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    ///
    /// Dispatches on the opcode defining \p DefReg (looking through copies)
    /// and recurses through the artifact-specific helpers.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      std::optional<DefinitionAndSourceRegister> DefSrcReg =
          getDefSrcRegIgnoringCopies(DefReg, MRI);
      MachineInstr *Def = DefSrcReg->MI;
      DefReg = DefSrcReg->Reg;
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        // Accumulate the bit offset of DefReg within the unmerge's source by
        // counting the defs that precede it; all defs have the same size.
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        // The unmerge's single source is its last operand.
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      case TargetOpcode::G_TRUNC:
        return findValueFromTrunc(*Def, StartBit, Size);
      case TargetOpcode::G_SEXT:
      case TargetOpcode::G_ZEXT:
      case TargetOpcode::G_ANYEXT:
        return findValueFromExt(*Def, StartBit, Size);
      default:
        // Not an artifact we know how to look through.
        return CurrentBest;
      }
    }
870 
  public:
    /// Construct a finder that reads types and defs from \p Mri, emits any
    /// new instructions through \p Builder, and consults \p Info for
    /// legality of instructions it may create.
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}
875 
876     /// Try to find a source of the value defined in the def \p DefReg, starting
877     /// at position \p StartBit with size \p Size.
878     /// \returns a register with the requested size, or an empty Register if no
879     /// better value could be found.
findValueFromDef(Register DefReg,unsigned StartBit,unsigned Size)880     Register findValueFromDef(Register DefReg, unsigned StartBit,
881                               unsigned Size) {
882       CurrentBest = Register();
883       Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
884       return FoundReg != DefReg ? FoundReg : Register();
885     }
886 
    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provides the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        // A def with no non-debug uses is already dead.
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        // Only replace with a value of the exact same type.
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
        // replaceRegOrBuildCopy may have rewritten every occurrence of DefReg,
        // including the def operand of MI itself; restore that def here.
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      return DeadDefs.all();
    }
918 
findUnmergeThatDefinesReg(Register Reg,unsigned Size,unsigned & DefOperandIdx)919     GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size,
920                                         unsigned &DefOperandIdx) {
921       if (Register Def = findValueFromDefImpl(Reg, 0, Size)) {
922         if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) {
923           DefOperandIdx =
924               Unmerge->findRegisterDefOperandIdx(Def, /*TRI=*/nullptr);
925           return Unmerge;
926         }
927       }
928       return nullptr;
929     }
930 
931     // Check if sequence of elements from merge-like instruction is defined by
932     // another sequence of elements defined by unmerge. Most often this is the
933     // same sequence. Search for elements using findValueFromDefImpl.
isSequenceFromUnmerge(GMergeLikeInstr & MI,unsigned MergeStartIdx,GUnmerge * Unmerge,unsigned UnmergeIdxStart,unsigned NumElts,unsigned EltSize,bool AllowUndef)934     bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx,
935                                GUnmerge *Unmerge, unsigned UnmergeIdxStart,
936                                unsigned NumElts, unsigned EltSize,
937                                bool AllowUndef) {
938       assert(MergeStartIdx + NumElts <= MI.getNumSources());
939       for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) {
940         unsigned EltUnmergeIdx;
941         GUnmerge *EltUnmerge = findUnmergeThatDefinesReg(
942             MI.getSourceReg(i), EltSize, EltUnmergeIdx);
943         // Check if source i comes from the same Unmerge.
944         if (EltUnmerge == Unmerge) {
945           // Check that source i's def has same index in sequence in Unmerge.
946           if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
947             return false;
948         } else if (!AllowUndef ||
949                    MRI.getVRegDef(MI.getSourceReg(i))->getOpcode() !=
950                        TargetOpcode::G_IMPLICIT_DEF)
951           return false;
952       }
953       return true;
954     }
955 
tryCombineMergeLike(GMergeLikeInstr & MI,SmallVectorImpl<MachineInstr * > & DeadInsts,SmallVectorImpl<Register> & UpdatedDefs,GISelChangeObserver & Observer)956     bool tryCombineMergeLike(GMergeLikeInstr &MI,
957                              SmallVectorImpl<MachineInstr *> &DeadInsts,
958                              SmallVectorImpl<Register> &UpdatedDefs,
959                              GISelChangeObserver &Observer) {
960       Register Elt0 = MI.getSourceReg(0);
961       LLT EltTy = MRI.getType(Elt0);
962       unsigned EltSize = EltTy.getSizeInBits();
963 
964       unsigned Elt0UnmergeIdx;
965       // Search for unmerge that will be candidate for combine.
966       auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx);
967       if (!Unmerge)
968         return false;
969 
970       unsigned NumMIElts = MI.getNumSources();
971       Register Dst = MI.getReg(0);
972       LLT DstTy = MRI.getType(Dst);
973       Register UnmergeSrc = Unmerge->getSourceReg();
974       LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
975 
976       // Recognize copy of UnmergeSrc to Dst.
977       // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst.
978       //
979       // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty)
980       // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ...
981       //
982       // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
983       if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
984         if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize,
985                                    /*AllowUndef=*/DstTy.isVector()))
986           return false;
987 
988         replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
989         DeadInsts.push_back(&MI);
990         return true;
991       }
992 
993       // Recognize UnmergeSrc that can be unmerged to DstTy directly.
994       // Types have to be either both vector or both non-vector types.
995       // Merge-like opcodes are combined one at the time. First one creates new
996       // unmerge, following should use the same unmerge (builder performs CSE).
997       //
998       // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
999       // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
1000       // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
1001       //
1002       // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
1003       if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
1004           (Elt0UnmergeIdx % NumMIElts == 0) &&
1005           getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
1006         if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
1007                                    EltSize, false))
1008           return false;
1009         MIB.setInstrAndDebugLoc(MI);
1010         auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
1011         unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
1012         replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB,
1013                               UpdatedDefs, Observer);
1014         DeadInsts.push_back(&MI);
1015         return true;
1016       }
1017 
1018       // Recognize when multiple unmerged sources with UnmergeSrcTy type
1019       // can be merged into Dst with DstTy type directly.
1020       // Types have to be either both vector or both non-vector types.
1021 
1022       // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
1023       // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
1024       // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
1025       //
1026       // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc
1027 
1028       if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
1029           getCoverTy(DstTy, UnmergeSrcTy) == DstTy) {
1030         SmallVector<Register, 4> ConcatSources;
1031         unsigned NumElts = Unmerge->getNumDefs();
1032         for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
1033           unsigned EltUnmergeIdx;
1034           auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i),
1035                                                      EltSize, EltUnmergeIdx);
1036           // All unmerges have to be the same size.
1037           if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
1038               (EltUnmergeIdx != 0))
1039             return false;
1040           if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize,
1041                                      false))
1042             return false;
1043           ConcatSources.push_back(UnmergeI->getSourceReg());
1044         }
1045 
1046         MIB.setInstrAndDebugLoc(MI);
1047         MIB.buildMergeLikeInstr(Dst, ConcatSources);
1048         DeadInsts.push_back(&MI);
1049         return true;
1050       }
1051 
1052       return false;
1053     }
1054   };
1055 
tryCombineUnmergeValues(GUnmerge & MI,SmallVectorImpl<MachineInstr * > & DeadInsts,SmallVectorImpl<Register> & UpdatedDefs,GISelChangeObserver & Observer)1056   bool tryCombineUnmergeValues(GUnmerge &MI,
1057                                SmallVectorImpl<MachineInstr *> &DeadInsts,
1058                                SmallVectorImpl<Register> &UpdatedDefs,
1059                                GISelChangeObserver &Observer) {
1060     unsigned NumDefs = MI.getNumDefs();
1061     Register SrcReg = MI.getSourceReg();
1062     MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
1063     if (!SrcDef)
1064       return false;
1065 
1066     LLT OpTy = MRI.getType(SrcReg);
1067     LLT DestTy = MRI.getType(MI.getReg(0));
1068     unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);
1069 
1070     Builder.setInstrAndDebugLoc(MI);
1071 
1072     ArtifactValueFinder Finder(MRI, Builder, LI);
1073     if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
1074       markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
1075       return true;
1076     }
1077 
1078     if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
1079       // %0:_(<4 x s16>) = G_FOO
1080       // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
1081       // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
1082       //
1083       // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
1084       Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
1085       LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);
1086 
1087       // If we need to decrease the number of vector elements in the result type
1088       // of an unmerge, this would involve the creation of an equivalent unmerge
1089       // to copy back to the original result registers.
1090       LegalizeActionStep ActionStep = LI.getAction(
1091           {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
1092       switch (ActionStep.Action) {
1093       case LegalizeActions::Lower:
1094       case LegalizeActions::Unsupported:
1095         break;
1096       case LegalizeActions::FewerElements:
1097       case LegalizeActions::NarrowScalar:
1098         if (ActionStep.TypeIdx == 1)
1099           return false;
1100         break;
1101       default:
1102         return false;
1103       }
1104 
1105       auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);
1106 
1107       // TODO: Should we try to process out the other defs now? If the other
1108       // defs of the source unmerge are also unmerged, we end up with a separate
1109       // unmerge for each one.
1110       for (unsigned I = 0; I != NumDefs; ++I) {
1111         Register Def = MI.getReg(I);
1112         replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
1113                               MRI, Builder, UpdatedDefs, Observer);
1114       }
1115 
1116       markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
1117       return true;
1118     }
1119 
1120     MachineInstr *MergeI = SrcDef;
1121     unsigned ConvertOp = 0;
1122 
1123     // Handle intermediate conversions
1124     unsigned SrcOp = SrcDef->getOpcode();
1125     if (isArtifactCast(SrcOp)) {
1126       ConvertOp = SrcOp;
1127       MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
1128     }
1129 
1130     if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
1131                                        ConvertOp, OpTy, DestTy)) {
1132       // We might have a chance to combine later by trying to combine
1133       // unmerge(cast) first
1134       return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
1135     }
1136 
1137     const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;
1138 
1139     if (NumMergeRegs < NumDefs) {
1140       if (NumDefs % NumMergeRegs != 0)
1141         return false;
1142 
1143       Builder.setInstr(MI);
1144       // Transform to UNMERGEs, for example
1145       //   %1 = G_MERGE_VALUES %4, %5
1146       //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
1147       // to
1148       //   %9, %10 = G_UNMERGE_VALUES %4
1149       //   %11, %12 = G_UNMERGE_VALUES %5
1150 
1151       const unsigned NewNumDefs = NumDefs / NumMergeRegs;
1152       for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
1153         SmallVector<Register, 8> DstRegs;
1154         for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
1155              ++j, ++DefIdx)
1156           DstRegs.push_back(MI.getReg(DefIdx));
1157 
1158         if (ConvertOp) {
1159           LLT MergeDstTy = MRI.getType(SrcDef->getOperand(0).getReg());
1160 
1161           // This is a vector that is being split and casted. Extract to the
1162           // element type, and do the conversion on the scalars (or smaller
1163           // vectors).
1164           LLT MergeEltTy = MergeDstTy.divide(NumMergeRegs);
1165 
1166           // Handle split to smaller vectors, with conversions.
1167           // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
1168           // %3(<8 x s16>) = G_SEXT %2
1169           // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) =
1170           // G_UNMERGE_VALUES %3
1171           //
1172           // =>
1173           //
1174           // %8(<4 x s16>) = G_SEXT %0
1175           // %9(<4 x s16>) = G_SEXT %1
1176           // %4(<2 x s16>), %5(<2 x s16>) = G_UNMERGE_VALUES %8
1177           // %7(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %9
1178 
1179           Register TmpReg = MRI.createGenericVirtualRegister(MergeEltTy);
1180           Builder.buildInstr(ConvertOp, {TmpReg},
1181                              {MergeI->getOperand(Idx + 1).getReg()});
1182           Builder.buildUnmerge(DstRegs, TmpReg);
1183         } else {
1184           Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
1185         }
1186         UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
1187       }
1188 
1189     } else if (NumMergeRegs > NumDefs) {
1190       if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
1191         return false;
1192 
1193       Builder.setInstr(MI);
1194       // Transform to MERGEs
1195       //   %6 = G_MERGE_VALUES %17, %18, %19, %20
1196       //   %7, %8 = G_UNMERGE_VALUES %6
1197       // to
1198       //   %7 = G_MERGE_VALUES %17, %18
1199       //   %8 = G_MERGE_VALUES %19, %20
1200 
1201       const unsigned NumRegs = NumMergeRegs / NumDefs;
1202       for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
1203         SmallVector<Register, 8> Regs;
1204         for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
1205              ++j, ++Idx)
1206           Regs.push_back(MergeI->getOperand(Idx).getReg());
1207 
1208         Register DefReg = MI.getReg(DefIdx);
1209         Builder.buildMergeLikeInstr(DefReg, Regs);
1210         UpdatedDefs.push_back(DefReg);
1211       }
1212 
1213     } else {
1214       LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());
1215 
1216       if (!ConvertOp && DestTy != MergeSrcTy)
1217         ConvertOp = TargetOpcode::G_BITCAST;
1218 
1219       if (ConvertOp) {
1220         Builder.setInstr(MI);
1221 
1222         for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
1223           Register DefReg = MI.getOperand(Idx).getReg();
1224           Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
1225 
1226           if (!MRI.use_empty(DefReg)) {
1227             Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
1228             UpdatedDefs.push_back(DefReg);
1229           }
1230         }
1231 
1232         markInstAndDefDead(MI, *MergeI, DeadInsts);
1233         return true;
1234       }
1235 
1236       assert(DestTy == MergeSrcTy &&
1237              "Bitcast and the other kinds of conversions should "
1238              "have happened earlier");
1239 
1240       Builder.setInstr(MI);
1241       for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
1242         Register DstReg = MI.getOperand(Idx).getReg();
1243         Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
1244         replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
1245                               Observer);
1246       }
1247     }
1248 
1249     markInstAndDefDead(MI, *MergeI, DeadInsts);
1250     return true;
1251   }
1252 
tryCombineExtract(MachineInstr & MI,SmallVectorImpl<MachineInstr * > & DeadInsts,SmallVectorImpl<Register> & UpdatedDefs)1253   bool tryCombineExtract(MachineInstr &MI,
1254                          SmallVectorImpl<MachineInstr *> &DeadInsts,
1255                          SmallVectorImpl<Register> &UpdatedDefs) {
1256     assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
1257 
1258     // Try to use the source registers from a G_MERGE_VALUES
1259     //
1260     // %2 = G_MERGE_VALUES %0, %1
1261     // %3 = G_EXTRACT %2, N
1262     // =>
1263     //
1264     // for N < %2.getSizeInBits() / 2
1265     //     %3 = G_EXTRACT %0, N
1266     //
1267     // for N >= %2.getSizeInBits() / 2
1268     //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
1269 
1270     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
1271     MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
1272     if (!MergeI || !isa<GMergeLikeInstr>(MergeI))
1273       return false;
1274 
1275     Register DstReg = MI.getOperand(0).getReg();
1276     LLT DstTy = MRI.getType(DstReg);
1277     LLT SrcTy = MRI.getType(SrcReg);
1278 
1279     // TODO: Do we need to check if the resulting extract is supported?
1280     unsigned ExtractDstSize = DstTy.getSizeInBits();
1281     unsigned Offset = MI.getOperand(2).getImm();
1282     unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
1283     unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
1284     unsigned MergeSrcIdx = Offset / MergeSrcSize;
1285 
1286     // Compute the offset of the last bit the extract needs.
1287     unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
1288 
1289     // Can't handle the case where the extract spans multiple inputs.
1290     if (MergeSrcIdx != EndMergeSrcIdx)
1291       return false;
1292 
1293     // TODO: We could modify MI in place in most cases.
1294     Builder.setInstr(MI);
1295     Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
1296                          Offset - MergeSrcIdx * MergeSrcSize);
1297     UpdatedDefs.push_back(DstReg);
1298     markInstAndDefDead(MI, *MergeI, DeadInsts);
1299     return true;
1300   }
1301 
  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    ArtifactValueFinder Finder(MRI, Builder, LI);

    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts,
                                           UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all artifact
        // combines at the moment look only "up" the def-use chains, we achieve
        // that by throwing truncates' users (with look through copies) into the
        // ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
        case TargetOpcode::G_BUILD_VECTOR:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::G_ASSERT_SEXT:
        case TargetOpcode::G_ASSERT_ZEXT:
        case TargetOpcode::G_ASSERT_ALIGN:
        case TargetOpcode::COPY: {
          // Pass-through instructions: keep following the chain through the
          // copied-to vreg instead of enqueueing the pass-through itself.
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }
1407 
1408 private:
getArtifactSrcReg(const MachineInstr & MI)1409   static Register getArtifactSrcReg(const MachineInstr &MI) {
1410     switch (MI.getOpcode()) {
1411     case TargetOpcode::COPY:
1412     case TargetOpcode::G_TRUNC:
1413     case TargetOpcode::G_ZEXT:
1414     case TargetOpcode::G_ANYEXT:
1415     case TargetOpcode::G_SEXT:
1416     case TargetOpcode::G_EXTRACT:
1417     case TargetOpcode::G_ASSERT_SEXT:
1418     case TargetOpcode::G_ASSERT_ZEXT:
1419     case TargetOpcode::G_ASSERT_ALIGN:
1420       return MI.getOperand(1).getReg();
1421     case TargetOpcode::G_UNMERGE_VALUES:
1422       return MI.getOperand(MI.getNumOperands() - 1).getReg();
1423     default:
1424       llvm_unreachable("Not a legalization artifact happen");
1425     }
1426   }
1427 
1428   /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
1429   /// (either by killing it or changing operands) results in DefMI being dead
1430   /// too. In-between COPYs or artifact-casts are also collected if they are
1431   /// dead.
1432   /// MI is not marked dead.
       ///
       /// \param MI       the artifact about to be combined away (not pushed here).
       /// \param DefMI    the def at the top of MI's source chain.
       /// \param DeadInsts receives every instruction found dead by this walk.
       /// \param DefIdx   which def operand of DefMI feeds the chain (default 0).
1433   void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
1434                    SmallVectorImpl<MachineInstr *> &DeadInsts,
1435                    unsigned DefIdx = 0) {
1436     // Collect all the copy instructions that are made dead, due to deleting
1437     // this instruction. Collect all of them until the Trunc(DefMI).
1438     // Eg,
1439     // %1(s1) = G_TRUNC %0(s32)
1440     // %2(s1) = COPY %1(s1)
1441     // %3(s1) = COPY %2(s1)
1442     // %4(s32) = G_ANYEXT %3(s1)
1443     // In this case, we would have replaced %4 with a copy of %0,
1444     // and as a result, %3, %2, %1 are dead.
       // Walk the def-use chain upwards from MI towards DefMI. Each hop is
       // valid only while the connecting vreg has exactly one use (this chain),
       // so killing MI cascades death up the chain.
1445     MachineInstr *PrevMI = &MI;
1446     while (PrevMI != &DefMI) {
1447       Register PrevRegSrc = getArtifactSrcReg(*PrevMI);
1448 
1449       MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
1450       if (MRI.hasOneUse(PrevRegSrc)) {
1451         if (TmpDef != &DefMI) {
           // Only copies, artifact casts and optimization hints are expected
           // between the combined artifact and its defining instruction.
1452           assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
1453                   isArtifactCast(TmpDef->getOpcode()) ||
1454                   isPreISelGenericOptimizationHint(TmpDef->getOpcode())) &&
1455                  "Expecting copy or artifact cast here");
1456 
1457           DeadInsts.push_back(TmpDef);
1458         }
1459       } else
         // Another user keeps this link alive; nothing above it can die.
1460         break;
1461       PrevMI = TmpDef;
1462     }
1463 
       // If the walk reached DefMI itself, it is dead iff every def other than
       // DefIdx is unused and the DefIdx def has no user besides this chain.
1464     if (PrevMI == &DefMI) {
1465       unsigned I = 0;
1466       bool IsDead = true;
1467       for (MachineOperand &Def : DefMI.defs()) {
1468         if (I != DefIdx) {
1469           if (!MRI.use_empty(Def.getReg())) {
1470             IsDead = false;
1471             break;
1472           }
1473         } else {
           // NOTE(review): this break leaves IsDead == true, so DefMI is still
           // pushed as dead even though its DefIdx def has extra users. It
           // looks unreachable given the hasOneUse checks in the walk above,
           // but that should be confirmed; otherwise IsDead = false belongs
           // here.
1474           if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
1475             break;
1476         }
1477 
1478         ++I;
1479       }
1480 
1481       if (IsDead)
1482         DeadInsts.push_back(&DefMI);
1483     }
1484   }
1485 
1486   /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
1487   /// dead due to MI being killed, then mark DefMI as dead too.
1488   /// Some of the combines (extends(trunc)), try to walk through redundant
1489   /// copies in between the extends and the truncs, and this attempts to collect
1490   /// the in between copies if they're dead.
1491   void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
1492                           SmallVectorImpl<MachineInstr *> &DeadInsts,
1493                           unsigned DefIdx = 0) {
       // MI itself is unconditionally dead; record it first so it precedes any
       // transitively-dead defs that markDefDead appends after it.
1494     DeadInsts.push_back(&MI);
1495     markDefDead(MI, DefMI, DeadInsts, DefIdx);
1496   }
1497 
1498   /// Erase the dead instructions in the list and call the observer hooks.
1499   /// Normally the Legalizer will deal with erasing instructions that have been
1500   /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
1501   /// process instructions which have been marked dead, but otherwise break the
1502   /// MIR by introducing multiple vreg defs. For those cases, allow the combines
1503   /// to explicitly delete the instructions before we run into trouble.
deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr * > & DeadInsts,GISelObserverWrapper & WrapperObserver)1504   void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
1505                              GISelObserverWrapper &WrapperObserver) {
1506     for (auto *DeadMI : DeadInsts) {
1507       LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
1508       WrapperObserver.erasingInstr(*DeadMI);
1509       DeadMI->eraseFromParent();
1510     }
1511     DeadInsts.clear();
1512   }
1513 
1514   /// Checks if the target legalizer info has specified anything about the
1515   /// instruction, or if unsupported.
isInstUnsupported(const LegalityQuery & Query)1516   bool isInstUnsupported(const LegalityQuery &Query) const {
1517     using namespace LegalizeActions;
1518     auto Step = LI.getAction(Query);
1519     return Step.Action == Unsupported || Step.Action == NotFound;
1520   }
1521 
isInstLegal(const LegalityQuery & Query)1522   bool isInstLegal(const LegalityQuery &Query) const {
1523     return LI.getAction(Query).Action == LegalizeActions::Legal;
1524   }
1525 
isConstantUnsupported(LLT Ty)1526   bool isConstantUnsupported(LLT Ty) const {
1527     if (!Ty.isVector())
1528       return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
1529 
1530     LLT EltTy = Ty.getElementType();
1531     return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
1532            isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
1533   }
1534 
1535   /// Looks through copy instructions and returns the actual
1536   /// source register.
lookThroughCopyInstrs(Register Reg)1537   Register lookThroughCopyInstrs(Register Reg) {
1538     Register TmpReg = getSrcRegIgnoringCopies(Reg, MRI);
1539     return TmpReg.isValid() ? TmpReg : Reg;
1540   }
1541 };
1542 
1543 } // namespace llvm
1544 
1545 #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
1546