xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp (revision 7fdf597e96a02165cfe22ff357b857d5fa15ed8a)
1 //===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// Provides analysis for querying information about KnownBits during GISel
10 /// passes.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
17 #include "llvm/CodeGen/GlobalISel/Utils.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/CodeGen/TargetOpcodes.h"
22 #include "llvm/IR/ConstantRange.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/Target/TargetMachine.h"
25 
26 #define DEBUG_TYPE "gisel-known-bits"
27 
28 using namespace llvm;
29 
// Pass identification: the address of ID serves as the unique pass ID.
char llvm::GISelKnownBitsAnalysis::ID = 0;

// Register the analysis with the legacy pass manager (CFG-only = false,
// is-analysis = true).
INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)
34 
/// Cache the per-function objects every query needs (register info, target
/// lowering, data layout) and record the recursion limit for the walks.
GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
38 
39 Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
40   const MachineInstr *MI = MRI.getVRegDef(R);
41   switch (MI->getOpcode()) {
42   case TargetOpcode::COPY:
43     return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
44   case TargetOpcode::G_ASSERT_ALIGN: {
45     // TODO: Min with source
46     return Align(MI->getOperand(2).getImm());
47   }
48   case TargetOpcode::G_FRAME_INDEX: {
49     int FrameIdx = MI->getOperand(1).getIndex();
50     return MF.getFrameInfo().getObjectAlign(FrameIdx);
51   }
52   case TargetOpcode::G_INTRINSIC:
53   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
54   case TargetOpcode::G_INTRINSIC_CONVERGENT:
55   case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
56   default:
57     return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
58   }
59 }
60 
61 KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
62   assert(MI.getNumExplicitDefs() == 1 &&
63          "expected single return generic instruction");
64   return getKnownBits(MI.getOperand(0).getReg());
65 }
66 
67 KnownBits GISelKnownBits::getKnownBits(Register R) {
68   const LLT Ty = MRI.getType(R);
69   // Since the number of lanes in a scalable vector is unknown at compile time,
70   // we track one bit which is implicitly broadcast to all lanes.  This means
71   // that all lanes in a scalable vector are considered demanded.
72   APInt DemandedElts =
73       Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
74   return getKnownBits(R, DemandedElts);
75 }
76 
77 KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
78                                        unsigned Depth) {
79   // For now, we only maintain the cache during one request.
80   assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
81 
82   KnownBits Known;
83   computeKnownBitsImpl(R, Known, DemandedElts, Depth);
84   ComputeKnownBitsCache.clear();
85   return Known;
86 }
87 
88 bool GISelKnownBits::signBitIsZero(Register R) {
89   LLT Ty = MRI.getType(R);
90   unsigned BitWidth = Ty.getScalarSizeInBits();
91   return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
92 }
93 
94 APInt GISelKnownBits::getKnownZeroes(Register R) {
95   return getKnownBits(R).Zero;
96 }
97 
98 APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
99 
/// Debug helper: print \p MI together with the bits just computed for it at
/// recursion depth \p Depth (combined mask, then Zero and One separately).
/// NOTE(review): MI is printed twice ("Compute known bits" / "Computed for");
/// format kept as-is since debug-output layout is presumably relied upon.
LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One:  0x" << toString(Known.One, 16, false)
         << "\n";
}
110 
111 /// Compute known bits for the intersection of \p Src0 and \p Src1
112 void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
113                                          KnownBits &Known,
114                                          const APInt &DemandedElts,
115                                          unsigned Depth) {
116   // Test src1 first, since we canonicalize simpler expressions to the RHS.
117   computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
118 
119   // If we don't know any bits, early out.
120   if (Known.isUnknown())
121     return;
122 
123   KnownBits Known2;
124   computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
125 
126   // Only known if known in both the LHS and RHS.
127   Known = Known.intersectWith(Known2);
128 }
129 
130 // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
131 // created using Width. Use this function when the inputs are KnownBits
132 // objects. TODO: Move this KnownBits.h if this is usable in more cases.
133 static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
134                              const KnownBits &OffsetKnown,
135                              const KnownBits &WidthKnown) {
136   KnownBits Mask(BitWidth);
137   Mask.Zero = APInt::getBitsSetFrom(
138       BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
139   Mask.One = APInt::getLowBitsSet(
140       BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
141   return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
142 }
143 
/// Recursive worker behind getKnownBits(): compute the known bits of \p R by
/// pattern-matching its defining instruction, memoizing intermediate results
/// in ComputeKnownBitsCache so PHI cycles terminate.
///
/// \param R            virtual register being analyzed.
/// \param Known        [out] result, sized to R's scalar bit width.
/// \param DemandedElts lanes of interest for fixed vectors; a single bit for
///                     scalars and scalable vectors.
/// \param Depth        current recursion depth; the walk gives up at
///                     getMaxDepth().
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint (i.e. it has a register class constraint instead). This is
  // unlikely to occur except by looking through copies but it is possible for
  // the initial register being queried to be in this state.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  // Serve from the per-query cache when this register was already computed,
  // e.g. reached through multiple operands or through a PHI cycle.
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelKnownBits object.
  // This may happen when say a generic part uses a GISelKnownBits object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
  // which creates a new GISelKnownBits object with a different and smaller
  // depth. If we just check for equality, we would never exit if the depth
  // that is passed down to the target specific GISelKnownBits object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    // Opcode not handled generically: ask the target hook.
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    // Start from all-known (Zero and One both all-ones) and intersect away.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
      if (!DemandedElts[i])
        continue;

      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
                           Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    // Start from "everything known" and intersect with each incoming value.
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, save on compile time until we derive a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // PHI's operand are a mix of registers and basic blocks interleaved.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    // Constants are fully known.
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    // Frame-index alignment may imply low zero bits; delegate to the target.
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
                                        /* NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                        /* NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    // Result is whichever bits both arms agree on.
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
                         DemandedElts, Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
                         DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
                         DemandedElts, Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
                         DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    // With zero-or-one booleans, every bit above bit 0 is known zero.
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // Operand 2 is the width being sign-extended from.
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    // Range metadata on the memory operand constrains the loaded value.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    // Same as G_LOAD, but extend the range info the way the load extends.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ASSERT_ZEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned SrcBitWidth;

    // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
    if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
      SrcBitWidth = MI.getOperand(2).getImm();
    else {
      // Pointers are measured by their index size in the address space.
      SrcBitWidth = SrcTy.isPointer()
                        ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
                        : SrcTy.getSizeInBits();
    }
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    // Compute in the source width, then adjust back to the result width;
    // zext guarantees the high bits are zero.
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    // Each source occupies a contiguous OpSize-bit slice of the result.
    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (DstTy.isVector())
      break;
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    if (MRI.getType(SrcReg).isVector())
      return; // TODO: Handle vectors.

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    // Each result is a BitWidth-sized slice of the source value.
    Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs.  Also, bits known to be zero can't
    // contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::computeForAddSub(
        /*Add=*/false, /*NSW=*/false, /* NUW=*/false, ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    // Operand 1 is the overflow flag; the value result (operand 0) is not
    // constrained here.
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}
617 
618 /// Compute number of sign bits for the intersection of \p Src0 and \p Src1
619 unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
620                                                const APInt &DemandedElts,
621                                                unsigned Depth) {
622   // Test src1 first, since we canonicalize simpler expressions to the RHS.
623   unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
624   if (Src1SignBits == 1)
625     return 1;
626   return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
627 }
628 
629 /// Compute the known number of sign bits with attached range metadata in the
630 /// memory operand. If this is an extending load, accounts for the behavior of
631 /// the high bits.
632 static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
633                                                     unsigned TyBits) {
634   const MDNode *Ranges = Ld->getRanges();
635   if (!Ranges)
636     return 1;
637 
638   ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
639   if (TyBits > CR.getBitWidth()) {
640     switch (Ld->getOpcode()) {
641     case TargetOpcode::G_SEXTLOAD:
642       CR = CR.signExtend(TyBits);
643       break;
644     case TargetOpcode::G_ZEXTLOAD:
645       CR = CR.zeroExtend(TyBits);
646       break;
647     default:
648       break;
649     }
650   }
651 
652   return std::min(CR.getSignedMin().getNumSignBits(),
653                   CR.getSignedMax().getNumSignBits());
654 }
655 
656 unsigned GISelKnownBits::computeNumSignBits(Register R,
657                                             const APInt &DemandedElts,
658                                             unsigned Depth) {
659   MachineInstr &MI = *MRI.getVRegDef(R);
660   unsigned Opcode = MI.getOpcode();
661 
662   if (Opcode == TargetOpcode::G_CONSTANT)
663     return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
664 
665   if (Depth == getMaxDepth())
666     return 1;
667 
668   if (!DemandedElts)
669     return 1; // No demanded elts, better to assume we don't know anything.
670 
671   LLT DstTy = MRI.getType(R);
672   const unsigned TyBits = DstTy.getScalarSizeInBits();
673 
674   // Handle the case where this is called on a register that does not have a
675   // type constraint. This is unlikely to occur except by looking through copies
676   // but it is possible for the initial register being queried to be in this
677   // state.
678   if (!DstTy.isValid())
679     return 1;
680 
681   unsigned FirstAnswer = 1;
682   switch (Opcode) {
683   case TargetOpcode::COPY: {
684     MachineOperand &Src = MI.getOperand(1);
685     if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
686         MRI.getType(Src.getReg()).isValid()) {
687       // Don't increment Depth for this one since we didn't do any work.
688       return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
689     }
690 
691     return 1;
692   }
693   case TargetOpcode::G_SEXT: {
694     Register Src = MI.getOperand(1).getReg();
695     LLT SrcTy = MRI.getType(Src);
696     unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
697     return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
698   }
699   case TargetOpcode::G_ASSERT_SEXT:
700   case TargetOpcode::G_SEXT_INREG: {
701     // Max of the input and what this extends.
702     Register Src = MI.getOperand(1).getReg();
703     unsigned SrcBits = MI.getOperand(2).getImm();
704     unsigned InRegBits = TyBits - SrcBits + 1;
705     return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
706   }
707   case TargetOpcode::G_LOAD: {
708     GLoad *Ld = cast<GLoad>(&MI);
709     if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
710       break;
711 
712     return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
713   }
714   case TargetOpcode::G_SEXTLOAD: {
715     GSExtLoad *Ld = cast<GSExtLoad>(&MI);
716 
717     // FIXME: We need an in-memory type representation.
718     if (DstTy.isVector())
719       return 1;
720 
721     unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
722     if (NumBits != 1)
723       return NumBits;
724 
725     // e.g. i16->i32 = '17' bits known.
726     const MachineMemOperand *MMO = *MI.memoperands_begin();
727     return TyBits - MMO->getSizeInBits().getValue() + 1;
728   }
729   case TargetOpcode::G_ZEXTLOAD: {
730     GZExtLoad *Ld = cast<GZExtLoad>(&MI);
731 
732     // FIXME: We need an in-memory type representation.
733     if (DstTy.isVector())
734       return 1;
735 
736     unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
737     if (NumBits != 1)
738       return NumBits;
739 
740     // e.g. i16->i32 = '16' bits known.
741     const MachineMemOperand *MMO = *MI.memoperands_begin();
742     return TyBits - MMO->getSizeInBits().getValue();
743   }
744   case TargetOpcode::G_AND:
745   case TargetOpcode::G_OR:
746   case TargetOpcode::G_XOR: {
747     Register Src1 = MI.getOperand(1).getReg();
748     unsigned Src1NumSignBits =
749         computeNumSignBits(Src1, DemandedElts, Depth + 1);
750     if (Src1NumSignBits != 1) {
751       Register Src2 = MI.getOperand(2).getReg();
752       unsigned Src2NumSignBits =
753           computeNumSignBits(Src2, DemandedElts, Depth + 1);
754       FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
755     }
756     break;
757   }
758   case TargetOpcode::G_TRUNC: {
759     Register Src = MI.getOperand(1).getReg();
760     LLT SrcTy = MRI.getType(Src);
761 
762     // Check if the sign bits of source go down as far as the truncated value.
763     unsigned DstTyBits = DstTy.getScalarSizeInBits();
764     unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
765     unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
766     if (NumSrcSignBits > (NumSrcBits - DstTyBits))
767       return NumSrcSignBits - (NumSrcBits - DstTyBits);
768     break;
769   }
770   case TargetOpcode::G_SELECT: {
771     return computeNumSignBitsMin(MI.getOperand(2).getReg(),
772                                  MI.getOperand(3).getReg(), DemandedElts,
773                                  Depth + 1);
774   }
775   case TargetOpcode::G_SADDO:
776   case TargetOpcode::G_SADDE:
777   case TargetOpcode::G_UADDO:
778   case TargetOpcode::G_UADDE:
779   case TargetOpcode::G_SSUBO:
780   case TargetOpcode::G_SSUBE:
781   case TargetOpcode::G_USUBO:
782   case TargetOpcode::G_USUBE:
783   case TargetOpcode::G_SMULO:
784   case TargetOpcode::G_UMULO: {
785     // If compares returns 0/-1, all bits are sign bits.
786     // We know that we have an integer-based boolean since these operations
787     // are only available for integer.
788     if (MI.getOperand(1).getReg() == R) {
789       if (TL.getBooleanContents(DstTy.isVector(), false) ==
790           TargetLowering::ZeroOrNegativeOneBooleanContent)
791         return TyBits;
792     }
793 
794     break;
795   }
796   case TargetOpcode::G_FCMP:
797   case TargetOpcode::G_ICMP: {
798     bool IsFP = Opcode == TargetOpcode::G_FCMP;
799     if (TyBits == 1)
800       break;
801     auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
802     if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
803       return TyBits; // All bits are sign bits.
804     if (BC == TargetLowering::ZeroOrOneBooleanContent)
805       return TyBits - 1; // Every always-zero bit is a sign bit.
806     break;
807   }
808   case TargetOpcode::G_INTRINSIC:
809   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
810   case TargetOpcode::G_INTRINSIC_CONVERGENT:
811   case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
812   default: {
813     unsigned NumBits =
814       TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
815     if (NumBits > 1)
816       FirstAnswer = std::max(FirstAnswer, NumBits);
817     break;
818   }
819   }
820 
821   // Finally, if we can prove that the top bits of the result are 0's or 1's,
822   // use this information.
823   KnownBits Known = getKnownBits(R, DemandedElts, Depth);
824   APInt Mask;
825   if (Known.isNonNegative()) {        // sign bit is 0
826     Mask = Known.Zero;
827   } else if (Known.isNegative()) {  // sign bit is 1;
828     Mask = Known.One;
829   } else {
830     // Nothing known.
831     return FirstAnswer;
832   }
833 
834   // Okay, we know that the sign bit in Mask is set.  Use CLO to determine
835   // the number of identical bits in the top of the input value.
836   Mask <<= Mask.getBitWidth() - TyBits;
837   return std::max(FirstAnswer, Mask.countl_one());
838 }
839 
840 unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
841   LLT Ty = MRI.getType(R);
842   APInt DemandedElts =
843       Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
844   return computeNumSignBits(R, DemandedElts, Depth);
845 }
846 
/// This analysis only reads machine IR, so it preserves all other analyses.
void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
851 
/// Nothing is computed eagerly; the GISelKnownBits object is built lazily in
/// get(). Returns false because the function is never modified.
bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  return false;
}
855 
856 GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
857   if (!Info) {
858     unsigned MaxDepth =
859         MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
860     Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
861   }
862   return *Info;
863 }
864