//===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp -----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FMF.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/KnownFPClass.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;
using namespace MIPatternMatch;

char llvm::GISelValueTrackingAnalysisLegacy::ID = 0;

INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE,
                "Analysis for Computing Known Bits", false, true)

GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelValueTracking::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes.  This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelValueTracking::getKnownBits(Register R,
                                           const APInt &DemandedElts,
                                           unsigned Depth) {
  // For now, we only maintain the cache during one request.
  assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");

  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  ComputeKnownBitsCache.clear();
  return Known;
}

bool GISelValueTracking::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelValueTracking::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelValueTracking::getKnownOnes(Register R) {
  return getKnownBits(R).One;
}

LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One:  0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
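/// For example (illustrative, not from the original comment): if one source
/// has known bits 0b01?? and the other 0b0?1?, only the top bit is known with
/// the same value in both, so the intersection computed here is 0b0???.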
void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
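// For example (illustrative): with BitWidth = 8, Offset known to be exactly 2
// and Width known to be exactly 3, Mask.Zero = 0b11111000 and
// Mask.One = 0b00000111, so the result is lshr(Src, 2) & 0b00000111.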
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelValueTracking object.
  // This may happen when, say, a generic part uses a GISelValueTracking
  // object with some max depth, but then we hit
  // TL.computeKnownBitsForTargetInstr, which creates a new GISelValueTracking
  // object with a different and smaller max depth. If we just checked for
  // equality, we would never exit if the depth passed down to the
  // target-specific GISelValueTracking object were already bigger than its
  // max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      if (!DemandedElts[I])
        continue;

      computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::G_SPLAT_VECTOR: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1),
                         Depth + 1);
    // Implicitly truncate the bits to match the official semantics of
    // G_SPLAT_VECTOR.
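    // For example (illustrative): splatting an s64 scalar whose low bits are
    // known into a vector with s32 elements keeps only the low 32 known bits.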
    Known = Known.trunc(BitWidth);
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, save on compile time until we have a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // A PHI's operands are a mix of registers and basic blocks, interleaved.
    // We only care about the register operands.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis, but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, since known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // A bit that is known zero in either operand is zero in the result.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // A bit that is known one in either operand is one in the result.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
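    // For example (illustrative): sign-extending an s8 value whose bit 7 is
    // known one to s16 makes bits 8-15 known one as well.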
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
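    // For example (illustrative): !range metadata of [0, 256) on a 32-bit
    // memory type leaves bits 8-31 of KnownRange known zero.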
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ZEXT: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);

    unsigned SrcBitWidth = MI.getOperand(2).getImm();
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
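    // For example (illustrative): with BitWidth = 32 and SrcBitWidth = 8,
    // InMask is 0x000000FF below, so bits 8-31 become known zero.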
    APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth);
    Known.Zero |= (~InMask);
    Known.One &= (~Known.Zero);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly,
    // as well as clearing the corresponding one bits.
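    // For example (illustrative): an alignment of 8 gives LogOfAlign = 3, so
    // the low three bits become known zero.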
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
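    // For example (illustrative): merging two s32 sources into an s64 places
    // the first source's known bits in bits 0-31 and the second's in bits
    // 32-63.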

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }
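    // For example (illustrative): unmerging a <4 x s32> source into two
    // <2 x s32> results with DstIdx = 1 demands source lanes 2 and 3, i.e.
    // DemandedElts.zext(4) << 2.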

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs.  Also, bits known to be zero
    // can't contribute to the population.
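    // For example (illustrative): if at most 5 source bits can be set, the
    // count fits in bit_width(5) = 3 bits, so all higher result bits are
    // known zero.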
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
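    // For example (illustrative): with BitWidth = 32 and a width known to be
    // exactly 8, ShiftKnown is the constant 24, and shifting left then
    // arithmetically right by 24 replicates bit 7 of the extracted field
    // into the upper bits.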
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // R is the boolean overflow result. If the target's booleans are known
      // to be zero-or-one, the top bits are zero.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
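    // For example (illustrative): if bit 27 of an s32 source is known one,
    // there are at most 4 leading zeros, so the result fits in
    // bit_width(4) = 3 bits and all higher bits are known zero.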
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  case TargetOpcode::G_SHUFFLE_VECTOR: {
    APInt DemandedLHS, DemandedRHS;
    // Collect the known bits that are shared by every vector element referenced
    // by the shuffle.
    unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
    if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
                                DemandedElts, DemandedLHS, DemandedRHS))
      break;

    // Known bits are the values that are shared by every demanded element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedLHS) {
      computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    // If we don't know any bits, early out.
    if (Known.isUnknown())
      break;
    if (!!DemandedRHS) {
      computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS,
                           Depth + 1);
      Known = Known.intersectWith(Known2);
    }
    break;
  }
  case TargetOpcode::G_CONCAT_VECTORS: {
    if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
      break;
    // Split DemandedElts and test each of the demanded subvectors.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    unsigned NumSubVectorElts =
        MRI.getType(MI.getOperand(1).getReg()).getNumElements();

    for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
      APInt DemandedSub =
          DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
      if (!!DemandedSub) {
        computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1);

        Known = Known.intersectWith(Known2);
      }
      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}

static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) {
  Ty = Ty.getScalarType();
  DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty));
  return Mode.Output == DenormalMode::IEEE ||
         Mode.Output == DenormalMode::PositiveZero;
}

void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known,
                                             FPClassTest InterestedClasses,
                                             unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth);
}

void GISelValueTracking::computeKnownFPClassForFPTrunc(
    const MachineInstr &MI, const APInt &DemandedElts,
    FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) {
  if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) ==
      fcNone)
    return;

  Register Val = MI.getOperand(1).getReg();
  KnownFPClass KnownSrc;
  computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                      Depth + 1);

  // Sign should be preserved
  // TODO: Handle cannot be ordered greater than zero
  if (KnownSrc.cannotBeOrderedLessThanZero())
    Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

  Known.propagateNaN(KnownSrc, true);

  // Infinity needs a range check.
}

void GISelValueTracking::computeKnownFPClass(Register R,
                                             const APInt &DemandedElts,
                                             FPClassTest InterestedClasses,
                                             KnownFPClass &Known,
                                             unsigned Depth) {
  assert(Known.isUnknown() && "should not be called with known information");

  if (!DemandedElts) {
    // No demanded elts, better to assume we don't know anything.
    Known.resetAll();
    return;
  }

  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");

  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  if (!DstTy.isValid()) {
    Known.resetAll();
    return;
  }

  if (auto Cst = GFConstant::getConstant(R, MRI)) {
    switch (Cst->getKind()) {
    case GFConstant::GFConstantKind::Scalar: {
      auto APF = Cst->getScalarValue();
      Known.KnownFPClasses = APF.classify();
      Known.SignBit = APF.isNegative();
      break;
    }
    case GFConstant::GFConstantKind::FixedVector: {
      Known.KnownFPClasses = fcNone;
      bool SignBitAllZero = true;
      bool SignBitAllOne = true;

      for (auto C : *Cst) {
        Known.KnownFPClasses |= C.classify();
        if (C.isNegative())
          SignBitAllZero = false;
        else
          SignBitAllOne = false;
      }

      if (SignBitAllOne != SignBitAllZero)
        Known.SignBit = SignBitAllOne;

      break;
    }
    case GFConstant::GFConstantKind::ScalableVector: {
      Known.resetAll();
      break;
    }
    }

    return;
  }

  FPClassTest KnownNotFromFlags = fcNone;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoNans))
    KnownNotFromFlags |= fcNan;
  if (MI.getFlag(MachineInstr::MIFlag::FmNoInfs))
    KnownNotFromFlags |= fcInf;

  // We no longer need to find out about these bits from inputs if we can
  // assume this from flags/attributes.
  InterestedClasses &= ~KnownNotFromFlags;

  auto ClearClassesFromFlags =
      make_scope_exit([=, &Known] { Known.knownNot(KnownNotFromFlags); });

  // All recursive calls that increase depth must come after this.
  if (Depth == MaxAnalysisRecursionDepth)
    return;

  const MachineFunction *MF = MI.getMF();

  switch (Opcode) {
  default:
    TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                         Depth);
    break;
  case TargetOpcode::G_FNEG: {
    Register Val = MI.getOperand(1).getReg();
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1);
    Known.fneg();
    break;
  }
  case TargetOpcode::G_SELECT: {
    GSelect &SelMI = cast<GSelect>(MI);
    Register Cond = SelMI.getCondReg();
    Register LHS = SelMI.getTrueReg();
    Register RHS = SelMI.getFalseReg();

    FPClassTest FilterLHS = fcAllFlags;
    FPClassTest FilterRHS = fcAllFlags;

    Register TestedValue;
    FPClassTest MaskIfTrue = fcAllFlags;
    FPClassTest MaskIfFalse = fcAllFlags;
    FPClassTest ClassVal = fcNone;

    CmpInst::Predicate Pred;
    Register CmpLHS, CmpRHS;
    if (mi_match(Cond, MRI,
                 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) {
      // If the select filters out a value based on the class, it no longer
      // participates in the class of the result

      // TODO: In some degenerate cases we can infer something if we try again
      // without looking through sign operations.
      bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
      std::tie(TestedValue, MaskIfTrue, MaskIfFalse) =
          fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
    } else if (mi_match(
                   Cond, MRI,
                   m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) {
      FPClassTest TestedMask = ClassVal;
      MaskIfTrue = TestedMask;
      MaskIfFalse = ~TestedMask;
    }
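    // For example (illustrative): if Cond is (fcmp ord x, x), i.e. !isnan(x),
    // fcmpImpliesClass yields TestedValue = x with MaskIfTrue = ~fcNan and
    // MaskIfFalse = fcNan, so the nan class is filtered out of that arm below.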

    if (TestedValue == LHS) {
      // match !isnan(x) ? x : y
      FilterLHS = MaskIfTrue;
    } else if (TestedValue == RHS) { // && IsExactClass
      // match !isnan(x) ? y : x
      FilterRHS = MaskIfFalse;
    }

    KnownFPClass Known2;
    computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
                        Depth + 1);
    Known.KnownFPClasses &= FilterLHS;

    computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
                        Known2, Depth + 1);
    Known2.KnownFPClasses &= FilterRHS;

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_FCOPYSIGN: {
    Register Magnitude = MI.getOperand(1).getReg();
    Register Sign = MI.getOperand(2).getReg();

    KnownFPClass KnownSign;

    computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known,
                        Depth + 1);
    computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign,
                        Depth + 1);
    Known.copysign(KnownSign);
    break;
  }
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_STRICT_FMA:
  case TargetOpcode::G_FMAD: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register A = MI.getOperand(1).getReg();
    Register B = MI.getOperand(2).getReg();
    Register C = MI.getOperand(3).getReg();

    if (A != B)
      break;

    // The multiply cannot be -0 and therefore the add can't be -0
    Known.knownNot(fcNegZero);

    // x * x + y is non-negative if y is non-negative.
    KnownFPClass KnownAddend;
    computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend,
                        Depth + 1);

    if (KnownAddend.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_STRICT_FSQRT: {
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;

    Register Val = MI.getOperand(1).getReg();

    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);
    if (KnownSrc.isKnownNever(fcSNan))
      Known.knownNot(fcSNan);

    // Any negative value besides -0 returns a nan.
    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    // The only negative value that can be returned is -0 for -0 inputs.
    Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
    break;
  }
  case TargetOpcode::G_FABS: {
    if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
      Register Val = MI.getOperand(1).getReg();
      // If we only care about the sign bit we don't need to inspect the
      // operand.
      computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known,
                          Depth + 1);
    }
    Known.fabs();
    break;
  }
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSINCOS: {
    // Return NaN on infinite inputs.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;

    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.knownNot(fcInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
      Known.knownNot(fcNan);
    break;
  }
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMAXIMUMNUM:
  case TargetOpcode::G_FMINIMUMNUM: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;

    computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS,
                        Depth + 1);
    computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS,
                        Depth + 1);

    bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
    Known = KnownLHS | KnownRHS;

    // If either operand is not NaN, the result is not NaN.
    if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM ||
                     Opcode == TargetOpcode::G_FMAXNUM ||
                     Opcode == TargetOpcode::G_FMINIMUMNUM ||
                     Opcode == TargetOpcode::G_FMAXIMUMNUM))
      Known.knownNot(fcNan);

    if (Opcode == TargetOpcode::G_FMAXNUM ||
        Opcode == TargetOpcode::G_FMAXIMUMNUM ||
        Opcode == TargetOpcode::G_FMAXNUM_IEEE) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if ((KnownLHS.cannotBeOrderedLessThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedLessThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMAXIMUM) {
      // If at least one operand is known to be positive, the result must be
      // positive.
      if (KnownLHS.cannotBeOrderedLessThanZero() ||
          KnownRHS.cannotBeOrderedLessThanZero())
        Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINNUM ||
               Opcode == TargetOpcode::G_FMINIMUMNUM ||
               Opcode == TargetOpcode::G_FMINNUM_IEEE) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
           KnownLHS.isKnownNeverNaN()) ||
          (KnownRHS.cannotBeOrderedGreaterThanZero() &&
           KnownRHS.isKnownNeverNaN()))
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else if (Opcode == TargetOpcode::G_FMINIMUM) {
      // If at least one operand is known to be negative, the result must be
      // negative.
      if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
          KnownRHS.cannotBeOrderedGreaterThanZero())
        Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
    } else {
      llvm_unreachable("unhandled intrinsic");
    }

    // Fixup zero handling if denormals could be returned as a zero.
    //
    // As there's no spec for denormal flushing, be conservative with the
    // treatment of denormals that could be flushed to zero. For older
    // subtargets on AMDGPU the min/max instructions would not flush the
    // output and return the original value.
    //
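    // For example (illustrative): with a PreserveSign denormal mode, a
    // subnormal result may be flushed to zero, so fcZero must be kept
    // possible even if neither input is a literal zero.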
    if ((Known.KnownFPClasses & fcZero) != fcNone &&
        !Known.isKnownNeverSubnormal()) {
      DenormalMode Mode =
          MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()));
      if (Mode != DenormalMode::getIEEE())
        Known.KnownFPClasses |= fcZero;
    }

    if (Known.isKnownNeverNaN()) {
      if (KnownLHS.SignBit && KnownRHS.SignBit &&
          *KnownLHS.SignBit == *KnownRHS.SignBit) {
        if (*KnownLHS.SignBit)
          Known.signBitMustBeOne();
        else
          Known.signBitMustBeZero();
      } else if ((Opcode == TargetOpcode::G_FMAXIMUM ||
                  Opcode == TargetOpcode::G_FMINIMUM) ||
                 Opcode == TargetOpcode::G_FMAXIMUMNUM ||
                 Opcode == TargetOpcode::G_FMINIMUMNUM ||
                 Opcode == TargetOpcode::G_FMAXNUM_IEEE ||
                 Opcode == TargetOpcode::G_FMINNUM_IEEE ||
                 // FIXME: Should be using logical zero versions
                 ((KnownLHS.isKnownNeverNegZero() ||
                   KnownRHS.isKnownNeverPosZero()) &&
                  (KnownLHS.isKnownNeverPosZero() ||
                   KnownRHS.isKnownNeverNegZero()))) {
        if ((Opcode == TargetOpcode::G_FMAXIMUM ||
             Opcode == TargetOpcode::G_FMAXNUM ||
             Opcode == TargetOpcode::G_FMAXIMUMNUM ||
             Opcode == TargetOpcode::G_FMAXNUM_IEEE) &&
            (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
          Known.signBitMustBeZero();
        else if ((Opcode == TargetOpcode::G_FMINIMUM ||
                  Opcode == TargetOpcode::G_FMINNUM ||
                  Opcode == TargetOpcode::G_FMINIMUMNUM ||
                  Opcode == TargetOpcode::G_FMINNUM_IEEE) &&
                 (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
          Known.signBitMustBeOne();
      }
    }
    break;
  }
  case TargetOpcode::G_FCANONICALIZE: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);

    // This is essentially a stronger form of
    // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
    // actually have an IR canonicalization guarantee.

    // Canonicalize may flush denormals to zero, so we have to consider the
    // denormal mode to preserve known-not-0 knowledge.
    Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;

    // Stronger version of propagateNaN
    // Canonicalize is guaranteed to quiet signaling nans.
    if (KnownSrc.isKnownNeverNaN())
      Known.knownNot(fcNan);
    else
      Known.knownNot(fcSNan);

    // If the parent function flushes denormals, the canonical output cannot
    // be a denormal.
    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FPType = getFltSemanticForLLT(Ty);
    DenormalMode DenormMode = MF->getDenormalMode(FPType);
    if (DenormMode == DenormalMode::getIEEE()) {
      if (KnownSrc.isKnownNever(fcPosZero))
        Known.knownNot(fcPosZero);
      if (KnownSrc.isKnownNever(fcNegZero))
        Known.knownNot(fcNegZero);
      break;
    }

    if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
      Known.knownNot(fcSubnormal);

    if (DenormMode.Input == DenormalMode::PositiveZero ||
        (DenormMode.Output == DenormalMode::PositiveZero &&
         DenormMode.Input == DenormalMode::IEEE))
      Known.knownNot(fcNegZero);

    break;
  }
  case TargetOpcode::G_VECREDUCE_FMAX:
  case TargetOpcode::G_VECREDUCE_FMIN:
  case TargetOpcode::G_VECREDUCE_FMAXIMUM:
  case TargetOpcode::G_VECREDUCE_FMINIMUM: {
    Register Val = MI.getOperand(1).getReg();
    // reduce min/max will choose an element from one of the vector elements,
    // so we can infer any class information that is common to all elements.

    Known =
        computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1);
    // Can only propagate sign if output is never NaN.
    if (!Known.isKnownNeverNaN())
      Known.SignBit.reset();
    break;
  }
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUND: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    FPClassTest InterestedSrcs = InterestedClasses;
    if (InterestedSrcs & fcPosFinite)
      InterestedSrcs |= fcPosFinite;
    if (InterestedSrcs & fcNegFinite)
      InterestedSrcs |= fcNegFinite;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    // Integer results cannot be subnormal.
    Known.knownNot(fcSubnormal);

    Known.propagateNaN(KnownSrc, true);

    // TODO: handle multi unit FPTypes once LLT FPInfo lands

    // Negative values that round up to 0 produce -0.
    if (KnownSrc.isKnownNever(fcPosFinite))
      Known.knownNot(fcPosFinite);
    if (KnownSrc.isKnownNever(fcNegFinite))
      Known.knownNot(fcNegFinite);

    break;
  }
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FEXP10: {
    Known.knownNot(fcNegative);
    if ((InterestedClasses & fcNan) == fcNone)
      break;

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    if (KnownSrc.isKnownNeverNaN()) {
      Known.knownNot(fcNan);
      Known.signBitMustBeZero();
    }

    break;
  }
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FLOG10: {
    // log(+inf) -> +inf
    // log([+-]0.0) -> -inf
    // log(-inf) -> nan
    // log(-x) -> nan
    if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if ((InterestedClasses & fcNegInf) != fcNone)
      InterestedSrcs |= fcZero | fcSubnormal;
    if ((InterestedClasses & fcNan) != fcNone)
      InterestedSrcs |= fcNan | (fcNegative & ~fcNan);

    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1);

    if (KnownSrc.isKnownNeverPosInfinity())
      Known.knownNot(fcPosInf);

    if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(fcNan);

    LLT Ty = MRI.getType(Val).getScalarType();
    const fltSemantics &FltSem = getFltSemanticForLLT(Ty);
    DenormalMode Mode = MF->getDenormalMode(FltSem);

    if (KnownSrc.isKnownNeverLogicalZero(Mode))
      Known.knownNot(fcNegInf);

    break;
  }
  case TargetOpcode::G_FPOWI: {
    if ((InterestedClasses & fcNegative) == fcNone)
      break;

    Register Exp = MI.getOperand(2).getReg();
    LLT ExpTy = MRI.getType(Exp);
    KnownBits ExponentKnownBits = getKnownBits(
        Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1);

    if (ExponentKnownBits.Zero[0]) { // Is even
      Known.knownNot(fcNegative);
      break;
    }

    // Given that exp is an integer, here are the
    // ways that pow can return a negative value:
    //
    //   pow(-x, exp)   --> negative if exp is odd and x is negative.
    //   pow(-0, exp)   --> -inf if exp is negative odd.
    //   pow(-0, exp)   --> -0 if exp is positive odd.
    //   pow(-inf, exp) --> -0 if exp is negative odd.
    //   pow(-inf, exp) --> -inf if exp is positive odd.
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1);
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    break;
  }
  case TargetOpcode::G_FLDEXP:
  case TargetOpcode::G_STRICT_FLDEXP: {
    Register Val = MI.getOperand(1).getReg();
    KnownFPClass KnownSrc;
    computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc,
                        Depth + 1);
    Known.propagateNaN(KnownSrc, /*PropagateSign=*/true);

    // Sign is preserved, but underflows may produce zeroes.
    if (KnownSrc.isKnownNever(fcNegative))
      Known.knownNot(fcNegative);
    else if (KnownSrc.cannotBeOrderedLessThanZero())
      Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);

    if (KnownSrc.isKnownNever(fcPositive))
      Known.knownNot(fcPositive);
    else if (KnownSrc.cannotBeOrderedGreaterThanZero())
      Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);

    // Can refine inf/zero handling based on the exponent operand.
    const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
    if ((InterestedClasses & ExpInfoMask) == fcNone)
      break;
    if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
      break;

    // TODO: Handle constant range of Exp

    break;
  }
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
                                  Depth);
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_STRICT_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_STRICT_FSUB: {
    Register LHS = MI.getOperand(1).getReg();
    Register RHS = MI.getOperand(2).getReg();
    KnownFPClass KnownLHS, KnownRHS;
    bool WantNegative =
        (Opcode == TargetOpcode::G_FADD ||
         Opcode == TargetOpcode::G_STRICT_FADD) &&
        (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
    bool WantNaN = (InterestedClasses & fcNan) != fcNone;
    bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;

    if (!WantNaN && !WantNegative && !WantNegZero)
      break;

    FPClassTest InterestedSrcs = InterestedClasses;
    if (WantNegative)
      InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
    if (InterestedClasses & fcNan)
      InterestedSrcs |= fcInf;
    computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1);

    if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
        (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
        WantNegZero ||
        (Opcode == TargetOpcode::G_FSUB ||
         Opcode == TargetOpcode::G_STRICT_FSUB)) {

      // RHS is canonically cheaper to compute. Skip inspecting the LHS if
      // there's no point.
      computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS,
                          Depth + 1);
      // Adding positive and negative infinity produces NaN.
      // TODO: Check sign of infinities.
      if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
          (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
        Known.knownNot(fcNan);
      if (Opcode == TargetOpcode::G_FADD ||
          Opcode == TargetOpcode::G_STRICT_FADD) {
1337         if (KnownLHS.cannotBeOrderedLessThanZero() &&
1338             KnownRHS.cannotBeOrderedLessThanZero())
1339           Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
1340 
1341         // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
1342         if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
1343                  getFltSemanticForLLT(DstTy.getScalarType()))) ||
1344              KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
1345                  getFltSemanticForLLT(DstTy.getScalarType())))) &&
1346             // Make sure output negative denormal can't flush to -0
1347             outputDenormalIsIEEEOrPosZero(*MF, DstTy))
1348           Known.knownNot(fcNegZero);
1349       } else {
1350         // Only fsub -0, +0 can return -0
1351         if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode(
1352                  getFltSemanticForLLT(DstTy.getScalarType()))) ||
1353              KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode(
1354                  getFltSemanticForLLT(DstTy.getScalarType())))) &&
1355             // Make sure output negative denormal can't flush to -0
1356             outputDenormalIsIEEEOrPosZero(*MF, DstTy))
1357           Known.knownNot(fcNegZero);
1358       }
1359     }
1360 
1361     break;
1362   }
1363   case TargetOpcode::G_FMUL:
1364   case TargetOpcode::G_STRICT_FMUL: {
1365     Register LHS = MI.getOperand(1).getReg();
1366     Register RHS = MI.getOperand(2).getReg();
1367     // X * X is always non-negative or a NaN.
1368     if (LHS == RHS)
1369       Known.knownNot(fcNegative);
1370 
1371     if ((InterestedClasses & fcNan) != fcNan)
1372       break;
1373 
1374     // fcSubnormal is only needed in case of DAZ.
1375     const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
1376 
1377     KnownFPClass KnownLHS, KnownRHS;
1378     computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1);
1379     if (!KnownRHS.isKnownNeverNaN())
1380       break;
1381 
1382     computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1);
1383     if (!KnownLHS.isKnownNeverNaN())
1384       break;
1385 
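    // The sign of a product is the XOR of the operand signs: e.g. two factors
    // that are both known negative, as in (-2.0) * (-3.0) == +6.0, give a
    // product with sign bit 0.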
1386     if (KnownLHS.SignBit && KnownRHS.SignBit) {
1387       if (*KnownLHS.SignBit == *KnownRHS.SignBit)
1388         Known.signBitMustBeZero();
1389       else
1390         Known.signBitMustBeOne();
1391     }
1392 
    // 0 * +/-inf produces NaN. If neither operand can be infinity, the result
    // cannot be NaN.
1394     if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
1395       Known.knownNot(fcNan);
1396       break;
1397     }
1398 
1399     if ((KnownRHS.isKnownNeverInfinity() ||
1400          KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
1401              getFltSemanticForLLT(DstTy.getScalarType())))) &&
1402         (KnownLHS.isKnownNeverInfinity() ||
1403          KnownRHS.isKnownNeverLogicalZero(
1404              MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())))))
1405       Known.knownNot(fcNan);
1406 
1407     break;
1408   }
1409   case TargetOpcode::G_FDIV:
1410   case TargetOpcode::G_FREM: {
1411     Register LHS = MI.getOperand(1).getReg();
1412     Register RHS = MI.getOperand(2).getReg();
1413 
1414     if (LHS == RHS) {
1415       // TODO: Could filter out snan if we inspect the operand
1416       if (Opcode == TargetOpcode::G_FDIV) {
1417         // X / X is always exactly 1.0 or a NaN.
1418         Known.KnownFPClasses = fcNan | fcPosNormal;
1419       } else {
1420         // X % X is always exactly [+-]0.0 or a NaN.
1421         Known.KnownFPClasses = fcNan | fcZero;
1422       }
1423 
1424       break;
1425     }
1426 
1427     const bool WantNan = (InterestedClasses & fcNan) != fcNone;
1428     const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
1429     const bool WantPositive = Opcode == TargetOpcode::G_FREM &&
1430                               (InterestedClasses & fcPositive) != fcNone;
1431     if (!WantNan && !WantNegative && !WantPositive)
1432       break;
1433 
1434     KnownFPClass KnownLHS, KnownRHS;
1435 
1436     computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative,
1437                         KnownRHS, Depth + 1);
1438 
1439     bool KnowSomethingUseful =
1440         KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);
1441 
1442     if (KnowSomethingUseful || WantPositive) {
1443       const FPClassTest InterestedLHS =
1444           WantPositive ? fcAllFlags
1445                        : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
1446 
1447       computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS,
1448                           KnownLHS, Depth + 1);
1449     }
1450 
    if (Opcode == TargetOpcode::G_FDIV) {
1452       // Only 0/0, Inf/Inf produce NaN.
1453       if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1454           (KnownLHS.isKnownNeverInfinity() ||
1455            KnownRHS.isKnownNeverInfinity()) &&
1456           ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode(
1457                getFltSemanticForLLT(DstTy.getScalarType())))) ||
1458            (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
1459                getFltSemanticForLLT(DstTy.getScalarType())))))) {
1460         Known.knownNot(fcNan);
1461       }
1462 
      // X / -0.0 is -Inf (or NaN).
      // If neither operand can be negative, the quotient cannot be negative.
1465       if (KnownLHS.isKnownNever(fcNegative) &&
1466           KnownRHS.isKnownNever(fcNegative))
1467         Known.knownNot(fcNegative);
1468     } else {
1469       // Inf REM x and x REM 0 produce NaN.
1470       if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
1471           KnownLHS.isKnownNeverInfinity() &&
1472           KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode(
1473               getFltSemanticForLLT(DstTy.getScalarType())))) {
1474         Known.knownNot(fcNan);
1475       }
1476 
1477       // The sign for frem is the same as the first operand.
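      // e.g. fmod(-5.0, 3.0) == -2.0 and fmod(5.0, -3.0) == 2.0.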
1478       if (KnownLHS.cannotBeOrderedLessThanZero())
1479         Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
1480       if (KnownLHS.cannotBeOrderedGreaterThanZero())
1481         Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
1482 
1483       // See if we can be more aggressive about the sign of 0.
1484       if (KnownLHS.isKnownNever(fcNegative))
1485         Known.knownNot(fcNegative);
1486       if (KnownLHS.isKnownNever(fcPositive))
1487         Known.knownNot(fcPositive);
1488     }
1489 
1490     break;
1491   }
1492   case TargetOpcode::G_FPEXT: {
1493     Register Dst = MI.getOperand(0).getReg();
1494     Register Src = MI.getOperand(1).getReg();
    // Infinity, NaN and zero propagate from the source.
    computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
1497 
1498     LLT DstTy = MRI.getType(Dst).getScalarType();
1499     const fltSemantics &DstSem = getFltSemanticForLLT(DstTy);
1500     LLT SrcTy = MRI.getType(Src).getScalarType();
1501     const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy);
1502 
1503     // All subnormal inputs should be in the normal range in the result type.
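    // e.g. the smallest positive f16 subnormal, 2^-24, is a normal value in
    // f32, whose normal range extends down to 2^-126.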
1504     if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) {
1505       if (Known.KnownFPClasses & fcPosSubnormal)
1506         Known.KnownFPClasses |= fcPosNormal;
1507       if (Known.KnownFPClasses & fcNegSubnormal)
1508         Known.KnownFPClasses |= fcNegNormal;
1509       Known.knownNot(fcSubnormal);
1510     }
1511 
    // The sign bit of a NaN isn't guaranteed.
1513     if (!Known.isKnownNeverNaN())
1514       Known.SignBit = std::nullopt;
1515     break;
1516   }
1517   case TargetOpcode::G_FPTRUNC: {
1518     computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known,
1519                                   Depth);
1520     break;
1521   }
1522   case TargetOpcode::G_SITOFP:
1523   case TargetOpcode::G_UITOFP: {
    // Cannot produce a NaN.
    Known.knownNot(fcNan);

    // Integer values cannot be subnormal.
    Known.knownNot(fcSubnormal);

    // sitofp and uitofp turn into +0.0 for zero.
    Known.knownNot(fcNegZero);
1532     if (Opcode == TargetOpcode::G_UITOFP)
1533       Known.signBitMustBeZero();
1534 
1535     Register Val = MI.getOperand(1).getReg();
1536     LLT Ty = MRI.getType(Val);
1537 
1538     if (InterestedClasses & fcInf) {
1539       // Get width of largest magnitude integer (remove a bit if signed).
1540       // This still works for a signed minimum value because the largest FP
      // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
1542       int IntSize = Ty.getScalarSizeInBits();
1543       if (Opcode == TargetOpcode::G_SITOFP)
1544         --IntSize;
1545 
1546       // If the exponent of the largest finite FP value can hold the largest
1547       // integer, the result of the cast must be finite.
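      // e.g. any u32 (IntSize 32) converts to a finite f32 because
      // ilogb(FLT_MAX) == 127 >= 32, while a u128 can exceed FLT_MAX and
      // round to +inf.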
1548       LLT FPTy = DstTy.getScalarType();
1549       const fltSemantics &FltSem = getFltSemanticForLLT(FPTy);
1550       if (ilogb(APFloat::getLargest(FltSem)) >= IntSize)
1551         Known.knownNot(fcInf);
1552     }
1553 
1554     break;
1555   }
  // TODO: Handle TargetOpcode::G_MERGE_VALUES.
1557   case TargetOpcode::G_BUILD_VECTOR:
1558   case TargetOpcode::G_CONCAT_VECTORS: {
1559     GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI);
1560 
1561     if (!DstTy.isFixedVector())
1562       break;
1563 
1564     bool First = true;
1565     for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) {
      // Only sources whose element is demanded can affect the result.
      bool NeedsElt = DemandedElts[Idx];

      // Do we demand this source element?
1570       if (NeedsElt) {
1571         Register Src = Merge.getSourceReg(Idx);
1572         if (First) {
1573           computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1);
1574           First = false;
1575         } else {
1576           KnownFPClass Known2;
1577           computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1);
1578           Known |= Known2;
1579         }
1580 
1581         // If we don't know any bits, early out.
1582         if (Known.isUnknown())
1583           break;
1584       }
1585     }
1586 
1587     break;
1588   }
1589   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1590     // Look through extract element. If the index is non-constant or
1591     // out-of-range demand all elements, otherwise just the extracted
1592     // element.
1593     GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI);
1594     Register Vec = Extract.getVectorReg();
1595     Register Idx = Extract.getIndexReg();
1596 
1597     auto CIdx = getIConstantVRegVal(Idx, MRI);
1598 
1599     LLT VecTy = MRI.getType(Vec);
1600 
1601     if (VecTy.isFixedVector()) {
1602       unsigned NumElts = VecTy.getNumElements();
1603       APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1604       if (CIdx && CIdx->ult(NumElts))
1605         DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1606       return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
1607                                  Depth + 1);
1608     }
1609 
1610     break;
1611   }
1612   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1613     GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI);
1614     Register Vec = Insert.getVectorReg();
1615     Register Elt = Insert.getElementReg();
1616     Register Idx = Insert.getIndexReg();
1617 
1618     LLT VecTy = MRI.getType(Vec);
1619 
1620     if (VecTy.isScalableVector())
1621       return;
1622 
1623     auto CIdx = getIConstantVRegVal(Idx, MRI);
1624 
1625     unsigned NumElts = DemandedElts.getBitWidth();
1626     APInt DemandedVecElts = DemandedElts;
1627     bool NeedsElt = true;
1628     // If we know the index we are inserting to, clear it from Vec check.
1629     if (CIdx && CIdx->ult(NumElts)) {
1630       DemandedVecElts.clearBit(CIdx->getZExtValue());
1631       NeedsElt = DemandedElts[CIdx->getZExtValue()];
1632     }
1633 
1634     // Do we demand the inserted element?
1635     if (NeedsElt) {
1636       computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1);
1637       // If we don't know any bits, early out.
1638       if (Known.isUnknown())
1639         break;
1640     } else {
1641       Known.KnownFPClasses = fcNone;
1642     }
1643 
    // Do we need any more elements from Vec?
1645     if (!DemandedVecElts.isZero()) {
1646       KnownFPClass Known2;
1647       computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
1648                           Depth + 1);
1649       Known |= Known2;
1650     }
1651 
1652     break;
1653   }
1654   case TargetOpcode::G_SHUFFLE_VECTOR: {
1655     // For undef elements, we don't know anything about the common state of
1656     // the shuffle result.
1657     GShuffleVector &Shuf = cast<GShuffleVector>(MI);
1658     APInt DemandedLHS, DemandedRHS;
1659     if (DstTy.isScalableVector()) {
1660       assert(DemandedElts == APInt(1, 1));
1661       DemandedLHS = DemandedRHS = DemandedElts;
1662     } else {
1663       if (!llvm::getShuffleDemandedElts(DstTy.getNumElements(), Shuf.getMask(),
1664                                         DemandedElts, DemandedLHS,
1665                                         DemandedRHS)) {
1666         Known.resetAll();
1667         return;
1668       }
1669     }
1670 
1671     if (!!DemandedLHS) {
1672       Register LHS = Shuf.getSrc1Reg();
1673       computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
1674                           Depth + 1);
1675 
1676       // If we don't know any bits, early out.
1677       if (Known.isUnknown())
1678         break;
1679     } else {
1680       Known.KnownFPClasses = fcNone;
1681     }
1682 
1683     if (!!DemandedRHS) {
1684       KnownFPClass Known2;
1685       Register RHS = Shuf.getSrc2Reg();
1686       computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
1687                           Depth + 1);
1688       Known |= Known2;
1689     }
1690     break;
1691   }
1692   case TargetOpcode::COPY: {
1693     Register Src = MI.getOperand(1).getReg();
1694 
1695     if (!Src.isVirtual())
1696       return;
1697 
1698     computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1);
1699     break;
1700   }
1701   }
1702 }
1703 
1704 KnownFPClass
1705 GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts,
1706                                         FPClassTest InterestedClasses,
1707                                         unsigned Depth) {
1708   KnownFPClass KnownClasses;
1709   computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth);
1710   return KnownClasses;
1711 }
1712 
1713 KnownFPClass GISelValueTracking::computeKnownFPClass(
1714     Register R, FPClassTest InterestedClasses, unsigned Depth) {
1715   KnownFPClass Known;
1716   computeKnownFPClass(R, Known, InterestedClasses, Depth);
1717   return Known;
1718 }
1719 
1720 KnownFPClass GISelValueTracking::computeKnownFPClass(
1721     Register R, const APInt &DemandedElts, uint32_t Flags,
1722     FPClassTest InterestedClasses, unsigned Depth) {
1723   if (Flags & MachineInstr::MIFlag::FmNoNans)
1724     InterestedClasses &= ~fcNan;
1725   if (Flags & MachineInstr::MIFlag::FmNoInfs)
1726     InterestedClasses &= ~fcInf;
1727 
1728   KnownFPClass Result =
1729       computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth);
1730 
1731   if (Flags & MachineInstr::MIFlag::FmNoNans)
1732     Result.KnownFPClasses &= ~fcNan;
1733   if (Flags & MachineInstr::MIFlag::FmNoInfs)
1734     Result.KnownFPClasses &= ~fcInf;
1735   return Result;
1736 }
1737 
1738 KnownFPClass GISelValueTracking::computeKnownFPClass(
1739     Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) {
1740   LLT Ty = MRI.getType(R);
1741   APInt DemandedElts =
1742       Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
1743   return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth);
1744 }
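
// Example use from a caller (a sketch, assuming the Depth parameter defaults
// to 0 in the declaration and that VT is a GISelValueTracking instance):
//   KnownFPClass KFC =
//       VT.computeKnownFPClass(Reg, MI.getFlags(), fcNan | fcInf);
//   if (KFC.isKnownNeverNaN() && KFC.isKnownNeverInfinity())
//     ...; // Reg is always finite here.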
1745 
1746 /// Compute number of sign bits for the intersection of \p Src0 and \p Src1
1747 unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1,
1748                                                    const APInt &DemandedElts,
1749                                                    unsigned Depth) {
1750   // Test src1 first, since we canonicalize simpler expressions to the RHS.
1751   unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
1752   if (Src1SignBits == 1)
1753     return 1;
1754   return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
1755 }
1756 
1757 /// Compute the known number of sign bits with attached range metadata in the
1758 /// memory operand. If this is an extending load, accounts for the behavior of
1759 /// the high bits.
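/// e.g. an i8 G_ZEXTLOAD with !range [0, 128) zero-extends the range to
/// [0, 127] in i32; 0 has 32 sign bits and 127 has 25, so 25 is returned.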
1760 static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
1761                                                     unsigned TyBits) {
1762   const MDNode *Ranges = Ld->getRanges();
1763   if (!Ranges)
1764     return 1;
1765 
1766   ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
1767   if (TyBits > CR.getBitWidth()) {
1768     switch (Ld->getOpcode()) {
1769     case TargetOpcode::G_SEXTLOAD:
1770       CR = CR.signExtend(TyBits);
1771       break;
1772     case TargetOpcode::G_ZEXTLOAD:
1773       CR = CR.zeroExtend(TyBits);
1774       break;
1775     default:
1776       break;
1777     }
1778   }
1779 
1780   return std::min(CR.getSignedMin().getNumSignBits(),
1781                   CR.getSignedMax().getNumSignBits());
1782 }
1783 
1784 unsigned GISelValueTracking::computeNumSignBits(Register R,
1785                                                 const APInt &DemandedElts,
1786                                                 unsigned Depth) {
1787   MachineInstr &MI = *MRI.getVRegDef(R);
1788   unsigned Opcode = MI.getOpcode();
1789 
1790   if (Opcode == TargetOpcode::G_CONSTANT)
1791     return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
1792 
1793   if (Depth == getMaxDepth())
1794     return 1;
1795 
1796   if (!DemandedElts)
1797     return 1; // No demanded elts, better to assume we don't know anything.
1798 
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies, but it is possible for the initial register being queried to be
  // in this state. Check validity before querying the scalar size, which is
  // meaningless on an invalid type.
  if (!DstTy.isValid())
    return 1;

  const unsigned TyBits = DstTy.getScalarSizeInBits();
1808 
1809   unsigned FirstAnswer = 1;
1810   switch (Opcode) {
1811   case TargetOpcode::COPY: {
1812     MachineOperand &Src = MI.getOperand(1);
1813     if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
1814         MRI.getType(Src.getReg()).isValid()) {
1815       // Don't increment Depth for this one since we didn't do any work.
1816       return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
1817     }
1818 
1819     return 1;
1820   }
1821   case TargetOpcode::G_SEXT: {
1822     Register Src = MI.getOperand(1).getReg();
1823     LLT SrcTy = MRI.getType(Src);
1824     unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
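    // e.g. a G_SEXT from s8 to s32 copies the sign bit into 24 new high bits,
    // so a source with 3 sign bits yields 3 + 24 = 27.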
1825     return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
1826   }
1827   case TargetOpcode::G_ASSERT_SEXT:
1828   case TargetOpcode::G_SEXT_INREG: {
1829     // Max of the input and what this extends.
1830     Register Src = MI.getOperand(1).getReg();
1831     unsigned SrcBits = MI.getOperand(2).getImm();
1832     unsigned InRegBits = TyBits - SrcBits + 1;
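    // e.g. G_SEXT_INREG %x, 8 on s32 guarantees at least 32 - 8 + 1 = 25 sign
    // bits, more if %x itself has them.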
1833     return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
1834                     InRegBits);
1835   }
1836   case TargetOpcode::G_LOAD: {
1837     GLoad *Ld = cast<GLoad>(&MI);
1838     if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
1839       break;
1840 
1841     return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1842   }
1843   case TargetOpcode::G_SEXTLOAD: {
1844     GSExtLoad *Ld = cast<GSExtLoad>(&MI);
1845 
1846     // FIXME: We need an in-memory type representation.
1847     if (DstTy.isVector())
1848       return 1;
1849 
1850     unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1851     if (NumBits != 1)
1852       return NumBits;
1853 
1854     // e.g. i16->i32 = '17' bits known.
1855     const MachineMemOperand *MMO = *MI.memoperands_begin();
1856     return TyBits - MMO->getSizeInBits().getValue() + 1;
1857   }
1858   case TargetOpcode::G_ZEXTLOAD: {
1859     GZExtLoad *Ld = cast<GZExtLoad>(&MI);
1860 
1861     // FIXME: We need an in-memory type representation.
1862     if (DstTy.isVector())
1863       return 1;
1864 
1865     unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
1866     if (NumBits != 1)
1867       return NumBits;
1868 
1869     // e.g. i16->i32 = '16' bits known.
1870     const MachineMemOperand *MMO = *MI.memoperands_begin();
1871     return TyBits - MMO->getSizeInBits().getValue();
1872   }
1873   case TargetOpcode::G_AND:
1874   case TargetOpcode::G_OR:
1875   case TargetOpcode::G_XOR: {
1876     Register Src1 = MI.getOperand(1).getReg();
1877     unsigned Src1NumSignBits =
1878         computeNumSignBits(Src1, DemandedElts, Depth + 1);
1879     if (Src1NumSignBits != 1) {
1880       Register Src2 = MI.getOperand(2).getReg();
1881       unsigned Src2NumSignBits =
1882           computeNumSignBits(Src2, DemandedElts, Depth + 1);
1883       FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
1884     }
1885     break;
1886   }
1887   case TargetOpcode::G_TRUNC: {
1888     Register Src = MI.getOperand(1).getReg();
1889     LLT SrcTy = MRI.getType(Src);
1890 
1891     // Check if the sign bits of source go down as far as the truncated value.
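    // e.g. truncating an s64 value with 40 sign bits to s32 discards the 32
    // high bits and leaves 40 - 32 = 8 sign bits.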
1892     unsigned DstTyBits = DstTy.getScalarSizeInBits();
1893     unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
1894     unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
1895     if (NumSrcSignBits > (NumSrcBits - DstTyBits))
1896       return NumSrcSignBits - (NumSrcBits - DstTyBits);
1897     break;
1898   }
1899   case TargetOpcode::G_SELECT: {
1900     return computeNumSignBitsMin(MI.getOperand(2).getReg(),
1901                                  MI.getOperand(3).getReg(), DemandedElts,
1902                                  Depth + 1);
1903   }
1904   case TargetOpcode::G_SMIN:
1905   case TargetOpcode::G_SMAX:
1906   case TargetOpcode::G_UMIN:
1907   case TargetOpcode::G_UMAX:
1908     // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
1909     return computeNumSignBitsMin(MI.getOperand(1).getReg(),
1910                                  MI.getOperand(2).getReg(), DemandedElts,
1911                                  Depth + 1);
1912   case TargetOpcode::G_SADDO:
1913   case TargetOpcode::G_SADDE:
1914   case TargetOpcode::G_UADDO:
1915   case TargetOpcode::G_UADDE:
1916   case TargetOpcode::G_SSUBO:
1917   case TargetOpcode::G_SSUBE:
1918   case TargetOpcode::G_USUBO:
1919   case TargetOpcode::G_USUBE:
1920   case TargetOpcode::G_SMULO:
1921   case TargetOpcode::G_UMULO: {
    // If the overflow result is returned as 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integers.
1925     if (MI.getOperand(1).getReg() == R) {
1926       if (TL.getBooleanContents(DstTy.isVector(), false) ==
1927           TargetLowering::ZeroOrNegativeOneBooleanContent)
1928         return TyBits;
1929     }
1930 
1931     break;
1932   }
1933   case TargetOpcode::G_FCMP:
1934   case TargetOpcode::G_ICMP: {
1935     bool IsFP = Opcode == TargetOpcode::G_FCMP;
1936     if (TyBits == 1)
1937       break;
1938     auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
1939     if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
1940       return TyBits; // All bits are sign bits.
1941     if (BC == TargetLowering::ZeroOrOneBooleanContent)
1942       return TyBits - 1; // Every always-zero bit is a sign bit.
1943     break;
1944   }
1945   case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the minimum number of sign bits shared by every demanded
    // element.
1947     FirstAnswer = TyBits;
1948     APInt SingleDemandedElt(1, 1);
1949     for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
1950       if (!DemandedElts[I])
1951         continue;
1952 
1953       unsigned Tmp2 =
1954           computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1);
1955       FirstAnswer = std::min(FirstAnswer, Tmp2);
1956 
1957       // If we don't know any bits, early out.
1958       if (FirstAnswer == 1)
1959         break;
1960     }
1961     break;
1962   }
1963   case TargetOpcode::G_CONCAT_VECTORS: {
1964     if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector())
1965       break;
1966     FirstAnswer = TyBits;
1967     // Determine the minimum number of sign bits across all demanded
1968     // elts of the input vectors. Early out if the result is already 1.
1969     unsigned NumSubVectorElts =
1970         MRI.getType(MI.getOperand(1).getReg()).getNumElements();
1971     for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) {
1972       APInt DemandedSub =
1973           DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts);
1974       if (!DemandedSub)
1975         continue;
1976       unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1);
1977 
1978       FirstAnswer = std::min(FirstAnswer, Tmp2);
1979 
1980       // If we don't know any bits, early out.
1981       if (FirstAnswer == 1)
1982         break;
1983     }
1984     break;
1985   }
1986   case TargetOpcode::G_SHUFFLE_VECTOR: {
1987     // Collect the minimum number of sign bits that are shared by every vector
1988     // element referenced by the shuffle.
1989     APInt DemandedLHS, DemandedRHS;
1990     Register Src1 = MI.getOperand(1).getReg();
1991     unsigned NumElts = MRI.getType(Src1).getNumElements();
1992     if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(),
1993                                 DemandedElts, DemandedLHS, DemandedRHS))
1994       return 1;
1995 
1996     if (!!DemandedLHS)
1997       FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1);
1998     // If we don't know anything, early out and try computeKnownBits fall-back.
1999     if (FirstAnswer == 1)
2000       break;
2001     if (!!DemandedRHS) {
2002       unsigned Tmp2 =
2003           computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1);
2004       FirstAnswer = std::min(FirstAnswer, Tmp2);
2005     }
2006     break;
2007   }
2008   case TargetOpcode::G_SPLAT_VECTOR: {
    // The splat may implicitly truncate its scalar source; check whether the
    // source's sign bits survive that truncation.
2010     Register Src = MI.getOperand(1).getReg();
2011     unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1);
2012     unsigned NumSrcBits = MRI.getType(Src).getSizeInBits();
2013     if (NumSrcSignBits > (NumSrcBits - TyBits))
2014       return NumSrcSignBits - (NumSrcBits - TyBits);
2015     break;
2016   }
2017   case TargetOpcode::G_INTRINSIC:
2018   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2019   case TargetOpcode::G_INTRINSIC_CONVERGENT:
2020   case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
2021   default: {
2022     unsigned NumBits =
2023         TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
2024     if (NumBits > 1)
2025       FirstAnswer = std::max(FirstAnswer, NumBits);
2026     break;
2027   }
2028   }
2029 
2030   // Finally, if we can prove that the top bits of the result are 0's or 1's,
2031   // use this information.
2032   KnownBits Known = getKnownBits(R, DemandedElts, Depth);
2033   APInt Mask;
2034   if (Known.isNonNegative()) { // sign bit is 0
2035     Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1
2037     Mask = Known.One;
2038   } else {
2039     // Nothing known.
2040     return FirstAnswer;
2041   }
2042 
2043   // Okay, we know that the sign bit in Mask is set.  Use CLO to determine
2044   // the number of identical bits in the top of the input value.
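  // e.g. for an s32 value whose top 12 bits are known zero, Known.Zero has 12
  // leading ones, so the value has at least 12 sign bits.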
2045   Mask <<= Mask.getBitWidth() - TyBits;
2046   return std::max(FirstAnswer, Mask.countl_one());
2047 }
2048 
2049 unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) {
2050   LLT Ty = MRI.getType(R);
2051   APInt DemandedElts =
2052       Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
2053   return computeNumSignBits(R, DemandedElts, Depth);
2054 }
2055 
2056 void GISelValueTrackingAnalysisLegacy::getAnalysisUsage(
2057     AnalysisUsage &AU) const {
2058   AU.setPreservesAll();
2059   MachineFunctionPass::getAnalysisUsage(AU);
2060 }
2061 
2062 bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction(
2063     MachineFunction &MF) {
2064   return false;
2065 }
2066 
2067 GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) {
2068   if (!Info) {
2069     unsigned MaxDepth =
2070         MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
2071     Info = std::make_unique<GISelValueTracking>(MF, MaxDepth);
2072   }
2073   return *Info;
2074 }
2075 
2076 AnalysisKey GISelValueTrackingAnalysis::Key;
2077 
2078 GISelValueTracking
2079 GISelValueTrackingAnalysis::run(MachineFunction &MF,
2080                                 MachineFunctionAnalysisManager &MFAM) {
2081   return Result(MF);
2082 }
2083 
2084 PreservedAnalyses
2085 GISelValueTrackingPrinterPass::run(MachineFunction &MF,
2086                                    MachineFunctionAnalysisManager &MFAM) {
2087   auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF);
2088   const auto &MRI = MF.getRegInfo();
2089   OS << "name: ";
2090   MF.getFunction().printAsOperand(OS, /*PrintType=*/false);
2091   OS << '\n';
2092 
2093   for (MachineBasicBlock &BB : MF) {
2094     for (MachineInstr &MI : BB) {
2095       for (MachineOperand &MO : MI.defs()) {
2096         if (!MO.isReg() || MO.getReg().isPhysical())
2097           continue;
2098         Register Reg = MO.getReg();
2099         if (!MRI.getType(Reg).isValid())
2100           continue;
2101         KnownBits Known = VTA.getKnownBits(Reg);
2102         unsigned SignedBits = VTA.computeNumSignBits(Reg);
2103         OS << "  " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits
2104            << '\n';
      }
2106     }
2107   }
2108   return PreservedAnalyses::all();
2109 }
2110