//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
13 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/Analysis/ValueTracking.h"
16 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
17 #include "llvm/CodeGen/GlobalISel/Utils.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/CodeGen/TargetOpcodes.h"
22 #include "llvm/IR/ConstantRange.h"
23 #include "llvm/IR/Module.h"
24 #include "llvm/Target/TargetMachine.h"
25
26 #define DEBUG_TYPE "gisel-known-bits"
27
28 using namespace llvm;
29
30 char llvm::GISelKnownBitsAnalysis::ID = 0;
31
32 INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
33 "Analysis for ComputingKnownBits", false, true)
34
// Cache the per-function objects every query needs (register info, target
// lowering, data layout); MaxDepth bounds the recursion of all the
// known-bits/sign-bits computations below.
GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}
38
/// Compute a lower bound on the alignment of the value in register \p R by
/// walking its defining instruction.
Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    // Look through copies without increasing the depth: no real work is done.
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    // Frame objects record their alignment in the frame info.
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    // Intrinsics and anything else defer to the target hook.
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}
60
getKnownBits(MachineInstr & MI)61 KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
62 assert(MI.getNumExplicitDefs() == 1 &&
63 "expected single return generic instruction");
64 return getKnownBits(MI.getOperand(0).getReg());
65 }
66
getKnownBits(Register R)67 KnownBits GISelKnownBits::getKnownBits(Register R) {
68 const LLT Ty = MRI.getType(R);
69 // Since the number of lanes in a scalable vector is unknown at compile time,
70 // we track one bit which is implicitly broadcast to all lanes. This means
71 // that all lanes in a scalable vector are considered demanded.
72 APInt DemandedElts =
73 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
74 return getKnownBits(R, DemandedElts);
75 }
76
getKnownBits(Register R,const APInt & DemandedElts,unsigned Depth)77 KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
78 unsigned Depth) {
79 // For now, we only maintain the cache during one request.
80 assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
81
82 KnownBits Known;
83 computeKnownBitsImpl(R, Known, DemandedElts, Depth);
84 ComputeKnownBitsCache.clear();
85 return Known;
86 }
87
signBitIsZero(Register R)88 bool GISelKnownBits::signBitIsZero(Register R) {
89 LLT Ty = MRI.getType(R);
90 unsigned BitWidth = Ty.getScalarSizeInBits();
91 return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
92 }
93
getKnownZeroes(Register R)94 APInt GISelKnownBits::getKnownZeroes(Register R) {
95 return getKnownBits(R).Zero;
96 }
97
getKnownOnes(Register R)98 APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
99
/// Debug helper: print the instruction plus the known (Zero|One), Zero and One
/// masks in hex, each line tagged with the recursion depth.
LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}
110
111 /// Compute known bits for the intersection of \p Src0 and \p Src1
computeKnownBitsMin(Register Src0,Register Src1,KnownBits & Known,const APInt & DemandedElts,unsigned Depth)112 void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
113 KnownBits &Known,
114 const APInt &DemandedElts,
115 unsigned Depth) {
116 // Test src1 first, since we canonicalize simpler expressions to the RHS.
117 computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
118
119 // If we don't know any bits, early out.
120 if (Known.isUnknown())
121 return;
122
123 KnownBits Known2;
124 computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
125
126 // Only known if known in both the LHS and RHS.
127 Known = Known.intersectWith(Known2);
128 }
129
130 // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
131 // created using Width. Use this function when the inputs are KnownBits
132 // objects. TODO: Move this KnownBits.h if this is usable in more cases.
extractBits(unsigned BitWidth,const KnownBits & SrcOpKnown,const KnownBits & OffsetKnown,const KnownBits & WidthKnown)133 static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
134 const KnownBits &OffsetKnown,
135 const KnownBits &WidthKnown) {
136 KnownBits Mask(BitWidth);
137 Mask.Zero = APInt::getBitsSetFrom(
138 BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
139 Mask.One = APInt::getLowBitsSet(
140 BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
141 return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
142 }
143
/// Core recursive known-bits computation for \p R.
///
/// \param R            Virtual register to analyze.
/// \param Known        [out] Known-zero/known-one masks; left fully unknown
///                     when nothing can be proven.
/// \param DemandedElts For fixed vectors, the lanes whose bits are demanded;
///                     a single bit otherwise.
/// \param Depth        Current recursion depth, bounded by getMaxDepth().
void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint (i.e. it has a register class constraint instead). This is
  // unlikely to occur except by looking through copies but it is possible for
  // the initial register being queried to be in this state.
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  // Reuse a result already computed during this request (also cuts PHI
  // cycles, see the PHI case below).
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelKnownBits object.
  // This may happen when say a generic part uses a GISelKnownBits object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
  // which creates a new GISelKnownBits object with a different and smaller
  // depth. If we just check for equality, we would never exit if the depth
  // that is passed down to the target specific GISelKnownBits object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    // Unknown generic opcode: let the target contribute what it knows.
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
      if (!DemandedElts[i])
        continue;

      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
                           Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    // Start from "all bits known" so intersecting with each incoming value
    // keeps exactly the bits they agree on.
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, save on compile time until we derive a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // PHI's operand are a mix of registers and basic blocks interleaved.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    // A constant's bits are fully known.
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
                                        /* NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                        /* NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    // A select produces one of its two value operands.
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
                         DemandedElts, Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
                         DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
                         DemandedElts, Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
                         DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    // If booleans are zero-or-one, every bit above bit 0 is known zero.
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // Anyext leaves the new high bits unknown.
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    // Only !range metadata on the memory operand tells us anything here.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    // As G_LOAD, but the extension semantics determine the high bits.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ASSERT_ZEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned SrcBitWidth;

    // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
    if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
      SrcBitWidth = MI.getOperand(2).getImm();
    else {
      SrcBitWidth = SrcTy.isPointer()
                        ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
                        : SrcTy.getSizeInBits();
    }
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    // Concatenate the known bits of each source at its bit offset.
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (DstTy.isVector())
      break;
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    if (MRI.getType(SrcReg).isVector())
      return; // TODO: Handle vectors.

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    // Extract the slice of the source that this def corresponds to.
    Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero can't
    // contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::computeForAddSub(
        /*Add=*/false, /*NSW=*/false, /* NUW=*/false, ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    // Operand 0 is the arithmetic result; operand 1 is the boolean overflow
    // flag, which is the only def we know anything about.
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}
617
618 /// Compute number of sign bits for the intersection of \p Src0 and \p Src1
computeNumSignBitsMin(Register Src0,Register Src1,const APInt & DemandedElts,unsigned Depth)619 unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
620 const APInt &DemandedElts,
621 unsigned Depth) {
622 // Test src1 first, since we canonicalize simpler expressions to the RHS.
623 unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
624 if (Src1SignBits == 1)
625 return 1;
626 return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
627 }
628
629 /// Compute the known number of sign bits with attached range metadata in the
630 /// memory operand. If this is an extending load, accounts for the behavior of
631 /// the high bits.
computeNumSignBitsFromRangeMetadata(const GAnyLoad * Ld,unsigned TyBits)632 static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
633 unsigned TyBits) {
634 const MDNode *Ranges = Ld->getRanges();
635 if (!Ranges)
636 return 1;
637
638 ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
639 if (TyBits > CR.getBitWidth()) {
640 switch (Ld->getOpcode()) {
641 case TargetOpcode::G_SEXTLOAD:
642 CR = CR.signExtend(TyBits);
643 break;
644 case TargetOpcode::G_ZEXTLOAD:
645 CR = CR.zeroExtend(TyBits);
646 break;
647 default:
648 break;
649 }
650 }
651
652 return std::min(CR.getSignedMin().getNumSignBits(),
653 CR.getSignedMax().getNumSignBits());
654 }
655
/// Return the minimum number of known sign bits of \p R (at least 1), i.e.
/// how many of the top bits are guaranteed copies of the sign bit.
///
/// \param DemandedElts Demanded-lane mask for fixed vectors, one bit otherwise.
/// \param Depth        Current recursion depth, bounded by getMaxDepth().
unsigned GISelKnownBits::computeNumSignBits(Register R,
                                            const APInt &DemandedElts,
                                            unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  // Constants are exact; answer before any depth cutoff.
  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through copies
  // but it is possible for the initial register being queried to be in this
  // state.
  if (!DstTy.isValid())
    return 1;

  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    // Only look through the copy when the source is a typed virtual register
    // without a subregister; register classes don't carry a bit width.
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    // Sign extension adds exactly the number of newly created high bits.
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
    unsigned InRegBits = TyBits - SrcBits + 1;
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(&MI);
    // Range metadata is only usable for the simple little-endian scalar case.
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    // Prefer the tighter bound from range metadata when present.
    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    // Prefer the tighter bound from range metadata when present.
    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Bitwise ops preserve whatever sign bits both operands share.
    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(Src2, DemandedElts, Depth + 1);
      FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    // A select produces one of its two value operands.
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If compares returns 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integer.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    // Unknown opcode: give the target a chance to contribute.
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1;
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}
839
computeNumSignBits(Register R,unsigned Depth)840 unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
841 LLT Ty = MRI.getType(R);
842 APInt DemandedElts =
843 Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
844 return computeNumSignBits(R, DemandedElts, Depth);
845 }
846
void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  // Pure analysis: it never modifies the function, so it preserves everything.
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
851
bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  // The GISelKnownBits object is built lazily in get(); nothing to do here
  // and the function is never modified.
  return false;
}
855
get(MachineFunction & MF)856 GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
857 if (!Info) {
858 unsigned MaxDepth =
859 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
860 Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
861 }
862 return *Info;
863 }
864