//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;

char llvm::GISelKnownBitsAnalysis::ID = 0;

INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
                "Analysis for Computing Known Bits", false, true)

GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelKnownBits::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
                                       unsigned Depth) {
  // For now, we only maintain the cache during one request.
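  // Each top-level query therefore starts from an empty cache and clears it
  // before returning, since any change to the MIR between queries could make
  // cached results stale.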
  assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");

  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  ComputeKnownBitsCache.clear();
  return Known;
}

bool GISelKnownBits::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelKnownBits::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }

LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
                                         KnownBits &Known,
                                         const APInt &DemandedElts,
                                         unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint (i.e. it has a register class constraint instead). This is
  // unlikely to occur except by looking through copies but it is possible for
  // the initial register being queried to be in this state.
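  // For example, `%dst:gpr32 = COPY %src` constrains %dst to a register class
  // but gives it no LLT, so there is no bit width to reason about.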
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelKnownBits object.
  // This may happen when, say, a generic part uses a GISelKnownBits object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr,
  // which creates a new GISelKnownBits object with a different and smaller
  // depth. If we just checked for equality, we would never exit if the depth
  // that is passed down to the target-specific GISelKnownBits object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
      if (!DemandedElts[i])
        continue;

      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
                           Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, save on compile time until we have a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // A PHI's operands are a mix of registers and basic blocks, interleaved.
    // We only care about the register ones.
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis, but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`, since known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
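      // For instance, an incoming `%in:gpr32` value has no LLT, so the else
      // branch below conservatively resets Known to a fully-unknown value.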
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/false, /*NSW=*/false,
                                        /*NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                        /*NUW=*/false, Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS are Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // Bits are known one if set in either input; known zero only if clear in
    // both inputs.
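    // e.g. ??10 | 0?1? = ??1?: the known ones union, the known zeros
    // intersect.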
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
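    // e.g. the 4-bit pattern 1?10 sign-extends to 11111?10, while ??10
    // (unknown sign bit) extends to ??????10.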
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ASSERT_ZEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned SrcBitWidth;

    // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
    if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
      SrcBitWidth = MI.getOperand(2).getImm();
    else {
      SrcBitWidth = SrcTy.isPointer()
                        ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
                        : SrcTy.getSizeInBits();
    }
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
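    // e.g. an asserted alignment of 8 gives LogOfAlign == 3, so the low three
    // bits are known zero.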
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (DstTy.isVector())
      break;
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    if (MRI.getType(SrcReg).isVector())
      return; // TODO: Handle vectors.

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
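    // i.e. Known = ashr(shl(Known, BitWidth - Width), BitWidth - Width),
    // evaluated on KnownBits, so an unknown Width conservatively weakens the
    // result.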
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::computeForAddSub(
        /*Add=*/false, /*NSW=*/false, /*NUW=*/false, ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}

/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
                                               const APInt &DemandedElts,
                                               unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
  if (Src1SignBits == 1)
    return 1;
  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}

/// Compute the known number of sign bits with attached range metadata in the
/// memory operand. If this is an extending load, accounts for the behavior of
/// the high bits.
static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
                                                    unsigned TyBits) {
  const MDNode *Ranges = Ld->getRanges();
  if (!Ranges)
    return 1;

  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  if (TyBits > CR.getBitWidth()) {
    switch (Ld->getOpcode()) {
    case TargetOpcode::G_SEXTLOAD:
      CR = CR.signExtend(TyBits);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      CR = CR.zeroExtend(TyBits);
      break;
    default:
      break;
    }
  }

  return std::min(CR.getSignedMin().getNumSignBits(),
                  CR.getSignedMax().getNumSignBits());
}

unsigned GISelKnownBits::computeNumSignBits(Register R,
                                            const APInt &DemandedElts,
                                            unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies but it is possible for the initial register being queried to be in
  // this state.
  if (!DstTy.isValid())
    return 1;

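  // FirstAnswer tracks the best lower bound established by the opcode-specific
  // cases below; the generic known-bits fallback at the end can only raise it.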
  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
    unsigned InRegBits = TyBits - SrcBits + 1;
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
                    InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(&MI);
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(Src2, DemandedElts, Depth + 1);
      FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
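    // e.g. truncating s64 -> s32 with 40 known sign bits in the source leaves
    // 40 - (64 - 32) = 8 sign bits in the result.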
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If the compare returns 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integers.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}

unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeNumSignBits(R, DemandedElts, Depth);
}

void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  return false;
}

GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
  if (!Info) {
    unsigned MaxDepth =
        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
  }
  return *Info;
}