1 //===- lib/CodeGen/GlobalISel/GISelValueTracking.cpp --------------*- C++ 2 //*-===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// Provides analysis for querying information about KnownBits during GISel 11 /// passes. 12 // 13 //===----------------------------------------------------------------------===// 14 #include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" 15 #include "llvm/ADT/APFloat.h" 16 #include "llvm/ADT/FloatingPointMode.h" 17 #include "llvm/ADT/ScopeExit.h" 18 #include "llvm/ADT/StringExtras.h" 19 #include "llvm/Analysis/ValueTracking.h" 20 #include "llvm/Analysis/VectorUtils.h" 21 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 22 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 23 #include "llvm/CodeGen/GlobalISel/MachineFloatingPointPredicateUtils.h" 24 #include "llvm/CodeGen/GlobalISel/Utils.h" 25 #include "llvm/CodeGen/LowLevelTypeUtils.h" 26 #include "llvm/CodeGen/MachineFrameInfo.h" 27 #include "llvm/CodeGen/MachineInstr.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/CodeGen/Register.h" 31 #include "llvm/CodeGen/TargetLowering.h" 32 #include "llvm/CodeGen/TargetOpcodes.h" 33 #include "llvm/IR/ConstantRange.h" 34 #include "llvm/IR/DerivedTypes.h" 35 #include "llvm/IR/FMF.h" 36 #include "llvm/MC/TargetRegistry.h" 37 #include "llvm/Support/KnownBits.h" 38 #include "llvm/Support/KnownFPClass.h" 39 #include "llvm/Target/TargetMachine.h" 40 41 #define DEBUG_TYPE "gisel-known-bits" 42 43 using namespace llvm; 44 using namespace MIPatternMatch; 45 46 char llvm::GISelValueTrackingAnalysisLegacy::ID = 0; 47 48 INITIALIZE_PASS(GISelValueTrackingAnalysisLegacy, DEBUG_TYPE, 49 "Analysis for ComputingKnownBits", false, true) 50 51 GISelValueTracking::GISelValueTracking(MachineFunction &MF, unsigned MaxDepth) 52 : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), 53 DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {} 54 55 Align GISelValueTracking::computeKnownAlignment(Register R, unsigned Depth) { 56 const MachineInstr *MI = MRI.getVRegDef(R); 57 switch (MI->getOpcode()) { 58 case TargetOpcode::COPY: 59 return computeKnownAlignment(MI->getOperand(1).getReg(), Depth); 60 case TargetOpcode::G_ASSERT_ALIGN: { 61 // TODO: Min with source 62 return Align(MI->getOperand(2).getImm()); 63 } 64 case TargetOpcode::G_FRAME_INDEX: { 65 int FrameIdx = MI->getOperand(1).getIndex(); 66 return MF.getFrameInfo().getObjectAlign(FrameIdx); 67 } 68 case TargetOpcode::G_INTRINSIC: 69 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 70 case TargetOpcode::G_INTRINSIC_CONVERGENT: 71 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: 72 default: 73 return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1); 74 } 75 } 76 77 KnownBits GISelValueTracking::getKnownBits(MachineInstr &MI) { 78 assert(MI.getNumExplicitDefs() == 1 && 79 "expected single return generic instruction"); 80 return getKnownBits(MI.getOperand(0).getReg()); 81 } 82 83 KnownBits GISelValueTracking::getKnownBits(Register R) { 84 const LLT Ty = MRI.getType(R); 85 // Since the number of lanes in a scalable vector is unknown at compile time, 86 // we track one bit which is implicitly broadcast to all lanes. This means 87 // that all lanes in a scalable vector are considered demanded. 88 APInt DemandedElts = 89 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); 90 return getKnownBits(R, DemandedElts); 91 } 92 93 KnownBits GISelValueTracking::getKnownBits(Register R, 94 const APInt &DemandedElts, 95 unsigned Depth) { 96 // For now, we only maintain the cache during one request. 97 assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared"); 98 99 KnownBits Known; 100 computeKnownBitsImpl(R, Known, DemandedElts, Depth); 101 ComputeKnownBitsCache.clear(); 102 return Known; 103 } 104 105 bool GISelValueTracking::signBitIsZero(Register R) { 106 LLT Ty = MRI.getType(R); 107 unsigned BitWidth = Ty.getScalarSizeInBits(); 108 return maskedValueIsZero(R, APInt::getSignMask(BitWidth)); 109 } 110 111 APInt GISelValueTracking::getKnownZeroes(Register R) { 112 return getKnownBits(R).Zero; 113 } 114 115 APInt GISelValueTracking::getKnownOnes(Register R) { 116 return getKnownBits(R).One; 117 } 118 119 LLVM_ATTRIBUTE_UNUSED static void 120 dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) { 121 dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth 122 << "] Computed for: " << MI << "[" << Depth << "] Known: 0x" 123 << toString(Known.Zero | Known.One, 16, false) << "\n" 124 << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false) 125 << "\n" 126 << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false) 127 << "\n"; 128 } 129 130 /// Compute known bits for the intersection of \p Src0 and \p Src1 131 void GISelValueTracking::computeKnownBitsMin(Register Src0, Register Src1, 132 KnownBits &Known, 133 const APInt &DemandedElts, 134 unsigned Depth) { 135 // Test src1 first, since we canonicalize simpler expressions to the RHS. 136 computeKnownBitsImpl(Src1, Known, DemandedElts, Depth); 137 138 // If we don't know any bits, early out. 139 if (Known.isUnknown()) 140 return; 141 142 KnownBits Known2; 143 computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth); 144 145 // Only known if known in both the LHS and RHS. 146 Known = Known.intersectWith(Known2); 147 } 148 149 // Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is 150 // created using Width. Use this function when the inputs are KnownBits 151 // objects. TODO: Move this KnownBits.h if this is usable in more cases. 152 static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown, 153 const KnownBits &OffsetKnown, 154 const KnownBits &WidthKnown) { 155 KnownBits Mask(BitWidth); 156 Mask.Zero = APInt::getBitsSetFrom( 157 BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth)); 158 Mask.One = APInt::getLowBitsSet( 159 BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth)); 160 return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask; 161 } 162 163 void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known, 164 const APInt &DemandedElts, 165 unsigned Depth) { 166 MachineInstr &MI = *MRI.getVRegDef(R); 167 unsigned Opcode = MI.getOpcode(); 168 LLT DstTy = MRI.getType(R); 169 170 // Handle the case where this is called on a register that does not have a 171 // type constraint. For example, it may be post-ISel or this target might not 172 // preserve the type when early-selecting instructions. 173 if (!DstTy.isValid()) { 174 Known = KnownBits(); 175 return; 176 } 177 178 #ifndef NDEBUG 179 if (DstTy.isFixedVector()) { 180 assert( 181 DstTy.getNumElements() == DemandedElts.getBitWidth() && 182 "DemandedElt width should equal the fixed vector number of elements"); 183 } else { 184 assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) && 185 "DemandedElt width should be 1 for scalars or scalable vectors"); 186 } 187 #endif 188 189 unsigned BitWidth = DstTy.getScalarSizeInBits(); 190 auto CacheEntry = ComputeKnownBitsCache.find(R); 191 if (CacheEntry != ComputeKnownBitsCache.end()) { 192 Known = CacheEntry->second; 193 LLVM_DEBUG(dbgs() << "Cache hit at "); 194 LLVM_DEBUG(dumpResult(MI, Known, Depth)); 195 assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match"); 196 return; 197 } 198 Known = KnownBits(BitWidth); // Don't know anything 199 200 // Depth may get bigger than max depth if it gets passed to a different 201 // GISelValueTracking object. 202 // This may happen when say a generic part uses a GISelValueTracking object 203 // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr 204 // which creates a new GISelValueTracking object with a different and smaller 205 // depth. If we just check for equality, we would never exit if the depth 206 // that is passed down to the target specific GISelValueTracking object is 207 // already bigger than its max depth. 208 if (Depth >= getMaxDepth()) 209 return; 210 211 if (!DemandedElts) 212 return; // No demanded elts, better to assume we don't know anything. 213 214 KnownBits Known2; 215 216 switch (Opcode) { 217 default: 218 TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, 219 Depth); 220 break; 221 case TargetOpcode::G_BUILD_VECTOR: { 222 // Collect the known bits that are shared by every demanded vector element. 223 Known.Zero.setAllBits(); 224 Known.One.setAllBits(); 225 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { 226 if (!DemandedElts[I]) 227 continue; 228 229 computeKnownBitsImpl(MO.getReg(), Known2, APInt(1, 1), Depth + 1); 230 231 // Known bits are the values that are shared by every demanded element. 232 Known = Known.intersectWith(Known2); 233 234 // If we don't know any bits, early out. 235 if (Known.isUnknown()) 236 break; 237 } 238 break; 239 } 240 case TargetOpcode::G_SPLAT_VECTOR: { 241 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, APInt(1, 1), 242 Depth + 1); 243 // Implicitly truncate the bits to match the official semantics of 244 // G_SPLAT_VECTOR. 245 Known = Known.trunc(BitWidth); 246 break; 247 } 248 case TargetOpcode::COPY: 249 case TargetOpcode::G_PHI: 250 case TargetOpcode::PHI: { 251 Known.One = APInt::getAllOnes(BitWidth); 252 Known.Zero = APInt::getAllOnes(BitWidth); 253 // Destination registers should not have subregisters at this 254 // point of the pipeline, otherwise the main live-range will be 255 // defined more than once, which is against SSA. 256 assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?"); 257 // Record in the cache that we know nothing for MI. 258 // This will get updated later and in the meantime, if we reach that 259 // phi again, because of a loop, we will cut the search thanks to this 260 // cache entry. 261 // We could actually build up more information on the phi by not cutting 262 // the search, but that additional information is more a side effect 263 // than an intended choice. 264 // Therefore, for now, save on compile time until we derive a proper way 265 // to derive known bits for PHIs within loops. 266 ComputeKnownBitsCache[R] = KnownBits(BitWidth); 267 // PHI's operand are a mix of registers and basic blocks interleaved. 268 // We only care about the register ones. 269 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { 270 const MachineOperand &Src = MI.getOperand(Idx); 271 Register SrcReg = Src.getReg(); 272 // Look through trivial copies and phis but don't look through trivial 273 // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits 274 // analysis is currently unable to determine the bit width of a 275 // register class. 276 // 277 // We can't use NoSubRegister by name as it's defined by each target but 278 // it's always defined to be 0 by tablegen. 279 if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ && 280 MRI.getType(SrcReg).isValid()) { 281 // For COPYs we don't do anything, don't increase the depth. 282 computeKnownBitsImpl(SrcReg, Known2, DemandedElts, 283 Depth + (Opcode != TargetOpcode::COPY)); 284 Known2 = Known2.anyextOrTrunc(BitWidth); 285 Known = Known.intersectWith(Known2); 286 // If we reach a point where we don't know anything 287 // just stop looking through the operands. 288 if (Known.isUnknown()) 289 break; 290 } else { 291 // We know nothing. 292 Known = KnownBits(BitWidth); 293 break; 294 } 295 } 296 break; 297 } 298 case TargetOpcode::G_CONSTANT: { 299 Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue()); 300 break; 301 } 302 case TargetOpcode::G_FRAME_INDEX: { 303 int FrameIdx = MI.getOperand(1).getIndex(); 304 TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF); 305 break; 306 } 307 case TargetOpcode::G_SUB: { 308 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 309 Depth + 1); 310 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, 311 Depth + 1); 312 Known = KnownBits::sub(Known, Known2); 313 break; 314 } 315 case TargetOpcode::G_XOR: { 316 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, 317 Depth + 1); 318 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, 319 Depth + 1); 320 321 Known ^= Known2; 322 break; 323 } 324 case TargetOpcode::G_PTR_ADD: { 325 if (DstTy.isVector()) 326 break; 327 // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets? 328 LLT Ty = MRI.getType(MI.getOperand(1).getReg()); 329 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) 330 break; 331 [[fallthrough]]; 332 } 333 case TargetOpcode::G_ADD: { 334 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 335 Depth + 1); 336 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, 337 Depth + 1); 338 Known = KnownBits::add(Known, Known2); 339 break; 340 } 341 case TargetOpcode::G_AND: { 342 // If either the LHS or the RHS are Zero, the result is zero. 343 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, 344 Depth + 1); 345 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, 346 Depth + 1); 347 348 Known &= Known2; 349 break; 350 } 351 case TargetOpcode::G_OR: { 352 // If either the LHS or the RHS are Zero, the result is zero. 353 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, 354 Depth + 1); 355 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, 356 Depth + 1); 357 358 Known |= Known2; 359 break; 360 } 361 case TargetOpcode::G_MUL: { 362 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, 363 Depth + 1); 364 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, 365 Depth + 1); 366 Known = KnownBits::mul(Known, Known2); 367 break; 368 } 369 case TargetOpcode::G_SELECT: { 370 computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(), 371 Known, DemandedElts, Depth + 1); 372 break; 373 } 374 case TargetOpcode::G_SMIN: { 375 // TODO: Handle clamp pattern with number of sign bits 376 KnownBits KnownRHS; 377 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 378 Depth + 1); 379 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, 380 Depth + 1); 381 Known = KnownBits::smin(Known, KnownRHS); 382 break; 383 } 384 case TargetOpcode::G_SMAX: { 385 // TODO: Handle clamp pattern with number of sign bits 386 KnownBits KnownRHS; 387 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 388 Depth + 1); 389 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, 390 Depth + 1); 391 Known = KnownBits::smax(Known, KnownRHS); 392 break; 393 } 394 case TargetOpcode::G_UMIN: { 395 KnownBits KnownRHS; 396 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 397 Depth + 1); 398 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, 399 Depth + 1); 400 Known = KnownBits::umin(Known, KnownRHS); 401 break; 402 } 403 case TargetOpcode::G_UMAX: { 404 KnownBits KnownRHS; 405 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 406 Depth + 1); 407 computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts, 408 Depth + 1); 409 Known = KnownBits::umax(Known, KnownRHS); 410 break; 411 } 412 case TargetOpcode::G_FCMP: 413 case TargetOpcode::G_ICMP: { 414 if (DstTy.isVector()) 415 break; 416 if (TL.getBooleanContents(DstTy.isVector(), 417 Opcode == TargetOpcode::G_FCMP) == 418 TargetLowering::ZeroOrOneBooleanContent && 419 BitWidth > 1) 420 Known.Zero.setBitsFrom(1); 421 break; 422 } 423 case TargetOpcode::G_SEXT: { 424 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 425 Depth + 1); 426 // If the sign bit is known to be zero or one, then sext will extend 427 // it to the top bits, else it will just zext. 428 Known = Known.sext(BitWidth); 429 break; 430 } 431 case TargetOpcode::G_ASSERT_SEXT: 432 case TargetOpcode::G_SEXT_INREG: { 433 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 434 Depth + 1); 435 Known = Known.sextInReg(MI.getOperand(2).getImm()); 436 break; 437 } 438 case TargetOpcode::G_ANYEXT: { 439 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, 440 Depth + 1); 441 Known = Known.anyext(BitWidth); 442 break; 443 } 444 case TargetOpcode::G_LOAD: { 445 const MachineMemOperand *MMO = *MI.memoperands_begin(); 446 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); 447 if (const MDNode *Ranges = MMO->getRanges()) 448 computeKnownBitsFromRangeMetadata(*Ranges, KnownRange); 449 Known = KnownRange.anyext(Known.getBitWidth()); 450 break; 451 } 452 case TargetOpcode::G_SEXTLOAD: 453 case TargetOpcode::G_ZEXTLOAD: { 454 if (DstTy.isVector()) 455 break; 456 const MachineMemOperand *MMO = *MI.memoperands_begin(); 457 KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits()); 458 if (const MDNode *Ranges = MMO->getRanges()) 459 computeKnownBitsFromRangeMetadata(*Ranges, KnownRange); 460 Known = Opcode == TargetOpcode::G_SEXTLOAD 461 ? KnownRange.sext(Known.getBitWidth()) 462 : KnownRange.zext(Known.getBitWidth()); 463 break; 464 } 465 case TargetOpcode::G_ASHR: { 466 KnownBits LHSKnown, RHSKnown; 467 computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, 468 Depth + 1); 469 computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, 470 Depth + 1); 471 Known = KnownBits::ashr(LHSKnown, RHSKnown); 472 break; 473 } 474 case TargetOpcode::G_LSHR: { 475 KnownBits LHSKnown, RHSKnown; 476 computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, 477 Depth + 1); 478 computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, 479 Depth + 1); 480 Known = KnownBits::lshr(LHSKnown, RHSKnown); 481 break; 482 } 483 case TargetOpcode::G_SHL: { 484 KnownBits LHSKnown, RHSKnown; 485 computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts, 486 Depth + 1); 487 computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, 488 Depth + 1); 489 Known = KnownBits::shl(LHSKnown, RHSKnown); 490 break; 491 } 492 case TargetOpcode::G_INTTOPTR: 493 case TargetOpcode::G_PTRTOINT: 494 if (DstTy.isVector()) 495 break; 496 // Fall through and handle them the same as zext/trunc. 497 [[fallthrough]]; 498 case TargetOpcode::G_ZEXT: 499 case TargetOpcode::G_TRUNC: { 500 Register SrcReg = MI.getOperand(1).getReg(); 501 computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); 502 Known = Known.zextOrTrunc(BitWidth); 503 break; 504 } 505 case TargetOpcode::G_ASSERT_ZEXT: { 506 Register SrcReg = MI.getOperand(1).getReg(); 507 computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); 508 509 unsigned SrcBitWidth = MI.getOperand(2).getImm(); 510 assert(SrcBitWidth && "SrcBitWidth can't be zero"); 511 APInt InMask = APInt::getLowBitsSet(BitWidth, SrcBitWidth); 512 Known.Zero |= (~InMask); 513 Known.One &= (~Known.Zero); 514 break; 515 } 516 case TargetOpcode::G_ASSERT_ALIGN: { 517 int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm()); 518 519 // TODO: Should use maximum with source 520 // If a node is guaranteed to be aligned, set low zero bits accordingly as 521 // well as clearing one bits. 522 Known.Zero.setLowBits(LogOfAlign); 523 Known.One.clearLowBits(LogOfAlign); 524 break; 525 } 526 case TargetOpcode::G_MERGE_VALUES: { 527 unsigned NumOps = MI.getNumOperands(); 528 unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 529 530 for (unsigned I = 0; I != NumOps - 1; ++I) { 531 KnownBits SrcOpKnown; 532 computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown, 533 DemandedElts, Depth + 1); 534 Known.insertBits(SrcOpKnown, I * OpSize); 535 } 536 break; 537 } 538 case TargetOpcode::G_UNMERGE_VALUES: { 539 unsigned NumOps = MI.getNumOperands(); 540 Register SrcReg = MI.getOperand(NumOps - 1).getReg(); 541 LLT SrcTy = MRI.getType(SrcReg); 542 543 if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType()) 544 return; // TODO: Handle vector->subelement unmerges 545 546 // Figure out the result operand index 547 unsigned DstIdx = 0; 548 for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R; 549 ++DstIdx) 550 ; 551 552 APInt SubDemandedElts = DemandedElts; 553 if (SrcTy.isVector()) { 554 unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1; 555 SubDemandedElts = 556 DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes); 557 } 558 559 KnownBits SrcOpKnown; 560 computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1); 561 562 if (SrcTy.isVector()) 563 Known = std::move(SrcOpKnown); 564 else 565 Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx); 566 break; 567 } 568 case TargetOpcode::G_BSWAP: { 569 Register SrcReg = MI.getOperand(1).getReg(); 570 computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); 571 Known = Known.byteSwap(); 572 break; 573 } 574 case TargetOpcode::G_BITREVERSE: { 575 Register SrcReg = MI.getOperand(1).getReg(); 576 computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); 577 Known = Known.reverseBits(); 578 break; 579 } 580 case TargetOpcode::G_CTPOP: { 581 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, 582 Depth + 1); 583 // We can bound the space the count needs. Also, bits known to be zero 584 // can't contribute to the population. 585 unsigned BitsPossiblySet = Known2.countMaxPopulation(); 586 unsigned LowBits = llvm::bit_width(BitsPossiblySet); 587 Known.Zero.setBitsFrom(LowBits); 588 // TODO: we could bound Known.One using the lower bound on the number of 589 // bits which might be set provided by popcnt KnownOne2. 590 break; 591 } 592 case TargetOpcode::G_UBFX: { 593 KnownBits SrcOpKnown, OffsetKnown, WidthKnown; 594 computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, 595 Depth + 1); 596 computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts, 597 Depth + 1); 598 computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, 599 Depth + 1); 600 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); 601 break; 602 } 603 case TargetOpcode::G_SBFX: { 604 KnownBits SrcOpKnown, OffsetKnown, WidthKnown; 605 computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, 606 Depth + 1); 607 computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts, 608 Depth + 1); 609 computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts, 610 Depth + 1); 611 Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown); 612 // Sign extend the extracted value using shift left and arithmetic shift 613 // right. 614 KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth)); 615 KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown); 616 Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown); 617 break; 618 } 619 case TargetOpcode::G_UADDO: 620 case TargetOpcode::G_UADDE: 621 case TargetOpcode::G_SADDO: 622 case TargetOpcode::G_SADDE: 623 case TargetOpcode::G_USUBO: 624 case TargetOpcode::G_USUBE: 625 case TargetOpcode::G_SSUBO: 626 case TargetOpcode::G_SSUBE: 627 case TargetOpcode::G_UMULO: 628 case TargetOpcode::G_SMULO: { 629 if (MI.getOperand(1).getReg() == R) { 630 // If we know the result of a compare has the top bits zero, use this 631 // info. 632 if (TL.getBooleanContents(DstTy.isVector(), false) == 633 TargetLowering::ZeroOrOneBooleanContent && 634 BitWidth > 1) 635 Known.Zero.setBitsFrom(1); 636 } 637 break; 638 } 639 case TargetOpcode::G_CTLZ: 640 case TargetOpcode::G_CTLZ_ZERO_UNDEF: { 641 KnownBits SrcOpKnown; 642 computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts, 643 Depth + 1); 644 // If we have a known 1, its position is our upper bound. 645 unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros(); 646 unsigned LowBits = llvm::bit_width(PossibleLZ); 647 Known.Zero.setBitsFrom(LowBits); 648 break; 649 } 650 case TargetOpcode::G_SHUFFLE_VECTOR: { 651 APInt DemandedLHS, DemandedRHS; 652 // Collect the known bits that are shared by every vector element referenced 653 // by the shuffle. 654 unsigned NumElts = MRI.getType(MI.getOperand(1).getReg()).getNumElements(); 655 if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(), 656 DemandedElts, DemandedLHS, DemandedRHS)) 657 break; 658 659 // Known bits are the values that are shared by every demanded element. 660 Known.Zero.setAllBits(); 661 Known.One.setAllBits(); 662 if (!!DemandedLHS) { 663 computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedLHS, 664 Depth + 1); 665 Known = Known.intersectWith(Known2); 666 } 667 // If we don't know any bits, early out. 668 if (Known.isUnknown()) 669 break; 670 if (!!DemandedRHS) { 671 computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedRHS, 672 Depth + 1); 673 Known = Known.intersectWith(Known2); 674 } 675 break; 676 } 677 case TargetOpcode::G_CONCAT_VECTORS: { 678 if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector()) 679 break; 680 // Split DemandedElts and test each of the demanded subvectors. 681 Known.Zero.setAllBits(); 682 Known.One.setAllBits(); 683 unsigned NumSubVectorElts = 684 MRI.getType(MI.getOperand(1).getReg()).getNumElements(); 685 686 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { 687 APInt DemandedSub = 688 DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts); 689 if (!!DemandedSub) { 690 computeKnownBitsImpl(MO.getReg(), Known2, DemandedSub, Depth + 1); 691 692 Known = Known.intersectWith(Known2); 693 } 694 // If we don't know any bits, early out. 695 if (Known.isUnknown()) 696 break; 697 } 698 break; 699 } 700 } 701 702 LLVM_DEBUG(dumpResult(MI, Known, Depth)); 703 704 // Update the cache. 705 ComputeKnownBitsCache[R] = Known; 706 } 707 708 static bool outputDenormalIsIEEEOrPosZero(const MachineFunction &MF, LLT Ty) { 709 Ty = Ty.getScalarType(); 710 DenormalMode Mode = MF.getDenormalMode(getFltSemanticForLLT(Ty)); 711 return Mode.Output == DenormalMode::IEEE || 712 Mode.Output == DenormalMode::PositiveZero; 713 } 714 715 void GISelValueTracking::computeKnownFPClass(Register R, KnownFPClass &Known, 716 FPClassTest InterestedClasses, 717 unsigned Depth) { 718 LLT Ty = MRI.getType(R); 719 APInt DemandedElts = 720 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); 721 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth); 722 } 723 724 void GISelValueTracking::computeKnownFPClassForFPTrunc( 725 const MachineInstr &MI, const APInt &DemandedElts, 726 FPClassTest InterestedClasses, KnownFPClass &Known, unsigned Depth) { 727 if ((InterestedClasses & (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == 728 fcNone) 729 return; 730 731 Register Val = MI.getOperand(1).getReg(); 732 KnownFPClass KnownSrc; 733 computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, 734 Depth + 1); 735 736 // Sign should be preserved 737 // TODO: Handle cannot be ordered greater than zero 738 if (KnownSrc.cannotBeOrderedLessThanZero()) 739 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 740 741 Known.propagateNaN(KnownSrc, true); 742 743 // Infinity needs a range check. 744 } 745 746 void GISelValueTracking::computeKnownFPClass(Register R, 747 const APInt &DemandedElts, 748 FPClassTest InterestedClasses, 749 KnownFPClass &Known, 750 unsigned Depth) { 751 assert(Known.isUnknown() && "should not be called with known information"); 752 753 if (!DemandedElts) { 754 // No demanded elts, better to assume we don't know anything. 755 Known.resetAll(); 756 return; 757 } 758 759 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth"); 760 761 MachineInstr &MI = *MRI.getVRegDef(R); 762 unsigned Opcode = MI.getOpcode(); 763 LLT DstTy = MRI.getType(R); 764 765 if (!DstTy.isValid()) { 766 Known.resetAll(); 767 return; 768 } 769 770 if (auto Cst = GFConstant::getConstant(R, MRI)) { 771 switch (Cst->getKind()) { 772 case GFConstant::GFConstantKind::Scalar: { 773 auto APF = Cst->getScalarValue(); 774 Known.KnownFPClasses = APF.classify(); 775 Known.SignBit = APF.isNegative(); 776 break; 777 } 778 case GFConstant::GFConstantKind::FixedVector: { 779 Known.KnownFPClasses = fcNone; 780 bool SignBitAllZero = true; 781 bool SignBitAllOne = true; 782 783 for (auto C : *Cst) { 784 Known.KnownFPClasses |= C.classify(); 785 if (C.isNegative()) 786 SignBitAllZero = false; 787 else 788 SignBitAllOne = false; 789 } 790 791 if (SignBitAllOne != SignBitAllZero) 792 Known.SignBit = SignBitAllOne; 793 794 break; 795 } 796 case GFConstant::GFConstantKind::ScalableVector: { 797 Known.resetAll(); 798 break; 799 } 800 } 801 802 return; 803 } 804 805 FPClassTest KnownNotFromFlags = fcNone; 806 if (MI.getFlag(MachineInstr::MIFlag::FmNoNans)) 807 KnownNotFromFlags |= fcNan; 808 if (MI.getFlag(MachineInstr::MIFlag::FmNoInfs)) 809 KnownNotFromFlags |= fcInf; 810 811 // We no longer need to find out about these bits from inputs if we can 812 // assume this from flags/attributes. 813 InterestedClasses &= ~KnownNotFromFlags; 814 815 auto ClearClassesFromFlags = 816 make_scope_exit([=, &Known] { Known.knownNot(KnownNotFromFlags); }); 817 818 // All recursive calls that increase depth must come after this. 819 if (Depth == MaxAnalysisRecursionDepth) 820 return; 821 822 const MachineFunction *MF = MI.getMF(); 823 824 switch (Opcode) { 825 default: 826 TL.computeKnownFPClassForTargetInstr(*this, R, Known, DemandedElts, MRI, 827 Depth); 828 break; 829 case TargetOpcode::G_FNEG: { 830 Register Val = MI.getOperand(1).getReg(); 831 computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, Depth + 1); 832 Known.fneg(); 833 break; 834 } 835 case TargetOpcode::G_SELECT: { 836 GSelect &SelMI = cast<GSelect>(MI); 837 Register Cond = SelMI.getCondReg(); 838 Register LHS = SelMI.getTrueReg(); 839 Register RHS = SelMI.getFalseReg(); 840 841 FPClassTest FilterLHS = fcAllFlags; 842 FPClassTest FilterRHS = fcAllFlags; 843 844 Register TestedValue; 845 FPClassTest MaskIfTrue = fcAllFlags; 846 FPClassTest MaskIfFalse = fcAllFlags; 847 FPClassTest ClassVal = fcNone; 848 849 CmpInst::Predicate Pred; 850 Register CmpLHS, CmpRHS; 851 if (mi_match(Cond, MRI, 852 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) { 853 // If the select filters out a value based on the class, it no longer 854 // participates in the class of the result 855 856 // TODO: In some degenerate cases we can infer something if we try again 857 // without looking through sign operations. 858 bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS; 859 std::tie(TestedValue, MaskIfTrue, MaskIfFalse) = 860 fcmpImpliesClass(Pred, *MF, CmpLHS, CmpRHS, LookThroughFAbsFNeg); 861 } else if (mi_match( 862 Cond, MRI, 863 m_GIsFPClass(m_Reg(TestedValue), m_FPClassTest(ClassVal)))) { 864 FPClassTest TestedMask = ClassVal; 865 MaskIfTrue = TestedMask; 866 MaskIfFalse = ~TestedMask; 867 } 868 869 if (TestedValue == LHS) { 870 // match !isnan(x) ? x : y 871 FilterLHS = MaskIfTrue; 872 } else if (TestedValue == RHS) { // && IsExactClass 873 // match !isnan(x) ? y : x 874 FilterRHS = MaskIfFalse; 875 } 876 877 KnownFPClass Known2; 878 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known, 879 Depth + 1); 880 Known.KnownFPClasses &= FilterLHS; 881 882 computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS, 883 Known2, Depth + 1); 884 Known2.KnownFPClasses &= FilterRHS; 885 886 Known |= Known2; 887 break; 888 } 889 case TargetOpcode::G_FCOPYSIGN: { 890 Register Magnitude = MI.getOperand(1).getReg(); 891 Register Sign = MI.getOperand(2).getReg(); 892 893 KnownFPClass KnownSign; 894 895 computeKnownFPClass(Magnitude, DemandedElts, InterestedClasses, Known, 896 Depth + 1); 897 computeKnownFPClass(Sign, DemandedElts, InterestedClasses, KnownSign, 898 Depth + 1); 899 Known.copysign(KnownSign); 900 break; 901 } 902 case TargetOpcode::G_FMA: 903 case TargetOpcode::G_STRICT_FMA: 904 case TargetOpcode::G_FMAD: { 905 if ((InterestedClasses & fcNegative) == fcNone) 906 break; 907 908 Register A = MI.getOperand(1).getReg(); 909 Register B = MI.getOperand(2).getReg(); 910 Register C = MI.getOperand(3).getReg(); 911 912 if (A != B) 913 break; 914 915 // The multiply cannot be -0 and therefore the add can't be -0 916 Known.knownNot(fcNegZero); 917 918 // x * x + y is non-negative if y is non-negative. 919 KnownFPClass KnownAddend; 920 computeKnownFPClass(C, DemandedElts, InterestedClasses, KnownAddend, 921 Depth + 1); 922 923 if (KnownAddend.cannotBeOrderedLessThanZero()) 924 Known.knownNot(fcNegative); 925 break; 926 } 927 case TargetOpcode::G_FSQRT: 928 case TargetOpcode::G_STRICT_FSQRT: { 929 KnownFPClass KnownSrc; 930 FPClassTest InterestedSrcs = InterestedClasses; 931 if (InterestedClasses & fcNan) 932 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 933 934 Register Val = MI.getOperand(1).getReg(); 935 936 computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1); 937 938 if (KnownSrc.isKnownNeverPosInfinity()) 939 Known.knownNot(fcPosInf); 940 if (KnownSrc.isKnownNever(fcSNan)) 941 Known.knownNot(fcSNan); 942 943 // Any negative value besides -0 returns a nan. 944 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 945 Known.knownNot(fcNan); 946 947 // The only negative value that can be returned is -0 for -0 inputs. 948 Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal); 949 break; 950 } 951 case TargetOpcode::G_FABS: { 952 if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) { 953 Register Val = MI.getOperand(1).getReg(); 954 // If we only care about the sign bit we don't need to inspect the 955 // operand. 956 computeKnownFPClass(Val, DemandedElts, InterestedClasses, Known, 957 Depth + 1); 958 } 959 Known.fabs(); 960 break; 961 } 962 case TargetOpcode::G_FSIN: 963 case TargetOpcode::G_FCOS: 964 case TargetOpcode::G_FSINCOS: { 965 // Return NaN on infinite inputs. 966 Register Val = MI.getOperand(1).getReg(); 967 KnownFPClass KnownSrc; 968 969 computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, 970 Depth + 1); 971 Known.knownNot(fcInf); 972 973 if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity()) 974 Known.knownNot(fcNan); 975 break; 976 } 977 case TargetOpcode::G_FMAXNUM: 978 case TargetOpcode::G_FMINNUM: 979 case TargetOpcode::G_FMINNUM_IEEE: 980 case TargetOpcode::G_FMAXIMUM: 981 case TargetOpcode::G_FMINIMUM: 982 case TargetOpcode::G_FMAXNUM_IEEE: 983 case TargetOpcode::G_FMAXIMUMNUM: 984 case TargetOpcode::G_FMINIMUMNUM: { 985 Register LHS = MI.getOperand(1).getReg(); 986 Register RHS = MI.getOperand(2).getReg(); 987 KnownFPClass KnownLHS, KnownRHS; 988 989 computeKnownFPClass(LHS, DemandedElts, InterestedClasses, KnownLHS, 990 Depth + 1); 991 computeKnownFPClass(RHS, DemandedElts, InterestedClasses, KnownRHS, 992 Depth + 1); 993 994 bool NeverNaN = KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN(); 995 Known = KnownLHS | KnownRHS; 996 997 // If either operand is not NaN, the result is not NaN. 998 if (NeverNaN && (Opcode == TargetOpcode::G_FMINNUM || 999 Opcode == TargetOpcode::G_FMAXNUM || 1000 Opcode == TargetOpcode::G_FMINIMUMNUM || 1001 Opcode == TargetOpcode::G_FMAXIMUMNUM)) 1002 Known.knownNot(fcNan); 1003 1004 if (Opcode == TargetOpcode::G_FMAXNUM || 1005 Opcode == TargetOpcode::G_FMAXIMUMNUM || 1006 Opcode == TargetOpcode::G_FMAXNUM_IEEE) { 1007 // If at least one operand is known to be positive, the result must be 1008 // positive. 1009 if ((KnownLHS.cannotBeOrderedLessThanZero() && 1010 KnownLHS.isKnownNeverNaN()) || 1011 (KnownRHS.cannotBeOrderedLessThanZero() && 1012 KnownRHS.isKnownNeverNaN())) 1013 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 1014 } else if (Opcode == TargetOpcode::G_FMAXIMUM) { 1015 // If at least one operand is known to be positive, the result must be 1016 // positive. 1017 if (KnownLHS.cannotBeOrderedLessThanZero() || 1018 KnownRHS.cannotBeOrderedLessThanZero()) 1019 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 1020 } else if (Opcode == TargetOpcode::G_FMINNUM || 1021 Opcode == TargetOpcode::G_FMINIMUMNUM || 1022 Opcode == TargetOpcode::G_FMINNUM_IEEE) { 1023 // If at least one operand is known to be negative, the result must be 1024 // negative. 1025 if ((KnownLHS.cannotBeOrderedGreaterThanZero() && 1026 KnownLHS.isKnownNeverNaN()) || 1027 (KnownRHS.cannotBeOrderedGreaterThanZero() && 1028 KnownRHS.isKnownNeverNaN())) 1029 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 1030 } else if (Opcode == TargetOpcode::G_FMINIMUM) { 1031 // If at least one operand is known to be negative, the result must be 1032 // negative. 1033 if (KnownLHS.cannotBeOrderedGreaterThanZero() || 1034 KnownRHS.cannotBeOrderedGreaterThanZero()) 1035 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 1036 } else { 1037 llvm_unreachable("unhandled intrinsic"); 1038 } 1039 1040 // Fixup zero handling if denormals could be returned as a zero. 1041 // 1042 // As there's no spec for denormal flushing, be conservative with the 1043 // treatment of denormals that could be flushed to zero. For older 1044 // subtargets on AMDGPU the min/max instructions would not flush the 1045 // output and return the original value. 1046 // 1047 if ((Known.KnownFPClasses & fcZero) != fcNone && 1048 !Known.isKnownNeverSubnormal()) { 1049 DenormalMode Mode = 1050 MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType())); 1051 if (Mode != DenormalMode::getIEEE()) 1052 Known.KnownFPClasses |= fcZero; 1053 } 1054 1055 if (Known.isKnownNeverNaN()) { 1056 if (KnownLHS.SignBit && KnownRHS.SignBit && 1057 *KnownLHS.SignBit == *KnownRHS.SignBit) { 1058 if (*KnownLHS.SignBit) 1059 Known.signBitMustBeOne(); 1060 else 1061 Known.signBitMustBeZero(); 1062 } else if ((Opcode == TargetOpcode::G_FMAXIMUM || 1063 Opcode == TargetOpcode::G_FMINIMUM) || 1064 Opcode == TargetOpcode::G_FMAXIMUMNUM || 1065 Opcode == TargetOpcode::G_FMINIMUMNUM || 1066 Opcode == TargetOpcode::G_FMAXNUM_IEEE || 1067 Opcode == TargetOpcode::G_FMINNUM_IEEE || 1068 // FIXME: Should be using logical zero versions 1069 ((KnownLHS.isKnownNeverNegZero() || 1070 KnownRHS.isKnownNeverPosZero()) && 1071 (KnownLHS.isKnownNeverPosZero() || 1072 KnownRHS.isKnownNeverNegZero()))) { 1073 if ((Opcode == TargetOpcode::G_FMAXIMUM || 1074 Opcode == TargetOpcode::G_FMAXNUM || 1075 Opcode == TargetOpcode::G_FMAXIMUMNUM || 1076 Opcode == TargetOpcode::G_FMAXNUM_IEEE) && 1077 (KnownLHS.SignBit == false || KnownRHS.SignBit == false)) 1078 Known.signBitMustBeZero(); 1079 else if ((Opcode == TargetOpcode::G_FMINIMUM || 1080 Opcode == TargetOpcode::G_FMINNUM || 1081 Opcode == TargetOpcode::G_FMINIMUMNUM || 1082 Opcode == TargetOpcode::G_FMINNUM_IEEE) && 1083 (KnownLHS.SignBit == true || KnownRHS.SignBit == true)) 1084 Known.signBitMustBeOne(); 1085 } 1086 } 1087 break; 1088 } 1089 case TargetOpcode::G_FCANONICALIZE: { 1090 Register Val = MI.getOperand(1).getReg(); 1091 KnownFPClass KnownSrc; 1092 computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, 1093 Depth + 1); 1094 1095 // This is essentially a stronger form of 1096 // propagateCanonicalizingSrc. Other "canonicalizing" operations don't 1097 // actually have an IR canonicalization guarantee. 1098 1099 // Canonicalize may flush denormals to zero, so we have to consider the 1100 // denormal mode to preserve known-not-0 knowledge. 1101 Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan; 1102 1103 // Stronger version of propagateNaN 1104 // Canonicalize is guaranteed to quiet signaling nans. 1105 if (KnownSrc.isKnownNeverNaN()) 1106 Known.knownNot(fcNan); 1107 else 1108 Known.knownNot(fcSNan); 1109 1110 // If the parent function flushes denormals, the canonical output cannot 1111 // be a denormal. 1112 LLT Ty = MRI.getType(Val).getScalarType(); 1113 const fltSemantics &FPType = getFltSemanticForLLT(Ty); 1114 DenormalMode DenormMode = MF->getDenormalMode(FPType); 1115 if (DenormMode == DenormalMode::getIEEE()) { 1116 if (KnownSrc.isKnownNever(fcPosZero)) 1117 Known.knownNot(fcPosZero); 1118 if (KnownSrc.isKnownNever(fcNegZero)) 1119 Known.knownNot(fcNegZero); 1120 break; 1121 } 1122 1123 if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero()) 1124 Known.knownNot(fcSubnormal); 1125 1126 if (DenormMode.Input == DenormalMode::PositiveZero || 1127 (DenormMode.Output == DenormalMode::PositiveZero && 1128 DenormMode.Input == DenormalMode::IEEE)) 1129 Known.knownNot(fcNegZero); 1130 1131 break; 1132 } 1133 case TargetOpcode::G_VECREDUCE_FMAX: 1134 case TargetOpcode::G_VECREDUCE_FMIN: 1135 case TargetOpcode::G_VECREDUCE_FMAXIMUM: 1136 case TargetOpcode::G_VECREDUCE_FMINIMUM: { 1137 Register Val = MI.getOperand(1).getReg(); 1138 // reduce min/max will choose an element from one of the vector elements, 1139 // so we can infer and class information that is common to all elements. 1140 1141 Known = 1142 computeKnownFPClass(Val, MI.getFlags(), InterestedClasses, Depth + 1); 1143 // Can only propagate sign if output is never NaN. 1144 if (!Known.isKnownNeverNaN()) 1145 Known.SignBit.reset(); 1146 break; 1147 } 1148 case TargetOpcode::G_TRUNC: 1149 case TargetOpcode::G_FFLOOR: 1150 case TargetOpcode::G_FCEIL: 1151 case TargetOpcode::G_FRINT: 1152 case TargetOpcode::G_FNEARBYINT: 1153 case TargetOpcode::G_INTRINSIC_FPTRUNC_ROUND: 1154 case TargetOpcode::G_INTRINSIC_ROUND: { 1155 Register Val = MI.getOperand(1).getReg(); 1156 KnownFPClass KnownSrc; 1157 FPClassTest InterestedSrcs = InterestedClasses; 1158 if (InterestedSrcs & fcPosFinite) 1159 InterestedSrcs |= fcPosFinite; 1160 if (InterestedSrcs & fcNegFinite) 1161 InterestedSrcs |= fcNegFinite; 1162 computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1); 1163 1164 // Integer results cannot be subnormal. 1165 Known.knownNot(fcSubnormal); 1166 1167 Known.propagateNaN(KnownSrc, true); 1168 1169 // TODO: handle multi unit FPTypes once LLT FPInfo lands 1170 1171 // Negative round ups to 0 produce -0 1172 if (KnownSrc.isKnownNever(fcPosFinite)) 1173 Known.knownNot(fcPosFinite); 1174 if (KnownSrc.isKnownNever(fcNegFinite)) 1175 Known.knownNot(fcNegFinite); 1176 1177 break; 1178 } 1179 case TargetOpcode::G_FEXP: 1180 case TargetOpcode::G_FEXP2: 1181 case TargetOpcode::G_FEXP10: { 1182 Known.knownNot(fcNegative); 1183 if ((InterestedClasses & fcNan) == fcNone) 1184 break; 1185 1186 Register Val = MI.getOperand(1).getReg(); 1187 KnownFPClass KnownSrc; 1188 computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, 1189 Depth + 1); 1190 if (KnownSrc.isKnownNeverNaN()) { 1191 Known.knownNot(fcNan); 1192 Known.signBitMustBeZero(); 1193 } 1194 1195 break; 1196 } 1197 case TargetOpcode::G_FLOG: 1198 case TargetOpcode::G_FLOG2: 1199 case TargetOpcode::G_FLOG10: { 1200 // log(+inf) -> +inf 1201 // log([+-]0.0) -> -inf 1202 // log(-inf) -> nan 1203 // log(-x) -> nan 1204 if ((InterestedClasses & (fcNan | fcInf)) == fcNone) 1205 break; 1206 1207 FPClassTest InterestedSrcs = InterestedClasses; 1208 if ((InterestedClasses & fcNegInf) != fcNone) 1209 InterestedSrcs |= fcZero | fcSubnormal; 1210 if ((InterestedClasses & fcNan) != fcNone) 1211 InterestedSrcs |= fcNan | (fcNegative & ~fcNan); 1212 1213 Register Val = MI.getOperand(1).getReg(); 1214 KnownFPClass KnownSrc; 1215 computeKnownFPClass(Val, DemandedElts, InterestedSrcs, KnownSrc, Depth + 1); 1216 1217 if (KnownSrc.isKnownNeverPosInfinity()) 1218 Known.knownNot(fcPosInf); 1219 1220 if (KnownSrc.isKnownNeverNaN() && KnownSrc.cannotBeOrderedLessThanZero()) 1221 Known.knownNot(fcNan); 1222 1223 LLT Ty = MRI.getType(Val).getScalarType(); 1224 const fltSemantics &FltSem = getFltSemanticForLLT(Ty); 1225 DenormalMode Mode = MF->getDenormalMode(FltSem); 1226 1227 if (KnownSrc.isKnownNeverLogicalZero(Mode)) 1228 Known.knownNot(fcNegInf); 1229 1230 break; 1231 } 1232 case TargetOpcode::G_FPOWI: { 1233 if ((InterestedClasses & fcNegative) == fcNone) 1234 break; 1235 1236 Register Exp = MI.getOperand(2).getReg(); 1237 LLT ExpTy = MRI.getType(Exp); 1238 KnownBits ExponentKnownBits = getKnownBits( 1239 Exp, ExpTy.isVector() ? DemandedElts : APInt(1, 1), Depth + 1); 1240 1241 if (ExponentKnownBits.Zero[0]) { // Is even 1242 Known.knownNot(fcNegative); 1243 break; 1244 } 1245 1246 // Given that exp is an integer, here are the 1247 // ways that pow can return a negative value: 1248 // 1249 // pow(-x, exp) --> negative if exp is odd and x is negative. 1250 // pow(-0, exp) --> -inf if exp is negative odd. 1251 // pow(-0, exp) --> -0 if exp is positive odd. 1252 // pow(-inf, exp) --> -0 if exp is negative odd. 1253 // pow(-inf, exp) --> -inf if exp is positive odd. 1254 Register Val = MI.getOperand(1).getReg(); 1255 KnownFPClass KnownSrc; 1256 computeKnownFPClass(Val, DemandedElts, fcNegative, KnownSrc, Depth + 1); 1257 if (KnownSrc.isKnownNever(fcNegative)) 1258 Known.knownNot(fcNegative); 1259 break; 1260 } 1261 case TargetOpcode::G_FLDEXP: 1262 case TargetOpcode::G_STRICT_FLDEXP: { 1263 Register Val = MI.getOperand(1).getReg(); 1264 KnownFPClass KnownSrc; 1265 computeKnownFPClass(Val, DemandedElts, InterestedClasses, KnownSrc, 1266 Depth + 1); 1267 Known.propagateNaN(KnownSrc, /*PropagateSign=*/true); 1268 1269 // Sign is preserved, but underflows may produce zeroes. 1270 if (KnownSrc.isKnownNever(fcNegative)) 1271 Known.knownNot(fcNegative); 1272 else if (KnownSrc.cannotBeOrderedLessThanZero()) 1273 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 1274 1275 if (KnownSrc.isKnownNever(fcPositive)) 1276 Known.knownNot(fcPositive); 1277 else if (KnownSrc.cannotBeOrderedGreaterThanZero()) 1278 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 1279 1280 // Can refine inf/zero handling based on the exponent operand. 1281 const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf; 1282 if ((InterestedClasses & ExpInfoMask) == fcNone) 1283 break; 1284 if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone) 1285 break; 1286 1287 // TODO: Handle constant range of Exp 1288 1289 break; 1290 } 1291 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: { 1292 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known, 1293 Depth); 1294 break; 1295 } 1296 case TargetOpcode::G_FADD: 1297 case TargetOpcode::G_STRICT_FADD: 1298 case TargetOpcode::G_FSUB: 1299 case TargetOpcode::G_STRICT_FSUB: { 1300 Register LHS = MI.getOperand(1).getReg(); 1301 Register RHS = MI.getOperand(2).getReg(); 1302 KnownFPClass KnownLHS, KnownRHS; 1303 bool WantNegative = 1304 (Opcode == TargetOpcode::G_FADD || 1305 Opcode == TargetOpcode::G_STRICT_FADD) && 1306 (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone; 1307 bool WantNaN = (InterestedClasses & fcNan) != fcNone; 1308 bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone; 1309 1310 if (!WantNaN && !WantNegative && !WantNegZero) 1311 break; 1312 1313 FPClassTest InterestedSrcs = InterestedClasses; 1314 if (WantNegative) 1315 InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask; 1316 if (InterestedClasses & fcNan) 1317 InterestedSrcs |= fcInf; 1318 computeKnownFPClass(RHS, DemandedElts, InterestedSrcs, KnownRHS, Depth + 1); 1319 1320 if ((WantNaN && KnownRHS.isKnownNeverNaN()) || 1321 (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) || 1322 WantNegZero || 1323 (Opcode == TargetOpcode::G_FSUB || 1324 Opcode == TargetOpcode::G_STRICT_FSUB)) { 1325 1326 // RHS is canonically cheaper to compute. Skip inspecting the LHS if 1327 // there's no point. 1328 computeKnownFPClass(LHS, DemandedElts, InterestedSrcs, KnownLHS, 1329 Depth + 1); 1330 // Adding positive and negative infinity produces NaN. 1331 // TODO: Check sign of infinities. 1332 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 1333 (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity())) 1334 Known.knownNot(fcNan); 1335 1336 if (Opcode == Instruction::FAdd) { 1337 if (KnownLHS.cannotBeOrderedLessThanZero() && 1338 KnownRHS.cannotBeOrderedLessThanZero()) 1339 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 1340 1341 // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0. 1342 if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode( 1343 getFltSemanticForLLT(DstTy.getScalarType()))) || 1344 KnownRHS.isKnownNeverLogicalNegZero(MF->getDenormalMode( 1345 getFltSemanticForLLT(DstTy.getScalarType())))) && 1346 // Make sure output negative denormal can't flush to -0 1347 outputDenormalIsIEEEOrPosZero(*MF, DstTy)) 1348 Known.knownNot(fcNegZero); 1349 } else { 1350 // Only fsub -0, +0 can return -0 1351 if ((KnownLHS.isKnownNeverLogicalNegZero(MF->getDenormalMode( 1352 getFltSemanticForLLT(DstTy.getScalarType()))) || 1353 KnownRHS.isKnownNeverLogicalPosZero(MF->getDenormalMode( 1354 getFltSemanticForLLT(DstTy.getScalarType())))) && 1355 // Make sure output negative denormal can't flush to -0 1356 outputDenormalIsIEEEOrPosZero(*MF, DstTy)) 1357 Known.knownNot(fcNegZero); 1358 } 1359 } 1360 1361 break; 1362 } 1363 case TargetOpcode::G_FMUL: 1364 case TargetOpcode::G_STRICT_FMUL: { 1365 Register LHS = MI.getOperand(1).getReg(); 1366 Register RHS = MI.getOperand(2).getReg(); 1367 // X * X is always non-negative or a NaN. 1368 if (LHS == RHS) 1369 Known.knownNot(fcNegative); 1370 1371 if ((InterestedClasses & fcNan) != fcNan) 1372 break; 1373 1374 // fcSubnormal is only needed in case of DAZ. 1375 const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal; 1376 1377 KnownFPClass KnownLHS, KnownRHS; 1378 computeKnownFPClass(RHS, DemandedElts, NeedForNan, KnownRHS, Depth + 1); 1379 if (!KnownRHS.isKnownNeverNaN()) 1380 break; 1381 1382 computeKnownFPClass(LHS, DemandedElts, NeedForNan, KnownLHS, Depth + 1); 1383 if (!KnownLHS.isKnownNeverNaN()) 1384 break; 1385 1386 if (KnownLHS.SignBit && KnownRHS.SignBit) { 1387 if (*KnownLHS.SignBit == *KnownRHS.SignBit) 1388 Known.signBitMustBeZero(); 1389 else 1390 Known.signBitMustBeOne(); 1391 } 1392 1393 // If 0 * +/-inf produces NaN. 1394 if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) { 1395 Known.knownNot(fcNan); 1396 break; 1397 } 1398 1399 if ((KnownRHS.isKnownNeverInfinity() || 1400 KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode( 1401 getFltSemanticForLLT(DstTy.getScalarType())))) && 1402 (KnownLHS.isKnownNeverInfinity() || 1403 KnownRHS.isKnownNeverLogicalZero( 1404 MF->getDenormalMode(getFltSemanticForLLT(DstTy.getScalarType()))))) 1405 Known.knownNot(fcNan); 1406 1407 break; 1408 } 1409 case TargetOpcode::G_FDIV: 1410 case TargetOpcode::G_FREM: { 1411 Register LHS = MI.getOperand(1).getReg(); 1412 Register RHS = MI.getOperand(2).getReg(); 1413 1414 if (LHS == RHS) { 1415 // TODO: Could filter out snan if we inspect the operand 1416 if (Opcode == TargetOpcode::G_FDIV) { 1417 // X / X is always exactly 1.0 or a NaN. 1418 Known.KnownFPClasses = fcNan | fcPosNormal; 1419 } else { 1420 // X % X is always exactly [+-]0.0 or a NaN. 1421 Known.KnownFPClasses = fcNan | fcZero; 1422 } 1423 1424 break; 1425 } 1426 1427 const bool WantNan = (InterestedClasses & fcNan) != fcNone; 1428 const bool WantNegative = (InterestedClasses & fcNegative) != fcNone; 1429 const bool WantPositive = Opcode == TargetOpcode::G_FREM && 1430 (InterestedClasses & fcPositive) != fcNone; 1431 if (!WantNan && !WantNegative && !WantPositive) 1432 break; 1433 1434 KnownFPClass KnownLHS, KnownRHS; 1435 1436 computeKnownFPClass(RHS, DemandedElts, fcNan | fcInf | fcZero | fcNegative, 1437 KnownRHS, Depth + 1); 1438 1439 bool KnowSomethingUseful = 1440 KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative); 1441 1442 if (KnowSomethingUseful || WantPositive) { 1443 const FPClassTest InterestedLHS = 1444 WantPositive ? fcAllFlags 1445 : fcNan | fcInf | fcZero | fcSubnormal | fcNegative; 1446 1447 computeKnownFPClass(LHS, DemandedElts, InterestedClasses & InterestedLHS, 1448 KnownLHS, Depth + 1); 1449 } 1450 1451 if (Opcode == Instruction::FDiv) { 1452 // Only 0/0, Inf/Inf produce NaN. 1453 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 1454 (KnownLHS.isKnownNeverInfinity() || 1455 KnownRHS.isKnownNeverInfinity()) && 1456 ((KnownLHS.isKnownNeverLogicalZero(MF->getDenormalMode( 1457 getFltSemanticForLLT(DstTy.getScalarType())))) || 1458 (KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode( 1459 getFltSemanticForLLT(DstTy.getScalarType())))))) { 1460 Known.knownNot(fcNan); 1461 } 1462 1463 // X / -0.0 is -Inf (or NaN). 1464 // +X / +X is +X 1465 if (KnownLHS.isKnownNever(fcNegative) && 1466 KnownRHS.isKnownNever(fcNegative)) 1467 Known.knownNot(fcNegative); 1468 } else { 1469 // Inf REM x and x REM 0 produce NaN. 1470 if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() && 1471 KnownLHS.isKnownNeverInfinity() && 1472 KnownRHS.isKnownNeverLogicalZero(MF->getDenormalMode( 1473 getFltSemanticForLLT(DstTy.getScalarType())))) { 1474 Known.knownNot(fcNan); 1475 } 1476 1477 // The sign for frem is the same as the first operand. 1478 if (KnownLHS.cannotBeOrderedLessThanZero()) 1479 Known.knownNot(KnownFPClass::OrderedLessThanZeroMask); 1480 if (KnownLHS.cannotBeOrderedGreaterThanZero()) 1481 Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask); 1482 1483 // See if we can be more aggressive about the sign of 0. 1484 if (KnownLHS.isKnownNever(fcNegative)) 1485 Known.knownNot(fcNegative); 1486 if (KnownLHS.isKnownNever(fcPositive)) 1487 Known.knownNot(fcPositive); 1488 } 1489 1490 break; 1491 } 1492 case TargetOpcode::G_FPEXT: { 1493 Register Dst = MI.getOperand(0).getReg(); 1494 Register Src = MI.getOperand(1).getReg(); 1495 // Infinity, nan and zero propagate from source. 1496 computeKnownFPClass(R, DemandedElts, InterestedClasses, Known, Depth + 1); 1497 1498 LLT DstTy = MRI.getType(Dst).getScalarType(); 1499 const fltSemantics &DstSem = getFltSemanticForLLT(DstTy); 1500 LLT SrcTy = MRI.getType(Src).getScalarType(); 1501 const fltSemantics &SrcSem = getFltSemanticForLLT(SrcTy); 1502 1503 // All subnormal inputs should be in the normal range in the result type. 1504 if (APFloat::isRepresentableAsNormalIn(SrcSem, DstSem)) { 1505 if (Known.KnownFPClasses & fcPosSubnormal) 1506 Known.KnownFPClasses |= fcPosNormal; 1507 if (Known.KnownFPClasses & fcNegSubnormal) 1508 Known.KnownFPClasses |= fcNegNormal; 1509 Known.knownNot(fcSubnormal); 1510 } 1511 1512 // Sign bit of a nan isn't guaranteed. 1513 if (!Known.isKnownNeverNaN()) 1514 Known.SignBit = std::nullopt; 1515 break; 1516 } 1517 case TargetOpcode::G_FPTRUNC: { 1518 computeKnownFPClassForFPTrunc(MI, DemandedElts, InterestedClasses, Known, 1519 Depth); 1520 break; 1521 } 1522 case TargetOpcode::G_SITOFP: 1523 case TargetOpcode::G_UITOFP: { 1524 // Cannot produce nan 1525 Known.knownNot(fcNan); 1526 1527 // Integers cannot be subnormal 1528 Known.knownNot(fcSubnormal); 1529 1530 // sitofp and uitofp turn into +0.0 for zero. 1531 Known.knownNot(fcNegZero); 1532 if (Opcode == TargetOpcode::G_UITOFP) 1533 Known.signBitMustBeZero(); 1534 1535 Register Val = MI.getOperand(1).getReg(); 1536 LLT Ty = MRI.getType(Val); 1537 1538 if (InterestedClasses & fcInf) { 1539 // Get width of largest magnitude integer (remove a bit if signed). 1540 // This still works for a signed minimum value because the largest FP 1541 // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).; 1542 int IntSize = Ty.getScalarSizeInBits(); 1543 if (Opcode == TargetOpcode::G_SITOFP) 1544 --IntSize; 1545 1546 // If the exponent of the largest finite FP value can hold the largest 1547 // integer, the result of the cast must be finite. 1548 LLT FPTy = DstTy.getScalarType(); 1549 const fltSemantics &FltSem = getFltSemanticForLLT(FPTy); 1550 if (ilogb(APFloat::getLargest(FltSem)) >= IntSize) 1551 Known.knownNot(fcInf); 1552 } 1553 1554 break; 1555 } 1556 // case TargetOpcode::G_MERGE_VALUES: 1557 case TargetOpcode::G_BUILD_VECTOR: 1558 case TargetOpcode::G_CONCAT_VECTORS: { 1559 GMergeLikeInstr &Merge = cast<GMergeLikeInstr>(MI); 1560 1561 if (!DstTy.isFixedVector()) 1562 break; 1563 1564 bool First = true; 1565 for (unsigned Idx = 0; Idx < Merge.getNumSources(); ++Idx) { 1566 // We know the index we are inserting to, so clear it from Vec check. 1567 bool NeedsElt = DemandedElts[Idx]; 1568 1569 // Do we demand the inserted element? 1570 if (NeedsElt) { 1571 Register Src = Merge.getSourceReg(Idx); 1572 if (First) { 1573 computeKnownFPClass(Src, Known, InterestedClasses, Depth + 1); 1574 First = false; 1575 } else { 1576 KnownFPClass Known2; 1577 computeKnownFPClass(Src, Known2, InterestedClasses, Depth + 1); 1578 Known |= Known2; 1579 } 1580 1581 // If we don't know any bits, early out. 1582 if (Known.isUnknown()) 1583 break; 1584 } 1585 } 1586 1587 break; 1588 } 1589 case TargetOpcode::G_EXTRACT_VECTOR_ELT: { 1590 // Look through extract element. If the index is non-constant or 1591 // out-of-range demand all elements, otherwise just the extracted 1592 // element. 1593 GExtractVectorElement &Extract = cast<GExtractVectorElement>(MI); 1594 Register Vec = Extract.getVectorReg(); 1595 Register Idx = Extract.getIndexReg(); 1596 1597 auto CIdx = getIConstantVRegVal(Idx, MRI); 1598 1599 LLT VecTy = MRI.getType(Vec); 1600 1601 if (VecTy.isFixedVector()) { 1602 unsigned NumElts = VecTy.getNumElements(); 1603 APInt DemandedVecElts = APInt::getAllOnes(NumElts); 1604 if (CIdx && CIdx->ult(NumElts)) 1605 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue()); 1606 return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known, 1607 Depth + 1); 1608 } 1609 1610 break; 1611 } 1612 case TargetOpcode::G_INSERT_VECTOR_ELT: { 1613 GInsertVectorElement &Insert = cast<GInsertVectorElement>(MI); 1614 Register Vec = Insert.getVectorReg(); 1615 Register Elt = Insert.getElementReg(); 1616 Register Idx = Insert.getIndexReg(); 1617 1618 LLT VecTy = MRI.getType(Vec); 1619 1620 if (VecTy.isScalableVector()) 1621 return; 1622 1623 auto CIdx = getIConstantVRegVal(Idx, MRI); 1624 1625 unsigned NumElts = DemandedElts.getBitWidth(); 1626 APInt DemandedVecElts = DemandedElts; 1627 bool NeedsElt = true; 1628 // If we know the index we are inserting to, clear it from Vec check. 1629 if (CIdx && CIdx->ult(NumElts)) { 1630 DemandedVecElts.clearBit(CIdx->getZExtValue()); 1631 NeedsElt = DemandedElts[CIdx->getZExtValue()]; 1632 } 1633 1634 // Do we demand the inserted element? 1635 if (NeedsElt) { 1636 computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1); 1637 // If we don't know any bits, early out. 1638 if (Known.isUnknown()) 1639 break; 1640 } else { 1641 Known.KnownFPClasses = fcNone; 1642 } 1643 1644 // Do we need anymore elements from Vec? 1645 if (!DemandedVecElts.isZero()) { 1646 KnownFPClass Known2; 1647 computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2, 1648 Depth + 1); 1649 Known |= Known2; 1650 } 1651 1652 break; 1653 } 1654 case TargetOpcode::G_SHUFFLE_VECTOR: { 1655 // For undef elements, we don't know anything about the common state of 1656 // the shuffle result. 1657 GShuffleVector &Shuf = cast<GShuffleVector>(MI); 1658 APInt DemandedLHS, DemandedRHS; 1659 if (DstTy.isScalableVector()) { 1660 assert(DemandedElts == APInt(1, 1)); 1661 DemandedLHS = DemandedRHS = DemandedElts; 1662 } else { 1663 if (!llvm::getShuffleDemandedElts(DstTy.getNumElements(), Shuf.getMask(), 1664 DemandedElts, DemandedLHS, 1665 DemandedRHS)) { 1666 Known.resetAll(); 1667 return; 1668 } 1669 } 1670 1671 if (!!DemandedLHS) { 1672 Register LHS = Shuf.getSrc1Reg(); 1673 computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known, 1674 Depth + 1); 1675 1676 // If we don't know any bits, early out. 1677 if (Known.isUnknown()) 1678 break; 1679 } else { 1680 Known.KnownFPClasses = fcNone; 1681 } 1682 1683 if (!!DemandedRHS) { 1684 KnownFPClass Known2; 1685 Register RHS = Shuf.getSrc2Reg(); 1686 computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2, 1687 Depth + 1); 1688 Known |= Known2; 1689 } 1690 break; 1691 } 1692 case TargetOpcode::COPY: { 1693 Register Src = MI.getOperand(1).getReg(); 1694 1695 if (!Src.isVirtual()) 1696 return; 1697 1698 computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1); 1699 break; 1700 } 1701 } 1702 } 1703 1704 KnownFPClass 1705 GISelValueTracking::computeKnownFPClass(Register R, const APInt &DemandedElts, 1706 FPClassTest InterestedClasses, 1707 unsigned Depth) { 1708 KnownFPClass KnownClasses; 1709 computeKnownFPClass(R, DemandedElts, InterestedClasses, KnownClasses, Depth); 1710 return KnownClasses; 1711 } 1712 1713 KnownFPClass GISelValueTracking::computeKnownFPClass( 1714 Register R, FPClassTest InterestedClasses, unsigned Depth) { 1715 KnownFPClass Known; 1716 computeKnownFPClass(R, Known, InterestedClasses, Depth); 1717 return Known; 1718 } 1719 1720 KnownFPClass GISelValueTracking::computeKnownFPClass( 1721 Register R, const APInt &DemandedElts, uint32_t Flags, 1722 FPClassTest InterestedClasses, unsigned Depth) { 1723 if (Flags & MachineInstr::MIFlag::FmNoNans) 1724 InterestedClasses &= ~fcNan; 1725 if (Flags & MachineInstr::MIFlag::FmNoInfs) 1726 InterestedClasses &= ~fcInf; 1727 1728 KnownFPClass Result = 1729 computeKnownFPClass(R, DemandedElts, InterestedClasses, Depth); 1730 1731 if (Flags & MachineInstr::MIFlag::FmNoNans) 1732 Result.KnownFPClasses &= ~fcNan; 1733 if (Flags & MachineInstr::MIFlag::FmNoInfs) 1734 Result.KnownFPClasses &= ~fcInf; 1735 return Result; 1736 } 1737 1738 KnownFPClass GISelValueTracking::computeKnownFPClass( 1739 Register R, uint32_t Flags, FPClassTest InterestedClasses, unsigned Depth) { 1740 LLT Ty = MRI.getType(R); 1741 APInt DemandedElts = 1742 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); 1743 return computeKnownFPClass(R, DemandedElts, Flags, InterestedClasses, Depth); 1744 } 1745 1746 /// Compute number of sign bits for the intersection of \p Src0 and \p Src1 1747 unsigned GISelValueTracking::computeNumSignBitsMin(Register Src0, Register Src1, 1748 const APInt &DemandedElts, 1749 unsigned Depth) { 1750 // Test src1 first, since we canonicalize simpler expressions to the RHS. 1751 unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth); 1752 if (Src1SignBits == 1) 1753 return 1; 1754 return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits); 1755 } 1756 1757 /// Compute the known number of sign bits with attached range metadata in the 1758 /// memory operand. If this is an extending load, accounts for the behavior of 1759 /// the high bits. 1760 static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld, 1761 unsigned TyBits) { 1762 const MDNode *Ranges = Ld->getRanges(); 1763 if (!Ranges) 1764 return 1; 1765 1766 ConstantRange CR = getConstantRangeFromMetadata(*Ranges); 1767 if (TyBits > CR.getBitWidth()) { 1768 switch (Ld->getOpcode()) { 1769 case TargetOpcode::G_SEXTLOAD: 1770 CR = CR.signExtend(TyBits); 1771 break; 1772 case TargetOpcode::G_ZEXTLOAD: 1773 CR = CR.zeroExtend(TyBits); 1774 break; 1775 default: 1776 break; 1777 } 1778 } 1779 1780 return std::min(CR.getSignedMin().getNumSignBits(), 1781 CR.getSignedMax().getNumSignBits()); 1782 } 1783 1784 unsigned GISelValueTracking::computeNumSignBits(Register R, 1785 const APInt &DemandedElts, 1786 unsigned Depth) { 1787 MachineInstr &MI = *MRI.getVRegDef(R); 1788 unsigned Opcode = MI.getOpcode(); 1789 1790 if (Opcode == TargetOpcode::G_CONSTANT) 1791 return MI.getOperand(1).getCImm()->getValue().getNumSignBits(); 1792 1793 if (Depth == getMaxDepth()) 1794 return 1; 1795 1796 if (!DemandedElts) 1797 return 1; // No demanded elts, better to assume we don't know anything. 1798 1799 LLT DstTy = MRI.getType(R); 1800 const unsigned TyBits = DstTy.getScalarSizeInBits(); 1801 1802 // Handle the case where this is called on a register that does not have a 1803 // type constraint. This is unlikely to occur except by looking through copies 1804 // but it is possible for the initial register being queried to be in this 1805 // state. 1806 if (!DstTy.isValid()) 1807 return 1; 1808 1809 unsigned FirstAnswer = 1; 1810 switch (Opcode) { 1811 case TargetOpcode::COPY: { 1812 MachineOperand &Src = MI.getOperand(1); 1813 if (Src.getReg().isVirtual() && Src.getSubReg() == 0 && 1814 MRI.getType(Src.getReg()).isValid()) { 1815 // Don't increment Depth for this one since we didn't do any work. 1816 return computeNumSignBits(Src.getReg(), DemandedElts, Depth); 1817 } 1818 1819 return 1; 1820 } 1821 case TargetOpcode::G_SEXT: { 1822 Register Src = MI.getOperand(1).getReg(); 1823 LLT SrcTy = MRI.getType(Src); 1824 unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits(); 1825 return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp; 1826 } 1827 case TargetOpcode::G_ASSERT_SEXT: 1828 case TargetOpcode::G_SEXT_INREG: { 1829 // Max of the input and what this extends. 1830 Register Src = MI.getOperand(1).getReg(); 1831 unsigned SrcBits = MI.getOperand(2).getImm(); 1832 unsigned InRegBits = TyBits - SrcBits + 1; 1833 return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), 1834 InRegBits); 1835 } 1836 case TargetOpcode::G_LOAD: { 1837 GLoad *Ld = cast<GLoad>(&MI); 1838 if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) 1839 break; 1840 1841 return computeNumSignBitsFromRangeMetadata(Ld, TyBits); 1842 } 1843 case TargetOpcode::G_SEXTLOAD: { 1844 GSExtLoad *Ld = cast<GSExtLoad>(&MI); 1845 1846 // FIXME: We need an in-memory type representation. 1847 if (DstTy.isVector()) 1848 return 1; 1849 1850 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); 1851 if (NumBits != 1) 1852 return NumBits; 1853 1854 // e.g. i16->i32 = '17' bits known. 1855 const MachineMemOperand *MMO = *MI.memoperands_begin(); 1856 return TyBits - MMO->getSizeInBits().getValue() + 1; 1857 } 1858 case TargetOpcode::G_ZEXTLOAD: { 1859 GZExtLoad *Ld = cast<GZExtLoad>(&MI); 1860 1861 // FIXME: We need an in-memory type representation. 1862 if (DstTy.isVector()) 1863 return 1; 1864 1865 unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits); 1866 if (NumBits != 1) 1867 return NumBits; 1868 1869 // e.g. i16->i32 = '16' bits known. 1870 const MachineMemOperand *MMO = *MI.memoperands_begin(); 1871 return TyBits - MMO->getSizeInBits().getValue(); 1872 } 1873 case TargetOpcode::G_AND: 1874 case TargetOpcode::G_OR: 1875 case TargetOpcode::G_XOR: { 1876 Register Src1 = MI.getOperand(1).getReg(); 1877 unsigned Src1NumSignBits = 1878 computeNumSignBits(Src1, DemandedElts, Depth + 1); 1879 if (Src1NumSignBits != 1) { 1880 Register Src2 = MI.getOperand(2).getReg(); 1881 unsigned Src2NumSignBits = 1882 computeNumSignBits(Src2, DemandedElts, Depth + 1); 1883 FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits); 1884 } 1885 break; 1886 } 1887 case TargetOpcode::G_TRUNC: { 1888 Register Src = MI.getOperand(1).getReg(); 1889 LLT SrcTy = MRI.getType(Src); 1890 1891 // Check if the sign bits of source go down as far as the truncated value. 1892 unsigned DstTyBits = DstTy.getScalarSizeInBits(); 1893 unsigned NumSrcBits = SrcTy.getScalarSizeInBits(); 1894 unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1); 1895 if (NumSrcSignBits > (NumSrcBits - DstTyBits)) 1896 return NumSrcSignBits - (NumSrcBits - DstTyBits); 1897 break; 1898 } 1899 case TargetOpcode::G_SELECT: { 1900 return computeNumSignBitsMin(MI.getOperand(2).getReg(), 1901 MI.getOperand(3).getReg(), DemandedElts, 1902 Depth + 1); 1903 } 1904 case TargetOpcode::G_SMIN: 1905 case TargetOpcode::G_SMAX: 1906 case TargetOpcode::G_UMIN: 1907 case TargetOpcode::G_UMAX: 1908 // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX. 1909 return computeNumSignBitsMin(MI.getOperand(1).getReg(), 1910 MI.getOperand(2).getReg(), DemandedElts, 1911 Depth + 1); 1912 case TargetOpcode::G_SADDO: 1913 case TargetOpcode::G_SADDE: 1914 case TargetOpcode::G_UADDO: 1915 case TargetOpcode::G_UADDE: 1916 case TargetOpcode::G_SSUBO: 1917 case TargetOpcode::G_SSUBE: 1918 case TargetOpcode::G_USUBO: 1919 case TargetOpcode::G_USUBE: 1920 case TargetOpcode::G_SMULO: 1921 case TargetOpcode::G_UMULO: { 1922 // If compares returns 0/-1, all bits are sign bits. 1923 // We know that we have an integer-based boolean since these operations 1924 // are only available for integer. 1925 if (MI.getOperand(1).getReg() == R) { 1926 if (TL.getBooleanContents(DstTy.isVector(), false) == 1927 TargetLowering::ZeroOrNegativeOneBooleanContent) 1928 return TyBits; 1929 } 1930 1931 break; 1932 } 1933 case TargetOpcode::G_FCMP: 1934 case TargetOpcode::G_ICMP: { 1935 bool IsFP = Opcode == TargetOpcode::G_FCMP; 1936 if (TyBits == 1) 1937 break; 1938 auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP); 1939 if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent) 1940 return TyBits; // All bits are sign bits. 1941 if (BC == TargetLowering::ZeroOrOneBooleanContent) 1942 return TyBits - 1; // Every always-zero bit is a sign bit. 1943 break; 1944 } 1945 case TargetOpcode::G_BUILD_VECTOR: { 1946 // Collect the known bits that are shared by every demanded vector element. 1947 FirstAnswer = TyBits; 1948 APInt SingleDemandedElt(1, 1); 1949 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { 1950 if (!DemandedElts[I]) 1951 continue; 1952 1953 unsigned Tmp2 = 1954 computeNumSignBits(MO.getReg(), SingleDemandedElt, Depth + 1); 1955 FirstAnswer = std::min(FirstAnswer, Tmp2); 1956 1957 // If we don't know any bits, early out. 1958 if (FirstAnswer == 1) 1959 break; 1960 } 1961 break; 1962 } 1963 case TargetOpcode::G_CONCAT_VECTORS: { 1964 if (MRI.getType(MI.getOperand(0).getReg()).isScalableVector()) 1965 break; 1966 FirstAnswer = TyBits; 1967 // Determine the minimum number of sign bits across all demanded 1968 // elts of the input vectors. Early out if the result is already 1. 1969 unsigned NumSubVectorElts = 1970 MRI.getType(MI.getOperand(1).getReg()).getNumElements(); 1971 for (const auto &[I, MO] : enumerate(drop_begin(MI.operands()))) { 1972 APInt DemandedSub = 1973 DemandedElts.extractBits(NumSubVectorElts, I * NumSubVectorElts); 1974 if (!DemandedSub) 1975 continue; 1976 unsigned Tmp2 = computeNumSignBits(MO.getReg(), DemandedSub, Depth + 1); 1977 1978 FirstAnswer = std::min(FirstAnswer, Tmp2); 1979 1980 // If we don't know any bits, early out. 1981 if (FirstAnswer == 1) 1982 break; 1983 } 1984 break; 1985 } 1986 case TargetOpcode::G_SHUFFLE_VECTOR: { 1987 // Collect the minimum number of sign bits that are shared by every vector 1988 // element referenced by the shuffle. 1989 APInt DemandedLHS, DemandedRHS; 1990 Register Src1 = MI.getOperand(1).getReg(); 1991 unsigned NumElts = MRI.getType(Src1).getNumElements(); 1992 if (!getShuffleDemandedElts(NumElts, MI.getOperand(3).getShuffleMask(), 1993 DemandedElts, DemandedLHS, DemandedRHS)) 1994 return 1; 1995 1996 if (!!DemandedLHS) 1997 FirstAnswer = computeNumSignBits(Src1, DemandedLHS, Depth + 1); 1998 // If we don't know anything, early out and try computeKnownBits fall-back. 1999 if (FirstAnswer == 1) 2000 break; 2001 if (!!DemandedRHS) { 2002 unsigned Tmp2 = 2003 computeNumSignBits(MI.getOperand(2).getReg(), DemandedRHS, Depth + 1); 2004 FirstAnswer = std::min(FirstAnswer, Tmp2); 2005 } 2006 break; 2007 } 2008 case TargetOpcode::G_SPLAT_VECTOR: { 2009 // Check if the sign bits of source go down as far as the truncated value. 2010 Register Src = MI.getOperand(1).getReg(); 2011 unsigned NumSrcSignBits = computeNumSignBits(Src, APInt(1, 1), Depth + 1); 2012 unsigned NumSrcBits = MRI.getType(Src).getSizeInBits(); 2013 if (NumSrcSignBits > (NumSrcBits - TyBits)) 2014 return NumSrcSignBits - (NumSrcBits - TyBits); 2015 break; 2016 } 2017 case TargetOpcode::G_INTRINSIC: 2018 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 2019 case TargetOpcode::G_INTRINSIC_CONVERGENT: 2020 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: 2021 default: { 2022 unsigned NumBits = 2023 TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth); 2024 if (NumBits > 1) 2025 FirstAnswer = std::max(FirstAnswer, NumBits); 2026 break; 2027 } 2028 } 2029 2030 // Finally, if we can prove that the top bits of the result are 0's or 1's, 2031 // use this information. 2032 KnownBits Known = getKnownBits(R, DemandedElts, Depth); 2033 APInt Mask; 2034 if (Known.isNonNegative()) { // sign bit is 0 2035 Mask = Known.Zero; 2036 } else if (Known.isNegative()) { // sign bit is 1; 2037 Mask = Known.One; 2038 } else { 2039 // Nothing known. 2040 return FirstAnswer; 2041 } 2042 2043 // Okay, we know that the sign bit in Mask is set. Use CLO to determine 2044 // the number of identical bits in the top of the input value. 2045 Mask <<= Mask.getBitWidth() - TyBits; 2046 return std::max(FirstAnswer, Mask.countl_one()); 2047 } 2048 2049 unsigned GISelValueTracking::computeNumSignBits(Register R, unsigned Depth) { 2050 LLT Ty = MRI.getType(R); 2051 APInt DemandedElts = 2052 Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1); 2053 return computeNumSignBits(R, DemandedElts, Depth); 2054 } 2055 2056 void GISelValueTrackingAnalysisLegacy::getAnalysisUsage( 2057 AnalysisUsage &AU) const { 2058 AU.setPreservesAll(); 2059 MachineFunctionPass::getAnalysisUsage(AU); 2060 } 2061 2062 bool GISelValueTrackingAnalysisLegacy::runOnMachineFunction( 2063 MachineFunction &MF) { 2064 return false; 2065 } 2066 2067 GISelValueTracking &GISelValueTrackingAnalysisLegacy::get(MachineFunction &MF) { 2068 if (!Info) { 2069 unsigned MaxDepth = 2070 MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6; 2071 Info = std::make_unique<GISelValueTracking>(MF, MaxDepth); 2072 } 2073 return *Info; 2074 } 2075 2076 AnalysisKey GISelValueTrackingAnalysis::Key; 2077 2078 GISelValueTracking 2079 GISelValueTrackingAnalysis::run(MachineFunction &MF, 2080 MachineFunctionAnalysisManager &MFAM) { 2081 return Result(MF); 2082 } 2083 2084 PreservedAnalyses 2085 GISelValueTrackingPrinterPass::run(MachineFunction &MF, 2086 MachineFunctionAnalysisManager &MFAM) { 2087 auto &VTA = MFAM.getResult<GISelValueTrackingAnalysis>(MF); 2088 const auto &MRI = MF.getRegInfo(); 2089 OS << "name: "; 2090 MF.getFunction().printAsOperand(OS, /*PrintType=*/false); 2091 OS << '\n'; 2092 2093 for (MachineBasicBlock &BB : MF) { 2094 for (MachineInstr &MI : BB) { 2095 for (MachineOperand &MO : MI.defs()) { 2096 if (!MO.isReg() || MO.getReg().isPhysical()) 2097 continue; 2098 Register Reg = MO.getReg(); 2099 if (!MRI.getType(Reg).isValid()) 2100 continue; 2101 KnownBits Known = VTA.getKnownBits(Reg); 2102 unsigned SignedBits = VTA.computeNumSignBits(Reg); 2103 OS << " " << MO << " KnownBits:" << Known << " SignBits:" << SignedBits 2104 << '\n'; 2105 }; 2106 } 2107 } 2108 return PreservedAnalyses::all(); 2109 } 2110