1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision integer 10 // constant values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/FoldingSet.h" 17 #include "llvm/ADT/Hashing.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/bit.h" 22 #include "llvm/Config/llvm-config.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <climits> 28 #include <cmath> 29 #include <cstdlib> 30 #include <cstring> 31 using namespace llvm; 32 33 #define DEBUG_TYPE "apint" 34 35 /// A utility function for allocating memory, checking for allocation failures, 36 /// and ensuring the contents are zeroed. 37 inline static uint64_t* getClearedMemory(unsigned numWords) { 38 uint64_t *result = new uint64_t[numWords]; 39 memset(result, 0, numWords * sizeof(uint64_t)); 40 return result; 41 } 42 43 /// A utility function for allocating memory and checking for allocation 44 /// failure. The content is not zeroed. 45 inline static uint64_t* getMemory(unsigned numWords) { 46 return new uint64_t[numWords]; 47 } 48 49 /// A utility function that converts a character to a digit. 50 inline static unsigned getDigit(char cdigit, uint8_t radix) { 51 unsigned r; 52 53 if (radix == 16 || radix == 36) { 54 r = cdigit - '0'; 55 if (r <= 9) 56 return r; 57 58 r = cdigit - 'A'; 59 if (r <= radix - 11U) 60 return r + 10; 61 62 r = cdigit - 'a'; 63 if (r <= radix - 11U) 64 return r + 10; 65 66 radix = 10; 67 } 68 69 r = cdigit - '0'; 70 if (r < radix) 71 return r; 72 73 return -1U; 74 } 75 76 77 void APInt::initSlowCase(uint64_t val, bool isSigned) { 78 U.pVal = getClearedMemory(getNumWords()); 79 U.pVal[0] = val; 80 if (isSigned && int64_t(val) < 0) 81 for (unsigned i = 1; i < getNumWords(); ++i) 82 U.pVal[i] = WORDTYPE_MAX; 83 clearUnusedBits(); 84 } 85 86 void APInt::initSlowCase(const APInt& that) { 87 U.pVal = getMemory(getNumWords()); 88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 89 } 90 91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 92 assert(BitWidth && "Bitwidth too small"); 93 assert(bigVal.data() && "Null pointer detected!"); 94 if (isSingleWord()) 95 U.VAL = bigVal[0]; 96 else { 97 // Get memory, cleared to 0 98 U.pVal = getClearedMemory(getNumWords()); 99 // Calculate the number of words to copy 100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 101 // Copy the words from bigVal to pVal 102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 103 } 104 // Make sure unused high bits are cleared 105 clearUnusedBits(); 106 } 107 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 109 : BitWidth(numBits) { 110 initFromArray(bigVal); 111 } 112 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 114 : BitWidth(numBits) { 115 initFromArray(makeArrayRef(bigVal, numWords)); 116 } 117 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 119 : BitWidth(numbits) { 120 assert(BitWidth && "Bitwidth too small"); 121 fromString(numbits, Str, radix); 122 } 123 124 void APInt::reallocate(unsigned NewBitWidth) { 125 // If the number of words is the same we can just change the width and stop. 126 if (getNumWords() == getNumWords(NewBitWidth)) { 127 BitWidth = NewBitWidth; 128 return; 129 } 130 131 // If we have an allocation, delete it. 132 if (!isSingleWord()) 133 delete [] U.pVal; 134 135 // Update BitWidth. 136 BitWidth = NewBitWidth; 137 138 // If we are supposed to have an allocation, create it. 139 if (!isSingleWord()) 140 U.pVal = getMemory(getNumWords()); 141 } 142 143 void APInt::AssignSlowCase(const APInt& RHS) { 144 // Don't do anything for X = X 145 if (this == &RHS) 146 return; 147 148 // Adjust the bit width and handle allocations as necessary. 149 reallocate(RHS.getBitWidth()); 150 151 // Copy the data. 152 if (isSingleWord()) 153 U.VAL = RHS.U.VAL; 154 else 155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 156 } 157 158 /// This method 'profiles' an APInt for use with FoldingSet. 159 void APInt::Profile(FoldingSetNodeID& ID) const { 160 ID.AddInteger(BitWidth); 161 162 if (isSingleWord()) { 163 ID.AddInteger(U.VAL); 164 return; 165 } 166 167 unsigned NumWords = getNumWords(); 168 for (unsigned i = 0; i < NumWords; ++i) 169 ID.AddInteger(U.pVal[i]); 170 } 171 172 /// Prefix increment operator. Increments the APInt by one. 173 APInt& APInt::operator++() { 174 if (isSingleWord()) 175 ++U.VAL; 176 else 177 tcIncrement(U.pVal, getNumWords()); 178 return clearUnusedBits(); 179 } 180 181 /// Prefix decrement operator. Decrements the APInt by one. 182 APInt& APInt::operator--() { 183 if (isSingleWord()) 184 --U.VAL; 185 else 186 tcDecrement(U.pVal, getNumWords()); 187 return clearUnusedBits(); 188 } 189 190 /// Adds the RHS APInt to this APInt. 191 /// @returns this, after addition of RHS. 192 /// Addition assignment operator. 193 APInt& APInt::operator+=(const APInt& RHS) { 194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 195 if (isSingleWord()) 196 U.VAL += RHS.U.VAL; 197 else 198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 199 return clearUnusedBits(); 200 } 201 202 APInt& APInt::operator+=(uint64_t RHS) { 203 if (isSingleWord()) 204 U.VAL += RHS; 205 else 206 tcAddPart(U.pVal, RHS, getNumWords()); 207 return clearUnusedBits(); 208 } 209 210 /// Subtracts the RHS APInt from this APInt 211 /// @returns this, after subtraction 212 /// Subtraction assignment operator. 213 APInt& APInt::operator-=(const APInt& RHS) { 214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 215 if (isSingleWord()) 216 U.VAL -= RHS.U.VAL; 217 else 218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 219 return clearUnusedBits(); 220 } 221 222 APInt& APInt::operator-=(uint64_t RHS) { 223 if (isSingleWord()) 224 U.VAL -= RHS; 225 else 226 tcSubtractPart(U.pVal, RHS, getNumWords()); 227 return clearUnusedBits(); 228 } 229 230 APInt APInt::operator*(const APInt& RHS) const { 231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 232 if (isSingleWord()) 233 return APInt(BitWidth, U.VAL * RHS.U.VAL); 234 235 APInt Result(getMemory(getNumWords()), getBitWidth()); 236 237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 238 239 Result.clearUnusedBits(); 240 return Result; 241 } 242 243 void APInt::AndAssignSlowCase(const APInt& RHS) { 244 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 245 } 246 247 void APInt::OrAssignSlowCase(const APInt& RHS) { 248 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 249 } 250 251 void APInt::XorAssignSlowCase(const APInt& RHS) { 252 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 253 } 254 255 APInt& APInt::operator*=(const APInt& RHS) { 256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 257 *this = *this * RHS; 258 return *this; 259 } 260 261 APInt& APInt::operator*=(uint64_t RHS) { 262 if (isSingleWord()) { 263 U.VAL *= RHS; 264 } else { 265 unsigned NumWords = getNumWords(); 266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 267 } 268 return clearUnusedBits(); 269 } 270 271 bool APInt::EqualSlowCase(const APInt& RHS) const { 272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 273 } 274 275 int APInt::compare(const APInt& RHS) const { 276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 277 if (isSingleWord()) 278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 279 280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 281 } 282 283 int APInt::compareSigned(const APInt& RHS) const { 284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 285 if (isSingleWord()) { 286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 289 } 290 291 bool lhsNeg = isNegative(); 292 bool rhsNeg = RHS.isNegative(); 293 294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 295 if (lhsNeg != rhsNeg) 296 return lhsNeg ? -1 : 1; 297 298 // Otherwise we can just use an unsigned comparison, because even negative 299 // numbers compare correctly this way if both have the same signed-ness. 300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 301 } 302 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 304 unsigned loWord = whichWord(loBit); 305 unsigned hiWord = whichWord(hiBit); 306 307 // Create an initial mask for the low word with zeros below loBit. 308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 309 310 // If hiBit is not aligned, we need a high mask. 311 unsigned hiShiftAmt = whichBit(hiBit); 312 if (hiShiftAmt != 0) { 313 // Create a high mask with zeros above hiBit. 314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 315 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 316 // set the bits in hiWord. 317 if (hiWord == loWord) 318 loMask &= hiMask; 319 else 320 U.pVal[hiWord] |= hiMask; 321 } 322 // Apply the mask to the low word. 323 U.pVal[loWord] |= loMask; 324 325 // Fill any words between loWord and hiWord with all ones. 326 for (unsigned word = loWord + 1; word < hiWord; ++word) 327 U.pVal[word] = WORDTYPE_MAX; 328 } 329 330 /// Toggle every bit to its opposite value. 331 void APInt::flipAllBitsSlowCase() { 332 tcComplement(U.pVal, getNumWords()); 333 clearUnusedBits(); 334 } 335 336 /// Toggle a given bit to its opposite value whose position is given 337 /// as "bitPosition". 338 /// Toggles a given bit to its opposite value. 339 void APInt::flipBit(unsigned bitPosition) { 340 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 341 setBitVal(bitPosition, !(*this)[bitPosition]); 342 } 343 344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 345 unsigned subBitWidth = subBits.getBitWidth(); 346 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 347 "Illegal bit insertion"); 348 349 // Insertion is a direct copy. 350 if (subBitWidth == BitWidth) { 351 *this = subBits; 352 return; 353 } 354 355 // Single word result can be done as a direct bitmask. 356 if (isSingleWord()) { 357 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 358 U.VAL &= ~(mask << bitPosition); 359 U.VAL |= (subBits.U.VAL << bitPosition); 360 return; 361 } 362 363 unsigned loBit = whichBit(bitPosition); 364 unsigned loWord = whichWord(bitPosition); 365 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 366 367 // Insertion within a single word can be done as a direct bitmask. 368 if (loWord == hi1Word) { 369 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 370 U.pVal[loWord] &= ~(mask << loBit); 371 U.pVal[loWord] |= (subBits.U.VAL << loBit); 372 return; 373 } 374 375 // Insert on word boundaries. 376 if (loBit == 0) { 377 // Direct copy whole words. 378 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 379 memcpy(U.pVal + loWord, subBits.getRawData(), 380 numWholeSubWords * APINT_WORD_SIZE); 381 382 // Mask+insert remaining bits. 383 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 384 if (remainingBits != 0) { 385 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 386 U.pVal[hi1Word] &= ~mask; 387 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 388 } 389 return; 390 } 391 392 // General case - set/clear individual bits in dst based on src. 393 // TODO - there is scope for optimization here, but at the moment this code 394 // path is barely used so prefer readability over performance. 395 for (unsigned i = 0; i != subBitWidth; ++i) 396 setBitVal(bitPosition + i, subBits[i]); 397 } 398 399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { 400 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 401 subBits &= maskBits; 402 if (isSingleWord()) { 403 U.VAL &= ~(maskBits << bitPosition); 404 U.VAL |= subBits << bitPosition; 405 return; 406 } 407 408 unsigned loBit = whichBit(bitPosition); 409 unsigned loWord = whichWord(bitPosition); 410 unsigned hiWord = whichWord(bitPosition + numBits - 1); 411 if (loWord == hiWord) { 412 U.pVal[loWord] &= ~(maskBits << loBit); 413 U.pVal[loWord] |= subBits << loBit; 414 return; 415 } 416 417 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 418 unsigned wordBits = 8 * sizeof(WordType); 419 U.pVal[loWord] &= ~(maskBits << loBit); 420 U.pVal[loWord] |= subBits << loBit; 421 422 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); 423 U.pVal[hiWord] |= subBits >> (wordBits - loBit); 424 } 425 426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 427 assert(numBits > 0 && "Can't extract zero bits"); 428 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 429 "Illegal bit extraction"); 430 431 if (isSingleWord()) 432 return APInt(numBits, U.VAL >> bitPosition); 433 434 unsigned loBit = whichBit(bitPosition); 435 unsigned loWord = whichWord(bitPosition); 436 unsigned hiWord = whichWord(bitPosition + numBits - 1); 437 438 // Single word result extracting bits from a single word source. 439 if (loWord == hiWord) 440 return APInt(numBits, U.pVal[loWord] >> loBit); 441 442 // Extracting bits that start on a source word boundary can be done 443 // as a fast memory copy. 444 if (loBit == 0) 445 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 446 447 // General case - shift + copy source words directly into place. 448 APInt Result(numBits, 0); 449 unsigned NumSrcWords = getNumWords(); 450 unsigned NumDstWords = Result.getNumWords(); 451 452 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 453 for (unsigned word = 0; word < NumDstWords; ++word) { 454 uint64_t w0 = U.pVal[loWord + word]; 455 uint64_t w1 = 456 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 457 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 458 } 459 460 return Result.clearUnusedBits(); 461 } 462 463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, 464 unsigned bitPosition) const { 465 assert(numBits > 0 && "Can't extract zero bits"); 466 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 467 "Illegal bit extraction"); 468 assert(numBits <= 64 && "Illegal bit extraction"); 469 470 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 471 if (isSingleWord()) 472 return (U.VAL >> bitPosition) & maskBits; 473 474 unsigned loBit = whichBit(bitPosition); 475 unsigned loWord = whichWord(bitPosition); 476 unsigned hiWord = whichWord(bitPosition + numBits - 1); 477 if (loWord == hiWord) 478 return (U.pVal[loWord] >> loBit) & maskBits; 479 480 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 481 unsigned wordBits = 8 * sizeof(WordType); 482 uint64_t retBits = U.pVal[loWord] >> loBit; 483 retBits |= U.pVal[hiWord] << (wordBits - loBit); 484 retBits &= maskBits; 485 return retBits; 486 } 487 488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 489 assert(!str.empty() && "Invalid string length"); 490 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 491 radix == 36) && 492 "Radix should be 2, 8, 10, 16, or 36!"); 493 494 size_t slen = str.size(); 495 496 // Each computation below needs to know if it's negative. 497 StringRef::iterator p = str.begin(); 498 unsigned isNegative = *p == '-'; 499 if (*p == '-' || *p == '+') { 500 p++; 501 slen--; 502 assert(slen && "String is only a sign, needs a value."); 503 } 504 505 // For radixes of power-of-two values, the bits required is accurately and 506 // easily computed 507 if (radix == 2) 508 return slen + isNegative; 509 if (radix == 8) 510 return slen * 3 + isNegative; 511 if (radix == 16) 512 return slen * 4 + isNegative; 513 514 // FIXME: base 36 515 516 // This is grossly inefficient but accurate. We could probably do something 517 // with a computation of roughly slen*64/20 and then adjust by the value of 518 // the first few digits. But, I'm not sure how accurate that could be. 519 520 // Compute a sufficient number of bits that is always large enough but might 521 // be too large. This avoids the assertion in the constructor. This 522 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 523 // bits in that case. 524 unsigned sufficient 525 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 526 : (slen == 1 ? 7 : slen * 16/3); 527 528 // Convert to the actual binary value. 529 APInt tmp(sufficient, StringRef(p, slen), radix); 530 531 // Compute how many bits are required. If the log is infinite, assume we need 532 // just bit. If the log is exact and value is negative, then the value is 533 // MinSignedValue with (log + 1) bits. 534 unsigned log = tmp.logBase2(); 535 if (log == (unsigned)-1) { 536 return isNegative + 1; 537 } else if (isNegative && tmp.isPowerOf2()) { 538 return isNegative + log; 539 } else { 540 return isNegative + log + 1; 541 } 542 } 543 544 hash_code llvm::hash_value(const APInt &Arg) { 545 if (Arg.isSingleWord()) 546 return hash_combine(Arg.BitWidth, Arg.U.VAL); 547 548 return hash_combine( 549 Arg.BitWidth, 550 hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords())); 551 } 552 553 bool APInt::isSplat(unsigned SplatSizeInBits) const { 554 assert(getBitWidth() % SplatSizeInBits == 0 && 555 "SplatSizeInBits must divide width!"); 556 // We can check that all parts of an integer are equal by making use of a 557 // little trick: rotate and check if it's still the same value. 558 return *this == rotl(SplatSizeInBits); 559 } 560 561 /// This function returns the high "numBits" bits of this APInt. 562 APInt APInt::getHiBits(unsigned numBits) const { 563 return this->lshr(BitWidth - numBits); 564 } 565 566 /// This function returns the low "numBits" bits of this APInt. 567 APInt APInt::getLoBits(unsigned numBits) const { 568 APInt Result(getLowBitsSet(BitWidth, numBits)); 569 Result &= *this; 570 return Result; 571 } 572 573 /// Return a value containing V broadcasted over NewLen bits. 574 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 575 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 576 577 APInt Val = V.zextOrSelf(NewLen); 578 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 579 Val |= Val << I; 580 581 return Val; 582 } 583 584 unsigned APInt::countLeadingZerosSlowCase() const { 585 unsigned Count = 0; 586 for (int i = getNumWords()-1; i >= 0; --i) { 587 uint64_t V = U.pVal[i]; 588 if (V == 0) 589 Count += APINT_BITS_PER_WORD; 590 else { 591 Count += llvm::countLeadingZeros(V); 592 break; 593 } 594 } 595 // Adjust for unused bits in the most significant word (they are zero). 596 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 597 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 598 return Count; 599 } 600 601 unsigned APInt::countLeadingOnesSlowCase() const { 602 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 603 unsigned shift; 604 if (!highWordBits) { 605 highWordBits = APINT_BITS_PER_WORD; 606 shift = 0; 607 } else { 608 shift = APINT_BITS_PER_WORD - highWordBits; 609 } 610 int i = getNumWords() - 1; 611 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 612 if (Count == highWordBits) { 613 for (i--; i >= 0; --i) { 614 if (U.pVal[i] == WORDTYPE_MAX) 615 Count += APINT_BITS_PER_WORD; 616 else { 617 Count += llvm::countLeadingOnes(U.pVal[i]); 618 break; 619 } 620 } 621 } 622 return Count; 623 } 624 625 unsigned APInt::countTrailingZerosSlowCase() const { 626 unsigned Count = 0; 627 unsigned i = 0; 628 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 629 Count += APINT_BITS_PER_WORD; 630 if (i < getNumWords()) 631 Count += llvm::countTrailingZeros(U.pVal[i]); 632 return std::min(Count, BitWidth); 633 } 634 635 unsigned APInt::countTrailingOnesSlowCase() const { 636 unsigned Count = 0; 637 unsigned i = 0; 638 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 639 Count += APINT_BITS_PER_WORD; 640 if (i < getNumWords()) 641 Count += llvm::countTrailingOnes(U.pVal[i]); 642 assert(Count <= BitWidth); 643 return Count; 644 } 645 646 unsigned APInt::countPopulationSlowCase() const { 647 unsigned Count = 0; 648 for (unsigned i = 0; i < getNumWords(); ++i) 649 Count += llvm::countPopulation(U.pVal[i]); 650 return Count; 651 } 652 653 bool APInt::intersectsSlowCase(const APInt &RHS) const { 654 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 655 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 656 return true; 657 658 return false; 659 } 660 661 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 662 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 663 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 664 return false; 665 666 return true; 667 } 668 669 APInt APInt::byteSwap() const { 670 assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!"); 671 if (BitWidth == 16) 672 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 673 if (BitWidth == 32) 674 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 675 if (BitWidth <= 64) { 676 uint64_t Tmp1 = ByteSwap_64(U.VAL); 677 Tmp1 >>= (64 - BitWidth); 678 return APInt(BitWidth, Tmp1); 679 } 680 681 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 682 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 683 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 684 if (Result.BitWidth != BitWidth) { 685 Result.lshrInPlace(Result.BitWidth - BitWidth); 686 Result.BitWidth = BitWidth; 687 } 688 return Result; 689 } 690 691 APInt APInt::reverseBits() const { 692 switch (BitWidth) { 693 case 64: 694 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 695 case 32: 696 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 697 case 16: 698 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 699 case 8: 700 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 701 default: 702 break; 703 } 704 705 APInt Val(*this); 706 APInt Reversed(BitWidth, 0); 707 unsigned S = BitWidth; 708 709 for (; Val != 0; Val.lshrInPlace(1)) { 710 Reversed <<= 1; 711 Reversed |= Val[0]; 712 --S; 713 } 714 715 Reversed <<= S; 716 return Reversed; 717 } 718 719 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 720 // Fast-path a common case. 721 if (A == B) return A; 722 723 // Corner cases: if either operand is zero, the other is the gcd. 724 if (!A) return B; 725 if (!B) return A; 726 727 // Count common powers of 2 and remove all other powers of 2. 728 unsigned Pow2; 729 { 730 unsigned Pow2_A = A.countTrailingZeros(); 731 unsigned Pow2_B = B.countTrailingZeros(); 732 if (Pow2_A > Pow2_B) { 733 A.lshrInPlace(Pow2_A - Pow2_B); 734 Pow2 = Pow2_B; 735 } else if (Pow2_B > Pow2_A) { 736 B.lshrInPlace(Pow2_B - Pow2_A); 737 Pow2 = Pow2_A; 738 } else { 739 Pow2 = Pow2_A; 740 } 741 } 742 743 // Both operands are odd multiples of 2^Pow_2: 744 // 745 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 746 // 747 // This is a modified version of Stein's algorithm, taking advantage of 748 // efficient countTrailingZeros(). 749 while (A != B) { 750 if (A.ugt(B)) { 751 A -= B; 752 A.lshrInPlace(A.countTrailingZeros() - Pow2); 753 } else { 754 B -= A; 755 B.lshrInPlace(B.countTrailingZeros() - Pow2); 756 } 757 } 758 759 return A; 760 } 761 762 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 763 uint64_t I = bit_cast<uint64_t>(Double); 764 765 // Get the sign bit from the highest order bit 766 bool isNeg = I >> 63; 767 768 // Get the 11-bit exponent and adjust for the 1023 bit bias 769 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 770 771 // If the exponent is negative, the value is < 0 so just return 0. 772 if (exp < 0) 773 return APInt(width, 0u); 774 775 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 776 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 777 778 // If the exponent doesn't shift all bits out of the mantissa 779 if (exp < 52) 780 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 781 APInt(width, mantissa >> (52 - exp)); 782 783 // If the client didn't provide enough bits for us to shift the mantissa into 784 // then the result is undefined, just return 0 785 if (width <= exp - 52) 786 return APInt(width, 0); 787 788 // Otherwise, we have to shift the mantissa bits up to the right location 789 APInt Tmp(width, mantissa); 790 Tmp <<= (unsigned)exp - 52; 791 return isNeg ? -Tmp : Tmp; 792 } 793 794 /// This function converts this APInt to a double. 795 /// The layout for double is as following (IEEE Standard 754): 796 /// -------------------------------------- 797 /// | Sign Exponent Fraction Bias | 798 /// |-------------------------------------- | 799 /// | 1[63] 11[62-52] 52[51-00] 1023 | 800 /// -------------------------------------- 801 double APInt::roundToDouble(bool isSigned) const { 802 803 // Handle the simple case where the value is contained in one uint64_t. 804 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 805 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 806 if (isSigned) { 807 int64_t sext = SignExtend64(getWord(0), BitWidth); 808 return double(sext); 809 } else 810 return double(getWord(0)); 811 } 812 813 // Determine if the value is negative. 814 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 815 816 // Construct the absolute value if we're negative. 817 APInt Tmp(isNeg ? -(*this) : (*this)); 818 819 // Figure out how many bits we're using. 820 unsigned n = Tmp.getActiveBits(); 821 822 // The exponent (without bias normalization) is just the number of bits 823 // we are using. Note that the sign bit is gone since we constructed the 824 // absolute value. 825 uint64_t exp = n; 826 827 // Return infinity for exponent overflow 828 if (exp > 1023) { 829 if (!isSigned || !isNeg) 830 return std::numeric_limits<double>::infinity(); 831 else 832 return -std::numeric_limits<double>::infinity(); 833 } 834 exp += 1023; // Increment for 1023 bias 835 836 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 837 // extract the high 52 bits from the correct words in pVal. 838 uint64_t mantissa; 839 unsigned hiWord = whichWord(n-1); 840 if (hiWord == 0) { 841 mantissa = Tmp.U.pVal[0]; 842 if (n > 52) 843 mantissa >>= n - 52; // shift down, we want the top 52 bits. 844 } else { 845 assert(hiWord > 0 && "huh?"); 846 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 847 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 848 mantissa = hibits | lobits; 849 } 850 851 // The leading bit of mantissa is implicit, so get rid of it. 852 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 853 uint64_t I = sign | (exp << 52) | mantissa; 854 return bit_cast<double>(I); 855 } 856 857 // Truncate to new width. 858 APInt APInt::trunc(unsigned width) const { 859 assert(width < BitWidth && "Invalid APInt Truncate request"); 860 assert(width && "Can't truncate to 0 bits"); 861 862 if (width <= APINT_BITS_PER_WORD) 863 return APInt(width, getRawData()[0]); 864 865 APInt Result(getMemory(getNumWords(width)), width); 866 867 // Copy full words. 868 unsigned i; 869 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 870 Result.U.pVal[i] = U.pVal[i]; 871 872 // Truncate and copy any partial word. 873 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 874 if (bits != 0) 875 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 876 877 return Result; 878 } 879 880 // Truncate to new width with unsigned saturation. 881 APInt APInt::truncUSat(unsigned width) const { 882 assert(width < BitWidth && "Invalid APInt Truncate request"); 883 assert(width && "Can't truncate to 0 bits"); 884 885 // Can we just losslessly truncate it? 886 if (isIntN(width)) 887 return trunc(width); 888 // If not, then just return the new limit. 889 return APInt::getMaxValue(width); 890 } 891 892 // Truncate to new width with signed saturation. 893 APInt APInt::truncSSat(unsigned width) const { 894 assert(width < BitWidth && "Invalid APInt Truncate request"); 895 assert(width && "Can't truncate to 0 bits"); 896 897 // Can we just losslessly truncate it? 898 if (isSignedIntN(width)) 899 return trunc(width); 900 // If not, then just return the new limits. 901 return isNegative() ? APInt::getSignedMinValue(width) 902 : APInt::getSignedMaxValue(width); 903 } 904 905 // Sign extend to a new width. 906 APInt APInt::sext(unsigned Width) const { 907 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 908 909 if (Width <= APINT_BITS_PER_WORD) 910 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 911 912 APInt Result(getMemory(getNumWords(Width)), Width); 913 914 // Copy words. 915 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 916 917 // Sign extend the last word since there may be unused bits in the input. 918 Result.U.pVal[getNumWords() - 1] = 919 SignExtend64(Result.U.pVal[getNumWords() - 1], 920 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 921 922 // Fill with sign bits. 923 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 924 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 925 Result.clearUnusedBits(); 926 return Result; 927 } 928 929 // Zero extend to a new width. 930 APInt APInt::zext(unsigned width) const { 931 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 932 933 if (width <= APINT_BITS_PER_WORD) 934 return APInt(width, U.VAL); 935 936 APInt Result(getMemory(getNumWords(width)), width); 937 938 // Copy words. 939 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 940 941 // Zero remaining words. 942 std::memset(Result.U.pVal + getNumWords(), 0, 943 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 944 945 return Result; 946 } 947 948 APInt APInt::zextOrTrunc(unsigned width) const { 949 if (BitWidth < width) 950 return zext(width); 951 if (BitWidth > width) 952 return trunc(width); 953 return *this; 954 } 955 956 APInt APInt::sextOrTrunc(unsigned width) const { 957 if (BitWidth < width) 958 return sext(width); 959 if (BitWidth > width) 960 return trunc(width); 961 return *this; 962 } 963 964 APInt APInt::truncOrSelf(unsigned width) const { 965 if (BitWidth > width) 966 return trunc(width); 967 return *this; 968 } 969 970 APInt APInt::zextOrSelf(unsigned width) const { 971 if (BitWidth < width) 972 return zext(width); 973 return *this; 974 } 975 976 APInt APInt::sextOrSelf(unsigned width) const { 977 if (BitWidth < width) 978 return sext(width); 979 return *this; 980 } 981 982 /// Arithmetic right-shift this APInt by shiftAmt. 983 /// Arithmetic right-shift function. 984 void APInt::ashrInPlace(const APInt &shiftAmt) { 985 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 986 } 987 988 /// Arithmetic right-shift this APInt by shiftAmt. 989 /// Arithmetic right-shift function. 990 void APInt::ashrSlowCase(unsigned ShiftAmt) { 991 // Don't bother performing a no-op shift. 992 if (!ShiftAmt) 993 return; 994 995 // Save the original sign bit for later. 996 bool Negative = isNegative(); 997 998 // WordShift is the inter-part shift; BitShift is intra-part shift. 999 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 1000 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 1001 1002 unsigned WordsToMove = getNumWords() - WordShift; 1003 if (WordsToMove != 0) { 1004 // Sign extend the last word to fill in the unused bits. 1005 U.pVal[getNumWords() - 1] = SignExtend64( 1006 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 1007 1008 // Fastpath for moving by whole words. 1009 if (BitShift == 0) { 1010 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 1011 } else { 1012 // Move the words containing significant bits. 1013 for (unsigned i = 0; i != WordsToMove - 1; ++i) 1014 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 1015 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 1016 1017 // Handle the last word which has no high bits to copy. 1018 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 1019 // Sign extend one more time. 1020 U.pVal[WordsToMove - 1] = 1021 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 1022 } 1023 } 1024 1025 // Fill in the remainder based on the original sign. 1026 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 1027 WordShift * APINT_WORD_SIZE); 1028 clearUnusedBits(); 1029 } 1030 1031 /// Logical right-shift this APInt by shiftAmt. 1032 /// Logical right-shift function. 1033 void APInt::lshrInPlace(const APInt &shiftAmt) { 1034 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1035 } 1036 1037 /// Logical right-shift this APInt by shiftAmt. 1038 /// Logical right-shift function. 1039 void APInt::lshrSlowCase(unsigned ShiftAmt) { 1040 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 1041 } 1042 1043 /// Left-shift this APInt by shiftAmt. 1044 /// Left-shift function. 1045 APInt &APInt::operator<<=(const APInt &shiftAmt) { 1046 // It's undefined behavior in C to shift by BitWidth or greater. 1047 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 1048 return *this; 1049 } 1050 1051 void APInt::shlSlowCase(unsigned ShiftAmt) { 1052 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 1053 clearUnusedBits(); 1054 } 1055 1056 // Calculate the rotate amount modulo the bit width. 1057 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 1058 unsigned rotBitWidth = rotateAmt.getBitWidth(); 1059 APInt rot = rotateAmt; 1060 if (rotBitWidth < BitWidth) { 1061 // Extend the rotate APInt, so that the urem doesn't divide by 0. 1062 // e.g. APInt(1, 32) would give APInt(1, 0). 1063 rot = rotateAmt.zext(BitWidth); 1064 } 1065 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 1066 return rot.getLimitedValue(BitWidth); 1067 } 1068 1069 APInt APInt::rotl(const APInt &rotateAmt) const { 1070 return rotl(rotateModulo(BitWidth, rotateAmt)); 1071 } 1072 1073 APInt APInt::rotl(unsigned rotateAmt) const { 1074 rotateAmt %= BitWidth; 1075 if (rotateAmt == 0) 1076 return *this; 1077 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1078 } 1079 1080 APInt APInt::rotr(const APInt &rotateAmt) const { 1081 return rotr(rotateModulo(BitWidth, rotateAmt)); 1082 } 1083 1084 APInt APInt::rotr(unsigned rotateAmt) const { 1085 rotateAmt %= BitWidth; 1086 if (rotateAmt == 0) 1087 return *this; 1088 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1089 } 1090 1091 // Square Root - this method computes and returns the square root of "this". 1092 // Three mechanisms are used for computation. For small values (<= 5 bits), 1093 // a table lookup is done. This gets some performance for common cases. For 1094 // values using less than 52 bits, the value is converted to double and then 1095 // the libc sqrt function is called. The result is rounded and then converted 1096 // back to a uint64_t which is then used to construct the result. Finally, 1097 // the Babylonian method for computing square roots is used. 1098 APInt APInt::sqrt() const { 1099 1100 // Determine the magnitude of the value. 1101 unsigned magnitude = getActiveBits(); 1102 1103 // Use a fast table for some small values. This also gets rid of some 1104 // rounding errors in libc sqrt for small values. 1105 if (magnitude <= 5) { 1106 static const uint8_t results[32] = { 1107 /* 0 */ 0, 1108 /* 1- 2 */ 1, 1, 1109 /* 3- 6 */ 2, 2, 2, 2, 1110 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1111 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1112 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1113 /* 31 */ 6 1114 }; 1115 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1116 } 1117 1118 // If the magnitude of the value fits in less than 52 bits (the precision of 1119 // an IEEE double precision floating point value), then we can use the 1120 // libc sqrt function which will probably use a hardware sqrt computation. 1121 // This should be faster than the algorithm below. 1122 if (magnitude < 52) { 1123 return APInt(BitWidth, 1124 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1125 : U.pVal[0]))))); 1126 } 1127 1128 // Okay, all the short cuts are exhausted. We must compute it. The following 1129 // is a classical Babylonian method for computing the square root. This code 1130 // was adapted to APInt from a wikipedia article on such computations. 1131 // See http://www.wikipedia.org/ and go to the page named 1132 // Calculate_an_integer_square_root. 1133 unsigned nbits = BitWidth, i = 4; 1134 APInt testy(BitWidth, 16); 1135 APInt x_old(BitWidth, 1); 1136 APInt x_new(BitWidth, 0); 1137 APInt two(BitWidth, 2); 1138 1139 // Select a good starting value using binary logarithms. 1140 for (;; i += 2, testy = testy.shl(2)) 1141 if (i >= nbits || this->ule(testy)) { 1142 x_old = x_old.shl(i / 2); 1143 break; 1144 } 1145 1146 // Use the Babylonian method to arrive at the integer square root: 1147 for (;;) { 1148 x_new = (this->udiv(x_old) + x_old).udiv(two); 1149 if (x_old.ule(x_new)) 1150 break; 1151 x_old = x_new; 1152 } 1153 1154 // Make sure we return the closest approximation 1155 // NOTE: The rounding calculation below is correct. It will produce an 1156 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1157 // determined to be a rounding issue with pari/gp as it begins to use a 1158 // floating point representation after 192 bits. There are no discrepancies 1159 // between this algorithm and pari/gp for bit widths < 192 bits. 1160 APInt square(x_old * x_old); 1161 APInt nextSquare((x_old + 1) * (x_old +1)); 1162 if (this->ult(square)) 1163 return x_old; 1164 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1165 APInt midpoint((nextSquare - square).udiv(two)); 1166 APInt offset(*this - square); 1167 if (offset.ult(midpoint)) 1168 return x_old; 1169 return x_old + 1; 1170 } 1171 1172 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1173 /// iterative extended Euclidean algorithm is used to solve for this value, 1174 /// however we simplify it to speed up calculating only the inverse, and take 1175 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1176 /// (potentially large) APInts around. 1177 /// WARNING: a value of '0' may be returned, 1178 /// signifying that no multiplicative inverse exists! 1179 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1180 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1181 1182 // Using the properties listed at the following web page (accessed 06/21/08): 1183 // http://www.numbertheory.org/php/euclid.html 1184 // (especially the properties numbered 3, 4 and 9) it can be proved that 1185 // BitWidth bits suffice for all the computations in the algorithm implemented 1186 // below. More precisely, this number of bits suffice if the multiplicative 1187 // inverse exists, but may not suffice for the general extended Euclidean 1188 // algorithm. 1189 1190 APInt r[2] = { modulo, *this }; 1191 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1192 APInt q(BitWidth, 0); 1193 1194 unsigned i; 1195 for (i = 0; r[i^1] != 0; i ^= 1) { 1196 // An overview of the math without the confusing bit-flipping: 1197 // q = r[i-2] / r[i-1] 1198 // r[i] = r[i-2] % r[i-1] 1199 // t[i] = t[i-2] - t[i-1] * q 1200 udivrem(r[i], r[i^1], q, r[i]); 1201 t[i] -= t[i^1] * q; 1202 } 1203 1204 // If this APInt and the modulo are not coprime, there is no multiplicative 1205 // inverse, so return 0. We check this by looking at the next-to-last 1206 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1207 // algorithm. 1208 if (r[i] != 1) 1209 return APInt(BitWidth, 0); 1210 1211 // The next-to-last t is the multiplicative inverse. However, we are 1212 // interested in a positive inverse. Calculate a positive one from a negative 1213 // one if necessary. A simple addition of the modulo suffices because 1214 // abs(t[i]) is known to be less than *this/2 (see the link above). 1215 if (t[i].isNegative()) 1216 t[i] += modulo; 1217 1218 return std::move(t[i]); 1219 } 1220 1221 /// Calculate the magic numbers required to implement a signed integer division 1222 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1223 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1224 /// Warren, Jr., chapter 10. 1225 APInt::ms APInt::magic() const { 1226 const APInt& d = *this; 1227 unsigned p; 1228 APInt ad, anc, delta, q1, r1, q2, r2, t; 1229 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1230 struct ms mag; 1231 1232 ad = d.abs(); 1233 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1234 anc = t - 1 - t.urem(ad); // absolute value of nc 1235 p = d.getBitWidth() - 1; // initialize p 1236 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1237 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1238 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1239 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1240 do { 1241 p = p + 1; 1242 q1 = q1<<1; // update q1 = 2p/abs(nc) 1243 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1244 if (r1.uge(anc)) { // must be unsigned comparison 1245 q1 = q1 + 1; 1246 r1 = r1 - anc; 1247 } 1248 q2 = q2<<1; // update q2 = 2p/abs(d) 1249 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1250 if (r2.uge(ad)) { // must be unsigned comparison 1251 q2 = q2 + 1; 1252 r2 = r2 - ad; 1253 } 1254 delta = ad - r2; 1255 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1256 1257 mag.m = q2 + 1; 1258 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1259 mag.s = p - d.getBitWidth(); // resulting shift 1260 return mag; 1261 } 1262 1263 /// Calculate the magic numbers required to implement an unsigned integer 1264 /// division by a constant as a sequence of multiplies, adds and shifts. 1265 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1266 /// S. Warren, Jr., chapter 10. 1267 /// LeadingZeros can be used to simplify the calculation if the upper bits 1268 /// of the divided value are known zero. 1269 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1270 const APInt& d = *this; 1271 unsigned p; 1272 APInt nc, delta, q1, r1, q2, r2; 1273 struct mu magu; 1274 magu.a = 0; // initialize "add" indicator 1275 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1276 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1277 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1278 1279 nc = allOnes - (allOnes - d).urem(d); 1280 p = d.getBitWidth() - 1; // initialize p 1281 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1282 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1283 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1284 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1285 do { 1286 p = p + 1; 1287 if (r1.uge(nc - r1)) { 1288 q1 = q1 + q1 + 1; // update q1 1289 r1 = r1 + r1 - nc; // update r1 1290 } 1291 else { 1292 q1 = q1+q1; // update q1 1293 r1 = r1+r1; // update r1 1294 } 1295 if ((r2 + 1).uge(d - r2)) { 1296 if (q2.uge(signedMax)) magu.a = 1; 1297 q2 = q2+q2 + 1; // update q2 1298 r2 = r2+r2 + 1 - d; // update r2 1299 } 1300 else { 1301 if (q2.uge(signedMin)) magu.a = 1; 1302 q2 = q2+q2; // update q2 1303 r2 = r2+r2 + 1; // update r2 1304 } 1305 delta = d - 1 - r2; 1306 } while (p < d.getBitWidth()*2 && 1307 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1308 magu.m = q2 + 1; // resulting magic number 1309 magu.s = p - d.getBitWidth(); // resulting shift 1310 return magu; 1311 } 1312 1313 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1314 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1315 /// variables here have the same names as in the algorithm. Comments explain 1316 /// the algorithm and any deviation from it. 1317 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1318 unsigned m, unsigned n) { 1319 assert(u && "Must provide dividend"); 1320 assert(v && "Must provide divisor"); 1321 assert(q && "Must provide quotient"); 1322 assert(u != v && u != q && v != q && "Must use different memory"); 1323 assert(n>1 && "n must be > 1"); 1324 1325 // b denotes the base of the number system. In our case b is 2^32. 1326 const uint64_t b = uint64_t(1) << 32; 1327 1328 // The DEBUG macros here tend to be spam in the debug output if you're not 1329 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1330 #ifdef KNUTH_DEBUG 1331 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1332 #else 1333 #define DEBUG_KNUTH(X) do {} while(false) 1334 #endif 1335 1336 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1337 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1338 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1339 DEBUG_KNUTH(dbgs() << " by"); 1340 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1341 DEBUG_KNUTH(dbgs() << '\n'); 1342 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1343 // u and v by d. Note that we have taken Knuth's advice here to use a power 1344 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1345 // 2 allows us to shift instead of multiply and it is easy to determine the 1346 // shift amount from the leading zeros. We are basically normalizing the u 1347 // and v so that its high bits are shifted to the top of v's range without 1348 // overflow. Note that this can require an extra word in u so that u must 1349 // be of length m+n+1. 1350 unsigned shift = countLeadingZeros(v[n-1]); 1351 uint32_t v_carry = 0; 1352 uint32_t u_carry = 0; 1353 if (shift) { 1354 for (unsigned i = 0; i < m+n; ++i) { 1355 uint32_t u_tmp = u[i] >> (32 - shift); 1356 u[i] = (u[i] << shift) | u_carry; 1357 u_carry = u_tmp; 1358 } 1359 for (unsigned i = 0; i < n; ++i) { 1360 uint32_t v_tmp = v[i] >> (32 - shift); 1361 v[i] = (v[i] << shift) | v_carry; 1362 v_carry = v_tmp; 1363 } 1364 } 1365 u[m+n] = u_carry; 1366 1367 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1368 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1369 DEBUG_KNUTH(dbgs() << " by"); 1370 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1371 DEBUG_KNUTH(dbgs() << '\n'); 1372 1373 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1374 int j = m; 1375 do { 1376 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1377 // D3. [Calculate q'.]. 1378 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1379 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1380 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1381 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1382 // on v[n-2] determines at high speed most of the cases in which the trial 1383 // value qp is one too large, and it eliminates all cases where qp is two 1384 // too large. 1385 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1386 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1387 uint64_t qp = dividend / v[n-1]; 1388 uint64_t rp = dividend % v[n-1]; 1389 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1390 qp--; 1391 rp += v[n-1]; 1392 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1393 qp--; 1394 } 1395 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1396 1397 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1398 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1399 // consists of a simple multiplication by a one-place number, combined with 1400 // a subtraction. 1401 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1402 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1403 // true value plus b**(n+1), namely as the b's complement of 1404 // the true value, and a "borrow" to the left should be remembered. 1405 int64_t borrow = 0; 1406 for (unsigned i = 0; i < n; ++i) { 1407 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1408 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1409 u[j+i] = Lo_32(subres); 1410 borrow = Hi_32(p) - Hi_32(subres); 1411 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1412 << ", borrow = " << borrow << '\n'); 1413 } 1414 bool isNeg = u[j+n] < borrow; 1415 u[j+n] -= Lo_32(borrow); 1416 1417 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1418 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1419 DEBUG_KNUTH(dbgs() << '\n'); 1420 1421 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1422 // negative, go to step D6; otherwise go on to step D7. 1423 q[j] = Lo_32(qp); 1424 if (isNeg) { 1425 // D6. [Add back]. The probability that this step is necessary is very 1426 // small, on the order of only 2/b. Make sure that test data accounts for 1427 // this possibility. Decrease q[j] by 1 1428 q[j]--; 1429 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1430 // A carry will occur to the left of u[j+n], and it should be ignored 1431 // since it cancels with the borrow that occurred in D4. 1432 bool carry = false; 1433 for (unsigned i = 0; i < n; i++) { 1434 uint32_t limit = std::min(u[j+i],v[i]); 1435 u[j+i] += v[i] + carry; 1436 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1437 } 1438 u[j+n] += carry; 1439 } 1440 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1441 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1442 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1443 1444 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1445 } while (--j >= 0); 1446 1447 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1448 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1449 DEBUG_KNUTH(dbgs() << '\n'); 1450 1451 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1452 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1453 // compute the remainder (urem uses this). 1454 if (r) { 1455 // The value d is expressed by the "shift" value above since we avoided 1456 // multiplication by d by using a shift left. So, all we have to do is 1457 // shift right here. 1458 if (shift) { 1459 uint32_t carry = 0; 1460 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1461 for (int i = n-1; i >= 0; i--) { 1462 r[i] = (u[i] >> shift) | carry; 1463 carry = u[i] << (32 - shift); 1464 DEBUG_KNUTH(dbgs() << " " << r[i]); 1465 } 1466 } else { 1467 for (int i = n-1; i >= 0; i--) { 1468 r[i] = u[i]; 1469 DEBUG_KNUTH(dbgs() << " " << r[i]); 1470 } 1471 } 1472 DEBUG_KNUTH(dbgs() << '\n'); 1473 } 1474 DEBUG_KNUTH(dbgs() << '\n'); 1475 } 1476 1477 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1478 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1479 assert(lhsWords >= rhsWords && "Fractional result"); 1480 1481 // First, compose the values into an array of 32-bit words instead of 1482 // 64-bit words. This is a necessity of both the "short division" algorithm 1483 // and the Knuth "classical algorithm" which requires there to be native 1484 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1485 // can't use 64-bit operands here because we don't have native results of 1486 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1487 // work on large-endian machines. 1488 unsigned n = rhsWords * 2; 1489 unsigned m = (lhsWords * 2) - n; 1490 1491 // Allocate space for the temporary values we need either on the stack, if 1492 // it will fit, or on the heap if it won't. 1493 uint32_t SPACE[128]; 1494 uint32_t *U = nullptr; 1495 uint32_t *V = nullptr; 1496 uint32_t *Q = nullptr; 1497 uint32_t *R = nullptr; 1498 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1499 U = &SPACE[0]; 1500 V = &SPACE[m+n+1]; 1501 Q = &SPACE[(m+n+1) + n]; 1502 if (Remainder) 1503 R = &SPACE[(m+n+1) + n + (m+n)]; 1504 } else { 1505 U = new uint32_t[m + n + 1]; 1506 V = new uint32_t[n]; 1507 Q = new uint32_t[m+n]; 1508 if (Remainder) 1509 R = new uint32_t[n]; 1510 } 1511 1512 // Initialize the dividend 1513 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1514 for (unsigned i = 0; i < lhsWords; ++i) { 1515 uint64_t tmp = LHS[i]; 1516 U[i * 2] = Lo_32(tmp); 1517 U[i * 2 + 1] = Hi_32(tmp); 1518 } 1519 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1520 1521 // Initialize the divisor 1522 memset(V, 0, (n)*sizeof(uint32_t)); 1523 for (unsigned i = 0; i < rhsWords; ++i) { 1524 uint64_t tmp = RHS[i]; 1525 V[i * 2] = Lo_32(tmp); 1526 V[i * 2 + 1] = Hi_32(tmp); 1527 } 1528 1529 // initialize the quotient and remainder 1530 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1531 if (Remainder) 1532 memset(R, 0, n * sizeof(uint32_t)); 1533 1534 // Now, adjust m and n for the Knuth division. n is the number of words in 1535 // the divisor. m is the number of words by which the dividend exceeds the 1536 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1537 // contain any zero words or the Knuth algorithm fails. 1538 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1539 n--; 1540 m++; 1541 } 1542 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1543 m--; 1544 1545 // If we're left with only a single word for the divisor, Knuth doesn't work 1546 // so we implement the short division algorithm here. This is much simpler 1547 // and faster because we are certain that we can divide a 64-bit quantity 1548 // by a 32-bit quantity at hardware speed and short division is simply a 1549 // series of such operations. This is just like doing short division but we 1550 // are using base 2^32 instead of base 10. 1551 assert(n != 0 && "Divide by zero?"); 1552 if (n == 1) { 1553 uint32_t divisor = V[0]; 1554 uint32_t remainder = 0; 1555 for (int i = m; i >= 0; i--) { 1556 uint64_t partial_dividend = Make_64(remainder, U[i]); 1557 if (partial_dividend == 0) { 1558 Q[i] = 0; 1559 remainder = 0; 1560 } else if (partial_dividend < divisor) { 1561 Q[i] = 0; 1562 remainder = Lo_32(partial_dividend); 1563 } else if (partial_dividend == divisor) { 1564 Q[i] = 1; 1565 remainder = 0; 1566 } else { 1567 Q[i] = Lo_32(partial_dividend / divisor); 1568 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1569 } 1570 } 1571 if (R) 1572 R[0] = remainder; 1573 } else { 1574 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1575 // case n > 1. 1576 KnuthDiv(U, V, Q, R, m, n); 1577 } 1578 1579 // If the caller wants the quotient 1580 if (Quotient) { 1581 for (unsigned i = 0; i < lhsWords; ++i) 1582 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1583 } 1584 1585 // If the caller wants the remainder 1586 if (Remainder) { 1587 for (unsigned i = 0; i < rhsWords; ++i) 1588 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1589 } 1590 1591 // Clean up the memory we allocated. 1592 if (U != &SPACE[0]) { 1593 delete [] U; 1594 delete [] V; 1595 delete [] Q; 1596 delete [] R; 1597 } 1598 } 1599 1600 APInt APInt::udiv(const APInt &RHS) const { 1601 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1602 1603 // First, deal with the easy case 1604 if (isSingleWord()) { 1605 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1606 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1607 } 1608 1609 // Get some facts about the LHS and RHS number of bits and words 1610 unsigned lhsWords = getNumWords(getActiveBits()); 1611 unsigned rhsBits = RHS.getActiveBits(); 1612 unsigned rhsWords = getNumWords(rhsBits); 1613 assert(rhsWords && "Divided by zero???"); 1614 1615 // Deal with some degenerate cases 1616 if (!lhsWords) 1617 // 0 / X ===> 0 1618 return APInt(BitWidth, 0); 1619 if (rhsBits == 1) 1620 // X / 1 ===> X 1621 return *this; 1622 if (lhsWords < rhsWords || this->ult(RHS)) 1623 // X / Y ===> 0, iff X < Y 1624 return APInt(BitWidth, 0); 1625 if (*this == RHS) 1626 // X / X ===> 1 1627 return APInt(BitWidth, 1); 1628 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1629 // All high words are zero, just use native divide 1630 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1631 1632 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1633 APInt Quotient(BitWidth, 0); // to hold result. 1634 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1635 return Quotient; 1636 } 1637 1638 APInt APInt::udiv(uint64_t RHS) const { 1639 assert(RHS != 0 && "Divide by zero?"); 1640 1641 // First, deal with the easy case 1642 if (isSingleWord()) 1643 return APInt(BitWidth, U.VAL / RHS); 1644 1645 // Get some facts about the LHS words. 1646 unsigned lhsWords = getNumWords(getActiveBits()); 1647 1648 // Deal with some degenerate cases 1649 if (!lhsWords) 1650 // 0 / X ===> 0 1651 return APInt(BitWidth, 0); 1652 if (RHS == 1) 1653 // X / 1 ===> X 1654 return *this; 1655 if (this->ult(RHS)) 1656 // X / Y ===> 0, iff X < Y 1657 return APInt(BitWidth, 0); 1658 if (*this == RHS) 1659 // X / X ===> 1 1660 return APInt(BitWidth, 1); 1661 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1662 // All high words are zero, just use native divide 1663 return APInt(BitWidth, this->U.pVal[0] / RHS); 1664 1665 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1666 APInt Quotient(BitWidth, 0); // to hold result. 1667 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1668 return Quotient; 1669 } 1670 1671 APInt APInt::sdiv(const APInt &RHS) const { 1672 if (isNegative()) { 1673 if (RHS.isNegative()) 1674 return (-(*this)).udiv(-RHS); 1675 return -((-(*this)).udiv(RHS)); 1676 } 1677 if (RHS.isNegative()) 1678 return -(this->udiv(-RHS)); 1679 return this->udiv(RHS); 1680 } 1681 1682 APInt APInt::sdiv(int64_t RHS) const { 1683 if (isNegative()) { 1684 if (RHS < 0) 1685 return (-(*this)).udiv(-RHS); 1686 return -((-(*this)).udiv(RHS)); 1687 } 1688 if (RHS < 0) 1689 return -(this->udiv(-RHS)); 1690 return this->udiv(RHS); 1691 } 1692 1693 APInt APInt::urem(const APInt &RHS) const { 1694 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1695 if (isSingleWord()) { 1696 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1697 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1698 } 1699 1700 // Get some facts about the LHS 1701 unsigned lhsWords = getNumWords(getActiveBits()); 1702 1703 // Get some facts about the RHS 1704 unsigned rhsBits = RHS.getActiveBits(); 1705 unsigned rhsWords = getNumWords(rhsBits); 1706 assert(rhsWords && "Performing remainder operation by zero ???"); 1707 1708 // Check the degenerate cases 1709 if (lhsWords == 0) 1710 // 0 % Y ===> 0 1711 return APInt(BitWidth, 0); 1712 if (rhsBits == 1) 1713 // X % 1 ===> 0 1714 return APInt(BitWidth, 0); 1715 if (lhsWords < rhsWords || this->ult(RHS)) 1716 // X % Y ===> X, iff X < Y 1717 return *this; 1718 if (*this == RHS) 1719 // X % X == 0; 1720 return APInt(BitWidth, 0); 1721 if (lhsWords == 1) 1722 // All high words are zero, just use native remainder 1723 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1724 1725 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1726 APInt Remainder(BitWidth, 0); 1727 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1728 return Remainder; 1729 } 1730 1731 uint64_t APInt::urem(uint64_t RHS) const { 1732 assert(RHS != 0 && "Remainder by zero?"); 1733 1734 if (isSingleWord()) 1735 return U.VAL % RHS; 1736 1737 // Get some facts about the LHS 1738 unsigned lhsWords = getNumWords(getActiveBits()); 1739 1740 // Check the degenerate cases 1741 if (lhsWords == 0) 1742 // 0 % Y ===> 0 1743 return 0; 1744 if (RHS == 1) 1745 // X % 1 ===> 0 1746 return 0; 1747 if (this->ult(RHS)) 1748 // X % Y ===> X, iff X < Y 1749 return getZExtValue(); 1750 if (*this == RHS) 1751 // X % X == 0; 1752 return 0; 1753 if (lhsWords == 1) 1754 // All high words are zero, just use native remainder 1755 return U.pVal[0] % RHS; 1756 1757 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1758 uint64_t Remainder; 1759 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1760 return Remainder; 1761 } 1762 1763 APInt APInt::srem(const APInt &RHS) const { 1764 if (isNegative()) { 1765 if (RHS.isNegative()) 1766 return -((-(*this)).urem(-RHS)); 1767 return -((-(*this)).urem(RHS)); 1768 } 1769 if (RHS.isNegative()) 1770 return this->urem(-RHS); 1771 return this->urem(RHS); 1772 } 1773 1774 int64_t APInt::srem(int64_t RHS) const { 1775 if (isNegative()) { 1776 if (RHS < 0) 1777 return -((-(*this)).urem(-RHS)); 1778 return -((-(*this)).urem(RHS)); 1779 } 1780 if (RHS < 0) 1781 return this->urem(-RHS); 1782 return this->urem(RHS); 1783 } 1784 1785 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1786 APInt &Quotient, APInt &Remainder) { 1787 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1788 unsigned BitWidth = LHS.BitWidth; 1789 1790 // First, deal with the easy case 1791 if (LHS.isSingleWord()) { 1792 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1793 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1794 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1795 Quotient = APInt(BitWidth, QuotVal); 1796 Remainder = APInt(BitWidth, RemVal); 1797 return; 1798 } 1799 1800 // Get some size facts about the dividend and divisor 1801 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1802 unsigned rhsBits = RHS.getActiveBits(); 1803 unsigned rhsWords = getNumWords(rhsBits); 1804 assert(rhsWords && "Performing divrem operation by zero ???"); 1805 1806 // Check the degenerate cases 1807 if (lhsWords == 0) { 1808 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1809 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1810 return; 1811 } 1812 1813 if (rhsBits == 1) { 1814 Quotient = LHS; // X / 1 ===> X 1815 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1816 } 1817 1818 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1819 Remainder = LHS; // X % Y ===> X, iff X < Y 1820 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1821 return; 1822 } 1823 1824 if (LHS == RHS) { 1825 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1826 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1827 return; 1828 } 1829 1830 // Make sure there is enough space to hold the results. 1831 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1832 // change the size. This is necessary if Quotient or Remainder is aliased 1833 // with LHS or RHS. 1834 Quotient.reallocate(BitWidth); 1835 Remainder.reallocate(BitWidth); 1836 1837 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1838 // There is only one word to consider so use the native versions. 1839 uint64_t lhsValue = LHS.U.pVal[0]; 1840 uint64_t rhsValue = RHS.U.pVal[0]; 1841 Quotient = lhsValue / rhsValue; 1842 Remainder = lhsValue % rhsValue; 1843 return; 1844 } 1845 1846 // Okay, lets do it the long way 1847 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1848 Remainder.U.pVal); 1849 // Clear the rest of the Quotient and Remainder. 1850 std::memset(Quotient.U.pVal + lhsWords, 0, 1851 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1852 std::memset(Remainder.U.pVal + rhsWords, 0, 1853 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1854 } 1855 1856 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1857 uint64_t &Remainder) { 1858 assert(RHS != 0 && "Divide by zero?"); 1859 unsigned BitWidth = LHS.BitWidth; 1860 1861 // First, deal with the easy case 1862 if (LHS.isSingleWord()) { 1863 uint64_t QuotVal = LHS.U.VAL / RHS; 1864 Remainder = LHS.U.VAL % RHS; 1865 Quotient = APInt(BitWidth, QuotVal); 1866 return; 1867 } 1868 1869 // Get some size facts about the dividend and divisor 1870 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1871 1872 // Check the degenerate cases 1873 if (lhsWords == 0) { 1874 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1875 Remainder = 0; // 0 % Y ===> 0 1876 return; 1877 } 1878 1879 if (RHS == 1) { 1880 Quotient = LHS; // X / 1 ===> X 1881 Remainder = 0; // X % 1 ===> 0 1882 return; 1883 } 1884 1885 if (LHS.ult(RHS)) { 1886 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1887 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1888 return; 1889 } 1890 1891 if (LHS == RHS) { 1892 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1893 Remainder = 0; // X % X ===> 0; 1894 return; 1895 } 1896 1897 // Make sure there is enough space to hold the results. 1898 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1899 // change the size. This is necessary if Quotient is aliased with LHS. 1900 Quotient.reallocate(BitWidth); 1901 1902 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1903 // There is only one word to consider so use the native versions. 1904 uint64_t lhsValue = LHS.U.pVal[0]; 1905 Quotient = lhsValue / RHS; 1906 Remainder = lhsValue % RHS; 1907 return; 1908 } 1909 1910 // Okay, lets do it the long way 1911 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1912 // Clear the rest of the Quotient. 1913 std::memset(Quotient.U.pVal + lhsWords, 0, 1914 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1915 } 1916 1917 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1918 APInt &Quotient, APInt &Remainder) { 1919 if (LHS.isNegative()) { 1920 if (RHS.isNegative()) 1921 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1922 else { 1923 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1924 Quotient.negate(); 1925 } 1926 Remainder.negate(); 1927 } else if (RHS.isNegative()) { 1928 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1929 Quotient.negate(); 1930 } else { 1931 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1932 } 1933 } 1934 1935 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1936 APInt &Quotient, int64_t &Remainder) { 1937 uint64_t R = Remainder; 1938 if (LHS.isNegative()) { 1939 if (RHS < 0) 1940 APInt::udivrem(-LHS, -RHS, Quotient, R); 1941 else { 1942 APInt::udivrem(-LHS, RHS, Quotient, R); 1943 Quotient.negate(); 1944 } 1945 R = -R; 1946 } else if (RHS < 0) { 1947 APInt::udivrem(LHS, -RHS, Quotient, R); 1948 Quotient.negate(); 1949 } else { 1950 APInt::udivrem(LHS, RHS, Quotient, R); 1951 } 1952 Remainder = R; 1953 } 1954 1955 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1956 APInt Res = *this+RHS; 1957 Overflow = isNonNegative() == RHS.isNonNegative() && 1958 Res.isNonNegative() != isNonNegative(); 1959 return Res; 1960 } 1961 1962 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1963 APInt Res = *this+RHS; 1964 Overflow = Res.ult(RHS); 1965 return Res; 1966 } 1967 1968 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1969 APInt Res = *this - RHS; 1970 Overflow = isNonNegative() != RHS.isNonNegative() && 1971 Res.isNonNegative() != isNonNegative(); 1972 return Res; 1973 } 1974 1975 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1976 APInt Res = *this-RHS; 1977 Overflow = Res.ugt(*this); 1978 return Res; 1979 } 1980 1981 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1982 // MININT/-1 --> overflow. 1983 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1984 return sdiv(RHS); 1985 } 1986 1987 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1988 APInt Res = *this * RHS; 1989 1990 if (*this != 0 && RHS != 0) 1991 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1992 else 1993 Overflow = false; 1994 return Res; 1995 } 1996 1997 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1998 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { 1999 Overflow = true; 2000 return *this * RHS; 2001 } 2002 2003 APInt Res = lshr(1) * RHS; 2004 Overflow = Res.isNegative(); 2005 Res <<= 1; 2006 if ((*this)[0]) { 2007 Res += RHS; 2008 if (Res.ult(RHS)) 2009 Overflow = true; 2010 } 2011 return Res; 2012 } 2013 2014 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 2015 Overflow = ShAmt.uge(getBitWidth()); 2016 if (Overflow) 2017 return APInt(BitWidth, 0); 2018 2019 if (isNonNegative()) // Don't allow sign change. 2020 Overflow = ShAmt.uge(countLeadingZeros()); 2021 else 2022 Overflow = ShAmt.uge(countLeadingOnes()); 2023 2024 return *this << ShAmt; 2025 } 2026 2027 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 2028 Overflow = ShAmt.uge(getBitWidth()); 2029 if (Overflow) 2030 return APInt(BitWidth, 0); 2031 2032 Overflow = ShAmt.ugt(countLeadingZeros()); 2033 2034 return *this << ShAmt; 2035 } 2036 2037 APInt APInt::sadd_sat(const APInt &RHS) const { 2038 bool Overflow; 2039 APInt Res = sadd_ov(RHS, Overflow); 2040 if (!Overflow) 2041 return Res; 2042 2043 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2044 : APInt::getSignedMaxValue(BitWidth); 2045 } 2046 2047 APInt APInt::uadd_sat(const APInt &RHS) const { 2048 bool Overflow; 2049 APInt Res = uadd_ov(RHS, Overflow); 2050 if (!Overflow) 2051 return Res; 2052 2053 return APInt::getMaxValue(BitWidth); 2054 } 2055 2056 APInt APInt::ssub_sat(const APInt &RHS) const { 2057 bool Overflow; 2058 APInt Res = ssub_ov(RHS, Overflow); 2059 if (!Overflow) 2060 return Res; 2061 2062 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2063 : APInt::getSignedMaxValue(BitWidth); 2064 } 2065 2066 APInt APInt::usub_sat(const APInt &RHS) const { 2067 bool Overflow; 2068 APInt Res = usub_ov(RHS, Overflow); 2069 if (!Overflow) 2070 return Res; 2071 2072 return APInt(BitWidth, 0); 2073 } 2074 2075 APInt APInt::smul_sat(const APInt &RHS) const { 2076 bool Overflow; 2077 APInt Res = smul_ov(RHS, Overflow); 2078 if (!Overflow) 2079 return Res; 2080 2081 // The result is negative if one and only one of inputs is negative. 2082 bool ResIsNegative = isNegative() ^ RHS.isNegative(); 2083 2084 return ResIsNegative ? APInt::getSignedMinValue(BitWidth) 2085 : APInt::getSignedMaxValue(BitWidth); 2086 } 2087 2088 APInt APInt::umul_sat(const APInt &RHS) const { 2089 bool Overflow; 2090 APInt Res = umul_ov(RHS, Overflow); 2091 if (!Overflow) 2092 return Res; 2093 2094 return APInt::getMaxValue(BitWidth); 2095 } 2096 2097 APInt APInt::sshl_sat(const APInt &RHS) const { 2098 bool Overflow; 2099 APInt Res = sshl_ov(RHS, Overflow); 2100 if (!Overflow) 2101 return Res; 2102 2103 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2104 : APInt::getSignedMaxValue(BitWidth); 2105 } 2106 2107 APInt APInt::ushl_sat(const APInt &RHS) const { 2108 bool Overflow; 2109 APInt Res = ushl_ov(RHS, Overflow); 2110 if (!Overflow) 2111 return Res; 2112 2113 return APInt::getMaxValue(BitWidth); 2114 } 2115 2116 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 2117 // Check our assumptions here 2118 assert(!str.empty() && "Invalid string length"); 2119 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 2120 radix == 36) && 2121 "Radix should be 2, 8, 10, 16, or 36!"); 2122 2123 StringRef::iterator p = str.begin(); 2124 size_t slen = str.size(); 2125 bool isNeg = *p == '-'; 2126 if (*p == '-' || *p == '+') { 2127 p++; 2128 slen--; 2129 assert(slen && "String is only a sign, needs a value."); 2130 } 2131 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2132 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2133 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2134 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2135 "Insufficient bit width"); 2136 2137 // Allocate memory if needed 2138 if (isSingleWord()) 2139 U.VAL = 0; 2140 else 2141 U.pVal = getClearedMemory(getNumWords()); 2142 2143 // Figure out if we can shift instead of multiply 2144 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2145 2146 // Enter digit traversal loop 2147 for (StringRef::iterator e = str.end(); p != e; ++p) { 2148 unsigned digit = getDigit(*p, radix); 2149 assert(digit < radix && "Invalid character in digit string"); 2150 2151 // Shift or multiply the value by the radix 2152 if (slen > 1) { 2153 if (shift) 2154 *this <<= shift; 2155 else 2156 *this *= radix; 2157 } 2158 2159 // Add in the digit we just interpreted 2160 *this += digit; 2161 } 2162 // If its negative, put it in two's complement form 2163 if (isNeg) 2164 this->negate(); 2165 } 2166 2167 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2168 bool Signed, bool formatAsCLiteral) const { 2169 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2170 Radix == 36) && 2171 "Radix should be 2, 8, 10, 16, or 36!"); 2172 2173 const char *Prefix = ""; 2174 if (formatAsCLiteral) { 2175 switch (Radix) { 2176 case 2: 2177 // Binary literals are a non-standard extension added in gcc 4.3: 2178 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2179 Prefix = "0b"; 2180 break; 2181 case 8: 2182 Prefix = "0"; 2183 break; 2184 case 10: 2185 break; // No prefix 2186 case 16: 2187 Prefix = "0x"; 2188 break; 2189 default: 2190 llvm_unreachable("Invalid radix!"); 2191 } 2192 } 2193 2194 // First, check for a zero value and just short circuit the logic below. 2195 if (*this == 0) { 2196 while (*Prefix) { 2197 Str.push_back(*Prefix); 2198 ++Prefix; 2199 }; 2200 Str.push_back('0'); 2201 return; 2202 } 2203 2204 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2205 2206 if (isSingleWord()) { 2207 char Buffer[65]; 2208 char *BufPtr = std::end(Buffer); 2209 2210 uint64_t N; 2211 if (!Signed) { 2212 N = getZExtValue(); 2213 } else { 2214 int64_t I = getSExtValue(); 2215 if (I >= 0) { 2216 N = I; 2217 } else { 2218 Str.push_back('-'); 2219 N = -(uint64_t)I; 2220 } 2221 } 2222 2223 while (*Prefix) { 2224 Str.push_back(*Prefix); 2225 ++Prefix; 2226 }; 2227 2228 while (N) { 2229 *--BufPtr = Digits[N % Radix]; 2230 N /= Radix; 2231 } 2232 Str.append(BufPtr, std::end(Buffer)); 2233 return; 2234 } 2235 2236 APInt Tmp(*this); 2237 2238 if (Signed && isNegative()) { 2239 // They want to print the signed version and it is a negative value 2240 // Flip the bits and add one to turn it into the equivalent positive 2241 // value and put a '-' in the result. 2242 Tmp.negate(); 2243 Str.push_back('-'); 2244 } 2245 2246 while (*Prefix) { 2247 Str.push_back(*Prefix); 2248 ++Prefix; 2249 }; 2250 2251 // We insert the digits backward, then reverse them to get the right order. 2252 unsigned StartDig = Str.size(); 2253 2254 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2255 // because the number of bits per digit (1, 3 and 4 respectively) divides 2256 // equally. We just shift until the value is zero. 2257 if (Radix == 2 || Radix == 8 || Radix == 16) { 2258 // Just shift tmp right for each digit width until it becomes zero 2259 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2260 unsigned MaskAmt = Radix - 1; 2261 2262 while (Tmp.getBoolValue()) { 2263 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2264 Str.push_back(Digits[Digit]); 2265 Tmp.lshrInPlace(ShiftAmt); 2266 } 2267 } else { 2268 while (Tmp.getBoolValue()) { 2269 uint64_t Digit; 2270 udivrem(Tmp, Radix, Tmp, Digit); 2271 assert(Digit < Radix && "divide failed"); 2272 Str.push_back(Digits[Digit]); 2273 } 2274 } 2275 2276 // Reverse the digits before returning. 2277 std::reverse(Str.begin()+StartDig, Str.end()); 2278 } 2279 2280 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2281 /// It is better to pass in a SmallVector/SmallString to the methods above. 2282 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2283 SmallString<40> S; 2284 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2285 return std::string(S.str()); 2286 } 2287 2288 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2289 LLVM_DUMP_METHOD void APInt::dump() const { 2290 SmallString<40> S, U; 2291 this->toStringUnsigned(U); 2292 this->toStringSigned(S); 2293 dbgs() << "APInt(" << BitWidth << "b, " 2294 << U << "u " << S << "s)\n"; 2295 } 2296 #endif 2297 2298 void APInt::print(raw_ostream &OS, bool isSigned) const { 2299 SmallString<40> S; 2300 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2301 OS << S; 2302 } 2303 2304 // This implements a variety of operations on a representation of 2305 // arbitrary precision, two's-complement, bignum integer values. 2306 2307 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2308 // and unrestricting assumption. 2309 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2310 "Part width must be divisible by 2!"); 2311 2312 /* Some handy functions local to this file. */ 2313 2314 /* Returns the integer part with the least significant BITS set. 2315 BITS cannot be zero. */ 2316 static inline APInt::WordType lowBitMask(unsigned bits) { 2317 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2318 2319 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2320 } 2321 2322 /* Returns the value of the lower half of PART. */ 2323 static inline APInt::WordType lowHalf(APInt::WordType part) { 2324 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2325 } 2326 2327 /* Returns the value of the upper half of PART. */ 2328 static inline APInt::WordType highHalf(APInt::WordType part) { 2329 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2330 } 2331 2332 /* Returns the bit number of the most significant set bit of a part. 2333 If the input number has no bits set -1U is returned. */ 2334 static unsigned partMSB(APInt::WordType value) { 2335 return findLastSet(value, ZB_Max); 2336 } 2337 2338 /* Returns the bit number of the least significant set bit of a 2339 part. If the input number has no bits set -1U is returned. */ 2340 static unsigned partLSB(APInt::WordType value) { 2341 return findFirstSet(value, ZB_Max); 2342 } 2343 2344 /* Sets the least significant part of a bignum to the input value, and 2345 zeroes out higher parts. */ 2346 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2347 assert(parts > 0); 2348 2349 dst[0] = part; 2350 for (unsigned i = 1; i < parts; i++) 2351 dst[i] = 0; 2352 } 2353 2354 /* Assign one bignum to another. */ 2355 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2356 for (unsigned i = 0; i < parts; i++) 2357 dst[i] = src[i]; 2358 } 2359 2360 /* Returns true if a bignum is zero, false otherwise. */ 2361 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2362 for (unsigned i = 0; i < parts; i++) 2363 if (src[i]) 2364 return false; 2365 2366 return true; 2367 } 2368 2369 /* Extract the given bit of a bignum; returns 0 or 1. */ 2370 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2371 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2372 } 2373 2374 /* Set the given bit of a bignum. */ 2375 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2376 parts[whichWord(bit)] |= maskBit(bit); 2377 } 2378 2379 /* Clears the given bit of a bignum. */ 2380 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2381 parts[whichWord(bit)] &= ~maskBit(bit); 2382 } 2383 2384 /* Returns the bit number of the least significant set bit of a 2385 number. If the input number has no bits set -1U is returned. */ 2386 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2387 for (unsigned i = 0; i < n; i++) { 2388 if (parts[i] != 0) { 2389 unsigned lsb = partLSB(parts[i]); 2390 2391 return lsb + i * APINT_BITS_PER_WORD; 2392 } 2393 } 2394 2395 return -1U; 2396 } 2397 2398 /* Returns the bit number of the most significant set bit of a number. 2399 If the input number has no bits set -1U is returned. */ 2400 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2401 do { 2402 --n; 2403 2404 if (parts[n] != 0) { 2405 unsigned msb = partMSB(parts[n]); 2406 2407 return msb + n * APINT_BITS_PER_WORD; 2408 } 2409 } while (n); 2410 2411 return -1U; 2412 } 2413 2414 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2415 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2416 the least significant bit of DST. All high bits above srcBITS in 2417 DST are zero-filled. */ 2418 void 2419 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2420 unsigned srcBits, unsigned srcLSB) { 2421 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2422 assert(dstParts <= dstCount); 2423 2424 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2425 tcAssign (dst, src + firstSrcPart, dstParts); 2426 2427 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2428 tcShiftRight (dst, dstParts, shift); 2429 2430 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2431 in DST. If this is less that srcBits, append the rest, else 2432 clear the high bits. */ 2433 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2434 if (n < srcBits) { 2435 WordType mask = lowBitMask (srcBits - n); 2436 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2437 << n % APINT_BITS_PER_WORD); 2438 } else if (n > srcBits) { 2439 if (srcBits % APINT_BITS_PER_WORD) 2440 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2441 } 2442 2443 /* Clear high parts. */ 2444 while (dstParts < dstCount) 2445 dst[dstParts++] = 0; 2446 } 2447 2448 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2449 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2450 WordType c, unsigned parts) { 2451 assert(c <= 1); 2452 2453 for (unsigned i = 0; i < parts; i++) { 2454 WordType l = dst[i]; 2455 if (c) { 2456 dst[i] += rhs[i] + 1; 2457 c = (dst[i] <= l); 2458 } else { 2459 dst[i] += rhs[i]; 2460 c = (dst[i] < l); 2461 } 2462 } 2463 2464 return c; 2465 } 2466 2467 /// This function adds a single "word" integer, src, to the multiple 2468 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2469 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2470 /// @returns the carry of the addition. 2471 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2472 unsigned parts) { 2473 for (unsigned i = 0; i < parts; ++i) { 2474 dst[i] += src; 2475 if (dst[i] >= src) 2476 return 0; // No need to carry so exit early. 2477 src = 1; // Carry one to next digit. 2478 } 2479 2480 return 1; 2481 } 2482 2483 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2484 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2485 WordType c, unsigned parts) { 2486 assert(c <= 1); 2487 2488 for (unsigned i = 0; i < parts; i++) { 2489 WordType l = dst[i]; 2490 if (c) { 2491 dst[i] -= rhs[i] + 1; 2492 c = (dst[i] >= l); 2493 } else { 2494 dst[i] -= rhs[i]; 2495 c = (dst[i] > l); 2496 } 2497 } 2498 2499 return c; 2500 } 2501 2502 /// This function subtracts a single "word" (64-bit word), src, from 2503 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2504 /// no further borrowing is needed or it runs out of "words" in dst. The result 2505 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2506 /// exhausted. In other words, if src > dst then this function returns 1, 2507 /// otherwise 0. 2508 /// @returns the borrow out of the subtraction 2509 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2510 unsigned parts) { 2511 for (unsigned i = 0; i < parts; ++i) { 2512 WordType Dst = dst[i]; 2513 dst[i] -= src; 2514 if (src <= Dst) 2515 return 0; // No need to borrow so exit early. 2516 src = 1; // We have to "borrow 1" from next "word" 2517 } 2518 2519 return 1; 2520 } 2521 2522 /* Negate a bignum in-place. */ 2523 void APInt::tcNegate(WordType *dst, unsigned parts) { 2524 tcComplement(dst, parts); 2525 tcIncrement(dst, parts); 2526 } 2527 2528 /* DST += SRC * MULTIPLIER + CARRY if add is true 2529 DST = SRC * MULTIPLIER + CARRY if add is false 2530 2531 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2532 they must start at the same point, i.e. DST == SRC. 2533 2534 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2535 returned. Otherwise DST is filled with the least significant 2536 DSTPARTS parts of the result, and if all of the omitted higher 2537 parts were zero return zero, otherwise overflow occurred and 2538 return one. */ 2539 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2540 WordType multiplier, WordType carry, 2541 unsigned srcParts, unsigned dstParts, 2542 bool add) { 2543 /* Otherwise our writes of DST kill our later reads of SRC. */ 2544 assert(dst <= src || dst >= src + srcParts); 2545 assert(dstParts <= srcParts + 1); 2546 2547 /* N loops; minimum of dstParts and srcParts. */ 2548 unsigned n = std::min(dstParts, srcParts); 2549 2550 for (unsigned i = 0; i < n; i++) { 2551 WordType low, mid, high, srcPart; 2552 2553 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2554 2555 This cannot overflow, because 2556 2557 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2558 2559 which is less than n^2. */ 2560 2561 srcPart = src[i]; 2562 2563 if (multiplier == 0 || srcPart == 0) { 2564 low = carry; 2565 high = 0; 2566 } else { 2567 low = lowHalf(srcPart) * lowHalf(multiplier); 2568 high = highHalf(srcPart) * highHalf(multiplier); 2569 2570 mid = lowHalf(srcPart) * highHalf(multiplier); 2571 high += highHalf(mid); 2572 mid <<= APINT_BITS_PER_WORD / 2; 2573 if (low + mid < low) 2574 high++; 2575 low += mid; 2576 2577 mid = highHalf(srcPart) * lowHalf(multiplier); 2578 high += highHalf(mid); 2579 mid <<= APINT_BITS_PER_WORD / 2; 2580 if (low + mid < low) 2581 high++; 2582 low += mid; 2583 2584 /* Now add carry. */ 2585 if (low + carry < low) 2586 high++; 2587 low += carry; 2588 } 2589 2590 if (add) { 2591 /* And now DST[i], and store the new low part there. */ 2592 if (low + dst[i] < low) 2593 high++; 2594 dst[i] += low; 2595 } else 2596 dst[i] = low; 2597 2598 carry = high; 2599 } 2600 2601 if (srcParts < dstParts) { 2602 /* Full multiplication, there is no overflow. */ 2603 assert(srcParts + 1 == dstParts); 2604 dst[srcParts] = carry; 2605 return 0; 2606 } 2607 2608 /* We overflowed if there is carry. */ 2609 if (carry) 2610 return 1; 2611 2612 /* We would overflow if any significant unwritten parts would be 2613 non-zero. This is true if any remaining src parts are non-zero 2614 and the multiplier is non-zero. */ 2615 if (multiplier) 2616 for (unsigned i = dstParts; i < srcParts; i++) 2617 if (src[i]) 2618 return 1; 2619 2620 /* We fitted in the narrow destination. */ 2621 return 0; 2622 } 2623 2624 /* DST = LHS * RHS, where DST has the same width as the operands and 2625 is filled with the least significant parts of the result. Returns 2626 one if overflow occurred, otherwise zero. DST must be disjoint 2627 from both operands. */ 2628 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2629 const WordType *rhs, unsigned parts) { 2630 assert(dst != lhs && dst != rhs); 2631 2632 int overflow = 0; 2633 tcSet(dst, 0, parts); 2634 2635 for (unsigned i = 0; i < parts; i++) 2636 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2637 parts - i, true); 2638 2639 return overflow; 2640 } 2641 2642 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2643 /// operands. No overflow occurs. DST must be disjoint from both operands. 2644 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2645 const WordType *rhs, unsigned lhsParts, 2646 unsigned rhsParts) { 2647 /* Put the narrower number on the LHS for less loops below. */ 2648 if (lhsParts > rhsParts) 2649 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2650 2651 assert(dst != lhs && dst != rhs); 2652 2653 tcSet(dst, 0, rhsParts); 2654 2655 for (unsigned i = 0; i < lhsParts; i++) 2656 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2657 } 2658 2659 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2660 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2661 set REMAINDER to the remainder, return zero. i.e. 2662 2663 OLD_LHS = RHS * LHS + REMAINDER 2664 2665 SCRATCH is a bignum of the same size as the operands and result for 2666 use by the routine; its contents need not be initialized and are 2667 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2668 */ 2669 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2670 WordType *remainder, WordType *srhs, 2671 unsigned parts) { 2672 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2673 2674 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2675 if (shiftCount == 0) 2676 return true; 2677 2678 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2679 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2680 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2681 2682 tcAssign(srhs, rhs, parts); 2683 tcShiftLeft(srhs, parts, shiftCount); 2684 tcAssign(remainder, lhs, parts); 2685 tcSet(lhs, 0, parts); 2686 2687 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2688 the total. */ 2689 for (;;) { 2690 int compare = tcCompare(remainder, srhs, parts); 2691 if (compare >= 0) { 2692 tcSubtract(remainder, srhs, 0, parts); 2693 lhs[n] |= mask; 2694 } 2695 2696 if (shiftCount == 0) 2697 break; 2698 shiftCount--; 2699 tcShiftRight(srhs, parts, 1); 2700 if ((mask >>= 1) == 0) { 2701 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2702 n--; 2703 } 2704 } 2705 2706 return false; 2707 } 2708 2709 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2710 /// no restrictions on Count. 2711 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2712 // Don't bother performing a no-op shift. 2713 if (!Count) 2714 return; 2715 2716 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2717 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2718 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2719 2720 // Fastpath for moving by whole words. 2721 if (BitShift == 0) { 2722 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2723 } else { 2724 while (Words-- > WordShift) { 2725 Dst[Words] = Dst[Words - WordShift] << BitShift; 2726 if (Words > WordShift) 2727 Dst[Words] |= 2728 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2729 } 2730 } 2731 2732 // Fill in the remainder with 0s. 2733 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2734 } 2735 2736 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2737 /// are no restrictions on Count. 2738 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2739 // Don't bother performing a no-op shift. 2740 if (!Count) 2741 return; 2742 2743 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2744 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2745 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2746 2747 unsigned WordsToMove = Words - WordShift; 2748 // Fastpath for moving by whole words. 2749 if (BitShift == 0) { 2750 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2751 } else { 2752 for (unsigned i = 0; i != WordsToMove; ++i) { 2753 Dst[i] = Dst[i + WordShift] >> BitShift; 2754 if (i + 1 != WordsToMove) 2755 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2756 } 2757 } 2758 2759 // Fill in the remainder with 0s. 2760 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2761 } 2762 2763 /* Bitwise and of two bignums. */ 2764 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2765 for (unsigned i = 0; i < parts; i++) 2766 dst[i] &= rhs[i]; 2767 } 2768 2769 /* Bitwise inclusive or of two bignums. */ 2770 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2771 for (unsigned i = 0; i < parts; i++) 2772 dst[i] |= rhs[i]; 2773 } 2774 2775 /* Bitwise exclusive or of two bignums. */ 2776 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2777 for (unsigned i = 0; i < parts; i++) 2778 dst[i] ^= rhs[i]; 2779 } 2780 2781 /* Complement a bignum in-place. */ 2782 void APInt::tcComplement(WordType *dst, unsigned parts) { 2783 for (unsigned i = 0; i < parts; i++) 2784 dst[i] = ~dst[i]; 2785 } 2786 2787 /* Comparison (unsigned) of two bignums. */ 2788 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2789 unsigned parts) { 2790 while (parts) { 2791 parts--; 2792 if (lhs[parts] != rhs[parts]) 2793 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2794 } 2795 2796 return 0; 2797 } 2798 2799 /* Set the least significant BITS bits of a bignum, clear the 2800 rest. */ 2801 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2802 unsigned bits) { 2803 unsigned i = 0; 2804 while (bits > APINT_BITS_PER_WORD) { 2805 dst[i++] = ~(WordType) 0; 2806 bits -= APINT_BITS_PER_WORD; 2807 } 2808 2809 if (bits) 2810 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2811 2812 while (i < parts) 2813 dst[i++] = 0; 2814 } 2815 2816 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2817 APInt::Rounding RM) { 2818 // Currently udivrem always rounds down. 2819 switch (RM) { 2820 case APInt::Rounding::DOWN: 2821 case APInt::Rounding::TOWARD_ZERO: 2822 return A.udiv(B); 2823 case APInt::Rounding::UP: { 2824 APInt Quo, Rem; 2825 APInt::udivrem(A, B, Quo, Rem); 2826 if (Rem == 0) 2827 return Quo; 2828 return Quo + 1; 2829 } 2830 } 2831 llvm_unreachable("Unknown APInt::Rounding enum"); 2832 } 2833 2834 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2835 APInt::Rounding RM) { 2836 switch (RM) { 2837 case APInt::Rounding::DOWN: 2838 case APInt::Rounding::UP: { 2839 APInt Quo, Rem; 2840 APInt::sdivrem(A, B, Quo, Rem); 2841 if (Rem == 0) 2842 return Quo; 2843 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2844 // We want to check whether the non-integer part of the mathematical value 2845 // is negative or not. If the non-integer part is negative, we need to round 2846 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2847 // already rounded down. 2848 if (RM == APInt::Rounding::DOWN) { 2849 if (Rem.isNegative() != B.isNegative()) 2850 return Quo - 1; 2851 return Quo; 2852 } 2853 if (Rem.isNegative() != B.isNegative()) 2854 return Quo; 2855 return Quo + 1; 2856 } 2857 // Currently sdiv rounds towards zero. 2858 case APInt::Rounding::TOWARD_ZERO: 2859 return A.sdiv(B); 2860 } 2861 llvm_unreachable("Unknown APInt::Rounding enum"); 2862 } 2863 2864 Optional<APInt> 2865 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2866 unsigned RangeWidth) { 2867 unsigned CoeffWidth = A.getBitWidth(); 2868 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2869 assert(RangeWidth <= CoeffWidth && 2870 "Value range width should be less than coefficient width"); 2871 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2872 2873 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2874 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2875 2876 // Identify 0 as a (non)solution immediately. 2877 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2878 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2879 return APInt(CoeffWidth, 0); 2880 } 2881 2882 // The result of APInt arithmetic has the same bit width as the operands, 2883 // so it can actually lose high bits. A product of two n-bit integers needs 2884 // 2n-1 bits to represent the full value. 2885 // The operation done below (on quadratic coefficients) that can produce 2886 // the largest value is the evaluation of the equation during bisection, 2887 // which needs 3 times the bitwidth of the coefficient, so the total number 2888 // of required bits is 3n. 2889 // 2890 // The purpose of this extension is to simulate the set Z of all integers, 2891 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2892 // and negative numbers (not so much in a modulo arithmetic). The method 2893 // used to solve the equation is based on the standard formula for real 2894 // numbers, and uses the concepts of "positive" and "negative" with their 2895 // usual meanings. 2896 CoeffWidth *= 3; 2897 A = A.sext(CoeffWidth); 2898 B = B.sext(CoeffWidth); 2899 C = C.sext(CoeffWidth); 2900 2901 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2902 // the bit width has increased. 2903 if (A.isNegative()) { 2904 A.negate(); 2905 B.negate(); 2906 C.negate(); 2907 } 2908 2909 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2910 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2911 // and R = 2^BitWidth. 2912 // Since we're trying not only to find exact solutions, but also values 2913 // that "wrap around", such a set will always have a solution, i.e. an x 2914 // that satisfies at least one of the equations, or such that |q(x)| 2915 // exceeds kR, while |q(x-1)| for the same k does not. 2916 // 2917 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2918 // positive solution n (in the above sense), and also such that the n 2919 // will be the least among all solutions corresponding to k = 0, 1, ... 2920 // (more precisely, the least element in the set 2921 // { n(k) | k is such that a solution n(k) exists }). 2922 // 2923 // Consider the parabola (over real numbers) that corresponds to the 2924 // quadratic equation. Since A > 0, the arms of the parabola will point 2925 // up. Picking different values of k will shift it up and down by R. 2926 // 2927 // We want to shift the parabola in such a way as to reduce the problem 2928 // of solving q(x) = kR to solving shifted_q(x) = 0. 2929 // (The interesting solutions are the ceilings of the real number 2930 // solutions.) 2931 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2932 APInt TwoA = 2 * A; 2933 APInt SqrB = B * B; 2934 bool PickLow; 2935 2936 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2937 assert(A.isStrictlyPositive()); 2938 APInt T = V.abs().urem(A); 2939 if (T.isNullValue()) 2940 return V; 2941 return V.isNegative() ? V+T : V+(A-T); 2942 }; 2943 2944 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2945 // iff B is positive. 2946 if (B.isNonNegative()) { 2947 // If B >= 0, the vertex it at a negative location (or at 0), so in 2948 // order to have a non-negative solution we need to pick k that makes 2949 // C-kR negative. To satisfy all the requirements for the solution 2950 // that we are looking for, it needs to be closest to 0 of all k. 2951 C = C.srem(R); 2952 if (C.isStrictlyPositive()) 2953 C -= R; 2954 // Pick the greater solution. 2955 PickLow = false; 2956 } else { 2957 // If B < 0, the vertex is at a positive location. For any solution 2958 // to exist, the discriminant must be non-negative. This means that 2959 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2960 // lower bound on values of k: kR >= C - B^2/4A. 2961 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2962 // Round LowkR up (towards +inf) to the nearest kR. 2963 LowkR = RoundUp(LowkR, R); 2964 2965 // If there exists k meeting the condition above, and such that 2966 // C-kR > 0, there will be two positive real number solutions of 2967 // q(x) = kR. Out of all such values of k, pick the one that makes 2968 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2969 // In other words, find maximum k such that LowkR <= kR < C. 2970 if (C.sgt(LowkR)) { 2971 // If LowkR < C, then such a k is guaranteed to exist because 2972 // LowkR itself is a multiple of R. 2973 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2974 // Pick the smaller solution. 2975 PickLow = true; 2976 } else { 2977 // If C-kR < 0 for all potential k's, it means that one solution 2978 // will be negative, while the other will be positive. The positive 2979 // solution will shift towards 0 if the parabola is moved up. 2980 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2981 // to 0, or in other words, out of all parabolas that have solutions, 2982 // pick the one that is the farthest "up"). 2983 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2984 C -= LowkR; 2985 // Pick the greater solution. 2986 PickLow = false; 2987 } 2988 } 2989 2990 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2991 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2992 2993 APInt D = SqrB - 4*A*C; 2994 assert(D.isNonNegative() && "Negative discriminant"); 2995 APInt SQ = D.sqrt(); 2996 2997 APInt Q = SQ * SQ; 2998 bool InexactSQ = Q != D; 2999 // The calculated SQ may actually be greater than the exact (non-integer) 3000 // value. If that's the case, decrement SQ to get a value that is lower. 3001 if (Q.sgt(D)) 3002 SQ -= 1; 3003 3004 APInt X; 3005 APInt Rem; 3006 3007 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 3008 // When using the quadratic formula directly, the calculated low root 3009 // may be greater than the exact one, since we would be subtracting SQ. 3010 // To make sure that the calculated root is not greater than the exact 3011 // one, subtract SQ+1 when calculating the low root (for inexact value 3012 // of SQ). 3013 if (PickLow) 3014 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 3015 else 3016 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 3017 3018 // The updated coefficients should be such that the (exact) solution is 3019 // positive. Since APInt division rounds towards 0, the calculated one 3020 // can be 0, but cannot be negative. 3021 assert(X.isNonNegative() && "Solution should be non-negative"); 3022 3023 if (!InexactSQ && Rem.isNullValue()) { 3024 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 3025 return X; 3026 } 3027 3028 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 3029 // The exact value of the square root of D should be between SQ and SQ+1. 3030 // This implies that the solution should be between that corresponding to 3031 // SQ (i.e. X) and that corresponding to SQ+1. 3032 // 3033 // The calculated X cannot be greater than the exact (real) solution. 3034 // Actually it must be strictly less than the exact solution, while 3035 // X+1 will be greater than or equal to it. 3036 3037 APInt VX = (A*X + B)*X + C; 3038 APInt VY = VX + TwoA*X + A + B; 3039 bool SignChange = VX.isNegative() != VY.isNegative() || 3040 VX.isNullValue() != VY.isNullValue(); 3041 // If the sign did not change between X and X+1, X is not a valid solution. 3042 // This could happen when the actual (exact) roots don't have an integer 3043 // between them, so they would both be contained between X and X+1. 3044 if (!SignChange) { 3045 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 3046 return None; 3047 } 3048 3049 X += 1; 3050 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 3051 return X; 3052 } 3053 3054 Optional<unsigned> 3055 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { 3056 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); 3057 if (A == B) 3058 return llvm::None; 3059 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); 3060 } 3061 3062 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst 3063 /// with the integer held in IntVal. 3064 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, 3065 unsigned StoreBytes) { 3066 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); 3067 const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); 3068 3069 if (sys::IsLittleEndianHost) { 3070 // Little-endian host - the source is ordered from LSB to MSB. Order the 3071 // destination from LSB to MSB: Do a straight copy. 3072 memcpy(Dst, Src, StoreBytes); 3073 } else { 3074 // Big-endian host - the source is an array of 64 bit words ordered from 3075 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination 3076 // from MSB to LSB: Reverse the word order, but not the bytes in a word. 3077 while (StoreBytes > sizeof(uint64_t)) { 3078 StoreBytes -= sizeof(uint64_t); 3079 // May not be aligned so use memcpy. 3080 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); 3081 Src += sizeof(uint64_t); 3082 } 3083 3084 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); 3085 } 3086 } 3087 3088 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting 3089 /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 3090 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, 3091 unsigned LoadBytes) { 3092 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); 3093 uint8_t *Dst = reinterpret_cast<uint8_t *>( 3094 const_cast<uint64_t *>(IntVal.getRawData())); 3095 3096 if (sys::IsLittleEndianHost) 3097 // Little-endian host - the destination must be ordered from LSB to MSB. 3098 // The source is ordered from LSB to MSB: Do a straight copy. 3099 memcpy(Dst, Src, LoadBytes); 3100 else { 3101 // Big-endian - the destination is an array of 64 bit words ordered from 3102 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is 3103 // ordered from MSB to LSB: Reverse the word order, but not the bytes in 3104 // a word. 3105 while (LoadBytes > sizeof(uint64_t)) { 3106 LoadBytes -= sizeof(uint64_t); 3107 // May not be aligned so use memcpy. 3108 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); 3109 Dst += sizeof(uint64_t); 3110 } 3111 3112 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); 3113 } 3114 } 3115