1 //===-- APInt.cpp - Implement APInt class ---------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements a class to represent arbitrary precision integer 10 // constant values and provide a variety of arithmetic operations on them. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/APInt.h" 15 #include "llvm/ADT/ArrayRef.h" 16 #include "llvm/ADT/FoldingSet.h" 17 #include "llvm/ADT/Hashing.h" 18 #include "llvm/ADT/Optional.h" 19 #include "llvm/ADT/SmallString.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/ADT/bit.h" 22 #include "llvm/Config/llvm-config.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/ErrorHandling.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/raw_ostream.h" 27 #include <climits> 28 #include <cmath> 29 #include <cstdlib> 30 #include <cstring> 31 using namespace llvm; 32 33 #define DEBUG_TYPE "apint" 34 35 /// A utility function for allocating memory, checking for allocation failures, 36 /// and ensuring the contents are zeroed. 37 inline static uint64_t* getClearedMemory(unsigned numWords) { 38 uint64_t *result = new uint64_t[numWords]; 39 memset(result, 0, numWords * sizeof(uint64_t)); 40 return result; 41 } 42 43 /// A utility function for allocating memory and checking for allocation 44 /// failure. The content is not zeroed. 45 inline static uint64_t* getMemory(unsigned numWords) { 46 return new uint64_t[numWords]; 47 } 48 49 /// A utility function that converts a character to a digit. 50 inline static unsigned getDigit(char cdigit, uint8_t radix) { 51 unsigned r; 52 53 if (radix == 16 || radix == 36) { 54 r = cdigit - '0'; 55 if (r <= 9) 56 return r; 57 58 r = cdigit - 'A'; 59 if (r <= radix - 11U) 60 return r + 10; 61 62 r = cdigit - 'a'; 63 if (r <= radix - 11U) 64 return r + 10; 65 66 radix = 10; 67 } 68 69 r = cdigit - '0'; 70 if (r < radix) 71 return r; 72 73 return -1U; 74 } 75 76 77 void APInt::initSlowCase(uint64_t val, bool isSigned) { 78 U.pVal = getClearedMemory(getNumWords()); 79 U.pVal[0] = val; 80 if (isSigned && int64_t(val) < 0) 81 for (unsigned i = 1; i < getNumWords(); ++i) 82 U.pVal[i] = WORDTYPE_MAX; 83 clearUnusedBits(); 84 } 85 86 void APInt::initSlowCase(const APInt& that) { 87 U.pVal = getMemory(getNumWords()); 88 memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE); 89 } 90 91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) { 92 assert(BitWidth && "Bitwidth too small"); 93 assert(bigVal.data() && "Null pointer detected!"); 94 if (isSingleWord()) 95 U.VAL = bigVal[0]; 96 else { 97 // Get memory, cleared to 0 98 U.pVal = getClearedMemory(getNumWords()); 99 // Calculate the number of words to copy 100 unsigned words = std::min<unsigned>(bigVal.size(), getNumWords()); 101 // Copy the words from bigVal to pVal 102 memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE); 103 } 104 // Make sure unused high bits are cleared 105 clearUnusedBits(); 106 } 107 108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal) 109 : BitWidth(numBits) { 110 initFromArray(bigVal); 111 } 112 113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) 114 : BitWidth(numBits) { 115 initFromArray(makeArrayRef(bigVal, numWords)); 116 } 117 118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) 119 : BitWidth(numbits) { 120 assert(BitWidth && "Bitwidth too small"); 121 fromString(numbits, Str, radix); 122 } 123 124 void APInt::reallocate(unsigned NewBitWidth) { 125 // If the number of words is the same we can just change the width and stop. 126 if (getNumWords() == getNumWords(NewBitWidth)) { 127 BitWidth = NewBitWidth; 128 return; 129 } 130 131 // If we have an allocation, delete it. 132 if (!isSingleWord()) 133 delete [] U.pVal; 134 135 // Update BitWidth. 136 BitWidth = NewBitWidth; 137 138 // If we are supposed to have an allocation, create it. 139 if (!isSingleWord()) 140 U.pVal = getMemory(getNumWords()); 141 } 142 143 void APInt::AssignSlowCase(const APInt& RHS) { 144 // Don't do anything for X = X 145 if (this == &RHS) 146 return; 147 148 // Adjust the bit width and handle allocations as necessary. 149 reallocate(RHS.getBitWidth()); 150 151 // Copy the data. 152 if (isSingleWord()) 153 U.VAL = RHS.U.VAL; 154 else 155 memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE); 156 } 157 158 /// This method 'profiles' an APInt for use with FoldingSet. 159 void APInt::Profile(FoldingSetNodeID& ID) const { 160 ID.AddInteger(BitWidth); 161 162 if (isSingleWord()) { 163 ID.AddInteger(U.VAL); 164 return; 165 } 166 167 unsigned NumWords = getNumWords(); 168 for (unsigned i = 0; i < NumWords; ++i) 169 ID.AddInteger(U.pVal[i]); 170 } 171 172 /// Prefix increment operator. Increments the APInt by one. 173 APInt& APInt::operator++() { 174 if (isSingleWord()) 175 ++U.VAL; 176 else 177 tcIncrement(U.pVal, getNumWords()); 178 return clearUnusedBits(); 179 } 180 181 /// Prefix decrement operator. Decrements the APInt by one. 182 APInt& APInt::operator--() { 183 if (isSingleWord()) 184 --U.VAL; 185 else 186 tcDecrement(U.pVal, getNumWords()); 187 return clearUnusedBits(); 188 } 189 190 /// Adds the RHS APInt to this APInt. 191 /// @returns this, after addition of RHS. 192 /// Addition assignment operator. 193 APInt& APInt::operator+=(const APInt& RHS) { 194 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 195 if (isSingleWord()) 196 U.VAL += RHS.U.VAL; 197 else 198 tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords()); 199 return clearUnusedBits(); 200 } 201 202 APInt& APInt::operator+=(uint64_t RHS) { 203 if (isSingleWord()) 204 U.VAL += RHS; 205 else 206 tcAddPart(U.pVal, RHS, getNumWords()); 207 return clearUnusedBits(); 208 } 209 210 /// Subtracts the RHS APInt from this APInt 211 /// @returns this, after subtraction 212 /// Subtraction assignment operator. 213 APInt& APInt::operator-=(const APInt& RHS) { 214 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 215 if (isSingleWord()) 216 U.VAL -= RHS.U.VAL; 217 else 218 tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords()); 219 return clearUnusedBits(); 220 } 221 222 APInt& APInt::operator-=(uint64_t RHS) { 223 if (isSingleWord()) 224 U.VAL -= RHS; 225 else 226 tcSubtractPart(U.pVal, RHS, getNumWords()); 227 return clearUnusedBits(); 228 } 229 230 APInt APInt::operator*(const APInt& RHS) const { 231 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 232 if (isSingleWord()) 233 return APInt(BitWidth, U.VAL * RHS.U.VAL); 234 235 APInt Result(getMemory(getNumWords()), getBitWidth()); 236 237 tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords()); 238 239 Result.clearUnusedBits(); 240 return Result; 241 } 242 243 void APInt::AndAssignSlowCase(const APInt& RHS) { 244 tcAnd(U.pVal, RHS.U.pVal, getNumWords()); 245 } 246 247 void APInt::OrAssignSlowCase(const APInt& RHS) { 248 tcOr(U.pVal, RHS.U.pVal, getNumWords()); 249 } 250 251 void APInt::XorAssignSlowCase(const APInt& RHS) { 252 tcXor(U.pVal, RHS.U.pVal, getNumWords()); 253 } 254 255 APInt& APInt::operator*=(const APInt& RHS) { 256 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 257 *this = *this * RHS; 258 return *this; 259 } 260 261 APInt& APInt::operator*=(uint64_t RHS) { 262 if (isSingleWord()) { 263 U.VAL *= RHS; 264 } else { 265 unsigned NumWords = getNumWords(); 266 tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false); 267 } 268 return clearUnusedBits(); 269 } 270 271 bool APInt::EqualSlowCase(const APInt& RHS) const { 272 return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal); 273 } 274 275 int APInt::compare(const APInt& RHS) const { 276 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 277 if (isSingleWord()) 278 return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL; 279 280 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 281 } 282 283 int APInt::compareSigned(const APInt& RHS) const { 284 assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); 285 if (isSingleWord()) { 286 int64_t lhsSext = SignExtend64(U.VAL, BitWidth); 287 int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth); 288 return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; 289 } 290 291 bool lhsNeg = isNegative(); 292 bool rhsNeg = RHS.isNegative(); 293 294 // If the sign bits don't match, then (LHS < RHS) if LHS is negative 295 if (lhsNeg != rhsNeg) 296 return lhsNeg ? -1 : 1; 297 298 // Otherwise we can just use an unsigned comparison, because even negative 299 // numbers compare correctly this way if both have the same signed-ness. 300 return tcCompare(U.pVal, RHS.U.pVal, getNumWords()); 301 } 302 303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) { 304 unsigned loWord = whichWord(loBit); 305 unsigned hiWord = whichWord(hiBit); 306 307 // Create an initial mask for the low word with zeros below loBit. 308 uint64_t loMask = WORDTYPE_MAX << whichBit(loBit); 309 310 // If hiBit is not aligned, we need a high mask. 311 unsigned hiShiftAmt = whichBit(hiBit); 312 if (hiShiftAmt != 0) { 313 // Create a high mask with zeros above hiBit. 314 uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt); 315 // If loWord and hiWord are equal, then we combine the masks. Otherwise, 316 // set the bits in hiWord. 317 if (hiWord == loWord) 318 loMask &= hiMask; 319 else 320 U.pVal[hiWord] |= hiMask; 321 } 322 // Apply the mask to the low word. 323 U.pVal[loWord] |= loMask; 324 325 // Fill any words between loWord and hiWord with all ones. 326 for (unsigned word = loWord + 1; word < hiWord; ++word) 327 U.pVal[word] = WORDTYPE_MAX; 328 } 329 330 /// Toggle every bit to its opposite value. 331 void APInt::flipAllBitsSlowCase() { 332 tcComplement(U.pVal, getNumWords()); 333 clearUnusedBits(); 334 } 335 336 /// Toggle a given bit to its opposite value whose position is given 337 /// as "bitPosition". 338 /// Toggles a given bit to its opposite value. 339 void APInt::flipBit(unsigned bitPosition) { 340 assert(bitPosition < BitWidth && "Out of the bit-width range!"); 341 if ((*this)[bitPosition]) clearBit(bitPosition); 342 else setBit(bitPosition); 343 } 344 345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) { 346 unsigned subBitWidth = subBits.getBitWidth(); 347 assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth && 348 "Illegal bit insertion"); 349 350 // Insertion is a direct copy. 351 if (subBitWidth == BitWidth) { 352 *this = subBits; 353 return; 354 } 355 356 // Single word result can be done as a direct bitmask. 357 if (isSingleWord()) { 358 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 359 U.VAL &= ~(mask << bitPosition); 360 U.VAL |= (subBits.U.VAL << bitPosition); 361 return; 362 } 363 364 unsigned loBit = whichBit(bitPosition); 365 unsigned loWord = whichWord(bitPosition); 366 unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1); 367 368 // Insertion within a single word can be done as a direct bitmask. 369 if (loWord == hi1Word) { 370 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth); 371 U.pVal[loWord] &= ~(mask << loBit); 372 U.pVal[loWord] |= (subBits.U.VAL << loBit); 373 return; 374 } 375 376 // Insert on word boundaries. 377 if (loBit == 0) { 378 // Direct copy whole words. 379 unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD; 380 memcpy(U.pVal + loWord, subBits.getRawData(), 381 numWholeSubWords * APINT_WORD_SIZE); 382 383 // Mask+insert remaining bits. 384 unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD; 385 if (remainingBits != 0) { 386 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits); 387 U.pVal[hi1Word] &= ~mask; 388 U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1); 389 } 390 return; 391 } 392 393 // General case - set/clear individual bits in dst based on src. 394 // TODO - there is scope for optimization here, but at the moment this code 395 // path is barely used so prefer readability over performance. 396 for (unsigned i = 0; i != subBitWidth; ++i) { 397 if (subBits[i]) 398 setBit(bitPosition + i); 399 else 400 clearBit(bitPosition + i); 401 } 402 } 403 404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) { 405 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 406 subBits &= maskBits; 407 if (isSingleWord()) { 408 U.VAL &= ~(maskBits << bitPosition); 409 U.VAL |= subBits << bitPosition; 410 return; 411 } 412 413 unsigned loBit = whichBit(bitPosition); 414 unsigned loWord = whichWord(bitPosition); 415 unsigned hiWord = whichWord(bitPosition + numBits - 1); 416 if (loWord == hiWord) { 417 U.pVal[loWord] &= ~(maskBits << loBit); 418 U.pVal[loWord] |= subBits << loBit; 419 return; 420 } 421 422 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 423 unsigned wordBits = 8 * sizeof(WordType); 424 U.pVal[loWord] &= ~(maskBits << loBit); 425 U.pVal[loWord] |= subBits << loBit; 426 427 U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit)); 428 U.pVal[hiWord] |= subBits >> (wordBits - loBit); 429 } 430 431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const { 432 assert(numBits > 0 && "Can't extract zero bits"); 433 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 434 "Illegal bit extraction"); 435 436 if (isSingleWord()) 437 return APInt(numBits, U.VAL >> bitPosition); 438 439 unsigned loBit = whichBit(bitPosition); 440 unsigned loWord = whichWord(bitPosition); 441 unsigned hiWord = whichWord(bitPosition + numBits - 1); 442 443 // Single word result extracting bits from a single word source. 444 if (loWord == hiWord) 445 return APInt(numBits, U.pVal[loWord] >> loBit); 446 447 // Extracting bits that start on a source word boundary can be done 448 // as a fast memory copy. 449 if (loBit == 0) 450 return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord)); 451 452 // General case - shift + copy source words directly into place. 453 APInt Result(numBits, 0); 454 unsigned NumSrcWords = getNumWords(); 455 unsigned NumDstWords = Result.getNumWords(); 456 457 uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal; 458 for (unsigned word = 0; word < NumDstWords; ++word) { 459 uint64_t w0 = U.pVal[loWord + word]; 460 uint64_t w1 = 461 (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0; 462 DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit)); 463 } 464 465 return Result.clearUnusedBits(); 466 } 467 468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits, 469 unsigned bitPosition) const { 470 assert(numBits > 0 && "Can't extract zero bits"); 471 assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth && 472 "Illegal bit extraction"); 473 assert(numBits <= 64 && "Illegal bit extraction"); 474 475 uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits); 476 if (isSingleWord()) 477 return (U.VAL >> bitPosition) & maskBits; 478 479 unsigned loBit = whichBit(bitPosition); 480 unsigned loWord = whichWord(bitPosition); 481 unsigned hiWord = whichWord(bitPosition + numBits - 1); 482 if (loWord == hiWord) 483 return (U.pVal[loWord] >> loBit) & maskBits; 484 485 static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected"); 486 unsigned wordBits = 8 * sizeof(WordType); 487 uint64_t retBits = U.pVal[loWord] >> loBit; 488 retBits |= U.pVal[hiWord] << (wordBits - loBit); 489 retBits &= maskBits; 490 return retBits; 491 } 492 493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) { 494 assert(!str.empty() && "Invalid string length"); 495 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 496 radix == 36) && 497 "Radix should be 2, 8, 10, 16, or 36!"); 498 499 size_t slen = str.size(); 500 501 // Each computation below needs to know if it's negative. 502 StringRef::iterator p = str.begin(); 503 unsigned isNegative = *p == '-'; 504 if (*p == '-' || *p == '+') { 505 p++; 506 slen--; 507 assert(slen && "String is only a sign, needs a value."); 508 } 509 510 // For radixes of power-of-two values, the bits required is accurately and 511 // easily computed 512 if (radix == 2) 513 return slen + isNegative; 514 if (radix == 8) 515 return slen * 3 + isNegative; 516 if (radix == 16) 517 return slen * 4 + isNegative; 518 519 // FIXME: base 36 520 521 // This is grossly inefficient but accurate. We could probably do something 522 // with a computation of roughly slen*64/20 and then adjust by the value of 523 // the first few digits. But, I'm not sure how accurate that could be. 524 525 // Compute a sufficient number of bits that is always large enough but might 526 // be too large. This avoids the assertion in the constructor. This 527 // calculation doesn't work appropriately for the numbers 0-9, so just use 4 528 // bits in that case. 529 unsigned sufficient 530 = radix == 10? (slen == 1 ? 4 : slen * 64/18) 531 : (slen == 1 ? 7 : slen * 16/3); 532 533 // Convert to the actual binary value. 534 APInt tmp(sufficient, StringRef(p, slen), radix); 535 536 // Compute how many bits are required. If the log is infinite, assume we need 537 // just bit. If the log is exact and value is negative, then the value is 538 // MinSignedValue with (log + 1) bits. 539 unsigned log = tmp.logBase2(); 540 if (log == (unsigned)-1) { 541 return isNegative + 1; 542 } else if (isNegative && tmp.isPowerOf2()) { 543 return isNegative + log; 544 } else { 545 return isNegative + log + 1; 546 } 547 } 548 549 hash_code llvm::hash_value(const APInt &Arg) { 550 if (Arg.isSingleWord()) 551 return hash_combine(Arg.BitWidth, Arg.U.VAL); 552 553 return hash_combine( 554 Arg.BitWidth, 555 hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords())); 556 } 557 558 bool APInt::isSplat(unsigned SplatSizeInBits) const { 559 assert(getBitWidth() % SplatSizeInBits == 0 && 560 "SplatSizeInBits must divide width!"); 561 // We can check that all parts of an integer are equal by making use of a 562 // little trick: rotate and check if it's still the same value. 563 return *this == rotl(SplatSizeInBits); 564 } 565 566 /// This function returns the high "numBits" bits of this APInt. 567 APInt APInt::getHiBits(unsigned numBits) const { 568 return this->lshr(BitWidth - numBits); 569 } 570 571 /// This function returns the low "numBits" bits of this APInt. 572 APInt APInt::getLoBits(unsigned numBits) const { 573 APInt Result(getLowBitsSet(BitWidth, numBits)); 574 Result &= *this; 575 return Result; 576 } 577 578 /// Return a value containing V broadcasted over NewLen bits. 579 APInt APInt::getSplat(unsigned NewLen, const APInt &V) { 580 assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!"); 581 582 APInt Val = V.zextOrSelf(NewLen); 583 for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1) 584 Val |= Val << I; 585 586 return Val; 587 } 588 589 unsigned APInt::countLeadingZerosSlowCase() const { 590 unsigned Count = 0; 591 for (int i = getNumWords()-1; i >= 0; --i) { 592 uint64_t V = U.pVal[i]; 593 if (V == 0) 594 Count += APINT_BITS_PER_WORD; 595 else { 596 Count += llvm::countLeadingZeros(V); 597 break; 598 } 599 } 600 // Adjust for unused bits in the most significant word (they are zero). 601 unsigned Mod = BitWidth % APINT_BITS_PER_WORD; 602 Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0; 603 return Count; 604 } 605 606 unsigned APInt::countLeadingOnesSlowCase() const { 607 unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; 608 unsigned shift; 609 if (!highWordBits) { 610 highWordBits = APINT_BITS_PER_WORD; 611 shift = 0; 612 } else { 613 shift = APINT_BITS_PER_WORD - highWordBits; 614 } 615 int i = getNumWords() - 1; 616 unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift); 617 if (Count == highWordBits) { 618 for (i--; i >= 0; --i) { 619 if (U.pVal[i] == WORDTYPE_MAX) 620 Count += APINT_BITS_PER_WORD; 621 else { 622 Count += llvm::countLeadingOnes(U.pVal[i]); 623 break; 624 } 625 } 626 } 627 return Count; 628 } 629 630 unsigned APInt::countTrailingZerosSlowCase() const { 631 unsigned Count = 0; 632 unsigned i = 0; 633 for (; i < getNumWords() && U.pVal[i] == 0; ++i) 634 Count += APINT_BITS_PER_WORD; 635 if (i < getNumWords()) 636 Count += llvm::countTrailingZeros(U.pVal[i]); 637 return std::min(Count, BitWidth); 638 } 639 640 unsigned APInt::countTrailingOnesSlowCase() const { 641 unsigned Count = 0; 642 unsigned i = 0; 643 for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i) 644 Count += APINT_BITS_PER_WORD; 645 if (i < getNumWords()) 646 Count += llvm::countTrailingOnes(U.pVal[i]); 647 assert(Count <= BitWidth); 648 return Count; 649 } 650 651 unsigned APInt::countPopulationSlowCase() const { 652 unsigned Count = 0; 653 for (unsigned i = 0; i < getNumWords(); ++i) 654 Count += llvm::countPopulation(U.pVal[i]); 655 return Count; 656 } 657 658 bool APInt::intersectsSlowCase(const APInt &RHS) const { 659 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 660 if ((U.pVal[i] & RHS.U.pVal[i]) != 0) 661 return true; 662 663 return false; 664 } 665 666 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { 667 for (unsigned i = 0, e = getNumWords(); i != e; ++i) 668 if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0) 669 return false; 670 671 return true; 672 } 673 674 APInt APInt::byteSwap() const { 675 assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!"); 676 if (BitWidth == 16) 677 return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL))); 678 if (BitWidth == 32) 679 return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL))); 680 if (BitWidth <= 64) { 681 uint64_t Tmp1 = ByteSwap_64(U.VAL); 682 Tmp1 >>= (64 - BitWidth); 683 return APInt(BitWidth, Tmp1); 684 } 685 686 APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0); 687 for (unsigned I = 0, N = getNumWords(); I != N; ++I) 688 Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]); 689 if (Result.BitWidth != BitWidth) { 690 Result.lshrInPlace(Result.BitWidth - BitWidth); 691 Result.BitWidth = BitWidth; 692 } 693 return Result; 694 } 695 696 APInt APInt::reverseBits() const { 697 switch (BitWidth) { 698 case 64: 699 return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL)); 700 case 32: 701 return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL)); 702 case 16: 703 return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL)); 704 case 8: 705 return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL)); 706 default: 707 break; 708 } 709 710 APInt Val(*this); 711 APInt Reversed(BitWidth, 0); 712 unsigned S = BitWidth; 713 714 for (; Val != 0; Val.lshrInPlace(1)) { 715 Reversed <<= 1; 716 Reversed |= Val[0]; 717 --S; 718 } 719 720 Reversed <<= S; 721 return Reversed; 722 } 723 724 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) { 725 // Fast-path a common case. 726 if (A == B) return A; 727 728 // Corner cases: if either operand is zero, the other is the gcd. 729 if (!A) return B; 730 if (!B) return A; 731 732 // Count common powers of 2 and remove all other powers of 2. 733 unsigned Pow2; 734 { 735 unsigned Pow2_A = A.countTrailingZeros(); 736 unsigned Pow2_B = B.countTrailingZeros(); 737 if (Pow2_A > Pow2_B) { 738 A.lshrInPlace(Pow2_A - Pow2_B); 739 Pow2 = Pow2_B; 740 } else if (Pow2_B > Pow2_A) { 741 B.lshrInPlace(Pow2_B - Pow2_A); 742 Pow2 = Pow2_A; 743 } else { 744 Pow2 = Pow2_A; 745 } 746 } 747 748 // Both operands are odd multiples of 2^Pow_2: 749 // 750 // gcd(a, b) = gcd(|a - b| / 2^i, min(a, b)) 751 // 752 // This is a modified version of Stein's algorithm, taking advantage of 753 // efficient countTrailingZeros(). 754 while (A != B) { 755 if (A.ugt(B)) { 756 A -= B; 757 A.lshrInPlace(A.countTrailingZeros() - Pow2); 758 } else { 759 B -= A; 760 B.lshrInPlace(B.countTrailingZeros() - Pow2); 761 } 762 } 763 764 return A; 765 } 766 767 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { 768 uint64_t I = bit_cast<uint64_t>(Double); 769 770 // Get the sign bit from the highest order bit 771 bool isNeg = I >> 63; 772 773 // Get the 11-bit exponent and adjust for the 1023 bit bias 774 int64_t exp = ((I >> 52) & 0x7ff) - 1023; 775 776 // If the exponent is negative, the value is < 0 so just return 0. 777 if (exp < 0) 778 return APInt(width, 0u); 779 780 // Extract the mantissa by clearing the top 12 bits (sign + exponent). 781 uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52; 782 783 // If the exponent doesn't shift all bits out of the mantissa 784 if (exp < 52) 785 return isNeg ? -APInt(width, mantissa >> (52 - exp)) : 786 APInt(width, mantissa >> (52 - exp)); 787 788 // If the client didn't provide enough bits for us to shift the mantissa into 789 // then the result is undefined, just return 0 790 if (width <= exp - 52) 791 return APInt(width, 0); 792 793 // Otherwise, we have to shift the mantissa bits up to the right location 794 APInt Tmp(width, mantissa); 795 Tmp <<= (unsigned)exp - 52; 796 return isNeg ? -Tmp : Tmp; 797 } 798 799 /// This function converts this APInt to a double. 800 /// The layout for double is as following (IEEE Standard 754): 801 /// -------------------------------------- 802 /// | Sign Exponent Fraction Bias | 803 /// |-------------------------------------- | 804 /// | 1[63] 11[62-52] 52[51-00] 1023 | 805 /// -------------------------------------- 806 double APInt::roundToDouble(bool isSigned) const { 807 808 // Handle the simple case where the value is contained in one uint64_t. 809 // It is wrong to optimize getWord(0) to VAL; there might be more than one word. 810 if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) { 811 if (isSigned) { 812 int64_t sext = SignExtend64(getWord(0), BitWidth); 813 return double(sext); 814 } else 815 return double(getWord(0)); 816 } 817 818 // Determine if the value is negative. 819 bool isNeg = isSigned ? (*this)[BitWidth-1] : false; 820 821 // Construct the absolute value if we're negative. 822 APInt Tmp(isNeg ? -(*this) : (*this)); 823 824 // Figure out how many bits we're using. 825 unsigned n = Tmp.getActiveBits(); 826 827 // The exponent (without bias normalization) is just the number of bits 828 // we are using. Note that the sign bit is gone since we constructed the 829 // absolute value. 830 uint64_t exp = n; 831 832 // Return infinity for exponent overflow 833 if (exp > 1023) { 834 if (!isSigned || !isNeg) 835 return std::numeric_limits<double>::infinity(); 836 else 837 return -std::numeric_limits<double>::infinity(); 838 } 839 exp += 1023; // Increment for 1023 bias 840 841 // Number of bits in mantissa is 52. To obtain the mantissa value, we must 842 // extract the high 52 bits from the correct words in pVal. 843 uint64_t mantissa; 844 unsigned hiWord = whichWord(n-1); 845 if (hiWord == 0) { 846 mantissa = Tmp.U.pVal[0]; 847 if (n > 52) 848 mantissa >>= n - 52; // shift down, we want the top 52 bits. 849 } else { 850 assert(hiWord > 0 && "huh?"); 851 uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD); 852 uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD); 853 mantissa = hibits | lobits; 854 } 855 856 // The leading bit of mantissa is implicit, so get rid of it. 857 uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0; 858 uint64_t I = sign | (exp << 52) | mantissa; 859 return bit_cast<double>(I); 860 } 861 862 // Truncate to new width. 863 APInt APInt::trunc(unsigned width) const { 864 assert(width < BitWidth && "Invalid APInt Truncate request"); 865 assert(width && "Can't truncate to 0 bits"); 866 867 if (width <= APINT_BITS_PER_WORD) 868 return APInt(width, getRawData()[0]); 869 870 APInt Result(getMemory(getNumWords(width)), width); 871 872 // Copy full words. 873 unsigned i; 874 for (i = 0; i != width / APINT_BITS_PER_WORD; i++) 875 Result.U.pVal[i] = U.pVal[i]; 876 877 // Truncate and copy any partial word. 878 unsigned bits = (0 - width) % APINT_BITS_PER_WORD; 879 if (bits != 0) 880 Result.U.pVal[i] = U.pVal[i] << bits >> bits; 881 882 return Result; 883 } 884 885 // Truncate to new width with unsigned saturation. 886 APInt APInt::truncUSat(unsigned width) const { 887 assert(width < BitWidth && "Invalid APInt Truncate request"); 888 assert(width && "Can't truncate to 0 bits"); 889 890 // Can we just losslessly truncate it? 891 if (isIntN(width)) 892 return trunc(width); 893 // If not, then just return the new limit. 894 return APInt::getMaxValue(width); 895 } 896 897 // Truncate to new width with signed saturation. 898 APInt APInt::truncSSat(unsigned width) const { 899 assert(width < BitWidth && "Invalid APInt Truncate request"); 900 assert(width && "Can't truncate to 0 bits"); 901 902 // Can we just losslessly truncate it? 903 if (isSignedIntN(width)) 904 return trunc(width); 905 // If not, then just return the new limits. 906 return isNegative() ? APInt::getSignedMinValue(width) 907 : APInt::getSignedMaxValue(width); 908 } 909 910 // Sign extend to a new width. 911 APInt APInt::sext(unsigned Width) const { 912 assert(Width > BitWidth && "Invalid APInt SignExtend request"); 913 914 if (Width <= APINT_BITS_PER_WORD) 915 return APInt(Width, SignExtend64(U.VAL, BitWidth)); 916 917 APInt Result(getMemory(getNumWords(Width)), Width); 918 919 // Copy words. 920 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 921 922 // Sign extend the last word since there may be unused bits in the input. 923 Result.U.pVal[getNumWords() - 1] = 924 SignExtend64(Result.U.pVal[getNumWords() - 1], 925 ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 926 927 // Fill with sign bits. 928 std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0, 929 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 930 Result.clearUnusedBits(); 931 return Result; 932 } 933 934 // Zero extend to a new width. 935 APInt APInt::zext(unsigned width) const { 936 assert(width > BitWidth && "Invalid APInt ZeroExtend request"); 937 938 if (width <= APINT_BITS_PER_WORD) 939 return APInt(width, U.VAL); 940 941 APInt Result(getMemory(getNumWords(width)), width); 942 943 // Copy words. 944 std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE); 945 946 // Zero remaining words. 947 std::memset(Result.U.pVal + getNumWords(), 0, 948 (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE); 949 950 return Result; 951 } 952 953 APInt APInt::zextOrTrunc(unsigned width) const { 954 if (BitWidth < width) 955 return zext(width); 956 if (BitWidth > width) 957 return trunc(width); 958 return *this; 959 } 960 961 APInt APInt::sextOrTrunc(unsigned width) const { 962 if (BitWidth < width) 963 return sext(width); 964 if (BitWidth > width) 965 return trunc(width); 966 return *this; 967 } 968 969 APInt APInt::zextOrSelf(unsigned width) const { 970 if (BitWidth < width) 971 return zext(width); 972 return *this; 973 } 974 975 APInt APInt::sextOrSelf(unsigned width) const { 976 if (BitWidth < width) 977 return sext(width); 978 return *this; 979 } 980 981 /// Arithmetic right-shift this APInt by shiftAmt. 982 /// Arithmetic right-shift function. 983 void APInt::ashrInPlace(const APInt &shiftAmt) { 984 ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 985 } 986 987 /// Arithmetic right-shift this APInt by shiftAmt. 988 /// Arithmetic right-shift function. 989 void APInt::ashrSlowCase(unsigned ShiftAmt) { 990 // Don't bother performing a no-op shift. 991 if (!ShiftAmt) 992 return; 993 994 // Save the original sign bit for later. 995 bool Negative = isNegative(); 996 997 // WordShift is the inter-part shift; BitShift is intra-part shift. 998 unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD; 999 unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD; 1000 1001 unsigned WordsToMove = getNumWords() - WordShift; 1002 if (WordsToMove != 0) { 1003 // Sign extend the last word to fill in the unused bits. 1004 U.pVal[getNumWords() - 1] = SignExtend64( 1005 U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1); 1006 1007 // Fastpath for moving by whole words. 1008 if (BitShift == 0) { 1009 std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE); 1010 } else { 1011 // Move the words containing significant bits. 1012 for (unsigned i = 0; i != WordsToMove - 1; ++i) 1013 U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) | 1014 (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift)); 1015 1016 // Handle the last word which has no high bits to copy. 1017 U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift; 1018 // Sign extend one more time. 1019 U.pVal[WordsToMove - 1] = 1020 SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift); 1021 } 1022 } 1023 1024 // Fill in the remainder based on the original sign. 1025 std::memset(U.pVal + WordsToMove, Negative ? -1 : 0, 1026 WordShift * APINT_WORD_SIZE); 1027 clearUnusedBits(); 1028 } 1029 1030 /// Logical right-shift this APInt by shiftAmt. 1031 /// Logical right-shift function. 1032 void APInt::lshrInPlace(const APInt &shiftAmt) { 1033 lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth)); 1034 } 1035 1036 /// Logical right-shift this APInt by shiftAmt. 1037 /// Logical right-shift function. 1038 void APInt::lshrSlowCase(unsigned ShiftAmt) { 1039 tcShiftRight(U.pVal, getNumWords(), ShiftAmt); 1040 } 1041 1042 /// Left-shift this APInt by shiftAmt. 1043 /// Left-shift function. 1044 APInt &APInt::operator<<=(const APInt &shiftAmt) { 1045 // It's undefined behavior in C to shift by BitWidth or greater. 1046 *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth); 1047 return *this; 1048 } 1049 1050 void APInt::shlSlowCase(unsigned ShiftAmt) { 1051 tcShiftLeft(U.pVal, getNumWords(), ShiftAmt); 1052 clearUnusedBits(); 1053 } 1054 1055 // Calculate the rotate amount modulo the bit width. 1056 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) { 1057 unsigned rotBitWidth = rotateAmt.getBitWidth(); 1058 APInt rot = rotateAmt; 1059 if (rotBitWidth < BitWidth) { 1060 // Extend the rotate APInt, so that the urem doesn't divide by 0. 1061 // e.g. APInt(1, 32) would give APInt(1, 0). 1062 rot = rotateAmt.zext(BitWidth); 1063 } 1064 rot = rot.urem(APInt(rot.getBitWidth(), BitWidth)); 1065 return rot.getLimitedValue(BitWidth); 1066 } 1067 1068 APInt APInt::rotl(const APInt &rotateAmt) const { 1069 return rotl(rotateModulo(BitWidth, rotateAmt)); 1070 } 1071 1072 APInt APInt::rotl(unsigned rotateAmt) const { 1073 rotateAmt %= BitWidth; 1074 if (rotateAmt == 0) 1075 return *this; 1076 return shl(rotateAmt) | lshr(BitWidth - rotateAmt); 1077 } 1078 1079 APInt APInt::rotr(const APInt &rotateAmt) const { 1080 return rotr(rotateModulo(BitWidth, rotateAmt)); 1081 } 1082 1083 APInt APInt::rotr(unsigned rotateAmt) const { 1084 rotateAmt %= BitWidth; 1085 if (rotateAmt == 0) 1086 return *this; 1087 return lshr(rotateAmt) | shl(BitWidth - rotateAmt); 1088 } 1089 1090 // Square Root - this method computes and returns the square root of "this". 1091 // Three mechanisms are used for computation. For small values (<= 5 bits), 1092 // a table lookup is done. This gets some performance for common cases. For 1093 // values using less than 52 bits, the value is converted to double and then 1094 // the libc sqrt function is called. The result is rounded and then converted 1095 // back to a uint64_t which is then used to construct the result. Finally, 1096 // the Babylonian method for computing square roots is used. 1097 APInt APInt::sqrt() const { 1098 1099 // Determine the magnitude of the value. 1100 unsigned magnitude = getActiveBits(); 1101 1102 // Use a fast table for some small values. This also gets rid of some 1103 // rounding errors in libc sqrt for small values. 1104 if (magnitude <= 5) { 1105 static const uint8_t results[32] = { 1106 /* 0 */ 0, 1107 /* 1- 2 */ 1, 1, 1108 /* 3- 6 */ 2, 2, 2, 2, 1109 /* 7-12 */ 3, 3, 3, 3, 3, 3, 1110 /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, 1111 /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1112 /* 31 */ 6 1113 }; 1114 return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]); 1115 } 1116 1117 // If the magnitude of the value fits in less than 52 bits (the precision of 1118 // an IEEE double precision floating point value), then we can use the 1119 // libc sqrt function which will probably use a hardware sqrt computation. 1120 // This should be faster than the algorithm below. 1121 if (magnitude < 52) { 1122 return APInt(BitWidth, 1123 uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL 1124 : U.pVal[0]))))); 1125 } 1126 1127 // Okay, all the short cuts are exhausted. We must compute it. The following 1128 // is a classical Babylonian method for computing the square root. This code 1129 // was adapted to APInt from a wikipedia article on such computations. 1130 // See http://www.wikipedia.org/ and go to the page named 1131 // Calculate_an_integer_square_root. 1132 unsigned nbits = BitWidth, i = 4; 1133 APInt testy(BitWidth, 16); 1134 APInt x_old(BitWidth, 1); 1135 APInt x_new(BitWidth, 0); 1136 APInt two(BitWidth, 2); 1137 1138 // Select a good starting value using binary logarithms. 1139 for (;; i += 2, testy = testy.shl(2)) 1140 if (i >= nbits || this->ule(testy)) { 1141 x_old = x_old.shl(i / 2); 1142 break; 1143 } 1144 1145 // Use the Babylonian method to arrive at the integer square root: 1146 for (;;) { 1147 x_new = (this->udiv(x_old) + x_old).udiv(two); 1148 if (x_old.ule(x_new)) 1149 break; 1150 x_old = x_new; 1151 } 1152 1153 // Make sure we return the closest approximation 1154 // NOTE: The rounding calculation below is correct. It will produce an 1155 // off-by-one discrepancy with results from pari/gp. That discrepancy has been 1156 // determined to be a rounding issue with pari/gp as it begins to use a 1157 // floating point representation after 192 bits. There are no discrepancies 1158 // between this algorithm and pari/gp for bit widths < 192 bits. 1159 APInt square(x_old * x_old); 1160 APInt nextSquare((x_old + 1) * (x_old +1)); 1161 if (this->ult(square)) 1162 return x_old; 1163 assert(this->ule(nextSquare) && "Error in APInt::sqrt computation"); 1164 APInt midpoint((nextSquare - square).udiv(two)); 1165 APInt offset(*this - square); 1166 if (offset.ult(midpoint)) 1167 return x_old; 1168 return x_old + 1; 1169 } 1170 1171 /// Computes the multiplicative inverse of this APInt for a given modulo. The 1172 /// iterative extended Euclidean algorithm is used to solve for this value, 1173 /// however we simplify it to speed up calculating only the inverse, and take 1174 /// advantage of div+rem calculations. We also use some tricks to avoid copying 1175 /// (potentially large) APInts around. 1176 /// WARNING: a value of '0' may be returned, 1177 /// signifying that no multiplicative inverse exists! 1178 APInt APInt::multiplicativeInverse(const APInt& modulo) const { 1179 assert(ult(modulo) && "This APInt must be smaller than the modulo"); 1180 1181 // Using the properties listed at the following web page (accessed 06/21/08): 1182 // http://www.numbertheory.org/php/euclid.html 1183 // (especially the properties numbered 3, 4 and 9) it can be proved that 1184 // BitWidth bits suffice for all the computations in the algorithm implemented 1185 // below. More precisely, this number of bits suffice if the multiplicative 1186 // inverse exists, but may not suffice for the general extended Euclidean 1187 // algorithm. 1188 1189 APInt r[2] = { modulo, *this }; 1190 APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; 1191 APInt q(BitWidth, 0); 1192 1193 unsigned i; 1194 for (i = 0; r[i^1] != 0; i ^= 1) { 1195 // An overview of the math without the confusing bit-flipping: 1196 // q = r[i-2] / r[i-1] 1197 // r[i] = r[i-2] % r[i-1] 1198 // t[i] = t[i-2] - t[i-1] * q 1199 udivrem(r[i], r[i^1], q, r[i]); 1200 t[i] -= t[i^1] * q; 1201 } 1202 1203 // If this APInt and the modulo are not coprime, there is no multiplicative 1204 // inverse, so return 0. We check this by looking at the next-to-last 1205 // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean 1206 // algorithm. 1207 if (r[i] != 1) 1208 return APInt(BitWidth, 0); 1209 1210 // The next-to-last t is the multiplicative inverse. However, we are 1211 // interested in a positive inverse. Calculate a positive one from a negative 1212 // one if necessary. A simple addition of the modulo suffices because 1213 // abs(t[i]) is known to be less than *this/2 (see the link above). 1214 if (t[i].isNegative()) 1215 t[i] += modulo; 1216 1217 return std::move(t[i]); 1218 } 1219 1220 /// Calculate the magic numbers required to implement a signed integer division 1221 /// by a constant as a sequence of multiplies, adds and shifts. Requires that 1222 /// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. 1223 /// Warren, Jr., chapter 10. 1224 APInt::ms APInt::magic() const { 1225 const APInt& d = *this; 1226 unsigned p; 1227 APInt ad, anc, delta, q1, r1, q2, r2, t; 1228 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1229 struct ms mag; 1230 1231 ad = d.abs(); 1232 t = signedMin + (d.lshr(d.getBitWidth() - 1)); 1233 anc = t - 1 - t.urem(ad); // absolute value of nc 1234 p = d.getBitWidth() - 1; // initialize p 1235 q1 = signedMin.udiv(anc); // initialize q1 = 2p/abs(nc) 1236 r1 = signedMin - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1237 q2 = signedMin.udiv(ad); // initialize q2 = 2p/abs(d) 1238 r2 = signedMin - q2*ad; // initialize r2 = rem(2p,abs(d)) 1239 do { 1240 p = p + 1; 1241 q1 = q1<<1; // update q1 = 2p/abs(nc) 1242 r1 = r1<<1; // update r1 = rem(2p/abs(nc)) 1243 if (r1.uge(anc)) { // must be unsigned comparison 1244 q1 = q1 + 1; 1245 r1 = r1 - anc; 1246 } 1247 q2 = q2<<1; // update q2 = 2p/abs(d) 1248 r2 = r2<<1; // update r2 = rem(2p/abs(d)) 1249 if (r2.uge(ad)) { // must be unsigned comparison 1250 q2 = q2 + 1; 1251 r2 = r2 - ad; 1252 } 1253 delta = ad - r2; 1254 } while (q1.ult(delta) || (q1 == delta && r1 == 0)); 1255 1256 mag.m = q2 + 1; 1257 if (d.isNegative()) mag.m = -mag.m; // resulting magic number 1258 mag.s = p - d.getBitWidth(); // resulting shift 1259 return mag; 1260 } 1261 1262 /// Calculate the magic numbers required to implement an unsigned integer 1263 /// division by a constant as a sequence of multiplies, adds and shifts. 1264 /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry 1265 /// S. Warren, Jr., chapter 10. 1266 /// LeadingZeros can be used to simplify the calculation if the upper bits 1267 /// of the divided value are known zero. 1268 APInt::mu APInt::magicu(unsigned LeadingZeros) const { 1269 const APInt& d = *this; 1270 unsigned p; 1271 APInt nc, delta, q1, r1, q2, r2; 1272 struct mu magu; 1273 magu.a = 0; // initialize "add" indicator 1274 APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); 1275 APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); 1276 APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); 1277 1278 nc = allOnes - (allOnes - d).urem(d); 1279 p = d.getBitWidth() - 1; // initialize p 1280 q1 = signedMin.udiv(nc); // initialize q1 = 2p/nc 1281 r1 = signedMin - q1*nc; // initialize r1 = rem(2p,nc) 1282 q2 = signedMax.udiv(d); // initialize q2 = (2p-1)/d 1283 r2 = signedMax - q2*d; // initialize r2 = rem((2p-1),d) 1284 do { 1285 p = p + 1; 1286 if (r1.uge(nc - r1)) { 1287 q1 = q1 + q1 + 1; // update q1 1288 r1 = r1 + r1 - nc; // update r1 1289 } 1290 else { 1291 q1 = q1+q1; // update q1 1292 r1 = r1+r1; // update r1 1293 } 1294 if ((r2 + 1).uge(d - r2)) { 1295 if (q2.uge(signedMax)) magu.a = 1; 1296 q2 = q2+q2 + 1; // update q2 1297 r2 = r2+r2 + 1 - d; // update r2 1298 } 1299 else { 1300 if (q2.uge(signedMin)) magu.a = 1; 1301 q2 = q2+q2; // update q2 1302 r2 = r2+r2 + 1; // update r2 1303 } 1304 delta = d - 1 - r2; 1305 } while (p < d.getBitWidth()*2 && 1306 (q1.ult(delta) || (q1 == delta && r1 == 0))); 1307 magu.m = q2 + 1; // resulting magic number 1308 magu.s = p - d.getBitWidth(); // resulting shift 1309 return magu; 1310 } 1311 1312 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) 1313 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The 1314 /// variables here have the same names as in the algorithm. Comments explain 1315 /// the algorithm and any deviation from it. 1316 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r, 1317 unsigned m, unsigned n) { 1318 assert(u && "Must provide dividend"); 1319 assert(v && "Must provide divisor"); 1320 assert(q && "Must provide quotient"); 1321 assert(u != v && u != q && v != q && "Must use different memory"); 1322 assert(n>1 && "n must be > 1"); 1323 1324 // b denotes the base of the number system. In our case b is 2^32. 1325 const uint64_t b = uint64_t(1) << 32; 1326 1327 // The DEBUG macros here tend to be spam in the debug output if you're not 1328 // debugging this code. Disable them unless KNUTH_DEBUG is defined. 1329 #ifdef KNUTH_DEBUG 1330 #define DEBUG_KNUTH(X) LLVM_DEBUG(X) 1331 #else 1332 #define DEBUG_KNUTH(X) do {} while(false) 1333 #endif 1334 1335 DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); 1336 DEBUG_KNUTH(dbgs() << "KnuthDiv: original:"); 1337 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1338 DEBUG_KNUTH(dbgs() << " by"); 1339 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1340 DEBUG_KNUTH(dbgs() << '\n'); 1341 // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of 1342 // u and v by d. Note that we have taken Knuth's advice here to use a power 1343 // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of 1344 // 2 allows us to shift instead of multiply and it is easy to determine the 1345 // shift amount from the leading zeros. We are basically normalizing the u 1346 // and v so that its high bits are shifted to the top of v's range without 1347 // overflow. Note that this can require an extra word in u so that u must 1348 // be of length m+n+1. 1349 unsigned shift = countLeadingZeros(v[n-1]); 1350 uint32_t v_carry = 0; 1351 uint32_t u_carry = 0; 1352 if (shift) { 1353 for (unsigned i = 0; i < m+n; ++i) { 1354 uint32_t u_tmp = u[i] >> (32 - shift); 1355 u[i] = (u[i] << shift) | u_carry; 1356 u_carry = u_tmp; 1357 } 1358 for (unsigned i = 0; i < n; ++i) { 1359 uint32_t v_tmp = v[i] >> (32 - shift); 1360 v[i] = (v[i] << shift) | v_carry; 1361 v_carry = v_tmp; 1362 } 1363 } 1364 u[m+n] = u_carry; 1365 1366 DEBUG_KNUTH(dbgs() << "KnuthDiv: normal:"); 1367 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1368 DEBUG_KNUTH(dbgs() << " by"); 1369 DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]); 1370 DEBUG_KNUTH(dbgs() << '\n'); 1371 1372 // D2. [Initialize j.] Set j to m. This is the loop counter over the places. 1373 int j = m; 1374 do { 1375 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); 1376 // D3. [Calculate q'.]. 1377 // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') 1378 // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') 1379 // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease 1380 // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test 1381 // on v[n-2] determines at high speed most of the cases in which the trial 1382 // value qp is one too large, and it eliminates all cases where qp is two 1383 // too large. 1384 uint64_t dividend = Make_64(u[j+n], u[j+n-1]); 1385 DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); 1386 uint64_t qp = dividend / v[n-1]; 1387 uint64_t rp = dividend % v[n-1]; 1388 if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { 1389 qp--; 1390 rp += v[n-1]; 1391 if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) 1392 qp--; 1393 } 1394 DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); 1395 1396 // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with 1397 // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation 1398 // consists of a simple multiplication by a one-place number, combined with 1399 // a subtraction. 1400 // The digits (u[j+n]...u[j]) should be kept positive; if the result of 1401 // this step is actually negative, (u[j+n]...u[j]) should be left as the 1402 // true value plus b**(n+1), namely as the b's complement of 1403 // the true value, and a "borrow" to the left should be remembered. 1404 int64_t borrow = 0; 1405 for (unsigned i = 0; i < n; ++i) { 1406 uint64_t p = uint64_t(qp) * uint64_t(v[i]); 1407 int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p); 1408 u[j+i] = Lo_32(subres); 1409 borrow = Hi_32(p) - Hi_32(subres); 1410 DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i] 1411 << ", borrow = " << borrow << '\n'); 1412 } 1413 bool isNeg = u[j+n] < borrow; 1414 u[j+n] -= Lo_32(borrow); 1415 1416 DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:"); 1417 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1418 DEBUG_KNUTH(dbgs() << '\n'); 1419 1420 // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was 1421 // negative, go to step D6; otherwise go on to step D7. 1422 q[j] = Lo_32(qp); 1423 if (isNeg) { 1424 // D6. [Add back]. The probability that this step is necessary is very 1425 // small, on the order of only 2/b. Make sure that test data accounts for 1426 // this possibility. Decrease q[j] by 1 1427 q[j]--; 1428 // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). 1429 // A carry will occur to the left of u[j+n], and it should be ignored 1430 // since it cancels with the borrow that occurred in D4. 1431 bool carry = false; 1432 for (unsigned i = 0; i < n; i++) { 1433 uint32_t limit = std::min(u[j+i],v[i]); 1434 u[j+i] += v[i] + carry; 1435 carry = u[j+i] < limit || (carry && u[j+i] == limit); 1436 } 1437 u[j+n] += carry; 1438 } 1439 DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:"); 1440 DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]); 1441 DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); 1442 1443 // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. 1444 } while (--j >= 0); 1445 1446 DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:"); 1447 DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]); 1448 DEBUG_KNUTH(dbgs() << '\n'); 1449 1450 // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired 1451 // remainder may be obtained by dividing u[...] by d. If r is non-null we 1452 // compute the remainder (urem uses this). 1453 if (r) { 1454 // The value d is expressed by the "shift" value above since we avoided 1455 // multiplication by d by using a shift left. So, all we have to do is 1456 // shift right here. 1457 if (shift) { 1458 uint32_t carry = 0; 1459 DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:"); 1460 for (int i = n-1; i >= 0; i--) { 1461 r[i] = (u[i] >> shift) | carry; 1462 carry = u[i] << (32 - shift); 1463 DEBUG_KNUTH(dbgs() << " " << r[i]); 1464 } 1465 } else { 1466 for (int i = n-1; i >= 0; i--) { 1467 r[i] = u[i]; 1468 DEBUG_KNUTH(dbgs() << " " << r[i]); 1469 } 1470 } 1471 DEBUG_KNUTH(dbgs() << '\n'); 1472 } 1473 DEBUG_KNUTH(dbgs() << '\n'); 1474 } 1475 1476 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS, 1477 unsigned rhsWords, WordType *Quotient, WordType *Remainder) { 1478 assert(lhsWords >= rhsWords && "Fractional result"); 1479 1480 // First, compose the values into an array of 32-bit words instead of 1481 // 64-bit words. This is a necessity of both the "short division" algorithm 1482 // and the Knuth "classical algorithm" which requires there to be native 1483 // operations for +, -, and * on an m bit value with an m*2 bit result. We 1484 // can't use 64-bit operands here because we don't have native results of 1485 // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't 1486 // work on large-endian machines. 1487 unsigned n = rhsWords * 2; 1488 unsigned m = (lhsWords * 2) - n; 1489 1490 // Allocate space for the temporary values we need either on the stack, if 1491 // it will fit, or on the heap if it won't. 1492 uint32_t SPACE[128]; 1493 uint32_t *U = nullptr; 1494 uint32_t *V = nullptr; 1495 uint32_t *Q = nullptr; 1496 uint32_t *R = nullptr; 1497 if ((Remainder?4:3)*n+2*m+1 <= 128) { 1498 U = &SPACE[0]; 1499 V = &SPACE[m+n+1]; 1500 Q = &SPACE[(m+n+1) + n]; 1501 if (Remainder) 1502 R = &SPACE[(m+n+1) + n + (m+n)]; 1503 } else { 1504 U = new uint32_t[m + n + 1]; 1505 V = new uint32_t[n]; 1506 Q = new uint32_t[m+n]; 1507 if (Remainder) 1508 R = new uint32_t[n]; 1509 } 1510 1511 // Initialize the dividend 1512 memset(U, 0, (m+n+1)*sizeof(uint32_t)); 1513 for (unsigned i = 0; i < lhsWords; ++i) { 1514 uint64_t tmp = LHS[i]; 1515 U[i * 2] = Lo_32(tmp); 1516 U[i * 2 + 1] = Hi_32(tmp); 1517 } 1518 U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. 1519 1520 // Initialize the divisor 1521 memset(V, 0, (n)*sizeof(uint32_t)); 1522 for (unsigned i = 0; i < rhsWords; ++i) { 1523 uint64_t tmp = RHS[i]; 1524 V[i * 2] = Lo_32(tmp); 1525 V[i * 2 + 1] = Hi_32(tmp); 1526 } 1527 1528 // initialize the quotient and remainder 1529 memset(Q, 0, (m+n) * sizeof(uint32_t)); 1530 if (Remainder) 1531 memset(R, 0, n * sizeof(uint32_t)); 1532 1533 // Now, adjust m and n for the Knuth division. n is the number of words in 1534 // the divisor. m is the number of words by which the dividend exceeds the 1535 // divisor (i.e. m+n is the length of the dividend). These sizes must not 1536 // contain any zero words or the Knuth algorithm fails. 1537 for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { 1538 n--; 1539 m++; 1540 } 1541 for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) 1542 m--; 1543 1544 // If we're left with only a single word for the divisor, Knuth doesn't work 1545 // so we implement the short division algorithm here. This is much simpler 1546 // and faster because we are certain that we can divide a 64-bit quantity 1547 // by a 32-bit quantity at hardware speed and short division is simply a 1548 // series of such operations. This is just like doing short division but we 1549 // are using base 2^32 instead of base 10. 1550 assert(n != 0 && "Divide by zero?"); 1551 if (n == 1) { 1552 uint32_t divisor = V[0]; 1553 uint32_t remainder = 0; 1554 for (int i = m; i >= 0; i--) { 1555 uint64_t partial_dividend = Make_64(remainder, U[i]); 1556 if (partial_dividend == 0) { 1557 Q[i] = 0; 1558 remainder = 0; 1559 } else if (partial_dividend < divisor) { 1560 Q[i] = 0; 1561 remainder = Lo_32(partial_dividend); 1562 } else if (partial_dividend == divisor) { 1563 Q[i] = 1; 1564 remainder = 0; 1565 } else { 1566 Q[i] = Lo_32(partial_dividend / divisor); 1567 remainder = Lo_32(partial_dividend - (Q[i] * divisor)); 1568 } 1569 } 1570 if (R) 1571 R[0] = remainder; 1572 } else { 1573 // Now we're ready to invoke the Knuth classical divide algorithm. In this 1574 // case n > 1. 1575 KnuthDiv(U, V, Q, R, m, n); 1576 } 1577 1578 // If the caller wants the quotient 1579 if (Quotient) { 1580 for (unsigned i = 0; i < lhsWords; ++i) 1581 Quotient[i] = Make_64(Q[i*2+1], Q[i*2]); 1582 } 1583 1584 // If the caller wants the remainder 1585 if (Remainder) { 1586 for (unsigned i = 0; i < rhsWords; ++i) 1587 Remainder[i] = Make_64(R[i*2+1], R[i*2]); 1588 } 1589 1590 // Clean up the memory we allocated. 1591 if (U != &SPACE[0]) { 1592 delete [] U; 1593 delete [] V; 1594 delete [] Q; 1595 delete [] R; 1596 } 1597 } 1598 1599 APInt APInt::udiv(const APInt &RHS) const { 1600 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1601 1602 // First, deal with the easy case 1603 if (isSingleWord()) { 1604 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1605 return APInt(BitWidth, U.VAL / RHS.U.VAL); 1606 } 1607 1608 // Get some facts about the LHS and RHS number of bits and words 1609 unsigned lhsWords = getNumWords(getActiveBits()); 1610 unsigned rhsBits = RHS.getActiveBits(); 1611 unsigned rhsWords = getNumWords(rhsBits); 1612 assert(rhsWords && "Divided by zero???"); 1613 1614 // Deal with some degenerate cases 1615 if (!lhsWords) 1616 // 0 / X ===> 0 1617 return APInt(BitWidth, 0); 1618 if (rhsBits == 1) 1619 // X / 1 ===> X 1620 return *this; 1621 if (lhsWords < rhsWords || this->ult(RHS)) 1622 // X / Y ===> 0, iff X < Y 1623 return APInt(BitWidth, 0); 1624 if (*this == RHS) 1625 // X / X ===> 1 1626 return APInt(BitWidth, 1); 1627 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1628 // All high words are zero, just use native divide 1629 return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]); 1630 1631 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1632 APInt Quotient(BitWidth, 0); // to hold result. 1633 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr); 1634 return Quotient; 1635 } 1636 1637 APInt APInt::udiv(uint64_t RHS) const { 1638 assert(RHS != 0 && "Divide by zero?"); 1639 1640 // First, deal with the easy case 1641 if (isSingleWord()) 1642 return APInt(BitWidth, U.VAL / RHS); 1643 1644 // Get some facts about the LHS words. 1645 unsigned lhsWords = getNumWords(getActiveBits()); 1646 1647 // Deal with some degenerate cases 1648 if (!lhsWords) 1649 // 0 / X ===> 0 1650 return APInt(BitWidth, 0); 1651 if (RHS == 1) 1652 // X / 1 ===> X 1653 return *this; 1654 if (this->ult(RHS)) 1655 // X / Y ===> 0, iff X < Y 1656 return APInt(BitWidth, 0); 1657 if (*this == RHS) 1658 // X / X ===> 1 1659 return APInt(BitWidth, 1); 1660 if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1. 1661 // All high words are zero, just use native divide 1662 return APInt(BitWidth, this->U.pVal[0] / RHS); 1663 1664 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1665 APInt Quotient(BitWidth, 0); // to hold result. 1666 divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr); 1667 return Quotient; 1668 } 1669 1670 APInt APInt::sdiv(const APInt &RHS) const { 1671 if (isNegative()) { 1672 if (RHS.isNegative()) 1673 return (-(*this)).udiv(-RHS); 1674 return -((-(*this)).udiv(RHS)); 1675 } 1676 if (RHS.isNegative()) 1677 return -(this->udiv(-RHS)); 1678 return this->udiv(RHS); 1679 } 1680 1681 APInt APInt::sdiv(int64_t RHS) const { 1682 if (isNegative()) { 1683 if (RHS < 0) 1684 return (-(*this)).udiv(-RHS); 1685 return -((-(*this)).udiv(RHS)); 1686 } 1687 if (RHS < 0) 1688 return -(this->udiv(-RHS)); 1689 return this->udiv(RHS); 1690 } 1691 1692 APInt APInt::urem(const APInt &RHS) const { 1693 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1694 if (isSingleWord()) { 1695 assert(RHS.U.VAL != 0 && "Remainder by zero?"); 1696 return APInt(BitWidth, U.VAL % RHS.U.VAL); 1697 } 1698 1699 // Get some facts about the LHS 1700 unsigned lhsWords = getNumWords(getActiveBits()); 1701 1702 // Get some facts about the RHS 1703 unsigned rhsBits = RHS.getActiveBits(); 1704 unsigned rhsWords = getNumWords(rhsBits); 1705 assert(rhsWords && "Performing remainder operation by zero ???"); 1706 1707 // Check the degenerate cases 1708 if (lhsWords == 0) 1709 // 0 % Y ===> 0 1710 return APInt(BitWidth, 0); 1711 if (rhsBits == 1) 1712 // X % 1 ===> 0 1713 return APInt(BitWidth, 0); 1714 if (lhsWords < rhsWords || this->ult(RHS)) 1715 // X % Y ===> X, iff X < Y 1716 return *this; 1717 if (*this == RHS) 1718 // X % X == 0; 1719 return APInt(BitWidth, 0); 1720 if (lhsWords == 1) 1721 // All high words are zero, just use native remainder 1722 return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]); 1723 1724 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1725 APInt Remainder(BitWidth, 0); 1726 divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal); 1727 return Remainder; 1728 } 1729 1730 uint64_t APInt::urem(uint64_t RHS) const { 1731 assert(RHS != 0 && "Remainder by zero?"); 1732 1733 if (isSingleWord()) 1734 return U.VAL % RHS; 1735 1736 // Get some facts about the LHS 1737 unsigned lhsWords = getNumWords(getActiveBits()); 1738 1739 // Check the degenerate cases 1740 if (lhsWords == 0) 1741 // 0 % Y ===> 0 1742 return 0; 1743 if (RHS == 1) 1744 // X % 1 ===> 0 1745 return 0; 1746 if (this->ult(RHS)) 1747 // X % Y ===> X, iff X < Y 1748 return getZExtValue(); 1749 if (*this == RHS) 1750 // X % X == 0; 1751 return 0; 1752 if (lhsWords == 1) 1753 // All high words are zero, just use native remainder 1754 return U.pVal[0] % RHS; 1755 1756 // We have to compute it the hard way. Invoke the Knuth divide algorithm. 1757 uint64_t Remainder; 1758 divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder); 1759 return Remainder; 1760 } 1761 1762 APInt APInt::srem(const APInt &RHS) const { 1763 if (isNegative()) { 1764 if (RHS.isNegative()) 1765 return -((-(*this)).urem(-RHS)); 1766 return -((-(*this)).urem(RHS)); 1767 } 1768 if (RHS.isNegative()) 1769 return this->urem(-RHS); 1770 return this->urem(RHS); 1771 } 1772 1773 int64_t APInt::srem(int64_t RHS) const { 1774 if (isNegative()) { 1775 if (RHS < 0) 1776 return -((-(*this)).urem(-RHS)); 1777 return -((-(*this)).urem(RHS)); 1778 } 1779 if (RHS < 0) 1780 return this->urem(-RHS); 1781 return this->urem(RHS); 1782 } 1783 1784 void APInt::udivrem(const APInt &LHS, const APInt &RHS, 1785 APInt &Quotient, APInt &Remainder) { 1786 assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same"); 1787 unsigned BitWidth = LHS.BitWidth; 1788 1789 // First, deal with the easy case 1790 if (LHS.isSingleWord()) { 1791 assert(RHS.U.VAL != 0 && "Divide by zero?"); 1792 uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL; 1793 uint64_t RemVal = LHS.U.VAL % RHS.U.VAL; 1794 Quotient = APInt(BitWidth, QuotVal); 1795 Remainder = APInt(BitWidth, RemVal); 1796 return; 1797 } 1798 1799 // Get some size facts about the dividend and divisor 1800 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1801 unsigned rhsBits = RHS.getActiveBits(); 1802 unsigned rhsWords = getNumWords(rhsBits); 1803 assert(rhsWords && "Performing divrem operation by zero ???"); 1804 1805 // Check the degenerate cases 1806 if (lhsWords == 0) { 1807 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1808 Remainder = APInt(BitWidth, 0); // 0 % Y ===> 0 1809 return; 1810 } 1811 1812 if (rhsBits == 1) { 1813 Quotient = LHS; // X / 1 ===> X 1814 Remainder = APInt(BitWidth, 0); // X % 1 ===> 0 1815 } 1816 1817 if (lhsWords < rhsWords || LHS.ult(RHS)) { 1818 Remainder = LHS; // X % Y ===> X, iff X < Y 1819 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1820 return; 1821 } 1822 1823 if (LHS == RHS) { 1824 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1825 Remainder = APInt(BitWidth, 0); // X % X ===> 0; 1826 return; 1827 } 1828 1829 // Make sure there is enough space to hold the results. 1830 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1831 // change the size. This is necessary if Quotient or Remainder is aliased 1832 // with LHS or RHS. 1833 Quotient.reallocate(BitWidth); 1834 Remainder.reallocate(BitWidth); 1835 1836 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1837 // There is only one word to consider so use the native versions. 1838 uint64_t lhsValue = LHS.U.pVal[0]; 1839 uint64_t rhsValue = RHS.U.pVal[0]; 1840 Quotient = lhsValue / rhsValue; 1841 Remainder = lhsValue % rhsValue; 1842 return; 1843 } 1844 1845 // Okay, lets do it the long way 1846 divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, 1847 Remainder.U.pVal); 1848 // Clear the rest of the Quotient and Remainder. 1849 std::memset(Quotient.U.pVal + lhsWords, 0, 1850 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1851 std::memset(Remainder.U.pVal + rhsWords, 0, 1852 (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE); 1853 } 1854 1855 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, 1856 uint64_t &Remainder) { 1857 assert(RHS != 0 && "Divide by zero?"); 1858 unsigned BitWidth = LHS.BitWidth; 1859 1860 // First, deal with the easy case 1861 if (LHS.isSingleWord()) { 1862 uint64_t QuotVal = LHS.U.VAL / RHS; 1863 Remainder = LHS.U.VAL % RHS; 1864 Quotient = APInt(BitWidth, QuotVal); 1865 return; 1866 } 1867 1868 // Get some size facts about the dividend and divisor 1869 unsigned lhsWords = getNumWords(LHS.getActiveBits()); 1870 1871 // Check the degenerate cases 1872 if (lhsWords == 0) { 1873 Quotient = APInt(BitWidth, 0); // 0 / Y ===> 0 1874 Remainder = 0; // 0 % Y ===> 0 1875 return; 1876 } 1877 1878 if (RHS == 1) { 1879 Quotient = LHS; // X / 1 ===> X 1880 Remainder = 0; // X % 1 ===> 0 1881 return; 1882 } 1883 1884 if (LHS.ult(RHS)) { 1885 Remainder = LHS.getZExtValue(); // X % Y ===> X, iff X < Y 1886 Quotient = APInt(BitWidth, 0); // X / Y ===> 0, iff X < Y 1887 return; 1888 } 1889 1890 if (LHS == RHS) { 1891 Quotient = APInt(BitWidth, 1); // X / X ===> 1 1892 Remainder = 0; // X % X ===> 0; 1893 return; 1894 } 1895 1896 // Make sure there is enough space to hold the results. 1897 // NOTE: This assumes that reallocate won't affect any bits if it doesn't 1898 // change the size. This is necessary if Quotient is aliased with LHS. 1899 Quotient.reallocate(BitWidth); 1900 1901 if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1. 1902 // There is only one word to consider so use the native versions. 1903 uint64_t lhsValue = LHS.U.pVal[0]; 1904 Quotient = lhsValue / RHS; 1905 Remainder = lhsValue % RHS; 1906 return; 1907 } 1908 1909 // Okay, lets do it the long way 1910 divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder); 1911 // Clear the rest of the Quotient. 1912 std::memset(Quotient.U.pVal + lhsWords, 0, 1913 (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE); 1914 } 1915 1916 void APInt::sdivrem(const APInt &LHS, const APInt &RHS, 1917 APInt &Quotient, APInt &Remainder) { 1918 if (LHS.isNegative()) { 1919 if (RHS.isNegative()) 1920 APInt::udivrem(-LHS, -RHS, Quotient, Remainder); 1921 else { 1922 APInt::udivrem(-LHS, RHS, Quotient, Remainder); 1923 Quotient.negate(); 1924 } 1925 Remainder.negate(); 1926 } else if (RHS.isNegative()) { 1927 APInt::udivrem(LHS, -RHS, Quotient, Remainder); 1928 Quotient.negate(); 1929 } else { 1930 APInt::udivrem(LHS, RHS, Quotient, Remainder); 1931 } 1932 } 1933 1934 void APInt::sdivrem(const APInt &LHS, int64_t RHS, 1935 APInt &Quotient, int64_t &Remainder) { 1936 uint64_t R = Remainder; 1937 if (LHS.isNegative()) { 1938 if (RHS < 0) 1939 APInt::udivrem(-LHS, -RHS, Quotient, R); 1940 else { 1941 APInt::udivrem(-LHS, RHS, Quotient, R); 1942 Quotient.negate(); 1943 } 1944 R = -R; 1945 } else if (RHS < 0) { 1946 APInt::udivrem(LHS, -RHS, Quotient, R); 1947 Quotient.negate(); 1948 } else { 1949 APInt::udivrem(LHS, RHS, Quotient, R); 1950 } 1951 Remainder = R; 1952 } 1953 1954 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { 1955 APInt Res = *this+RHS; 1956 Overflow = isNonNegative() == RHS.isNonNegative() && 1957 Res.isNonNegative() != isNonNegative(); 1958 return Res; 1959 } 1960 1961 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { 1962 APInt Res = *this+RHS; 1963 Overflow = Res.ult(RHS); 1964 return Res; 1965 } 1966 1967 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { 1968 APInt Res = *this - RHS; 1969 Overflow = isNonNegative() != RHS.isNonNegative() && 1970 Res.isNonNegative() != isNonNegative(); 1971 return Res; 1972 } 1973 1974 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { 1975 APInt Res = *this-RHS; 1976 Overflow = Res.ugt(*this); 1977 return Res; 1978 } 1979 1980 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { 1981 // MININT/-1 --> overflow. 1982 Overflow = isMinSignedValue() && RHS.isAllOnesValue(); 1983 return sdiv(RHS); 1984 } 1985 1986 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { 1987 APInt Res = *this * RHS; 1988 1989 if (*this != 0 && RHS != 0) 1990 Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; 1991 else 1992 Overflow = false; 1993 return Res; 1994 } 1995 1996 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { 1997 if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { 1998 Overflow = true; 1999 return *this * RHS; 2000 } 2001 2002 APInt Res = lshr(1) * RHS; 2003 Overflow = Res.isNegative(); 2004 Res <<= 1; 2005 if ((*this)[0]) { 2006 Res += RHS; 2007 if (Res.ult(RHS)) 2008 Overflow = true; 2009 } 2010 return Res; 2011 } 2012 2013 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const { 2014 Overflow = ShAmt.uge(getBitWidth()); 2015 if (Overflow) 2016 return APInt(BitWidth, 0); 2017 2018 if (isNonNegative()) // Don't allow sign change. 2019 Overflow = ShAmt.uge(countLeadingZeros()); 2020 else 2021 Overflow = ShAmt.uge(countLeadingOnes()); 2022 2023 return *this << ShAmt; 2024 } 2025 2026 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const { 2027 Overflow = ShAmt.uge(getBitWidth()); 2028 if (Overflow) 2029 return APInt(BitWidth, 0); 2030 2031 Overflow = ShAmt.ugt(countLeadingZeros()); 2032 2033 return *this << ShAmt; 2034 } 2035 2036 APInt APInt::sadd_sat(const APInt &RHS) const { 2037 bool Overflow; 2038 APInt Res = sadd_ov(RHS, Overflow); 2039 if (!Overflow) 2040 return Res; 2041 2042 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2043 : APInt::getSignedMaxValue(BitWidth); 2044 } 2045 2046 APInt APInt::uadd_sat(const APInt &RHS) const { 2047 bool Overflow; 2048 APInt Res = uadd_ov(RHS, Overflow); 2049 if (!Overflow) 2050 return Res; 2051 2052 return APInt::getMaxValue(BitWidth); 2053 } 2054 2055 APInt APInt::ssub_sat(const APInt &RHS) const { 2056 bool Overflow; 2057 APInt Res = ssub_ov(RHS, Overflow); 2058 if (!Overflow) 2059 return Res; 2060 2061 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2062 : APInt::getSignedMaxValue(BitWidth); 2063 } 2064 2065 APInt APInt::usub_sat(const APInt &RHS) const { 2066 bool Overflow; 2067 APInt Res = usub_ov(RHS, Overflow); 2068 if (!Overflow) 2069 return Res; 2070 2071 return APInt(BitWidth, 0); 2072 } 2073 2074 APInt APInt::smul_sat(const APInt &RHS) const { 2075 bool Overflow; 2076 APInt Res = smul_ov(RHS, Overflow); 2077 if (!Overflow) 2078 return Res; 2079 2080 // The result is negative if one and only one of inputs is negative. 2081 bool ResIsNegative = isNegative() ^ RHS.isNegative(); 2082 2083 return ResIsNegative ? APInt::getSignedMinValue(BitWidth) 2084 : APInt::getSignedMaxValue(BitWidth); 2085 } 2086 2087 APInt APInt::umul_sat(const APInt &RHS) const { 2088 bool Overflow; 2089 APInt Res = umul_ov(RHS, Overflow); 2090 if (!Overflow) 2091 return Res; 2092 2093 return APInt::getMaxValue(BitWidth); 2094 } 2095 2096 APInt APInt::sshl_sat(const APInt &RHS) const { 2097 bool Overflow; 2098 APInt Res = sshl_ov(RHS, Overflow); 2099 if (!Overflow) 2100 return Res; 2101 2102 return isNegative() ? APInt::getSignedMinValue(BitWidth) 2103 : APInt::getSignedMaxValue(BitWidth); 2104 } 2105 2106 APInt APInt::ushl_sat(const APInt &RHS) const { 2107 bool Overflow; 2108 APInt Res = ushl_ov(RHS, Overflow); 2109 if (!Overflow) 2110 return Res; 2111 2112 return APInt::getMaxValue(BitWidth); 2113 } 2114 2115 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { 2116 // Check our assumptions here 2117 assert(!str.empty() && "Invalid string length"); 2118 assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 || 2119 radix == 36) && 2120 "Radix should be 2, 8, 10, 16, or 36!"); 2121 2122 StringRef::iterator p = str.begin(); 2123 size_t slen = str.size(); 2124 bool isNeg = *p == '-'; 2125 if (*p == '-' || *p == '+') { 2126 p++; 2127 slen--; 2128 assert(slen && "String is only a sign, needs a value."); 2129 } 2130 assert((slen <= numbits || radix != 2) && "Insufficient bit width"); 2131 assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); 2132 assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); 2133 assert((((slen-1)*64)/22 <= numbits || radix != 10) && 2134 "Insufficient bit width"); 2135 2136 // Allocate memory if needed 2137 if (isSingleWord()) 2138 U.VAL = 0; 2139 else 2140 U.pVal = getClearedMemory(getNumWords()); 2141 2142 // Figure out if we can shift instead of multiply 2143 unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); 2144 2145 // Enter digit traversal loop 2146 for (StringRef::iterator e = str.end(); p != e; ++p) { 2147 unsigned digit = getDigit(*p, radix); 2148 assert(digit < radix && "Invalid character in digit string"); 2149 2150 // Shift or multiply the value by the radix 2151 if (slen > 1) { 2152 if (shift) 2153 *this <<= shift; 2154 else 2155 *this *= radix; 2156 } 2157 2158 // Add in the digit we just interpreted 2159 *this += digit; 2160 } 2161 // If its negative, put it in two's complement form 2162 if (isNeg) 2163 this->negate(); 2164 } 2165 2166 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, 2167 bool Signed, bool formatAsCLiteral) const { 2168 assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 || 2169 Radix == 36) && 2170 "Radix should be 2, 8, 10, 16, or 36!"); 2171 2172 const char *Prefix = ""; 2173 if (formatAsCLiteral) { 2174 switch (Radix) { 2175 case 2: 2176 // Binary literals are a non-standard extension added in gcc 4.3: 2177 // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html 2178 Prefix = "0b"; 2179 break; 2180 case 8: 2181 Prefix = "0"; 2182 break; 2183 case 10: 2184 break; // No prefix 2185 case 16: 2186 Prefix = "0x"; 2187 break; 2188 default: 2189 llvm_unreachable("Invalid radix!"); 2190 } 2191 } 2192 2193 // First, check for a zero value and just short circuit the logic below. 2194 if (*this == 0) { 2195 while (*Prefix) { 2196 Str.push_back(*Prefix); 2197 ++Prefix; 2198 }; 2199 Str.push_back('0'); 2200 return; 2201 } 2202 2203 static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; 2204 2205 if (isSingleWord()) { 2206 char Buffer[65]; 2207 char *BufPtr = std::end(Buffer); 2208 2209 uint64_t N; 2210 if (!Signed) { 2211 N = getZExtValue(); 2212 } else { 2213 int64_t I = getSExtValue(); 2214 if (I >= 0) { 2215 N = I; 2216 } else { 2217 Str.push_back('-'); 2218 N = -(uint64_t)I; 2219 } 2220 } 2221 2222 while (*Prefix) { 2223 Str.push_back(*Prefix); 2224 ++Prefix; 2225 }; 2226 2227 while (N) { 2228 *--BufPtr = Digits[N % Radix]; 2229 N /= Radix; 2230 } 2231 Str.append(BufPtr, std::end(Buffer)); 2232 return; 2233 } 2234 2235 APInt Tmp(*this); 2236 2237 if (Signed && isNegative()) { 2238 // They want to print the signed version and it is a negative value 2239 // Flip the bits and add one to turn it into the equivalent positive 2240 // value and put a '-' in the result. 2241 Tmp.negate(); 2242 Str.push_back('-'); 2243 } 2244 2245 while (*Prefix) { 2246 Str.push_back(*Prefix); 2247 ++Prefix; 2248 }; 2249 2250 // We insert the digits backward, then reverse them to get the right order. 2251 unsigned StartDig = Str.size(); 2252 2253 // For the 2, 8 and 16 bit cases, we can just shift instead of divide 2254 // because the number of bits per digit (1, 3 and 4 respectively) divides 2255 // equally. We just shift until the value is zero. 2256 if (Radix == 2 || Radix == 8 || Radix == 16) { 2257 // Just shift tmp right for each digit width until it becomes zero 2258 unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); 2259 unsigned MaskAmt = Radix - 1; 2260 2261 while (Tmp.getBoolValue()) { 2262 unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; 2263 Str.push_back(Digits[Digit]); 2264 Tmp.lshrInPlace(ShiftAmt); 2265 } 2266 } else { 2267 while (Tmp.getBoolValue()) { 2268 uint64_t Digit; 2269 udivrem(Tmp, Radix, Tmp, Digit); 2270 assert(Digit < Radix && "divide failed"); 2271 Str.push_back(Digits[Digit]); 2272 } 2273 } 2274 2275 // Reverse the digits before returning. 2276 std::reverse(Str.begin()+StartDig, Str.end()); 2277 } 2278 2279 /// Returns the APInt as a std::string. Note that this is an inefficient method. 2280 /// It is better to pass in a SmallVector/SmallString to the methods above. 2281 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { 2282 SmallString<40> S; 2283 toString(S, Radix, Signed, /* formatAsCLiteral = */false); 2284 return std::string(S.str()); 2285 } 2286 2287 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 2288 LLVM_DUMP_METHOD void APInt::dump() const { 2289 SmallString<40> S, U; 2290 this->toStringUnsigned(U); 2291 this->toStringSigned(S); 2292 dbgs() << "APInt(" << BitWidth << "b, " 2293 << U << "u " << S << "s)\n"; 2294 } 2295 #endif 2296 2297 void APInt::print(raw_ostream &OS, bool isSigned) const { 2298 SmallString<40> S; 2299 this->toString(S, 10, isSigned, /* formatAsCLiteral = */false); 2300 OS << S; 2301 } 2302 2303 // This implements a variety of operations on a representation of 2304 // arbitrary precision, two's-complement, bignum integer values. 2305 2306 // Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe 2307 // and unrestricting assumption. 2308 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0, 2309 "Part width must be divisible by 2!"); 2310 2311 /* Some handy functions local to this file. */ 2312 2313 /* Returns the integer part with the least significant BITS set. 2314 BITS cannot be zero. */ 2315 static inline APInt::WordType lowBitMask(unsigned bits) { 2316 assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD); 2317 2318 return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits); 2319 } 2320 2321 /* Returns the value of the lower half of PART. */ 2322 static inline APInt::WordType lowHalf(APInt::WordType part) { 2323 return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2); 2324 } 2325 2326 /* Returns the value of the upper half of PART. */ 2327 static inline APInt::WordType highHalf(APInt::WordType part) { 2328 return part >> (APInt::APINT_BITS_PER_WORD / 2); 2329 } 2330 2331 /* Returns the bit number of the most significant set bit of a part. 2332 If the input number has no bits set -1U is returned. */ 2333 static unsigned partMSB(APInt::WordType value) { 2334 return findLastSet(value, ZB_Max); 2335 } 2336 2337 /* Returns the bit number of the least significant set bit of a 2338 part. If the input number has no bits set -1U is returned. */ 2339 static unsigned partLSB(APInt::WordType value) { 2340 return findFirstSet(value, ZB_Max); 2341 } 2342 2343 /* Sets the least significant part of a bignum to the input value, and 2344 zeroes out higher parts. */ 2345 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) { 2346 assert(parts > 0); 2347 2348 dst[0] = part; 2349 for (unsigned i = 1; i < parts; i++) 2350 dst[i] = 0; 2351 } 2352 2353 /* Assign one bignum to another. */ 2354 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) { 2355 for (unsigned i = 0; i < parts; i++) 2356 dst[i] = src[i]; 2357 } 2358 2359 /* Returns true if a bignum is zero, false otherwise. */ 2360 bool APInt::tcIsZero(const WordType *src, unsigned parts) { 2361 for (unsigned i = 0; i < parts; i++) 2362 if (src[i]) 2363 return false; 2364 2365 return true; 2366 } 2367 2368 /* Extract the given bit of a bignum; returns 0 or 1. */ 2369 int APInt::tcExtractBit(const WordType *parts, unsigned bit) { 2370 return (parts[whichWord(bit)] & maskBit(bit)) != 0; 2371 } 2372 2373 /* Set the given bit of a bignum. */ 2374 void APInt::tcSetBit(WordType *parts, unsigned bit) { 2375 parts[whichWord(bit)] |= maskBit(bit); 2376 } 2377 2378 /* Clears the given bit of a bignum. */ 2379 void APInt::tcClearBit(WordType *parts, unsigned bit) { 2380 parts[whichWord(bit)] &= ~maskBit(bit); 2381 } 2382 2383 /* Returns the bit number of the least significant set bit of a 2384 number. If the input number has no bits set -1U is returned. */ 2385 unsigned APInt::tcLSB(const WordType *parts, unsigned n) { 2386 for (unsigned i = 0; i < n; i++) { 2387 if (parts[i] != 0) { 2388 unsigned lsb = partLSB(parts[i]); 2389 2390 return lsb + i * APINT_BITS_PER_WORD; 2391 } 2392 } 2393 2394 return -1U; 2395 } 2396 2397 /* Returns the bit number of the most significant set bit of a number. 2398 If the input number has no bits set -1U is returned. */ 2399 unsigned APInt::tcMSB(const WordType *parts, unsigned n) { 2400 do { 2401 --n; 2402 2403 if (parts[n] != 0) { 2404 unsigned msb = partMSB(parts[n]); 2405 2406 return msb + n * APINT_BITS_PER_WORD; 2407 } 2408 } while (n); 2409 2410 return -1U; 2411 } 2412 2413 /* Copy the bit vector of width srcBITS from SRC, starting at bit 2414 srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes 2415 the least significant bit of DST. All high bits above srcBITS in 2416 DST are zero-filled. */ 2417 void 2418 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src, 2419 unsigned srcBits, unsigned srcLSB) { 2420 unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; 2421 assert(dstParts <= dstCount); 2422 2423 unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD; 2424 tcAssign (dst, src + firstSrcPart, dstParts); 2425 2426 unsigned shift = srcLSB % APINT_BITS_PER_WORD; 2427 tcShiftRight (dst, dstParts, shift); 2428 2429 /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC 2430 in DST. If this is less that srcBits, append the rest, else 2431 clear the high bits. */ 2432 unsigned n = dstParts * APINT_BITS_PER_WORD - shift; 2433 if (n < srcBits) { 2434 WordType mask = lowBitMask (srcBits - n); 2435 dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) 2436 << n % APINT_BITS_PER_WORD); 2437 } else if (n > srcBits) { 2438 if (srcBits % APINT_BITS_PER_WORD) 2439 dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD); 2440 } 2441 2442 /* Clear high parts. */ 2443 while (dstParts < dstCount) 2444 dst[dstParts++] = 0; 2445 } 2446 2447 /* DST += RHS + C where C is zero or one. Returns the carry flag. */ 2448 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs, 2449 WordType c, unsigned parts) { 2450 assert(c <= 1); 2451 2452 for (unsigned i = 0; i < parts; i++) { 2453 WordType l = dst[i]; 2454 if (c) { 2455 dst[i] += rhs[i] + 1; 2456 c = (dst[i] <= l); 2457 } else { 2458 dst[i] += rhs[i]; 2459 c = (dst[i] < l); 2460 } 2461 } 2462 2463 return c; 2464 } 2465 2466 /// This function adds a single "word" integer, src, to the multiple 2467 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and 2468 /// 1 is returned if there is a carry out, otherwise 0 is returned. 2469 /// @returns the carry of the addition. 2470 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src, 2471 unsigned parts) { 2472 for (unsigned i = 0; i < parts; ++i) { 2473 dst[i] += src; 2474 if (dst[i] >= src) 2475 return 0; // No need to carry so exit early. 2476 src = 1; // Carry one to next digit. 2477 } 2478 2479 return 1; 2480 } 2481 2482 /* DST -= RHS + C where C is zero or one. Returns the carry flag. */ 2483 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs, 2484 WordType c, unsigned parts) { 2485 assert(c <= 1); 2486 2487 for (unsigned i = 0; i < parts; i++) { 2488 WordType l = dst[i]; 2489 if (c) { 2490 dst[i] -= rhs[i] + 1; 2491 c = (dst[i] >= l); 2492 } else { 2493 dst[i] -= rhs[i]; 2494 c = (dst[i] > l); 2495 } 2496 } 2497 2498 return c; 2499 } 2500 2501 /// This function subtracts a single "word" (64-bit word), src, from 2502 /// the multi-word integer array, dst[], propagating the borrowed 1 value until 2503 /// no further borrowing is needed or it runs out of "words" in dst. The result 2504 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not 2505 /// exhausted. In other words, if src > dst then this function returns 1, 2506 /// otherwise 0. 2507 /// @returns the borrow out of the subtraction 2508 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src, 2509 unsigned parts) { 2510 for (unsigned i = 0; i < parts; ++i) { 2511 WordType Dst = dst[i]; 2512 dst[i] -= src; 2513 if (src <= Dst) 2514 return 0; // No need to borrow so exit early. 2515 src = 1; // We have to "borrow 1" from next "word" 2516 } 2517 2518 return 1; 2519 } 2520 2521 /* Negate a bignum in-place. */ 2522 void APInt::tcNegate(WordType *dst, unsigned parts) { 2523 tcComplement(dst, parts); 2524 tcIncrement(dst, parts); 2525 } 2526 2527 /* DST += SRC * MULTIPLIER + CARRY if add is true 2528 DST = SRC * MULTIPLIER + CARRY if add is false 2529 2530 Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC 2531 they must start at the same point, i.e. DST == SRC. 2532 2533 If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is 2534 returned. Otherwise DST is filled with the least significant 2535 DSTPARTS parts of the result, and if all of the omitted higher 2536 parts were zero return zero, otherwise overflow occurred and 2537 return one. */ 2538 int APInt::tcMultiplyPart(WordType *dst, const WordType *src, 2539 WordType multiplier, WordType carry, 2540 unsigned srcParts, unsigned dstParts, 2541 bool add) { 2542 /* Otherwise our writes of DST kill our later reads of SRC. */ 2543 assert(dst <= src || dst >= src + srcParts); 2544 assert(dstParts <= srcParts + 1); 2545 2546 /* N loops; minimum of dstParts and srcParts. */ 2547 unsigned n = std::min(dstParts, srcParts); 2548 2549 for (unsigned i = 0; i < n; i++) { 2550 WordType low, mid, high, srcPart; 2551 2552 /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. 2553 2554 This cannot overflow, because 2555 2556 (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) 2557 2558 which is less than n^2. */ 2559 2560 srcPart = src[i]; 2561 2562 if (multiplier == 0 || srcPart == 0) { 2563 low = carry; 2564 high = 0; 2565 } else { 2566 low = lowHalf(srcPart) * lowHalf(multiplier); 2567 high = highHalf(srcPart) * highHalf(multiplier); 2568 2569 mid = lowHalf(srcPart) * highHalf(multiplier); 2570 high += highHalf(mid); 2571 mid <<= APINT_BITS_PER_WORD / 2; 2572 if (low + mid < low) 2573 high++; 2574 low += mid; 2575 2576 mid = highHalf(srcPart) * lowHalf(multiplier); 2577 high += highHalf(mid); 2578 mid <<= APINT_BITS_PER_WORD / 2; 2579 if (low + mid < low) 2580 high++; 2581 low += mid; 2582 2583 /* Now add carry. */ 2584 if (low + carry < low) 2585 high++; 2586 low += carry; 2587 } 2588 2589 if (add) { 2590 /* And now DST[i], and store the new low part there. */ 2591 if (low + dst[i] < low) 2592 high++; 2593 dst[i] += low; 2594 } else 2595 dst[i] = low; 2596 2597 carry = high; 2598 } 2599 2600 if (srcParts < dstParts) { 2601 /* Full multiplication, there is no overflow. */ 2602 assert(srcParts + 1 == dstParts); 2603 dst[srcParts] = carry; 2604 return 0; 2605 } 2606 2607 /* We overflowed if there is carry. */ 2608 if (carry) 2609 return 1; 2610 2611 /* We would overflow if any significant unwritten parts would be 2612 non-zero. This is true if any remaining src parts are non-zero 2613 and the multiplier is non-zero. */ 2614 if (multiplier) 2615 for (unsigned i = dstParts; i < srcParts; i++) 2616 if (src[i]) 2617 return 1; 2618 2619 /* We fitted in the narrow destination. */ 2620 return 0; 2621 } 2622 2623 /* DST = LHS * RHS, where DST has the same width as the operands and 2624 is filled with the least significant parts of the result. Returns 2625 one if overflow occurred, otherwise zero. DST must be disjoint 2626 from both operands. */ 2627 int APInt::tcMultiply(WordType *dst, const WordType *lhs, 2628 const WordType *rhs, unsigned parts) { 2629 assert(dst != lhs && dst != rhs); 2630 2631 int overflow = 0; 2632 tcSet(dst, 0, parts); 2633 2634 for (unsigned i = 0; i < parts; i++) 2635 overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, 2636 parts - i, true); 2637 2638 return overflow; 2639 } 2640 2641 /// DST = LHS * RHS, where DST has width the sum of the widths of the 2642 /// operands. No overflow occurs. DST must be disjoint from both operands. 2643 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs, 2644 const WordType *rhs, unsigned lhsParts, 2645 unsigned rhsParts) { 2646 /* Put the narrower number on the LHS for less loops below. */ 2647 if (lhsParts > rhsParts) 2648 return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); 2649 2650 assert(dst != lhs && dst != rhs); 2651 2652 tcSet(dst, 0, rhsParts); 2653 2654 for (unsigned i = 0; i < lhsParts; i++) 2655 tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true); 2656 } 2657 2658 /* If RHS is zero LHS and REMAINDER are left unchanged, return one. 2659 Otherwise set LHS to LHS / RHS with the fractional part discarded, 2660 set REMAINDER to the remainder, return zero. i.e. 2661 2662 OLD_LHS = RHS * LHS + REMAINDER 2663 2664 SCRATCH is a bignum of the same size as the operands and result for 2665 use by the routine; its contents need not be initialized and are 2666 destroyed. LHS, REMAINDER and SCRATCH must be distinct. 2667 */ 2668 int APInt::tcDivide(WordType *lhs, const WordType *rhs, 2669 WordType *remainder, WordType *srhs, 2670 unsigned parts) { 2671 assert(lhs != remainder && lhs != srhs && remainder != srhs); 2672 2673 unsigned shiftCount = tcMSB(rhs, parts) + 1; 2674 if (shiftCount == 0) 2675 return true; 2676 2677 shiftCount = parts * APINT_BITS_PER_WORD - shiftCount; 2678 unsigned n = shiftCount / APINT_BITS_PER_WORD; 2679 WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD); 2680 2681 tcAssign(srhs, rhs, parts); 2682 tcShiftLeft(srhs, parts, shiftCount); 2683 tcAssign(remainder, lhs, parts); 2684 tcSet(lhs, 0, parts); 2685 2686 /* Loop, subtracting SRHS if REMAINDER is greater and adding that to 2687 the total. */ 2688 for (;;) { 2689 int compare = tcCompare(remainder, srhs, parts); 2690 if (compare >= 0) { 2691 tcSubtract(remainder, srhs, 0, parts); 2692 lhs[n] |= mask; 2693 } 2694 2695 if (shiftCount == 0) 2696 break; 2697 shiftCount--; 2698 tcShiftRight(srhs, parts, 1); 2699 if ((mask >>= 1) == 0) { 2700 mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1); 2701 n--; 2702 } 2703 } 2704 2705 return false; 2706 } 2707 2708 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are 2709 /// no restrictions on Count. 2710 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) { 2711 // Don't bother performing a no-op shift. 2712 if (!Count) 2713 return; 2714 2715 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2716 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2717 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2718 2719 // Fastpath for moving by whole words. 2720 if (BitShift == 0) { 2721 std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE); 2722 } else { 2723 while (Words-- > WordShift) { 2724 Dst[Words] = Dst[Words - WordShift] << BitShift; 2725 if (Words > WordShift) 2726 Dst[Words] |= 2727 Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift); 2728 } 2729 } 2730 2731 // Fill in the remainder with 0s. 2732 std::memset(Dst, 0, WordShift * APINT_WORD_SIZE); 2733 } 2734 2735 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There 2736 /// are no restrictions on Count. 2737 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) { 2738 // Don't bother performing a no-op shift. 2739 if (!Count) 2740 return; 2741 2742 // WordShift is the inter-part shift; BitShift is the intra-part shift. 2743 unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words); 2744 unsigned BitShift = Count % APINT_BITS_PER_WORD; 2745 2746 unsigned WordsToMove = Words - WordShift; 2747 // Fastpath for moving by whole words. 2748 if (BitShift == 0) { 2749 std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE); 2750 } else { 2751 for (unsigned i = 0; i != WordsToMove; ++i) { 2752 Dst[i] = Dst[i + WordShift] >> BitShift; 2753 if (i + 1 != WordsToMove) 2754 Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift); 2755 } 2756 } 2757 2758 // Fill in the remainder with 0s. 2759 std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE); 2760 } 2761 2762 /* Bitwise and of two bignums. */ 2763 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) { 2764 for (unsigned i = 0; i < parts; i++) 2765 dst[i] &= rhs[i]; 2766 } 2767 2768 /* Bitwise inclusive or of two bignums. */ 2769 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) { 2770 for (unsigned i = 0; i < parts; i++) 2771 dst[i] |= rhs[i]; 2772 } 2773 2774 /* Bitwise exclusive or of two bignums. */ 2775 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) { 2776 for (unsigned i = 0; i < parts; i++) 2777 dst[i] ^= rhs[i]; 2778 } 2779 2780 /* Complement a bignum in-place. */ 2781 void APInt::tcComplement(WordType *dst, unsigned parts) { 2782 for (unsigned i = 0; i < parts; i++) 2783 dst[i] = ~dst[i]; 2784 } 2785 2786 /* Comparison (unsigned) of two bignums. */ 2787 int APInt::tcCompare(const WordType *lhs, const WordType *rhs, 2788 unsigned parts) { 2789 while (parts) { 2790 parts--; 2791 if (lhs[parts] != rhs[parts]) 2792 return (lhs[parts] > rhs[parts]) ? 1 : -1; 2793 } 2794 2795 return 0; 2796 } 2797 2798 /* Set the least significant BITS bits of a bignum, clear the 2799 rest. */ 2800 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts, 2801 unsigned bits) { 2802 unsigned i = 0; 2803 while (bits > APINT_BITS_PER_WORD) { 2804 dst[i++] = ~(WordType) 0; 2805 bits -= APINT_BITS_PER_WORD; 2806 } 2807 2808 if (bits) 2809 dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits); 2810 2811 while (i < parts) 2812 dst[i++] = 0; 2813 } 2814 2815 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B, 2816 APInt::Rounding RM) { 2817 // Currently udivrem always rounds down. 2818 switch (RM) { 2819 case APInt::Rounding::DOWN: 2820 case APInt::Rounding::TOWARD_ZERO: 2821 return A.udiv(B); 2822 case APInt::Rounding::UP: { 2823 APInt Quo, Rem; 2824 APInt::udivrem(A, B, Quo, Rem); 2825 if (Rem == 0) 2826 return Quo; 2827 return Quo + 1; 2828 } 2829 } 2830 llvm_unreachable("Unknown APInt::Rounding enum"); 2831 } 2832 2833 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B, 2834 APInt::Rounding RM) { 2835 switch (RM) { 2836 case APInt::Rounding::DOWN: 2837 case APInt::Rounding::UP: { 2838 APInt Quo, Rem; 2839 APInt::sdivrem(A, B, Quo, Rem); 2840 if (Rem == 0) 2841 return Quo; 2842 // This algorithm deals with arbitrary rounding mode used by sdivrem. 2843 // We want to check whether the non-integer part of the mathematical value 2844 // is negative or not. If the non-integer part is negative, we need to round 2845 // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's 2846 // already rounded down. 2847 if (RM == APInt::Rounding::DOWN) { 2848 if (Rem.isNegative() != B.isNegative()) 2849 return Quo - 1; 2850 return Quo; 2851 } 2852 if (Rem.isNegative() != B.isNegative()) 2853 return Quo; 2854 return Quo + 1; 2855 } 2856 // Currently sdiv rounds towards zero. 2857 case APInt::Rounding::TOWARD_ZERO: 2858 return A.sdiv(B); 2859 } 2860 llvm_unreachable("Unknown APInt::Rounding enum"); 2861 } 2862 2863 Optional<APInt> 2864 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, 2865 unsigned RangeWidth) { 2866 unsigned CoeffWidth = A.getBitWidth(); 2867 assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); 2868 assert(RangeWidth <= CoeffWidth && 2869 "Value range width should be less than coefficient width"); 2870 assert(RangeWidth > 1 && "Value range bit width should be > 1"); 2871 2872 LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B 2873 << "x + " << C << ", rw:" << RangeWidth << '\n'); 2874 2875 // Identify 0 as a (non)solution immediately. 2876 if (C.sextOrTrunc(RangeWidth).isNullValue() ) { 2877 LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); 2878 return APInt(CoeffWidth, 0); 2879 } 2880 2881 // The result of APInt arithmetic has the same bit width as the operands, 2882 // so it can actually lose high bits. A product of two n-bit integers needs 2883 // 2n-1 bits to represent the full value. 2884 // The operation done below (on quadratic coefficients) that can produce 2885 // the largest value is the evaluation of the equation during bisection, 2886 // which needs 3 times the bitwidth of the coefficient, so the total number 2887 // of required bits is 3n. 2888 // 2889 // The purpose of this extension is to simulate the set Z of all integers, 2890 // where n+1 > n for all n in Z. In Z it makes sense to talk about positive 2891 // and negative numbers (not so much in a modulo arithmetic). The method 2892 // used to solve the equation is based on the standard formula for real 2893 // numbers, and uses the concepts of "positive" and "negative" with their 2894 // usual meanings. 2895 CoeffWidth *= 3; 2896 A = A.sext(CoeffWidth); 2897 B = B.sext(CoeffWidth); 2898 C = C.sext(CoeffWidth); 2899 2900 // Make A > 0 for simplicity. Negate cannot overflow at this point because 2901 // the bit width has increased. 2902 if (A.isNegative()) { 2903 A.negate(); 2904 B.negate(); 2905 C.negate(); 2906 } 2907 2908 // Solving an equation q(x) = 0 with coefficients in modular arithmetic 2909 // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., 2910 // and R = 2^BitWidth. 2911 // Since we're trying not only to find exact solutions, but also values 2912 // that "wrap around", such a set will always have a solution, i.e. an x 2913 // that satisfies at least one of the equations, or such that |q(x)| 2914 // exceeds kR, while |q(x-1)| for the same k does not. 2915 // 2916 // We need to find a value k, such that Ax^2 + Bx + C = kR will have a 2917 // positive solution n (in the above sense), and also such that the n 2918 // will be the least among all solutions corresponding to k = 0, 1, ... 2919 // (more precisely, the least element in the set 2920 // { n(k) | k is such that a solution n(k) exists }). 2921 // 2922 // Consider the parabola (over real numbers) that corresponds to the 2923 // quadratic equation. Since A > 0, the arms of the parabola will point 2924 // up. Picking different values of k will shift it up and down by R. 2925 // 2926 // We want to shift the parabola in such a way as to reduce the problem 2927 // of solving q(x) = kR to solving shifted_q(x) = 0. 2928 // (The interesting solutions are the ceilings of the real number 2929 // solutions.) 2930 APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); 2931 APInt TwoA = 2 * A; 2932 APInt SqrB = B * B; 2933 bool PickLow; 2934 2935 auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt { 2936 assert(A.isStrictlyPositive()); 2937 APInt T = V.abs().urem(A); 2938 if (T.isNullValue()) 2939 return V; 2940 return V.isNegative() ? V+T : V+(A-T); 2941 }; 2942 2943 // The vertex of the parabola is at -B/2A, but since A > 0, it's negative 2944 // iff B is positive. 2945 if (B.isNonNegative()) { 2946 // If B >= 0, the vertex it at a negative location (or at 0), so in 2947 // order to have a non-negative solution we need to pick k that makes 2948 // C-kR negative. To satisfy all the requirements for the solution 2949 // that we are looking for, it needs to be closest to 0 of all k. 2950 C = C.srem(R); 2951 if (C.isStrictlyPositive()) 2952 C -= R; 2953 // Pick the greater solution. 2954 PickLow = false; 2955 } else { 2956 // If B < 0, the vertex is at a positive location. For any solution 2957 // to exist, the discriminant must be non-negative. This means that 2958 // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a 2959 // lower bound on values of k: kR >= C - B^2/4A. 2960 APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. 2961 // Round LowkR up (towards +inf) to the nearest kR. 2962 LowkR = RoundUp(LowkR, R); 2963 2964 // If there exists k meeting the condition above, and such that 2965 // C-kR > 0, there will be two positive real number solutions of 2966 // q(x) = kR. Out of all such values of k, pick the one that makes 2967 // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). 2968 // In other words, find maximum k such that LowkR <= kR < C. 2969 if (C.sgt(LowkR)) { 2970 // If LowkR < C, then such a k is guaranteed to exist because 2971 // LowkR itself is a multiple of R. 2972 C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) 2973 // Pick the smaller solution. 2974 PickLow = true; 2975 } else { 2976 // If C-kR < 0 for all potential k's, it means that one solution 2977 // will be negative, while the other will be positive. The positive 2978 // solution will shift towards 0 if the parabola is moved up. 2979 // Pick the kR closest to the lower bound (i.e. make C-kR closest 2980 // to 0, or in other words, out of all parabolas that have solutions, 2981 // pick the one that is the farthest "up"). 2982 // Since LowkR is itself a multiple of R, simply take C-LowkR. 2983 C -= LowkR; 2984 // Pick the greater solution. 2985 PickLow = false; 2986 } 2987 } 2988 2989 LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " 2990 << B << "x + " << C << ", rw:" << RangeWidth << '\n'); 2991 2992 APInt D = SqrB - 4*A*C; 2993 assert(D.isNonNegative() && "Negative discriminant"); 2994 APInt SQ = D.sqrt(); 2995 2996 APInt Q = SQ * SQ; 2997 bool InexactSQ = Q != D; 2998 // The calculated SQ may actually be greater than the exact (non-integer) 2999 // value. If that's the case, decrement SQ to get a value that is lower. 3000 if (Q.sgt(D)) 3001 SQ -= 1; 3002 3003 APInt X; 3004 APInt Rem; 3005 3006 // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. 3007 // When using the quadratic formula directly, the calculated low root 3008 // may be greater than the exact one, since we would be subtracting SQ. 3009 // To make sure that the calculated root is not greater than the exact 3010 // one, subtract SQ+1 when calculating the low root (for inexact value 3011 // of SQ). 3012 if (PickLow) 3013 APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); 3014 else 3015 APInt::sdivrem(-B + SQ, TwoA, X, Rem); 3016 3017 // The updated coefficients should be such that the (exact) solution is 3018 // positive. Since APInt division rounds towards 0, the calculated one 3019 // can be 0, but cannot be negative. 3020 assert(X.isNonNegative() && "Solution should be non-negative"); 3021 3022 if (!InexactSQ && Rem.isNullValue()) { 3023 LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); 3024 return X; 3025 } 3026 3027 assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); 3028 // The exact value of the square root of D should be between SQ and SQ+1. 3029 // This implies that the solution should be between that corresponding to 3030 // SQ (i.e. X) and that corresponding to SQ+1. 3031 // 3032 // The calculated X cannot be greater than the exact (real) solution. 3033 // Actually it must be strictly less than the exact solution, while 3034 // X+1 will be greater than or equal to it. 3035 3036 APInt VX = (A*X + B)*X + C; 3037 APInt VY = VX + TwoA*X + A + B; 3038 bool SignChange = VX.isNegative() != VY.isNegative() || 3039 VX.isNullValue() != VY.isNullValue(); 3040 // If the sign did not change between X and X+1, X is not a valid solution. 3041 // This could happen when the actual (exact) roots don't have an integer 3042 // between them, so they would both be contained between X and X+1. 3043 if (!SignChange) { 3044 LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); 3045 return None; 3046 } 3047 3048 X += 1; 3049 LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); 3050 return X; 3051 } 3052 3053 Optional<unsigned> 3054 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) { 3055 assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth"); 3056 if (A == B) 3057 return llvm::None; 3058 return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1); 3059 } 3060 3061 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst 3062 /// with the integer held in IntVal. 3063 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, 3064 unsigned StoreBytes) { 3065 assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); 3066 const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); 3067 3068 if (sys::IsLittleEndianHost) { 3069 // Little-endian host - the source is ordered from LSB to MSB. Order the 3070 // destination from LSB to MSB: Do a straight copy. 3071 memcpy(Dst, Src, StoreBytes); 3072 } else { 3073 // Big-endian host - the source is an array of 64 bit words ordered from 3074 // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination 3075 // from MSB to LSB: Reverse the word order, but not the bytes in a word. 3076 while (StoreBytes > sizeof(uint64_t)) { 3077 StoreBytes -= sizeof(uint64_t); 3078 // May not be aligned so use memcpy. 3079 memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); 3080 Src += sizeof(uint64_t); 3081 } 3082 3083 memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); 3084 } 3085 } 3086 3087 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting 3088 /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 3089 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, 3090 unsigned LoadBytes) { 3091 assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); 3092 uint8_t *Dst = reinterpret_cast<uint8_t *>( 3093 const_cast<uint64_t *>(IntVal.getRawData())); 3094 3095 if (sys::IsLittleEndianHost) 3096 // Little-endian host - the destination must be ordered from LSB to MSB. 3097 // The source is ordered from LSB to MSB: Do a straight copy. 3098 memcpy(Dst, Src, LoadBytes); 3099 else { 3100 // Big-endian - the destination is an array of 64 bit words ordered from 3101 // LSW to MSW. Each word must be ordered from MSB to LSB. The source is 3102 // ordered from MSB to LSB: Reverse the word order, but not the bytes in 3103 // a word. 3104 while (LoadBytes > sizeof(uint64_t)) { 3105 LoadBytes -= sizeof(uint64_t); 3106 // May not be aligned so use memcpy. 3107 memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); 3108 Dst += sizeof(uint64_t); 3109 } 3110 3111 memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); 3112 } 3113 } 3114