xref: /freebsd/contrib/llvm-project/llvm/lib/Support/APInt.cpp (revision a85404906bc8f402318524b4ccd196712fc09fbd)
1  //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements a class to represent arbitrary precision integer
10  // constant values and provide a variety of arithmetic operations on them.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/ADT/APInt.h"
15  #include "llvm/ADT/ArrayRef.h"
16  #include "llvm/ADT/FoldingSet.h"
17  #include "llvm/ADT/Hashing.h"
18  #include "llvm/ADT/Optional.h"
19  #include "llvm/ADT/SmallString.h"
20  #include "llvm/ADT/StringRef.h"
21  #include "llvm/ADT/bit.h"
22  #include "llvm/Config/llvm-config.h"
23  #include "llvm/Support/Debug.h"
24  #include "llvm/Support/ErrorHandling.h"
25  #include "llvm/Support/MathExtras.h"
26  #include "llvm/Support/raw_ostream.h"
27  #include <climits>
28  #include <cmath>
29  #include <cstdlib>
30  #include <cstring>
31  using namespace llvm;
32  
33  #define DEBUG_TYPE "apint"
34  
35  /// A utility function for allocating memory, checking for allocation failures,
36  /// and ensuring the contents are zeroed.
37  inline static uint64_t* getClearedMemory(unsigned numWords) {
38    uint64_t *result = new uint64_t[numWords];
39    memset(result, 0, numWords * sizeof(uint64_t));
40    return result;
41  }
42  
43  /// A utility function for allocating memory and checking for allocation
44  /// failure.  The content is not zeroed.
45  inline static uint64_t* getMemory(unsigned numWords) {
46    return new uint64_t[numWords];
47  }
48  
49  /// A utility function that converts a character to a digit.
50  inline static unsigned getDigit(char cdigit, uint8_t radix) {
51    unsigned r;
52  
53    if (radix == 16 || radix == 36) {
54      r = cdigit - '0';
55      if (r <= 9)
56        return r;
57  
58      r = cdigit - 'A';
59      if (r <= radix - 11U)
60        return r + 10;
61  
62      r = cdigit - 'a';
63      if (r <= radix - 11U)
64        return r + 10;
65  
66      radix = 10;
67    }
68  
69    r = cdigit - '0';
70    if (r < radix)
71      return r;
72  
73    return -1U;
74  }
75  
76  
77  void APInt::initSlowCase(uint64_t val, bool isSigned) {
78    U.pVal = getClearedMemory(getNumWords());
79    U.pVal[0] = val;
80    if (isSigned && int64_t(val) < 0)
81      for (unsigned i = 1; i < getNumWords(); ++i)
82        U.pVal[i] = WORDTYPE_MAX;
83    clearUnusedBits();
84  }
85  
86  void APInt::initSlowCase(const APInt& that) {
87    U.pVal = getMemory(getNumWords());
88    memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89  }
90  
91  void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92    assert(BitWidth && "Bitwidth too small");
93    assert(bigVal.data() && "Null pointer detected!");
94    if (isSingleWord())
95      U.VAL = bigVal[0];
96    else {
97      // Get memory, cleared to 0
98      U.pVal = getClearedMemory(getNumWords());
99      // Calculate the number of words to copy
100      unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101      // Copy the words from bigVal to pVal
102      memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103    }
104    // Make sure unused high bits are cleared
105    clearUnusedBits();
106  }
107  
108  APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109    : BitWidth(numBits) {
110    initFromArray(bigVal);
111  }
112  
113  APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114    : BitWidth(numBits) {
115    initFromArray(makeArrayRef(bigVal, numWords));
116  }
117  
118  APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119    : BitWidth(numbits) {
120    assert(BitWidth && "Bitwidth too small");
121    fromString(numbits, Str, radix);
122  }
123  
124  void APInt::reallocate(unsigned NewBitWidth) {
125    // If the number of words is the same we can just change the width and stop.
126    if (getNumWords() == getNumWords(NewBitWidth)) {
127      BitWidth = NewBitWidth;
128      return;
129    }
130  
131    // If we have an allocation, delete it.
132    if (!isSingleWord())
133      delete [] U.pVal;
134  
135    // Update BitWidth.
136    BitWidth = NewBitWidth;
137  
138    // If we are supposed to have an allocation, create it.
139    if (!isSingleWord())
140      U.pVal = getMemory(getNumWords());
141  }
142  
143  void APInt::AssignSlowCase(const APInt& RHS) {
144    // Don't do anything for X = X
145    if (this == &RHS)
146      return;
147  
148    // Adjust the bit width and handle allocations as necessary.
149    reallocate(RHS.getBitWidth());
150  
151    // Copy the data.
152    if (isSingleWord())
153      U.VAL = RHS.U.VAL;
154    else
155      memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156  }
157  
158  /// This method 'profiles' an APInt for use with FoldingSet.
159  void APInt::Profile(FoldingSetNodeID& ID) const {
160    ID.AddInteger(BitWidth);
161  
162    if (isSingleWord()) {
163      ID.AddInteger(U.VAL);
164      return;
165    }
166  
167    unsigned NumWords = getNumWords();
168    for (unsigned i = 0; i < NumWords; ++i)
169      ID.AddInteger(U.pVal[i]);
170  }
171  
172  /// Prefix increment operator. Increments the APInt by one.
173  APInt& APInt::operator++() {
174    if (isSingleWord())
175      ++U.VAL;
176    else
177      tcIncrement(U.pVal, getNumWords());
178    return clearUnusedBits();
179  }
180  
181  /// Prefix decrement operator. Decrements the APInt by one.
182  APInt& APInt::operator--() {
183    if (isSingleWord())
184      --U.VAL;
185    else
186      tcDecrement(U.pVal, getNumWords());
187    return clearUnusedBits();
188  }
189  
190  /// Adds the RHS APInt to this APInt.
191  /// @returns this, after addition of RHS.
192  /// Addition assignment operator.
193  APInt& APInt::operator+=(const APInt& RHS) {
194    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195    if (isSingleWord())
196      U.VAL += RHS.U.VAL;
197    else
198      tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199    return clearUnusedBits();
200  }
201  
202  APInt& APInt::operator+=(uint64_t RHS) {
203    if (isSingleWord())
204      U.VAL += RHS;
205    else
206      tcAddPart(U.pVal, RHS, getNumWords());
207    return clearUnusedBits();
208  }
209  
210  /// Subtracts the RHS APInt from this APInt
211  /// @returns this, after subtraction
212  /// Subtraction assignment operator.
213  APInt& APInt::operator-=(const APInt& RHS) {
214    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215    if (isSingleWord())
216      U.VAL -= RHS.U.VAL;
217    else
218      tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219    return clearUnusedBits();
220  }
221  
222  APInt& APInt::operator-=(uint64_t RHS) {
223    if (isSingleWord())
224      U.VAL -= RHS;
225    else
226      tcSubtractPart(U.pVal, RHS, getNumWords());
227    return clearUnusedBits();
228  }
229  
230  APInt APInt::operator*(const APInt& RHS) const {
231    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232    if (isSingleWord())
233      return APInt(BitWidth, U.VAL * RHS.U.VAL);
234  
235    APInt Result(getMemory(getNumWords()), getBitWidth());
236  
237    tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238  
239    Result.clearUnusedBits();
240    return Result;
241  }
242  
243  void APInt::AndAssignSlowCase(const APInt& RHS) {
244    tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245  }
246  
247  void APInt::OrAssignSlowCase(const APInt& RHS) {
248    tcOr(U.pVal, RHS.U.pVal, getNumWords());
249  }
250  
251  void APInt::XorAssignSlowCase(const APInt& RHS) {
252    tcXor(U.pVal, RHS.U.pVal, getNumWords());
253  }
254  
255  APInt& APInt::operator*=(const APInt& RHS) {
256    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257    *this = *this * RHS;
258    return *this;
259  }
260  
261  APInt& APInt::operator*=(uint64_t RHS) {
262    if (isSingleWord()) {
263      U.VAL *= RHS;
264    } else {
265      unsigned NumWords = getNumWords();
266      tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267    }
268    return clearUnusedBits();
269  }
270  
271  bool APInt::EqualSlowCase(const APInt& RHS) const {
272    return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273  }
274  
275  int APInt::compare(const APInt& RHS) const {
276    assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277    if (isSingleWord())
278      return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279  
280    return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281  }
282  
283  int APInt::compareSigned(const APInt& RHS) const {
284    assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285    if (isSingleWord()) {
286      int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287      int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288      return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289    }
290  
291    bool lhsNeg = isNegative();
292    bool rhsNeg = RHS.isNegative();
293  
294    // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295    if (lhsNeg != rhsNeg)
296      return lhsNeg ? -1 : 1;
297  
298    // Otherwise we can just use an unsigned comparison, because even negative
299    // numbers compare correctly this way if both have the same signed-ness.
300    return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301  }
302  
303  void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304    unsigned loWord = whichWord(loBit);
305    unsigned hiWord = whichWord(hiBit);
306  
307    // Create an initial mask for the low word with zeros below loBit.
308    uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309  
310    // If hiBit is not aligned, we need a high mask.
311    unsigned hiShiftAmt = whichBit(hiBit);
312    if (hiShiftAmt != 0) {
313      // Create a high mask with zeros above hiBit.
314      uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315      // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316      // set the bits in hiWord.
317      if (hiWord == loWord)
318        loMask &= hiMask;
319      else
320        U.pVal[hiWord] |= hiMask;
321    }
322    // Apply the mask to the low word.
323    U.pVal[loWord] |= loMask;
324  
325    // Fill any words between loWord and hiWord with all ones.
326    for (unsigned word = loWord + 1; word < hiWord; ++word)
327      U.pVal[word] = WORDTYPE_MAX;
328  }
329  
330  /// Toggle every bit to its opposite value.
331  void APInt::flipAllBitsSlowCase() {
332    tcComplement(U.pVal, getNumWords());
333    clearUnusedBits();
334  }
335  
336  /// Toggle a given bit to its opposite value whose position is given
337  /// as "bitPosition".
338  /// Toggles a given bit to its opposite value.
339  void APInt::flipBit(unsigned bitPosition) {
340    assert(bitPosition < BitWidth && "Out of the bit-width range!");
341    setBitVal(bitPosition, !(*this)[bitPosition]);
342  }
343  
344  void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
345    unsigned subBitWidth = subBits.getBitWidth();
346    assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
347           "Illegal bit insertion");
348  
349    // Insertion is a direct copy.
350    if (subBitWidth == BitWidth) {
351      *this = subBits;
352      return;
353    }
354  
355    // Single word result can be done as a direct bitmask.
356    if (isSingleWord()) {
357      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
358      U.VAL &= ~(mask << bitPosition);
359      U.VAL |= (subBits.U.VAL << bitPosition);
360      return;
361    }
362  
363    unsigned loBit = whichBit(bitPosition);
364    unsigned loWord = whichWord(bitPosition);
365    unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
366  
367    // Insertion within a single word can be done as a direct bitmask.
368    if (loWord == hi1Word) {
369      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
370      U.pVal[loWord] &= ~(mask << loBit);
371      U.pVal[loWord] |= (subBits.U.VAL << loBit);
372      return;
373    }
374  
375    // Insert on word boundaries.
376    if (loBit == 0) {
377      // Direct copy whole words.
378      unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
379      memcpy(U.pVal + loWord, subBits.getRawData(),
380             numWholeSubWords * APINT_WORD_SIZE);
381  
382      // Mask+insert remaining bits.
383      unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
384      if (remainingBits != 0) {
385        uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
386        U.pVal[hi1Word] &= ~mask;
387        U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
388      }
389      return;
390    }
391  
392    // General case - set/clear individual bits in dst based on src.
393    // TODO - there is scope for optimization here, but at the moment this code
394    // path is barely used so prefer readability over performance.
395    for (unsigned i = 0; i != subBitWidth; ++i)
396      setBitVal(bitPosition + i, subBits[i]);
397  }
398  
399  void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
400    uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
401    subBits &= maskBits;
402    if (isSingleWord()) {
403      U.VAL &= ~(maskBits << bitPosition);
404      U.VAL |= subBits << bitPosition;
405      return;
406    }
407  
408    unsigned loBit = whichBit(bitPosition);
409    unsigned loWord = whichWord(bitPosition);
410    unsigned hiWord = whichWord(bitPosition + numBits - 1);
411    if (loWord == hiWord) {
412      U.pVal[loWord] &= ~(maskBits << loBit);
413      U.pVal[loWord] |= subBits << loBit;
414      return;
415    }
416  
417    static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
418    unsigned wordBits = 8 * sizeof(WordType);
419    U.pVal[loWord] &= ~(maskBits << loBit);
420    U.pVal[loWord] |= subBits << loBit;
421  
422    U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
423    U.pVal[hiWord] |= subBits >> (wordBits - loBit);
424  }
425  
426  APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
427    assert(numBits > 0 && "Can't extract zero bits");
428    assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
429           "Illegal bit extraction");
430  
431    if (isSingleWord())
432      return APInt(numBits, U.VAL >> bitPosition);
433  
434    unsigned loBit = whichBit(bitPosition);
435    unsigned loWord = whichWord(bitPosition);
436    unsigned hiWord = whichWord(bitPosition + numBits - 1);
437  
438    // Single word result extracting bits from a single word source.
439    if (loWord == hiWord)
440      return APInt(numBits, U.pVal[loWord] >> loBit);
441  
442    // Extracting bits that start on a source word boundary can be done
443    // as a fast memory copy.
444    if (loBit == 0)
445      return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
446  
447    // General case - shift + copy source words directly into place.
448    APInt Result(numBits, 0);
449    unsigned NumSrcWords = getNumWords();
450    unsigned NumDstWords = Result.getNumWords();
451  
452    uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
453    for (unsigned word = 0; word < NumDstWords; ++word) {
454      uint64_t w0 = U.pVal[loWord + word];
455      uint64_t w1 =
456          (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
457      DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
458    }
459  
460    return Result.clearUnusedBits();
461  }
462  
463  uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
464                                         unsigned bitPosition) const {
465    assert(numBits > 0 && "Can't extract zero bits");
466    assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
467           "Illegal bit extraction");
468    assert(numBits <= 64 && "Illegal bit extraction");
469  
470    uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
471    if (isSingleWord())
472      return (U.VAL >> bitPosition) & maskBits;
473  
474    unsigned loBit = whichBit(bitPosition);
475    unsigned loWord = whichWord(bitPosition);
476    unsigned hiWord = whichWord(bitPosition + numBits - 1);
477    if (loWord == hiWord)
478      return (U.pVal[loWord] >> loBit) & maskBits;
479  
480    static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
481    unsigned wordBits = 8 * sizeof(WordType);
482    uint64_t retBits = U.pVal[loWord] >> loBit;
483    retBits |= U.pVal[hiWord] << (wordBits - loBit);
484    retBits &= maskBits;
485    return retBits;
486  }
487  
488  unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
489    assert(!str.empty() && "Invalid string length");
490    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
491            radix == 36) &&
492           "Radix should be 2, 8, 10, 16, or 36!");
493  
494    size_t slen = str.size();
495  
496    // Each computation below needs to know if it's negative.
497    StringRef::iterator p = str.begin();
498    unsigned isNegative = *p == '-';
499    if (*p == '-' || *p == '+') {
500      p++;
501      slen--;
502      assert(slen && "String is only a sign, needs a value.");
503    }
504  
505    // For radixes of power-of-two values, the bits required is accurately and
506    // easily computed
507    if (radix == 2)
508      return slen + isNegative;
509    if (radix == 8)
510      return slen * 3 + isNegative;
511    if (radix == 16)
512      return slen * 4 + isNegative;
513  
514    // FIXME: base 36
515  
516    // This is grossly inefficient but accurate. We could probably do something
517    // with a computation of roughly slen*64/20 and then adjust by the value of
518    // the first few digits. But, I'm not sure how accurate that could be.
519  
520    // Compute a sufficient number of bits that is always large enough but might
521    // be too large. This avoids the assertion in the constructor. This
522    // calculation doesn't work appropriately for the numbers 0-9, so just use 4
523    // bits in that case.
524    unsigned sufficient
525      = radix == 10? (slen == 1 ? 4 : slen * 64/18)
526                   : (slen == 1 ? 7 : slen * 16/3);
527  
528    // Convert to the actual binary value.
529    APInt tmp(sufficient, StringRef(p, slen), radix);
530  
531    // Compute how many bits are required. If the log is infinite, assume we need
532    // just bit. If the log is exact and value is negative, then the value is
533    // MinSignedValue with (log + 1) bits.
534    unsigned log = tmp.logBase2();
535    if (log == (unsigned)-1) {
536      return isNegative + 1;
537    } else if (isNegative && tmp.isPowerOf2()) {
538      return isNegative + log;
539    } else {
540      return isNegative + log + 1;
541    }
542  }
543  
544  hash_code llvm::hash_value(const APInt &Arg) {
545    if (Arg.isSingleWord())
546      return hash_combine(Arg.BitWidth, Arg.U.VAL);
547  
548    return hash_combine(
549        Arg.BitWidth,
550        hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
551  }
552  
553  bool APInt::isSplat(unsigned SplatSizeInBits) const {
554    assert(getBitWidth() % SplatSizeInBits == 0 &&
555           "SplatSizeInBits must divide width!");
556    // We can check that all parts of an integer are equal by making use of a
557    // little trick: rotate and check if it's still the same value.
558    return *this == rotl(SplatSizeInBits);
559  }
560  
561  /// This function returns the high "numBits" bits of this APInt.
562  APInt APInt::getHiBits(unsigned numBits) const {
563    return this->lshr(BitWidth - numBits);
564  }
565  
566  /// This function returns the low "numBits" bits of this APInt.
567  APInt APInt::getLoBits(unsigned numBits) const {
568    APInt Result(getLowBitsSet(BitWidth, numBits));
569    Result &= *this;
570    return Result;
571  }
572  
573  /// Return a value containing V broadcasted over NewLen bits.
574  APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
575    assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
576  
577    APInt Val = V.zextOrSelf(NewLen);
578    for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
579      Val |= Val << I;
580  
581    return Val;
582  }
583  
584  unsigned APInt::countLeadingZerosSlowCase() const {
585    unsigned Count = 0;
586    for (int i = getNumWords()-1; i >= 0; --i) {
587      uint64_t V = U.pVal[i];
588      if (V == 0)
589        Count += APINT_BITS_PER_WORD;
590      else {
591        Count += llvm::countLeadingZeros(V);
592        break;
593      }
594    }
595    // Adjust for unused bits in the most significant word (they are zero).
596    unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
597    Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
598    return Count;
599  }
600  
601  unsigned APInt::countLeadingOnesSlowCase() const {
602    unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
603    unsigned shift;
604    if (!highWordBits) {
605      highWordBits = APINT_BITS_PER_WORD;
606      shift = 0;
607    } else {
608      shift = APINT_BITS_PER_WORD - highWordBits;
609    }
610    int i = getNumWords() - 1;
611    unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
612    if (Count == highWordBits) {
613      for (i--; i >= 0; --i) {
614        if (U.pVal[i] == WORDTYPE_MAX)
615          Count += APINT_BITS_PER_WORD;
616        else {
617          Count += llvm::countLeadingOnes(U.pVal[i]);
618          break;
619        }
620      }
621    }
622    return Count;
623  }
624  
625  unsigned APInt::countTrailingZerosSlowCase() const {
626    unsigned Count = 0;
627    unsigned i = 0;
628    for (; i < getNumWords() && U.pVal[i] == 0; ++i)
629      Count += APINT_BITS_PER_WORD;
630    if (i < getNumWords())
631      Count += llvm::countTrailingZeros(U.pVal[i]);
632    return std::min(Count, BitWidth);
633  }
634  
635  unsigned APInt::countTrailingOnesSlowCase() const {
636    unsigned Count = 0;
637    unsigned i = 0;
638    for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
639      Count += APINT_BITS_PER_WORD;
640    if (i < getNumWords())
641      Count += llvm::countTrailingOnes(U.pVal[i]);
642    assert(Count <= BitWidth);
643    return Count;
644  }
645  
646  unsigned APInt::countPopulationSlowCase() const {
647    unsigned Count = 0;
648    for (unsigned i = 0; i < getNumWords(); ++i)
649      Count += llvm::countPopulation(U.pVal[i]);
650    return Count;
651  }
652  
653  bool APInt::intersectsSlowCase(const APInt &RHS) const {
654    for (unsigned i = 0, e = getNumWords(); i != e; ++i)
655      if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
656        return true;
657  
658    return false;
659  }
660  
661  bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
662    for (unsigned i = 0, e = getNumWords(); i != e; ++i)
663      if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
664        return false;
665  
666    return true;
667  }
668  
669  APInt APInt::byteSwap() const {
670    assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
671    if (BitWidth == 16)
672      return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
673    if (BitWidth == 32)
674      return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
675    if (BitWidth <= 64) {
676      uint64_t Tmp1 = ByteSwap_64(U.VAL);
677      Tmp1 >>= (64 - BitWidth);
678      return APInt(BitWidth, Tmp1);
679    }
680  
681    APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
682    for (unsigned I = 0, N = getNumWords(); I != N; ++I)
683      Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
684    if (Result.BitWidth != BitWidth) {
685      Result.lshrInPlace(Result.BitWidth - BitWidth);
686      Result.BitWidth = BitWidth;
687    }
688    return Result;
689  }
690  
691  APInt APInt::reverseBits() const {
692    switch (BitWidth) {
693    case 64:
694      return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
695    case 32:
696      return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
697    case 16:
698      return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
699    case 8:
700      return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
701    default:
702      break;
703    }
704  
705    APInt Val(*this);
706    APInt Reversed(BitWidth, 0);
707    unsigned S = BitWidth;
708  
709    for (; Val != 0; Val.lshrInPlace(1)) {
710      Reversed <<= 1;
711      Reversed |= Val[0];
712      --S;
713    }
714  
715    Reversed <<= S;
716    return Reversed;
717  }
718  
719  APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
720    // Fast-path a common case.
721    if (A == B) return A;
722  
723    // Corner cases: if either operand is zero, the other is the gcd.
724    if (!A) return B;
725    if (!B) return A;
726  
727    // Count common powers of 2 and remove all other powers of 2.
728    unsigned Pow2;
729    {
730      unsigned Pow2_A = A.countTrailingZeros();
731      unsigned Pow2_B = B.countTrailingZeros();
732      if (Pow2_A > Pow2_B) {
733        A.lshrInPlace(Pow2_A - Pow2_B);
734        Pow2 = Pow2_B;
735      } else if (Pow2_B > Pow2_A) {
736        B.lshrInPlace(Pow2_B - Pow2_A);
737        Pow2 = Pow2_A;
738      } else {
739        Pow2 = Pow2_A;
740      }
741    }
742  
743    // Both operands are odd multiples of 2^Pow_2:
744    //
745    //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
746    //
747    // This is a modified version of Stein's algorithm, taking advantage of
748    // efficient countTrailingZeros().
749    while (A != B) {
750      if (A.ugt(B)) {
751        A -= B;
752        A.lshrInPlace(A.countTrailingZeros() - Pow2);
753      } else {
754        B -= A;
755        B.lshrInPlace(B.countTrailingZeros() - Pow2);
756      }
757    }
758  
759    return A;
760  }
761  
762  APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
763    uint64_t I = bit_cast<uint64_t>(Double);
764  
765    // Get the sign bit from the highest order bit
766    bool isNeg = I >> 63;
767  
768    // Get the 11-bit exponent and adjust for the 1023 bit bias
769    int64_t exp = ((I >> 52) & 0x7ff) - 1023;
770  
771    // If the exponent is negative, the value is < 0 so just return 0.
772    if (exp < 0)
773      return APInt(width, 0u);
774  
775    // Extract the mantissa by clearing the top 12 bits (sign + exponent).
776    uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
777  
778    // If the exponent doesn't shift all bits out of the mantissa
779    if (exp < 52)
780      return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
781                      APInt(width, mantissa >> (52 - exp));
782  
783    // If the client didn't provide enough bits for us to shift the mantissa into
784    // then the result is undefined, just return 0
785    if (width <= exp - 52)
786      return APInt(width, 0);
787  
788    // Otherwise, we have to shift the mantissa bits up to the right location
789    APInt Tmp(width, mantissa);
790    Tmp <<= (unsigned)exp - 52;
791    return isNeg ? -Tmp : Tmp;
792  }
793  
794  /// This function converts this APInt to a double.
795  /// The layout for double is as following (IEEE Standard 754):
796  ///  --------------------------------------
797  /// |  Sign    Exponent    Fraction    Bias |
798  /// |-------------------------------------- |
799  /// |  1[63]   11[62-52]   52[51-00]   1023 |
800  ///  --------------------------------------
801  double APInt::roundToDouble(bool isSigned) const {
802  
803    // Handle the simple case where the value is contained in one uint64_t.
804    // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
805    if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
806      if (isSigned) {
807        int64_t sext = SignExtend64(getWord(0), BitWidth);
808        return double(sext);
809      } else
810        return double(getWord(0));
811    }
812  
813    // Determine if the value is negative.
814    bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
815  
816    // Construct the absolute value if we're negative.
817    APInt Tmp(isNeg ? -(*this) : (*this));
818  
819    // Figure out how many bits we're using.
820    unsigned n = Tmp.getActiveBits();
821  
822    // The exponent (without bias normalization) is just the number of bits
823    // we are using. Note that the sign bit is gone since we constructed the
824    // absolute value.
825    uint64_t exp = n;
826  
827    // Return infinity for exponent overflow
828    if (exp > 1023) {
829      if (!isSigned || !isNeg)
830        return std::numeric_limits<double>::infinity();
831      else
832        return -std::numeric_limits<double>::infinity();
833    }
834    exp += 1023; // Increment for 1023 bias
835  
836    // Number of bits in mantissa is 52. To obtain the mantissa value, we must
837    // extract the high 52 bits from the correct words in pVal.
838    uint64_t mantissa;
839    unsigned hiWord = whichWord(n-1);
840    if (hiWord == 0) {
841      mantissa = Tmp.U.pVal[0];
842      if (n > 52)
843        mantissa >>= n - 52; // shift down, we want the top 52 bits.
844    } else {
845      assert(hiWord > 0 && "huh?");
846      uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
847      uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
848      mantissa = hibits | lobits;
849    }
850  
851    // The leading bit of mantissa is implicit, so get rid of it.
852    uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
853    uint64_t I = sign | (exp << 52) | mantissa;
854    return bit_cast<double>(I);
855  }
856  
857  // Truncate to new width.
858  APInt APInt::trunc(unsigned width) const {
859    assert(width < BitWidth && "Invalid APInt Truncate request");
860    assert(width && "Can't truncate to 0 bits");
861  
862    if (width <= APINT_BITS_PER_WORD)
863      return APInt(width, getRawData()[0]);
864  
865    APInt Result(getMemory(getNumWords(width)), width);
866  
867    // Copy full words.
868    unsigned i;
869    for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
870      Result.U.pVal[i] = U.pVal[i];
871  
872    // Truncate and copy any partial word.
873    unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
874    if (bits != 0)
875      Result.U.pVal[i] = U.pVal[i] << bits >> bits;
876  
877    return Result;
878  }
879  
880  // Truncate to new width with unsigned saturation.
881  APInt APInt::truncUSat(unsigned width) const {
882    assert(width < BitWidth && "Invalid APInt Truncate request");
883    assert(width && "Can't truncate to 0 bits");
884  
885    // Can we just losslessly truncate it?
886    if (isIntN(width))
887      return trunc(width);
888    // If not, then just return the new limit.
889    return APInt::getMaxValue(width);
890  }
891  
892  // Truncate to new width with signed saturation.
893  APInt APInt::truncSSat(unsigned width) const {
894    assert(width < BitWidth && "Invalid APInt Truncate request");
895    assert(width && "Can't truncate to 0 bits");
896  
897    // Can we just losslessly truncate it?
898    if (isSignedIntN(width))
899      return trunc(width);
900    // If not, then just return the new limits.
901    return isNegative() ? APInt::getSignedMinValue(width)
902                        : APInt::getSignedMaxValue(width);
903  }
904  
905  // Sign extend to a new width.
906  APInt APInt::sext(unsigned Width) const {
907    assert(Width > BitWidth && "Invalid APInt SignExtend request");
908  
909    if (Width <= APINT_BITS_PER_WORD)
910      return APInt(Width, SignExtend64(U.VAL, BitWidth));
911  
912    APInt Result(getMemory(getNumWords(Width)), Width);
913  
914    // Copy words.
915    std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
916  
917    // Sign extend the last word since there may be unused bits in the input.
918    Result.U.pVal[getNumWords() - 1] =
919        SignExtend64(Result.U.pVal[getNumWords() - 1],
920                     ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
921  
922    // Fill with sign bits.
923    std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
924                (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
925    Result.clearUnusedBits();
926    return Result;
927  }
928  
929  //  Zero extend to a new width.
930  APInt APInt::zext(unsigned width) const {
931    assert(width > BitWidth && "Invalid APInt ZeroExtend request");
932  
933    if (width <= APINT_BITS_PER_WORD)
934      return APInt(width, U.VAL);
935  
936    APInt Result(getMemory(getNumWords(width)), width);
937  
938    // Copy words.
939    std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
940  
941    // Zero remaining words.
942    std::memset(Result.U.pVal + getNumWords(), 0,
943                (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
944  
945    return Result;
946  }
947  
948  APInt APInt::zextOrTrunc(unsigned width) const {
949    if (BitWidth < width)
950      return zext(width);
951    if (BitWidth > width)
952      return trunc(width);
953    return *this;
954  }
955  
956  APInt APInt::sextOrTrunc(unsigned width) const {
957    if (BitWidth < width)
958      return sext(width);
959    if (BitWidth > width)
960      return trunc(width);
961    return *this;
962  }
963  
964  APInt APInt::truncOrSelf(unsigned width) const {
965    if (BitWidth > width)
966      return trunc(width);
967    return *this;
968  }
969  
970  APInt APInt::zextOrSelf(unsigned width) const {
971    if (BitWidth < width)
972      return zext(width);
973    return *this;
974  }
975  
976  APInt APInt::sextOrSelf(unsigned width) const {
977    if (BitWidth < width)
978      return sext(width);
979    return *this;
980  }
981  
982  /// Arithmetic right-shift this APInt by shiftAmt.
983  /// Arithmetic right-shift function.
984  void APInt::ashrInPlace(const APInt &shiftAmt) {
985    ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
986  }
987  
988  /// Arithmetic right-shift this APInt by shiftAmt.
989  /// Arithmetic right-shift function.
990  void APInt::ashrSlowCase(unsigned ShiftAmt) {
991    // Don't bother performing a no-op shift.
992    if (!ShiftAmt)
993      return;
994  
995    // Save the original sign bit for later.
996    bool Negative = isNegative();
997  
998    // WordShift is the inter-part shift; BitShift is intra-part shift.
999    unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1000    unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1001  
1002    unsigned WordsToMove = getNumWords() - WordShift;
1003    if (WordsToMove != 0) {
1004      // Sign extend the last word to fill in the unused bits.
1005      U.pVal[getNumWords() - 1] = SignExtend64(
1006          U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1007  
1008      // Fastpath for moving by whole words.
1009      if (BitShift == 0) {
1010        std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1011      } else {
1012        // Move the words containing significant bits.
1013        for (unsigned i = 0; i != WordsToMove - 1; ++i)
1014          U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1015                      (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1016  
1017        // Handle the last word which has no high bits to copy.
1018        U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1019        // Sign extend one more time.
1020        U.pVal[WordsToMove - 1] =
1021            SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1022      }
1023    }
1024  
1025    // Fill in the remainder based on the original sign.
1026    std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1027                WordShift * APINT_WORD_SIZE);
1028    clearUnusedBits();
1029  }
1030  
1031  /// Logical right-shift this APInt by shiftAmt.
1032  /// Logical right-shift function.
1033  void APInt::lshrInPlace(const APInt &shiftAmt) {
1034    lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1035  }
1036  
1037  /// Logical right-shift this APInt by shiftAmt.
1038  /// Logical right-shift function.
1039  void APInt::lshrSlowCase(unsigned ShiftAmt) {
1040    tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1041  }
1042  
1043  /// Left-shift this APInt by shiftAmt.
1044  /// Left-shift function.
1045  APInt &APInt::operator<<=(const APInt &shiftAmt) {
1046    // It's undefined behavior in C to shift by BitWidth or greater.
1047    *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1048    return *this;
1049  }
1050  
1051  void APInt::shlSlowCase(unsigned ShiftAmt) {
1052    tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1053    clearUnusedBits();
1054  }
1055  
1056  // Calculate the rotate amount modulo the bit width.
1057  static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1058    unsigned rotBitWidth = rotateAmt.getBitWidth();
1059    APInt rot = rotateAmt;
1060    if (rotBitWidth < BitWidth) {
1061      // Extend the rotate APInt, so that the urem doesn't divide by 0.
1062      // e.g. APInt(1, 32) would give APInt(1, 0).
1063      rot = rotateAmt.zext(BitWidth);
1064    }
1065    rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1066    return rot.getLimitedValue(BitWidth);
1067  }
1068  
1069  APInt APInt::rotl(const APInt &rotateAmt) const {
1070    return rotl(rotateModulo(BitWidth, rotateAmt));
1071  }
1072  
1073  APInt APInt::rotl(unsigned rotateAmt) const {
1074    rotateAmt %= BitWidth;
1075    if (rotateAmt == 0)
1076      return *this;
1077    return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1078  }
1079  
1080  APInt APInt::rotr(const APInt &rotateAmt) const {
1081    return rotr(rotateModulo(BitWidth, rotateAmt));
1082  }
1083  
1084  APInt APInt::rotr(unsigned rotateAmt) const {
1085    rotateAmt %= BitWidth;
1086    if (rotateAmt == 0)
1087      return *this;
1088    return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1089  }
1090  
1091  // Square Root - this method computes and returns the square root of "this".
1092  // Three mechanisms are used for computation. For small values (<= 5 bits),
1093  // a table lookup is done. This gets some performance for common cases. For
1094  // values using less than 52 bits, the value is converted to double and then
1095  // the libc sqrt function is called. The result is rounded and then converted
1096  // back to a uint64_t which is then used to construct the result. Finally,
1097  // the Babylonian method for computing square roots is used.
1098  APInt APInt::sqrt() const {
1099  
1100    // Determine the magnitude of the value.
1101    unsigned magnitude = getActiveBits();
1102  
1103    // Use a fast table for some small values. This also gets rid of some
1104    // rounding errors in libc sqrt for small values.
1105    if (magnitude <= 5) {
1106      static const uint8_t results[32] = {
1107        /*     0 */ 0,
1108        /*  1- 2 */ 1, 1,
1109        /*  3- 6 */ 2, 2, 2, 2,
1110        /*  7-12 */ 3, 3, 3, 3, 3, 3,
1111        /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1112        /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1113        /*    31 */ 6
1114      };
1115      return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1116    }
1117  
1118    // If the magnitude of the value fits in less than 52 bits (the precision of
1119    // an IEEE double precision floating point value), then we can use the
1120    // libc sqrt function which will probably use a hardware sqrt computation.
1121    // This should be faster than the algorithm below.
1122    if (magnitude < 52) {
1123      return APInt(BitWidth,
1124                   uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1125                                                                 : U.pVal[0])))));
1126    }
1127  
1128    // Okay, all the short cuts are exhausted. We must compute it. The following
1129    // is a classical Babylonian method for computing the square root. This code
1130    // was adapted to APInt from a wikipedia article on such computations.
1131    // See http://www.wikipedia.org/ and go to the page named
1132    // Calculate_an_integer_square_root.
1133    unsigned nbits = BitWidth, i = 4;
1134    APInt testy(BitWidth, 16);
1135    APInt x_old(BitWidth, 1);
1136    APInt x_new(BitWidth, 0);
1137    APInt two(BitWidth, 2);
1138  
1139    // Select a good starting value using binary logarithms.
1140    for (;; i += 2, testy = testy.shl(2))
1141      if (i >= nbits || this->ule(testy)) {
1142        x_old = x_old.shl(i / 2);
1143        break;
1144      }
1145  
1146    // Use the Babylonian method to arrive at the integer square root:
1147    for (;;) {
1148      x_new = (this->udiv(x_old) + x_old).udiv(two);
1149      if (x_old.ule(x_new))
1150        break;
1151      x_old = x_new;
1152    }
1153  
1154    // Make sure we return the closest approximation
1155    // NOTE: The rounding calculation below is correct. It will produce an
1156    // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1157    // determined to be a rounding issue with pari/gp as it begins to use a
1158    // floating point representation after 192 bits. There are no discrepancies
1159    // between this algorithm and pari/gp for bit widths < 192 bits.
1160    APInt square(x_old * x_old);
1161    APInt nextSquare((x_old + 1) * (x_old +1));
1162    if (this->ult(square))
1163      return x_old;
1164    assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1165    APInt midpoint((nextSquare - square).udiv(two));
1166    APInt offset(*this - square);
1167    if (offset.ult(midpoint))
1168      return x_old;
1169    return x_old + 1;
1170  }
1171  
1172  /// Computes the multiplicative inverse of this APInt for a given modulo. The
1173  /// iterative extended Euclidean algorithm is used to solve for this value,
1174  /// however we simplify it to speed up calculating only the inverse, and take
1175  /// advantage of div+rem calculations. We also use some tricks to avoid copying
1176  /// (potentially large) APInts around.
1177  /// WARNING: a value of '0' may be returned,
1178  ///          signifying that no multiplicative inverse exists!
1179  APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1180    assert(ult(modulo) && "This APInt must be smaller than the modulo");
1181  
1182    // Using the properties listed at the following web page (accessed 06/21/08):
1183    //   http://www.numbertheory.org/php/euclid.html
1184    // (especially the properties numbered 3, 4 and 9) it can be proved that
1185    // BitWidth bits suffice for all the computations in the algorithm implemented
1186    // below. More precisely, this number of bits suffice if the multiplicative
1187    // inverse exists, but may not suffice for the general extended Euclidean
1188    // algorithm.
1189  
1190    APInt r[2] = { modulo, *this };
1191    APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1192    APInt q(BitWidth, 0);
1193  
1194    unsigned i;
1195    for (i = 0; r[i^1] != 0; i ^= 1) {
1196      // An overview of the math without the confusing bit-flipping:
1197      // q = r[i-2] / r[i-1]
1198      // r[i] = r[i-2] % r[i-1]
1199      // t[i] = t[i-2] - t[i-1] * q
1200      udivrem(r[i], r[i^1], q, r[i]);
1201      t[i] -= t[i^1] * q;
1202    }
1203  
1204    // If this APInt and the modulo are not coprime, there is no multiplicative
1205    // inverse, so return 0. We check this by looking at the next-to-last
1206    // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1207    // algorithm.
1208    if (r[i] != 1)
1209      return APInt(BitWidth, 0);
1210  
1211    // The next-to-last t is the multiplicative inverse.  However, we are
1212    // interested in a positive inverse. Calculate a positive one from a negative
1213    // one if necessary. A simple addition of the modulo suffices because
1214    // abs(t[i]) is known to be less than *this/2 (see the link above).
1215    if (t[i].isNegative())
1216      t[i] += modulo;
1217  
1218    return std::move(t[i]);
1219  }
1220  
1221  /// Calculate the magic numbers required to implement a signed integer division
1222  /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1223  /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1224  /// Warren, Jr., chapter 10.
1225  APInt::ms APInt::magic() const {
1226    const APInt& d = *this;
1227    unsigned p;
1228    APInt ad, anc, delta, q1, r1, q2, r2, t;
1229    APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1230    struct ms mag;
1231  
1232    ad = d.abs();
1233    t = signedMin + (d.lshr(d.getBitWidth() - 1));
1234    anc = t - 1 - t.urem(ad);   // absolute value of nc
1235    p = d.getBitWidth() - 1;    // initialize p
1236    q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1237    r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1238    q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1239    r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1240    do {
1241      p = p + 1;
1242      q1 = q1<<1;          // update q1 = 2p/abs(nc)
1243      r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1244      if (r1.uge(anc)) {  // must be unsigned comparison
1245        q1 = q1 + 1;
1246        r1 = r1 - anc;
1247      }
1248      q2 = q2<<1;          // update q2 = 2p/abs(d)
1249      r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1250      if (r2.uge(ad)) {   // must be unsigned comparison
1251        q2 = q2 + 1;
1252        r2 = r2 - ad;
1253      }
1254      delta = ad - r2;
1255    } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1256  
1257    mag.m = q2 + 1;
1258    if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1259    mag.s = p - d.getBitWidth();          // resulting shift
1260    return mag;
1261  }
1262  
1263  /// Calculate the magic numbers required to implement an unsigned integer
1264  /// division by a constant as a sequence of multiplies, adds and shifts.
1265  /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1266  /// S. Warren, Jr., chapter 10.
1267  /// LeadingZeros can be used to simplify the calculation if the upper bits
1268  /// of the divided value are known zero.
1269  APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1270    const APInt& d = *this;
1271    unsigned p;
1272    APInt nc, delta, q1, r1, q2, r2;
1273    struct mu magu;
1274    magu.a = 0;               // initialize "add" indicator
1275    APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1276    APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1277    APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1278  
1279    nc = allOnes - (allOnes - d).urem(d);
1280    p = d.getBitWidth() - 1;  // initialize p
1281    q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1282    r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1283    q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1284    r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1285    do {
1286      p = p + 1;
1287      if (r1.uge(nc - r1)) {
1288        q1 = q1 + q1 + 1;  // update q1
1289        r1 = r1 + r1 - nc; // update r1
1290      }
1291      else {
1292        q1 = q1+q1; // update q1
1293        r1 = r1+r1; // update r1
1294      }
1295      if ((r2 + 1).uge(d - r2)) {
1296        if (q2.uge(signedMax)) magu.a = 1;
1297        q2 = q2+q2 + 1;     // update q2
1298        r2 = r2+r2 + 1 - d; // update r2
1299      }
1300      else {
1301        if (q2.uge(signedMin)) magu.a = 1;
1302        q2 = q2+q2;     // update q2
1303        r2 = r2+r2 + 1; // update r2
1304      }
1305      delta = d - 1 - r2;
1306    } while (p < d.getBitWidth()*2 &&
1307             (q1.ult(delta) || (q1 == delta && r1 == 0)));
1308    magu.m = q2 + 1; // resulting magic number
1309    magu.s = p - d.getBitWidth();  // resulting shift
1310    return magu;
1311  }
1312  
1313  /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1314  /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1315  /// variables here have the same names as in the algorithm. Comments explain
1316  /// the algorithm and any deviation from it.
1317  static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1318                       unsigned m, unsigned n) {
1319    assert(u && "Must provide dividend");
1320    assert(v && "Must provide divisor");
1321    assert(q && "Must provide quotient");
1322    assert(u != v && u != q && v != q && "Must use different memory");
1323    assert(n>1 && "n must be > 1");
1324  
1325    // b denotes the base of the number system. In our case b is 2^32.
1326    const uint64_t b = uint64_t(1) << 32;
1327  
1328  // The DEBUG macros here tend to be spam in the debug output if you're not
1329  // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1330  #ifdef KNUTH_DEBUG
1331  #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1332  #else
1333  #define DEBUG_KNUTH(X) do {} while(false)
1334  #endif
1335  
1336    DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1337    DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1338    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1339    DEBUG_KNUTH(dbgs() << " by");
1340    DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1341    DEBUG_KNUTH(dbgs() << '\n');
1342    // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1343    // u and v by d. Note that we have taken Knuth's advice here to use a power
1344    // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1345    // 2 allows us to shift instead of multiply and it is easy to determine the
1346    // shift amount from the leading zeros.  We are basically normalizing the u
1347    // and v so that its high bits are shifted to the top of v's range without
1348    // overflow. Note that this can require an extra word in u so that u must
1349    // be of length m+n+1.
1350    unsigned shift = countLeadingZeros(v[n-1]);
1351    uint32_t v_carry = 0;
1352    uint32_t u_carry = 0;
1353    if (shift) {
1354      for (unsigned i = 0; i < m+n; ++i) {
1355        uint32_t u_tmp = u[i] >> (32 - shift);
1356        u[i] = (u[i] << shift) | u_carry;
1357        u_carry = u_tmp;
1358      }
1359      for (unsigned i = 0; i < n; ++i) {
1360        uint32_t v_tmp = v[i] >> (32 - shift);
1361        v[i] = (v[i] << shift) | v_carry;
1362        v_carry = v_tmp;
1363      }
1364    }
1365    u[m+n] = u_carry;
1366  
1367    DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1368    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1369    DEBUG_KNUTH(dbgs() << " by");
1370    DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1371    DEBUG_KNUTH(dbgs() << '\n');
1372  
1373    // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1374    int j = m;
1375    do {
1376      DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1377      // D3. [Calculate q'.].
1378      //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1379      //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1380      // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1381      // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1382      // on v[n-2] determines at high speed most of the cases in which the trial
1383      // value qp is one too large, and it eliminates all cases where qp is two
1384      // too large.
1385      uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1386      DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1387      uint64_t qp = dividend / v[n-1];
1388      uint64_t rp = dividend % v[n-1];
1389      if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1390        qp--;
1391        rp += v[n-1];
1392        if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1393          qp--;
1394      }
1395      DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1396  
1397      // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1398      // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1399      // consists of a simple multiplication by a one-place number, combined with
1400      // a subtraction.
1401      // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1402      // this step is actually negative, (u[j+n]...u[j]) should be left as the
1403      // true value plus b**(n+1), namely as the b's complement of
1404      // the true value, and a "borrow" to the left should be remembered.
1405      int64_t borrow = 0;
1406      for (unsigned i = 0; i < n; ++i) {
1407        uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1408        int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1409        u[j+i] = Lo_32(subres);
1410        borrow = Hi_32(p) - Hi_32(subres);
1411        DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1412                          << ", borrow = " << borrow << '\n');
1413      }
1414      bool isNeg = u[j+n] < borrow;
1415      u[j+n] -= Lo_32(borrow);
1416  
1417      DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1418      DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1419      DEBUG_KNUTH(dbgs() << '\n');
1420  
1421      // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1422      // negative, go to step D6; otherwise go on to step D7.
1423      q[j] = Lo_32(qp);
1424      if (isNeg) {
1425        // D6. [Add back]. The probability that this step is necessary is very
1426        // small, on the order of only 2/b. Make sure that test data accounts for
1427        // this possibility. Decrease q[j] by 1
1428        q[j]--;
1429        // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1430        // A carry will occur to the left of u[j+n], and it should be ignored
1431        // since it cancels with the borrow that occurred in D4.
1432        bool carry = false;
1433        for (unsigned i = 0; i < n; i++) {
1434          uint32_t limit = std::min(u[j+i],v[i]);
1435          u[j+i] += v[i] + carry;
1436          carry = u[j+i] < limit || (carry && u[j+i] == limit);
1437        }
1438        u[j+n] += carry;
1439      }
1440      DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1441      DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1442      DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1443  
1444      // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1445    } while (--j >= 0);
1446  
1447    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1448    DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1449    DEBUG_KNUTH(dbgs() << '\n');
1450  
1451    // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1452    // remainder may be obtained by dividing u[...] by d. If r is non-null we
1453    // compute the remainder (urem uses this).
1454    if (r) {
1455      // The value d is expressed by the "shift" value above since we avoided
1456      // multiplication by d by using a shift left. So, all we have to do is
1457      // shift right here.
1458      if (shift) {
1459        uint32_t carry = 0;
1460        DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1461        for (int i = n-1; i >= 0; i--) {
1462          r[i] = (u[i] >> shift) | carry;
1463          carry = u[i] << (32 - shift);
1464          DEBUG_KNUTH(dbgs() << " " << r[i]);
1465        }
1466      } else {
1467        for (int i = n-1; i >= 0; i--) {
1468          r[i] = u[i];
1469          DEBUG_KNUTH(dbgs() << " " << r[i]);
1470        }
1471      }
1472      DEBUG_KNUTH(dbgs() << '\n');
1473    }
1474    DEBUG_KNUTH(dbgs() << '\n');
1475  }
1476  
1477  void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1478                     unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1479    assert(lhsWords >= rhsWords && "Fractional result");
1480  
1481    // First, compose the values into an array of 32-bit words instead of
1482    // 64-bit words. This is a necessity of both the "short division" algorithm
1483    // and the Knuth "classical algorithm" which requires there to be native
1484    // operations for +, -, and * on an m bit value with an m*2 bit result. We
1485    // can't use 64-bit operands here because we don't have native results of
1486    // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1487    // work on large-endian machines.
1488    unsigned n = rhsWords * 2;
1489    unsigned m = (lhsWords * 2) - n;
1490  
1491    // Allocate space for the temporary values we need either on the stack, if
1492    // it will fit, or on the heap if it won't.
1493    uint32_t SPACE[128];
1494    uint32_t *U = nullptr;
1495    uint32_t *V = nullptr;
1496    uint32_t *Q = nullptr;
1497    uint32_t *R = nullptr;
1498    if ((Remainder?4:3)*n+2*m+1 <= 128) {
1499      U = &SPACE[0];
1500      V = &SPACE[m+n+1];
1501      Q = &SPACE[(m+n+1) + n];
1502      if (Remainder)
1503        R = &SPACE[(m+n+1) + n + (m+n)];
1504    } else {
1505      U = new uint32_t[m + n + 1];
1506      V = new uint32_t[n];
1507      Q = new uint32_t[m+n];
1508      if (Remainder)
1509        R = new uint32_t[n];
1510    }
1511  
1512    // Initialize the dividend
1513    memset(U, 0, (m+n+1)*sizeof(uint32_t));
1514    for (unsigned i = 0; i < lhsWords; ++i) {
1515      uint64_t tmp = LHS[i];
1516      U[i * 2] = Lo_32(tmp);
1517      U[i * 2 + 1] = Hi_32(tmp);
1518    }
1519    U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1520  
1521    // Initialize the divisor
1522    memset(V, 0, (n)*sizeof(uint32_t));
1523    for (unsigned i = 0; i < rhsWords; ++i) {
1524      uint64_t tmp = RHS[i];
1525      V[i * 2] = Lo_32(tmp);
1526      V[i * 2 + 1] = Hi_32(tmp);
1527    }
1528  
1529    // initialize the quotient and remainder
1530    memset(Q, 0, (m+n) * sizeof(uint32_t));
1531    if (Remainder)
1532      memset(R, 0, n * sizeof(uint32_t));
1533  
1534    // Now, adjust m and n for the Knuth division. n is the number of words in
1535    // the divisor. m is the number of words by which the dividend exceeds the
1536    // divisor (i.e. m+n is the length of the dividend). These sizes must not
1537    // contain any zero words or the Knuth algorithm fails.
1538    for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1539      n--;
1540      m++;
1541    }
1542    for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1543      m--;
1544  
1545    // If we're left with only a single word for the divisor, Knuth doesn't work
1546    // so we implement the short division algorithm here. This is much simpler
1547    // and faster because we are certain that we can divide a 64-bit quantity
1548    // by a 32-bit quantity at hardware speed and short division is simply a
1549    // series of such operations. This is just like doing short division but we
1550    // are using base 2^32 instead of base 10.
1551    assert(n != 0 && "Divide by zero?");
1552    if (n == 1) {
1553      uint32_t divisor = V[0];
1554      uint32_t remainder = 0;
1555      for (int i = m; i >= 0; i--) {
1556        uint64_t partial_dividend = Make_64(remainder, U[i]);
1557        if (partial_dividend == 0) {
1558          Q[i] = 0;
1559          remainder = 0;
1560        } else if (partial_dividend < divisor) {
1561          Q[i] = 0;
1562          remainder = Lo_32(partial_dividend);
1563        } else if (partial_dividend == divisor) {
1564          Q[i] = 1;
1565          remainder = 0;
1566        } else {
1567          Q[i] = Lo_32(partial_dividend / divisor);
1568          remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1569        }
1570      }
1571      if (R)
1572        R[0] = remainder;
1573    } else {
1574      // Now we're ready to invoke the Knuth classical divide algorithm. In this
1575      // case n > 1.
1576      KnuthDiv(U, V, Q, R, m, n);
1577    }
1578  
1579    // If the caller wants the quotient
1580    if (Quotient) {
1581      for (unsigned i = 0; i < lhsWords; ++i)
1582        Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1583    }
1584  
1585    // If the caller wants the remainder
1586    if (Remainder) {
1587      for (unsigned i = 0; i < rhsWords; ++i)
1588        Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1589    }
1590  
1591    // Clean up the memory we allocated.
1592    if (U != &SPACE[0]) {
1593      delete [] U;
1594      delete [] V;
1595      delete [] Q;
1596      delete [] R;
1597    }
1598  }
1599  
1600  APInt APInt::udiv(const APInt &RHS) const {
1601    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1602  
1603    // First, deal with the easy case
1604    if (isSingleWord()) {
1605      assert(RHS.U.VAL != 0 && "Divide by zero?");
1606      return APInt(BitWidth, U.VAL / RHS.U.VAL);
1607    }
1608  
1609    // Get some facts about the LHS and RHS number of bits and words
1610    unsigned lhsWords = getNumWords(getActiveBits());
1611    unsigned rhsBits  = RHS.getActiveBits();
1612    unsigned rhsWords = getNumWords(rhsBits);
1613    assert(rhsWords && "Divided by zero???");
1614  
1615    // Deal with some degenerate cases
1616    if (!lhsWords)
1617      // 0 / X ===> 0
1618      return APInt(BitWidth, 0);
1619    if (rhsBits == 1)
1620      // X / 1 ===> X
1621      return *this;
1622    if (lhsWords < rhsWords || this->ult(RHS))
1623      // X / Y ===> 0, iff X < Y
1624      return APInt(BitWidth, 0);
1625    if (*this == RHS)
1626      // X / X ===> 1
1627      return APInt(BitWidth, 1);
1628    if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1629      // All high words are zero, just use native divide
1630      return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1631  
1632    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1633    APInt Quotient(BitWidth, 0); // to hold result.
1634    divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1635    return Quotient;
1636  }
1637  
1638  APInt APInt::udiv(uint64_t RHS) const {
1639    assert(RHS != 0 && "Divide by zero?");
1640  
1641    // First, deal with the easy case
1642    if (isSingleWord())
1643      return APInt(BitWidth, U.VAL / RHS);
1644  
1645    // Get some facts about the LHS words.
1646    unsigned lhsWords = getNumWords(getActiveBits());
1647  
1648    // Deal with some degenerate cases
1649    if (!lhsWords)
1650      // 0 / X ===> 0
1651      return APInt(BitWidth, 0);
1652    if (RHS == 1)
1653      // X / 1 ===> X
1654      return *this;
1655    if (this->ult(RHS))
1656      // X / Y ===> 0, iff X < Y
1657      return APInt(BitWidth, 0);
1658    if (*this == RHS)
1659      // X / X ===> 1
1660      return APInt(BitWidth, 1);
1661    if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1662      // All high words are zero, just use native divide
1663      return APInt(BitWidth, this->U.pVal[0] / RHS);
1664  
1665    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1666    APInt Quotient(BitWidth, 0); // to hold result.
1667    divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1668    return Quotient;
1669  }
1670  
1671  APInt APInt::sdiv(const APInt &RHS) const {
1672    if (isNegative()) {
1673      if (RHS.isNegative())
1674        return (-(*this)).udiv(-RHS);
1675      return -((-(*this)).udiv(RHS));
1676    }
1677    if (RHS.isNegative())
1678      return -(this->udiv(-RHS));
1679    return this->udiv(RHS);
1680  }
1681  
1682  APInt APInt::sdiv(int64_t RHS) const {
1683    if (isNegative()) {
1684      if (RHS < 0)
1685        return (-(*this)).udiv(-RHS);
1686      return -((-(*this)).udiv(RHS));
1687    }
1688    if (RHS < 0)
1689      return -(this->udiv(-RHS));
1690    return this->udiv(RHS);
1691  }
1692  
1693  APInt APInt::urem(const APInt &RHS) const {
1694    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1695    if (isSingleWord()) {
1696      assert(RHS.U.VAL != 0 && "Remainder by zero?");
1697      return APInt(BitWidth, U.VAL % RHS.U.VAL);
1698    }
1699  
1700    // Get some facts about the LHS
1701    unsigned lhsWords = getNumWords(getActiveBits());
1702  
1703    // Get some facts about the RHS
1704    unsigned rhsBits = RHS.getActiveBits();
1705    unsigned rhsWords = getNumWords(rhsBits);
1706    assert(rhsWords && "Performing remainder operation by zero ???");
1707  
1708    // Check the degenerate cases
1709    if (lhsWords == 0)
1710      // 0 % Y ===> 0
1711      return APInt(BitWidth, 0);
1712    if (rhsBits == 1)
1713      // X % 1 ===> 0
1714      return APInt(BitWidth, 0);
1715    if (lhsWords < rhsWords || this->ult(RHS))
1716      // X % Y ===> X, iff X < Y
1717      return *this;
1718    if (*this == RHS)
1719      // X % X == 0;
1720      return APInt(BitWidth, 0);
1721    if (lhsWords == 1)
1722      // All high words are zero, just use native remainder
1723      return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1724  
1725    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1726    APInt Remainder(BitWidth, 0);
1727    divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1728    return Remainder;
1729  }
1730  
1731  uint64_t APInt::urem(uint64_t RHS) const {
1732    assert(RHS != 0 && "Remainder by zero?");
1733  
1734    if (isSingleWord())
1735      return U.VAL % RHS;
1736  
1737    // Get some facts about the LHS
1738    unsigned lhsWords = getNumWords(getActiveBits());
1739  
1740    // Check the degenerate cases
1741    if (lhsWords == 0)
1742      // 0 % Y ===> 0
1743      return 0;
1744    if (RHS == 1)
1745      // X % 1 ===> 0
1746      return 0;
1747    if (this->ult(RHS))
1748      // X % Y ===> X, iff X < Y
1749      return getZExtValue();
1750    if (*this == RHS)
1751      // X % X == 0;
1752      return 0;
1753    if (lhsWords == 1)
1754      // All high words are zero, just use native remainder
1755      return U.pVal[0] % RHS;
1756  
1757    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1758    uint64_t Remainder;
1759    divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1760    return Remainder;
1761  }
1762  
1763  APInt APInt::srem(const APInt &RHS) const {
1764    if (isNegative()) {
1765      if (RHS.isNegative())
1766        return -((-(*this)).urem(-RHS));
1767      return -((-(*this)).urem(RHS));
1768    }
1769    if (RHS.isNegative())
1770      return this->urem(-RHS);
1771    return this->urem(RHS);
1772  }
1773  
1774  int64_t APInt::srem(int64_t RHS) const {
1775    if (isNegative()) {
1776      if (RHS < 0)
1777        return -((-(*this)).urem(-RHS));
1778      return -((-(*this)).urem(RHS));
1779    }
1780    if (RHS < 0)
1781      return this->urem(-RHS);
1782    return this->urem(RHS);
1783  }
1784  
1785  void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1786                      APInt &Quotient, APInt &Remainder) {
1787    assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1788    unsigned BitWidth = LHS.BitWidth;
1789  
1790    // First, deal with the easy case
1791    if (LHS.isSingleWord()) {
1792      assert(RHS.U.VAL != 0 && "Divide by zero?");
1793      uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1794      uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1795      Quotient = APInt(BitWidth, QuotVal);
1796      Remainder = APInt(BitWidth, RemVal);
1797      return;
1798    }
1799  
1800    // Get some size facts about the dividend and divisor
1801    unsigned lhsWords = getNumWords(LHS.getActiveBits());
1802    unsigned rhsBits  = RHS.getActiveBits();
1803    unsigned rhsWords = getNumWords(rhsBits);
1804    assert(rhsWords && "Performing divrem operation by zero ???");
1805  
1806    // Check the degenerate cases
1807    if (lhsWords == 0) {
1808      Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1809      Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1810      return;
1811    }
1812  
1813    if (rhsBits == 1) {
1814      Quotient = LHS;                   // X / 1 ===> X
1815      Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1816    }
1817  
1818    if (lhsWords < rhsWords || LHS.ult(RHS)) {
1819      Remainder = LHS;                  // X % Y ===> X, iff X < Y
1820      Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1821      return;
1822    }
1823  
1824    if (LHS == RHS) {
1825      Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1826      Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1827      return;
1828    }
1829  
1830    // Make sure there is enough space to hold the results.
1831    // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1832    // change the size. This is necessary if Quotient or Remainder is aliased
1833    // with LHS or RHS.
1834    Quotient.reallocate(BitWidth);
1835    Remainder.reallocate(BitWidth);
1836  
1837    if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1838      // There is only one word to consider so use the native versions.
1839      uint64_t lhsValue = LHS.U.pVal[0];
1840      uint64_t rhsValue = RHS.U.pVal[0];
1841      Quotient = lhsValue / rhsValue;
1842      Remainder = lhsValue % rhsValue;
1843      return;
1844    }
1845  
1846    // Okay, lets do it the long way
1847    divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1848           Remainder.U.pVal);
1849    // Clear the rest of the Quotient and Remainder.
1850    std::memset(Quotient.U.pVal + lhsWords, 0,
1851                (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1852    std::memset(Remainder.U.pVal + rhsWords, 0,
1853                (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1854  }
1855  
1856  void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1857                      uint64_t &Remainder) {
1858    assert(RHS != 0 && "Divide by zero?");
1859    unsigned BitWidth = LHS.BitWidth;
1860  
1861    // First, deal with the easy case
1862    if (LHS.isSingleWord()) {
1863      uint64_t QuotVal = LHS.U.VAL / RHS;
1864      Remainder = LHS.U.VAL % RHS;
1865      Quotient = APInt(BitWidth, QuotVal);
1866      return;
1867    }
1868  
1869    // Get some size facts about the dividend and divisor
1870    unsigned lhsWords = getNumWords(LHS.getActiveBits());
1871  
1872    // Check the degenerate cases
1873    if (lhsWords == 0) {
1874      Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1875      Remainder = 0;                    // 0 % Y ===> 0
1876      return;
1877    }
1878  
1879    if (RHS == 1) {
1880      Quotient = LHS;                   // X / 1 ===> X
1881      Remainder = 0;                    // X % 1 ===> 0
1882      return;
1883    }
1884  
1885    if (LHS.ult(RHS)) {
1886      Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1887      Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1888      return;
1889    }
1890  
1891    if (LHS == RHS) {
1892      Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1893      Remainder = 0;                    // X % X ===> 0;
1894      return;
1895    }
1896  
1897    // Make sure there is enough space to hold the results.
1898    // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1899    // change the size. This is necessary if Quotient is aliased with LHS.
1900    Quotient.reallocate(BitWidth);
1901  
1902    if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1903      // There is only one word to consider so use the native versions.
1904      uint64_t lhsValue = LHS.U.pVal[0];
1905      Quotient = lhsValue / RHS;
1906      Remainder = lhsValue % RHS;
1907      return;
1908    }
1909  
1910    // Okay, lets do it the long way
1911    divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1912    // Clear the rest of the Quotient.
1913    std::memset(Quotient.U.pVal + lhsWords, 0,
1914                (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1915  }
1916  
1917  void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1918                      APInt &Quotient, APInt &Remainder) {
1919    if (LHS.isNegative()) {
1920      if (RHS.isNegative())
1921        APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1922      else {
1923        APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1924        Quotient.negate();
1925      }
1926      Remainder.negate();
1927    } else if (RHS.isNegative()) {
1928      APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1929      Quotient.negate();
1930    } else {
1931      APInt::udivrem(LHS, RHS, Quotient, Remainder);
1932    }
1933  }
1934  
1935  void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1936                      APInt &Quotient, int64_t &Remainder) {
1937    uint64_t R = Remainder;
1938    if (LHS.isNegative()) {
1939      if (RHS < 0)
1940        APInt::udivrem(-LHS, -RHS, Quotient, R);
1941      else {
1942        APInt::udivrem(-LHS, RHS, Quotient, R);
1943        Quotient.negate();
1944      }
1945      R = -R;
1946    } else if (RHS < 0) {
1947      APInt::udivrem(LHS, -RHS, Quotient, R);
1948      Quotient.negate();
1949    } else {
1950      APInt::udivrem(LHS, RHS, Quotient, R);
1951    }
1952    Remainder = R;
1953  }
1954  
1955  APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1956    APInt Res = *this+RHS;
1957    Overflow = isNonNegative() == RHS.isNonNegative() &&
1958               Res.isNonNegative() != isNonNegative();
1959    return Res;
1960  }
1961  
1962  APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1963    APInt Res = *this+RHS;
1964    Overflow = Res.ult(RHS);
1965    return Res;
1966  }
1967  
1968  APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1969    APInt Res = *this - RHS;
1970    Overflow = isNonNegative() != RHS.isNonNegative() &&
1971               Res.isNonNegative() != isNonNegative();
1972    return Res;
1973  }
1974  
1975  APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1976    APInt Res = *this-RHS;
1977    Overflow = Res.ugt(*this);
1978    return Res;
1979  }
1980  
1981  APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1982    // MININT/-1  -->  overflow.
1983    Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1984    return sdiv(RHS);
1985  }
1986  
1987  APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1988    APInt Res = *this * RHS;
1989  
1990    if (*this != 0 && RHS != 0)
1991      Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1992    else
1993      Overflow = false;
1994    return Res;
1995  }
1996  
1997  APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1998    if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1999      Overflow = true;
2000      return *this * RHS;
2001    }
2002  
2003    APInt Res = lshr(1) * RHS;
2004    Overflow = Res.isNegative();
2005    Res <<= 1;
2006    if ((*this)[0]) {
2007      Res += RHS;
2008      if (Res.ult(RHS))
2009        Overflow = true;
2010    }
2011    return Res;
2012  }
2013  
2014  APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2015    Overflow = ShAmt.uge(getBitWidth());
2016    if (Overflow)
2017      return APInt(BitWidth, 0);
2018  
2019    if (isNonNegative()) // Don't allow sign change.
2020      Overflow = ShAmt.uge(countLeadingZeros());
2021    else
2022      Overflow = ShAmt.uge(countLeadingOnes());
2023  
2024    return *this << ShAmt;
2025  }
2026  
2027  APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2028    Overflow = ShAmt.uge(getBitWidth());
2029    if (Overflow)
2030      return APInt(BitWidth, 0);
2031  
2032    Overflow = ShAmt.ugt(countLeadingZeros());
2033  
2034    return *this << ShAmt;
2035  }
2036  
2037  APInt APInt::sadd_sat(const APInt &RHS) const {
2038    bool Overflow;
2039    APInt Res = sadd_ov(RHS, Overflow);
2040    if (!Overflow)
2041      return Res;
2042  
2043    return isNegative() ? APInt::getSignedMinValue(BitWidth)
2044                        : APInt::getSignedMaxValue(BitWidth);
2045  }
2046  
2047  APInt APInt::uadd_sat(const APInt &RHS) const {
2048    bool Overflow;
2049    APInt Res = uadd_ov(RHS, Overflow);
2050    if (!Overflow)
2051      return Res;
2052  
2053    return APInt::getMaxValue(BitWidth);
2054  }
2055  
2056  APInt APInt::ssub_sat(const APInt &RHS) const {
2057    bool Overflow;
2058    APInt Res = ssub_ov(RHS, Overflow);
2059    if (!Overflow)
2060      return Res;
2061  
2062    return isNegative() ? APInt::getSignedMinValue(BitWidth)
2063                        : APInt::getSignedMaxValue(BitWidth);
2064  }
2065  
2066  APInt APInt::usub_sat(const APInt &RHS) const {
2067    bool Overflow;
2068    APInt Res = usub_ov(RHS, Overflow);
2069    if (!Overflow)
2070      return Res;
2071  
2072    return APInt(BitWidth, 0);
2073  }
2074  
2075  APInt APInt::smul_sat(const APInt &RHS) const {
2076    bool Overflow;
2077    APInt Res = smul_ov(RHS, Overflow);
2078    if (!Overflow)
2079      return Res;
2080  
2081    // The result is negative if one and only one of inputs is negative.
2082    bool ResIsNegative = isNegative() ^ RHS.isNegative();
2083  
2084    return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2085                         : APInt::getSignedMaxValue(BitWidth);
2086  }
2087  
2088  APInt APInt::umul_sat(const APInt &RHS) const {
2089    bool Overflow;
2090    APInt Res = umul_ov(RHS, Overflow);
2091    if (!Overflow)
2092      return Res;
2093  
2094    return APInt::getMaxValue(BitWidth);
2095  }
2096  
2097  APInt APInt::sshl_sat(const APInt &RHS) const {
2098    bool Overflow;
2099    APInt Res = sshl_ov(RHS, Overflow);
2100    if (!Overflow)
2101      return Res;
2102  
2103    return isNegative() ? APInt::getSignedMinValue(BitWidth)
2104                        : APInt::getSignedMaxValue(BitWidth);
2105  }
2106  
2107  APInt APInt::ushl_sat(const APInt &RHS) const {
2108    bool Overflow;
2109    APInt Res = ushl_ov(RHS, Overflow);
2110    if (!Overflow)
2111      return Res;
2112  
2113    return APInt::getMaxValue(BitWidth);
2114  }
2115  
2116  void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2117    // Check our assumptions here
2118    assert(!str.empty() && "Invalid string length");
2119    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2120            radix == 36) &&
2121           "Radix should be 2, 8, 10, 16, or 36!");
2122  
2123    StringRef::iterator p = str.begin();
2124    size_t slen = str.size();
2125    bool isNeg = *p == '-';
2126    if (*p == '-' || *p == '+') {
2127      p++;
2128      slen--;
2129      assert(slen && "String is only a sign, needs a value.");
2130    }
2131    assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2132    assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2133    assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2134    assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2135           "Insufficient bit width");
2136  
2137    // Allocate memory if needed
2138    if (isSingleWord())
2139      U.VAL = 0;
2140    else
2141      U.pVal = getClearedMemory(getNumWords());
2142  
2143    // Figure out if we can shift instead of multiply
2144    unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2145  
2146    // Enter digit traversal loop
2147    for (StringRef::iterator e = str.end(); p != e; ++p) {
2148      unsigned digit = getDigit(*p, radix);
2149      assert(digit < radix && "Invalid character in digit string");
2150  
2151      // Shift or multiply the value by the radix
2152      if (slen > 1) {
2153        if (shift)
2154          *this <<= shift;
2155        else
2156          *this *= radix;
2157      }
2158  
2159      // Add in the digit we just interpreted
2160      *this += digit;
2161    }
2162    // If its negative, put it in two's complement form
2163    if (isNeg)
2164      this->negate();
2165  }
2166  
2167  void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2168                       bool Signed, bool formatAsCLiteral) const {
2169    assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2170            Radix == 36) &&
2171           "Radix should be 2, 8, 10, 16, or 36!");
2172  
2173    const char *Prefix = "";
2174    if (formatAsCLiteral) {
2175      switch (Radix) {
2176        case 2:
2177          // Binary literals are a non-standard extension added in gcc 4.3:
2178          // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2179          Prefix = "0b";
2180          break;
2181        case 8:
2182          Prefix = "0";
2183          break;
2184        case 10:
2185          break; // No prefix
2186        case 16:
2187          Prefix = "0x";
2188          break;
2189        default:
2190          llvm_unreachable("Invalid radix!");
2191      }
2192    }
2193  
2194    // First, check for a zero value and just short circuit the logic below.
2195    if (*this == 0) {
2196      while (*Prefix) {
2197        Str.push_back(*Prefix);
2198        ++Prefix;
2199      };
2200      Str.push_back('0');
2201      return;
2202    }
2203  
2204    static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2205  
2206    if (isSingleWord()) {
2207      char Buffer[65];
2208      char *BufPtr = std::end(Buffer);
2209  
2210      uint64_t N;
2211      if (!Signed) {
2212        N = getZExtValue();
2213      } else {
2214        int64_t I = getSExtValue();
2215        if (I >= 0) {
2216          N = I;
2217        } else {
2218          Str.push_back('-');
2219          N = -(uint64_t)I;
2220        }
2221      }
2222  
2223      while (*Prefix) {
2224        Str.push_back(*Prefix);
2225        ++Prefix;
2226      };
2227  
2228      while (N) {
2229        *--BufPtr = Digits[N % Radix];
2230        N /= Radix;
2231      }
2232      Str.append(BufPtr, std::end(Buffer));
2233      return;
2234    }
2235  
2236    APInt Tmp(*this);
2237  
2238    if (Signed && isNegative()) {
2239      // They want to print the signed version and it is a negative value
2240      // Flip the bits and add one to turn it into the equivalent positive
2241      // value and put a '-' in the result.
2242      Tmp.negate();
2243      Str.push_back('-');
2244    }
2245  
2246    while (*Prefix) {
2247      Str.push_back(*Prefix);
2248      ++Prefix;
2249    };
2250  
2251    // We insert the digits backward, then reverse them to get the right order.
2252    unsigned StartDig = Str.size();
2253  
2254    // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2255    // because the number of bits per digit (1, 3 and 4 respectively) divides
2256    // equally.  We just shift until the value is zero.
2257    if (Radix == 2 || Radix == 8 || Radix == 16) {
2258      // Just shift tmp right for each digit width until it becomes zero
2259      unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2260      unsigned MaskAmt = Radix - 1;
2261  
2262      while (Tmp.getBoolValue()) {
2263        unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2264        Str.push_back(Digits[Digit]);
2265        Tmp.lshrInPlace(ShiftAmt);
2266      }
2267    } else {
2268      while (Tmp.getBoolValue()) {
2269        uint64_t Digit;
2270        udivrem(Tmp, Radix, Tmp, Digit);
2271        assert(Digit < Radix && "divide failed");
2272        Str.push_back(Digits[Digit]);
2273      }
2274    }
2275  
2276    // Reverse the digits before returning.
2277    std::reverse(Str.begin()+StartDig, Str.end());
2278  }
2279  
2280  /// Returns the APInt as a std::string. Note that this is an inefficient method.
2281  /// It is better to pass in a SmallVector/SmallString to the methods above.
2282  std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2283    SmallString<40> S;
2284    toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2285    return std::string(S.str());
2286  }
2287  
2288  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2289  LLVM_DUMP_METHOD void APInt::dump() const {
2290    SmallString<40> S, U;
2291    this->toStringUnsigned(U);
2292    this->toStringSigned(S);
2293    dbgs() << "APInt(" << BitWidth << "b, "
2294           << U << "u " << S << "s)\n";
2295  }
2296  #endif
2297  
2298  void APInt::print(raw_ostream &OS, bool isSigned) const {
2299    SmallString<40> S;
2300    this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2301    OS << S;
2302  }
2303  
2304  // This implements a variety of operations on a representation of
2305  // arbitrary precision, two's-complement, bignum integer values.
2306  
2307  // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2308  // and unrestricting assumption.
2309  static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2310                "Part width must be divisible by 2!");
2311  
2312  /* Some handy functions local to this file.  */
2313  
2314  /* Returns the integer part with the least significant BITS set.
2315     BITS cannot be zero.  */
2316  static inline APInt::WordType lowBitMask(unsigned bits) {
2317    assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2318  
2319    return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2320  }
2321  
2322  /* Returns the value of the lower half of PART.  */
2323  static inline APInt::WordType lowHalf(APInt::WordType part) {
2324    return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2325  }
2326  
2327  /* Returns the value of the upper half of PART.  */
2328  static inline APInt::WordType highHalf(APInt::WordType part) {
2329    return part >> (APInt::APINT_BITS_PER_WORD / 2);
2330  }
2331  
2332  /* Returns the bit number of the most significant set bit of a part.
2333     If the input number has no bits set -1U is returned.  */
2334  static unsigned partMSB(APInt::WordType value) {
2335    return findLastSet(value, ZB_Max);
2336  }
2337  
2338  /* Returns the bit number of the least significant set bit of a
2339     part.  If the input number has no bits set -1U is returned.  */
2340  static unsigned partLSB(APInt::WordType value) {
2341    return findFirstSet(value, ZB_Max);
2342  }
2343  
2344  /* Sets the least significant part of a bignum to the input value, and
2345     zeroes out higher parts.  */
2346  void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2347    assert(parts > 0);
2348  
2349    dst[0] = part;
2350    for (unsigned i = 1; i < parts; i++)
2351      dst[i] = 0;
2352  }
2353  
2354  /* Assign one bignum to another.  */
2355  void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2356    for (unsigned i = 0; i < parts; i++)
2357      dst[i] = src[i];
2358  }
2359  
2360  /* Returns true if a bignum is zero, false otherwise.  */
2361  bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2362    for (unsigned i = 0; i < parts; i++)
2363      if (src[i])
2364        return false;
2365  
2366    return true;
2367  }
2368  
2369  /* Extract the given bit of a bignum; returns 0 or 1.  */
2370  int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2371    return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2372  }
2373  
2374  /* Set the given bit of a bignum. */
2375  void APInt::tcSetBit(WordType *parts, unsigned bit) {
2376    parts[whichWord(bit)] |= maskBit(bit);
2377  }
2378  
2379  /* Clears the given bit of a bignum. */
2380  void APInt::tcClearBit(WordType *parts, unsigned bit) {
2381    parts[whichWord(bit)] &= ~maskBit(bit);
2382  }
2383  
2384  /* Returns the bit number of the least significant set bit of a
2385     number.  If the input number has no bits set -1U is returned.  */
2386  unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2387    for (unsigned i = 0; i < n; i++) {
2388      if (parts[i] != 0) {
2389        unsigned lsb = partLSB(parts[i]);
2390  
2391        return lsb + i * APINT_BITS_PER_WORD;
2392      }
2393    }
2394  
2395    return -1U;
2396  }
2397  
2398  /* Returns the bit number of the most significant set bit of a number.
2399     If the input number has no bits set -1U is returned.  */
2400  unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2401    do {
2402      --n;
2403  
2404      if (parts[n] != 0) {
2405        unsigned msb = partMSB(parts[n]);
2406  
2407        return msb + n * APINT_BITS_PER_WORD;
2408      }
2409    } while (n);
2410  
2411    return -1U;
2412  }
2413  
2414  /* Copy the bit vector of width srcBITS from SRC, starting at bit
2415     srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2416     the least significant bit of DST.  All high bits above srcBITS in
2417     DST are zero-filled.  */
2418  void
2419  APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2420                   unsigned srcBits, unsigned srcLSB) {
2421    unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2422    assert(dstParts <= dstCount);
2423  
2424    unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2425    tcAssign (dst, src + firstSrcPart, dstParts);
2426  
2427    unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2428    tcShiftRight (dst, dstParts, shift);
2429  
2430    /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2431       in DST.  If this is less that srcBits, append the rest, else
2432       clear the high bits.  */
2433    unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2434    if (n < srcBits) {
2435      WordType mask = lowBitMask (srcBits - n);
2436      dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2437                            << n % APINT_BITS_PER_WORD);
2438    } else if (n > srcBits) {
2439      if (srcBits % APINT_BITS_PER_WORD)
2440        dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2441    }
2442  
2443    /* Clear high parts.  */
2444    while (dstParts < dstCount)
2445      dst[dstParts++] = 0;
2446  }
2447  
2448  /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2449  APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2450                               WordType c, unsigned parts) {
2451    assert(c <= 1);
2452  
2453    for (unsigned i = 0; i < parts; i++) {
2454      WordType l = dst[i];
2455      if (c) {
2456        dst[i] += rhs[i] + 1;
2457        c = (dst[i] <= l);
2458      } else {
2459        dst[i] += rhs[i];
2460        c = (dst[i] < l);
2461      }
2462    }
2463  
2464    return c;
2465  }
2466  
2467  /// This function adds a single "word" integer, src, to the multiple
2468  /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2469  /// 1 is returned if there is a carry out, otherwise 0 is returned.
2470  /// @returns the carry of the addition.
2471  APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2472                                   unsigned parts) {
2473    for (unsigned i = 0; i < parts; ++i) {
2474      dst[i] += src;
2475      if (dst[i] >= src)
2476        return 0; // No need to carry so exit early.
2477      src = 1; // Carry one to next digit.
2478    }
2479  
2480    return 1;
2481  }
2482  
2483  /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2484  APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2485                                    WordType c, unsigned parts) {
2486    assert(c <= 1);
2487  
2488    for (unsigned i = 0; i < parts; i++) {
2489      WordType l = dst[i];
2490      if (c) {
2491        dst[i] -= rhs[i] + 1;
2492        c = (dst[i] >= l);
2493      } else {
2494        dst[i] -= rhs[i];
2495        c = (dst[i] > l);
2496      }
2497    }
2498  
2499    return c;
2500  }
2501  
2502  /// This function subtracts a single "word" (64-bit word), src, from
2503  /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2504  /// no further borrowing is needed or it runs out of "words" in dst.  The result
2505  /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2506  /// exhausted. In other words, if src > dst then this function returns 1,
2507  /// otherwise 0.
2508  /// @returns the borrow out of the subtraction
2509  APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2510                                        unsigned parts) {
2511    for (unsigned i = 0; i < parts; ++i) {
2512      WordType Dst = dst[i];
2513      dst[i] -= src;
2514      if (src <= Dst)
2515        return 0; // No need to borrow so exit early.
2516      src = 1; // We have to "borrow 1" from next "word"
2517    }
2518  
2519    return 1;
2520  }
2521  
2522  /* Negate a bignum in-place.  */
2523  void APInt::tcNegate(WordType *dst, unsigned parts) {
2524    tcComplement(dst, parts);
2525    tcIncrement(dst, parts);
2526  }
2527  
2528  /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2529      DST  = SRC * MULTIPLIER + CARRY   if add is false
2530  
2531      Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2532      they must start at the same point, i.e. DST == SRC.
2533  
2534      If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2535      returned.  Otherwise DST is filled with the least significant
2536      DSTPARTS parts of the result, and if all of the omitted higher
2537      parts were zero return zero, otherwise overflow occurred and
2538      return one.  */
2539  int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2540                            WordType multiplier, WordType carry,
2541                            unsigned srcParts, unsigned dstParts,
2542                            bool add) {
2543    /* Otherwise our writes of DST kill our later reads of SRC.  */
2544    assert(dst <= src || dst >= src + srcParts);
2545    assert(dstParts <= srcParts + 1);
2546  
2547    /* N loops; minimum of dstParts and srcParts.  */
2548    unsigned n = std::min(dstParts, srcParts);
2549  
2550    for (unsigned i = 0; i < n; i++) {
2551      WordType low, mid, high, srcPart;
2552  
2553        /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2554  
2555           This cannot overflow, because
2556  
2557           (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2558  
2559           which is less than n^2.  */
2560  
2561      srcPart = src[i];
2562  
2563      if (multiplier == 0 || srcPart == 0) {
2564        low = carry;
2565        high = 0;
2566      } else {
2567        low = lowHalf(srcPart) * lowHalf(multiplier);
2568        high = highHalf(srcPart) * highHalf(multiplier);
2569  
2570        mid = lowHalf(srcPart) * highHalf(multiplier);
2571        high += highHalf(mid);
2572        mid <<= APINT_BITS_PER_WORD / 2;
2573        if (low + mid < low)
2574          high++;
2575        low += mid;
2576  
2577        mid = highHalf(srcPart) * lowHalf(multiplier);
2578        high += highHalf(mid);
2579        mid <<= APINT_BITS_PER_WORD / 2;
2580        if (low + mid < low)
2581          high++;
2582        low += mid;
2583  
2584        /* Now add carry.  */
2585        if (low + carry < low)
2586          high++;
2587        low += carry;
2588      }
2589  
2590      if (add) {
2591        /* And now DST[i], and store the new low part there.  */
2592        if (low + dst[i] < low)
2593          high++;
2594        dst[i] += low;
2595      } else
2596        dst[i] = low;
2597  
2598      carry = high;
2599    }
2600  
2601    if (srcParts < dstParts) {
2602      /* Full multiplication, there is no overflow.  */
2603      assert(srcParts + 1 == dstParts);
2604      dst[srcParts] = carry;
2605      return 0;
2606    }
2607  
2608    /* We overflowed if there is carry.  */
2609    if (carry)
2610      return 1;
2611  
2612    /* We would overflow if any significant unwritten parts would be
2613       non-zero.  This is true if any remaining src parts are non-zero
2614       and the multiplier is non-zero.  */
2615    if (multiplier)
2616      for (unsigned i = dstParts; i < srcParts; i++)
2617        if (src[i])
2618          return 1;
2619  
2620    /* We fitted in the narrow destination.  */
2621    return 0;
2622  }
2623  
2624  /* DST = LHS * RHS, where DST has the same width as the operands and
2625     is filled with the least significant parts of the result.  Returns
2626     one if overflow occurred, otherwise zero.  DST must be disjoint
2627     from both operands.  */
2628  int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2629                        const WordType *rhs, unsigned parts) {
2630    assert(dst != lhs && dst != rhs);
2631  
2632    int overflow = 0;
2633    tcSet(dst, 0, parts);
2634  
2635    for (unsigned i = 0; i < parts; i++)
2636      overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2637                                 parts - i, true);
2638  
2639    return overflow;
2640  }
2641  
2642  /// DST = LHS * RHS, where DST has width the sum of the widths of the
2643  /// operands. No overflow occurs. DST must be disjoint from both operands.
2644  void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2645                             const WordType *rhs, unsigned lhsParts,
2646                             unsigned rhsParts) {
2647    /* Put the narrower number on the LHS for less loops below.  */
2648    if (lhsParts > rhsParts)
2649      return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2650  
2651    assert(dst != lhs && dst != rhs);
2652  
2653    tcSet(dst, 0, rhsParts);
2654  
2655    for (unsigned i = 0; i < lhsParts; i++)
2656      tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2657  }
2658  
2659  /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2660     Otherwise set LHS to LHS / RHS with the fractional part discarded,
2661     set REMAINDER to the remainder, return zero.  i.e.
2662  
2663     OLD_LHS = RHS * LHS + REMAINDER
2664  
2665     SCRATCH is a bignum of the same size as the operands and result for
2666     use by the routine; its contents need not be initialized and are
2667     destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2668  */
2669  int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2670                      WordType *remainder, WordType *srhs,
2671                      unsigned parts) {
2672    assert(lhs != remainder && lhs != srhs && remainder != srhs);
2673  
2674    unsigned shiftCount = tcMSB(rhs, parts) + 1;
2675    if (shiftCount == 0)
2676      return true;
2677  
2678    shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2679    unsigned n = shiftCount / APINT_BITS_PER_WORD;
2680    WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2681  
2682    tcAssign(srhs, rhs, parts);
2683    tcShiftLeft(srhs, parts, shiftCount);
2684    tcAssign(remainder, lhs, parts);
2685    tcSet(lhs, 0, parts);
2686  
2687    /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2688       the total.  */
2689    for (;;) {
2690      int compare = tcCompare(remainder, srhs, parts);
2691      if (compare >= 0) {
2692        tcSubtract(remainder, srhs, 0, parts);
2693        lhs[n] |= mask;
2694      }
2695  
2696      if (shiftCount == 0)
2697        break;
2698      shiftCount--;
2699      tcShiftRight(srhs, parts, 1);
2700      if ((mask >>= 1) == 0) {
2701        mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2702        n--;
2703      }
2704    }
2705  
2706    return false;
2707  }
2708  
2709  /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2710  /// no restrictions on Count.
2711  void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2712    // Don't bother performing a no-op shift.
2713    if (!Count)
2714      return;
2715  
2716    // WordShift is the inter-part shift; BitShift is the intra-part shift.
2717    unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2718    unsigned BitShift = Count % APINT_BITS_PER_WORD;
2719  
2720    // Fastpath for moving by whole words.
2721    if (BitShift == 0) {
2722      std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2723    } else {
2724      while (Words-- > WordShift) {
2725        Dst[Words] = Dst[Words - WordShift] << BitShift;
2726        if (Words > WordShift)
2727          Dst[Words] |=
2728            Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2729      }
2730    }
2731  
2732    // Fill in the remainder with 0s.
2733    std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2734  }
2735  
2736  /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2737  /// are no restrictions on Count.
2738  void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2739    // Don't bother performing a no-op shift.
2740    if (!Count)
2741      return;
2742  
2743    // WordShift is the inter-part shift; BitShift is the intra-part shift.
2744    unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2745    unsigned BitShift = Count % APINT_BITS_PER_WORD;
2746  
2747    unsigned WordsToMove = Words - WordShift;
2748    // Fastpath for moving by whole words.
2749    if (BitShift == 0) {
2750      std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2751    } else {
2752      for (unsigned i = 0; i != WordsToMove; ++i) {
2753        Dst[i] = Dst[i + WordShift] >> BitShift;
2754        if (i + 1 != WordsToMove)
2755          Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2756      }
2757    }
2758  
2759    // Fill in the remainder with 0s.
2760    std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2761  }
2762  
2763  /* Bitwise and of two bignums.  */
2764  void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2765    for (unsigned i = 0; i < parts; i++)
2766      dst[i] &= rhs[i];
2767  }
2768  
2769  /* Bitwise inclusive or of two bignums.  */
2770  void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2771    for (unsigned i = 0; i < parts; i++)
2772      dst[i] |= rhs[i];
2773  }
2774  
2775  /* Bitwise exclusive or of two bignums.  */
2776  void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2777    for (unsigned i = 0; i < parts; i++)
2778      dst[i] ^= rhs[i];
2779  }
2780  
2781  /* Complement a bignum in-place.  */
2782  void APInt::tcComplement(WordType *dst, unsigned parts) {
2783    for (unsigned i = 0; i < parts; i++)
2784      dst[i] = ~dst[i];
2785  }
2786  
2787  /* Comparison (unsigned) of two bignums.  */
2788  int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2789                       unsigned parts) {
2790    while (parts) {
2791      parts--;
2792      if (lhs[parts] != rhs[parts])
2793        return (lhs[parts] > rhs[parts]) ? 1 : -1;
2794    }
2795  
2796    return 0;
2797  }
2798  
2799  /* Set the least significant BITS bits of a bignum, clear the
2800     rest.  */
2801  void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2802                                        unsigned bits) {
2803    unsigned i = 0;
2804    while (bits > APINT_BITS_PER_WORD) {
2805      dst[i++] = ~(WordType) 0;
2806      bits -= APINT_BITS_PER_WORD;
2807    }
2808  
2809    if (bits)
2810      dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2811  
2812    while (i < parts)
2813      dst[i++] = 0;
2814  }
2815  
2816  APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2817                                     APInt::Rounding RM) {
2818    // Currently udivrem always rounds down.
2819    switch (RM) {
2820    case APInt::Rounding::DOWN:
2821    case APInt::Rounding::TOWARD_ZERO:
2822      return A.udiv(B);
2823    case APInt::Rounding::UP: {
2824      APInt Quo, Rem;
2825      APInt::udivrem(A, B, Quo, Rem);
2826      if (Rem == 0)
2827        return Quo;
2828      return Quo + 1;
2829    }
2830    }
2831    llvm_unreachable("Unknown APInt::Rounding enum");
2832  }
2833  
2834  APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2835                                     APInt::Rounding RM) {
2836    switch (RM) {
2837    case APInt::Rounding::DOWN:
2838    case APInt::Rounding::UP: {
2839      APInt Quo, Rem;
2840      APInt::sdivrem(A, B, Quo, Rem);
2841      if (Rem == 0)
2842        return Quo;
2843      // This algorithm deals with arbitrary rounding mode used by sdivrem.
2844      // We want to check whether the non-integer part of the mathematical value
2845      // is negative or not. If the non-integer part is negative, we need to round
2846      // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2847      // already rounded down.
2848      if (RM == APInt::Rounding::DOWN) {
2849        if (Rem.isNegative() != B.isNegative())
2850          return Quo - 1;
2851        return Quo;
2852      }
2853      if (Rem.isNegative() != B.isNegative())
2854        return Quo;
2855      return Quo + 1;
2856    }
2857    // Currently sdiv rounds towards zero.
2858    case APInt::Rounding::TOWARD_ZERO:
2859      return A.sdiv(B);
2860    }
2861    llvm_unreachable("Unknown APInt::Rounding enum");
2862  }
2863  
2864  Optional<APInt>
2865  llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2866                                             unsigned RangeWidth) {
2867    unsigned CoeffWidth = A.getBitWidth();
2868    assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2869    assert(RangeWidth <= CoeffWidth &&
2870           "Value range width should be less than coefficient width");
2871    assert(RangeWidth > 1 && "Value range bit width should be > 1");
2872  
2873    LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2874                      << "x + " << C << ", rw:" << RangeWidth << '\n');
2875  
2876    // Identify 0 as a (non)solution immediately.
2877    if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2878      LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2879      return APInt(CoeffWidth, 0);
2880    }
2881  
2882    // The result of APInt arithmetic has the same bit width as the operands,
2883    // so it can actually lose high bits. A product of two n-bit integers needs
2884    // 2n-1 bits to represent the full value.
2885    // The operation done below (on quadratic coefficients) that can produce
2886    // the largest value is the evaluation of the equation during bisection,
2887    // which needs 3 times the bitwidth of the coefficient, so the total number
2888    // of required bits is 3n.
2889    //
2890    // The purpose of this extension is to simulate the set Z of all integers,
2891    // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2892    // and negative numbers (not so much in a modulo arithmetic). The method
2893    // used to solve the equation is based on the standard formula for real
2894    // numbers, and uses the concepts of "positive" and "negative" with their
2895    // usual meanings.
2896    CoeffWidth *= 3;
2897    A = A.sext(CoeffWidth);
2898    B = B.sext(CoeffWidth);
2899    C = C.sext(CoeffWidth);
2900  
2901    // Make A > 0 for simplicity. Negate cannot overflow at this point because
2902    // the bit width has increased.
2903    if (A.isNegative()) {
2904      A.negate();
2905      B.negate();
2906      C.negate();
2907    }
2908  
2909    // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2910    // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2911    // and R = 2^BitWidth.
2912    // Since we're trying not only to find exact solutions, but also values
2913    // that "wrap around", such a set will always have a solution, i.e. an x
2914    // that satisfies at least one of the equations, or such that |q(x)|
2915    // exceeds kR, while |q(x-1)| for the same k does not.
2916    //
2917    // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2918    // positive solution n (in the above sense), and also such that the n
2919    // will be the least among all solutions corresponding to k = 0, 1, ...
2920    // (more precisely, the least element in the set
2921    //   { n(k) | k is such that a solution n(k) exists }).
2922    //
2923    // Consider the parabola (over real numbers) that corresponds to the
2924    // quadratic equation. Since A > 0, the arms of the parabola will point
2925    // up. Picking different values of k will shift it up and down by R.
2926    //
2927    // We want to shift the parabola in such a way as to reduce the problem
2928    // of solving q(x) = kR to solving shifted_q(x) = 0.
2929    // (The interesting solutions are the ceilings of the real number
2930    // solutions.)
2931    APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2932    APInt TwoA = 2 * A;
2933    APInt SqrB = B * B;
2934    bool PickLow;
2935  
2936    auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2937      assert(A.isStrictlyPositive());
2938      APInt T = V.abs().urem(A);
2939      if (T.isNullValue())
2940        return V;
2941      return V.isNegative() ? V+T : V+(A-T);
2942    };
2943  
2944    // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2945    // iff B is positive.
2946    if (B.isNonNegative()) {
2947      // If B >= 0, the vertex it at a negative location (or at 0), so in
2948      // order to have a non-negative solution we need to pick k that makes
2949      // C-kR negative. To satisfy all the requirements for the solution
2950      // that we are looking for, it needs to be closest to 0 of all k.
2951      C = C.srem(R);
2952      if (C.isStrictlyPositive())
2953        C -= R;
2954      // Pick the greater solution.
2955      PickLow = false;
2956    } else {
2957      // If B < 0, the vertex is at a positive location. For any solution
2958      // to exist, the discriminant must be non-negative. This means that
2959      // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2960      // lower bound on values of k: kR >= C - B^2/4A.
2961      APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2962      // Round LowkR up (towards +inf) to the nearest kR.
2963      LowkR = RoundUp(LowkR, R);
2964  
2965      // If there exists k meeting the condition above, and such that
2966      // C-kR > 0, there will be two positive real number solutions of
2967      // q(x) = kR. Out of all such values of k, pick the one that makes
2968      // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2969      // In other words, find maximum k such that LowkR <= kR < C.
2970      if (C.sgt(LowkR)) {
2971        // If LowkR < C, then such a k is guaranteed to exist because
2972        // LowkR itself is a multiple of R.
2973        C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2974        // Pick the smaller solution.
2975        PickLow = true;
2976      } else {
2977        // If C-kR < 0 for all potential k's, it means that one solution
2978        // will be negative, while the other will be positive. The positive
2979        // solution will shift towards 0 if the parabola is moved up.
2980        // Pick the kR closest to the lower bound (i.e. make C-kR closest
2981        // to 0, or in other words, out of all parabolas that have solutions,
2982        // pick the one that is the farthest "up").
2983        // Since LowkR is itself a multiple of R, simply take C-LowkR.
2984        C -= LowkR;
2985        // Pick the greater solution.
2986        PickLow = false;
2987      }
2988    }
2989  
2990    LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2991                      << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2992  
2993    APInt D = SqrB - 4*A*C;
2994    assert(D.isNonNegative() && "Negative discriminant");
2995    APInt SQ = D.sqrt();
2996  
2997    APInt Q = SQ * SQ;
2998    bool InexactSQ = Q != D;
2999    // The calculated SQ may actually be greater than the exact (non-integer)
3000    // value. If that's the case, decrement SQ to get a value that is lower.
3001    if (Q.sgt(D))
3002      SQ -= 1;
3003  
3004    APInt X;
3005    APInt Rem;
3006  
3007    // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3008    // When using the quadratic formula directly, the calculated low root
3009    // may be greater than the exact one, since we would be subtracting SQ.
3010    // To make sure that the calculated root is not greater than the exact
3011    // one, subtract SQ+1 when calculating the low root (for inexact value
3012    // of SQ).
3013    if (PickLow)
3014      APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3015    else
3016      APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3017  
3018    // The updated coefficients should be such that the (exact) solution is
3019    // positive. Since APInt division rounds towards 0, the calculated one
3020    // can be 0, but cannot be negative.
3021    assert(X.isNonNegative() && "Solution should be non-negative");
3022  
3023    if (!InexactSQ && Rem.isNullValue()) {
3024      LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3025      return X;
3026    }
3027  
3028    assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3029    // The exact value of the square root of D should be between SQ and SQ+1.
3030    // This implies that the solution should be between that corresponding to
3031    // SQ (i.e. X) and that corresponding to SQ+1.
3032    //
3033    // The calculated X cannot be greater than the exact (real) solution.
3034    // Actually it must be strictly less than the exact solution, while
3035    // X+1 will be greater than or equal to it.
3036  
3037    APInt VX = (A*X + B)*X + C;
3038    APInt VY = VX + TwoA*X + A + B;
3039    bool SignChange = VX.isNegative() != VY.isNegative() ||
3040                      VX.isNullValue() != VY.isNullValue();
3041    // If the sign did not change between X and X+1, X is not a valid solution.
3042    // This could happen when the actual (exact) roots don't have an integer
3043    // between them, so they would both be contained between X and X+1.
3044    if (!SignChange) {
3045      LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3046      return None;
3047    }
3048  
3049    X += 1;
3050    LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3051    return X;
3052  }
3053  
3054  Optional<unsigned>
3055  llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3056    assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3057    if (A == B)
3058      return llvm::None;
3059    return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3060  }
3061  
3062  /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3063  /// with the integer held in IntVal.
3064  void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3065                              unsigned StoreBytes) {
3066    assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3067    const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3068  
3069    if (sys::IsLittleEndianHost) {
3070      // Little-endian host - the source is ordered from LSB to MSB.  Order the
3071      // destination from LSB to MSB: Do a straight copy.
3072      memcpy(Dst, Src, StoreBytes);
3073    } else {
3074      // Big-endian host - the source is an array of 64 bit words ordered from
3075      // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3076      // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3077      while (StoreBytes > sizeof(uint64_t)) {
3078        StoreBytes -= sizeof(uint64_t);
3079        // May not be aligned so use memcpy.
3080        memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3081        Src += sizeof(uint64_t);
3082      }
3083  
3084      memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3085    }
3086  }
3087  
3088  /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3089  /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3090  void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3091                               unsigned LoadBytes) {
3092    assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3093    uint8_t *Dst = reinterpret_cast<uint8_t *>(
3094                     const_cast<uint64_t *>(IntVal.getRawData()));
3095  
3096    if (sys::IsLittleEndianHost)
3097      // Little-endian host - the destination must be ordered from LSB to MSB.
3098      // The source is ordered from LSB to MSB: Do a straight copy.
3099      memcpy(Dst, Src, LoadBytes);
3100    else {
3101      // Big-endian - the destination is an array of 64 bit words ordered from
3102      // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3103      // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3104      // a word.
3105      while (LoadBytes > sizeof(uint64_t)) {
3106        LoadBytes -= sizeof(uint64_t);
3107        // May not be aligned so use memcpy.
3108        memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3109        Dst += sizeof(uint64_t);
3110      }
3111  
3112      memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3113    }
3114  }
3115