xref: /freebsd/contrib/llvm-project/llvm/lib/Support/APInt.cpp (revision a3c35da61bb201168575f1d18f4ca3e96937d35c)
1  //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  //
9  // This file implements a class to represent arbitrary precision integer
10  // constant values and provide a variety of arithmetic operations on them.
11  //
12  //===----------------------------------------------------------------------===//
13  
14  #include "llvm/ADT/APInt.h"
15  #include "llvm/ADT/ArrayRef.h"
16  #include "llvm/ADT/FoldingSet.h"
17  #include "llvm/ADT/Hashing.h"
18  #include "llvm/ADT/Optional.h"
19  #include "llvm/ADT/SmallString.h"
20  #include "llvm/ADT/StringRef.h"
21  #include "llvm/ADT/bit.h"
22  #include "llvm/Config/llvm-config.h"
23  #include "llvm/Support/Debug.h"
24  #include "llvm/Support/ErrorHandling.h"
25  #include "llvm/Support/MathExtras.h"
26  #include "llvm/Support/raw_ostream.h"
27  #include <climits>
28  #include <cmath>
29  #include <cstdlib>
30  #include <cstring>
31  using namespace llvm;
32  
33  #define DEBUG_TYPE "apint"
34  
35  /// A utility function for allocating memory, checking for allocation failures,
36  /// and ensuring the contents are zeroed.
37  inline static uint64_t* getClearedMemory(unsigned numWords) {
38    uint64_t *result = new uint64_t[numWords];
39    memset(result, 0, numWords * sizeof(uint64_t));
40    return result;
41  }
42  
43  /// A utility function for allocating memory and checking for allocation
44  /// failure.  The content is not zeroed.
45  inline static uint64_t* getMemory(unsigned numWords) {
46    return new uint64_t[numWords];
47  }
48  
49  /// A utility function that converts a character to a digit.
50  inline static unsigned getDigit(char cdigit, uint8_t radix) {
51    unsigned r;
52  
53    if (radix == 16 || radix == 36) {
54      r = cdigit - '0';
55      if (r <= 9)
56        return r;
57  
58      r = cdigit - 'A';
59      if (r <= radix - 11U)
60        return r + 10;
61  
62      r = cdigit - 'a';
63      if (r <= radix - 11U)
64        return r + 10;
65  
66      radix = 10;
67    }
68  
69    r = cdigit - '0';
70    if (r < radix)
71      return r;
72  
73    return -1U;
74  }
75  
76  
77  void APInt::initSlowCase(uint64_t val, bool isSigned) {
78    U.pVal = getClearedMemory(getNumWords());
79    U.pVal[0] = val;
80    if (isSigned && int64_t(val) < 0)
81      for (unsigned i = 1; i < getNumWords(); ++i)
82        U.pVal[i] = WORDTYPE_MAX;
83    clearUnusedBits();
84  }
85  
86  void APInt::initSlowCase(const APInt& that) {
87    U.pVal = getMemory(getNumWords());
88    memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89  }
90  
91  void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92    assert(BitWidth && "Bitwidth too small");
93    assert(bigVal.data() && "Null pointer detected!");
94    if (isSingleWord())
95      U.VAL = bigVal[0];
96    else {
97      // Get memory, cleared to 0
98      U.pVal = getClearedMemory(getNumWords());
99      // Calculate the number of words to copy
100      unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101      // Copy the words from bigVal to pVal
102      memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103    }
104    // Make sure unused high bits are cleared
105    clearUnusedBits();
106  }
107  
108  APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109    : BitWidth(numBits) {
110    initFromArray(bigVal);
111  }
112  
113  APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114    : BitWidth(numBits) {
115    initFromArray(makeArrayRef(bigVal, numWords));
116  }
117  
118  APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119    : BitWidth(numbits) {
120    assert(BitWidth && "Bitwidth too small");
121    fromString(numbits, Str, radix);
122  }
123  
124  void APInt::reallocate(unsigned NewBitWidth) {
125    // If the number of words is the same we can just change the width and stop.
126    if (getNumWords() == getNumWords(NewBitWidth)) {
127      BitWidth = NewBitWidth;
128      return;
129    }
130  
131    // If we have an allocation, delete it.
132    if (!isSingleWord())
133      delete [] U.pVal;
134  
135    // Update BitWidth.
136    BitWidth = NewBitWidth;
137  
138    // If we are supposed to have an allocation, create it.
139    if (!isSingleWord())
140      U.pVal = getMemory(getNumWords());
141  }
142  
143  void APInt::AssignSlowCase(const APInt& RHS) {
144    // Don't do anything for X = X
145    if (this == &RHS)
146      return;
147  
148    // Adjust the bit width and handle allocations as necessary.
149    reallocate(RHS.getBitWidth());
150  
151    // Copy the data.
152    if (isSingleWord())
153      U.VAL = RHS.U.VAL;
154    else
155      memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156  }
157  
158  /// This method 'profiles' an APInt for use with FoldingSet.
159  void APInt::Profile(FoldingSetNodeID& ID) const {
160    ID.AddInteger(BitWidth);
161  
162    if (isSingleWord()) {
163      ID.AddInteger(U.VAL);
164      return;
165    }
166  
167    unsigned NumWords = getNumWords();
168    for (unsigned i = 0; i < NumWords; ++i)
169      ID.AddInteger(U.pVal[i]);
170  }
171  
172  /// Prefix increment operator. Increments the APInt by one.
173  APInt& APInt::operator++() {
174    if (isSingleWord())
175      ++U.VAL;
176    else
177      tcIncrement(U.pVal, getNumWords());
178    return clearUnusedBits();
179  }
180  
181  /// Prefix decrement operator. Decrements the APInt by one.
182  APInt& APInt::operator--() {
183    if (isSingleWord())
184      --U.VAL;
185    else
186      tcDecrement(U.pVal, getNumWords());
187    return clearUnusedBits();
188  }
189  
190  /// Adds the RHS APint to this APInt.
191  /// @returns this, after addition of RHS.
192  /// Addition assignment operator.
193  APInt& APInt::operator+=(const APInt& RHS) {
194    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195    if (isSingleWord())
196      U.VAL += RHS.U.VAL;
197    else
198      tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199    return clearUnusedBits();
200  }
201  
202  APInt& APInt::operator+=(uint64_t RHS) {
203    if (isSingleWord())
204      U.VAL += RHS;
205    else
206      tcAddPart(U.pVal, RHS, getNumWords());
207    return clearUnusedBits();
208  }
209  
210  /// Subtracts the RHS APInt from this APInt
211  /// @returns this, after subtraction
212  /// Subtraction assignment operator.
213  APInt& APInt::operator-=(const APInt& RHS) {
214    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215    if (isSingleWord())
216      U.VAL -= RHS.U.VAL;
217    else
218      tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219    return clearUnusedBits();
220  }
221  
222  APInt& APInt::operator-=(uint64_t RHS) {
223    if (isSingleWord())
224      U.VAL -= RHS;
225    else
226      tcSubtractPart(U.pVal, RHS, getNumWords());
227    return clearUnusedBits();
228  }
229  
230  APInt APInt::operator*(const APInt& RHS) const {
231    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232    if (isSingleWord())
233      return APInt(BitWidth, U.VAL * RHS.U.VAL);
234  
235    APInt Result(getMemory(getNumWords()), getBitWidth());
236  
237    tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238  
239    Result.clearUnusedBits();
240    return Result;
241  }
242  
243  void APInt::AndAssignSlowCase(const APInt& RHS) {
244    tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245  }
246  
247  void APInt::OrAssignSlowCase(const APInt& RHS) {
248    tcOr(U.pVal, RHS.U.pVal, getNumWords());
249  }
250  
251  void APInt::XorAssignSlowCase(const APInt& RHS) {
252    tcXor(U.pVal, RHS.U.pVal, getNumWords());
253  }
254  
255  APInt& APInt::operator*=(const APInt& RHS) {
256    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257    *this = *this * RHS;
258    return *this;
259  }
260  
261  APInt& APInt::operator*=(uint64_t RHS) {
262    if (isSingleWord()) {
263      U.VAL *= RHS;
264    } else {
265      unsigned NumWords = getNumWords();
266      tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267    }
268    return clearUnusedBits();
269  }
270  
271  bool APInt::EqualSlowCase(const APInt& RHS) const {
272    return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273  }
274  
275  int APInt::compare(const APInt& RHS) const {
276    assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277    if (isSingleWord())
278      return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279  
280    return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281  }
282  
283  int APInt::compareSigned(const APInt& RHS) const {
284    assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285    if (isSingleWord()) {
286      int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287      int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288      return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289    }
290  
291    bool lhsNeg = isNegative();
292    bool rhsNeg = RHS.isNegative();
293  
294    // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295    if (lhsNeg != rhsNeg)
296      return lhsNeg ? -1 : 1;
297  
298    // Otherwise we can just use an unsigned comparison, because even negative
299    // numbers compare correctly this way if both have the same signed-ness.
300    return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301  }
302  
303  void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304    unsigned loWord = whichWord(loBit);
305    unsigned hiWord = whichWord(hiBit);
306  
307    // Create an initial mask for the low word with zeros below loBit.
308    uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309  
310    // If hiBit is not aligned, we need a high mask.
311    unsigned hiShiftAmt = whichBit(hiBit);
312    if (hiShiftAmt != 0) {
313      // Create a high mask with zeros above hiBit.
314      uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315      // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316      // set the bits in hiWord.
317      if (hiWord == loWord)
318        loMask &= hiMask;
319      else
320        U.pVal[hiWord] |= hiMask;
321    }
322    // Apply the mask to the low word.
323    U.pVal[loWord] |= loMask;
324  
325    // Fill any words between loWord and hiWord with all ones.
326    for (unsigned word = loWord + 1; word < hiWord; ++word)
327      U.pVal[word] = WORDTYPE_MAX;
328  }
329  
330  /// Toggle every bit to its opposite value.
331  void APInt::flipAllBitsSlowCase() {
332    tcComplement(U.pVal, getNumWords());
333    clearUnusedBits();
334  }
335  
336  /// Toggle a given bit to its opposite value whose position is given
337  /// as "bitPosition".
338  /// Toggles a given bit to its opposite value.
339  void APInt::flipBit(unsigned bitPosition) {
340    assert(bitPosition < BitWidth && "Out of the bit-width range!");
341    if ((*this)[bitPosition]) clearBit(bitPosition);
342    else setBit(bitPosition);
343  }
344  
345  void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346    unsigned subBitWidth = subBits.getBitWidth();
347    assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348           "Illegal bit insertion");
349  
350    // Insertion is a direct copy.
351    if (subBitWidth == BitWidth) {
352      *this = subBits;
353      return;
354    }
355  
356    // Single word result can be done as a direct bitmask.
357    if (isSingleWord()) {
358      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359      U.VAL &= ~(mask << bitPosition);
360      U.VAL |= (subBits.U.VAL << bitPosition);
361      return;
362    }
363  
364    unsigned loBit = whichBit(bitPosition);
365    unsigned loWord = whichWord(bitPosition);
366    unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367  
368    // Insertion within a single word can be done as a direct bitmask.
369    if (loWord == hi1Word) {
370      uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371      U.pVal[loWord] &= ~(mask << loBit);
372      U.pVal[loWord] |= (subBits.U.VAL << loBit);
373      return;
374    }
375  
376    // Insert on word boundaries.
377    if (loBit == 0) {
378      // Direct copy whole words.
379      unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380      memcpy(U.pVal + loWord, subBits.getRawData(),
381             numWholeSubWords * APINT_WORD_SIZE);
382  
383      // Mask+insert remaining bits.
384      unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385      if (remainingBits != 0) {
386        uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387        U.pVal[hi1Word] &= ~mask;
388        U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389      }
390      return;
391    }
392  
393    // General case - set/clear individual bits in dst based on src.
394    // TODO - there is scope for optimization here, but at the moment this code
395    // path is barely used so prefer readability over performance.
396    for (unsigned i = 0; i != subBitWidth; ++i) {
397      if (subBits[i])
398        setBit(bitPosition + i);
399      else
400        clearBit(bitPosition + i);
401    }
402  }
403  
404  APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
405    assert(numBits > 0 && "Can't extract zero bits");
406    assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
407           "Illegal bit extraction");
408  
409    if (isSingleWord())
410      return APInt(numBits, U.VAL >> bitPosition);
411  
412    unsigned loBit = whichBit(bitPosition);
413    unsigned loWord = whichWord(bitPosition);
414    unsigned hiWord = whichWord(bitPosition + numBits - 1);
415  
416    // Single word result extracting bits from a single word source.
417    if (loWord == hiWord)
418      return APInt(numBits, U.pVal[loWord] >> loBit);
419  
420    // Extracting bits that start on a source word boundary can be done
421    // as a fast memory copy.
422    if (loBit == 0)
423      return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
424  
425    // General case - shift + copy source words directly into place.
426    APInt Result(numBits, 0);
427    unsigned NumSrcWords = getNumWords();
428    unsigned NumDstWords = Result.getNumWords();
429  
430    uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
431    for (unsigned word = 0; word < NumDstWords; ++word) {
432      uint64_t w0 = U.pVal[loWord + word];
433      uint64_t w1 =
434          (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
435      DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
436    }
437  
438    return Result.clearUnusedBits();
439  }
440  
441  unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
442    assert(!str.empty() && "Invalid string length");
443    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
444            radix == 36) &&
445           "Radix should be 2, 8, 10, 16, or 36!");
446  
447    size_t slen = str.size();
448  
449    // Each computation below needs to know if it's negative.
450    StringRef::iterator p = str.begin();
451    unsigned isNegative = *p == '-';
452    if (*p == '-' || *p == '+') {
453      p++;
454      slen--;
455      assert(slen && "String is only a sign, needs a value.");
456    }
457  
458    // For radixes of power-of-two values, the bits required is accurately and
459    // easily computed
460    if (radix == 2)
461      return slen + isNegative;
462    if (radix == 8)
463      return slen * 3 + isNegative;
464    if (radix == 16)
465      return slen * 4 + isNegative;
466  
467    // FIXME: base 36
468  
469    // This is grossly inefficient but accurate. We could probably do something
470    // with a computation of roughly slen*64/20 and then adjust by the value of
471    // the first few digits. But, I'm not sure how accurate that could be.
472  
473    // Compute a sufficient number of bits that is always large enough but might
474    // be too large. This avoids the assertion in the constructor. This
475    // calculation doesn't work appropriately for the numbers 0-9, so just use 4
476    // bits in that case.
477    unsigned sufficient
478      = radix == 10? (slen == 1 ? 4 : slen * 64/18)
479                   : (slen == 1 ? 7 : slen * 16/3);
480  
481    // Convert to the actual binary value.
482    APInt tmp(sufficient, StringRef(p, slen), radix);
483  
484    // Compute how many bits are required. If the log is infinite, assume we need
485    // just bit. If the log is exact and value is negative, then the value is
486    // MinSignedValue with (log + 1) bits.
487    unsigned log = tmp.logBase2();
488    if (log == (unsigned)-1) {
489      return isNegative + 1;
490    } else if (isNegative && tmp.isPowerOf2()) {
491      return isNegative + log;
492    } else {
493      return isNegative + log + 1;
494    }
495  }
496  
497  hash_code llvm::hash_value(const APInt &Arg) {
498    if (Arg.isSingleWord())
499      return hash_combine(Arg.U.VAL);
500  
501    return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
502  }
503  
504  bool APInt::isSplat(unsigned SplatSizeInBits) const {
505    assert(getBitWidth() % SplatSizeInBits == 0 &&
506           "SplatSizeInBits must divide width!");
507    // We can check that all parts of an integer are equal by making use of a
508    // little trick: rotate and check if it's still the same value.
509    return *this == rotl(SplatSizeInBits);
510  }
511  
512  /// This function returns the high "numBits" bits of this APInt.
513  APInt APInt::getHiBits(unsigned numBits) const {
514    return this->lshr(BitWidth - numBits);
515  }
516  
517  /// This function returns the low "numBits" bits of this APInt.
518  APInt APInt::getLoBits(unsigned numBits) const {
519    APInt Result(getLowBitsSet(BitWidth, numBits));
520    Result &= *this;
521    return Result;
522  }
523  
524  /// Return a value containing V broadcasted over NewLen bits.
525  APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
526    assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
527  
528    APInt Val = V.zextOrSelf(NewLen);
529    for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
530      Val |= Val << I;
531  
532    return Val;
533  }
534  
535  unsigned APInt::countLeadingZerosSlowCase() const {
536    unsigned Count = 0;
537    for (int i = getNumWords()-1; i >= 0; --i) {
538      uint64_t V = U.pVal[i];
539      if (V == 0)
540        Count += APINT_BITS_PER_WORD;
541      else {
542        Count += llvm::countLeadingZeros(V);
543        break;
544      }
545    }
546    // Adjust for unused bits in the most significant word (they are zero).
547    unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
548    Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
549    return Count;
550  }
551  
552  unsigned APInt::countLeadingOnesSlowCase() const {
553    unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
554    unsigned shift;
555    if (!highWordBits) {
556      highWordBits = APINT_BITS_PER_WORD;
557      shift = 0;
558    } else {
559      shift = APINT_BITS_PER_WORD - highWordBits;
560    }
561    int i = getNumWords() - 1;
562    unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
563    if (Count == highWordBits) {
564      for (i--; i >= 0; --i) {
565        if (U.pVal[i] == WORDTYPE_MAX)
566          Count += APINT_BITS_PER_WORD;
567        else {
568          Count += llvm::countLeadingOnes(U.pVal[i]);
569          break;
570        }
571      }
572    }
573    return Count;
574  }
575  
576  unsigned APInt::countTrailingZerosSlowCase() const {
577    unsigned Count = 0;
578    unsigned i = 0;
579    for (; i < getNumWords() && U.pVal[i] == 0; ++i)
580      Count += APINT_BITS_PER_WORD;
581    if (i < getNumWords())
582      Count += llvm::countTrailingZeros(U.pVal[i]);
583    return std::min(Count, BitWidth);
584  }
585  
586  unsigned APInt::countTrailingOnesSlowCase() const {
587    unsigned Count = 0;
588    unsigned i = 0;
589    for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
590      Count += APINT_BITS_PER_WORD;
591    if (i < getNumWords())
592      Count += llvm::countTrailingOnes(U.pVal[i]);
593    assert(Count <= BitWidth);
594    return Count;
595  }
596  
597  unsigned APInt::countPopulationSlowCase() const {
598    unsigned Count = 0;
599    for (unsigned i = 0; i < getNumWords(); ++i)
600      Count += llvm::countPopulation(U.pVal[i]);
601    return Count;
602  }
603  
604  bool APInt::intersectsSlowCase(const APInt &RHS) const {
605    for (unsigned i = 0, e = getNumWords(); i != e; ++i)
606      if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
607        return true;
608  
609    return false;
610  }
611  
612  bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
613    for (unsigned i = 0, e = getNumWords(); i != e; ++i)
614      if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
615        return false;
616  
617    return true;
618  }
619  
620  APInt APInt::byteSwap() const {
621    assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
622    if (BitWidth == 16)
623      return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
624    if (BitWidth == 32)
625      return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
626    if (BitWidth == 48) {
627      unsigned Tmp1 = unsigned(U.VAL >> 16);
628      Tmp1 = ByteSwap_32(Tmp1);
629      uint16_t Tmp2 = uint16_t(U.VAL);
630      Tmp2 = ByteSwap_16(Tmp2);
631      return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
632    }
633    if (BitWidth == 64)
634      return APInt(BitWidth, ByteSwap_64(U.VAL));
635  
636    APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
637    for (unsigned I = 0, N = getNumWords(); I != N; ++I)
638      Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
639    if (Result.BitWidth != BitWidth) {
640      Result.lshrInPlace(Result.BitWidth - BitWidth);
641      Result.BitWidth = BitWidth;
642    }
643    return Result;
644  }
645  
646  APInt APInt::reverseBits() const {
647    switch (BitWidth) {
648    case 64:
649      return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
650    case 32:
651      return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
652    case 16:
653      return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
654    case 8:
655      return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
656    default:
657      break;
658    }
659  
660    APInt Val(*this);
661    APInt Reversed(BitWidth, 0);
662    unsigned S = BitWidth;
663  
664    for (; Val != 0; Val.lshrInPlace(1)) {
665      Reversed <<= 1;
666      Reversed |= Val[0];
667      --S;
668    }
669  
670    Reversed <<= S;
671    return Reversed;
672  }
673  
674  APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
675    // Fast-path a common case.
676    if (A == B) return A;
677  
678    // Corner cases: if either operand is zero, the other is the gcd.
679    if (!A) return B;
680    if (!B) return A;
681  
682    // Count common powers of 2 and remove all other powers of 2.
683    unsigned Pow2;
684    {
685      unsigned Pow2_A = A.countTrailingZeros();
686      unsigned Pow2_B = B.countTrailingZeros();
687      if (Pow2_A > Pow2_B) {
688        A.lshrInPlace(Pow2_A - Pow2_B);
689        Pow2 = Pow2_B;
690      } else if (Pow2_B > Pow2_A) {
691        B.lshrInPlace(Pow2_B - Pow2_A);
692        Pow2 = Pow2_A;
693      } else {
694        Pow2 = Pow2_A;
695      }
696    }
697  
698    // Both operands are odd multiples of 2^Pow_2:
699    //
700    //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
701    //
702    // This is a modified version of Stein's algorithm, taking advantage of
703    // efficient countTrailingZeros().
704    while (A != B) {
705      if (A.ugt(B)) {
706        A -= B;
707        A.lshrInPlace(A.countTrailingZeros() - Pow2);
708      } else {
709        B -= A;
710        B.lshrInPlace(B.countTrailingZeros() - Pow2);
711      }
712    }
713  
714    return A;
715  }
716  
717  APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
718    uint64_t I = bit_cast<uint64_t>(Double);
719  
720    // Get the sign bit from the highest order bit
721    bool isNeg = I >> 63;
722  
723    // Get the 11-bit exponent and adjust for the 1023 bit bias
724    int64_t exp = ((I >> 52) & 0x7ff) - 1023;
725  
726    // If the exponent is negative, the value is < 0 so just return 0.
727    if (exp < 0)
728      return APInt(width, 0u);
729  
730    // Extract the mantissa by clearing the top 12 bits (sign + exponent).
731    uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
732  
733    // If the exponent doesn't shift all bits out of the mantissa
734    if (exp < 52)
735      return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
736                      APInt(width, mantissa >> (52 - exp));
737  
738    // If the client didn't provide enough bits for us to shift the mantissa into
739    // then the result is undefined, just return 0
740    if (width <= exp - 52)
741      return APInt(width, 0);
742  
743    // Otherwise, we have to shift the mantissa bits up to the right location
744    APInt Tmp(width, mantissa);
745    Tmp <<= (unsigned)exp - 52;
746    return isNeg ? -Tmp : Tmp;
747  }
748  
749  /// This function converts this APInt to a double.
750  /// The layout for double is as following (IEEE Standard 754):
751  ///  --------------------------------------
752  /// |  Sign    Exponent    Fraction    Bias |
753  /// |-------------------------------------- |
754  /// |  1[63]   11[62-52]   52[51-00]   1023 |
755  ///  --------------------------------------
756  double APInt::roundToDouble(bool isSigned) const {
757  
758    // Handle the simple case where the value is contained in one uint64_t.
759    // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
760    if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
761      if (isSigned) {
762        int64_t sext = SignExtend64(getWord(0), BitWidth);
763        return double(sext);
764      } else
765        return double(getWord(0));
766    }
767  
768    // Determine if the value is negative.
769    bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
770  
771    // Construct the absolute value if we're negative.
772    APInt Tmp(isNeg ? -(*this) : (*this));
773  
774    // Figure out how many bits we're using.
775    unsigned n = Tmp.getActiveBits();
776  
777    // The exponent (without bias normalization) is just the number of bits
778    // we are using. Note that the sign bit is gone since we constructed the
779    // absolute value.
780    uint64_t exp = n;
781  
782    // Return infinity for exponent overflow
783    if (exp > 1023) {
784      if (!isSigned || !isNeg)
785        return std::numeric_limits<double>::infinity();
786      else
787        return -std::numeric_limits<double>::infinity();
788    }
789    exp += 1023; // Increment for 1023 bias
790  
791    // Number of bits in mantissa is 52. To obtain the mantissa value, we must
792    // extract the high 52 bits from the correct words in pVal.
793    uint64_t mantissa;
794    unsigned hiWord = whichWord(n-1);
795    if (hiWord == 0) {
796      mantissa = Tmp.U.pVal[0];
797      if (n > 52)
798        mantissa >>= n - 52; // shift down, we want the top 52 bits.
799    } else {
800      assert(hiWord > 0 && "huh?");
801      uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
802      uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
803      mantissa = hibits | lobits;
804    }
805  
806    // The leading bit of mantissa is implicit, so get rid of it.
807    uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
808    uint64_t I = sign | (exp << 52) | mantissa;
809    return bit_cast<double>(I);
810  }
811  
812  // Truncate to new width.
813  APInt APInt::trunc(unsigned width) const {
814    assert(width < BitWidth && "Invalid APInt Truncate request");
815    assert(width && "Can't truncate to 0 bits");
816  
817    if (width <= APINT_BITS_PER_WORD)
818      return APInt(width, getRawData()[0]);
819  
820    APInt Result(getMemory(getNumWords(width)), width);
821  
822    // Copy full words.
823    unsigned i;
824    for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
825      Result.U.pVal[i] = U.pVal[i];
826  
827    // Truncate and copy any partial word.
828    unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
829    if (bits != 0)
830      Result.U.pVal[i] = U.pVal[i] << bits >> bits;
831  
832    return Result;
833  }
834  
835  // Sign extend to a new width.
836  APInt APInt::sext(unsigned Width) const {
837    assert(Width > BitWidth && "Invalid APInt SignExtend request");
838  
839    if (Width <= APINT_BITS_PER_WORD)
840      return APInt(Width, SignExtend64(U.VAL, BitWidth));
841  
842    APInt Result(getMemory(getNumWords(Width)), Width);
843  
844    // Copy words.
845    std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
846  
847    // Sign extend the last word since there may be unused bits in the input.
848    Result.U.pVal[getNumWords() - 1] =
849        SignExtend64(Result.U.pVal[getNumWords() - 1],
850                     ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
851  
852    // Fill with sign bits.
853    std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
854                (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
855    Result.clearUnusedBits();
856    return Result;
857  }
858  
859  //  Zero extend to a new width.
860  APInt APInt::zext(unsigned width) const {
861    assert(width > BitWidth && "Invalid APInt ZeroExtend request");
862  
863    if (width <= APINT_BITS_PER_WORD)
864      return APInt(width, U.VAL);
865  
866    APInt Result(getMemory(getNumWords(width)), width);
867  
868    // Copy words.
869    std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
870  
871    // Zero remaining words.
872    std::memset(Result.U.pVal + getNumWords(), 0,
873                (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
874  
875    return Result;
876  }
877  
878  APInt APInt::zextOrTrunc(unsigned width) const {
879    if (BitWidth < width)
880      return zext(width);
881    if (BitWidth > width)
882      return trunc(width);
883    return *this;
884  }
885  
886  APInt APInt::sextOrTrunc(unsigned width) const {
887    if (BitWidth < width)
888      return sext(width);
889    if (BitWidth > width)
890      return trunc(width);
891    return *this;
892  }
893  
894  APInt APInt::zextOrSelf(unsigned width) const {
895    if (BitWidth < width)
896      return zext(width);
897    return *this;
898  }
899  
900  APInt APInt::sextOrSelf(unsigned width) const {
901    if (BitWidth < width)
902      return sext(width);
903    return *this;
904  }
905  
906  /// Arithmetic right-shift this APInt by shiftAmt.
907  /// Arithmetic right-shift function.
908  void APInt::ashrInPlace(const APInt &shiftAmt) {
909    ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
910  }
911  
912  /// Arithmetic right-shift this APInt by shiftAmt.
913  /// Arithmetic right-shift function.
914  void APInt::ashrSlowCase(unsigned ShiftAmt) {
915    // Don't bother performing a no-op shift.
916    if (!ShiftAmt)
917      return;
918  
919    // Save the original sign bit for later.
920    bool Negative = isNegative();
921  
922    // WordShift is the inter-part shift; BitShift is intra-part shift.
923    unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
924    unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
925  
926    unsigned WordsToMove = getNumWords() - WordShift;
927    if (WordsToMove != 0) {
928      // Sign extend the last word to fill in the unused bits.
929      U.pVal[getNumWords() - 1] = SignExtend64(
930          U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
931  
932      // Fastpath for moving by whole words.
933      if (BitShift == 0) {
934        std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
935      } else {
936        // Move the words containing significant bits.
937        for (unsigned i = 0; i != WordsToMove - 1; ++i)
938          U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
939                      (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
940  
941        // Handle the last word which has no high bits to copy.
942        U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
943        // Sign extend one more time.
944        U.pVal[WordsToMove - 1] =
945            SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
946      }
947    }
948  
949    // Fill in the remainder based on the original sign.
950    std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
951                WordShift * APINT_WORD_SIZE);
952    clearUnusedBits();
953  }
954  
955  /// Logical right-shift this APInt by shiftAmt.
956  /// Logical right-shift function.
957  void APInt::lshrInPlace(const APInt &shiftAmt) {
958    lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
959  }
960  
961  /// Logical right-shift this APInt by shiftAmt.
962  /// Logical right-shift function.
963  void APInt::lshrSlowCase(unsigned ShiftAmt) {
964    tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
965  }
966  
967  /// Left-shift this APInt by shiftAmt.
968  /// Left-shift function.
969  APInt &APInt::operator<<=(const APInt &shiftAmt) {
970    // It's undefined behavior in C to shift by BitWidth or greater.
971    *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
972    return *this;
973  }
974  
975  void APInt::shlSlowCase(unsigned ShiftAmt) {
976    tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
977    clearUnusedBits();
978  }
979  
980  // Calculate the rotate amount modulo the bit width.
981  static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
982    unsigned rotBitWidth = rotateAmt.getBitWidth();
983    APInt rot = rotateAmt;
984    if (rotBitWidth < BitWidth) {
985      // Extend the rotate APInt, so that the urem doesn't divide by 0.
986      // e.g. APInt(1, 32) would give APInt(1, 0).
987      rot = rotateAmt.zext(BitWidth);
988    }
989    rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
990    return rot.getLimitedValue(BitWidth);
991  }
992  
993  APInt APInt::rotl(const APInt &rotateAmt) const {
994    return rotl(rotateModulo(BitWidth, rotateAmt));
995  }
996  
997  APInt APInt::rotl(unsigned rotateAmt) const {
998    rotateAmt %= BitWidth;
999    if (rotateAmt == 0)
1000      return *this;
1001    return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1002  }
1003  
1004  APInt APInt::rotr(const APInt &rotateAmt) const {
1005    return rotr(rotateModulo(BitWidth, rotateAmt));
1006  }
1007  
1008  APInt APInt::rotr(unsigned rotateAmt) const {
1009    rotateAmt %= BitWidth;
1010    if (rotateAmt == 0)
1011      return *this;
1012    return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1013  }
1014  
1015  // Square Root - this method computes and returns the square root of "this".
1016  // Three mechanisms are used for computation. For small values (<= 5 bits),
1017  // a table lookup is done. This gets some performance for common cases. For
1018  // values using less than 52 bits, the value is converted to double and then
1019  // the libc sqrt function is called. The result is rounded and then converted
1020  // back to a uint64_t which is then used to construct the result. Finally,
1021  // the Babylonian method for computing square roots is used.
1022  APInt APInt::sqrt() const {
1023  
1024    // Determine the magnitude of the value.
1025    unsigned magnitude = getActiveBits();
1026  
1027    // Use a fast table for some small values. This also gets rid of some
1028    // rounding errors in libc sqrt for small values.
1029    if (magnitude <= 5) {
1030      static const uint8_t results[32] = {
1031        /*     0 */ 0,
1032        /*  1- 2 */ 1, 1,
1033        /*  3- 6 */ 2, 2, 2, 2,
1034        /*  7-12 */ 3, 3, 3, 3, 3, 3,
1035        /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1036        /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1037        /*    31 */ 6
1038      };
1039      return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1040    }
1041  
1042    // If the magnitude of the value fits in less than 52 bits (the precision of
1043    // an IEEE double precision floating point value), then we can use the
1044    // libc sqrt function which will probably use a hardware sqrt computation.
1045    // This should be faster than the algorithm below.
1046    if (magnitude < 52) {
1047      return APInt(BitWidth,
1048                   uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1049                                                                 : U.pVal[0])))));
1050    }
1051  
1052    // Okay, all the short cuts are exhausted. We must compute it. The following
1053    // is a classical Babylonian method for computing the square root. This code
1054    // was adapted to APInt from a wikipedia article on such computations.
1055    // See http://www.wikipedia.org/ and go to the page named
1056    // Calculate_an_integer_square_root.
1057    unsigned nbits = BitWidth, i = 4;
1058    APInt testy(BitWidth, 16);
1059    APInt x_old(BitWidth, 1);
1060    APInt x_new(BitWidth, 0);
1061    APInt two(BitWidth, 2);
1062  
1063    // Select a good starting value using binary logarithms.
1064    for (;; i += 2, testy = testy.shl(2))
1065      if (i >= nbits || this->ule(testy)) {
1066        x_old = x_old.shl(i / 2);
1067        break;
1068      }
1069  
1070    // Use the Babylonian method to arrive at the integer square root:
1071    for (;;) {
1072      x_new = (this->udiv(x_old) + x_old).udiv(two);
1073      if (x_old.ule(x_new))
1074        break;
1075      x_old = x_new;
1076    }
1077  
1078    // Make sure we return the closest approximation
1079    // NOTE: The rounding calculation below is correct. It will produce an
1080    // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1081    // determined to be a rounding issue with pari/gp as it begins to use a
1082    // floating point representation after 192 bits. There are no discrepancies
1083    // between this algorithm and pari/gp for bit widths < 192 bits.
1084    APInt square(x_old * x_old);
1085    APInt nextSquare((x_old + 1) * (x_old +1));
1086    if (this->ult(square))
1087      return x_old;
1088    assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1089    APInt midpoint((nextSquare - square).udiv(two));
1090    APInt offset(*this - square);
1091    if (offset.ult(midpoint))
1092      return x_old;
1093    return x_old + 1;
1094  }
1095  
1096  /// Computes the multiplicative inverse of this APInt for a given modulo. The
1097  /// iterative extended Euclidean algorithm is used to solve for this value,
1098  /// however we simplify it to speed up calculating only the inverse, and take
1099  /// advantage of div+rem calculations. We also use some tricks to avoid copying
1100  /// (potentially large) APInts around.
1101  /// WARNING: a value of '0' may be returned,
1102  ///          signifying that no multiplicative inverse exists!
1103  APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1104    assert(ult(modulo) && "This APInt must be smaller than the modulo");
1105  
1106    // Using the properties listed at the following web page (accessed 06/21/08):
1107    //   http://www.numbertheory.org/php/euclid.html
1108    // (especially the properties numbered 3, 4 and 9) it can be proved that
1109    // BitWidth bits suffice for all the computations in the algorithm implemented
1110    // below. More precisely, this number of bits suffice if the multiplicative
1111    // inverse exists, but may not suffice for the general extended Euclidean
1112    // algorithm.
1113  
1114    APInt r[2] = { modulo, *this };
1115    APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1116    APInt q(BitWidth, 0);
1117  
1118    unsigned i;
1119    for (i = 0; r[i^1] != 0; i ^= 1) {
1120      // An overview of the math without the confusing bit-flipping:
1121      // q = r[i-2] / r[i-1]
1122      // r[i] = r[i-2] % r[i-1]
1123      // t[i] = t[i-2] - t[i-1] * q
1124      udivrem(r[i], r[i^1], q, r[i]);
1125      t[i] -= t[i^1] * q;
1126    }
1127  
1128    // If this APInt and the modulo are not coprime, there is no multiplicative
1129    // inverse, so return 0. We check this by looking at the next-to-last
1130    // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1131    // algorithm.
1132    if (r[i] != 1)
1133      return APInt(BitWidth, 0);
1134  
1135    // The next-to-last t is the multiplicative inverse.  However, we are
1136    // interested in a positive inverse. Calculate a positive one from a negative
1137    // one if necessary. A simple addition of the modulo suffices because
1138    // abs(t[i]) is known to be less than *this/2 (see the link above).
1139    if (t[i].isNegative())
1140      t[i] += modulo;
1141  
1142    return std::move(t[i]);
1143  }
1144  
1145  /// Calculate the magic numbers required to implement a signed integer division
1146  /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1147  /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1148  /// Warren, Jr., chapter 10.
1149  APInt::ms APInt::magic() const {
1150    const APInt& d = *this;
1151    unsigned p;
1152    APInt ad, anc, delta, q1, r1, q2, r2, t;
1153    APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1154    struct ms mag;
1155  
1156    ad = d.abs();
1157    t = signedMin + (d.lshr(d.getBitWidth() - 1));
1158    anc = t - 1 - t.urem(ad);   // absolute value of nc
1159    p = d.getBitWidth() - 1;    // initialize p
1160    q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1161    r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1162    q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1163    r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1164    do {
1165      p = p + 1;
1166      q1 = q1<<1;          // update q1 = 2p/abs(nc)
1167      r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1168      if (r1.uge(anc)) {  // must be unsigned comparison
1169        q1 = q1 + 1;
1170        r1 = r1 - anc;
1171      }
1172      q2 = q2<<1;          // update q2 = 2p/abs(d)
1173      r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1174      if (r2.uge(ad)) {   // must be unsigned comparison
1175        q2 = q2 + 1;
1176        r2 = r2 - ad;
1177      }
1178      delta = ad - r2;
1179    } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1180  
1181    mag.m = q2 + 1;
1182    if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1183    mag.s = p - d.getBitWidth();          // resulting shift
1184    return mag;
1185  }
1186  
1187  /// Calculate the magic numbers required to implement an unsigned integer
1188  /// division by a constant as a sequence of multiplies, adds and shifts.
1189  /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1190  /// S. Warren, Jr., chapter 10.
1191  /// LeadingZeros can be used to simplify the calculation if the upper bits
1192  /// of the divided value are known zero.
1193  APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1194    const APInt& d = *this;
1195    unsigned p;
1196    APInt nc, delta, q1, r1, q2, r2;
1197    struct mu magu;
1198    magu.a = 0;               // initialize "add" indicator
1199    APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1200    APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1201    APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1202  
1203    nc = allOnes - (allOnes - d).urem(d);
1204    p = d.getBitWidth() - 1;  // initialize p
1205    q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1206    r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1207    q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1208    r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1209    do {
1210      p = p + 1;
1211      if (r1.uge(nc - r1)) {
1212        q1 = q1 + q1 + 1;  // update q1
1213        r1 = r1 + r1 - nc; // update r1
1214      }
1215      else {
1216        q1 = q1+q1; // update q1
1217        r1 = r1+r1; // update r1
1218      }
1219      if ((r2 + 1).uge(d - r2)) {
1220        if (q2.uge(signedMax)) magu.a = 1;
1221        q2 = q2+q2 + 1;     // update q2
1222        r2 = r2+r2 + 1 - d; // update r2
1223      }
1224      else {
1225        if (q2.uge(signedMin)) magu.a = 1;
1226        q2 = q2+q2;     // update q2
1227        r2 = r2+r2 + 1; // update r2
1228      }
1229      delta = d - 1 - r2;
1230    } while (p < d.getBitWidth()*2 &&
1231             (q1.ult(delta) || (q1 == delta && r1 == 0)));
1232    magu.m = q2 + 1; // resulting magic number
1233    magu.s = p - d.getBitWidth();  // resulting shift
1234    return magu;
1235  }
1236  
1237  /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1238  /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1239  /// variables here have the same names as in the algorithm. Comments explain
1240  /// the algorithm and any deviation from it.
1241  static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1242                       unsigned m, unsigned n) {
1243    assert(u && "Must provide dividend");
1244    assert(v && "Must provide divisor");
1245    assert(q && "Must provide quotient");
1246    assert(u != v && u != q && v != q && "Must use different memory");
1247    assert(n>1 && "n must be > 1");
1248  
1249    // b denotes the base of the number system. In our case b is 2^32.
1250    const uint64_t b = uint64_t(1) << 32;
1251  
1252  // The DEBUG macros here tend to be spam in the debug output if you're not
1253  // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1254  #ifdef KNUTH_DEBUG
1255  #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1256  #else
1257  #define DEBUG_KNUTH(X) do {} while(false)
1258  #endif
1259  
1260    DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1261    DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1262    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1263    DEBUG_KNUTH(dbgs() << " by");
1264    DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1265    DEBUG_KNUTH(dbgs() << '\n');
1266    // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1267    // u and v by d. Note that we have taken Knuth's advice here to use a power
1268    // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1269    // 2 allows us to shift instead of multiply and it is easy to determine the
1270    // shift amount from the leading zeros.  We are basically normalizing the u
1271    // and v so that its high bits are shifted to the top of v's range without
1272    // overflow. Note that this can require an extra word in u so that u must
1273    // be of length m+n+1.
1274    unsigned shift = countLeadingZeros(v[n-1]);
1275    uint32_t v_carry = 0;
1276    uint32_t u_carry = 0;
1277    if (shift) {
1278      for (unsigned i = 0; i < m+n; ++i) {
1279        uint32_t u_tmp = u[i] >> (32 - shift);
1280        u[i] = (u[i] << shift) | u_carry;
1281        u_carry = u_tmp;
1282      }
1283      for (unsigned i = 0; i < n; ++i) {
1284        uint32_t v_tmp = v[i] >> (32 - shift);
1285        v[i] = (v[i] << shift) | v_carry;
1286        v_carry = v_tmp;
1287      }
1288    }
1289    u[m+n] = u_carry;
1290  
1291    DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1292    DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1293    DEBUG_KNUTH(dbgs() << " by");
1294    DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1295    DEBUG_KNUTH(dbgs() << '\n');
1296  
1297    // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1298    int j = m;
1299    do {
1300      DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1301      // D3. [Calculate q'.].
1302      //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1303      //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1304      // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1305      // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1306      // on v[n-2] determines at high speed most of the cases in which the trial
1307      // value qp is one too large, and it eliminates all cases where qp is two
1308      // too large.
1309      uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1310      DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1311      uint64_t qp = dividend / v[n-1];
1312      uint64_t rp = dividend % v[n-1];
1313      if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1314        qp--;
1315        rp += v[n-1];
1316        if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1317          qp--;
1318      }
1319      DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1320  
1321      // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1322      // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1323      // consists of a simple multiplication by a one-place number, combined with
1324      // a subtraction.
1325      // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1326      // this step is actually negative, (u[j+n]...u[j]) should be left as the
1327      // true value plus b**(n+1), namely as the b's complement of
1328      // the true value, and a "borrow" to the left should be remembered.
1329      int64_t borrow = 0;
1330      for (unsigned i = 0; i < n; ++i) {
1331        uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1332        int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1333        u[j+i] = Lo_32(subres);
1334        borrow = Hi_32(p) - Hi_32(subres);
1335        DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1336                          << ", borrow = " << borrow << '\n');
1337      }
1338      bool isNeg = u[j+n] < borrow;
1339      u[j+n] -= Lo_32(borrow);
1340  
1341      DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1342      DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1343      DEBUG_KNUTH(dbgs() << '\n');
1344  
1345      // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1346      // negative, go to step D6; otherwise go on to step D7.
1347      q[j] = Lo_32(qp);
1348      if (isNeg) {
1349        // D6. [Add back]. The probability that this step is necessary is very
1350        // small, on the order of only 2/b. Make sure that test data accounts for
1351        // this possibility. Decrease q[j] by 1
1352        q[j]--;
1353        // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1354        // A carry will occur to the left of u[j+n], and it should be ignored
1355        // since it cancels with the borrow that occurred in D4.
1356        bool carry = false;
1357        for (unsigned i = 0; i < n; i++) {
1358          uint32_t limit = std::min(u[j+i],v[i]);
1359          u[j+i] += v[i] + carry;
1360          carry = u[j+i] < limit || (carry && u[j+i] == limit);
1361        }
1362        u[j+n] += carry;
1363      }
1364      DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1365      DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1366      DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1367  
1368      // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1369    } while (--j >= 0);
1370  
1371    DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1372    DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1373    DEBUG_KNUTH(dbgs() << '\n');
1374  
1375    // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1376    // remainder may be obtained by dividing u[...] by d. If r is non-null we
1377    // compute the remainder (urem uses this).
1378    if (r) {
1379      // The value d is expressed by the "shift" value above since we avoided
1380      // multiplication by d by using a shift left. So, all we have to do is
1381      // shift right here.
1382      if (shift) {
1383        uint32_t carry = 0;
1384        DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1385        for (int i = n-1; i >= 0; i--) {
1386          r[i] = (u[i] >> shift) | carry;
1387          carry = u[i] << (32 - shift);
1388          DEBUG_KNUTH(dbgs() << " " << r[i]);
1389        }
1390      } else {
1391        for (int i = n-1; i >= 0; i--) {
1392          r[i] = u[i];
1393          DEBUG_KNUTH(dbgs() << " " << r[i]);
1394        }
1395      }
1396      DEBUG_KNUTH(dbgs() << '\n');
1397    }
1398    DEBUG_KNUTH(dbgs() << '\n');
1399  }
1400  
1401  void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1402                     unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1403    assert(lhsWords >= rhsWords && "Fractional result");
1404  
1405    // First, compose the values into an array of 32-bit words instead of
1406    // 64-bit words. This is a necessity of both the "short division" algorithm
1407    // and the Knuth "classical algorithm" which requires there to be native
1408    // operations for +, -, and * on an m bit value with an m*2 bit result. We
1409    // can't use 64-bit operands here because we don't have native results of
1410    // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1411    // work on large-endian machines.
1412    unsigned n = rhsWords * 2;
1413    unsigned m = (lhsWords * 2) - n;
1414  
1415    // Allocate space for the temporary values we need either on the stack, if
1416    // it will fit, or on the heap if it won't.
1417    uint32_t SPACE[128];
1418    uint32_t *U = nullptr;
1419    uint32_t *V = nullptr;
1420    uint32_t *Q = nullptr;
1421    uint32_t *R = nullptr;
1422    if ((Remainder?4:3)*n+2*m+1 <= 128) {
1423      U = &SPACE[0];
1424      V = &SPACE[m+n+1];
1425      Q = &SPACE[(m+n+1) + n];
1426      if (Remainder)
1427        R = &SPACE[(m+n+1) + n + (m+n)];
1428    } else {
1429      U = new uint32_t[m + n + 1];
1430      V = new uint32_t[n];
1431      Q = new uint32_t[m+n];
1432      if (Remainder)
1433        R = new uint32_t[n];
1434    }
1435  
1436    // Initialize the dividend
1437    memset(U, 0, (m+n+1)*sizeof(uint32_t));
1438    for (unsigned i = 0; i < lhsWords; ++i) {
1439      uint64_t tmp = LHS[i];
1440      U[i * 2] = Lo_32(tmp);
1441      U[i * 2 + 1] = Hi_32(tmp);
1442    }
1443    U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1444  
1445    // Initialize the divisor
1446    memset(V, 0, (n)*sizeof(uint32_t));
1447    for (unsigned i = 0; i < rhsWords; ++i) {
1448      uint64_t tmp = RHS[i];
1449      V[i * 2] = Lo_32(tmp);
1450      V[i * 2 + 1] = Hi_32(tmp);
1451    }
1452  
1453    // initialize the quotient and remainder
1454    memset(Q, 0, (m+n) * sizeof(uint32_t));
1455    if (Remainder)
1456      memset(R, 0, n * sizeof(uint32_t));
1457  
1458    // Now, adjust m and n for the Knuth division. n is the number of words in
1459    // the divisor. m is the number of words by which the dividend exceeds the
1460    // divisor (i.e. m+n is the length of the dividend). These sizes must not
1461    // contain any zero words or the Knuth algorithm fails.
1462    for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1463      n--;
1464      m++;
1465    }
1466    for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1467      m--;
1468  
1469    // If we're left with only a single word for the divisor, Knuth doesn't work
1470    // so we implement the short division algorithm here. This is much simpler
1471    // and faster because we are certain that we can divide a 64-bit quantity
1472    // by a 32-bit quantity at hardware speed and short division is simply a
1473    // series of such operations. This is just like doing short division but we
1474    // are using base 2^32 instead of base 10.
1475    assert(n != 0 && "Divide by zero?");
1476    if (n == 1) {
1477      uint32_t divisor = V[0];
1478      uint32_t remainder = 0;
1479      for (int i = m; i >= 0; i--) {
1480        uint64_t partial_dividend = Make_64(remainder, U[i]);
1481        if (partial_dividend == 0) {
1482          Q[i] = 0;
1483          remainder = 0;
1484        } else if (partial_dividend < divisor) {
1485          Q[i] = 0;
1486          remainder = Lo_32(partial_dividend);
1487        } else if (partial_dividend == divisor) {
1488          Q[i] = 1;
1489          remainder = 0;
1490        } else {
1491          Q[i] = Lo_32(partial_dividend / divisor);
1492          remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1493        }
1494      }
1495      if (R)
1496        R[0] = remainder;
1497    } else {
1498      // Now we're ready to invoke the Knuth classical divide algorithm. In this
1499      // case n > 1.
1500      KnuthDiv(U, V, Q, R, m, n);
1501    }
1502  
1503    // If the caller wants the quotient
1504    if (Quotient) {
1505      for (unsigned i = 0; i < lhsWords; ++i)
1506        Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1507    }
1508  
1509    // If the caller wants the remainder
1510    if (Remainder) {
1511      for (unsigned i = 0; i < rhsWords; ++i)
1512        Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1513    }
1514  
1515    // Clean up the memory we allocated.
1516    if (U != &SPACE[0]) {
1517      delete [] U;
1518      delete [] V;
1519      delete [] Q;
1520      delete [] R;
1521    }
1522  }
1523  
1524  APInt APInt::udiv(const APInt &RHS) const {
1525    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1526  
1527    // First, deal with the easy case
1528    if (isSingleWord()) {
1529      assert(RHS.U.VAL != 0 && "Divide by zero?");
1530      return APInt(BitWidth, U.VAL / RHS.U.VAL);
1531    }
1532  
1533    // Get some facts about the LHS and RHS number of bits and words
1534    unsigned lhsWords = getNumWords(getActiveBits());
1535    unsigned rhsBits  = RHS.getActiveBits();
1536    unsigned rhsWords = getNumWords(rhsBits);
1537    assert(rhsWords && "Divided by zero???");
1538  
1539    // Deal with some degenerate cases
1540    if (!lhsWords)
1541      // 0 / X ===> 0
1542      return APInt(BitWidth, 0);
1543    if (rhsBits == 1)
1544      // X / 1 ===> X
1545      return *this;
1546    if (lhsWords < rhsWords || this->ult(RHS))
1547      // X / Y ===> 0, iff X < Y
1548      return APInt(BitWidth, 0);
1549    if (*this == RHS)
1550      // X / X ===> 1
1551      return APInt(BitWidth, 1);
1552    if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1553      // All high words are zero, just use native divide
1554      return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1555  
1556    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1557    APInt Quotient(BitWidth, 0); // to hold result.
1558    divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1559    return Quotient;
1560  }
1561  
1562  APInt APInt::udiv(uint64_t RHS) const {
1563    assert(RHS != 0 && "Divide by zero?");
1564  
1565    // First, deal with the easy case
1566    if (isSingleWord())
1567      return APInt(BitWidth, U.VAL / RHS);
1568  
1569    // Get some facts about the LHS words.
1570    unsigned lhsWords = getNumWords(getActiveBits());
1571  
1572    // Deal with some degenerate cases
1573    if (!lhsWords)
1574      // 0 / X ===> 0
1575      return APInt(BitWidth, 0);
1576    if (RHS == 1)
1577      // X / 1 ===> X
1578      return *this;
1579    if (this->ult(RHS))
1580      // X / Y ===> 0, iff X < Y
1581      return APInt(BitWidth, 0);
1582    if (*this == RHS)
1583      // X / X ===> 1
1584      return APInt(BitWidth, 1);
1585    if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1586      // All high words are zero, just use native divide
1587      return APInt(BitWidth, this->U.pVal[0] / RHS);
1588  
1589    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1590    APInt Quotient(BitWidth, 0); // to hold result.
1591    divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1592    return Quotient;
1593  }
1594  
1595  APInt APInt::sdiv(const APInt &RHS) const {
1596    if (isNegative()) {
1597      if (RHS.isNegative())
1598        return (-(*this)).udiv(-RHS);
1599      return -((-(*this)).udiv(RHS));
1600    }
1601    if (RHS.isNegative())
1602      return -(this->udiv(-RHS));
1603    return this->udiv(RHS);
1604  }
1605  
1606  APInt APInt::sdiv(int64_t RHS) const {
1607    if (isNegative()) {
1608      if (RHS < 0)
1609        return (-(*this)).udiv(-RHS);
1610      return -((-(*this)).udiv(RHS));
1611    }
1612    if (RHS < 0)
1613      return -(this->udiv(-RHS));
1614    return this->udiv(RHS);
1615  }
1616  
1617  APInt APInt::urem(const APInt &RHS) const {
1618    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1619    if (isSingleWord()) {
1620      assert(RHS.U.VAL != 0 && "Remainder by zero?");
1621      return APInt(BitWidth, U.VAL % RHS.U.VAL);
1622    }
1623  
1624    // Get some facts about the LHS
1625    unsigned lhsWords = getNumWords(getActiveBits());
1626  
1627    // Get some facts about the RHS
1628    unsigned rhsBits = RHS.getActiveBits();
1629    unsigned rhsWords = getNumWords(rhsBits);
1630    assert(rhsWords && "Performing remainder operation by zero ???");
1631  
1632    // Check the degenerate cases
1633    if (lhsWords == 0)
1634      // 0 % Y ===> 0
1635      return APInt(BitWidth, 0);
1636    if (rhsBits == 1)
1637      // X % 1 ===> 0
1638      return APInt(BitWidth, 0);
1639    if (lhsWords < rhsWords || this->ult(RHS))
1640      // X % Y ===> X, iff X < Y
1641      return *this;
1642    if (*this == RHS)
1643      // X % X == 0;
1644      return APInt(BitWidth, 0);
1645    if (lhsWords == 1)
1646      // All high words are zero, just use native remainder
1647      return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1648  
1649    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1650    APInt Remainder(BitWidth, 0);
1651    divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1652    return Remainder;
1653  }
1654  
1655  uint64_t APInt::urem(uint64_t RHS) const {
1656    assert(RHS != 0 && "Remainder by zero?");
1657  
1658    if (isSingleWord())
1659      return U.VAL % RHS;
1660  
1661    // Get some facts about the LHS
1662    unsigned lhsWords = getNumWords(getActiveBits());
1663  
1664    // Check the degenerate cases
1665    if (lhsWords == 0)
1666      // 0 % Y ===> 0
1667      return 0;
1668    if (RHS == 1)
1669      // X % 1 ===> 0
1670      return 0;
1671    if (this->ult(RHS))
1672      // X % Y ===> X, iff X < Y
1673      return getZExtValue();
1674    if (*this == RHS)
1675      // X % X == 0;
1676      return 0;
1677    if (lhsWords == 1)
1678      // All high words are zero, just use native remainder
1679      return U.pVal[0] % RHS;
1680  
1681    // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1682    uint64_t Remainder;
1683    divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1684    return Remainder;
1685  }
1686  
1687  APInt APInt::srem(const APInt &RHS) const {
1688    if (isNegative()) {
1689      if (RHS.isNegative())
1690        return -((-(*this)).urem(-RHS));
1691      return -((-(*this)).urem(RHS));
1692    }
1693    if (RHS.isNegative())
1694      return this->urem(-RHS);
1695    return this->urem(RHS);
1696  }
1697  
1698  int64_t APInt::srem(int64_t RHS) const {
1699    if (isNegative()) {
1700      if (RHS < 0)
1701        return -((-(*this)).urem(-RHS));
1702      return -((-(*this)).urem(RHS));
1703    }
1704    if (RHS < 0)
1705      return this->urem(-RHS);
1706    return this->urem(RHS);
1707  }
1708  
1709  void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1710                      APInt &Quotient, APInt &Remainder) {
1711    assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1712    unsigned BitWidth = LHS.BitWidth;
1713  
1714    // First, deal with the easy case
1715    if (LHS.isSingleWord()) {
1716      assert(RHS.U.VAL != 0 && "Divide by zero?");
1717      uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1718      uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1719      Quotient = APInt(BitWidth, QuotVal);
1720      Remainder = APInt(BitWidth, RemVal);
1721      return;
1722    }
1723  
1724    // Get some size facts about the dividend and divisor
1725    unsigned lhsWords = getNumWords(LHS.getActiveBits());
1726    unsigned rhsBits  = RHS.getActiveBits();
1727    unsigned rhsWords = getNumWords(rhsBits);
1728    assert(rhsWords && "Performing divrem operation by zero ???");
1729  
1730    // Check the degenerate cases
1731    if (lhsWords == 0) {
1732      Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1733      Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1734      return;
1735    }
1736  
1737    if (rhsBits == 1) {
1738      Quotient = LHS;                   // X / 1 ===> X
1739      Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1740    }
1741  
1742    if (lhsWords < rhsWords || LHS.ult(RHS)) {
1743      Remainder = LHS;                  // X % Y ===> X, iff X < Y
1744      Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1745      return;
1746    }
1747  
1748    if (LHS == RHS) {
1749      Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1750      Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1751      return;
1752    }
1753  
1754    // Make sure there is enough space to hold the results.
1755    // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1756    // change the size. This is necessary if Quotient or Remainder is aliased
1757    // with LHS or RHS.
1758    Quotient.reallocate(BitWidth);
1759    Remainder.reallocate(BitWidth);
1760  
1761    if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1762      // There is only one word to consider so use the native versions.
1763      uint64_t lhsValue = LHS.U.pVal[0];
1764      uint64_t rhsValue = RHS.U.pVal[0];
1765      Quotient = lhsValue / rhsValue;
1766      Remainder = lhsValue % rhsValue;
1767      return;
1768    }
1769  
1770    // Okay, lets do it the long way
1771    divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1772           Remainder.U.pVal);
1773    // Clear the rest of the Quotient and Remainder.
1774    std::memset(Quotient.U.pVal + lhsWords, 0,
1775                (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1776    std::memset(Remainder.U.pVal + rhsWords, 0,
1777                (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1778  }
1779  
1780  void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1781                      uint64_t &Remainder) {
1782    assert(RHS != 0 && "Divide by zero?");
1783    unsigned BitWidth = LHS.BitWidth;
1784  
1785    // First, deal with the easy case
1786    if (LHS.isSingleWord()) {
1787      uint64_t QuotVal = LHS.U.VAL / RHS;
1788      Remainder = LHS.U.VAL % RHS;
1789      Quotient = APInt(BitWidth, QuotVal);
1790      return;
1791    }
1792  
1793    // Get some size facts about the dividend and divisor
1794    unsigned lhsWords = getNumWords(LHS.getActiveBits());
1795  
1796    // Check the degenerate cases
1797    if (lhsWords == 0) {
1798      Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1799      Remainder = 0;                    // 0 % Y ===> 0
1800      return;
1801    }
1802  
1803    if (RHS == 1) {
1804      Quotient = LHS;                   // X / 1 ===> X
1805      Remainder = 0;                    // X % 1 ===> 0
1806      return;
1807    }
1808  
1809    if (LHS.ult(RHS)) {
1810      Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1811      Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1812      return;
1813    }
1814  
1815    if (LHS == RHS) {
1816      Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1817      Remainder = 0;                    // X % X ===> 0;
1818      return;
1819    }
1820  
1821    // Make sure there is enough space to hold the results.
1822    // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1823    // change the size. This is necessary if Quotient is aliased with LHS.
1824    Quotient.reallocate(BitWidth);
1825  
1826    if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1827      // There is only one word to consider so use the native versions.
1828      uint64_t lhsValue = LHS.U.pVal[0];
1829      Quotient = lhsValue / RHS;
1830      Remainder = lhsValue % RHS;
1831      return;
1832    }
1833  
1834    // Okay, lets do it the long way
1835    divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1836    // Clear the rest of the Quotient.
1837    std::memset(Quotient.U.pVal + lhsWords, 0,
1838                (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1839  }
1840  
1841  void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1842                      APInt &Quotient, APInt &Remainder) {
1843    if (LHS.isNegative()) {
1844      if (RHS.isNegative())
1845        APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1846      else {
1847        APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1848        Quotient.negate();
1849      }
1850      Remainder.negate();
1851    } else if (RHS.isNegative()) {
1852      APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1853      Quotient.negate();
1854    } else {
1855      APInt::udivrem(LHS, RHS, Quotient, Remainder);
1856    }
1857  }
1858  
1859  void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1860                      APInt &Quotient, int64_t &Remainder) {
1861    uint64_t R = Remainder;
1862    if (LHS.isNegative()) {
1863      if (RHS < 0)
1864        APInt::udivrem(-LHS, -RHS, Quotient, R);
1865      else {
1866        APInt::udivrem(-LHS, RHS, Quotient, R);
1867        Quotient.negate();
1868      }
1869      R = -R;
1870    } else if (RHS < 0) {
1871      APInt::udivrem(LHS, -RHS, Quotient, R);
1872      Quotient.negate();
1873    } else {
1874      APInt::udivrem(LHS, RHS, Quotient, R);
1875    }
1876    Remainder = R;
1877  }
1878  
1879  APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1880    APInt Res = *this+RHS;
1881    Overflow = isNonNegative() == RHS.isNonNegative() &&
1882               Res.isNonNegative() != isNonNegative();
1883    return Res;
1884  }
1885  
1886  APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1887    APInt Res = *this+RHS;
1888    Overflow = Res.ult(RHS);
1889    return Res;
1890  }
1891  
1892  APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1893    APInt Res = *this - RHS;
1894    Overflow = isNonNegative() != RHS.isNonNegative() &&
1895               Res.isNonNegative() != isNonNegative();
1896    return Res;
1897  }
1898  
1899  APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1900    APInt Res = *this-RHS;
1901    Overflow = Res.ugt(*this);
1902    return Res;
1903  }
1904  
1905  APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1906    // MININT/-1  -->  overflow.
1907    Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1908    return sdiv(RHS);
1909  }
1910  
1911  APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1912    APInt Res = *this * RHS;
1913  
1914    if (*this != 0 && RHS != 0)
1915      Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1916    else
1917      Overflow = false;
1918    return Res;
1919  }
1920  
1921  APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1922    if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1923      Overflow = true;
1924      return *this * RHS;
1925    }
1926  
1927    APInt Res = lshr(1) * RHS;
1928    Overflow = Res.isNegative();
1929    Res <<= 1;
1930    if ((*this)[0]) {
1931      Res += RHS;
1932      if (Res.ult(RHS))
1933        Overflow = true;
1934    }
1935    return Res;
1936  }
1937  
1938  APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1939    Overflow = ShAmt.uge(getBitWidth());
1940    if (Overflow)
1941      return APInt(BitWidth, 0);
1942  
1943    if (isNonNegative()) // Don't allow sign change.
1944      Overflow = ShAmt.uge(countLeadingZeros());
1945    else
1946      Overflow = ShAmt.uge(countLeadingOnes());
1947  
1948    return *this << ShAmt;
1949  }
1950  
1951  APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
1952    Overflow = ShAmt.uge(getBitWidth());
1953    if (Overflow)
1954      return APInt(BitWidth, 0);
1955  
1956    Overflow = ShAmt.ugt(countLeadingZeros());
1957  
1958    return *this << ShAmt;
1959  }
1960  
1961  APInt APInt::sadd_sat(const APInt &RHS) const {
1962    bool Overflow;
1963    APInt Res = sadd_ov(RHS, Overflow);
1964    if (!Overflow)
1965      return Res;
1966  
1967    return isNegative() ? APInt::getSignedMinValue(BitWidth)
1968                        : APInt::getSignedMaxValue(BitWidth);
1969  }
1970  
1971  APInt APInt::uadd_sat(const APInt &RHS) const {
1972    bool Overflow;
1973    APInt Res = uadd_ov(RHS, Overflow);
1974    if (!Overflow)
1975      return Res;
1976  
1977    return APInt::getMaxValue(BitWidth);
1978  }
1979  
1980  APInt APInt::ssub_sat(const APInt &RHS) const {
1981    bool Overflow;
1982    APInt Res = ssub_ov(RHS, Overflow);
1983    if (!Overflow)
1984      return Res;
1985  
1986    return isNegative() ? APInt::getSignedMinValue(BitWidth)
1987                        : APInt::getSignedMaxValue(BitWidth);
1988  }
1989  
1990  APInt APInt::usub_sat(const APInt &RHS) const {
1991    bool Overflow;
1992    APInt Res = usub_ov(RHS, Overflow);
1993    if (!Overflow)
1994      return Res;
1995  
1996    return APInt(BitWidth, 0);
1997  }
1998  
1999  
2000  void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2001    // Check our assumptions here
2002    assert(!str.empty() && "Invalid string length");
2003    assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2004            radix == 36) &&
2005           "Radix should be 2, 8, 10, 16, or 36!");
2006  
2007    StringRef::iterator p = str.begin();
2008    size_t slen = str.size();
2009    bool isNeg = *p == '-';
2010    if (*p == '-' || *p == '+') {
2011      p++;
2012      slen--;
2013      assert(slen && "String is only a sign, needs a value.");
2014    }
2015    assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2016    assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2017    assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2018    assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2019           "Insufficient bit width");
2020  
2021    // Allocate memory if needed
2022    if (isSingleWord())
2023      U.VAL = 0;
2024    else
2025      U.pVal = getClearedMemory(getNumWords());
2026  
2027    // Figure out if we can shift instead of multiply
2028    unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2029  
2030    // Enter digit traversal loop
2031    for (StringRef::iterator e = str.end(); p != e; ++p) {
2032      unsigned digit = getDigit(*p, radix);
2033      assert(digit < radix && "Invalid character in digit string");
2034  
2035      // Shift or multiply the value by the radix
2036      if (slen > 1) {
2037        if (shift)
2038          *this <<= shift;
2039        else
2040          *this *= radix;
2041      }
2042  
2043      // Add in the digit we just interpreted
2044      *this += digit;
2045    }
2046    // If its negative, put it in two's complement form
2047    if (isNeg)
2048      this->negate();
2049  }
2050  
2051  void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2052                       bool Signed, bool formatAsCLiteral) const {
2053    assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2054            Radix == 36) &&
2055           "Radix should be 2, 8, 10, 16, or 36!");
2056  
2057    const char *Prefix = "";
2058    if (formatAsCLiteral) {
2059      switch (Radix) {
2060        case 2:
2061          // Binary literals are a non-standard extension added in gcc 4.3:
2062          // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2063          Prefix = "0b";
2064          break;
2065        case 8:
2066          Prefix = "0";
2067          break;
2068        case 10:
2069          break; // No prefix
2070        case 16:
2071          Prefix = "0x";
2072          break;
2073        default:
2074          llvm_unreachable("Invalid radix!");
2075      }
2076    }
2077  
2078    // First, check for a zero value and just short circuit the logic below.
2079    if (*this == 0) {
2080      while (*Prefix) {
2081        Str.push_back(*Prefix);
2082        ++Prefix;
2083      };
2084      Str.push_back('0');
2085      return;
2086    }
2087  
2088    static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2089  
2090    if (isSingleWord()) {
2091      char Buffer[65];
2092      char *BufPtr = std::end(Buffer);
2093  
2094      uint64_t N;
2095      if (!Signed) {
2096        N = getZExtValue();
2097      } else {
2098        int64_t I = getSExtValue();
2099        if (I >= 0) {
2100          N = I;
2101        } else {
2102          Str.push_back('-');
2103          N = -(uint64_t)I;
2104        }
2105      }
2106  
2107      while (*Prefix) {
2108        Str.push_back(*Prefix);
2109        ++Prefix;
2110      };
2111  
2112      while (N) {
2113        *--BufPtr = Digits[N % Radix];
2114        N /= Radix;
2115      }
2116      Str.append(BufPtr, std::end(Buffer));
2117      return;
2118    }
2119  
2120    APInt Tmp(*this);
2121  
2122    if (Signed && isNegative()) {
2123      // They want to print the signed version and it is a negative value
2124      // Flip the bits and add one to turn it into the equivalent positive
2125      // value and put a '-' in the result.
2126      Tmp.negate();
2127      Str.push_back('-');
2128    }
2129  
2130    while (*Prefix) {
2131      Str.push_back(*Prefix);
2132      ++Prefix;
2133    };
2134  
2135    // We insert the digits backward, then reverse them to get the right order.
2136    unsigned StartDig = Str.size();
2137  
2138    // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2139    // because the number of bits per digit (1, 3 and 4 respectively) divides
2140    // equally.  We just shift until the value is zero.
2141    if (Radix == 2 || Radix == 8 || Radix == 16) {
2142      // Just shift tmp right for each digit width until it becomes zero
2143      unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2144      unsigned MaskAmt = Radix - 1;
2145  
2146      while (Tmp.getBoolValue()) {
2147        unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2148        Str.push_back(Digits[Digit]);
2149        Tmp.lshrInPlace(ShiftAmt);
2150      }
2151    } else {
2152      while (Tmp.getBoolValue()) {
2153        uint64_t Digit;
2154        udivrem(Tmp, Radix, Tmp, Digit);
2155        assert(Digit < Radix && "divide failed");
2156        Str.push_back(Digits[Digit]);
2157      }
2158    }
2159  
2160    // Reverse the digits before returning.
2161    std::reverse(Str.begin()+StartDig, Str.end());
2162  }
2163  
2164  /// Returns the APInt as a std::string. Note that this is an inefficient method.
2165  /// It is better to pass in a SmallVector/SmallString to the methods above.
2166  std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2167    SmallString<40> S;
2168    toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2169    return S.str();
2170  }
2171  
2172  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173  LLVM_DUMP_METHOD void APInt::dump() const {
2174    SmallString<40> S, U;
2175    this->toStringUnsigned(U);
2176    this->toStringSigned(S);
2177    dbgs() << "APInt(" << BitWidth << "b, "
2178           << U << "u " << S << "s)\n";
2179  }
2180  #endif
2181  
2182  void APInt::print(raw_ostream &OS, bool isSigned) const {
2183    SmallString<40> S;
2184    this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2185    OS << S;
2186  }
2187  
2188  // This implements a variety of operations on a representation of
2189  // arbitrary precision, two's-complement, bignum integer values.
2190  
2191  // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2192  // and unrestricting assumption.
2193  static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2194                "Part width must be divisible by 2!");
2195  
2196  /* Some handy functions local to this file.  */
2197  
2198  /* Returns the integer part with the least significant BITS set.
2199     BITS cannot be zero.  */
2200  static inline APInt::WordType lowBitMask(unsigned bits) {
2201    assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2202  
2203    return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2204  }
2205  
2206  /* Returns the value of the lower half of PART.  */
2207  static inline APInt::WordType lowHalf(APInt::WordType part) {
2208    return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2209  }
2210  
2211  /* Returns the value of the upper half of PART.  */
2212  static inline APInt::WordType highHalf(APInt::WordType part) {
2213    return part >> (APInt::APINT_BITS_PER_WORD / 2);
2214  }
2215  
2216  /* Returns the bit number of the most significant set bit of a part.
2217     If the input number has no bits set -1U is returned.  */
2218  static unsigned partMSB(APInt::WordType value) {
2219    return findLastSet(value, ZB_Max);
2220  }
2221  
2222  /* Returns the bit number of the least significant set bit of a
2223     part.  If the input number has no bits set -1U is returned.  */
2224  static unsigned partLSB(APInt::WordType value) {
2225    return findFirstSet(value, ZB_Max);
2226  }
2227  
2228  /* Sets the least significant part of a bignum to the input value, and
2229     zeroes out higher parts.  */
2230  void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2231    assert(parts > 0);
2232  
2233    dst[0] = part;
2234    for (unsigned i = 1; i < parts; i++)
2235      dst[i] = 0;
2236  }
2237  
2238  /* Assign one bignum to another.  */
2239  void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2240    for (unsigned i = 0; i < parts; i++)
2241      dst[i] = src[i];
2242  }
2243  
2244  /* Returns true if a bignum is zero, false otherwise.  */
2245  bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2246    for (unsigned i = 0; i < parts; i++)
2247      if (src[i])
2248        return false;
2249  
2250    return true;
2251  }
2252  
2253  /* Extract the given bit of a bignum; returns 0 or 1.  */
2254  int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2255    return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2256  }
2257  
2258  /* Set the given bit of a bignum. */
2259  void APInt::tcSetBit(WordType *parts, unsigned bit) {
2260    parts[whichWord(bit)] |= maskBit(bit);
2261  }
2262  
2263  /* Clears the given bit of a bignum. */
2264  void APInt::tcClearBit(WordType *parts, unsigned bit) {
2265    parts[whichWord(bit)] &= ~maskBit(bit);
2266  }
2267  
2268  /* Returns the bit number of the least significant set bit of a
2269     number.  If the input number has no bits set -1U is returned.  */
2270  unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2271    for (unsigned i = 0; i < n; i++) {
2272      if (parts[i] != 0) {
2273        unsigned lsb = partLSB(parts[i]);
2274  
2275        return lsb + i * APINT_BITS_PER_WORD;
2276      }
2277    }
2278  
2279    return -1U;
2280  }
2281  
2282  /* Returns the bit number of the most significant set bit of a number.
2283     If the input number has no bits set -1U is returned.  */
2284  unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2285    do {
2286      --n;
2287  
2288      if (parts[n] != 0) {
2289        unsigned msb = partMSB(parts[n]);
2290  
2291        return msb + n * APINT_BITS_PER_WORD;
2292      }
2293    } while (n);
2294  
2295    return -1U;
2296  }
2297  
2298  /* Copy the bit vector of width srcBITS from SRC, starting at bit
2299     srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2300     the least significant bit of DST.  All high bits above srcBITS in
2301     DST are zero-filled.  */
2302  void
2303  APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2304                   unsigned srcBits, unsigned srcLSB) {
2305    unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2306    assert(dstParts <= dstCount);
2307  
2308    unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2309    tcAssign (dst, src + firstSrcPart, dstParts);
2310  
2311    unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2312    tcShiftRight (dst, dstParts, shift);
2313  
2314    /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2315       in DST.  If this is less that srcBits, append the rest, else
2316       clear the high bits.  */
2317    unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2318    if (n < srcBits) {
2319      WordType mask = lowBitMask (srcBits - n);
2320      dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2321                            << n % APINT_BITS_PER_WORD);
2322    } else if (n > srcBits) {
2323      if (srcBits % APINT_BITS_PER_WORD)
2324        dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2325    }
2326  
2327    /* Clear high parts.  */
2328    while (dstParts < dstCount)
2329      dst[dstParts++] = 0;
2330  }
2331  
2332  /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2333  APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2334                               WordType c, unsigned parts) {
2335    assert(c <= 1);
2336  
2337    for (unsigned i = 0; i < parts; i++) {
2338      WordType l = dst[i];
2339      if (c) {
2340        dst[i] += rhs[i] + 1;
2341        c = (dst[i] <= l);
2342      } else {
2343        dst[i] += rhs[i];
2344        c = (dst[i] < l);
2345      }
2346    }
2347  
2348    return c;
2349  }
2350  
2351  /// This function adds a single "word" integer, src, to the multiple
2352  /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2353  /// 1 is returned if there is a carry out, otherwise 0 is returned.
2354  /// @returns the carry of the addition.
2355  APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2356                                   unsigned parts) {
2357    for (unsigned i = 0; i < parts; ++i) {
2358      dst[i] += src;
2359      if (dst[i] >= src)
2360        return 0; // No need to carry so exit early.
2361      src = 1; // Carry one to next digit.
2362    }
2363  
2364    return 1;
2365  }
2366  
2367  /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2368  APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2369                                    WordType c, unsigned parts) {
2370    assert(c <= 1);
2371  
2372    for (unsigned i = 0; i < parts; i++) {
2373      WordType l = dst[i];
2374      if (c) {
2375        dst[i] -= rhs[i] + 1;
2376        c = (dst[i] >= l);
2377      } else {
2378        dst[i] -= rhs[i];
2379        c = (dst[i] > l);
2380      }
2381    }
2382  
2383    return c;
2384  }
2385  
2386  /// This function subtracts a single "word" (64-bit word), src, from
2387  /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2388  /// no further borrowing is needed or it runs out of "words" in dst.  The result
2389  /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2390  /// exhausted. In other words, if src > dst then this function returns 1,
2391  /// otherwise 0.
2392  /// @returns the borrow out of the subtraction
2393  APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2394                                        unsigned parts) {
2395    for (unsigned i = 0; i < parts; ++i) {
2396      WordType Dst = dst[i];
2397      dst[i] -= src;
2398      if (src <= Dst)
2399        return 0; // No need to borrow so exit early.
2400      src = 1; // We have to "borrow 1" from next "word"
2401    }
2402  
2403    return 1;
2404  }
2405  
2406  /* Negate a bignum in-place.  */
2407  void APInt::tcNegate(WordType *dst, unsigned parts) {
2408    tcComplement(dst, parts);
2409    tcIncrement(dst, parts);
2410  }
2411  
2412  /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2413      DST  = SRC * MULTIPLIER + CARRY   if add is false
2414  
2415      Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2416      they must start at the same point, i.e. DST == SRC.
2417  
2418      If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2419      returned.  Otherwise DST is filled with the least significant
2420      DSTPARTS parts of the result, and if all of the omitted higher
2421      parts were zero return zero, otherwise overflow occurred and
2422      return one.  */
2423  int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2424                            WordType multiplier, WordType carry,
2425                            unsigned srcParts, unsigned dstParts,
2426                            bool add) {
2427    /* Otherwise our writes of DST kill our later reads of SRC.  */
2428    assert(dst <= src || dst >= src + srcParts);
2429    assert(dstParts <= srcParts + 1);
2430  
2431    /* N loops; minimum of dstParts and srcParts.  */
2432    unsigned n = std::min(dstParts, srcParts);
2433  
2434    for (unsigned i = 0; i < n; i++) {
2435      WordType low, mid, high, srcPart;
2436  
2437        /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2438  
2439           This cannot overflow, because
2440  
2441           (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2442  
2443           which is less than n^2.  */
2444  
2445      srcPart = src[i];
2446  
2447      if (multiplier == 0 || srcPart == 0) {
2448        low = carry;
2449        high = 0;
2450      } else {
2451        low = lowHalf(srcPart) * lowHalf(multiplier);
2452        high = highHalf(srcPart) * highHalf(multiplier);
2453  
2454        mid = lowHalf(srcPart) * highHalf(multiplier);
2455        high += highHalf(mid);
2456        mid <<= APINT_BITS_PER_WORD / 2;
2457        if (low + mid < low)
2458          high++;
2459        low += mid;
2460  
2461        mid = highHalf(srcPart) * lowHalf(multiplier);
2462        high += highHalf(mid);
2463        mid <<= APINT_BITS_PER_WORD / 2;
2464        if (low + mid < low)
2465          high++;
2466        low += mid;
2467  
2468        /* Now add carry.  */
2469        if (low + carry < low)
2470          high++;
2471        low += carry;
2472      }
2473  
2474      if (add) {
2475        /* And now DST[i], and store the new low part there.  */
2476        if (low + dst[i] < low)
2477          high++;
2478        dst[i] += low;
2479      } else
2480        dst[i] = low;
2481  
2482      carry = high;
2483    }
2484  
2485    if (srcParts < dstParts) {
2486      /* Full multiplication, there is no overflow.  */
2487      assert(srcParts + 1 == dstParts);
2488      dst[srcParts] = carry;
2489      return 0;
2490    }
2491  
2492    /* We overflowed if there is carry.  */
2493    if (carry)
2494      return 1;
2495  
2496    /* We would overflow if any significant unwritten parts would be
2497       non-zero.  This is true if any remaining src parts are non-zero
2498       and the multiplier is non-zero.  */
2499    if (multiplier)
2500      for (unsigned i = dstParts; i < srcParts; i++)
2501        if (src[i])
2502          return 1;
2503  
2504    /* We fitted in the narrow destination.  */
2505    return 0;
2506  }
2507  
2508  /* DST = LHS * RHS, where DST has the same width as the operands and
2509     is filled with the least significant parts of the result.  Returns
2510     one if overflow occurred, otherwise zero.  DST must be disjoint
2511     from both operands.  */
2512  int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2513                        const WordType *rhs, unsigned parts) {
2514    assert(dst != lhs && dst != rhs);
2515  
2516    int overflow = 0;
2517    tcSet(dst, 0, parts);
2518  
2519    for (unsigned i = 0; i < parts; i++)
2520      overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2521                                 parts - i, true);
2522  
2523    return overflow;
2524  }
2525  
2526  /// DST = LHS * RHS, where DST has width the sum of the widths of the
2527  /// operands. No overflow occurs. DST must be disjoint from both operands.
2528  void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2529                             const WordType *rhs, unsigned lhsParts,
2530                             unsigned rhsParts) {
2531    /* Put the narrower number on the LHS for less loops below.  */
2532    if (lhsParts > rhsParts)
2533      return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2534  
2535    assert(dst != lhs && dst != rhs);
2536  
2537    tcSet(dst, 0, rhsParts);
2538  
2539    for (unsigned i = 0; i < lhsParts; i++)
2540      tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2541  }
2542  
2543  /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2544     Otherwise set LHS to LHS / RHS with the fractional part discarded,
2545     set REMAINDER to the remainder, return zero.  i.e.
2546  
2547     OLD_LHS = RHS * LHS + REMAINDER
2548  
2549     SCRATCH is a bignum of the same size as the operands and result for
2550     use by the routine; its contents need not be initialized and are
2551     destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2552  */
2553  int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2554                      WordType *remainder, WordType *srhs,
2555                      unsigned parts) {
2556    assert(lhs != remainder && lhs != srhs && remainder != srhs);
2557  
2558    unsigned shiftCount = tcMSB(rhs, parts) + 1;
2559    if (shiftCount == 0)
2560      return true;
2561  
2562    shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2563    unsigned n = shiftCount / APINT_BITS_PER_WORD;
2564    WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2565  
2566    tcAssign(srhs, rhs, parts);
2567    tcShiftLeft(srhs, parts, shiftCount);
2568    tcAssign(remainder, lhs, parts);
2569    tcSet(lhs, 0, parts);
2570  
2571    /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2572       the total.  */
2573    for (;;) {
2574      int compare = tcCompare(remainder, srhs, parts);
2575      if (compare >= 0) {
2576        tcSubtract(remainder, srhs, 0, parts);
2577        lhs[n] |= mask;
2578      }
2579  
2580      if (shiftCount == 0)
2581        break;
2582      shiftCount--;
2583      tcShiftRight(srhs, parts, 1);
2584      if ((mask >>= 1) == 0) {
2585        mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2586        n--;
2587      }
2588    }
2589  
2590    return false;
2591  }
2592  
2593  /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2594  /// no restrictions on Count.
2595  void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2596    // Don't bother performing a no-op shift.
2597    if (!Count)
2598      return;
2599  
2600    // WordShift is the inter-part shift; BitShift is the intra-part shift.
2601    unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2602    unsigned BitShift = Count % APINT_BITS_PER_WORD;
2603  
2604    // Fastpath for moving by whole words.
2605    if (BitShift == 0) {
2606      std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2607    } else {
2608      while (Words-- > WordShift) {
2609        Dst[Words] = Dst[Words - WordShift] << BitShift;
2610        if (Words > WordShift)
2611          Dst[Words] |=
2612            Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2613      }
2614    }
2615  
2616    // Fill in the remainder with 0s.
2617    std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2618  }
2619  
2620  /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2621  /// are no restrictions on Count.
2622  void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2623    // Don't bother performing a no-op shift.
2624    if (!Count)
2625      return;
2626  
2627    // WordShift is the inter-part shift; BitShift is the intra-part shift.
2628    unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2629    unsigned BitShift = Count % APINT_BITS_PER_WORD;
2630  
2631    unsigned WordsToMove = Words - WordShift;
2632    // Fastpath for moving by whole words.
2633    if (BitShift == 0) {
2634      std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2635    } else {
2636      for (unsigned i = 0; i != WordsToMove; ++i) {
2637        Dst[i] = Dst[i + WordShift] >> BitShift;
2638        if (i + 1 != WordsToMove)
2639          Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2640      }
2641    }
2642  
2643    // Fill in the remainder with 0s.
2644    std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2645  }
2646  
2647  /* Bitwise and of two bignums.  */
2648  void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2649    for (unsigned i = 0; i < parts; i++)
2650      dst[i] &= rhs[i];
2651  }
2652  
2653  /* Bitwise inclusive or of two bignums.  */
2654  void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2655    for (unsigned i = 0; i < parts; i++)
2656      dst[i] |= rhs[i];
2657  }
2658  
2659  /* Bitwise exclusive or of two bignums.  */
2660  void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2661    for (unsigned i = 0; i < parts; i++)
2662      dst[i] ^= rhs[i];
2663  }
2664  
2665  /* Complement a bignum in-place.  */
2666  void APInt::tcComplement(WordType *dst, unsigned parts) {
2667    for (unsigned i = 0; i < parts; i++)
2668      dst[i] = ~dst[i];
2669  }
2670  
2671  /* Comparison (unsigned) of two bignums.  */
2672  int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2673                       unsigned parts) {
2674    while (parts) {
2675      parts--;
2676      if (lhs[parts] != rhs[parts])
2677        return (lhs[parts] > rhs[parts]) ? 1 : -1;
2678    }
2679  
2680    return 0;
2681  }
2682  
2683  /* Set the least significant BITS bits of a bignum, clear the
2684     rest.  */
2685  void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2686                                        unsigned bits) {
2687    unsigned i = 0;
2688    while (bits > APINT_BITS_PER_WORD) {
2689      dst[i++] = ~(WordType) 0;
2690      bits -= APINT_BITS_PER_WORD;
2691    }
2692  
2693    if (bits)
2694      dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2695  
2696    while (i < parts)
2697      dst[i++] = 0;
2698  }
2699  
2700  APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2701                                     APInt::Rounding RM) {
2702    // Currently udivrem always rounds down.
2703    switch (RM) {
2704    case APInt::Rounding::DOWN:
2705    case APInt::Rounding::TOWARD_ZERO:
2706      return A.udiv(B);
2707    case APInt::Rounding::UP: {
2708      APInt Quo, Rem;
2709      APInt::udivrem(A, B, Quo, Rem);
2710      if (Rem == 0)
2711        return Quo;
2712      return Quo + 1;
2713    }
2714    }
2715    llvm_unreachable("Unknown APInt::Rounding enum");
2716  }
2717  
2718  APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2719                                     APInt::Rounding RM) {
2720    switch (RM) {
2721    case APInt::Rounding::DOWN:
2722    case APInt::Rounding::UP: {
2723      APInt Quo, Rem;
2724      APInt::sdivrem(A, B, Quo, Rem);
2725      if (Rem == 0)
2726        return Quo;
2727      // This algorithm deals with arbitrary rounding mode used by sdivrem.
2728      // We want to check whether the non-integer part of the mathematical value
2729      // is negative or not. If the non-integer part is negative, we need to round
2730      // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2731      // already rounded down.
2732      if (RM == APInt::Rounding::DOWN) {
2733        if (Rem.isNegative() != B.isNegative())
2734          return Quo - 1;
2735        return Quo;
2736      }
2737      if (Rem.isNegative() != B.isNegative())
2738        return Quo;
2739      return Quo + 1;
2740    }
2741    // Currently sdiv rounds twards zero.
2742    case APInt::Rounding::TOWARD_ZERO:
2743      return A.sdiv(B);
2744    }
2745    llvm_unreachable("Unknown APInt::Rounding enum");
2746  }
2747  
2748  Optional<APInt>
2749  llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2750                                             unsigned RangeWidth) {
2751    unsigned CoeffWidth = A.getBitWidth();
2752    assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2753    assert(RangeWidth <= CoeffWidth &&
2754           "Value range width should be less than coefficient width");
2755    assert(RangeWidth > 1 && "Value range bit width should be > 1");
2756  
2757    LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2758                      << "x + " << C << ", rw:" << RangeWidth << '\n');
2759  
2760    // Identify 0 as a (non)solution immediately.
2761    if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2762      LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2763      return APInt(CoeffWidth, 0);
2764    }
2765  
2766    // The result of APInt arithmetic has the same bit width as the operands,
2767    // so it can actually lose high bits. A product of two n-bit integers needs
2768    // 2n-1 bits to represent the full value.
2769    // The operation done below (on quadratic coefficients) that can produce
2770    // the largest value is the evaluation of the equation during bisection,
2771    // which needs 3 times the bitwidth of the coefficient, so the total number
2772    // of required bits is 3n.
2773    //
2774    // The purpose of this extension is to simulate the set Z of all integers,
2775    // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2776    // and negative numbers (not so much in a modulo arithmetic). The method
2777    // used to solve the equation is based on the standard formula for real
2778    // numbers, and uses the concepts of "positive" and "negative" with their
2779    // usual meanings.
2780    CoeffWidth *= 3;
2781    A = A.sext(CoeffWidth);
2782    B = B.sext(CoeffWidth);
2783    C = C.sext(CoeffWidth);
2784  
2785    // Make A > 0 for simplicity. Negate cannot overflow at this point because
2786    // the bit width has increased.
2787    if (A.isNegative()) {
2788      A.negate();
2789      B.negate();
2790      C.negate();
2791    }
2792  
2793    // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2794    // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2795    // and R = 2^BitWidth.
2796    // Since we're trying not only to find exact solutions, but also values
2797    // that "wrap around", such a set will always have a solution, i.e. an x
2798    // that satisfies at least one of the equations, or such that |q(x)|
2799    // exceeds kR, while |q(x-1)| for the same k does not.
2800    //
2801    // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2802    // positive solution n (in the above sense), and also such that the n
2803    // will be the least among all solutions corresponding to k = 0, 1, ...
2804    // (more precisely, the least element in the set
2805    //   { n(k) | k is such that a solution n(k) exists }).
2806    //
2807    // Consider the parabola (over real numbers) that corresponds to the
2808    // quadratic equation. Since A > 0, the arms of the parabola will point
2809    // up. Picking different values of k will shift it up and down by R.
2810    //
2811    // We want to shift the parabola in such a way as to reduce the problem
2812    // of solving q(x) = kR to solving shifted_q(x) = 0.
2813    // (The interesting solutions are the ceilings of the real number
2814    // solutions.)
2815    APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2816    APInt TwoA = 2 * A;
2817    APInt SqrB = B * B;
2818    bool PickLow;
2819  
2820    auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2821      assert(A.isStrictlyPositive());
2822      APInt T = V.abs().urem(A);
2823      if (T.isNullValue())
2824        return V;
2825      return V.isNegative() ? V+T : V+(A-T);
2826    };
2827  
2828    // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2829    // iff B is positive.
2830    if (B.isNonNegative()) {
2831      // If B >= 0, the vertex it at a negative location (or at 0), so in
2832      // order to have a non-negative solution we need to pick k that makes
2833      // C-kR negative. To satisfy all the requirements for the solution
2834      // that we are looking for, it needs to be closest to 0 of all k.
2835      C = C.srem(R);
2836      if (C.isStrictlyPositive())
2837        C -= R;
2838      // Pick the greater solution.
2839      PickLow = false;
2840    } else {
2841      // If B < 0, the vertex is at a positive location. For any solution
2842      // to exist, the discriminant must be non-negative. This means that
2843      // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2844      // lower bound on values of k: kR >= C - B^2/4A.
2845      APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2846      // Round LowkR up (towards +inf) to the nearest kR.
2847      LowkR = RoundUp(LowkR, R);
2848  
2849      // If there exists k meeting the condition above, and such that
2850      // C-kR > 0, there will be two positive real number solutions of
2851      // q(x) = kR. Out of all such values of k, pick the one that makes
2852      // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2853      // In other words, find maximum k such that LowkR <= kR < C.
2854      if (C.sgt(LowkR)) {
2855        // If LowkR < C, then such a k is guaranteed to exist because
2856        // LowkR itself is a multiple of R.
2857        C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2858        // Pick the smaller solution.
2859        PickLow = true;
2860      } else {
2861        // If C-kR < 0 for all potential k's, it means that one solution
2862        // will be negative, while the other will be positive. The positive
2863        // solution will shift towards 0 if the parabola is moved up.
2864        // Pick the kR closest to the lower bound (i.e. make C-kR closest
2865        // to 0, or in other words, out of all parabolas that have solutions,
2866        // pick the one that is the farthest "up").
2867        // Since LowkR is itself a multiple of R, simply take C-LowkR.
2868        C -= LowkR;
2869        // Pick the greater solution.
2870        PickLow = false;
2871      }
2872    }
2873  
2874    LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2875                      << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2876  
2877    APInt D = SqrB - 4*A*C;
2878    assert(D.isNonNegative() && "Negative discriminant");
2879    APInt SQ = D.sqrt();
2880  
2881    APInt Q = SQ * SQ;
2882    bool InexactSQ = Q != D;
2883    // The calculated SQ may actually be greater than the exact (non-integer)
2884    // value. If that's the case, decremement SQ to get a value that is lower.
2885    if (Q.sgt(D))
2886      SQ -= 1;
2887  
2888    APInt X;
2889    APInt Rem;
2890  
2891    // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2892    // When using the quadratic formula directly, the calculated low root
2893    // may be greater than the exact one, since we would be subtracting SQ.
2894    // To make sure that the calculated root is not greater than the exact
2895    // one, subtract SQ+1 when calculating the low root (for inexact value
2896    // of SQ).
2897    if (PickLow)
2898      APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2899    else
2900      APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2901  
2902    // The updated coefficients should be such that the (exact) solution is
2903    // positive. Since APInt division rounds towards 0, the calculated one
2904    // can be 0, but cannot be negative.
2905    assert(X.isNonNegative() && "Solution should be non-negative");
2906  
2907    if (!InexactSQ && Rem.isNullValue()) {
2908      LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2909      return X;
2910    }
2911  
2912    assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2913    // The exact value of the square root of D should be between SQ and SQ+1.
2914    // This implies that the solution should be between that corresponding to
2915    // SQ (i.e. X) and that corresponding to SQ+1.
2916    //
2917    // The calculated X cannot be greater than the exact (real) solution.
2918    // Actually it must be strictly less than the exact solution, while
2919    // X+1 will be greater than or equal to it.
2920  
2921    APInt VX = (A*X + B)*X + C;
2922    APInt VY = VX + TwoA*X + A + B;
2923    bool SignChange = VX.isNegative() != VY.isNegative() ||
2924                      VX.isNullValue() != VY.isNullValue();
2925    // If the sign did not change between X and X+1, X is not a valid solution.
2926    // This could happen when the actual (exact) roots don't have an integer
2927    // between them, so they would both be contained between X and X+1.
2928    if (!SignChange) {
2929      LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2930      return None;
2931    }
2932  
2933    X += 1;
2934    LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2935    return X;
2936  }
2937  
2938  /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2939  /// with the integer held in IntVal.
2940  void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2941                              unsigned StoreBytes) {
2942    assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2943    const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2944  
2945    if (sys::IsLittleEndianHost) {
2946      // Little-endian host - the source is ordered from LSB to MSB.  Order the
2947      // destination from LSB to MSB: Do a straight copy.
2948      memcpy(Dst, Src, StoreBytes);
2949    } else {
2950      // Big-endian host - the source is an array of 64 bit words ordered from
2951      // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
2952      // from MSB to LSB: Reverse the word order, but not the bytes in a word.
2953      while (StoreBytes > sizeof(uint64_t)) {
2954        StoreBytes -= sizeof(uint64_t);
2955        // May not be aligned so use memcpy.
2956        memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
2957        Src += sizeof(uint64_t);
2958      }
2959  
2960      memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
2961    }
2962  }
2963  
2964  /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2965  /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2966  void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
2967    assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
2968    uint8_t *Dst = reinterpret_cast<uint8_t *>(
2969                     const_cast<uint64_t *>(IntVal.getRawData()));
2970  
2971    if (sys::IsLittleEndianHost)
2972      // Little-endian host - the destination must be ordered from LSB to MSB.
2973      // The source is ordered from LSB to MSB: Do a straight copy.
2974      memcpy(Dst, Src, LoadBytes);
2975    else {
2976      // Big-endian - the destination is an array of 64 bit words ordered from
2977      // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
2978      // ordered from MSB to LSB: Reverse the word order, but not the bytes in
2979      // a word.
2980      while (LoadBytes > sizeof(uint64_t)) {
2981        LoadBytes -= sizeof(uint64_t);
2982        // May not be aligned so use memcpy.
2983        memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
2984        Dst += sizeof(uint64_t);
2985      }
2986  
2987      memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
2988    }
2989  }
2990