xref: /freebsd/contrib/llvm-project/llvm/lib/Support/APInt.cpp (revision 8bcb0991864975618c09697b1aca10683346d9f0)
1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
/// Allocate an array of \p numWords 64-bit words and return it zero-filled.
/// The storage comes from array-new, so it must eventually be released with
/// delete[] by whoever ends up owning the pointer.
inline static uint64_t* getClearedMemory(unsigned numWords) {
  // The empty initializer value-initializes every element, i.e. sets it to 0.
  return new uint64_t[numWords]();
}
42 
/// Allocate an array of \p numWords 64-bit words without initializing it.
/// The contents are indeterminate until written; release with delete[].
inline static uint64_t* getMemory(unsigned numWords) {
  uint64_t *words = new uint64_t[numWords];
  return words;
}
48 
/// Translate the character \p cdigit into its numeric value in base \p radix.
///
/// For radix 16 and 36 the decimal digits map to 0-9 and letters (either
/// case) map to 10 and up, limited to the letters valid for that radix. For
/// any other radix only '0' .. '0' + radix - 1 are accepted.
///
/// \returns the digit value, or -1U when \p cdigit is not a digit of \p radix.
inline static unsigned getDigit(char cdigit, uint8_t radix) {
  const unsigned invalid = -1U;

  // Distance from '0'; characters below '0' wrap around to a huge value and
  // therefore fail every range check below.
  const unsigned decimal = cdigit - '0';

  // Radixes without letters: a plain range check is all that is needed.
  if (radix != 16 && radix != 36)
    return decimal < radix ? decimal : invalid;

  if (decimal <= 9)
    return decimal;

  // Highest letter offset valid for this radix (5 for hex, 25 for base 36).
  const unsigned maxLetter = radix - 11U;

  const unsigned upper = cdigit - 'A';
  if (upper <= maxLetter)
    return upper + 10;

  const unsigned lower = cdigit - 'a';
  if (lower <= maxLetter)
    return lower + 10;

  return invalid;
}
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APint to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   if ((*this)[bitPosition]) clearBit(bitPosition);
342   else setBit(bitPosition);
343 }
344 
345 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
346   unsigned subBitWidth = subBits.getBitWidth();
347   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
348          "Illegal bit insertion");
349 
350   // Insertion is a direct copy.
351   if (subBitWidth == BitWidth) {
352     *this = subBits;
353     return;
354   }
355 
356   // Single word result can be done as a direct bitmask.
357   if (isSingleWord()) {
358     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
359     U.VAL &= ~(mask << bitPosition);
360     U.VAL |= (subBits.U.VAL << bitPosition);
361     return;
362   }
363 
364   unsigned loBit = whichBit(bitPosition);
365   unsigned loWord = whichWord(bitPosition);
366   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
367 
368   // Insertion within a single word can be done as a direct bitmask.
369   if (loWord == hi1Word) {
370     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
371     U.pVal[loWord] &= ~(mask << loBit);
372     U.pVal[loWord] |= (subBits.U.VAL << loBit);
373     return;
374   }
375 
376   // Insert on word boundaries.
377   if (loBit == 0) {
378     // Direct copy whole words.
379     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
380     memcpy(U.pVal + loWord, subBits.getRawData(),
381            numWholeSubWords * APINT_WORD_SIZE);
382 
383     // Mask+insert remaining bits.
384     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
385     if (remainingBits != 0) {
386       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
387       U.pVal[hi1Word] &= ~mask;
388       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
389     }
390     return;
391   }
392 
393   // General case - set/clear individual bits in dst based on src.
394   // TODO - there is scope for optimization here, but at the moment this code
395   // path is barely used so prefer readability over performance.
396   for (unsigned i = 0; i != subBitWidth; ++i) {
397     if (subBits[i])
398       setBit(bitPosition + i);
399     else
400       clearBit(bitPosition + i);
401   }
402 }
403 
404 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
405   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
406   subBits &= maskBits;
407   if (isSingleWord()) {
408     U.VAL &= ~(maskBits << bitPosition);
409     U.VAL |= subBits << bitPosition;
410     return;
411   }
412 
413   unsigned loBit = whichBit(bitPosition);
414   unsigned loWord = whichWord(bitPosition);
415   unsigned hiWord = whichWord(bitPosition + numBits - 1);
416   if (loWord == hiWord) {
417     U.pVal[loWord] &= ~(maskBits << loBit);
418     U.pVal[loWord] |= subBits << loBit;
419     return;
420   }
421 
422   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
423   unsigned wordBits = 8 * sizeof(WordType);
424   U.pVal[loWord] &= ~(maskBits << loBit);
425   U.pVal[loWord] |= subBits << loBit;
426 
427   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
428   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
429 }
430 
431 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
432   assert(numBits > 0 && "Can't extract zero bits");
433   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
434          "Illegal bit extraction");
435 
436   if (isSingleWord())
437     return APInt(numBits, U.VAL >> bitPosition);
438 
439   unsigned loBit = whichBit(bitPosition);
440   unsigned loWord = whichWord(bitPosition);
441   unsigned hiWord = whichWord(bitPosition + numBits - 1);
442 
443   // Single word result extracting bits from a single word source.
444   if (loWord == hiWord)
445     return APInt(numBits, U.pVal[loWord] >> loBit);
446 
447   // Extracting bits that start on a source word boundary can be done
448   // as a fast memory copy.
449   if (loBit == 0)
450     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
451 
452   // General case - shift + copy source words directly into place.
453   APInt Result(numBits, 0);
454   unsigned NumSrcWords = getNumWords();
455   unsigned NumDstWords = Result.getNumWords();
456 
457   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
458   for (unsigned word = 0; word < NumDstWords; ++word) {
459     uint64_t w0 = U.pVal[loWord + word];
460     uint64_t w1 =
461         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
462     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
463   }
464 
465   return Result.clearUnusedBits();
466 }
467 
468 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
469                                        unsigned bitPosition) const {
470   assert(numBits > 0 && "Can't extract zero bits");
471   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
472          "Illegal bit extraction");
473   assert(numBits <= 64 && "Illegal bit extraction");
474 
475   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
476   if (isSingleWord())
477     return (U.VAL >> bitPosition) & maskBits;
478 
479   unsigned loBit = whichBit(bitPosition);
480   unsigned loWord = whichWord(bitPosition);
481   unsigned hiWord = whichWord(bitPosition + numBits - 1);
482   if (loWord == hiWord)
483     return (U.pVal[loWord] >> loBit) & maskBits;
484 
485   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
486   unsigned wordBits = 8 * sizeof(WordType);
487   uint64_t retBits = U.pVal[loWord] >> loBit;
488   retBits |= U.pVal[hiWord] << (wordBits - loBit);
489   retBits &= maskBits;
490   return retBits;
491 }
492 
493 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
494   assert(!str.empty() && "Invalid string length");
495   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
496           radix == 36) &&
497          "Radix should be 2, 8, 10, 16, or 36!");
498 
499   size_t slen = str.size();
500 
501   // Each computation below needs to know if it's negative.
502   StringRef::iterator p = str.begin();
503   unsigned isNegative = *p == '-';
504   if (*p == '-' || *p == '+') {
505     p++;
506     slen--;
507     assert(slen && "String is only a sign, needs a value.");
508   }
509 
510   // For radixes of power-of-two values, the bits required is accurately and
511   // easily computed
512   if (radix == 2)
513     return slen + isNegative;
514   if (radix == 8)
515     return slen * 3 + isNegative;
516   if (radix == 16)
517     return slen * 4 + isNegative;
518 
519   // FIXME: base 36
520 
521   // This is grossly inefficient but accurate. We could probably do something
522   // with a computation of roughly slen*64/20 and then adjust by the value of
523   // the first few digits. But, I'm not sure how accurate that could be.
524 
525   // Compute a sufficient number of bits that is always large enough but might
526   // be too large. This avoids the assertion in the constructor. This
527   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
528   // bits in that case.
529   unsigned sufficient
530     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
531                  : (slen == 1 ? 7 : slen * 16/3);
532 
533   // Convert to the actual binary value.
534   APInt tmp(sufficient, StringRef(p, slen), radix);
535 
536   // Compute how many bits are required. If the log is infinite, assume we need
537   // just bit. If the log is exact and value is negative, then the value is
538   // MinSignedValue with (log + 1) bits.
539   unsigned log = tmp.logBase2();
540   if (log == (unsigned)-1) {
541     return isNegative + 1;
542   } else if (isNegative && tmp.isPowerOf2()) {
543     return isNegative + log;
544   } else {
545     return isNegative + log + 1;
546   }
547 }
548 
549 hash_code llvm::hash_value(const APInt &Arg) {
550   if (Arg.isSingleWord())
551     return hash_combine(Arg.U.VAL);
552 
553   return hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords());
554 }
555 
556 bool APInt::isSplat(unsigned SplatSizeInBits) const {
557   assert(getBitWidth() % SplatSizeInBits == 0 &&
558          "SplatSizeInBits must divide width!");
559   // We can check that all parts of an integer are equal by making use of a
560   // little trick: rotate and check if it's still the same value.
561   return *this == rotl(SplatSizeInBits);
562 }
563 
564 /// This function returns the high "numBits" bits of this APInt.
565 APInt APInt::getHiBits(unsigned numBits) const {
566   return this->lshr(BitWidth - numBits);
567 }
568 
569 /// This function returns the low "numBits" bits of this APInt.
570 APInt APInt::getLoBits(unsigned numBits) const {
571   APInt Result(getLowBitsSet(BitWidth, numBits));
572   Result &= *this;
573   return Result;
574 }
575 
576 /// Return a value containing V broadcasted over NewLen bits.
577 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
578   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
579 
580   APInt Val = V.zextOrSelf(NewLen);
581   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
582     Val |= Val << I;
583 
584   return Val;
585 }
586 
587 unsigned APInt::countLeadingZerosSlowCase() const {
588   unsigned Count = 0;
589   for (int i = getNumWords()-1; i >= 0; --i) {
590     uint64_t V = U.pVal[i];
591     if (V == 0)
592       Count += APINT_BITS_PER_WORD;
593     else {
594       Count += llvm::countLeadingZeros(V);
595       break;
596     }
597   }
598   // Adjust for unused bits in the most significant word (they are zero).
599   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
600   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
601   return Count;
602 }
603 
604 unsigned APInt::countLeadingOnesSlowCase() const {
605   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
606   unsigned shift;
607   if (!highWordBits) {
608     highWordBits = APINT_BITS_PER_WORD;
609     shift = 0;
610   } else {
611     shift = APINT_BITS_PER_WORD - highWordBits;
612   }
613   int i = getNumWords() - 1;
614   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
615   if (Count == highWordBits) {
616     for (i--; i >= 0; --i) {
617       if (U.pVal[i] == WORDTYPE_MAX)
618         Count += APINT_BITS_PER_WORD;
619       else {
620         Count += llvm::countLeadingOnes(U.pVal[i]);
621         break;
622       }
623     }
624   }
625   return Count;
626 }
627 
628 unsigned APInt::countTrailingZerosSlowCase() const {
629   unsigned Count = 0;
630   unsigned i = 0;
631   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
632     Count += APINT_BITS_PER_WORD;
633   if (i < getNumWords())
634     Count += llvm::countTrailingZeros(U.pVal[i]);
635   return std::min(Count, BitWidth);
636 }
637 
638 unsigned APInt::countTrailingOnesSlowCase() const {
639   unsigned Count = 0;
640   unsigned i = 0;
641   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
642     Count += APINT_BITS_PER_WORD;
643   if (i < getNumWords())
644     Count += llvm::countTrailingOnes(U.pVal[i]);
645   assert(Count <= BitWidth);
646   return Count;
647 }
648 
649 unsigned APInt::countPopulationSlowCase() const {
650   unsigned Count = 0;
651   for (unsigned i = 0; i < getNumWords(); ++i)
652     Count += llvm::countPopulation(U.pVal[i]);
653   return Count;
654 }
655 
656 bool APInt::intersectsSlowCase(const APInt &RHS) const {
657   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
658     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
659       return true;
660 
661   return false;
662 }
663 
664 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
665   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
666     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
667       return false;
668 
669   return true;
670 }
671 
672 APInt APInt::byteSwap() const {
673   assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!");
674   if (BitWidth == 16)
675     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
676   if (BitWidth == 32)
677     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
678   if (BitWidth == 48) {
679     unsigned Tmp1 = unsigned(U.VAL >> 16);
680     Tmp1 = ByteSwap_32(Tmp1);
681     uint16_t Tmp2 = uint16_t(U.VAL);
682     Tmp2 = ByteSwap_16(Tmp2);
683     return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1);
684   }
685   if (BitWidth == 64)
686     return APInt(BitWidth, ByteSwap_64(U.VAL));
687 
688   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
689   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
690     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
691   if (Result.BitWidth != BitWidth) {
692     Result.lshrInPlace(Result.BitWidth - BitWidth);
693     Result.BitWidth = BitWidth;
694   }
695   return Result;
696 }
697 
698 APInt APInt::reverseBits() const {
699   switch (BitWidth) {
700   case 64:
701     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
702   case 32:
703     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
704   case 16:
705     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
706   case 8:
707     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
708   default:
709     break;
710   }
711 
712   APInt Val(*this);
713   APInt Reversed(BitWidth, 0);
714   unsigned S = BitWidth;
715 
716   for (; Val != 0; Val.lshrInPlace(1)) {
717     Reversed <<= 1;
718     Reversed |= Val[0];
719     --S;
720   }
721 
722   Reversed <<= S;
723   return Reversed;
724 }
725 
726 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
727   // Fast-path a common case.
728   if (A == B) return A;
729 
730   // Corner cases: if either operand is zero, the other is the gcd.
731   if (!A) return B;
732   if (!B) return A;
733 
734   // Count common powers of 2 and remove all other powers of 2.
735   unsigned Pow2;
736   {
737     unsigned Pow2_A = A.countTrailingZeros();
738     unsigned Pow2_B = B.countTrailingZeros();
739     if (Pow2_A > Pow2_B) {
740       A.lshrInPlace(Pow2_A - Pow2_B);
741       Pow2 = Pow2_B;
742     } else if (Pow2_B > Pow2_A) {
743       B.lshrInPlace(Pow2_B - Pow2_A);
744       Pow2 = Pow2_A;
745     } else {
746       Pow2 = Pow2_A;
747     }
748   }
749 
750   // Both operands are odd multiples of 2^Pow_2:
751   //
752   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
753   //
754   // This is a modified version of Stein's algorithm, taking advantage of
755   // efficient countTrailingZeros().
756   while (A != B) {
757     if (A.ugt(B)) {
758       A -= B;
759       A.lshrInPlace(A.countTrailingZeros() - Pow2);
760     } else {
761       B -= A;
762       B.lshrInPlace(B.countTrailingZeros() - Pow2);
763     }
764   }
765 
766   return A;
767 }
768 
769 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
770   uint64_t I = bit_cast<uint64_t>(Double);
771 
772   // Get the sign bit from the highest order bit
773   bool isNeg = I >> 63;
774 
775   // Get the 11-bit exponent and adjust for the 1023 bit bias
776   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
777 
778   // If the exponent is negative, the value is < 0 so just return 0.
779   if (exp < 0)
780     return APInt(width, 0u);
781 
782   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
783   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
784 
785   // If the exponent doesn't shift all bits out of the mantissa
786   if (exp < 52)
787     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
788                     APInt(width, mantissa >> (52 - exp));
789 
790   // If the client didn't provide enough bits for us to shift the mantissa into
791   // then the result is undefined, just return 0
792   if (width <= exp - 52)
793     return APInt(width, 0);
794 
795   // Otherwise, we have to shift the mantissa bits up to the right location
796   APInt Tmp(width, mantissa);
797   Tmp <<= (unsigned)exp - 52;
798   return isNeg ? -Tmp : Tmp;
799 }
800 
801 /// This function converts this APInt to a double.
802 /// The layout for double is as following (IEEE Standard 754):
803 ///  --------------------------------------
804 /// |  Sign    Exponent    Fraction    Bias |
805 /// |-------------------------------------- |
806 /// |  1[63]   11[62-52]   52[51-00]   1023 |
807 ///  --------------------------------------
808 double APInt::roundToDouble(bool isSigned) const {
809 
810   // Handle the simple case where the value is contained in one uint64_t.
811   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
812   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
813     if (isSigned) {
814       int64_t sext = SignExtend64(getWord(0), BitWidth);
815       return double(sext);
816     } else
817       return double(getWord(0));
818   }
819 
820   // Determine if the value is negative.
821   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
822 
823   // Construct the absolute value if we're negative.
824   APInt Tmp(isNeg ? -(*this) : (*this));
825 
826   // Figure out how many bits we're using.
827   unsigned n = Tmp.getActiveBits();
828 
829   // The exponent (without bias normalization) is just the number of bits
830   // we are using. Note that the sign bit is gone since we constructed the
831   // absolute value.
832   uint64_t exp = n;
833 
834   // Return infinity for exponent overflow
835   if (exp > 1023) {
836     if (!isSigned || !isNeg)
837       return std::numeric_limits<double>::infinity();
838     else
839       return -std::numeric_limits<double>::infinity();
840   }
841   exp += 1023; // Increment for 1023 bias
842 
843   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
844   // extract the high 52 bits from the correct words in pVal.
845   uint64_t mantissa;
846   unsigned hiWord = whichWord(n-1);
847   if (hiWord == 0) {
848     mantissa = Tmp.U.pVal[0];
849     if (n > 52)
850       mantissa >>= n - 52; // shift down, we want the top 52 bits.
851   } else {
852     assert(hiWord > 0 && "huh?");
853     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
854     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
855     mantissa = hibits | lobits;
856   }
857 
858   // The leading bit of mantissa is implicit, so get rid of it.
859   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
860   uint64_t I = sign | (exp << 52) | mantissa;
861   return bit_cast<double>(I);
862 }
863 
864 // Truncate to new width.
865 APInt APInt::trunc(unsigned width) const {
866   assert(width < BitWidth && "Invalid APInt Truncate request");
867   assert(width && "Can't truncate to 0 bits");
868 
869   if (width <= APINT_BITS_PER_WORD)
870     return APInt(width, getRawData()[0]);
871 
872   APInt Result(getMemory(getNumWords(width)), width);
873 
874   // Copy full words.
875   unsigned i;
876   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
877     Result.U.pVal[i] = U.pVal[i];
878 
879   // Truncate and copy any partial word.
880   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
881   if (bits != 0)
882     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
883 
884   return Result;
885 }
886 
887 // Sign extend to a new width.
888 APInt APInt::sext(unsigned Width) const {
889   assert(Width > BitWidth && "Invalid APInt SignExtend request");
890 
891   if (Width <= APINT_BITS_PER_WORD)
892     return APInt(Width, SignExtend64(U.VAL, BitWidth));
893 
894   APInt Result(getMemory(getNumWords(Width)), Width);
895 
896   // Copy words.
897   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
898 
899   // Sign extend the last word since there may be unused bits in the input.
900   Result.U.pVal[getNumWords() - 1] =
901       SignExtend64(Result.U.pVal[getNumWords() - 1],
902                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
903 
904   // Fill with sign bits.
905   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
906               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
907   Result.clearUnusedBits();
908   return Result;
909 }
910 
911 //  Zero extend to a new width.
912 APInt APInt::zext(unsigned width) const {
913   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
914 
915   if (width <= APINT_BITS_PER_WORD)
916     return APInt(width, U.VAL);
917 
918   APInt Result(getMemory(getNumWords(width)), width);
919 
920   // Copy words.
921   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
922 
923   // Zero remaining words.
924   std::memset(Result.U.pVal + getNumWords(), 0,
925               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
926 
927   return Result;
928 }
929 
930 APInt APInt::zextOrTrunc(unsigned width) const {
931   if (BitWidth < width)
932     return zext(width);
933   if (BitWidth > width)
934     return trunc(width);
935   return *this;
936 }
937 
938 APInt APInt::sextOrTrunc(unsigned width) const {
939   if (BitWidth < width)
940     return sext(width);
941   if (BitWidth > width)
942     return trunc(width);
943   return *this;
944 }
945 
946 APInt APInt::zextOrSelf(unsigned width) const {
947   if (BitWidth < width)
948     return zext(width);
949   return *this;
950 }
951 
952 APInt APInt::sextOrSelf(unsigned width) const {
953   if (BitWidth < width)
954     return sext(width);
955   return *this;
956 }
957 
958 /// Arithmetic right-shift this APInt by shiftAmt.
959 /// Arithmetic right-shift function.
960 void APInt::ashrInPlace(const APInt &shiftAmt) {
961   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
962 }
963 
964 /// Arithmetic right-shift this APInt by shiftAmt.
965 /// Arithmetic right-shift function.
966 void APInt::ashrSlowCase(unsigned ShiftAmt) {
967   // Don't bother performing a no-op shift.
968   if (!ShiftAmt)
969     return;
970 
971   // Save the original sign bit for later.
972   bool Negative = isNegative();
973 
974   // WordShift is the inter-part shift; BitShift is intra-part shift.
975   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
976   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
977 
978   unsigned WordsToMove = getNumWords() - WordShift;
979   if (WordsToMove != 0) {
980     // Sign extend the last word to fill in the unused bits.
981     U.pVal[getNumWords() - 1] = SignExtend64(
982         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
983 
984     // Fastpath for moving by whole words.
985     if (BitShift == 0) {
986       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
987     } else {
988       // Move the words containing significant bits.
989       for (unsigned i = 0; i != WordsToMove - 1; ++i)
990         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
991                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
992 
993       // Handle the last word which has no high bits to copy.
994       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
995       // Sign extend one more time.
996       U.pVal[WordsToMove - 1] =
997           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
998     }
999   }
1000 
1001   // Fill in the remainder based on the original sign.
1002   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1003               WordShift * APINT_WORD_SIZE);
1004   clearUnusedBits();
1005 }
1006 
1007 /// Logical right-shift this APInt by shiftAmt.
1008 /// Logical right-shift function.
1009 void APInt::lshrInPlace(const APInt &shiftAmt) {
1010   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1011 }
1012 
1013 /// Logical right-shift this APInt by shiftAmt.
1014 /// Logical right-shift function.
1015 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1016   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1017 }
1018 
1019 /// Left-shift this APInt by shiftAmt.
1020 /// Left-shift function.
1021 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1022   // It's undefined behavior in C to shift by BitWidth or greater.
1023   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1024   return *this;
1025 }
1026 
1027 void APInt::shlSlowCase(unsigned ShiftAmt) {
1028   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1029   clearUnusedBits();
1030 }
1031 
1032 // Calculate the rotate amount modulo the bit width.
1033 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1034   unsigned rotBitWidth = rotateAmt.getBitWidth();
1035   APInt rot = rotateAmt;
1036   if (rotBitWidth < BitWidth) {
1037     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1038     // e.g. APInt(1, 32) would give APInt(1, 0).
1039     rot = rotateAmt.zext(BitWidth);
1040   }
1041   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1042   return rot.getLimitedValue(BitWidth);
1043 }
1044 
1045 APInt APInt::rotl(const APInt &rotateAmt) const {
1046   return rotl(rotateModulo(BitWidth, rotateAmt));
1047 }
1048 
1049 APInt APInt::rotl(unsigned rotateAmt) const {
1050   rotateAmt %= BitWidth;
1051   if (rotateAmt == 0)
1052     return *this;
1053   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1054 }
1055 
1056 APInt APInt::rotr(const APInt &rotateAmt) const {
1057   return rotr(rotateModulo(BitWidth, rotateAmt));
1058 }
1059 
1060 APInt APInt::rotr(unsigned rotateAmt) const {
1061   rotateAmt %= BitWidth;
1062   if (rotateAmt == 0)
1063     return *this;
1064   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1065 }
1066 
1067 // Square Root - this method computes and returns the square root of "this".
1068 // Three mechanisms are used for computation. For small values (<= 5 bits),
1069 // a table lookup is done. This gets some performance for common cases. For
1070 // values using less than 52 bits, the value is converted to double and then
1071 // the libc sqrt function is called. The result is rounded and then converted
1072 // back to a uint64_t which is then used to construct the result. Finally,
1073 // the Babylonian method for computing square roots is used.
1074 APInt APInt::sqrt() const {
1075 
1076   // Determine the magnitude of the value.
1077   unsigned magnitude = getActiveBits();
1078 
1079   // Use a fast table for some small values. This also gets rid of some
1080   // rounding errors in libc sqrt for small values.
1081   if (magnitude <= 5) {
1082     static const uint8_t results[32] = {
1083       /*     0 */ 0,
1084       /*  1- 2 */ 1, 1,
1085       /*  3- 6 */ 2, 2, 2, 2,
1086       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1087       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1088       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1089       /*    31 */ 6
1090     };
1091     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1092   }
1093 
1094   // If the magnitude of the value fits in less than 52 bits (the precision of
1095   // an IEEE double precision floating point value), then we can use the
1096   // libc sqrt function which will probably use a hardware sqrt computation.
1097   // This should be faster than the algorithm below.
1098   if (magnitude < 52) {
1099     return APInt(BitWidth,
1100                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1101                                                                : U.pVal[0])))));
1102   }
1103 
1104   // Okay, all the short cuts are exhausted. We must compute it. The following
1105   // is a classical Babylonian method for computing the square root. This code
1106   // was adapted to APInt from a wikipedia article on such computations.
1107   // See http://www.wikipedia.org/ and go to the page named
1108   // Calculate_an_integer_square_root.
1109   unsigned nbits = BitWidth, i = 4;
1110   APInt testy(BitWidth, 16);
1111   APInt x_old(BitWidth, 1);
1112   APInt x_new(BitWidth, 0);
1113   APInt two(BitWidth, 2);
1114 
1115   // Select a good starting value using binary logarithms.
1116   for (;; i += 2, testy = testy.shl(2))
1117     if (i >= nbits || this->ule(testy)) {
1118       x_old = x_old.shl(i / 2);
1119       break;
1120     }
1121 
1122   // Use the Babylonian method to arrive at the integer square root:
1123   for (;;) {
1124     x_new = (this->udiv(x_old) + x_old).udiv(two);
1125     if (x_old.ule(x_new))
1126       break;
1127     x_old = x_new;
1128   }
1129 
1130   // Make sure we return the closest approximation
1131   // NOTE: The rounding calculation below is correct. It will produce an
1132   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1133   // determined to be a rounding issue with pari/gp as it begins to use a
1134   // floating point representation after 192 bits. There are no discrepancies
1135   // between this algorithm and pari/gp for bit widths < 192 bits.
1136   APInt square(x_old * x_old);
1137   APInt nextSquare((x_old + 1) * (x_old +1));
1138   if (this->ult(square))
1139     return x_old;
1140   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1141   APInt midpoint((nextSquare - square).udiv(two));
1142   APInt offset(*this - square);
1143   if (offset.ult(midpoint))
1144     return x_old;
1145   return x_old + 1;
1146 }
1147 
1148 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1149 /// iterative extended Euclidean algorithm is used to solve for this value,
1150 /// however we simplify it to speed up calculating only the inverse, and take
1151 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1152 /// (potentially large) APInts around.
1153 /// WARNING: a value of '0' may be returned,
1154 ///          signifying that no multiplicative inverse exists!
1155 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1156   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1157 
1158   // Using the properties listed at the following web page (accessed 06/21/08):
1159   //   http://www.numbertheory.org/php/euclid.html
1160   // (especially the properties numbered 3, 4 and 9) it can be proved that
1161   // BitWidth bits suffice for all the computations in the algorithm implemented
1162   // below. More precisely, this number of bits suffice if the multiplicative
1163   // inverse exists, but may not suffice for the general extended Euclidean
1164   // algorithm.
1165 
1166   APInt r[2] = { modulo, *this };
1167   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1168   APInt q(BitWidth, 0);
1169 
1170   unsigned i;
1171   for (i = 0; r[i^1] != 0; i ^= 1) {
1172     // An overview of the math without the confusing bit-flipping:
1173     // q = r[i-2] / r[i-1]
1174     // r[i] = r[i-2] % r[i-1]
1175     // t[i] = t[i-2] - t[i-1] * q
1176     udivrem(r[i], r[i^1], q, r[i]);
1177     t[i] -= t[i^1] * q;
1178   }
1179 
1180   // If this APInt and the modulo are not coprime, there is no multiplicative
1181   // inverse, so return 0. We check this by looking at the next-to-last
1182   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1183   // algorithm.
1184   if (r[i] != 1)
1185     return APInt(BitWidth, 0);
1186 
1187   // The next-to-last t is the multiplicative inverse.  However, we are
1188   // interested in a positive inverse. Calculate a positive one from a negative
1189   // one if necessary. A simple addition of the modulo suffices because
1190   // abs(t[i]) is known to be less than *this/2 (see the link above).
1191   if (t[i].isNegative())
1192     t[i] += modulo;
1193 
1194   return std::move(t[i]);
1195 }
1196 
1197 /// Calculate the magic numbers required to implement a signed integer division
1198 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1199 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1200 /// Warren, Jr., chapter 10.
1201 APInt::ms APInt::magic() const {
1202   const APInt& d = *this;
1203   unsigned p;
1204   APInt ad, anc, delta, q1, r1, q2, r2, t;
1205   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1206   struct ms mag;
1207 
1208   ad = d.abs();
1209   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1210   anc = t - 1 - t.urem(ad);   // absolute value of nc
1211   p = d.getBitWidth() - 1;    // initialize p
1212   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1213   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1214   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1215   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1216   do {
1217     p = p + 1;
1218     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1219     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1220     if (r1.uge(anc)) {  // must be unsigned comparison
1221       q1 = q1 + 1;
1222       r1 = r1 - anc;
1223     }
1224     q2 = q2<<1;          // update q2 = 2p/abs(d)
1225     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1226     if (r2.uge(ad)) {   // must be unsigned comparison
1227       q2 = q2 + 1;
1228       r2 = r2 - ad;
1229     }
1230     delta = ad - r2;
1231   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1232 
1233   mag.m = q2 + 1;
1234   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1235   mag.s = p - d.getBitWidth();          // resulting shift
1236   return mag;
1237 }
1238 
1239 /// Calculate the magic numbers required to implement an unsigned integer
1240 /// division by a constant as a sequence of multiplies, adds and shifts.
1241 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1242 /// S. Warren, Jr., chapter 10.
1243 /// LeadingZeros can be used to simplify the calculation if the upper bits
1244 /// of the divided value are known zero.
1245 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1246   const APInt& d = *this;
1247   unsigned p;
1248   APInt nc, delta, q1, r1, q2, r2;
1249   struct mu magu;
1250   magu.a = 0;               // initialize "add" indicator
1251   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1252   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1253   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1254 
1255   nc = allOnes - (allOnes - d).urem(d);
1256   p = d.getBitWidth() - 1;  // initialize p
1257   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1258   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1259   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1260   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1261   do {
1262     p = p + 1;
1263     if (r1.uge(nc - r1)) {
1264       q1 = q1 + q1 + 1;  // update q1
1265       r1 = r1 + r1 - nc; // update r1
1266     }
1267     else {
1268       q1 = q1+q1; // update q1
1269       r1 = r1+r1; // update r1
1270     }
1271     if ((r2 + 1).uge(d - r2)) {
1272       if (q2.uge(signedMax)) magu.a = 1;
1273       q2 = q2+q2 + 1;     // update q2
1274       r2 = r2+r2 + 1 - d; // update r2
1275     }
1276     else {
1277       if (q2.uge(signedMin)) magu.a = 1;
1278       q2 = q2+q2;     // update q2
1279       r2 = r2+r2 + 1; // update r2
1280     }
1281     delta = d - 1 - r2;
1282   } while (p < d.getBitWidth()*2 &&
1283            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1284   magu.m = q2 + 1; // resulting magic number
1285   magu.s = p - d.getBitWidth();  // resulting shift
1286   return magu;
1287 }
1288 
1289 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1290 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1291 /// variables here have the same names as in the algorithm. Comments explain
1292 /// the algorithm and any deviation from it.
1293 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1294                      unsigned m, unsigned n) {
1295   assert(u && "Must provide dividend");
1296   assert(v && "Must provide divisor");
1297   assert(q && "Must provide quotient");
1298   assert(u != v && u != q && v != q && "Must use different memory");
1299   assert(n>1 && "n must be > 1");
1300 
1301   // b denotes the base of the number system. In our case b is 2^32.
1302   const uint64_t b = uint64_t(1) << 32;
1303 
1304 // The DEBUG macros here tend to be spam in the debug output if you're not
1305 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1306 #ifdef KNUTH_DEBUG
1307 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1308 #else
1309 #define DEBUG_KNUTH(X) do {} while(false)
1310 #endif
1311 
1312   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1313   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1314   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1315   DEBUG_KNUTH(dbgs() << " by");
1316   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1317   DEBUG_KNUTH(dbgs() << '\n');
1318   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1319   // u and v by d. Note that we have taken Knuth's advice here to use a power
1320   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1321   // 2 allows us to shift instead of multiply and it is easy to determine the
1322   // shift amount from the leading zeros.  We are basically normalizing the u
1323   // and v so that its high bits are shifted to the top of v's range without
1324   // overflow. Note that this can require an extra word in u so that u must
1325   // be of length m+n+1.
1326   unsigned shift = countLeadingZeros(v[n-1]);
1327   uint32_t v_carry = 0;
1328   uint32_t u_carry = 0;
1329   if (shift) {
1330     for (unsigned i = 0; i < m+n; ++i) {
1331       uint32_t u_tmp = u[i] >> (32 - shift);
1332       u[i] = (u[i] << shift) | u_carry;
1333       u_carry = u_tmp;
1334     }
1335     for (unsigned i = 0; i < n; ++i) {
1336       uint32_t v_tmp = v[i] >> (32 - shift);
1337       v[i] = (v[i] << shift) | v_carry;
1338       v_carry = v_tmp;
1339     }
1340   }
1341   u[m+n] = u_carry;
1342 
1343   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1344   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1345   DEBUG_KNUTH(dbgs() << " by");
1346   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1347   DEBUG_KNUTH(dbgs() << '\n');
1348 
1349   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1350   int j = m;
1351   do {
1352     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1353     // D3. [Calculate q'.].
1354     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1355     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1356     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1357     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1358     // on v[n-2] determines at high speed most of the cases in which the trial
1359     // value qp is one too large, and it eliminates all cases where qp is two
1360     // too large.
1361     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1362     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1363     uint64_t qp = dividend / v[n-1];
1364     uint64_t rp = dividend % v[n-1];
1365     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1366       qp--;
1367       rp += v[n-1];
1368       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1369         qp--;
1370     }
1371     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1372 
1373     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1374     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1375     // consists of a simple multiplication by a one-place number, combined with
1376     // a subtraction.
1377     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1378     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1379     // true value plus b**(n+1), namely as the b's complement of
1380     // the true value, and a "borrow" to the left should be remembered.
1381     int64_t borrow = 0;
1382     for (unsigned i = 0; i < n; ++i) {
1383       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1384       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1385       u[j+i] = Lo_32(subres);
1386       borrow = Hi_32(p) - Hi_32(subres);
1387       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1388                         << ", borrow = " << borrow << '\n');
1389     }
1390     bool isNeg = u[j+n] < borrow;
1391     u[j+n] -= Lo_32(borrow);
1392 
1393     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1394     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1395     DEBUG_KNUTH(dbgs() << '\n');
1396 
1397     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1398     // negative, go to step D6; otherwise go on to step D7.
1399     q[j] = Lo_32(qp);
1400     if (isNeg) {
1401       // D6. [Add back]. The probability that this step is necessary is very
1402       // small, on the order of only 2/b. Make sure that test data accounts for
1403       // this possibility. Decrease q[j] by 1
1404       q[j]--;
1405       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1406       // A carry will occur to the left of u[j+n], and it should be ignored
1407       // since it cancels with the borrow that occurred in D4.
1408       bool carry = false;
1409       for (unsigned i = 0; i < n; i++) {
1410         uint32_t limit = std::min(u[j+i],v[i]);
1411         u[j+i] += v[i] + carry;
1412         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1413       }
1414       u[j+n] += carry;
1415     }
1416     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1417     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1418     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1419 
1420     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1421   } while (--j >= 0);
1422 
1423   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1424   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1425   DEBUG_KNUTH(dbgs() << '\n');
1426 
1427   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1428   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1429   // compute the remainder (urem uses this).
1430   if (r) {
1431     // The value d is expressed by the "shift" value above since we avoided
1432     // multiplication by d by using a shift left. So, all we have to do is
1433     // shift right here.
1434     if (shift) {
1435       uint32_t carry = 0;
1436       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1437       for (int i = n-1; i >= 0; i--) {
1438         r[i] = (u[i] >> shift) | carry;
1439         carry = u[i] << (32 - shift);
1440         DEBUG_KNUTH(dbgs() << " " << r[i]);
1441       }
1442     } else {
1443       for (int i = n-1; i >= 0; i--) {
1444         r[i] = u[i];
1445         DEBUG_KNUTH(dbgs() << " " << r[i]);
1446       }
1447     }
1448     DEBUG_KNUTH(dbgs() << '\n');
1449   }
1450   DEBUG_KNUTH(dbgs() << '\n');
1451 }
1452 
1453 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1454                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1455   assert(lhsWords >= rhsWords && "Fractional result");
1456 
1457   // First, compose the values into an array of 32-bit words instead of
1458   // 64-bit words. This is a necessity of both the "short division" algorithm
1459   // and the Knuth "classical algorithm" which requires there to be native
1460   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1461   // can't use 64-bit operands here because we don't have native results of
1462   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1463   // work on large-endian machines.
1464   unsigned n = rhsWords * 2;
1465   unsigned m = (lhsWords * 2) - n;
1466 
1467   // Allocate space for the temporary values we need either on the stack, if
1468   // it will fit, or on the heap if it won't.
1469   uint32_t SPACE[128];
1470   uint32_t *U = nullptr;
1471   uint32_t *V = nullptr;
1472   uint32_t *Q = nullptr;
1473   uint32_t *R = nullptr;
1474   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1475     U = &SPACE[0];
1476     V = &SPACE[m+n+1];
1477     Q = &SPACE[(m+n+1) + n];
1478     if (Remainder)
1479       R = &SPACE[(m+n+1) + n + (m+n)];
1480   } else {
1481     U = new uint32_t[m + n + 1];
1482     V = new uint32_t[n];
1483     Q = new uint32_t[m+n];
1484     if (Remainder)
1485       R = new uint32_t[n];
1486   }
1487 
1488   // Initialize the dividend
1489   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1490   for (unsigned i = 0; i < lhsWords; ++i) {
1491     uint64_t tmp = LHS[i];
1492     U[i * 2] = Lo_32(tmp);
1493     U[i * 2 + 1] = Hi_32(tmp);
1494   }
1495   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1496 
1497   // Initialize the divisor
1498   memset(V, 0, (n)*sizeof(uint32_t));
1499   for (unsigned i = 0; i < rhsWords; ++i) {
1500     uint64_t tmp = RHS[i];
1501     V[i * 2] = Lo_32(tmp);
1502     V[i * 2 + 1] = Hi_32(tmp);
1503   }
1504 
1505   // initialize the quotient and remainder
1506   memset(Q, 0, (m+n) * sizeof(uint32_t));
1507   if (Remainder)
1508     memset(R, 0, n * sizeof(uint32_t));
1509 
1510   // Now, adjust m and n for the Knuth division. n is the number of words in
1511   // the divisor. m is the number of words by which the dividend exceeds the
1512   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1513   // contain any zero words or the Knuth algorithm fails.
1514   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1515     n--;
1516     m++;
1517   }
1518   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1519     m--;
1520 
1521   // If we're left with only a single word for the divisor, Knuth doesn't work
1522   // so we implement the short division algorithm here. This is much simpler
1523   // and faster because we are certain that we can divide a 64-bit quantity
1524   // by a 32-bit quantity at hardware speed and short division is simply a
1525   // series of such operations. This is just like doing short division but we
1526   // are using base 2^32 instead of base 10.
1527   assert(n != 0 && "Divide by zero?");
1528   if (n == 1) {
1529     uint32_t divisor = V[0];
1530     uint32_t remainder = 0;
1531     for (int i = m; i >= 0; i--) {
1532       uint64_t partial_dividend = Make_64(remainder, U[i]);
1533       if (partial_dividend == 0) {
1534         Q[i] = 0;
1535         remainder = 0;
1536       } else if (partial_dividend < divisor) {
1537         Q[i] = 0;
1538         remainder = Lo_32(partial_dividend);
1539       } else if (partial_dividend == divisor) {
1540         Q[i] = 1;
1541         remainder = 0;
1542       } else {
1543         Q[i] = Lo_32(partial_dividend / divisor);
1544         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1545       }
1546     }
1547     if (R)
1548       R[0] = remainder;
1549   } else {
1550     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1551     // case n > 1.
1552     KnuthDiv(U, V, Q, R, m, n);
1553   }
1554 
1555   // If the caller wants the quotient
1556   if (Quotient) {
1557     for (unsigned i = 0; i < lhsWords; ++i)
1558       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1559   }
1560 
1561   // If the caller wants the remainder
1562   if (Remainder) {
1563     for (unsigned i = 0; i < rhsWords; ++i)
1564       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1565   }
1566 
1567   // Clean up the memory we allocated.
1568   if (U != &SPACE[0]) {
1569     delete [] U;
1570     delete [] V;
1571     delete [] Q;
1572     delete [] R;
1573   }
1574 }
1575 
1576 APInt APInt::udiv(const APInt &RHS) const {
1577   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1578 
1579   // First, deal with the easy case
1580   if (isSingleWord()) {
1581     assert(RHS.U.VAL != 0 && "Divide by zero?");
1582     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1583   }
1584 
1585   // Get some facts about the LHS and RHS number of bits and words
1586   unsigned lhsWords = getNumWords(getActiveBits());
1587   unsigned rhsBits  = RHS.getActiveBits();
1588   unsigned rhsWords = getNumWords(rhsBits);
1589   assert(rhsWords && "Divided by zero???");
1590 
1591   // Deal with some degenerate cases
1592   if (!lhsWords)
1593     // 0 / X ===> 0
1594     return APInt(BitWidth, 0);
1595   if (rhsBits == 1)
1596     // X / 1 ===> X
1597     return *this;
1598   if (lhsWords < rhsWords || this->ult(RHS))
1599     // X / Y ===> 0, iff X < Y
1600     return APInt(BitWidth, 0);
1601   if (*this == RHS)
1602     // X / X ===> 1
1603     return APInt(BitWidth, 1);
1604   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1605     // All high words are zero, just use native divide
1606     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1607 
1608   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1609   APInt Quotient(BitWidth, 0); // to hold result.
1610   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1611   return Quotient;
1612 }
1613 
1614 APInt APInt::udiv(uint64_t RHS) const {
1615   assert(RHS != 0 && "Divide by zero?");
1616 
1617   // First, deal with the easy case
1618   if (isSingleWord())
1619     return APInt(BitWidth, U.VAL / RHS);
1620 
1621   // Get some facts about the LHS words.
1622   unsigned lhsWords = getNumWords(getActiveBits());
1623 
1624   // Deal with some degenerate cases
1625   if (!lhsWords)
1626     // 0 / X ===> 0
1627     return APInt(BitWidth, 0);
1628   if (RHS == 1)
1629     // X / 1 ===> X
1630     return *this;
1631   if (this->ult(RHS))
1632     // X / Y ===> 0, iff X < Y
1633     return APInt(BitWidth, 0);
1634   if (*this == RHS)
1635     // X / X ===> 1
1636     return APInt(BitWidth, 1);
1637   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1638     // All high words are zero, just use native divide
1639     return APInt(BitWidth, this->U.pVal[0] / RHS);
1640 
1641   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1642   APInt Quotient(BitWidth, 0); // to hold result.
1643   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1644   return Quotient;
1645 }
1646 
1647 APInt APInt::sdiv(const APInt &RHS) const {
1648   if (isNegative()) {
1649     if (RHS.isNegative())
1650       return (-(*this)).udiv(-RHS);
1651     return -((-(*this)).udiv(RHS));
1652   }
1653   if (RHS.isNegative())
1654     return -(this->udiv(-RHS));
1655   return this->udiv(RHS);
1656 }
1657 
1658 APInt APInt::sdiv(int64_t RHS) const {
1659   if (isNegative()) {
1660     if (RHS < 0)
1661       return (-(*this)).udiv(-RHS);
1662     return -((-(*this)).udiv(RHS));
1663   }
1664   if (RHS < 0)
1665     return -(this->udiv(-RHS));
1666   return this->udiv(RHS);
1667 }
1668 
1669 APInt APInt::urem(const APInt &RHS) const {
1670   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1671   if (isSingleWord()) {
1672     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1673     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1674   }
1675 
1676   // Get some facts about the LHS
1677   unsigned lhsWords = getNumWords(getActiveBits());
1678 
1679   // Get some facts about the RHS
1680   unsigned rhsBits = RHS.getActiveBits();
1681   unsigned rhsWords = getNumWords(rhsBits);
1682   assert(rhsWords && "Performing remainder operation by zero ???");
1683 
1684   // Check the degenerate cases
1685   if (lhsWords == 0)
1686     // 0 % Y ===> 0
1687     return APInt(BitWidth, 0);
1688   if (rhsBits == 1)
1689     // X % 1 ===> 0
1690     return APInt(BitWidth, 0);
1691   if (lhsWords < rhsWords || this->ult(RHS))
1692     // X % Y ===> X, iff X < Y
1693     return *this;
1694   if (*this == RHS)
1695     // X % X == 0;
1696     return APInt(BitWidth, 0);
1697   if (lhsWords == 1)
1698     // All high words are zero, just use native remainder
1699     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1700 
1701   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1702   APInt Remainder(BitWidth, 0);
1703   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1704   return Remainder;
1705 }
1706 
1707 uint64_t APInt::urem(uint64_t RHS) const {
1708   assert(RHS != 0 && "Remainder by zero?");
1709 
1710   if (isSingleWord())
1711     return U.VAL % RHS;
1712 
1713   // Get some facts about the LHS
1714   unsigned lhsWords = getNumWords(getActiveBits());
1715 
1716   // Check the degenerate cases
1717   if (lhsWords == 0)
1718     // 0 % Y ===> 0
1719     return 0;
1720   if (RHS == 1)
1721     // X % 1 ===> 0
1722     return 0;
1723   if (this->ult(RHS))
1724     // X % Y ===> X, iff X < Y
1725     return getZExtValue();
1726   if (*this == RHS)
1727     // X % X == 0;
1728     return 0;
1729   if (lhsWords == 1)
1730     // All high words are zero, just use native remainder
1731     return U.pVal[0] % RHS;
1732 
1733   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1734   uint64_t Remainder;
1735   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1736   return Remainder;
1737 }
1738 
1739 APInt APInt::srem(const APInt &RHS) const {
1740   if (isNegative()) {
1741     if (RHS.isNegative())
1742       return -((-(*this)).urem(-RHS));
1743     return -((-(*this)).urem(RHS));
1744   }
1745   if (RHS.isNegative())
1746     return this->urem(-RHS);
1747   return this->urem(RHS);
1748 }
1749 
1750 int64_t APInt::srem(int64_t RHS) const {
1751   if (isNegative()) {
1752     if (RHS < 0)
1753       return -((-(*this)).urem(-RHS));
1754     return -((-(*this)).urem(RHS));
1755   }
1756   if (RHS < 0)
1757     return this->urem(-RHS);
1758   return this->urem(RHS);
1759 }
1760 
1761 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1762                     APInt &Quotient, APInt &Remainder) {
1763   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1764   unsigned BitWidth = LHS.BitWidth;
1765 
1766   // First, deal with the easy case
1767   if (LHS.isSingleWord()) {
1768     assert(RHS.U.VAL != 0 && "Divide by zero?");
1769     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1770     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1771     Quotient = APInt(BitWidth, QuotVal);
1772     Remainder = APInt(BitWidth, RemVal);
1773     return;
1774   }
1775 
1776   // Get some size facts about the dividend and divisor
1777   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1778   unsigned rhsBits  = RHS.getActiveBits();
1779   unsigned rhsWords = getNumWords(rhsBits);
1780   assert(rhsWords && "Performing divrem operation by zero ???");
1781 
1782   // Check the degenerate cases
1783   if (lhsWords == 0) {
1784     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1785     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1786     return;
1787   }
1788 
1789   if (rhsBits == 1) {
1790     Quotient = LHS;                   // X / 1 ===> X
1791     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1792   }
1793 
1794   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1795     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1796     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1797     return;
1798   }
1799 
1800   if (LHS == RHS) {
1801     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1802     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1803     return;
1804   }
1805 
1806   // Make sure there is enough space to hold the results.
1807   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1808   // change the size. This is necessary if Quotient or Remainder is aliased
1809   // with LHS or RHS.
1810   Quotient.reallocate(BitWidth);
1811   Remainder.reallocate(BitWidth);
1812 
1813   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1814     // There is only one word to consider so use the native versions.
1815     uint64_t lhsValue = LHS.U.pVal[0];
1816     uint64_t rhsValue = RHS.U.pVal[0];
1817     Quotient = lhsValue / rhsValue;
1818     Remainder = lhsValue % rhsValue;
1819     return;
1820   }
1821 
1822   // Okay, lets do it the long way
1823   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1824          Remainder.U.pVal);
1825   // Clear the rest of the Quotient and Remainder.
1826   std::memset(Quotient.U.pVal + lhsWords, 0,
1827               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1828   std::memset(Remainder.U.pVal + rhsWords, 0,
1829               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1830 }
1831 
1832 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1833                     uint64_t &Remainder) {
1834   assert(RHS != 0 && "Divide by zero?");
1835   unsigned BitWidth = LHS.BitWidth;
1836 
1837   // First, deal with the easy case
1838   if (LHS.isSingleWord()) {
1839     uint64_t QuotVal = LHS.U.VAL / RHS;
1840     Remainder = LHS.U.VAL % RHS;
1841     Quotient = APInt(BitWidth, QuotVal);
1842     return;
1843   }
1844 
1845   // Get some size facts about the dividend and divisor
1846   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1847 
1848   // Check the degenerate cases
1849   if (lhsWords == 0) {
1850     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1851     Remainder = 0;                    // 0 % Y ===> 0
1852     return;
1853   }
1854 
1855   if (RHS == 1) {
1856     Quotient = LHS;                   // X / 1 ===> X
1857     Remainder = 0;                    // X % 1 ===> 0
1858     return;
1859   }
1860 
1861   if (LHS.ult(RHS)) {
1862     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1863     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1864     return;
1865   }
1866 
1867   if (LHS == RHS) {
1868     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1869     Remainder = 0;                    // X % X ===> 0;
1870     return;
1871   }
1872 
1873   // Make sure there is enough space to hold the results.
1874   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1875   // change the size. This is necessary if Quotient is aliased with LHS.
1876   Quotient.reallocate(BitWidth);
1877 
1878   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1879     // There is only one word to consider so use the native versions.
1880     uint64_t lhsValue = LHS.U.pVal[0];
1881     Quotient = lhsValue / RHS;
1882     Remainder = lhsValue % RHS;
1883     return;
1884   }
1885 
1886   // Okay, lets do it the long way
1887   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1888   // Clear the rest of the Quotient.
1889   std::memset(Quotient.U.pVal + lhsWords, 0,
1890               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1891 }
1892 
1893 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1894                     APInt &Quotient, APInt &Remainder) {
1895   if (LHS.isNegative()) {
1896     if (RHS.isNegative())
1897       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1898     else {
1899       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1900       Quotient.negate();
1901     }
1902     Remainder.negate();
1903   } else if (RHS.isNegative()) {
1904     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1905     Quotient.negate();
1906   } else {
1907     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1908   }
1909 }
1910 
1911 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1912                     APInt &Quotient, int64_t &Remainder) {
1913   uint64_t R = Remainder;
1914   if (LHS.isNegative()) {
1915     if (RHS < 0)
1916       APInt::udivrem(-LHS, -RHS, Quotient, R);
1917     else {
1918       APInt::udivrem(-LHS, RHS, Quotient, R);
1919       Quotient.negate();
1920     }
1921     R = -R;
1922   } else if (RHS < 0) {
1923     APInt::udivrem(LHS, -RHS, Quotient, R);
1924     Quotient.negate();
1925   } else {
1926     APInt::udivrem(LHS, RHS, Quotient, R);
1927   }
1928   Remainder = R;
1929 }
1930 
1931 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1932   APInt Res = *this+RHS;
1933   Overflow = isNonNegative() == RHS.isNonNegative() &&
1934              Res.isNonNegative() != isNonNegative();
1935   return Res;
1936 }
1937 
1938 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1939   APInt Res = *this+RHS;
1940   Overflow = Res.ult(RHS);
1941   return Res;
1942 }
1943 
1944 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1945   APInt Res = *this - RHS;
1946   Overflow = isNonNegative() != RHS.isNonNegative() &&
1947              Res.isNonNegative() != isNonNegative();
1948   return Res;
1949 }
1950 
1951 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1952   APInt Res = *this-RHS;
1953   Overflow = Res.ugt(*this);
1954   return Res;
1955 }
1956 
1957 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1958   // MININT/-1  -->  overflow.
1959   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1960   return sdiv(RHS);
1961 }
1962 
1963 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1964   APInt Res = *this * RHS;
1965 
1966   if (*this != 0 && RHS != 0)
1967     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1968   else
1969     Overflow = false;
1970   return Res;
1971 }
1972 
1973 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1974   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1975     Overflow = true;
1976     return *this * RHS;
1977   }
1978 
1979   APInt Res = lshr(1) * RHS;
1980   Overflow = Res.isNegative();
1981   Res <<= 1;
1982   if ((*this)[0]) {
1983     Res += RHS;
1984     if (Res.ult(RHS))
1985       Overflow = true;
1986   }
1987   return Res;
1988 }
1989 
1990 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
1991   Overflow = ShAmt.uge(getBitWidth());
1992   if (Overflow)
1993     return APInt(BitWidth, 0);
1994 
1995   if (isNonNegative()) // Don't allow sign change.
1996     Overflow = ShAmt.uge(countLeadingZeros());
1997   else
1998     Overflow = ShAmt.uge(countLeadingOnes());
1999 
2000   return *this << ShAmt;
2001 }
2002 
2003 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2004   Overflow = ShAmt.uge(getBitWidth());
2005   if (Overflow)
2006     return APInt(BitWidth, 0);
2007 
2008   Overflow = ShAmt.ugt(countLeadingZeros());
2009 
2010   return *this << ShAmt;
2011 }
2012 
2013 APInt APInt::sadd_sat(const APInt &RHS) const {
2014   bool Overflow;
2015   APInt Res = sadd_ov(RHS, Overflow);
2016   if (!Overflow)
2017     return Res;
2018 
2019   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2020                       : APInt::getSignedMaxValue(BitWidth);
2021 }
2022 
2023 APInt APInt::uadd_sat(const APInt &RHS) const {
2024   bool Overflow;
2025   APInt Res = uadd_ov(RHS, Overflow);
2026   if (!Overflow)
2027     return Res;
2028 
2029   return APInt::getMaxValue(BitWidth);
2030 }
2031 
2032 APInt APInt::ssub_sat(const APInt &RHS) const {
2033   bool Overflow;
2034   APInt Res = ssub_ov(RHS, Overflow);
2035   if (!Overflow)
2036     return Res;
2037 
2038   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2039                       : APInt::getSignedMaxValue(BitWidth);
2040 }
2041 
2042 APInt APInt::usub_sat(const APInt &RHS) const {
2043   bool Overflow;
2044   APInt Res = usub_ov(RHS, Overflow);
2045   if (!Overflow)
2046     return Res;
2047 
2048   return APInt(BitWidth, 0);
2049 }
2050 
2051 
2052 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2053   // Check our assumptions here
2054   assert(!str.empty() && "Invalid string length");
2055   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2056           radix == 36) &&
2057          "Radix should be 2, 8, 10, 16, or 36!");
2058 
2059   StringRef::iterator p = str.begin();
2060   size_t slen = str.size();
2061   bool isNeg = *p == '-';
2062   if (*p == '-' || *p == '+') {
2063     p++;
2064     slen--;
2065     assert(slen && "String is only a sign, needs a value.");
2066   }
2067   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2068   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2069   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2070   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2071          "Insufficient bit width");
2072 
2073   // Allocate memory if needed
2074   if (isSingleWord())
2075     U.VAL = 0;
2076   else
2077     U.pVal = getClearedMemory(getNumWords());
2078 
2079   // Figure out if we can shift instead of multiply
2080   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2081 
2082   // Enter digit traversal loop
2083   for (StringRef::iterator e = str.end(); p != e; ++p) {
2084     unsigned digit = getDigit(*p, radix);
2085     assert(digit < radix && "Invalid character in digit string");
2086 
2087     // Shift or multiply the value by the radix
2088     if (slen > 1) {
2089       if (shift)
2090         *this <<= shift;
2091       else
2092         *this *= radix;
2093     }
2094 
2095     // Add in the digit we just interpreted
2096     *this += digit;
2097   }
2098   // If its negative, put it in two's complement form
2099   if (isNeg)
2100     this->negate();
2101 }
2102 
2103 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2104                      bool Signed, bool formatAsCLiteral) const {
2105   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2106           Radix == 36) &&
2107          "Radix should be 2, 8, 10, 16, or 36!");
2108 
2109   const char *Prefix = "";
2110   if (formatAsCLiteral) {
2111     switch (Radix) {
2112       case 2:
2113         // Binary literals are a non-standard extension added in gcc 4.3:
2114         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2115         Prefix = "0b";
2116         break;
2117       case 8:
2118         Prefix = "0";
2119         break;
2120       case 10:
2121         break; // No prefix
2122       case 16:
2123         Prefix = "0x";
2124         break;
2125       default:
2126         llvm_unreachable("Invalid radix!");
2127     }
2128   }
2129 
2130   // First, check for a zero value and just short circuit the logic below.
2131   if (*this == 0) {
2132     while (*Prefix) {
2133       Str.push_back(*Prefix);
2134       ++Prefix;
2135     };
2136     Str.push_back('0');
2137     return;
2138   }
2139 
2140   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2141 
2142   if (isSingleWord()) {
2143     char Buffer[65];
2144     char *BufPtr = std::end(Buffer);
2145 
2146     uint64_t N;
2147     if (!Signed) {
2148       N = getZExtValue();
2149     } else {
2150       int64_t I = getSExtValue();
2151       if (I >= 0) {
2152         N = I;
2153       } else {
2154         Str.push_back('-');
2155         N = -(uint64_t)I;
2156       }
2157     }
2158 
2159     while (*Prefix) {
2160       Str.push_back(*Prefix);
2161       ++Prefix;
2162     };
2163 
2164     while (N) {
2165       *--BufPtr = Digits[N % Radix];
2166       N /= Radix;
2167     }
2168     Str.append(BufPtr, std::end(Buffer));
2169     return;
2170   }
2171 
2172   APInt Tmp(*this);
2173 
2174   if (Signed && isNegative()) {
2175     // They want to print the signed version and it is a negative value
2176     // Flip the bits and add one to turn it into the equivalent positive
2177     // value and put a '-' in the result.
2178     Tmp.negate();
2179     Str.push_back('-');
2180   }
2181 
2182   while (*Prefix) {
2183     Str.push_back(*Prefix);
2184     ++Prefix;
2185   };
2186 
2187   // We insert the digits backward, then reverse them to get the right order.
2188   unsigned StartDig = Str.size();
2189 
2190   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2191   // because the number of bits per digit (1, 3 and 4 respectively) divides
2192   // equally.  We just shift until the value is zero.
2193   if (Radix == 2 || Radix == 8 || Radix == 16) {
2194     // Just shift tmp right for each digit width until it becomes zero
2195     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2196     unsigned MaskAmt = Radix - 1;
2197 
2198     while (Tmp.getBoolValue()) {
2199       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2200       Str.push_back(Digits[Digit]);
2201       Tmp.lshrInPlace(ShiftAmt);
2202     }
2203   } else {
2204     while (Tmp.getBoolValue()) {
2205       uint64_t Digit;
2206       udivrem(Tmp, Radix, Tmp, Digit);
2207       assert(Digit < Radix && "divide failed");
2208       Str.push_back(Digits[Digit]);
2209     }
2210   }
2211 
2212   // Reverse the digits before returning.
2213   std::reverse(Str.begin()+StartDig, Str.end());
2214 }
2215 
2216 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2217 /// It is better to pass in a SmallVector/SmallString to the methods above.
2218 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2219   SmallString<40> S;
2220   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2221   return S.str();
2222 }
2223 
2224 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2225 LLVM_DUMP_METHOD void APInt::dump() const {
2226   SmallString<40> S, U;
2227   this->toStringUnsigned(U);
2228   this->toStringSigned(S);
2229   dbgs() << "APInt(" << BitWidth << "b, "
2230          << U << "u " << S << "s)\n";
2231 }
2232 #endif
2233 
2234 void APInt::print(raw_ostream &OS, bool isSigned) const {
2235   SmallString<40> S;
2236   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2237   OS << S;
2238 }
2239 
2240 // This implements a variety of operations on a representation of
2241 // arbitrary precision, two's-complement, bignum integer values.
2242 
// Assumed by lowHalf, highHalf, partMSB and partLSB.  lowHalf and highHalf
// split a word at APINT_BITS_PER_WORD / 2, so the word width must be even.
// A fairly safe and unrestricting assumption.
static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
              "Part width must be divisible by 2!");
2247 
2248 /* Some handy functions local to this file.  */
2249 
2250 /* Returns the integer part with the least significant BITS set.
2251    BITS cannot be zero.  */
2252 static inline APInt::WordType lowBitMask(unsigned bits) {
2253   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2254 
2255   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2256 }
2257 
2258 /* Returns the value of the lower half of PART.  */
2259 static inline APInt::WordType lowHalf(APInt::WordType part) {
2260   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2261 }
2262 
2263 /* Returns the value of the upper half of PART.  */
2264 static inline APInt::WordType highHalf(APInt::WordType part) {
2265   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2266 }
2267 
2268 /* Returns the bit number of the most significant set bit of a part.
2269    If the input number has no bits set -1U is returned.  */
2270 static unsigned partMSB(APInt::WordType value) {
2271   return findLastSet(value, ZB_Max);
2272 }
2273 
2274 /* Returns the bit number of the least significant set bit of a
2275    part.  If the input number has no bits set -1U is returned.  */
2276 static unsigned partLSB(APInt::WordType value) {
2277   return findFirstSet(value, ZB_Max);
2278 }
2279 
2280 /* Sets the least significant part of a bignum to the input value, and
2281    zeroes out higher parts.  */
2282 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2283   assert(parts > 0);
2284 
2285   dst[0] = part;
2286   for (unsigned i = 1; i < parts; i++)
2287     dst[i] = 0;
2288 }
2289 
2290 /* Assign one bignum to another.  */
2291 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2292   for (unsigned i = 0; i < parts; i++)
2293     dst[i] = src[i];
2294 }
2295 
2296 /* Returns true if a bignum is zero, false otherwise.  */
2297 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2298   for (unsigned i = 0; i < parts; i++)
2299     if (src[i])
2300       return false;
2301 
2302   return true;
2303 }
2304 
2305 /* Extract the given bit of a bignum; returns 0 or 1.  */
2306 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2307   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2308 }
2309 
2310 /* Set the given bit of a bignum. */
2311 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2312   parts[whichWord(bit)] |= maskBit(bit);
2313 }
2314 
2315 /* Clears the given bit of a bignum. */
2316 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2317   parts[whichWord(bit)] &= ~maskBit(bit);
2318 }
2319 
2320 /* Returns the bit number of the least significant set bit of a
2321    number.  If the input number has no bits set -1U is returned.  */
2322 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2323   for (unsigned i = 0; i < n; i++) {
2324     if (parts[i] != 0) {
2325       unsigned lsb = partLSB(parts[i]);
2326 
2327       return lsb + i * APINT_BITS_PER_WORD;
2328     }
2329   }
2330 
2331   return -1U;
2332 }
2333 
2334 /* Returns the bit number of the most significant set bit of a number.
2335    If the input number has no bits set -1U is returned.  */
2336 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2337   do {
2338     --n;
2339 
2340     if (parts[n] != 0) {
2341       unsigned msb = partMSB(parts[n]);
2342 
2343       return msb + n * APINT_BITS_PER_WORD;
2344     }
2345   } while (n);
2346 
2347   return -1U;
2348 }
2349 
/* Copy the bit vector of width srcBITS from SRC, starting at bit
   srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
   the least significant bit of DST.  All high bits above srcBITS in
   DST are zero-filled.  */
void
APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
                 unsigned srcBits, unsigned srcLSB) {
  // Number of destination words needed to hold srcBits bits (rounded up).
  unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
  assert(dstParts <= dstCount);

  // Copy whole words starting at the word that contains bit srcLSB ...
  unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
  tcAssign (dst, src + firstSrcPart, dstParts);

  // ... then shift so that bit srcLSB lands at bit 0 of DST.
  unsigned shift = srcLSB % APINT_BITS_PER_WORD;
  tcShiftRight (dst, dstParts, shift);

  /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
     in DST.  If this is less than srcBits, append the rest, else
     clear the high bits.  */
  unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
  if (n < srcBits) {
    // The missing srcBits - n bits come from the next source word and are
    // placed above the n bits already present in the top destination word.
    WordType mask = lowBitMask (srcBits - n);
    dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
                          << n % APINT_BITS_PER_WORD);
  } else if (n > srcBits) {
    // Too many bits were copied: trim the top word unless srcBits fills it
    // exactly.
    if (srcBits % APINT_BITS_PER_WORD)
      dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
  }

  /* Clear high parts.  */
  while (dstParts < dstCount)
    dst[dstParts++] = 0;
}
2383 
2384 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2385 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2386                              WordType c, unsigned parts) {
2387   assert(c <= 1);
2388 
2389   for (unsigned i = 0; i < parts; i++) {
2390     WordType l = dst[i];
2391     if (c) {
2392       dst[i] += rhs[i] + 1;
2393       c = (dst[i] <= l);
2394     } else {
2395       dst[i] += rhs[i];
2396       c = (dst[i] < l);
2397     }
2398   }
2399 
2400   return c;
2401 }
2402 
2403 /// This function adds a single "word" integer, src, to the multiple
2404 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2405 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2406 /// @returns the carry of the addition.
2407 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2408                                  unsigned parts) {
2409   for (unsigned i = 0; i < parts; ++i) {
2410     dst[i] += src;
2411     if (dst[i] >= src)
2412       return 0; // No need to carry so exit early.
2413     src = 1; // Carry one to next digit.
2414   }
2415 
2416   return 1;
2417 }
2418 
2419 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2420 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2421                                   WordType c, unsigned parts) {
2422   assert(c <= 1);
2423 
2424   for (unsigned i = 0; i < parts; i++) {
2425     WordType l = dst[i];
2426     if (c) {
2427       dst[i] -= rhs[i] + 1;
2428       c = (dst[i] >= l);
2429     } else {
2430       dst[i] -= rhs[i];
2431       c = (dst[i] > l);
2432     }
2433   }
2434 
2435   return c;
2436 }
2437 
2438 /// This function subtracts a single "word" (64-bit word), src, from
2439 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2440 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2441 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2442 /// exhausted. In other words, if src > dst then this function returns 1,
2443 /// otherwise 0.
2444 /// @returns the borrow out of the subtraction
2445 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2446                                       unsigned parts) {
2447   for (unsigned i = 0; i < parts; ++i) {
2448     WordType Dst = dst[i];
2449     dst[i] -= src;
2450     if (src <= Dst)
2451       return 0; // No need to borrow so exit early.
2452     src = 1; // We have to "borrow 1" from next "word"
2453   }
2454 
2455   return 1;
2456 }
2457 
/* Negate a bignum in-place: complement all PARTS words of DST, then
   increment the result.  */
void APInt::tcNegate(WordType *dst, unsigned parts) {
  tcComplement(dst, parts);
  tcIncrement(dst, parts);
}
2463 
/*  DST += SRC * MULTIPLIER + CARRY   if add is true
    DST  = SRC * MULTIPLIER + CARRY   if add is false

    Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
    they must start at the same point, i.e. DST == SRC.

    If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
    returned.  Otherwise DST is filled with the least significant
    DSTPARTS parts of the result, and if all of the omitted higher
    parts were zero return zero, otherwise overflow occurred and
    return one.  */
int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
                          WordType multiplier, WordType carry,
                          unsigned srcParts, unsigned dstParts,
                          bool add) {
  /* Otherwise our writes of DST kill our later reads of SRC.  */
  assert(dst <= src || dst >= src + srcParts);
  assert(dstParts <= srcParts + 1);

  /* N loops; minimum of dstParts and srcParts.  */
  unsigned n = std::min(dstParts, srcParts);

  for (unsigned i = 0; i < n; i++) {
    WordType low, mid, high, srcPart;

      /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.

         This cannot overflow, because

         (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)

         which is less than n^2.  */

    srcPart = src[i];

    if (multiplier == 0 || srcPart == 0) {
      // The product is zero, so only the incoming carry contributes.
      low = carry;
      high = 0;
    } else {
      // Word x word -> double-word product built from four half-word
      // products.  Start with low*low and high*high ...
      low = lowHalf(srcPart) * lowHalf(multiplier);
      high = highHalf(srcPart) * highHalf(multiplier);

      // ... then fold in the first cross term: its upper half goes to HIGH,
      // its lower half is shifted into the top half of LOW (with carry).
      mid = lowHalf(srcPart) * highHalf(multiplier);
      high += highHalf(mid);
      mid <<= APINT_BITS_PER_WORD / 2;
      if (low + mid < low)
        high++;
      low += mid;

      // Second cross term, handled the same way.
      mid = highHalf(srcPart) * lowHalf(multiplier);
      high += highHalf(mid);
      mid <<= APINT_BITS_PER_WORD / 2;
      if (low + mid < low)
        high++;
      low += mid;

      /* Now add carry.  */
      if (low + carry < low)
        high++;
      low += carry;
    }

    if (add) {
      /* And now DST[i], and store the new low part there.  */
      if (low + dst[i] < low)
        high++;
      dst[i] += low;
    } else
      dst[i] = low;

    // The high word becomes the carry into the next position.
    carry = high;
  }

  if (srcParts < dstParts) {
    /* Full multiplication, there is no overflow.  */
    assert(srcParts + 1 == dstParts);
    dst[srcParts] = carry;
    return 0;
  }

  /* We overflowed if there is carry.  */
  if (carry)
    return 1;

  /* We would overflow if any significant unwritten parts would be
     non-zero.  This is true if any remaining src parts are non-zero
     and the multiplier is non-zero.  */
  if (multiplier)
    for (unsigned i = dstParts; i < srcParts; i++)
      if (src[i])
        return 1;

  /* We fitted in the narrow destination.  */
  return 0;
}
2559 
2560 /* DST = LHS * RHS, where DST has the same width as the operands and
2561    is filled with the least significant parts of the result.  Returns
2562    one if overflow occurred, otherwise zero.  DST must be disjoint
2563    from both operands.  */
2564 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2565                       const WordType *rhs, unsigned parts) {
2566   assert(dst != lhs && dst != rhs);
2567 
2568   int overflow = 0;
2569   tcSet(dst, 0, parts);
2570 
2571   for (unsigned i = 0; i < parts; i++)
2572     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2573                                parts - i, true);
2574 
2575   return overflow;
2576 }
2577 
2578 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2579 /// operands. No overflow occurs. DST must be disjoint from both operands.
2580 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2581                            const WordType *rhs, unsigned lhsParts,
2582                            unsigned rhsParts) {
2583   /* Put the narrower number on the LHS for less loops below.  */
2584   if (lhsParts > rhsParts)
2585     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2586 
2587   assert(dst != lhs && dst != rhs);
2588 
2589   tcSet(dst, 0, rhsParts);
2590 
2591   for (unsigned i = 0; i < lhsParts; i++)
2592     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2593 }
2594 
/* If RHS is zero LHS and REMAINDER are left unchanged, return one.
   Otherwise set LHS to LHS / RHS with the fractional part discarded,
   set REMAINDER to the remainder, return zero.  i.e.

   OLD_LHS = RHS * LHS + REMAINDER

   SCRATCH (the SRHS parameter) is a bignum of the same size as the
   operands and result for use by the routine; its contents need not be
   initialized and are destroyed.  LHS, REMAINDER and SCRATCH must be
   distinct.
*/
int APInt::tcDivide(WordType *lhs, const WordType *rhs,
                    WordType *remainder, WordType *srhs,
                    unsigned parts) {
  assert(lhs != remainder && lhs != srhs && remainder != srhs);

  // tcMSB returns -1U for a zero divisor, which makes this sum wrap to 0.
  unsigned shiftCount = tcMSB(rhs, parts) + 1;
  if (shiftCount == 0)
    return true;

  // From here on shiftCount is the number of unused high bits in RHS.  N and
  // MASK locate the quotient bit with that index.
  shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
  unsigned n = shiftCount / APINT_BITS_PER_WORD;
  WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);

  // SRHS = RHS shifted up so its top set bit is the top bit of the bignum;
  // REMAINDER starts as the dividend and LHS (the quotient) starts at zero.
  tcAssign(srhs, rhs, parts);
  tcShiftLeft(srhs, parts, shiftCount);
  tcAssign(remainder, lhs, parts);
  tcSet(lhs, 0, parts);

  /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
     the total.  */
  for (;;) {
    int compare = tcCompare(remainder, srhs, parts);
    if (compare >= 0) {
      tcSubtract(remainder, srhs, 0, parts);
      lhs[n] |= mask;
    }

    if (shiftCount == 0)
      break;
    // Move down to the next quotient bit: halve SRHS and step MASK right,
    // moving to the next lower word when it falls off the bottom.
    shiftCount--;
    tcShiftRight(srhs, parts, 1);
    if ((mask >>= 1) == 0) {
      mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
      n--;
    }
  }

  return false;
}
2644 
2645 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2646 /// no restrictions on Count.
2647 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2648   // Don't bother performing a no-op shift.
2649   if (!Count)
2650     return;
2651 
2652   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2653   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2654   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2655 
2656   // Fastpath for moving by whole words.
2657   if (BitShift == 0) {
2658     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2659   } else {
2660     while (Words-- > WordShift) {
2661       Dst[Words] = Dst[Words - WordShift] << BitShift;
2662       if (Words > WordShift)
2663         Dst[Words] |=
2664           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2665     }
2666   }
2667 
2668   // Fill in the remainder with 0s.
2669   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2670 }
2671 
2672 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2673 /// are no restrictions on Count.
2674 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2675   // Don't bother performing a no-op shift.
2676   if (!Count)
2677     return;
2678 
2679   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2680   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2681   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2682 
2683   unsigned WordsToMove = Words - WordShift;
2684   // Fastpath for moving by whole words.
2685   if (BitShift == 0) {
2686     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2687   } else {
2688     for (unsigned i = 0; i != WordsToMove; ++i) {
2689       Dst[i] = Dst[i + WordShift] >> BitShift;
2690       if (i + 1 != WordsToMove)
2691         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2692     }
2693   }
2694 
2695   // Fill in the remainder with 0s.
2696   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2697 }
2698 
2699 /* Bitwise and of two bignums.  */
2700 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2701   for (unsigned i = 0; i < parts; i++)
2702     dst[i] &= rhs[i];
2703 }
2704 
2705 /* Bitwise inclusive or of two bignums.  */
2706 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2707   for (unsigned i = 0; i < parts; i++)
2708     dst[i] |= rhs[i];
2709 }
2710 
2711 /* Bitwise exclusive or of two bignums.  */
2712 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2713   for (unsigned i = 0; i < parts; i++)
2714     dst[i] ^= rhs[i];
2715 }
2716 
2717 /* Complement a bignum in-place.  */
2718 void APInt::tcComplement(WordType *dst, unsigned parts) {
2719   for (unsigned i = 0; i < parts; i++)
2720     dst[i] = ~dst[i];
2721 }
2722 
2723 /* Comparison (unsigned) of two bignums.  */
2724 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2725                      unsigned parts) {
2726   while (parts) {
2727     parts--;
2728     if (lhs[parts] != rhs[parts])
2729       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2730   }
2731 
2732   return 0;
2733 }
2734 
2735 /* Set the least significant BITS bits of a bignum, clear the
2736    rest.  */
2737 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2738                                       unsigned bits) {
2739   unsigned i = 0;
2740   while (bits > APINT_BITS_PER_WORD) {
2741     dst[i++] = ~(WordType) 0;
2742     bits -= APINT_BITS_PER_WORD;
2743   }
2744 
2745   if (bits)
2746     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2747 
2748   while (i < parts)
2749     dst[i++] = 0;
2750 }
2751 
2752 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2753                                    APInt::Rounding RM) {
2754   // Currently udivrem always rounds down.
2755   switch (RM) {
2756   case APInt::Rounding::DOWN:
2757   case APInt::Rounding::TOWARD_ZERO:
2758     return A.udiv(B);
2759   case APInt::Rounding::UP: {
2760     APInt Quo, Rem;
2761     APInt::udivrem(A, B, Quo, Rem);
2762     if (Rem == 0)
2763       return Quo;
2764     return Quo + 1;
2765   }
2766   }
2767   llvm_unreachable("Unknown APInt::Rounding enum");
2768 }
2769 
2770 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2771                                    APInt::Rounding RM) {
2772   switch (RM) {
2773   case APInt::Rounding::DOWN:
2774   case APInt::Rounding::UP: {
2775     APInt Quo, Rem;
2776     APInt::sdivrem(A, B, Quo, Rem);
2777     if (Rem == 0)
2778       return Quo;
2779     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2780     // We want to check whether the non-integer part of the mathematical value
2781     // is negative or not. If the non-integer part is negative, we need to round
2782     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2783     // already rounded down.
2784     if (RM == APInt::Rounding::DOWN) {
2785       if (Rem.isNegative() != B.isNegative())
2786         return Quo - 1;
2787       return Quo;
2788     }
2789     if (Rem.isNegative() != B.isNegative())
2790       return Quo;
2791     return Quo + 1;
2792   }
2793   // Currently sdiv rounds twards zero.
2794   case APInt::Rounding::TOWARD_ZERO:
2795     return A.sdiv(B);
2796   }
2797   llvm_unreachable("Unknown APInt::Rounding enum");
2798 }
2799 
2800 Optional<APInt>
2801 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2802                                            unsigned RangeWidth) {
2803   unsigned CoeffWidth = A.getBitWidth();
2804   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2805   assert(RangeWidth <= CoeffWidth &&
2806          "Value range width should be less than coefficient width");
2807   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2808 
2809   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2810                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2811 
2812   // Identify 0 as a (non)solution immediately.
2813   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2814     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2815     return APInt(CoeffWidth, 0);
2816   }
2817 
2818   // The result of APInt arithmetic has the same bit width as the operands,
2819   // so it can actually lose high bits. A product of two n-bit integers needs
2820   // 2n-1 bits to represent the full value.
2821   // The operation done below (on quadratic coefficients) that can produce
2822   // the largest value is the evaluation of the equation during bisection,
2823   // which needs 3 times the bitwidth of the coefficient, so the total number
2824   // of required bits is 3n.
2825   //
2826   // The purpose of this extension is to simulate the set Z of all integers,
2827   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2828   // and negative numbers (not so much in a modulo arithmetic). The method
2829   // used to solve the equation is based on the standard formula for real
2830   // numbers, and uses the concepts of "positive" and "negative" with their
2831   // usual meanings.
2832   CoeffWidth *= 3;
2833   A = A.sext(CoeffWidth);
2834   B = B.sext(CoeffWidth);
2835   C = C.sext(CoeffWidth);
2836 
2837   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2838   // the bit width has increased.
2839   if (A.isNegative()) {
2840     A.negate();
2841     B.negate();
2842     C.negate();
2843   }
2844 
2845   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2846   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2847   // and R = 2^BitWidth.
2848   // Since we're trying not only to find exact solutions, but also values
2849   // that "wrap around", such a set will always have a solution, i.e. an x
2850   // that satisfies at least one of the equations, or such that |q(x)|
2851   // exceeds kR, while |q(x-1)| for the same k does not.
2852   //
2853   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2854   // positive solution n (in the above sense), and also such that the n
2855   // will be the least among all solutions corresponding to k = 0, 1, ...
2856   // (more precisely, the least element in the set
2857   //   { n(k) | k is such that a solution n(k) exists }).
2858   //
2859   // Consider the parabola (over real numbers) that corresponds to the
2860   // quadratic equation. Since A > 0, the arms of the parabola will point
2861   // up. Picking different values of k will shift it up and down by R.
2862   //
2863   // We want to shift the parabola in such a way as to reduce the problem
2864   // of solving q(x) = kR to solving shifted_q(x) = 0.
2865   // (The interesting solutions are the ceilings of the real number
2866   // solutions.)
2867   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2868   APInt TwoA = 2 * A;
2869   APInt SqrB = B * B;
2870   bool PickLow;
2871 
2872   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2873     assert(A.isStrictlyPositive());
2874     APInt T = V.abs().urem(A);
2875     if (T.isNullValue())
2876       return V;
2877     return V.isNegative() ? V+T : V+(A-T);
2878   };
2879 
2880   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2881   // iff B is positive.
2882   if (B.isNonNegative()) {
2883     // If B >= 0, the vertex it at a negative location (or at 0), so in
2884     // order to have a non-negative solution we need to pick k that makes
2885     // C-kR negative. To satisfy all the requirements for the solution
2886     // that we are looking for, it needs to be closest to 0 of all k.
2887     C = C.srem(R);
2888     if (C.isStrictlyPositive())
2889       C -= R;
2890     // Pick the greater solution.
2891     PickLow = false;
2892   } else {
2893     // If B < 0, the vertex is at a positive location. For any solution
2894     // to exist, the discriminant must be non-negative. This means that
2895     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2896     // lower bound on values of k: kR >= C - B^2/4A.
2897     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2898     // Round LowkR up (towards +inf) to the nearest kR.
2899     LowkR = RoundUp(LowkR, R);
2900 
2901     // If there exists k meeting the condition above, and such that
2902     // C-kR > 0, there will be two positive real number solutions of
2903     // q(x) = kR. Out of all such values of k, pick the one that makes
2904     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2905     // In other words, find maximum k such that LowkR <= kR < C.
2906     if (C.sgt(LowkR)) {
2907       // If LowkR < C, then such a k is guaranteed to exist because
2908       // LowkR itself is a multiple of R.
2909       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2910       // Pick the smaller solution.
2911       PickLow = true;
2912     } else {
2913       // If C-kR < 0 for all potential k's, it means that one solution
2914       // will be negative, while the other will be positive. The positive
2915       // solution will shift towards 0 if the parabola is moved up.
2916       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2917       // to 0, or in other words, out of all parabolas that have solutions,
2918       // pick the one that is the farthest "up").
2919       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2920       C -= LowkR;
2921       // Pick the greater solution.
2922       PickLow = false;
2923     }
2924   }
2925 
2926   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2927                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2928 
2929   APInt D = SqrB - 4*A*C;
2930   assert(D.isNonNegative() && "Negative discriminant");
2931   APInt SQ = D.sqrt();
2932 
2933   APInt Q = SQ * SQ;
2934   bool InexactSQ = Q != D;
2935   // The calculated SQ may actually be greater than the exact (non-integer)
2936   // value. If that's the case, decremement SQ to get a value that is lower.
2937   if (Q.sgt(D))
2938     SQ -= 1;
2939 
2940   APInt X;
2941   APInt Rem;
2942 
2943   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
2944   // When using the quadratic formula directly, the calculated low root
2945   // may be greater than the exact one, since we would be subtracting SQ.
2946   // To make sure that the calculated root is not greater than the exact
2947   // one, subtract SQ+1 when calculating the low root (for inexact value
2948   // of SQ).
2949   if (PickLow)
2950     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
2951   else
2952     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
2953 
2954   // The updated coefficients should be such that the (exact) solution is
2955   // positive. Since APInt division rounds towards 0, the calculated one
2956   // can be 0, but cannot be negative.
2957   assert(X.isNonNegative() && "Solution should be non-negative");
2958 
2959   if (!InexactSQ && Rem.isNullValue()) {
2960     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
2961     return X;
2962   }
2963 
2964   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
2965   // The exact value of the square root of D should be between SQ and SQ+1.
2966   // This implies that the solution should be between that corresponding to
2967   // SQ (i.e. X) and that corresponding to SQ+1.
2968   //
2969   // The calculated X cannot be greater than the exact (real) solution.
2970   // Actually it must be strictly less than the exact solution, while
2971   // X+1 will be greater than or equal to it.
2972 
2973   APInt VX = (A*X + B)*X + C;
2974   APInt VY = VX + TwoA*X + A + B;
2975   bool SignChange = VX.isNegative() != VY.isNegative() ||
2976                     VX.isNullValue() != VY.isNullValue();
2977   // If the sign did not change between X and X+1, X is not a valid solution.
2978   // This could happen when the actual (exact) roots don't have an integer
2979   // between them, so they would both be contained between X and X+1.
2980   if (!SignChange) {
2981     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
2982     return None;
2983   }
2984 
2985   X += 1;
2986   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
2987   return X;
2988 }
2989 
2990 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2991 /// with the integer held in IntVal.
2992 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
2993                             unsigned StoreBytes) {
2994   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
2995   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
2996 
2997   if (sys::IsLittleEndianHost) {
2998     // Little-endian host - the source is ordered from LSB to MSB.  Order the
2999     // destination from LSB to MSB: Do a straight copy.
3000     memcpy(Dst, Src, StoreBytes);
3001   } else {
3002     // Big-endian host - the source is an array of 64 bit words ordered from
3003     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3004     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3005     while (StoreBytes > sizeof(uint64_t)) {
3006       StoreBytes -= sizeof(uint64_t);
3007       // May not be aligned so use memcpy.
3008       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3009       Src += sizeof(uint64_t);
3010     }
3011 
3012     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3013   }
3014 }
3015 
3016 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3017 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3018 void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
3019   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3020   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3021                    const_cast<uint64_t *>(IntVal.getRawData()));
3022 
3023   if (sys::IsLittleEndianHost)
3024     // Little-endian host - the destination must be ordered from LSB to MSB.
3025     // The source is ordered from LSB to MSB: Do a straight copy.
3026     memcpy(Dst, Src, LoadBytes);
3027   else {
3028     // Big-endian - the destination is an array of 64 bit words ordered from
3029     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3030     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3031     // a word.
3032     while (LoadBytes > sizeof(uint64_t)) {
3033       LoadBytes -= sizeof(uint64_t);
3034       // May not be aligned so use memcpy.
3035       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3036       Dst += sizeof(uint64_t);
3037     }
3038 
3039     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3040   }
3041 }
3042