xref: /freebsd/contrib/llvm-project/llvm/lib/Support/APInt.cpp (revision bc5304a006238115291e7568583632889dffbab9)
1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a class to represent arbitrary precision integer
10 // constant values and provide a variety of arithmetic operations on them.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/APInt.h"
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/FoldingSet.h"
17 #include "llvm/ADT/Hashing.h"
18 #include "llvm/ADT/Optional.h"
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/ADT/bit.h"
22 #include "llvm/Config/llvm-config.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <climits>
28 #include <cmath>
29 #include <cstdlib>
30 #include <cstring>
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "apint"
34 
35 /// A utility function for allocating memory, checking for allocation failures,
36 /// and ensuring the contents are zeroed.
37 inline static uint64_t* getClearedMemory(unsigned numWords) {
38   uint64_t *result = new uint64_t[numWords];
39   memset(result, 0, numWords * sizeof(uint64_t));
40   return result;
41 }
42 
43 /// A utility function for allocating memory and checking for allocation
44 /// failure.  The content is not zeroed.
45 inline static uint64_t* getMemory(unsigned numWords) {
46   return new uint64_t[numWords];
47 }
48 
49 /// A utility function that converts a character to a digit.
50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
51   unsigned r;
52 
53   if (radix == 16 || radix == 36) {
54     r = cdigit - '0';
55     if (r <= 9)
56       return r;
57 
58     r = cdigit - 'A';
59     if (r <= radix - 11U)
60       return r + 10;
61 
62     r = cdigit - 'a';
63     if (r <= radix - 11U)
64       return r + 10;
65 
66     radix = 10;
67   }
68 
69   r = cdigit - '0';
70   if (r < radix)
71     return r;
72 
73   return -1U;
74 }
75 
76 
77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
78   U.pVal = getClearedMemory(getNumWords());
79   U.pVal[0] = val;
80   if (isSigned && int64_t(val) < 0)
81     for (unsigned i = 1; i < getNumWords(); ++i)
82       U.pVal[i] = WORDTYPE_MAX;
83   clearUnusedBits();
84 }
85 
86 void APInt::initSlowCase(const APInt& that) {
87   U.pVal = getMemory(getNumWords());
88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
89 }
90 
91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
92   assert(BitWidth && "Bitwidth too small");
93   assert(bigVal.data() && "Null pointer detected!");
94   if (isSingleWord())
95     U.VAL = bigVal[0];
96   else {
97     // Get memory, cleared to 0
98     U.pVal = getClearedMemory(getNumWords());
99     // Calculate the number of words to copy
100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
101     // Copy the words from bigVal to pVal
102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
103   }
104   // Make sure unused high bits are cleared
105   clearUnusedBits();
106 }
107 
108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
109   : BitWidth(numBits) {
110   initFromArray(bigVal);
111 }
112 
113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
114   : BitWidth(numBits) {
115   initFromArray(makeArrayRef(bigVal, numWords));
116 }
117 
118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
119   : BitWidth(numbits) {
120   assert(BitWidth && "Bitwidth too small");
121   fromString(numbits, Str, radix);
122 }
123 
124 void APInt::reallocate(unsigned NewBitWidth) {
125   // If the number of words is the same we can just change the width and stop.
126   if (getNumWords() == getNumWords(NewBitWidth)) {
127     BitWidth = NewBitWidth;
128     return;
129   }
130 
131   // If we have an allocation, delete it.
132   if (!isSingleWord())
133     delete [] U.pVal;
134 
135   // Update BitWidth.
136   BitWidth = NewBitWidth;
137 
138   // If we are supposed to have an allocation, create it.
139   if (!isSingleWord())
140     U.pVal = getMemory(getNumWords());
141 }
142 
143 void APInt::AssignSlowCase(const APInt& RHS) {
144   // Don't do anything for X = X
145   if (this == &RHS)
146     return;
147 
148   // Adjust the bit width and handle allocations as necessary.
149   reallocate(RHS.getBitWidth());
150 
151   // Copy the data.
152   if (isSingleWord())
153     U.VAL = RHS.U.VAL;
154   else
155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
156 }
157 
158 /// This method 'profiles' an APInt for use with FoldingSet.
159 void APInt::Profile(FoldingSetNodeID& ID) const {
160   ID.AddInteger(BitWidth);
161 
162   if (isSingleWord()) {
163     ID.AddInteger(U.VAL);
164     return;
165   }
166 
167   unsigned NumWords = getNumWords();
168   for (unsigned i = 0; i < NumWords; ++i)
169     ID.AddInteger(U.pVal[i]);
170 }
171 
172 /// Prefix increment operator. Increments the APInt by one.
173 APInt& APInt::operator++() {
174   if (isSingleWord())
175     ++U.VAL;
176   else
177     tcIncrement(U.pVal, getNumWords());
178   return clearUnusedBits();
179 }
180 
181 /// Prefix decrement operator. Decrements the APInt by one.
182 APInt& APInt::operator--() {
183   if (isSingleWord())
184     --U.VAL;
185   else
186     tcDecrement(U.pVal, getNumWords());
187   return clearUnusedBits();
188 }
189 
190 /// Adds the RHS APInt to this APInt.
191 /// @returns this, after addition of RHS.
192 /// Addition assignment operator.
193 APInt& APInt::operator+=(const APInt& RHS) {
194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
195   if (isSingleWord())
196     U.VAL += RHS.U.VAL;
197   else
198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
199   return clearUnusedBits();
200 }
201 
202 APInt& APInt::operator+=(uint64_t RHS) {
203   if (isSingleWord())
204     U.VAL += RHS;
205   else
206     tcAddPart(U.pVal, RHS, getNumWords());
207   return clearUnusedBits();
208 }
209 
210 /// Subtracts the RHS APInt from this APInt
211 /// @returns this, after subtraction
212 /// Subtraction assignment operator.
213 APInt& APInt::operator-=(const APInt& RHS) {
214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
215   if (isSingleWord())
216     U.VAL -= RHS.U.VAL;
217   else
218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
219   return clearUnusedBits();
220 }
221 
222 APInt& APInt::operator-=(uint64_t RHS) {
223   if (isSingleWord())
224     U.VAL -= RHS;
225   else
226     tcSubtractPart(U.pVal, RHS, getNumWords());
227   return clearUnusedBits();
228 }
229 
230 APInt APInt::operator*(const APInt& RHS) const {
231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
232   if (isSingleWord())
233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
234 
235   APInt Result(getMemory(getNumWords()), getBitWidth());
236 
237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
238 
239   Result.clearUnusedBits();
240   return Result;
241 }
242 
243 void APInt::AndAssignSlowCase(const APInt& RHS) {
244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
245 }
246 
247 void APInt::OrAssignSlowCase(const APInt& RHS) {
248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
249 }
250 
251 void APInt::XorAssignSlowCase(const APInt& RHS) {
252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
253 }
254 
255 APInt& APInt::operator*=(const APInt& RHS) {
256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
257   *this = *this * RHS;
258   return *this;
259 }
260 
261 APInt& APInt::operator*=(uint64_t RHS) {
262   if (isSingleWord()) {
263     U.VAL *= RHS;
264   } else {
265     unsigned NumWords = getNumWords();
266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
267   }
268   return clearUnusedBits();
269 }
270 
271 bool APInt::EqualSlowCase(const APInt& RHS) const {
272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
273 }
274 
275 int APInt::compare(const APInt& RHS) const {
276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
277   if (isSingleWord())
278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
279 
280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
281 }
282 
283 int APInt::compareSigned(const APInt& RHS) const {
284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
285   if (isSingleWord()) {
286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
289   }
290 
291   bool lhsNeg = isNegative();
292   bool rhsNeg = RHS.isNegative();
293 
294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
295   if (lhsNeg != rhsNeg)
296     return lhsNeg ? -1 : 1;
297 
298   // Otherwise we can just use an unsigned comparison, because even negative
299   // numbers compare correctly this way if both have the same signed-ness.
300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
301 }
302 
303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
304   unsigned loWord = whichWord(loBit);
305   unsigned hiWord = whichWord(hiBit);
306 
307   // Create an initial mask for the low word with zeros below loBit.
308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
309 
310   // If hiBit is not aligned, we need a high mask.
311   unsigned hiShiftAmt = whichBit(hiBit);
312   if (hiShiftAmt != 0) {
313     // Create a high mask with zeros above hiBit.
314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
316     // set the bits in hiWord.
317     if (hiWord == loWord)
318       loMask &= hiMask;
319     else
320       U.pVal[hiWord] |= hiMask;
321   }
322   // Apply the mask to the low word.
323   U.pVal[loWord] |= loMask;
324 
325   // Fill any words between loWord and hiWord with all ones.
326   for (unsigned word = loWord + 1; word < hiWord; ++word)
327     U.pVal[word] = WORDTYPE_MAX;
328 }
329 
330 /// Toggle every bit to its opposite value.
331 void APInt::flipAllBitsSlowCase() {
332   tcComplement(U.pVal, getNumWords());
333   clearUnusedBits();
334 }
335 
336 /// Toggle a given bit to its opposite value whose position is given
337 /// as "bitPosition".
338 /// Toggles a given bit to its opposite value.
339 void APInt::flipBit(unsigned bitPosition) {
340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
341   setBitVal(bitPosition, !(*this)[bitPosition]);
342 }
343 
344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
345   unsigned subBitWidth = subBits.getBitWidth();
346   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
347          "Illegal bit insertion");
348 
349   // Insertion is a direct copy.
350   if (subBitWidth == BitWidth) {
351     *this = subBits;
352     return;
353   }
354 
355   // Single word result can be done as a direct bitmask.
356   if (isSingleWord()) {
357     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
358     U.VAL &= ~(mask << bitPosition);
359     U.VAL |= (subBits.U.VAL << bitPosition);
360     return;
361   }
362 
363   unsigned loBit = whichBit(bitPosition);
364   unsigned loWord = whichWord(bitPosition);
365   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
366 
367   // Insertion within a single word can be done as a direct bitmask.
368   if (loWord == hi1Word) {
369     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
370     U.pVal[loWord] &= ~(mask << loBit);
371     U.pVal[loWord] |= (subBits.U.VAL << loBit);
372     return;
373   }
374 
375   // Insert on word boundaries.
376   if (loBit == 0) {
377     // Direct copy whole words.
378     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
379     memcpy(U.pVal + loWord, subBits.getRawData(),
380            numWholeSubWords * APINT_WORD_SIZE);
381 
382     // Mask+insert remaining bits.
383     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
384     if (remainingBits != 0) {
385       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
386       U.pVal[hi1Word] &= ~mask;
387       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
388     }
389     return;
390   }
391 
392   // General case - set/clear individual bits in dst based on src.
393   // TODO - there is scope for optimization here, but at the moment this code
394   // path is barely used so prefer readability over performance.
395   for (unsigned i = 0; i != subBitWidth; ++i)
396     setBitVal(bitPosition + i, subBits[i]);
397 }
398 
399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
400   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
401   subBits &= maskBits;
402   if (isSingleWord()) {
403     U.VAL &= ~(maskBits << bitPosition);
404     U.VAL |= subBits << bitPosition;
405     return;
406   }
407 
408   unsigned loBit = whichBit(bitPosition);
409   unsigned loWord = whichWord(bitPosition);
410   unsigned hiWord = whichWord(bitPosition + numBits - 1);
411   if (loWord == hiWord) {
412     U.pVal[loWord] &= ~(maskBits << loBit);
413     U.pVal[loWord] |= subBits << loBit;
414     return;
415   }
416 
417   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
418   unsigned wordBits = 8 * sizeof(WordType);
419   U.pVal[loWord] &= ~(maskBits << loBit);
420   U.pVal[loWord] |= subBits << loBit;
421 
422   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
423   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
424 }
425 
426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
427   assert(numBits > 0 && "Can't extract zero bits");
428   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
429          "Illegal bit extraction");
430 
431   if (isSingleWord())
432     return APInt(numBits, U.VAL >> bitPosition);
433 
434   unsigned loBit = whichBit(bitPosition);
435   unsigned loWord = whichWord(bitPosition);
436   unsigned hiWord = whichWord(bitPosition + numBits - 1);
437 
438   // Single word result extracting bits from a single word source.
439   if (loWord == hiWord)
440     return APInt(numBits, U.pVal[loWord] >> loBit);
441 
442   // Extracting bits that start on a source word boundary can be done
443   // as a fast memory copy.
444   if (loBit == 0)
445     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
446 
447   // General case - shift + copy source words directly into place.
448   APInt Result(numBits, 0);
449   unsigned NumSrcWords = getNumWords();
450   unsigned NumDstWords = Result.getNumWords();
451 
452   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
453   for (unsigned word = 0; word < NumDstWords; ++word) {
454     uint64_t w0 = U.pVal[loWord + word];
455     uint64_t w1 =
456         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
457     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
458   }
459 
460   return Result.clearUnusedBits();
461 }
462 
463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
464                                        unsigned bitPosition) const {
465   assert(numBits > 0 && "Can't extract zero bits");
466   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
467          "Illegal bit extraction");
468   assert(numBits <= 64 && "Illegal bit extraction");
469 
470   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
471   if (isSingleWord())
472     return (U.VAL >> bitPosition) & maskBits;
473 
474   unsigned loBit = whichBit(bitPosition);
475   unsigned loWord = whichWord(bitPosition);
476   unsigned hiWord = whichWord(bitPosition + numBits - 1);
477   if (loWord == hiWord)
478     return (U.pVal[loWord] >> loBit) & maskBits;
479 
480   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
481   unsigned wordBits = 8 * sizeof(WordType);
482   uint64_t retBits = U.pVal[loWord] >> loBit;
483   retBits |= U.pVal[hiWord] << (wordBits - loBit);
484   retBits &= maskBits;
485   return retBits;
486 }
487 
488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
489   assert(!str.empty() && "Invalid string length");
490   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
491           radix == 36) &&
492          "Radix should be 2, 8, 10, 16, or 36!");
493 
494   size_t slen = str.size();
495 
496   // Each computation below needs to know if it's negative.
497   StringRef::iterator p = str.begin();
498   unsigned isNegative = *p == '-';
499   if (*p == '-' || *p == '+') {
500     p++;
501     slen--;
502     assert(slen && "String is only a sign, needs a value.");
503   }
504 
505   // For radixes of power-of-two values, the bits required is accurately and
506   // easily computed
507   if (radix == 2)
508     return slen + isNegative;
509   if (radix == 8)
510     return slen * 3 + isNegative;
511   if (radix == 16)
512     return slen * 4 + isNegative;
513 
514   // FIXME: base 36
515 
516   // This is grossly inefficient but accurate. We could probably do something
517   // with a computation of roughly slen*64/20 and then adjust by the value of
518   // the first few digits. But, I'm not sure how accurate that could be.
519 
520   // Compute a sufficient number of bits that is always large enough but might
521   // be too large. This avoids the assertion in the constructor. This
522   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
523   // bits in that case.
524   unsigned sufficient
525     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
526                  : (slen == 1 ? 7 : slen * 16/3);
527 
528   // Convert to the actual binary value.
529   APInt tmp(sufficient, StringRef(p, slen), radix);
530 
531   // Compute how many bits are required. If the log is infinite, assume we need
532   // just bit. If the log is exact and value is negative, then the value is
533   // MinSignedValue with (log + 1) bits.
534   unsigned log = tmp.logBase2();
535   if (log == (unsigned)-1) {
536     return isNegative + 1;
537   } else if (isNegative && tmp.isPowerOf2()) {
538     return isNegative + log;
539   } else {
540     return isNegative + log + 1;
541   }
542 }
543 
544 hash_code llvm::hash_value(const APInt &Arg) {
545   if (Arg.isSingleWord())
546     return hash_combine(Arg.BitWidth, Arg.U.VAL);
547 
548   return hash_combine(
549       Arg.BitWidth,
550       hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
551 }
552 
553 bool APInt::isSplat(unsigned SplatSizeInBits) const {
554   assert(getBitWidth() % SplatSizeInBits == 0 &&
555          "SplatSizeInBits must divide width!");
556   // We can check that all parts of an integer are equal by making use of a
557   // little trick: rotate and check if it's still the same value.
558   return *this == rotl(SplatSizeInBits);
559 }
560 
561 /// This function returns the high "numBits" bits of this APInt.
562 APInt APInt::getHiBits(unsigned numBits) const {
563   return this->lshr(BitWidth - numBits);
564 }
565 
566 /// This function returns the low "numBits" bits of this APInt.
567 APInt APInt::getLoBits(unsigned numBits) const {
568   APInt Result(getLowBitsSet(BitWidth, numBits));
569   Result &= *this;
570   return Result;
571 }
572 
573 /// Return a value containing V broadcasted over NewLen bits.
574 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
575   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
576 
577   APInt Val = V.zextOrSelf(NewLen);
578   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
579     Val |= Val << I;
580 
581   return Val;
582 }
583 
584 unsigned APInt::countLeadingZerosSlowCase() const {
585   unsigned Count = 0;
586   for (int i = getNumWords()-1; i >= 0; --i) {
587     uint64_t V = U.pVal[i];
588     if (V == 0)
589       Count += APINT_BITS_PER_WORD;
590     else {
591       Count += llvm::countLeadingZeros(V);
592       break;
593     }
594   }
595   // Adjust for unused bits in the most significant word (they are zero).
596   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
597   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
598   return Count;
599 }
600 
601 unsigned APInt::countLeadingOnesSlowCase() const {
602   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
603   unsigned shift;
604   if (!highWordBits) {
605     highWordBits = APINT_BITS_PER_WORD;
606     shift = 0;
607   } else {
608     shift = APINT_BITS_PER_WORD - highWordBits;
609   }
610   int i = getNumWords() - 1;
611   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
612   if (Count == highWordBits) {
613     for (i--; i >= 0; --i) {
614       if (U.pVal[i] == WORDTYPE_MAX)
615         Count += APINT_BITS_PER_WORD;
616       else {
617         Count += llvm::countLeadingOnes(U.pVal[i]);
618         break;
619       }
620     }
621   }
622   return Count;
623 }
624 
625 unsigned APInt::countTrailingZerosSlowCase() const {
626   unsigned Count = 0;
627   unsigned i = 0;
628   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
629     Count += APINT_BITS_PER_WORD;
630   if (i < getNumWords())
631     Count += llvm::countTrailingZeros(U.pVal[i]);
632   return std::min(Count, BitWidth);
633 }
634 
635 unsigned APInt::countTrailingOnesSlowCase() const {
636   unsigned Count = 0;
637   unsigned i = 0;
638   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
639     Count += APINT_BITS_PER_WORD;
640   if (i < getNumWords())
641     Count += llvm::countTrailingOnes(U.pVal[i]);
642   assert(Count <= BitWidth);
643   return Count;
644 }
645 
646 unsigned APInt::countPopulationSlowCase() const {
647   unsigned Count = 0;
648   for (unsigned i = 0; i < getNumWords(); ++i)
649     Count += llvm::countPopulation(U.pVal[i]);
650   return Count;
651 }
652 
653 bool APInt::intersectsSlowCase(const APInt &RHS) const {
654   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
655     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
656       return true;
657 
658   return false;
659 }
660 
661 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
662   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
663     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
664       return false;
665 
666   return true;
667 }
668 
669 APInt APInt::byteSwap() const {
670   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
671   if (BitWidth == 16)
672     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
673   if (BitWidth == 32)
674     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
675   if (BitWidth <= 64) {
676     uint64_t Tmp1 = ByteSwap_64(U.VAL);
677     Tmp1 >>= (64 - BitWidth);
678     return APInt(BitWidth, Tmp1);
679   }
680 
681   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
682   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
683     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
684   if (Result.BitWidth != BitWidth) {
685     Result.lshrInPlace(Result.BitWidth - BitWidth);
686     Result.BitWidth = BitWidth;
687   }
688   return Result;
689 }
690 
691 APInt APInt::reverseBits() const {
692   switch (BitWidth) {
693   case 64:
694     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
695   case 32:
696     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
697   case 16:
698     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
699   case 8:
700     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
701   default:
702     break;
703   }
704 
705   APInt Val(*this);
706   APInt Reversed(BitWidth, 0);
707   unsigned S = BitWidth;
708 
709   for (; Val != 0; Val.lshrInPlace(1)) {
710     Reversed <<= 1;
711     Reversed |= Val[0];
712     --S;
713   }
714 
715   Reversed <<= S;
716   return Reversed;
717 }
718 
719 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
720   // Fast-path a common case.
721   if (A == B) return A;
722 
723   // Corner cases: if either operand is zero, the other is the gcd.
724   if (!A) return B;
725   if (!B) return A;
726 
727   // Count common powers of 2 and remove all other powers of 2.
728   unsigned Pow2;
729   {
730     unsigned Pow2_A = A.countTrailingZeros();
731     unsigned Pow2_B = B.countTrailingZeros();
732     if (Pow2_A > Pow2_B) {
733       A.lshrInPlace(Pow2_A - Pow2_B);
734       Pow2 = Pow2_B;
735     } else if (Pow2_B > Pow2_A) {
736       B.lshrInPlace(Pow2_B - Pow2_A);
737       Pow2 = Pow2_A;
738     } else {
739       Pow2 = Pow2_A;
740     }
741   }
742 
743   // Both operands are odd multiples of 2^Pow_2:
744   //
745   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
746   //
747   // This is a modified version of Stein's algorithm, taking advantage of
748   // efficient countTrailingZeros().
749   while (A != B) {
750     if (A.ugt(B)) {
751       A -= B;
752       A.lshrInPlace(A.countTrailingZeros() - Pow2);
753     } else {
754       B -= A;
755       B.lshrInPlace(B.countTrailingZeros() - Pow2);
756     }
757   }
758 
759   return A;
760 }
761 
762 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
763   uint64_t I = bit_cast<uint64_t>(Double);
764 
765   // Get the sign bit from the highest order bit
766   bool isNeg = I >> 63;
767 
768   // Get the 11-bit exponent and adjust for the 1023 bit bias
769   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
770 
771   // If the exponent is negative, the value is < 0 so just return 0.
772   if (exp < 0)
773     return APInt(width, 0u);
774 
775   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
776   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
777 
778   // If the exponent doesn't shift all bits out of the mantissa
779   if (exp < 52)
780     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
781                     APInt(width, mantissa >> (52 - exp));
782 
783   // If the client didn't provide enough bits for us to shift the mantissa into
784   // then the result is undefined, just return 0
785   if (width <= exp - 52)
786     return APInt(width, 0);
787 
788   // Otherwise, we have to shift the mantissa bits up to the right location
789   APInt Tmp(width, mantissa);
790   Tmp <<= (unsigned)exp - 52;
791   return isNeg ? -Tmp : Tmp;
792 }
793 
794 /// This function converts this APInt to a double.
795 /// The layout for double is as following (IEEE Standard 754):
796 ///  --------------------------------------
797 /// |  Sign    Exponent    Fraction    Bias |
798 /// |-------------------------------------- |
799 /// |  1[63]   11[62-52]   52[51-00]   1023 |
800 ///  --------------------------------------
801 double APInt::roundToDouble(bool isSigned) const {
802 
803   // Handle the simple case where the value is contained in one uint64_t.
804   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
805   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
806     if (isSigned) {
807       int64_t sext = SignExtend64(getWord(0), BitWidth);
808       return double(sext);
809     } else
810       return double(getWord(0));
811   }
812 
813   // Determine if the value is negative.
814   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
815 
816   // Construct the absolute value if we're negative.
817   APInt Tmp(isNeg ? -(*this) : (*this));
818 
819   // Figure out how many bits we're using.
820   unsigned n = Tmp.getActiveBits();
821 
822   // The exponent (without bias normalization) is just the number of bits
823   // we are using. Note that the sign bit is gone since we constructed the
824   // absolute value.
825   uint64_t exp = n;
826 
827   // Return infinity for exponent overflow
828   if (exp > 1023) {
829     if (!isSigned || !isNeg)
830       return std::numeric_limits<double>::infinity();
831     else
832       return -std::numeric_limits<double>::infinity();
833   }
834   exp += 1023; // Increment for 1023 bias
835 
836   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
837   // extract the high 52 bits from the correct words in pVal.
838   uint64_t mantissa;
839   unsigned hiWord = whichWord(n-1);
840   if (hiWord == 0) {
841     mantissa = Tmp.U.pVal[0];
842     if (n > 52)
843       mantissa >>= n - 52; // shift down, we want the top 52 bits.
844   } else {
845     assert(hiWord > 0 && "huh?");
846     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
847     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
848     mantissa = hibits | lobits;
849   }
850 
851   // The leading bit of mantissa is implicit, so get rid of it.
852   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
853   uint64_t I = sign | (exp << 52) | mantissa;
854   return bit_cast<double>(I);
855 }
856 
857 // Truncate to new width.
858 APInt APInt::trunc(unsigned width) const {
859   assert(width < BitWidth && "Invalid APInt Truncate request");
860   assert(width && "Can't truncate to 0 bits");
861 
862   if (width <= APINT_BITS_PER_WORD)
863     return APInt(width, getRawData()[0]);
864 
865   APInt Result(getMemory(getNumWords(width)), width);
866 
867   // Copy full words.
868   unsigned i;
869   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
870     Result.U.pVal[i] = U.pVal[i];
871 
872   // Truncate and copy any partial word.
873   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
874   if (bits != 0)
875     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
876 
877   return Result;
878 }
879 
880 // Truncate to new width with unsigned saturation.
881 APInt APInt::truncUSat(unsigned width) const {
882   assert(width < BitWidth && "Invalid APInt Truncate request");
883   assert(width && "Can't truncate to 0 bits");
884 
885   // Can we just losslessly truncate it?
886   if (isIntN(width))
887     return trunc(width);
888   // If not, then just return the new limit.
889   return APInt::getMaxValue(width);
890 }
891 
892 // Truncate to new width with signed saturation.
893 APInt APInt::truncSSat(unsigned width) const {
894   assert(width < BitWidth && "Invalid APInt Truncate request");
895   assert(width && "Can't truncate to 0 bits");
896 
897   // Can we just losslessly truncate it?
898   if (isSignedIntN(width))
899     return trunc(width);
900   // If not, then just return the new limits.
901   return isNegative() ? APInt::getSignedMinValue(width)
902                       : APInt::getSignedMaxValue(width);
903 }
904 
905 // Sign extend to a new width.
906 APInt APInt::sext(unsigned Width) const {
907   assert(Width > BitWidth && "Invalid APInt SignExtend request");
908 
909   if (Width <= APINT_BITS_PER_WORD)
910     return APInt(Width, SignExtend64(U.VAL, BitWidth));
911 
912   APInt Result(getMemory(getNumWords(Width)), Width);
913 
914   // Copy words.
915   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
916 
917   // Sign extend the last word since there may be unused bits in the input.
918   Result.U.pVal[getNumWords() - 1] =
919       SignExtend64(Result.U.pVal[getNumWords() - 1],
920                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
921 
922   // Fill with sign bits.
923   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
924               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
925   Result.clearUnusedBits();
926   return Result;
927 }
928 
929 //  Zero extend to a new width.
930 APInt APInt::zext(unsigned width) const {
931   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
932 
933   if (width <= APINT_BITS_PER_WORD)
934     return APInt(width, U.VAL);
935 
936   APInt Result(getMemory(getNumWords(width)), width);
937 
938   // Copy words.
939   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
940 
941   // Zero remaining words.
942   std::memset(Result.U.pVal + getNumWords(), 0,
943               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
944 
945   return Result;
946 }
947 
948 APInt APInt::zextOrTrunc(unsigned width) const {
949   if (BitWidth < width)
950     return zext(width);
951   if (BitWidth > width)
952     return trunc(width);
953   return *this;
954 }
955 
956 APInt APInt::sextOrTrunc(unsigned width) const {
957   if (BitWidth < width)
958     return sext(width);
959   if (BitWidth > width)
960     return trunc(width);
961   return *this;
962 }
963 
964 APInt APInt::truncOrSelf(unsigned width) const {
965   if (BitWidth > width)
966     return trunc(width);
967   return *this;
968 }
969 
970 APInt APInt::zextOrSelf(unsigned width) const {
971   if (BitWidth < width)
972     return zext(width);
973   return *this;
974 }
975 
976 APInt APInt::sextOrSelf(unsigned width) const {
977   if (BitWidth < width)
978     return sext(width);
979   return *this;
980 }
981 
982 /// Arithmetic right-shift this APInt by shiftAmt.
983 /// Arithmetic right-shift function.
984 void APInt::ashrInPlace(const APInt &shiftAmt) {
985   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
986 }
987 
988 /// Arithmetic right-shift this APInt by shiftAmt.
989 /// Arithmetic right-shift function.
990 void APInt::ashrSlowCase(unsigned ShiftAmt) {
991   // Don't bother performing a no-op shift.
992   if (!ShiftAmt)
993     return;
994 
995   // Save the original sign bit for later.
996   bool Negative = isNegative();
997 
998   // WordShift is the inter-part shift; BitShift is intra-part shift.
999   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
1000   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
1001 
1002   unsigned WordsToMove = getNumWords() - WordShift;
1003   if (WordsToMove != 0) {
1004     // Sign extend the last word to fill in the unused bits.
1005     U.pVal[getNumWords() - 1] = SignExtend64(
1006         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
1007 
1008     // Fastpath for moving by whole words.
1009     if (BitShift == 0) {
1010       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
1011     } else {
1012       // Move the words containing significant bits.
1013       for (unsigned i = 0; i != WordsToMove - 1; ++i)
1014         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
1015                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
1016 
1017       // Handle the last word which has no high bits to copy.
1018       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
1019       // Sign extend one more time.
1020       U.pVal[WordsToMove - 1] =
1021           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
1022     }
1023   }
1024 
1025   // Fill in the remainder based on the original sign.
1026   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
1027               WordShift * APINT_WORD_SIZE);
1028   clearUnusedBits();
1029 }
1030 
1031 /// Logical right-shift this APInt by shiftAmt.
1032 /// Logical right-shift function.
1033 void APInt::lshrInPlace(const APInt &shiftAmt) {
1034   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
1035 }
1036 
1037 /// Logical right-shift this APInt by shiftAmt.
1038 /// Logical right-shift function.
1039 void APInt::lshrSlowCase(unsigned ShiftAmt) {
1040   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
1041 }
1042 
1043 /// Left-shift this APInt by shiftAmt.
1044 /// Left-shift function.
1045 APInt &APInt::operator<<=(const APInt &shiftAmt) {
1046   // It's undefined behavior in C to shift by BitWidth or greater.
1047   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
1048   return *this;
1049 }
1050 
1051 void APInt::shlSlowCase(unsigned ShiftAmt) {
1052   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
1053   clearUnusedBits();
1054 }
1055 
1056 // Calculate the rotate amount modulo the bit width.
1057 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
1058   unsigned rotBitWidth = rotateAmt.getBitWidth();
1059   APInt rot = rotateAmt;
1060   if (rotBitWidth < BitWidth) {
1061     // Extend the rotate APInt, so that the urem doesn't divide by 0.
1062     // e.g. APInt(1, 32) would give APInt(1, 0).
1063     rot = rotateAmt.zext(BitWidth);
1064   }
1065   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
1066   return rot.getLimitedValue(BitWidth);
1067 }
1068 
1069 APInt APInt::rotl(const APInt &rotateAmt) const {
1070   return rotl(rotateModulo(BitWidth, rotateAmt));
1071 }
1072 
1073 APInt APInt::rotl(unsigned rotateAmt) const {
1074   rotateAmt %= BitWidth;
1075   if (rotateAmt == 0)
1076     return *this;
1077   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
1078 }
1079 
1080 APInt APInt::rotr(const APInt &rotateAmt) const {
1081   return rotr(rotateModulo(BitWidth, rotateAmt));
1082 }
1083 
1084 APInt APInt::rotr(unsigned rotateAmt) const {
1085   rotateAmt %= BitWidth;
1086   if (rotateAmt == 0)
1087     return *this;
1088   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
1089 }
1090 
1091 // Square Root - this method computes and returns the square root of "this".
1092 // Three mechanisms are used for computation. For small values (<= 5 bits),
1093 // a table lookup is done. This gets some performance for common cases. For
1094 // values using less than 52 bits, the value is converted to double and then
1095 // the libc sqrt function is called. The result is rounded and then converted
1096 // back to a uint64_t which is then used to construct the result. Finally,
1097 // the Babylonian method for computing square roots is used.
1098 APInt APInt::sqrt() const {
1099 
1100   // Determine the magnitude of the value.
1101   unsigned magnitude = getActiveBits();
1102 
1103   // Use a fast table for some small values. This also gets rid of some
1104   // rounding errors in libc sqrt for small values.
1105   if (magnitude <= 5) {
1106     static const uint8_t results[32] = {
1107       /*     0 */ 0,
1108       /*  1- 2 */ 1, 1,
1109       /*  3- 6 */ 2, 2, 2, 2,
1110       /*  7-12 */ 3, 3, 3, 3, 3, 3,
1111       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
1112       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1113       /*    31 */ 6
1114     };
1115     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
1116   }
1117 
1118   // If the magnitude of the value fits in less than 52 bits (the precision of
1119   // an IEEE double precision floating point value), then we can use the
1120   // libc sqrt function which will probably use a hardware sqrt computation.
1121   // This should be faster than the algorithm below.
1122   if (magnitude < 52) {
1123     return APInt(BitWidth,
1124                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
1125                                                                : U.pVal[0])))));
1126   }
1127 
1128   // Okay, all the short cuts are exhausted. We must compute it. The following
1129   // is a classical Babylonian method for computing the square root. This code
1130   // was adapted to APInt from a wikipedia article on such computations.
1131   // See http://www.wikipedia.org/ and go to the page named
1132   // Calculate_an_integer_square_root.
1133   unsigned nbits = BitWidth, i = 4;
1134   APInt testy(BitWidth, 16);
1135   APInt x_old(BitWidth, 1);
1136   APInt x_new(BitWidth, 0);
1137   APInt two(BitWidth, 2);
1138 
1139   // Select a good starting value using binary logarithms.
1140   for (;; i += 2, testy = testy.shl(2))
1141     if (i >= nbits || this->ule(testy)) {
1142       x_old = x_old.shl(i / 2);
1143       break;
1144     }
1145 
1146   // Use the Babylonian method to arrive at the integer square root:
1147   for (;;) {
1148     x_new = (this->udiv(x_old) + x_old).udiv(two);
1149     if (x_old.ule(x_new))
1150       break;
1151     x_old = x_new;
1152   }
1153 
1154   // Make sure we return the closest approximation
1155   // NOTE: The rounding calculation below is correct. It will produce an
1156   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
1157   // determined to be a rounding issue with pari/gp as it begins to use a
1158   // floating point representation after 192 bits. There are no discrepancies
1159   // between this algorithm and pari/gp for bit widths < 192 bits.
1160   APInt square(x_old * x_old);
1161   APInt nextSquare((x_old + 1) * (x_old +1));
1162   if (this->ult(square))
1163     return x_old;
1164   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
1165   APInt midpoint((nextSquare - square).udiv(two));
1166   APInt offset(*this - square);
1167   if (offset.ult(midpoint))
1168     return x_old;
1169   return x_old + 1;
1170 }
1171 
1172 /// Computes the multiplicative inverse of this APInt for a given modulo. The
1173 /// iterative extended Euclidean algorithm is used to solve for this value,
1174 /// however we simplify it to speed up calculating only the inverse, and take
1175 /// advantage of div+rem calculations. We also use some tricks to avoid copying
1176 /// (potentially large) APInts around.
1177 /// WARNING: a value of '0' may be returned,
1178 ///          signifying that no multiplicative inverse exists!
1179 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
1180   assert(ult(modulo) && "This APInt must be smaller than the modulo");
1181 
1182   // Using the properties listed at the following web page (accessed 06/21/08):
1183   //   http://www.numbertheory.org/php/euclid.html
1184   // (especially the properties numbered 3, 4 and 9) it can be proved that
1185   // BitWidth bits suffice for all the computations in the algorithm implemented
1186   // below. More precisely, this number of bits suffice if the multiplicative
1187   // inverse exists, but may not suffice for the general extended Euclidean
1188   // algorithm.
1189 
1190   APInt r[2] = { modulo, *this };
1191   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
1192   APInt q(BitWidth, 0);
1193 
1194   unsigned i;
1195   for (i = 0; r[i^1] != 0; i ^= 1) {
1196     // An overview of the math without the confusing bit-flipping:
1197     // q = r[i-2] / r[i-1]
1198     // r[i] = r[i-2] % r[i-1]
1199     // t[i] = t[i-2] - t[i-1] * q
1200     udivrem(r[i], r[i^1], q, r[i]);
1201     t[i] -= t[i^1] * q;
1202   }
1203 
1204   // If this APInt and the modulo are not coprime, there is no multiplicative
1205   // inverse, so return 0. We check this by looking at the next-to-last
1206   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
1207   // algorithm.
1208   if (r[i] != 1)
1209     return APInt(BitWidth, 0);
1210 
1211   // The next-to-last t is the multiplicative inverse.  However, we are
1212   // interested in a positive inverse. Calculate a positive one from a negative
1213   // one if necessary. A simple addition of the modulo suffices because
1214   // abs(t[i]) is known to be less than *this/2 (see the link above).
1215   if (t[i].isNegative())
1216     t[i] += modulo;
1217 
1218   return std::move(t[i]);
1219 }
1220 
1221 /// Calculate the magic numbers required to implement a signed integer division
1222 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
1223 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
1224 /// Warren, Jr., chapter 10.
1225 APInt::ms APInt::magic() const {
1226   const APInt& d = *this;
1227   unsigned p;
1228   APInt ad, anc, delta, q1, r1, q2, r2, t;
1229   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1230   struct ms mag;
1231 
1232   ad = d.abs();
1233   t = signedMin + (d.lshr(d.getBitWidth() - 1));
1234   anc = t - 1 - t.urem(ad);   // absolute value of nc
1235   p = d.getBitWidth() - 1;    // initialize p
1236   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
1237   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
1238   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
1239   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
1240   do {
1241     p = p + 1;
1242     q1 = q1<<1;          // update q1 = 2p/abs(nc)
1243     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
1244     if (r1.uge(anc)) {  // must be unsigned comparison
1245       q1 = q1 + 1;
1246       r1 = r1 - anc;
1247     }
1248     q2 = q2<<1;          // update q2 = 2p/abs(d)
1249     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
1250     if (r2.uge(ad)) {   // must be unsigned comparison
1251       q2 = q2 + 1;
1252       r2 = r2 - ad;
1253     }
1254     delta = ad - r2;
1255   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
1256 
1257   mag.m = q2 + 1;
1258   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
1259   mag.s = p - d.getBitWidth();          // resulting shift
1260   return mag;
1261 }
1262 
1263 /// Calculate the magic numbers required to implement an unsigned integer
1264 /// division by a constant as a sequence of multiplies, adds and shifts.
1265 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
1266 /// S. Warren, Jr., chapter 10.
1267 /// LeadingZeros can be used to simplify the calculation if the upper bits
1268 /// of the divided value are known zero.
1269 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
1270   const APInt& d = *this;
1271   unsigned p;
1272   APInt nc, delta, q1, r1, q2, r2;
1273   struct mu magu;
1274   magu.a = 0;               // initialize "add" indicator
1275   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
1276   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
1277   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
1278 
1279   nc = allOnes - (allOnes - d).urem(d);
1280   p = d.getBitWidth() - 1;  // initialize p
1281   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
1282   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
1283   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
1284   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
1285   do {
1286     p = p + 1;
1287     if (r1.uge(nc - r1)) {
1288       q1 = q1 + q1 + 1;  // update q1
1289       r1 = r1 + r1 - nc; // update r1
1290     }
1291     else {
1292       q1 = q1+q1; // update q1
1293       r1 = r1+r1; // update r1
1294     }
1295     if ((r2 + 1).uge(d - r2)) {
1296       if (q2.uge(signedMax)) magu.a = 1;
1297       q2 = q2+q2 + 1;     // update q2
1298       r2 = r2+r2 + 1 - d; // update r2
1299     }
1300     else {
1301       if (q2.uge(signedMin)) magu.a = 1;
1302       q2 = q2+q2;     // update q2
1303       r2 = r2+r2 + 1; // update r2
1304     }
1305     delta = d - 1 - r2;
1306   } while (p < d.getBitWidth()*2 &&
1307            (q1.ult(delta) || (q1 == delta && r1 == 0)));
1308   magu.m = q2 + 1; // resulting magic number
1309   magu.s = p - d.getBitWidth();  // resulting shift
1310   return magu;
1311 }
1312 
1313 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
1314 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
1315 /// variables here have the same names as in the algorithm. Comments explain
1316 /// the algorithm and any deviation from it.
1317 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
1318                      unsigned m, unsigned n) {
1319   assert(u && "Must provide dividend");
1320   assert(v && "Must provide divisor");
1321   assert(q && "Must provide quotient");
1322   assert(u != v && u != q && v != q && "Must use different memory");
1323   assert(n>1 && "n must be > 1");
1324 
1325   // b denotes the base of the number system. In our case b is 2^32.
1326   const uint64_t b = uint64_t(1) << 32;
1327 
1328 // The DEBUG macros here tend to be spam in the debug output if you're not
1329 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
1330 #ifdef KNUTH_DEBUG
1331 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
1332 #else
1333 #define DEBUG_KNUTH(X) do {} while(false)
1334 #endif
1335 
1336   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
1337   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
1338   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1339   DEBUG_KNUTH(dbgs() << " by");
1340   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1341   DEBUG_KNUTH(dbgs() << '\n');
1342   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
1343   // u and v by d. Note that we have taken Knuth's advice here to use a power
1344   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
1345   // 2 allows us to shift instead of multiply and it is easy to determine the
1346   // shift amount from the leading zeros.  We are basically normalizing the u
1347   // and v so that its high bits are shifted to the top of v's range without
1348   // overflow. Note that this can require an extra word in u so that u must
1349   // be of length m+n+1.
1350   unsigned shift = countLeadingZeros(v[n-1]);
1351   uint32_t v_carry = 0;
1352   uint32_t u_carry = 0;
1353   if (shift) {
1354     for (unsigned i = 0; i < m+n; ++i) {
1355       uint32_t u_tmp = u[i] >> (32 - shift);
1356       u[i] = (u[i] << shift) | u_carry;
1357       u_carry = u_tmp;
1358     }
1359     for (unsigned i = 0; i < n; ++i) {
1360       uint32_t v_tmp = v[i] >> (32 - shift);
1361       v[i] = (v[i] << shift) | v_carry;
1362       v_carry = v_tmp;
1363     }
1364   }
1365   u[m+n] = u_carry;
1366 
1367   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
1368   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1369   DEBUG_KNUTH(dbgs() << " by");
1370   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
1371   DEBUG_KNUTH(dbgs() << '\n');
1372 
1373   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
1374   int j = m;
1375   do {
1376     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
1377     // D3. [Calculate q'.].
1378     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
1379     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
1380     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
1381     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
1382     // on v[n-2] determines at high speed most of the cases in which the trial
1383     // value qp is one too large, and it eliminates all cases where qp is two
1384     // too large.
1385     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
1386     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
1387     uint64_t qp = dividend / v[n-1];
1388     uint64_t rp = dividend % v[n-1];
1389     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
1390       qp--;
1391       rp += v[n-1];
1392       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
1393         qp--;
1394     }
1395     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
1396 
1397     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
1398     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
1399     // consists of a simple multiplication by a one-place number, combined with
1400     // a subtraction.
1401     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
1402     // this step is actually negative, (u[j+n]...u[j]) should be left as the
1403     // true value plus b**(n+1), namely as the b's complement of
1404     // the true value, and a "borrow" to the left should be remembered.
1405     int64_t borrow = 0;
1406     for (unsigned i = 0; i < n; ++i) {
1407       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
1408       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
1409       u[j+i] = Lo_32(subres);
1410       borrow = Hi_32(p) - Hi_32(subres);
1411       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
1412                         << ", borrow = " << borrow << '\n');
1413     }
1414     bool isNeg = u[j+n] < borrow;
1415     u[j+n] -= Lo_32(borrow);
1416 
1417     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
1418     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1419     DEBUG_KNUTH(dbgs() << '\n');
1420 
1421     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
1422     // negative, go to step D6; otherwise go on to step D7.
1423     q[j] = Lo_32(qp);
1424     if (isNeg) {
1425       // D6. [Add back]. The probability that this step is necessary is very
1426       // small, on the order of only 2/b. Make sure that test data accounts for
1427       // this possibility. Decrease q[j] by 1
1428       q[j]--;
1429       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
1430       // A carry will occur to the left of u[j+n], and it should be ignored
1431       // since it cancels with the borrow that occurred in D4.
1432       bool carry = false;
1433       for (unsigned i = 0; i < n; i++) {
1434         uint32_t limit = std::min(u[j+i],v[i]);
1435         u[j+i] += v[i] + carry;
1436         carry = u[j+i] < limit || (carry && u[j+i] == limit);
1437       }
1438       u[j+n] += carry;
1439     }
1440     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
1441     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
1442     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
1443 
1444     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
1445   } while (--j >= 0);
1446 
1447   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
1448   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
1449   DEBUG_KNUTH(dbgs() << '\n');
1450 
1451   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
1452   // remainder may be obtained by dividing u[...] by d. If r is non-null we
1453   // compute the remainder (urem uses this).
1454   if (r) {
1455     // The value d is expressed by the "shift" value above since we avoided
1456     // multiplication by d by using a shift left. So, all we have to do is
1457     // shift right here.
1458     if (shift) {
1459       uint32_t carry = 0;
1460       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
1461       for (int i = n-1; i >= 0; i--) {
1462         r[i] = (u[i] >> shift) | carry;
1463         carry = u[i] << (32 - shift);
1464         DEBUG_KNUTH(dbgs() << " " << r[i]);
1465       }
1466     } else {
1467       for (int i = n-1; i >= 0; i--) {
1468         r[i] = u[i];
1469         DEBUG_KNUTH(dbgs() << " " << r[i]);
1470       }
1471     }
1472     DEBUG_KNUTH(dbgs() << '\n');
1473   }
1474   DEBUG_KNUTH(dbgs() << '\n');
1475 }
1476 
1477 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
1478                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
1479   assert(lhsWords >= rhsWords && "Fractional result");
1480 
1481   // First, compose the values into an array of 32-bit words instead of
1482   // 64-bit words. This is a necessity of both the "short division" algorithm
1483   // and the Knuth "classical algorithm" which requires there to be native
1484   // operations for +, -, and * on an m bit value with an m*2 bit result. We
1485   // can't use 64-bit operands here because we don't have native results of
1486   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
1487   // work on large-endian machines.
1488   unsigned n = rhsWords * 2;
1489   unsigned m = (lhsWords * 2) - n;
1490 
1491   // Allocate space for the temporary values we need either on the stack, if
1492   // it will fit, or on the heap if it won't.
1493   uint32_t SPACE[128];
1494   uint32_t *U = nullptr;
1495   uint32_t *V = nullptr;
1496   uint32_t *Q = nullptr;
1497   uint32_t *R = nullptr;
1498   if ((Remainder?4:3)*n+2*m+1 <= 128) {
1499     U = &SPACE[0];
1500     V = &SPACE[m+n+1];
1501     Q = &SPACE[(m+n+1) + n];
1502     if (Remainder)
1503       R = &SPACE[(m+n+1) + n + (m+n)];
1504   } else {
1505     U = new uint32_t[m + n + 1];
1506     V = new uint32_t[n];
1507     Q = new uint32_t[m+n];
1508     if (Remainder)
1509       R = new uint32_t[n];
1510   }
1511 
1512   // Initialize the dividend
1513   memset(U, 0, (m+n+1)*sizeof(uint32_t));
1514   for (unsigned i = 0; i < lhsWords; ++i) {
1515     uint64_t tmp = LHS[i];
1516     U[i * 2] = Lo_32(tmp);
1517     U[i * 2 + 1] = Hi_32(tmp);
1518   }
1519   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
1520 
1521   // Initialize the divisor
1522   memset(V, 0, (n)*sizeof(uint32_t));
1523   for (unsigned i = 0; i < rhsWords; ++i) {
1524     uint64_t tmp = RHS[i];
1525     V[i * 2] = Lo_32(tmp);
1526     V[i * 2 + 1] = Hi_32(tmp);
1527   }
1528 
1529   // initialize the quotient and remainder
1530   memset(Q, 0, (m+n) * sizeof(uint32_t));
1531   if (Remainder)
1532     memset(R, 0, n * sizeof(uint32_t));
1533 
1534   // Now, adjust m and n for the Knuth division. n is the number of words in
1535   // the divisor. m is the number of words by which the dividend exceeds the
1536   // divisor (i.e. m+n is the length of the dividend). These sizes must not
1537   // contain any zero words or the Knuth algorithm fails.
1538   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
1539     n--;
1540     m++;
1541   }
1542   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
1543     m--;
1544 
1545   // If we're left with only a single word for the divisor, Knuth doesn't work
1546   // so we implement the short division algorithm here. This is much simpler
1547   // and faster because we are certain that we can divide a 64-bit quantity
1548   // by a 32-bit quantity at hardware speed and short division is simply a
1549   // series of such operations. This is just like doing short division but we
1550   // are using base 2^32 instead of base 10.
1551   assert(n != 0 && "Divide by zero?");
1552   if (n == 1) {
1553     uint32_t divisor = V[0];
1554     uint32_t remainder = 0;
1555     for (int i = m; i >= 0; i--) {
1556       uint64_t partial_dividend = Make_64(remainder, U[i]);
1557       if (partial_dividend == 0) {
1558         Q[i] = 0;
1559         remainder = 0;
1560       } else if (partial_dividend < divisor) {
1561         Q[i] = 0;
1562         remainder = Lo_32(partial_dividend);
1563       } else if (partial_dividend == divisor) {
1564         Q[i] = 1;
1565         remainder = 0;
1566       } else {
1567         Q[i] = Lo_32(partial_dividend / divisor);
1568         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
1569       }
1570     }
1571     if (R)
1572       R[0] = remainder;
1573   } else {
1574     // Now we're ready to invoke the Knuth classical divide algorithm. In this
1575     // case n > 1.
1576     KnuthDiv(U, V, Q, R, m, n);
1577   }
1578 
1579   // If the caller wants the quotient
1580   if (Quotient) {
1581     for (unsigned i = 0; i < lhsWords; ++i)
1582       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
1583   }
1584 
1585   // If the caller wants the remainder
1586   if (Remainder) {
1587     for (unsigned i = 0; i < rhsWords; ++i)
1588       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
1589   }
1590 
1591   // Clean up the memory we allocated.
1592   if (U != &SPACE[0]) {
1593     delete [] U;
1594     delete [] V;
1595     delete [] Q;
1596     delete [] R;
1597   }
1598 }
1599 
1600 APInt APInt::udiv(const APInt &RHS) const {
1601   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1602 
1603   // First, deal with the easy case
1604   if (isSingleWord()) {
1605     assert(RHS.U.VAL != 0 && "Divide by zero?");
1606     return APInt(BitWidth, U.VAL / RHS.U.VAL);
1607   }
1608 
1609   // Get some facts about the LHS and RHS number of bits and words
1610   unsigned lhsWords = getNumWords(getActiveBits());
1611   unsigned rhsBits  = RHS.getActiveBits();
1612   unsigned rhsWords = getNumWords(rhsBits);
1613   assert(rhsWords && "Divided by zero???");
1614 
1615   // Deal with some degenerate cases
1616   if (!lhsWords)
1617     // 0 / X ===> 0
1618     return APInt(BitWidth, 0);
1619   if (rhsBits == 1)
1620     // X / 1 ===> X
1621     return *this;
1622   if (lhsWords < rhsWords || this->ult(RHS))
1623     // X / Y ===> 0, iff X < Y
1624     return APInt(BitWidth, 0);
1625   if (*this == RHS)
1626     // X / X ===> 1
1627     return APInt(BitWidth, 1);
1628   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1629     // All high words are zero, just use native divide
1630     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
1631 
1632   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1633   APInt Quotient(BitWidth, 0); // to hold result.
1634   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
1635   return Quotient;
1636 }
1637 
1638 APInt APInt::udiv(uint64_t RHS) const {
1639   assert(RHS != 0 && "Divide by zero?");
1640 
1641   // First, deal with the easy case
1642   if (isSingleWord())
1643     return APInt(BitWidth, U.VAL / RHS);
1644 
1645   // Get some facts about the LHS words.
1646   unsigned lhsWords = getNumWords(getActiveBits());
1647 
1648   // Deal with some degenerate cases
1649   if (!lhsWords)
1650     // 0 / X ===> 0
1651     return APInt(BitWidth, 0);
1652   if (RHS == 1)
1653     // X / 1 ===> X
1654     return *this;
1655   if (this->ult(RHS))
1656     // X / Y ===> 0, iff X < Y
1657     return APInt(BitWidth, 0);
1658   if (*this == RHS)
1659     // X / X ===> 1
1660     return APInt(BitWidth, 1);
1661   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
1662     // All high words are zero, just use native divide
1663     return APInt(BitWidth, this->U.pVal[0] / RHS);
1664 
1665   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1666   APInt Quotient(BitWidth, 0); // to hold result.
1667   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
1668   return Quotient;
1669 }
1670 
1671 APInt APInt::sdiv(const APInt &RHS) const {
1672   if (isNegative()) {
1673     if (RHS.isNegative())
1674       return (-(*this)).udiv(-RHS);
1675     return -((-(*this)).udiv(RHS));
1676   }
1677   if (RHS.isNegative())
1678     return -(this->udiv(-RHS));
1679   return this->udiv(RHS);
1680 }
1681 
1682 APInt APInt::sdiv(int64_t RHS) const {
1683   if (isNegative()) {
1684     if (RHS < 0)
1685       return (-(*this)).udiv(-RHS);
1686     return -((-(*this)).udiv(RHS));
1687   }
1688   if (RHS < 0)
1689     return -(this->udiv(-RHS));
1690   return this->udiv(RHS);
1691 }
1692 
1693 APInt APInt::urem(const APInt &RHS) const {
1694   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1695   if (isSingleWord()) {
1696     assert(RHS.U.VAL != 0 && "Remainder by zero?");
1697     return APInt(BitWidth, U.VAL % RHS.U.VAL);
1698   }
1699 
1700   // Get some facts about the LHS
1701   unsigned lhsWords = getNumWords(getActiveBits());
1702 
1703   // Get some facts about the RHS
1704   unsigned rhsBits = RHS.getActiveBits();
1705   unsigned rhsWords = getNumWords(rhsBits);
1706   assert(rhsWords && "Performing remainder operation by zero ???");
1707 
1708   // Check the degenerate cases
1709   if (lhsWords == 0)
1710     // 0 % Y ===> 0
1711     return APInt(BitWidth, 0);
1712   if (rhsBits == 1)
1713     // X % 1 ===> 0
1714     return APInt(BitWidth, 0);
1715   if (lhsWords < rhsWords || this->ult(RHS))
1716     // X % Y ===> X, iff X < Y
1717     return *this;
1718   if (*this == RHS)
1719     // X % X == 0;
1720     return APInt(BitWidth, 0);
1721   if (lhsWords == 1)
1722     // All high words are zero, just use native remainder
1723     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
1724 
1725   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1726   APInt Remainder(BitWidth, 0);
1727   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
1728   return Remainder;
1729 }
1730 
1731 uint64_t APInt::urem(uint64_t RHS) const {
1732   assert(RHS != 0 && "Remainder by zero?");
1733 
1734   if (isSingleWord())
1735     return U.VAL % RHS;
1736 
1737   // Get some facts about the LHS
1738   unsigned lhsWords = getNumWords(getActiveBits());
1739 
1740   // Check the degenerate cases
1741   if (lhsWords == 0)
1742     // 0 % Y ===> 0
1743     return 0;
1744   if (RHS == 1)
1745     // X % 1 ===> 0
1746     return 0;
1747   if (this->ult(RHS))
1748     // X % Y ===> X, iff X < Y
1749     return getZExtValue();
1750   if (*this == RHS)
1751     // X % X == 0;
1752     return 0;
1753   if (lhsWords == 1)
1754     // All high words are zero, just use native remainder
1755     return U.pVal[0] % RHS;
1756 
1757   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
1758   uint64_t Remainder;
1759   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
1760   return Remainder;
1761 }
1762 
1763 APInt APInt::srem(const APInt &RHS) const {
1764   if (isNegative()) {
1765     if (RHS.isNegative())
1766       return -((-(*this)).urem(-RHS));
1767     return -((-(*this)).urem(RHS));
1768   }
1769   if (RHS.isNegative())
1770     return this->urem(-RHS);
1771   return this->urem(RHS);
1772 }
1773 
1774 int64_t APInt::srem(int64_t RHS) const {
1775   if (isNegative()) {
1776     if (RHS < 0)
1777       return -((-(*this)).urem(-RHS));
1778     return -((-(*this)).urem(RHS));
1779   }
1780   if (RHS < 0)
1781     return this->urem(-RHS);
1782   return this->urem(RHS);
1783 }
1784 
1785 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
1786                     APInt &Quotient, APInt &Remainder) {
1787   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
1788   unsigned BitWidth = LHS.BitWidth;
1789 
1790   // First, deal with the easy case
1791   if (LHS.isSingleWord()) {
1792     assert(RHS.U.VAL != 0 && "Divide by zero?");
1793     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
1794     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
1795     Quotient = APInt(BitWidth, QuotVal);
1796     Remainder = APInt(BitWidth, RemVal);
1797     return;
1798   }
1799 
1800   // Get some size facts about the dividend and divisor
1801   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1802   unsigned rhsBits  = RHS.getActiveBits();
1803   unsigned rhsWords = getNumWords(rhsBits);
1804   assert(rhsWords && "Performing divrem operation by zero ???");
1805 
1806   // Check the degenerate cases
1807   if (lhsWords == 0) {
1808     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1809     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
1810     return;
1811   }
1812 
1813   if (rhsBits == 1) {
1814     Quotient = LHS;                   // X / 1 ===> X
1815     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
1816   }
1817 
1818   if (lhsWords < rhsWords || LHS.ult(RHS)) {
1819     Remainder = LHS;                  // X % Y ===> X, iff X < Y
1820     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1821     return;
1822   }
1823 
1824   if (LHS == RHS) {
1825     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1826     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
1827     return;
1828   }
1829 
1830   // Make sure there is enough space to hold the results.
1831   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1832   // change the size. This is necessary if Quotient or Remainder is aliased
1833   // with LHS or RHS.
1834   Quotient.reallocate(BitWidth);
1835   Remainder.reallocate(BitWidth);
1836 
1837   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1838     // There is only one word to consider so use the native versions.
1839     uint64_t lhsValue = LHS.U.pVal[0];
1840     uint64_t rhsValue = RHS.U.pVal[0];
1841     Quotient = lhsValue / rhsValue;
1842     Remainder = lhsValue % rhsValue;
1843     return;
1844   }
1845 
1846   // Okay, lets do it the long way
1847   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
1848          Remainder.U.pVal);
1849   // Clear the rest of the Quotient and Remainder.
1850   std::memset(Quotient.U.pVal + lhsWords, 0,
1851               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1852   std::memset(Remainder.U.pVal + rhsWords, 0,
1853               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
1854 }
1855 
1856 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1857                     uint64_t &Remainder) {
1858   assert(RHS != 0 && "Divide by zero?");
1859   unsigned BitWidth = LHS.BitWidth;
1860 
1861   // First, deal with the easy case
1862   if (LHS.isSingleWord()) {
1863     uint64_t QuotVal = LHS.U.VAL / RHS;
1864     Remainder = LHS.U.VAL % RHS;
1865     Quotient = APInt(BitWidth, QuotVal);
1866     return;
1867   }
1868 
1869   // Get some size facts about the dividend and divisor
1870   unsigned lhsWords = getNumWords(LHS.getActiveBits());
1871 
1872   // Check the degenerate cases
1873   if (lhsWords == 0) {
1874     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
1875     Remainder = 0;                    // 0 % Y ===> 0
1876     return;
1877   }
1878 
1879   if (RHS == 1) {
1880     Quotient = LHS;                   // X / 1 ===> X
1881     Remainder = 0;                    // X % 1 ===> 0
1882     return;
1883   }
1884 
1885   if (LHS.ult(RHS)) {
1886     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
1887     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
1888     return;
1889   }
1890 
1891   if (LHS == RHS) {
1892     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
1893     Remainder = 0;                    // X % X ===> 0;
1894     return;
1895   }
1896 
1897   // Make sure there is enough space to hold the results.
1898   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
1899   // change the size. This is necessary if Quotient is aliased with LHS.
1900   Quotient.reallocate(BitWidth);
1901 
1902   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
1903     // There is only one word to consider so use the native versions.
1904     uint64_t lhsValue = LHS.U.pVal[0];
1905     Quotient = lhsValue / RHS;
1906     Remainder = lhsValue % RHS;
1907     return;
1908   }
1909 
1910   // Okay, lets do it the long way
1911   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
1912   // Clear the rest of the Quotient.
1913   std::memset(Quotient.U.pVal + lhsWords, 0,
1914               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
1915 }
1916 
1917 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
1918                     APInt &Quotient, APInt &Remainder) {
1919   if (LHS.isNegative()) {
1920     if (RHS.isNegative())
1921       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
1922     else {
1923       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
1924       Quotient.negate();
1925     }
1926     Remainder.negate();
1927   } else if (RHS.isNegative()) {
1928     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
1929     Quotient.negate();
1930   } else {
1931     APInt::udivrem(LHS, RHS, Quotient, Remainder);
1932   }
1933 }
1934 
1935 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
1936                     APInt &Quotient, int64_t &Remainder) {
1937   uint64_t R = Remainder;
1938   if (LHS.isNegative()) {
1939     if (RHS < 0)
1940       APInt::udivrem(-LHS, -RHS, Quotient, R);
1941     else {
1942       APInt::udivrem(-LHS, RHS, Quotient, R);
1943       Quotient.negate();
1944     }
1945     R = -R;
1946   } else if (RHS < 0) {
1947     APInt::udivrem(LHS, -RHS, Quotient, R);
1948     Quotient.negate();
1949   } else {
1950     APInt::udivrem(LHS, RHS, Quotient, R);
1951   }
1952   Remainder = R;
1953 }
1954 
1955 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
1956   APInt Res = *this+RHS;
1957   Overflow = isNonNegative() == RHS.isNonNegative() &&
1958              Res.isNonNegative() != isNonNegative();
1959   return Res;
1960 }
1961 
1962 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
1963   APInt Res = *this+RHS;
1964   Overflow = Res.ult(RHS);
1965   return Res;
1966 }
1967 
1968 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
1969   APInt Res = *this - RHS;
1970   Overflow = isNonNegative() != RHS.isNonNegative() &&
1971              Res.isNonNegative() != isNonNegative();
1972   return Res;
1973 }
1974 
1975 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
1976   APInt Res = *this-RHS;
1977   Overflow = Res.ugt(*this);
1978   return Res;
1979 }
1980 
1981 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
1982   // MININT/-1  -->  overflow.
1983   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
1984   return sdiv(RHS);
1985 }
1986 
1987 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
1988   APInt Res = *this * RHS;
1989 
1990   if (*this != 0 && RHS != 0)
1991     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
1992   else
1993     Overflow = false;
1994   return Res;
1995 }
1996 
1997 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
1998   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
1999     Overflow = true;
2000     return *this * RHS;
2001   }
2002 
2003   APInt Res = lshr(1) * RHS;
2004   Overflow = Res.isNegative();
2005   Res <<= 1;
2006   if ((*this)[0]) {
2007     Res += RHS;
2008     if (Res.ult(RHS))
2009       Overflow = true;
2010   }
2011   return Res;
2012 }
2013 
2014 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
2015   Overflow = ShAmt.uge(getBitWidth());
2016   if (Overflow)
2017     return APInt(BitWidth, 0);
2018 
2019   if (isNonNegative()) // Don't allow sign change.
2020     Overflow = ShAmt.uge(countLeadingZeros());
2021   else
2022     Overflow = ShAmt.uge(countLeadingOnes());
2023 
2024   return *this << ShAmt;
2025 }
2026 
2027 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
2028   Overflow = ShAmt.uge(getBitWidth());
2029   if (Overflow)
2030     return APInt(BitWidth, 0);
2031 
2032   Overflow = ShAmt.ugt(countLeadingZeros());
2033 
2034   return *this << ShAmt;
2035 }
2036 
2037 APInt APInt::sadd_sat(const APInt &RHS) const {
2038   bool Overflow;
2039   APInt Res = sadd_ov(RHS, Overflow);
2040   if (!Overflow)
2041     return Res;
2042 
2043   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2044                       : APInt::getSignedMaxValue(BitWidth);
2045 }
2046 
2047 APInt APInt::uadd_sat(const APInt &RHS) const {
2048   bool Overflow;
2049   APInt Res = uadd_ov(RHS, Overflow);
2050   if (!Overflow)
2051     return Res;
2052 
2053   return APInt::getMaxValue(BitWidth);
2054 }
2055 
2056 APInt APInt::ssub_sat(const APInt &RHS) const {
2057   bool Overflow;
2058   APInt Res = ssub_ov(RHS, Overflow);
2059   if (!Overflow)
2060     return Res;
2061 
2062   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2063                       : APInt::getSignedMaxValue(BitWidth);
2064 }
2065 
2066 APInt APInt::usub_sat(const APInt &RHS) const {
2067   bool Overflow;
2068   APInt Res = usub_ov(RHS, Overflow);
2069   if (!Overflow)
2070     return Res;
2071 
2072   return APInt(BitWidth, 0);
2073 }
2074 
2075 APInt APInt::smul_sat(const APInt &RHS) const {
2076   bool Overflow;
2077   APInt Res = smul_ov(RHS, Overflow);
2078   if (!Overflow)
2079     return Res;
2080 
2081   // The result is negative if one and only one of inputs is negative.
2082   bool ResIsNegative = isNegative() ^ RHS.isNegative();
2083 
2084   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
2085                        : APInt::getSignedMaxValue(BitWidth);
2086 }
2087 
2088 APInt APInt::umul_sat(const APInt &RHS) const {
2089   bool Overflow;
2090   APInt Res = umul_ov(RHS, Overflow);
2091   if (!Overflow)
2092     return Res;
2093 
2094   return APInt::getMaxValue(BitWidth);
2095 }
2096 
2097 APInt APInt::sshl_sat(const APInt &RHS) const {
2098   bool Overflow;
2099   APInt Res = sshl_ov(RHS, Overflow);
2100   if (!Overflow)
2101     return Res;
2102 
2103   return isNegative() ? APInt::getSignedMinValue(BitWidth)
2104                       : APInt::getSignedMaxValue(BitWidth);
2105 }
2106 
2107 APInt APInt::ushl_sat(const APInt &RHS) const {
2108   bool Overflow;
2109   APInt Res = ushl_ov(RHS, Overflow);
2110   if (!Overflow)
2111     return Res;
2112 
2113   return APInt::getMaxValue(BitWidth);
2114 }
2115 
2116 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
2117   // Check our assumptions here
2118   assert(!str.empty() && "Invalid string length");
2119   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
2120           radix == 36) &&
2121          "Radix should be 2, 8, 10, 16, or 36!");
2122 
2123   StringRef::iterator p = str.begin();
2124   size_t slen = str.size();
2125   bool isNeg = *p == '-';
2126   if (*p == '-' || *p == '+') {
2127     p++;
2128     slen--;
2129     assert(slen && "String is only a sign, needs a value.");
2130   }
2131   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
2132   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
2133   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
2134   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
2135          "Insufficient bit width");
2136 
2137   // Allocate memory if needed
2138   if (isSingleWord())
2139     U.VAL = 0;
2140   else
2141     U.pVal = getClearedMemory(getNumWords());
2142 
2143   // Figure out if we can shift instead of multiply
2144   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
2145 
2146   // Enter digit traversal loop
2147   for (StringRef::iterator e = str.end(); p != e; ++p) {
2148     unsigned digit = getDigit(*p, radix);
2149     assert(digit < radix && "Invalid character in digit string");
2150 
2151     // Shift or multiply the value by the radix
2152     if (slen > 1) {
2153       if (shift)
2154         *this <<= shift;
2155       else
2156         *this *= radix;
2157     }
2158 
2159     // Add in the digit we just interpreted
2160     *this += digit;
2161   }
2162   // If its negative, put it in two's complement form
2163   if (isNeg)
2164     this->negate();
2165 }
2166 
2167 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
2168                      bool Signed, bool formatAsCLiteral) const {
2169   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
2170           Radix == 36) &&
2171          "Radix should be 2, 8, 10, 16, or 36!");
2172 
2173   const char *Prefix = "";
2174   if (formatAsCLiteral) {
2175     switch (Radix) {
2176       case 2:
2177         // Binary literals are a non-standard extension added in gcc 4.3:
2178         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
2179         Prefix = "0b";
2180         break;
2181       case 8:
2182         Prefix = "0";
2183         break;
2184       case 10:
2185         break; // No prefix
2186       case 16:
2187         Prefix = "0x";
2188         break;
2189       default:
2190         llvm_unreachable("Invalid radix!");
2191     }
2192   }
2193 
2194   // First, check for a zero value and just short circuit the logic below.
2195   if (*this == 0) {
2196     while (*Prefix) {
2197       Str.push_back(*Prefix);
2198       ++Prefix;
2199     };
2200     Str.push_back('0');
2201     return;
2202   }
2203 
2204   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2205 
2206   if (isSingleWord()) {
2207     char Buffer[65];
2208     char *BufPtr = std::end(Buffer);
2209 
2210     uint64_t N;
2211     if (!Signed) {
2212       N = getZExtValue();
2213     } else {
2214       int64_t I = getSExtValue();
2215       if (I >= 0) {
2216         N = I;
2217       } else {
2218         Str.push_back('-');
2219         N = -(uint64_t)I;
2220       }
2221     }
2222 
2223     while (*Prefix) {
2224       Str.push_back(*Prefix);
2225       ++Prefix;
2226     };
2227 
2228     while (N) {
2229       *--BufPtr = Digits[N % Radix];
2230       N /= Radix;
2231     }
2232     Str.append(BufPtr, std::end(Buffer));
2233     return;
2234   }
2235 
2236   APInt Tmp(*this);
2237 
2238   if (Signed && isNegative()) {
2239     // They want to print the signed version and it is a negative value
2240     // Flip the bits and add one to turn it into the equivalent positive
2241     // value and put a '-' in the result.
2242     Tmp.negate();
2243     Str.push_back('-');
2244   }
2245 
2246   while (*Prefix) {
2247     Str.push_back(*Prefix);
2248     ++Prefix;
2249   };
2250 
2251   // We insert the digits backward, then reverse them to get the right order.
2252   unsigned StartDig = Str.size();
2253 
2254   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
2255   // because the number of bits per digit (1, 3 and 4 respectively) divides
2256   // equally.  We just shift until the value is zero.
2257   if (Radix == 2 || Radix == 8 || Radix == 16) {
2258     // Just shift tmp right for each digit width until it becomes zero
2259     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
2260     unsigned MaskAmt = Radix - 1;
2261 
2262     while (Tmp.getBoolValue()) {
2263       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
2264       Str.push_back(Digits[Digit]);
2265       Tmp.lshrInPlace(ShiftAmt);
2266     }
2267   } else {
2268     while (Tmp.getBoolValue()) {
2269       uint64_t Digit;
2270       udivrem(Tmp, Radix, Tmp, Digit);
2271       assert(Digit < Radix && "divide failed");
2272       Str.push_back(Digits[Digit]);
2273     }
2274   }
2275 
2276   // Reverse the digits before returning.
2277   std::reverse(Str.begin()+StartDig, Str.end());
2278 }
2279 
2280 /// Returns the APInt as a std::string. Note that this is an inefficient method.
2281 /// It is better to pass in a SmallVector/SmallString to the methods above.
2282 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
2283   SmallString<40> S;
2284   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
2285   return std::string(S.str());
2286 }
2287 
2288 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2289 LLVM_DUMP_METHOD void APInt::dump() const {
2290   SmallString<40> S, U;
2291   this->toStringUnsigned(U);
2292   this->toStringSigned(S);
2293   dbgs() << "APInt(" << BitWidth << "b, "
2294          << U << "u " << S << "s)\n";
2295 }
2296 #endif
2297 
2298 void APInt::print(raw_ostream &OS, bool isSigned) const {
2299   SmallString<40> S;
2300   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
2301   OS << S;
2302 }
2303 
2304 // This implements a variety of operations on a representation of
2305 // arbitrary precision, two's-complement, bignum integer values.
2306 
2307 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
2308 // and unrestricting assumption.
2309 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
2310               "Part width must be divisible by 2!");
2311 
2312 /* Some handy functions local to this file.  */
2313 
2314 /* Returns the integer part with the least significant BITS set.
2315    BITS cannot be zero.  */
2316 static inline APInt::WordType lowBitMask(unsigned bits) {
2317   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
2318 
2319   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
2320 }
2321 
2322 /* Returns the value of the lower half of PART.  */
2323 static inline APInt::WordType lowHalf(APInt::WordType part) {
2324   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
2325 }
2326 
2327 /* Returns the value of the upper half of PART.  */
2328 static inline APInt::WordType highHalf(APInt::WordType part) {
2329   return part >> (APInt::APINT_BITS_PER_WORD / 2);
2330 }
2331 
2332 /* Returns the bit number of the most significant set bit of a part.
2333    If the input number has no bits set -1U is returned.  */
2334 static unsigned partMSB(APInt::WordType value) {
2335   return findLastSet(value, ZB_Max);
2336 }
2337 
2338 /* Returns the bit number of the least significant set bit of a
2339    part.  If the input number has no bits set -1U is returned.  */
2340 static unsigned partLSB(APInt::WordType value) {
2341   return findFirstSet(value, ZB_Max);
2342 }
2343 
2344 /* Sets the least significant part of a bignum to the input value, and
2345    zeroes out higher parts.  */
2346 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
2347   assert(parts > 0);
2348 
2349   dst[0] = part;
2350   for (unsigned i = 1; i < parts; i++)
2351     dst[i] = 0;
2352 }
2353 
2354 /* Assign one bignum to another.  */
2355 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
2356   for (unsigned i = 0; i < parts; i++)
2357     dst[i] = src[i];
2358 }
2359 
2360 /* Returns true if a bignum is zero, false otherwise.  */
2361 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
2362   for (unsigned i = 0; i < parts; i++)
2363     if (src[i])
2364       return false;
2365 
2366   return true;
2367 }
2368 
2369 /* Extract the given bit of a bignum; returns 0 or 1.  */
2370 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
2371   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
2372 }
2373 
2374 /* Set the given bit of a bignum. */
2375 void APInt::tcSetBit(WordType *parts, unsigned bit) {
2376   parts[whichWord(bit)] |= maskBit(bit);
2377 }
2378 
2379 /* Clears the given bit of a bignum. */
2380 void APInt::tcClearBit(WordType *parts, unsigned bit) {
2381   parts[whichWord(bit)] &= ~maskBit(bit);
2382 }
2383 
2384 /* Returns the bit number of the least significant set bit of a
2385    number.  If the input number has no bits set -1U is returned.  */
2386 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
2387   for (unsigned i = 0; i < n; i++) {
2388     if (parts[i] != 0) {
2389       unsigned lsb = partLSB(parts[i]);
2390 
2391       return lsb + i * APINT_BITS_PER_WORD;
2392     }
2393   }
2394 
2395   return -1U;
2396 }
2397 
2398 /* Returns the bit number of the most significant set bit of a number.
2399    If the input number has no bits set -1U is returned.  */
2400 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
2401   do {
2402     --n;
2403 
2404     if (parts[n] != 0) {
2405       unsigned msb = partMSB(parts[n]);
2406 
2407       return msb + n * APINT_BITS_PER_WORD;
2408     }
2409   } while (n);
2410 
2411   return -1U;
2412 }
2413 
2414 /* Copy the bit vector of width srcBITS from SRC, starting at bit
2415    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
2416    the least significant bit of DST.  All high bits above srcBITS in
2417    DST are zero-filled.  */
2418 void
2419 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
2420                  unsigned srcBits, unsigned srcLSB) {
2421   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
2422   assert(dstParts <= dstCount);
2423 
2424   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
2425   tcAssign (dst, src + firstSrcPart, dstParts);
2426 
2427   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
2428   tcShiftRight (dst, dstParts, shift);
2429 
2430   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
2431      in DST.  If this is less that srcBits, append the rest, else
2432      clear the high bits.  */
2433   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
2434   if (n < srcBits) {
2435     WordType mask = lowBitMask (srcBits - n);
2436     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
2437                           << n % APINT_BITS_PER_WORD);
2438   } else if (n > srcBits) {
2439     if (srcBits % APINT_BITS_PER_WORD)
2440       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
2441   }
2442 
2443   /* Clear high parts.  */
2444   while (dstParts < dstCount)
2445     dst[dstParts++] = 0;
2446 }
2447 
2448 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
2449 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
2450                              WordType c, unsigned parts) {
2451   assert(c <= 1);
2452 
2453   for (unsigned i = 0; i < parts; i++) {
2454     WordType l = dst[i];
2455     if (c) {
2456       dst[i] += rhs[i] + 1;
2457       c = (dst[i] <= l);
2458     } else {
2459       dst[i] += rhs[i];
2460       c = (dst[i] < l);
2461     }
2462   }
2463 
2464   return c;
2465 }
2466 
2467 /// This function adds a single "word" integer, src, to the multiple
2468 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
2469 /// 1 is returned if there is a carry out, otherwise 0 is returned.
2470 /// @returns the carry of the addition.
2471 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
2472                                  unsigned parts) {
2473   for (unsigned i = 0; i < parts; ++i) {
2474     dst[i] += src;
2475     if (dst[i] >= src)
2476       return 0; // No need to carry so exit early.
2477     src = 1; // Carry one to next digit.
2478   }
2479 
2480   return 1;
2481 }
2482 
2483 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
2484 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
2485                                   WordType c, unsigned parts) {
2486   assert(c <= 1);
2487 
2488   for (unsigned i = 0; i < parts; i++) {
2489     WordType l = dst[i];
2490     if (c) {
2491       dst[i] -= rhs[i] + 1;
2492       c = (dst[i] >= l);
2493     } else {
2494       dst[i] -= rhs[i];
2495       c = (dst[i] > l);
2496     }
2497   }
2498 
2499   return c;
2500 }
2501 
2502 /// This function subtracts a single "word" (64-bit word), src, from
2503 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
2504 /// no further borrowing is needed or it runs out of "words" in dst.  The result
2505 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
2506 /// exhausted. In other words, if src > dst then this function returns 1,
2507 /// otherwise 0.
2508 /// @returns the borrow out of the subtraction
2509 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
2510                                       unsigned parts) {
2511   for (unsigned i = 0; i < parts; ++i) {
2512     WordType Dst = dst[i];
2513     dst[i] -= src;
2514     if (src <= Dst)
2515       return 0; // No need to borrow so exit early.
2516     src = 1; // We have to "borrow 1" from next "word"
2517   }
2518 
2519   return 1;
2520 }
2521 
2522 /* Negate a bignum in-place.  */
2523 void APInt::tcNegate(WordType *dst, unsigned parts) {
2524   tcComplement(dst, parts);
2525   tcIncrement(dst, parts);
2526 }
2527 
2528 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
2529     DST  = SRC * MULTIPLIER + CARRY   if add is false
2530 
2531     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
2532     they must start at the same point, i.e. DST == SRC.
2533 
2534     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
2535     returned.  Otherwise DST is filled with the least significant
2536     DSTPARTS parts of the result, and if all of the omitted higher
2537     parts were zero return zero, otherwise overflow occurred and
2538     return one.  */
2539 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
2540                           WordType multiplier, WordType carry,
2541                           unsigned srcParts, unsigned dstParts,
2542                           bool add) {
2543   /* Otherwise our writes of DST kill our later reads of SRC.  */
2544   assert(dst <= src || dst >= src + srcParts);
2545   assert(dstParts <= srcParts + 1);
2546 
2547   /* N loops; minimum of dstParts and srcParts.  */
2548   unsigned n = std::min(dstParts, srcParts);
2549 
2550   for (unsigned i = 0; i < n; i++) {
2551     WordType low, mid, high, srcPart;
2552 
2553       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
2554 
2555          This cannot overflow, because
2556 
2557          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
2558 
2559          which is less than n^2.  */
2560 
2561     srcPart = src[i];
2562 
2563     if (multiplier == 0 || srcPart == 0) {
2564       low = carry;
2565       high = 0;
2566     } else {
2567       low = lowHalf(srcPart) * lowHalf(multiplier);
2568       high = highHalf(srcPart) * highHalf(multiplier);
2569 
2570       mid = lowHalf(srcPart) * highHalf(multiplier);
2571       high += highHalf(mid);
2572       mid <<= APINT_BITS_PER_WORD / 2;
2573       if (low + mid < low)
2574         high++;
2575       low += mid;
2576 
2577       mid = highHalf(srcPart) * lowHalf(multiplier);
2578       high += highHalf(mid);
2579       mid <<= APINT_BITS_PER_WORD / 2;
2580       if (low + mid < low)
2581         high++;
2582       low += mid;
2583 
2584       /* Now add carry.  */
2585       if (low + carry < low)
2586         high++;
2587       low += carry;
2588     }
2589 
2590     if (add) {
2591       /* And now DST[i], and store the new low part there.  */
2592       if (low + dst[i] < low)
2593         high++;
2594       dst[i] += low;
2595     } else
2596       dst[i] = low;
2597 
2598     carry = high;
2599   }
2600 
2601   if (srcParts < dstParts) {
2602     /* Full multiplication, there is no overflow.  */
2603     assert(srcParts + 1 == dstParts);
2604     dst[srcParts] = carry;
2605     return 0;
2606   }
2607 
2608   /* We overflowed if there is carry.  */
2609   if (carry)
2610     return 1;
2611 
2612   /* We would overflow if any significant unwritten parts would be
2613      non-zero.  This is true if any remaining src parts are non-zero
2614      and the multiplier is non-zero.  */
2615   if (multiplier)
2616     for (unsigned i = dstParts; i < srcParts; i++)
2617       if (src[i])
2618         return 1;
2619 
2620   /* We fitted in the narrow destination.  */
2621   return 0;
2622 }
2623 
2624 /* DST = LHS * RHS, where DST has the same width as the operands and
2625    is filled with the least significant parts of the result.  Returns
2626    one if overflow occurred, otherwise zero.  DST must be disjoint
2627    from both operands.  */
2628 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
2629                       const WordType *rhs, unsigned parts) {
2630   assert(dst != lhs && dst != rhs);
2631 
2632   int overflow = 0;
2633   tcSet(dst, 0, parts);
2634 
2635   for (unsigned i = 0; i < parts; i++)
2636     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
2637                                parts - i, true);
2638 
2639   return overflow;
2640 }
2641 
2642 /// DST = LHS * RHS, where DST has width the sum of the widths of the
2643 /// operands. No overflow occurs. DST must be disjoint from both operands.
2644 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
2645                            const WordType *rhs, unsigned lhsParts,
2646                            unsigned rhsParts) {
2647   /* Put the narrower number on the LHS for less loops below.  */
2648   if (lhsParts > rhsParts)
2649     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
2650 
2651   assert(dst != lhs && dst != rhs);
2652 
2653   tcSet(dst, 0, rhsParts);
2654 
2655   for (unsigned i = 0; i < lhsParts; i++)
2656     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
2657 }
2658 
2659 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
2660    Otherwise set LHS to LHS / RHS with the fractional part discarded,
2661    set REMAINDER to the remainder, return zero.  i.e.
2662 
2663    OLD_LHS = RHS * LHS + REMAINDER
2664 
2665    SCRATCH is a bignum of the same size as the operands and result for
2666    use by the routine; its contents need not be initialized and are
2667    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
2668 */
2669 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
2670                     WordType *remainder, WordType *srhs,
2671                     unsigned parts) {
2672   assert(lhs != remainder && lhs != srhs && remainder != srhs);
2673 
2674   unsigned shiftCount = tcMSB(rhs, parts) + 1;
2675   if (shiftCount == 0)
2676     return true;
2677 
2678   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
2679   unsigned n = shiftCount / APINT_BITS_PER_WORD;
2680   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
2681 
2682   tcAssign(srhs, rhs, parts);
2683   tcShiftLeft(srhs, parts, shiftCount);
2684   tcAssign(remainder, lhs, parts);
2685   tcSet(lhs, 0, parts);
2686 
2687   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
2688      the total.  */
2689   for (;;) {
2690     int compare = tcCompare(remainder, srhs, parts);
2691     if (compare >= 0) {
2692       tcSubtract(remainder, srhs, 0, parts);
2693       lhs[n] |= mask;
2694     }
2695 
2696     if (shiftCount == 0)
2697       break;
2698     shiftCount--;
2699     tcShiftRight(srhs, parts, 1);
2700     if ((mask >>= 1) == 0) {
2701       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
2702       n--;
2703     }
2704   }
2705 
2706   return false;
2707 }
2708 
2709 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
2710 /// no restrictions on Count.
2711 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
2712   // Don't bother performing a no-op shift.
2713   if (!Count)
2714     return;
2715 
2716   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2717   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2718   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2719 
2720   // Fastpath for moving by whole words.
2721   if (BitShift == 0) {
2722     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
2723   } else {
2724     while (Words-- > WordShift) {
2725       Dst[Words] = Dst[Words - WordShift] << BitShift;
2726       if (Words > WordShift)
2727         Dst[Words] |=
2728           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
2729     }
2730   }
2731 
2732   // Fill in the remainder with 0s.
2733   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
2734 }
2735 
2736 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
2737 /// are no restrictions on Count.
2738 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
2739   // Don't bother performing a no-op shift.
2740   if (!Count)
2741     return;
2742 
2743   // WordShift is the inter-part shift; BitShift is the intra-part shift.
2744   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
2745   unsigned BitShift = Count % APINT_BITS_PER_WORD;
2746 
2747   unsigned WordsToMove = Words - WordShift;
2748   // Fastpath for moving by whole words.
2749   if (BitShift == 0) {
2750     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
2751   } else {
2752     for (unsigned i = 0; i != WordsToMove; ++i) {
2753       Dst[i] = Dst[i + WordShift] >> BitShift;
2754       if (i + 1 != WordsToMove)
2755         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
2756     }
2757   }
2758 
2759   // Fill in the remainder with 0s.
2760   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
2761 }
2762 
2763 /* Bitwise and of two bignums.  */
2764 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
2765   for (unsigned i = 0; i < parts; i++)
2766     dst[i] &= rhs[i];
2767 }
2768 
2769 /* Bitwise inclusive or of two bignums.  */
2770 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
2771   for (unsigned i = 0; i < parts; i++)
2772     dst[i] |= rhs[i];
2773 }
2774 
2775 /* Bitwise exclusive or of two bignums.  */
2776 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
2777   for (unsigned i = 0; i < parts; i++)
2778     dst[i] ^= rhs[i];
2779 }
2780 
2781 /* Complement a bignum in-place.  */
2782 void APInt::tcComplement(WordType *dst, unsigned parts) {
2783   for (unsigned i = 0; i < parts; i++)
2784     dst[i] = ~dst[i];
2785 }
2786 
2787 /* Comparison (unsigned) of two bignums.  */
2788 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
2789                      unsigned parts) {
2790   while (parts) {
2791     parts--;
2792     if (lhs[parts] != rhs[parts])
2793       return (lhs[parts] > rhs[parts]) ? 1 : -1;
2794   }
2795 
2796   return 0;
2797 }
2798 
2799 /* Set the least significant BITS bits of a bignum, clear the
2800    rest.  */
2801 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
2802                                       unsigned bits) {
2803   unsigned i = 0;
2804   while (bits > APINT_BITS_PER_WORD) {
2805     dst[i++] = ~(WordType) 0;
2806     bits -= APINT_BITS_PER_WORD;
2807   }
2808 
2809   if (bits)
2810     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
2811 
2812   while (i < parts)
2813     dst[i++] = 0;
2814 }
2815 
2816 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
2817                                    APInt::Rounding RM) {
2818   // Currently udivrem always rounds down.
2819   switch (RM) {
2820   case APInt::Rounding::DOWN:
2821   case APInt::Rounding::TOWARD_ZERO:
2822     return A.udiv(B);
2823   case APInt::Rounding::UP: {
2824     APInt Quo, Rem;
2825     APInt::udivrem(A, B, Quo, Rem);
2826     if (Rem == 0)
2827       return Quo;
2828     return Quo + 1;
2829   }
2830   }
2831   llvm_unreachable("Unknown APInt::Rounding enum");
2832 }
2833 
2834 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
2835                                    APInt::Rounding RM) {
2836   switch (RM) {
2837   case APInt::Rounding::DOWN:
2838   case APInt::Rounding::UP: {
2839     APInt Quo, Rem;
2840     APInt::sdivrem(A, B, Quo, Rem);
2841     if (Rem == 0)
2842       return Quo;
2843     // This algorithm deals with arbitrary rounding mode used by sdivrem.
2844     // We want to check whether the non-integer part of the mathematical value
2845     // is negative or not. If the non-integer part is negative, we need to round
2846     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
2847     // already rounded down.
2848     if (RM == APInt::Rounding::DOWN) {
2849       if (Rem.isNegative() != B.isNegative())
2850         return Quo - 1;
2851       return Quo;
2852     }
2853     if (Rem.isNegative() != B.isNegative())
2854       return Quo;
2855     return Quo + 1;
2856   }
2857   // Currently sdiv rounds towards zero.
2858   case APInt::Rounding::TOWARD_ZERO:
2859     return A.sdiv(B);
2860   }
2861   llvm_unreachable("Unknown APInt::Rounding enum");
2862 }
2863 
2864 Optional<APInt>
2865 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2866                                            unsigned RangeWidth) {
2867   unsigned CoeffWidth = A.getBitWidth();
2868   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
2869   assert(RangeWidth <= CoeffWidth &&
2870          "Value range width should be less than coefficient width");
2871   assert(RangeWidth > 1 && "Value range bit width should be > 1");
2872 
2873   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
2874                     << "x + " << C << ", rw:" << RangeWidth << '\n');
2875 
2876   // Identify 0 as a (non)solution immediately.
2877   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
2878     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
2879     return APInt(CoeffWidth, 0);
2880   }
2881 
2882   // The result of APInt arithmetic has the same bit width as the operands,
2883   // so it can actually lose high bits. A product of two n-bit integers needs
2884   // 2n-1 bits to represent the full value.
2885   // The operation done below (on quadratic coefficients) that can produce
2886   // the largest value is the evaluation of the equation during bisection,
2887   // which needs 3 times the bitwidth of the coefficient, so the total number
2888   // of required bits is 3n.
2889   //
2890   // The purpose of this extension is to simulate the set Z of all integers,
2891   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
2892   // and negative numbers (not so much in a modulo arithmetic). The method
2893   // used to solve the equation is based on the standard formula for real
2894   // numbers, and uses the concepts of "positive" and "negative" with their
2895   // usual meanings.
2896   CoeffWidth *= 3;
2897   A = A.sext(CoeffWidth);
2898   B = B.sext(CoeffWidth);
2899   C = C.sext(CoeffWidth);
2900 
2901   // Make A > 0 for simplicity. Negate cannot overflow at this point because
2902   // the bit width has increased.
2903   if (A.isNegative()) {
2904     A.negate();
2905     B.negate();
2906     C.negate();
2907   }
2908 
2909   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
2910   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
2911   // and R = 2^BitWidth.
2912   // Since we're trying not only to find exact solutions, but also values
2913   // that "wrap around", such a set will always have a solution, i.e. an x
2914   // that satisfies at least one of the equations, or such that |q(x)|
2915   // exceeds kR, while |q(x-1)| for the same k does not.
2916   //
2917   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
2918   // positive solution n (in the above sense), and also such that the n
2919   // will be the least among all solutions corresponding to k = 0, 1, ...
2920   // (more precisely, the least element in the set
2921   //   { n(k) | k is such that a solution n(k) exists }).
2922   //
2923   // Consider the parabola (over real numbers) that corresponds to the
2924   // quadratic equation. Since A > 0, the arms of the parabola will point
2925   // up. Picking different values of k will shift it up and down by R.
2926   //
2927   // We want to shift the parabola in such a way as to reduce the problem
2928   // of solving q(x) = kR to solving shifted_q(x) = 0.
2929   // (The interesting solutions are the ceilings of the real number
2930   // solutions.)
2931   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
2932   APInt TwoA = 2 * A;
2933   APInt SqrB = B * B;
2934   bool PickLow;
2935 
2936   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
2937     assert(A.isStrictlyPositive());
2938     APInt T = V.abs().urem(A);
2939     if (T.isNullValue())
2940       return V;
2941     return V.isNegative() ? V+T : V+(A-T);
2942   };
2943 
2944   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
2945   // iff B is positive.
2946   if (B.isNonNegative()) {
2947     // If B >= 0, the vertex it at a negative location (or at 0), so in
2948     // order to have a non-negative solution we need to pick k that makes
2949     // C-kR negative. To satisfy all the requirements for the solution
2950     // that we are looking for, it needs to be closest to 0 of all k.
2951     C = C.srem(R);
2952     if (C.isStrictlyPositive())
2953       C -= R;
2954     // Pick the greater solution.
2955     PickLow = false;
2956   } else {
2957     // If B < 0, the vertex is at a positive location. For any solution
2958     // to exist, the discriminant must be non-negative. This means that
2959     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
2960     // lower bound on values of k: kR >= C - B^2/4A.
2961     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
2962     // Round LowkR up (towards +inf) to the nearest kR.
2963     LowkR = RoundUp(LowkR, R);
2964 
2965     // If there exists k meeting the condition above, and such that
2966     // C-kR > 0, there will be two positive real number solutions of
2967     // q(x) = kR. Out of all such values of k, pick the one that makes
2968     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
2969     // In other words, find maximum k such that LowkR <= kR < C.
2970     if (C.sgt(LowkR)) {
2971       // If LowkR < C, then such a k is guaranteed to exist because
2972       // LowkR itself is a multiple of R.
2973       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
2974       // Pick the smaller solution.
2975       PickLow = true;
2976     } else {
2977       // If C-kR < 0 for all potential k's, it means that one solution
2978       // will be negative, while the other will be positive. The positive
2979       // solution will shift towards 0 if the parabola is moved up.
2980       // Pick the kR closest to the lower bound (i.e. make C-kR closest
2981       // to 0, or in other words, out of all parabolas that have solutions,
2982       // pick the one that is the farthest "up").
2983       // Since LowkR is itself a multiple of R, simply take C-LowkR.
2984       C -= LowkR;
2985       // Pick the greater solution.
2986       PickLow = false;
2987     }
2988   }
2989 
2990   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
2991                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
2992 
2993   APInt D = SqrB - 4*A*C;
2994   assert(D.isNonNegative() && "Negative discriminant");
2995   APInt SQ = D.sqrt();
2996 
2997   APInt Q = SQ * SQ;
2998   bool InexactSQ = Q != D;
2999   // The calculated SQ may actually be greater than the exact (non-integer)
3000   // value. If that's the case, decrement SQ to get a value that is lower.
3001   if (Q.sgt(D))
3002     SQ -= 1;
3003 
3004   APInt X;
3005   APInt Rem;
3006 
3007   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
3008   // When using the quadratic formula directly, the calculated low root
3009   // may be greater than the exact one, since we would be subtracting SQ.
3010   // To make sure that the calculated root is not greater than the exact
3011   // one, subtract SQ+1 when calculating the low root (for inexact value
3012   // of SQ).
3013   if (PickLow)
3014     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
3015   else
3016     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
3017 
3018   // The updated coefficients should be such that the (exact) solution is
3019   // positive. Since APInt division rounds towards 0, the calculated one
3020   // can be 0, but cannot be negative.
3021   assert(X.isNonNegative() && "Solution should be non-negative");
3022 
3023   if (!InexactSQ && Rem.isNullValue()) {
3024     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
3025     return X;
3026   }
3027 
3028   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
3029   // The exact value of the square root of D should be between SQ and SQ+1.
3030   // This implies that the solution should be between that corresponding to
3031   // SQ (i.e. X) and that corresponding to SQ+1.
3032   //
3033   // The calculated X cannot be greater than the exact (real) solution.
3034   // Actually it must be strictly less than the exact solution, while
3035   // X+1 will be greater than or equal to it.
3036 
3037   APInt VX = (A*X + B)*X + C;
3038   APInt VY = VX + TwoA*X + A + B;
3039   bool SignChange = VX.isNegative() != VY.isNegative() ||
3040                     VX.isNullValue() != VY.isNullValue();
3041   // If the sign did not change between X and X+1, X is not a valid solution.
3042   // This could happen when the actual (exact) roots don't have an integer
3043   // between them, so they would both be contained between X and X+1.
3044   if (!SignChange) {
3045     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
3046     return None;
3047   }
3048 
3049   X += 1;
3050   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
3051   return X;
3052 }
3053 
3054 Optional<unsigned>
3055 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
3056   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
3057   if (A == B)
3058     return llvm::None;
3059   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
3060 }
3061 
3062 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
3063 /// with the integer held in IntVal.
3064 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
3065                             unsigned StoreBytes) {
3066   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
3067   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
3068 
3069   if (sys::IsLittleEndianHost) {
3070     // Little-endian host - the source is ordered from LSB to MSB.  Order the
3071     // destination from LSB to MSB: Do a straight copy.
3072     memcpy(Dst, Src, StoreBytes);
3073   } else {
3074     // Big-endian host - the source is an array of 64 bit words ordered from
3075     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
3076     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
3077     while (StoreBytes > sizeof(uint64_t)) {
3078       StoreBytes -= sizeof(uint64_t);
3079       // May not be aligned so use memcpy.
3080       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
3081       Src += sizeof(uint64_t);
3082     }
3083 
3084     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
3085   }
3086 }
3087 
3088 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
3089 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
3090 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
3091                              unsigned LoadBytes) {
3092   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
3093   uint8_t *Dst = reinterpret_cast<uint8_t *>(
3094                    const_cast<uint64_t *>(IntVal.getRawData()));
3095 
3096   if (sys::IsLittleEndianHost)
3097     // Little-endian host - the destination must be ordered from LSB to MSB.
3098     // The source is ordered from LSB to MSB: Do a straight copy.
3099     memcpy(Dst, Src, LoadBytes);
3100   else {
3101     // Big-endian - the destination is an array of 64 bit words ordered from
3102     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
3103     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
3104     // a word.
3105     while (LoadBytes > sizeof(uint64_t)) {
3106       LoadBytes -= sizeof(uint64_t);
3107       // May not be aligned so use memcpy.
3108       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
3109       Dst += sizeof(uint64_t);
3110     }
3111 
3112     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
3113   }
3114 }
3115