Home | History | Annotate | Line # | Download | only in Support
      1 //===-- APInt.cpp - Implement APInt class ---------------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements a class to represent arbitrary precision integer
     10 // constant values and provide a variety of arithmetic operations on them.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/ADT/APInt.h"
     15 #include "llvm/ADT/ArrayRef.h"
     16 #include "llvm/ADT/FoldingSet.h"
     17 #include "llvm/ADT/Hashing.h"
     18 #include "llvm/ADT/Optional.h"
     19 #include "llvm/ADT/SmallString.h"
     20 #include "llvm/ADT/StringRef.h"
     21 #include "llvm/ADT/bit.h"
     22 #include "llvm/Config/llvm-config.h"
     23 #include "llvm/Support/Debug.h"
     24 #include "llvm/Support/ErrorHandling.h"
     25 #include "llvm/Support/MathExtras.h"
     26 #include "llvm/Support/raw_ostream.h"
     27 #include <climits>
     28 #include <cmath>
     29 #include <cstdlib>
     30 #include <cstring>
     31 using namespace llvm;
     32 
     33 #define DEBUG_TYPE "apint"
     34 
     35 /// A utility function for allocating memory, checking for allocation failures,
     36 /// and ensuring the contents are zeroed.
     37 inline static uint64_t* getClearedMemory(unsigned numWords) {
     38   uint64_t *result = new uint64_t[numWords];
     39   memset(result, 0, numWords * sizeof(uint64_t));
     40   return result;
     41 }
     42 
     43 /// A utility function for allocating memory and checking for allocation
     44 /// failure.  The content is not zeroed.
     45 inline static uint64_t* getMemory(unsigned numWords) {
     46   return new uint64_t[numWords];
     47 }
     48 
     49 /// A utility function that converts a character to a digit.
     50 inline static unsigned getDigit(char cdigit, uint8_t radix) {
     51   unsigned r;
     52 
     53   if (radix == 16 || radix == 36) {
     54     r = cdigit - '0';
     55     if (r <= 9)
     56       return r;
     57 
     58     r = cdigit - 'A';
     59     if (r <= radix - 11U)
     60       return r + 10;
     61 
     62     r = cdigit - 'a';
     63     if (r <= radix - 11U)
     64       return r + 10;
     65 
     66     radix = 10;
     67   }
     68 
     69   r = cdigit - '0';
     70   if (r < radix)
     71     return r;
     72 
     73   return -1U;
     74 }
     75 
     76 
     77 void APInt::initSlowCase(uint64_t val, bool isSigned) {
     78   U.pVal = getClearedMemory(getNumWords());
     79   U.pVal[0] = val;
     80   if (isSigned && int64_t(val) < 0)
     81     for (unsigned i = 1; i < getNumWords(); ++i)
     82       U.pVal[i] = WORDTYPE_MAX;
     83   clearUnusedBits();
     84 }
     85 
     86 void APInt::initSlowCase(const APInt& that) {
     87   U.pVal = getMemory(getNumWords());
     88   memcpy(U.pVal, that.U.pVal, getNumWords() * APINT_WORD_SIZE);
     89 }
     90 
     91 void APInt::initFromArray(ArrayRef<uint64_t> bigVal) {
     92   assert(BitWidth && "Bitwidth too small");
     93   assert(bigVal.data() && "Null pointer detected!");
     94   if (isSingleWord())
     95     U.VAL = bigVal[0];
     96   else {
     97     // Get memory, cleared to 0
     98     U.pVal = getClearedMemory(getNumWords());
     99     // Calculate the number of words to copy
    100     unsigned words = std::min<unsigned>(bigVal.size(), getNumWords());
    101     // Copy the words from bigVal to pVal
    102     memcpy(U.pVal, bigVal.data(), words * APINT_WORD_SIZE);
    103   }
    104   // Make sure unused high bits are cleared
    105   clearUnusedBits();
    106 }
    107 
    108 APInt::APInt(unsigned numBits, ArrayRef<uint64_t> bigVal)
    109   : BitWidth(numBits) {
    110   initFromArray(bigVal);
    111 }
    112 
    113 APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
    114   : BitWidth(numBits) {
    115   initFromArray(makeArrayRef(bigVal, numWords));
    116 }
    117 
    118 APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
    119   : BitWidth(numbits) {
    120   assert(BitWidth && "Bitwidth too small");
    121   fromString(numbits, Str, radix);
    122 }
    123 
    124 void APInt::reallocate(unsigned NewBitWidth) {
    125   // If the number of words is the same we can just change the width and stop.
    126   if (getNumWords() == getNumWords(NewBitWidth)) {
    127     BitWidth = NewBitWidth;
    128     return;
    129   }
    130 
    131   // If we have an allocation, delete it.
    132   if (!isSingleWord())
    133     delete [] U.pVal;
    134 
    135   // Update BitWidth.
    136   BitWidth = NewBitWidth;
    137 
    138   // If we are supposed to have an allocation, create it.
    139   if (!isSingleWord())
    140     U.pVal = getMemory(getNumWords());
    141 }
    142 
    143 void APInt::AssignSlowCase(const APInt& RHS) {
    144   // Don't do anything for X = X
    145   if (this == &RHS)
    146     return;
    147 
    148   // Adjust the bit width and handle allocations as necessary.
    149   reallocate(RHS.getBitWidth());
    150 
    151   // Copy the data.
    152   if (isSingleWord())
    153     U.VAL = RHS.U.VAL;
    154   else
    155     memcpy(U.pVal, RHS.U.pVal, getNumWords() * APINT_WORD_SIZE);
    156 }
    157 
    158 /// This method 'profiles' an APInt for use with FoldingSet.
    159 void APInt::Profile(FoldingSetNodeID& ID) const {
    160   ID.AddInteger(BitWidth);
    161 
    162   if (isSingleWord()) {
    163     ID.AddInteger(U.VAL);
    164     return;
    165   }
    166 
    167   unsigned NumWords = getNumWords();
    168   for (unsigned i = 0; i < NumWords; ++i)
    169     ID.AddInteger(U.pVal[i]);
    170 }
    171 
    172 /// Prefix increment operator. Increments the APInt by one.
    173 APInt& APInt::operator++() {
    174   if (isSingleWord())
    175     ++U.VAL;
    176   else
    177     tcIncrement(U.pVal, getNumWords());
    178   return clearUnusedBits();
    179 }
    180 
    181 /// Prefix decrement operator. Decrements the APInt by one.
    182 APInt& APInt::operator--() {
    183   if (isSingleWord())
    184     --U.VAL;
    185   else
    186     tcDecrement(U.pVal, getNumWords());
    187   return clearUnusedBits();
    188 }
    189 
    190 /// Adds the RHS APInt to this APInt.
    191 /// @returns this, after addition of RHS.
    192 /// Addition assignment operator.
    193 APInt& APInt::operator+=(const APInt& RHS) {
    194   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
    195   if (isSingleWord())
    196     U.VAL += RHS.U.VAL;
    197   else
    198     tcAdd(U.pVal, RHS.U.pVal, 0, getNumWords());
    199   return clearUnusedBits();
    200 }
    201 
    202 APInt& APInt::operator+=(uint64_t RHS) {
    203   if (isSingleWord())
    204     U.VAL += RHS;
    205   else
    206     tcAddPart(U.pVal, RHS, getNumWords());
    207   return clearUnusedBits();
    208 }
    209 
    210 /// Subtracts the RHS APInt from this APInt
    211 /// @returns this, after subtraction
    212 /// Subtraction assignment operator.
    213 APInt& APInt::operator-=(const APInt& RHS) {
    214   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
    215   if (isSingleWord())
    216     U.VAL -= RHS.U.VAL;
    217   else
    218     tcSubtract(U.pVal, RHS.U.pVal, 0, getNumWords());
    219   return clearUnusedBits();
    220 }
    221 
    222 APInt& APInt::operator-=(uint64_t RHS) {
    223   if (isSingleWord())
    224     U.VAL -= RHS;
    225   else
    226     tcSubtractPart(U.pVal, RHS, getNumWords());
    227   return clearUnusedBits();
    228 }
    229 
    230 APInt APInt::operator*(const APInt& RHS) const {
    231   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
    232   if (isSingleWord())
    233     return APInt(BitWidth, U.VAL * RHS.U.VAL);
    234 
    235   APInt Result(getMemory(getNumWords()), getBitWidth());
    236 
    237   tcMultiply(Result.U.pVal, U.pVal, RHS.U.pVal, getNumWords());
    238 
    239   Result.clearUnusedBits();
    240   return Result;
    241 }
    242 
    243 void APInt::AndAssignSlowCase(const APInt& RHS) {
    244   tcAnd(U.pVal, RHS.U.pVal, getNumWords());
    245 }
    246 
    247 void APInt::OrAssignSlowCase(const APInt& RHS) {
    248   tcOr(U.pVal, RHS.U.pVal, getNumWords());
    249 }
    250 
    251 void APInt::XorAssignSlowCase(const APInt& RHS) {
    252   tcXor(U.pVal, RHS.U.pVal, getNumWords());
    253 }
    254 
    255 APInt& APInt::operator*=(const APInt& RHS) {
    256   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
    257   *this = *this * RHS;
    258   return *this;
    259 }
    260 
    261 APInt& APInt::operator*=(uint64_t RHS) {
    262   if (isSingleWord()) {
    263     U.VAL *= RHS;
    264   } else {
    265     unsigned NumWords = getNumWords();
    266     tcMultiplyPart(U.pVal, U.pVal, RHS, 0, NumWords, NumWords, false);
    267   }
    268   return clearUnusedBits();
    269 }
    270 
    271 bool APInt::EqualSlowCase(const APInt& RHS) const {
    272   return std::equal(U.pVal, U.pVal + getNumWords(), RHS.U.pVal);
    273 }
    274 
    275 int APInt::compare(const APInt& RHS) const {
    276   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
    277   if (isSingleWord())
    278     return U.VAL < RHS.U.VAL ? -1 : U.VAL > RHS.U.VAL;
    279 
    280   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
    281 }
    282 
    283 int APInt::compareSigned(const APInt& RHS) const {
    284   assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
    285   if (isSingleWord()) {
    286     int64_t lhsSext = SignExtend64(U.VAL, BitWidth);
    287     int64_t rhsSext = SignExtend64(RHS.U.VAL, BitWidth);
    288     return lhsSext < rhsSext ? -1 : lhsSext > rhsSext;
    289   }
    290 
    291   bool lhsNeg = isNegative();
    292   bool rhsNeg = RHS.isNegative();
    293 
    294   // If the sign bits don't match, then (LHS < RHS) if LHS is negative
    295   if (lhsNeg != rhsNeg)
    296     return lhsNeg ? -1 : 1;
    297 
    298   // Otherwise we can just use an unsigned comparison, because even negative
    299   // numbers compare correctly this way if both have the same signed-ness.
    300   return tcCompare(U.pVal, RHS.U.pVal, getNumWords());
    301 }
    302 
    303 void APInt::setBitsSlowCase(unsigned loBit, unsigned hiBit) {
    304   unsigned loWord = whichWord(loBit);
    305   unsigned hiWord = whichWord(hiBit);
    306 
    307   // Create an initial mask for the low word with zeros below loBit.
    308   uint64_t loMask = WORDTYPE_MAX << whichBit(loBit);
    309 
    310   // If hiBit is not aligned, we need a high mask.
    311   unsigned hiShiftAmt = whichBit(hiBit);
    312   if (hiShiftAmt != 0) {
    313     // Create a high mask with zeros above hiBit.
    314     uint64_t hiMask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - hiShiftAmt);
    315     // If loWord and hiWord are equal, then we combine the masks. Otherwise,
    316     // set the bits in hiWord.
    317     if (hiWord == loWord)
    318       loMask &= hiMask;
    319     else
    320       U.pVal[hiWord] |= hiMask;
    321   }
    322   // Apply the mask to the low word.
    323   U.pVal[loWord] |= loMask;
    324 
    325   // Fill any words between loWord and hiWord with all ones.
    326   for (unsigned word = loWord + 1; word < hiWord; ++word)
    327     U.pVal[word] = WORDTYPE_MAX;
    328 }
    329 
    330 /// Toggle every bit to its opposite value.
    331 void APInt::flipAllBitsSlowCase() {
    332   tcComplement(U.pVal, getNumWords());
    333   clearUnusedBits();
    334 }
    335 
    336 /// Toggle a given bit to its opposite value whose position is given
    337 /// as "bitPosition".
    338 /// Toggles a given bit to its opposite value.
    339 void APInt::flipBit(unsigned bitPosition) {
    340   assert(bitPosition < BitWidth && "Out of the bit-width range!");
    341   setBitVal(bitPosition, !(*this)[bitPosition]);
    342 }
    343 
    344 void APInt::insertBits(const APInt &subBits, unsigned bitPosition) {
    345   unsigned subBitWidth = subBits.getBitWidth();
    346   assert(0 < subBitWidth && (subBitWidth + bitPosition) <= BitWidth &&
    347          "Illegal bit insertion");
    348 
    349   // Insertion is a direct copy.
    350   if (subBitWidth == BitWidth) {
    351     *this = subBits;
    352     return;
    353   }
    354 
    355   // Single word result can be done as a direct bitmask.
    356   if (isSingleWord()) {
    357     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
    358     U.VAL &= ~(mask << bitPosition);
    359     U.VAL |= (subBits.U.VAL << bitPosition);
    360     return;
    361   }
    362 
    363   unsigned loBit = whichBit(bitPosition);
    364   unsigned loWord = whichWord(bitPosition);
    365   unsigned hi1Word = whichWord(bitPosition + subBitWidth - 1);
    366 
    367   // Insertion within a single word can be done as a direct bitmask.
    368   if (loWord == hi1Word) {
    369     uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - subBitWidth);
    370     U.pVal[loWord] &= ~(mask << loBit);
    371     U.pVal[loWord] |= (subBits.U.VAL << loBit);
    372     return;
    373   }
    374 
    375   // Insert on word boundaries.
    376   if (loBit == 0) {
    377     // Direct copy whole words.
    378     unsigned numWholeSubWords = subBitWidth / APINT_BITS_PER_WORD;
    379     memcpy(U.pVal + loWord, subBits.getRawData(),
    380            numWholeSubWords * APINT_WORD_SIZE);
    381 
    382     // Mask+insert remaining bits.
    383     unsigned remainingBits = subBitWidth % APINT_BITS_PER_WORD;
    384     if (remainingBits != 0) {
    385       uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - remainingBits);
    386       U.pVal[hi1Word] &= ~mask;
    387       U.pVal[hi1Word] |= subBits.getWord(subBitWidth - 1);
    388     }
    389     return;
    390   }
    391 
    392   // General case - set/clear individual bits in dst based on src.
    393   // TODO - there is scope for optimization here, but at the moment this code
    394   // path is barely used so prefer readability over performance.
    395   for (unsigned i = 0; i != subBitWidth; ++i)
    396     setBitVal(bitPosition + i, subBits[i]);
    397 }
    398 
    399 void APInt::insertBits(uint64_t subBits, unsigned bitPosition, unsigned numBits) {
    400   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
    401   subBits &= maskBits;
    402   if (isSingleWord()) {
    403     U.VAL &= ~(maskBits << bitPosition);
    404     U.VAL |= subBits << bitPosition;
    405     return;
    406   }
    407 
    408   unsigned loBit = whichBit(bitPosition);
    409   unsigned loWord = whichWord(bitPosition);
    410   unsigned hiWord = whichWord(bitPosition + numBits - 1);
    411   if (loWord == hiWord) {
    412     U.pVal[loWord] &= ~(maskBits << loBit);
    413     U.pVal[loWord] |= subBits << loBit;
    414     return;
    415   }
    416 
    417   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
    418   unsigned wordBits = 8 * sizeof(WordType);
    419   U.pVal[loWord] &= ~(maskBits << loBit);
    420   U.pVal[loWord] |= subBits << loBit;
    421 
    422   U.pVal[hiWord] &= ~(maskBits >> (wordBits - loBit));
    423   U.pVal[hiWord] |= subBits >> (wordBits - loBit);
    424 }
    425 
    426 APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
    427   assert(numBits > 0 && "Can't extract zero bits");
    428   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
    429          "Illegal bit extraction");
    430 
    431   if (isSingleWord())
    432     return APInt(numBits, U.VAL >> bitPosition);
    433 
    434   unsigned loBit = whichBit(bitPosition);
    435   unsigned loWord = whichWord(bitPosition);
    436   unsigned hiWord = whichWord(bitPosition + numBits - 1);
    437 
    438   // Single word result extracting bits from a single word source.
    439   if (loWord == hiWord)
    440     return APInt(numBits, U.pVal[loWord] >> loBit);
    441 
    442   // Extracting bits that start on a source word boundary can be done
    443   // as a fast memory copy.
    444   if (loBit == 0)
    445     return APInt(numBits, makeArrayRef(U.pVal + loWord, 1 + hiWord - loWord));
    446 
    447   // General case - shift + copy source words directly into place.
    448   APInt Result(numBits, 0);
    449   unsigned NumSrcWords = getNumWords();
    450   unsigned NumDstWords = Result.getNumWords();
    451 
    452   uint64_t *DestPtr = Result.isSingleWord() ? &Result.U.VAL : Result.U.pVal;
    453   for (unsigned word = 0; word < NumDstWords; ++word) {
    454     uint64_t w0 = U.pVal[loWord + word];
    455     uint64_t w1 =
    456         (loWord + word + 1) < NumSrcWords ? U.pVal[loWord + word + 1] : 0;
    457     DestPtr[word] = (w0 >> loBit) | (w1 << (APINT_BITS_PER_WORD - loBit));
    458   }
    459 
    460   return Result.clearUnusedBits();
    461 }
    462 
    463 uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
    464                                        unsigned bitPosition) const {
    465   assert(numBits > 0 && "Can't extract zero bits");
    466   assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
    467          "Illegal bit extraction");
    468   assert(numBits <= 64 && "Illegal bit extraction");
    469 
    470   uint64_t maskBits = maskTrailingOnes<uint64_t>(numBits);
    471   if (isSingleWord())
    472     return (U.VAL >> bitPosition) & maskBits;
    473 
    474   unsigned loBit = whichBit(bitPosition);
    475   unsigned loWord = whichWord(bitPosition);
    476   unsigned hiWord = whichWord(bitPosition + numBits - 1);
    477   if (loWord == hiWord)
    478     return (U.pVal[loWord] >> loBit) & maskBits;
    479 
    480   static_assert(8 * sizeof(WordType) <= 64, "This code assumes only two words affected");
    481   unsigned wordBits = 8 * sizeof(WordType);
    482   uint64_t retBits = U.pVal[loWord] >> loBit;
    483   retBits |= U.pVal[hiWord] << (wordBits - loBit);
    484   retBits &= maskBits;
    485   return retBits;
    486 }
    487 
    488 unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
    489   assert(!str.empty() && "Invalid string length");
    490   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
    491           radix == 36) &&
    492          "Radix should be 2, 8, 10, 16, or 36!");
    493 
    494   size_t slen = str.size();
    495 
    496   // Each computation below needs to know if it's negative.
    497   StringRef::iterator p = str.begin();
    498   unsigned isNegative = *p == '-';
    499   if (*p == '-' || *p == '+') {
    500     p++;
    501     slen--;
    502     assert(slen && "String is only a sign, needs a value.");
    503   }
    504 
    505   // For radixes of power-of-two values, the bits required is accurately and
    506   // easily computed
    507   if (radix == 2)
    508     return slen + isNegative;
    509   if (radix == 8)
    510     return slen * 3 + isNegative;
    511   if (radix == 16)
    512     return slen * 4 + isNegative;
    513 
    514   // FIXME: base 36
    515 
    516   // This is grossly inefficient but accurate. We could probably do something
    517   // with a computation of roughly slen*64/20 and then adjust by the value of
    518   // the first few digits. But, I'm not sure how accurate that could be.
    519 
    520   // Compute a sufficient number of bits that is always large enough but might
    521   // be too large. This avoids the assertion in the constructor. This
    522   // calculation doesn't work appropriately for the numbers 0-9, so just use 4
    523   // bits in that case.
    524   unsigned sufficient
    525     = radix == 10? (slen == 1 ? 4 : slen * 64/18)
    526                  : (slen == 1 ? 7 : slen * 16/3);
    527 
    528   // Convert to the actual binary value.
    529   APInt tmp(sufficient, StringRef(p, slen), radix);
    530 
    531   // Compute how many bits are required. If the log is infinite, assume we need
    532   // just bit. If the log is exact and value is negative, then the value is
    533   // MinSignedValue with (log + 1) bits.
    534   unsigned log = tmp.logBase2();
    535   if (log == (unsigned)-1) {
    536     return isNegative + 1;
    537   } else if (isNegative && tmp.isPowerOf2()) {
    538     return isNegative + log;
    539   } else {
    540     return isNegative + log + 1;
    541   }
    542 }
    543 
    544 hash_code llvm::hash_value(const APInt &Arg) {
    545   if (Arg.isSingleWord())
    546     return hash_combine(Arg.BitWidth, Arg.U.VAL);
    547 
    548   return hash_combine(
    549       Arg.BitWidth,
    550       hash_combine_range(Arg.U.pVal, Arg.U.pVal + Arg.getNumWords()));
    551 }
    552 
    553 bool APInt::isSplat(unsigned SplatSizeInBits) const {
    554   assert(getBitWidth() % SplatSizeInBits == 0 &&
    555          "SplatSizeInBits must divide width!");
    556   // We can check that all parts of an integer are equal by making use of a
    557   // little trick: rotate and check if it's still the same value.
    558   return *this == rotl(SplatSizeInBits);
    559 }
    560 
    561 /// This function returns the high "numBits" bits of this APInt.
    562 APInt APInt::getHiBits(unsigned numBits) const {
    563   return this->lshr(BitWidth - numBits);
    564 }
    565 
    566 /// This function returns the low "numBits" bits of this APInt.
    567 APInt APInt::getLoBits(unsigned numBits) const {
    568   APInt Result(getLowBitsSet(BitWidth, numBits));
    569   Result &= *this;
    570   return Result;
    571 }
    572 
    573 /// Return a value containing V broadcasted over NewLen bits.
    574 APInt APInt::getSplat(unsigned NewLen, const APInt &V) {
    575   assert(NewLen >= V.getBitWidth() && "Can't splat to smaller bit width!");
    576 
    577   APInt Val = V.zextOrSelf(NewLen);
    578   for (unsigned I = V.getBitWidth(); I < NewLen; I <<= 1)
    579     Val |= Val << I;
    580 
    581   return Val;
    582 }
    583 
    584 unsigned APInt::countLeadingZerosSlowCase() const {
    585   unsigned Count = 0;
    586   for (int i = getNumWords()-1; i >= 0; --i) {
    587     uint64_t V = U.pVal[i];
    588     if (V == 0)
    589       Count += APINT_BITS_PER_WORD;
    590     else {
    591       Count += llvm::countLeadingZeros(V);
    592       break;
    593     }
    594   }
    595   // Adjust for unused bits in the most significant word (they are zero).
    596   unsigned Mod = BitWidth % APINT_BITS_PER_WORD;
    597   Count -= Mod > 0 ? APINT_BITS_PER_WORD - Mod : 0;
    598   return Count;
    599 }
    600 
    601 unsigned APInt::countLeadingOnesSlowCase() const {
    602   unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD;
    603   unsigned shift;
    604   if (!highWordBits) {
    605     highWordBits = APINT_BITS_PER_WORD;
    606     shift = 0;
    607   } else {
    608     shift = APINT_BITS_PER_WORD - highWordBits;
    609   }
    610   int i = getNumWords() - 1;
    611   unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
    612   if (Count == highWordBits) {
    613     for (i--; i >= 0; --i) {
    614       if (U.pVal[i] == WORDTYPE_MAX)
    615         Count += APINT_BITS_PER_WORD;
    616       else {
    617         Count += llvm::countLeadingOnes(U.pVal[i]);
    618         break;
    619       }
    620     }
    621   }
    622   return Count;
    623 }
    624 
    625 unsigned APInt::countTrailingZerosSlowCase() const {
    626   unsigned Count = 0;
    627   unsigned i = 0;
    628   for (; i < getNumWords() && U.pVal[i] == 0; ++i)
    629     Count += APINT_BITS_PER_WORD;
    630   if (i < getNumWords())
    631     Count += llvm::countTrailingZeros(U.pVal[i]);
    632   return std::min(Count, BitWidth);
    633 }
    634 
    635 unsigned APInt::countTrailingOnesSlowCase() const {
    636   unsigned Count = 0;
    637   unsigned i = 0;
    638   for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
    639     Count += APINT_BITS_PER_WORD;
    640   if (i < getNumWords())
    641     Count += llvm::countTrailingOnes(U.pVal[i]);
    642   assert(Count <= BitWidth);
    643   return Count;
    644 }
    645 
    646 unsigned APInt::countPopulationSlowCase() const {
    647   unsigned Count = 0;
    648   for (unsigned i = 0; i < getNumWords(); ++i)
    649     Count += llvm::countPopulation(U.pVal[i]);
    650   return Count;
    651 }
    652 
    653 bool APInt::intersectsSlowCase(const APInt &RHS) const {
    654   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
    655     if ((U.pVal[i] & RHS.U.pVal[i]) != 0)
    656       return true;
    657 
    658   return false;
    659 }
    660 
    661 bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
    662   for (unsigned i = 0, e = getNumWords(); i != e; ++i)
    663     if ((U.pVal[i] & ~RHS.U.pVal[i]) != 0)
    664       return false;
    665 
    666   return true;
    667 }
    668 
    669 APInt APInt::byteSwap() const {
    670   assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
    671   if (BitWidth == 16)
    672     return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
    673   if (BitWidth == 32)
    674     return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
    675   if (BitWidth <= 64) {
    676     uint64_t Tmp1 = ByteSwap_64(U.VAL);
    677     Tmp1 >>= (64 - BitWidth);
    678     return APInt(BitWidth, Tmp1);
    679   }
    680 
    681   APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
    682   for (unsigned I = 0, N = getNumWords(); I != N; ++I)
    683     Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
    684   if (Result.BitWidth != BitWidth) {
    685     Result.lshrInPlace(Result.BitWidth - BitWidth);
    686     Result.BitWidth = BitWidth;
    687   }
    688   return Result;
    689 }
    690 
    691 APInt APInt::reverseBits() const {
    692   switch (BitWidth) {
    693   case 64:
    694     return APInt(BitWidth, llvm::reverseBits<uint64_t>(U.VAL));
    695   case 32:
    696     return APInt(BitWidth, llvm::reverseBits<uint32_t>(U.VAL));
    697   case 16:
    698     return APInt(BitWidth, llvm::reverseBits<uint16_t>(U.VAL));
    699   case 8:
    700     return APInt(BitWidth, llvm::reverseBits<uint8_t>(U.VAL));
    701   default:
    702     break;
    703   }
    704 
    705   APInt Val(*this);
    706   APInt Reversed(BitWidth, 0);
    707   unsigned S = BitWidth;
    708 
    709   for (; Val != 0; Val.lshrInPlace(1)) {
    710     Reversed <<= 1;
    711     Reversed |= Val[0];
    712     --S;
    713   }
    714 
    715   Reversed <<= S;
    716   return Reversed;
    717 }
    718 
    719 APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
    720   // Fast-path a common case.
    721   if (A == B) return A;
    722 
    723   // Corner cases: if either operand is zero, the other is the gcd.
    724   if (!A) return B;
    725   if (!B) return A;
    726 
    727   // Count common powers of 2 and remove all other powers of 2.
    728   unsigned Pow2;
    729   {
    730     unsigned Pow2_A = A.countTrailingZeros();
    731     unsigned Pow2_B = B.countTrailingZeros();
    732     if (Pow2_A > Pow2_B) {
    733       A.lshrInPlace(Pow2_A - Pow2_B);
    734       Pow2 = Pow2_B;
    735     } else if (Pow2_B > Pow2_A) {
    736       B.lshrInPlace(Pow2_B - Pow2_A);
    737       Pow2 = Pow2_A;
    738     } else {
    739       Pow2 = Pow2_A;
    740     }
    741   }
    742 
    743   // Both operands are odd multiples of 2^Pow_2:
    744   //
    745   //   gcd(a, b) = gcd(|a - b| / 2^i, min(a, b))
    746   //
    747   // This is a modified version of Stein's algorithm, taking advantage of
    748   // efficient countTrailingZeros().
    749   while (A != B) {
    750     if (A.ugt(B)) {
    751       A -= B;
    752       A.lshrInPlace(A.countTrailingZeros() - Pow2);
    753     } else {
    754       B -= A;
    755       B.lshrInPlace(B.countTrailingZeros() - Pow2);
    756     }
    757   }
    758 
    759   return A;
    760 }
    761 
    762 APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) {
    763   uint64_t I = bit_cast<uint64_t>(Double);
    764 
    765   // Get the sign bit from the highest order bit
    766   bool isNeg = I >> 63;
    767 
    768   // Get the 11-bit exponent and adjust for the 1023 bit bias
    769   int64_t exp = ((I >> 52) & 0x7ff) - 1023;
    770 
    771   // If the exponent is negative, the value is < 0 so just return 0.
    772   if (exp < 0)
    773     return APInt(width, 0u);
    774 
    775   // Extract the mantissa by clearing the top 12 bits (sign + exponent).
    776   uint64_t mantissa = (I & (~0ULL >> 12)) | 1ULL << 52;
    777 
    778   // If the exponent doesn't shift all bits out of the mantissa
    779   if (exp < 52)
    780     return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
    781                     APInt(width, mantissa >> (52 - exp));
    782 
    783   // If the client didn't provide enough bits for us to shift the mantissa into
    784   // then the result is undefined, just return 0
    785   if (width <= exp - 52)
    786     return APInt(width, 0);
    787 
    788   // Otherwise, we have to shift the mantissa bits up to the right location
    789   APInt Tmp(width, mantissa);
    790   Tmp <<= (unsigned)exp - 52;
    791   return isNeg ? -Tmp : Tmp;
    792 }
    793 
    794 /// This function converts this APInt to a double.
    795 /// The layout for double is as following (IEEE Standard 754):
    796 ///  --------------------------------------
    797 /// |  Sign    Exponent    Fraction    Bias |
    798 /// |-------------------------------------- |
    799 /// |  1[63]   11[62-52]   52[51-00]   1023 |
    800 ///  --------------------------------------
    801 double APInt::roundToDouble(bool isSigned) const {
    802 
    803   // Handle the simple case where the value is contained in one uint64_t.
    804   // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
    805   if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
    806     if (isSigned) {
    807       int64_t sext = SignExtend64(getWord(0), BitWidth);
    808       return double(sext);
    809     } else
    810       return double(getWord(0));
    811   }
    812 
    813   // Determine if the value is negative.
    814   bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
    815 
    816   // Construct the absolute value if we're negative.
    817   APInt Tmp(isNeg ? -(*this) : (*this));
    818 
    819   // Figure out how many bits we're using.
    820   unsigned n = Tmp.getActiveBits();
    821 
    822   // The exponent (without bias normalization) is just the number of bits
    823   // we are using. Note that the sign bit is gone since we constructed the
    824   // absolute value.
    825   uint64_t exp = n;
    826 
    827   // Return infinity for exponent overflow
    828   if (exp > 1023) {
    829     if (!isSigned || !isNeg)
    830       return std::numeric_limits<double>::infinity();
    831     else
    832       return -std::numeric_limits<double>::infinity();
    833   }
    834   exp += 1023; // Increment for 1023 bias
    835 
    836   // Number of bits in mantissa is 52. To obtain the mantissa value, we must
    837   // extract the high 52 bits from the correct words in pVal.
    838   uint64_t mantissa;
    839   unsigned hiWord = whichWord(n-1);
    840   if (hiWord == 0) {
    841     mantissa = Tmp.U.pVal[0];
    842     if (n > 52)
    843       mantissa >>= n - 52; // shift down, we want the top 52 bits.
    844   } else {
    845     assert(hiWord > 0 && "huh?");
    846     uint64_t hibits = Tmp.U.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
    847     uint64_t lobits = Tmp.U.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
    848     mantissa = hibits | lobits;
    849   }
    850 
    851   // The leading bit of mantissa is implicit, so get rid of it.
    852   uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
    853   uint64_t I = sign | (exp << 52) | mantissa;
    854   return bit_cast<double>(I);
    855 }
    856 
    857 // Truncate to new width.
    858 APInt APInt::trunc(unsigned width) const {
    859   assert(width < BitWidth && "Invalid APInt Truncate request");
    860   assert(width && "Can't truncate to 0 bits");
    861 
    862   if (width <= APINT_BITS_PER_WORD)
    863     return APInt(width, getRawData()[0]);
    864 
    865   APInt Result(getMemory(getNumWords(width)), width);
    866 
    867   // Copy full words.
    868   unsigned i;
    869   for (i = 0; i != width / APINT_BITS_PER_WORD; i++)
    870     Result.U.pVal[i] = U.pVal[i];
    871 
    872   // Truncate and copy any partial word.
    873   unsigned bits = (0 - width) % APINT_BITS_PER_WORD;
    874   if (bits != 0)
    875     Result.U.pVal[i] = U.pVal[i] << bits >> bits;
    876 
    877   return Result;
    878 }
    879 
    880 // Truncate to new width with unsigned saturation.
    881 APInt APInt::truncUSat(unsigned width) const {
    882   assert(width < BitWidth && "Invalid APInt Truncate request");
    883   assert(width && "Can't truncate to 0 bits");
    884 
    885   // Can we just losslessly truncate it?
    886   if (isIntN(width))
    887     return trunc(width);
    888   // If not, then just return the new limit.
    889   return APInt::getMaxValue(width);
    890 }
    891 
    892 // Truncate to new width with signed saturation.
    893 APInt APInt::truncSSat(unsigned width) const {
    894   assert(width < BitWidth && "Invalid APInt Truncate request");
    895   assert(width && "Can't truncate to 0 bits");
    896 
    897   // Can we just losslessly truncate it?
    898   if (isSignedIntN(width))
    899     return trunc(width);
    900   // If not, then just return the new limits.
    901   return isNegative() ? APInt::getSignedMinValue(width)
    902                       : APInt::getSignedMaxValue(width);
    903 }
    904 
    905 // Sign extend to a new width.
    906 APInt APInt::sext(unsigned Width) const {
    907   assert(Width > BitWidth && "Invalid APInt SignExtend request");
    908 
    909   if (Width <= APINT_BITS_PER_WORD)
    910     return APInt(Width, SignExtend64(U.VAL, BitWidth));
    911 
    912   APInt Result(getMemory(getNumWords(Width)), Width);
    913 
    914   // Copy words.
    915   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
    916 
    917   // Sign extend the last word since there may be unused bits in the input.
    918   Result.U.pVal[getNumWords() - 1] =
    919       SignExtend64(Result.U.pVal[getNumWords() - 1],
    920                    ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
    921 
    922   // Fill with sign bits.
    923   std::memset(Result.U.pVal + getNumWords(), isNegative() ? -1 : 0,
    924               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
    925   Result.clearUnusedBits();
    926   return Result;
    927 }
    928 
    929 //  Zero extend to a new width.
    930 APInt APInt::zext(unsigned width) const {
    931   assert(width > BitWidth && "Invalid APInt ZeroExtend request");
    932 
    933   if (width <= APINT_BITS_PER_WORD)
    934     return APInt(width, U.VAL);
    935 
    936   APInt Result(getMemory(getNumWords(width)), width);
    937 
    938   // Copy words.
    939   std::memcpy(Result.U.pVal, getRawData(), getNumWords() * APINT_WORD_SIZE);
    940 
    941   // Zero remaining words.
    942   std::memset(Result.U.pVal + getNumWords(), 0,
    943               (Result.getNumWords() - getNumWords()) * APINT_WORD_SIZE);
    944 
    945   return Result;
    946 }
    947 
    948 APInt APInt::zextOrTrunc(unsigned width) const {
    949   if (BitWidth < width)
    950     return zext(width);
    951   if (BitWidth > width)
    952     return trunc(width);
    953   return *this;
    954 }
    955 
    956 APInt APInt::sextOrTrunc(unsigned width) const {
    957   if (BitWidth < width)
    958     return sext(width);
    959   if (BitWidth > width)
    960     return trunc(width);
    961   return *this;
    962 }
    963 
    964 APInt APInt::truncOrSelf(unsigned width) const {
    965   if (BitWidth > width)
    966     return trunc(width);
    967   return *this;
    968 }
    969 
    970 APInt APInt::zextOrSelf(unsigned width) const {
    971   if (BitWidth < width)
    972     return zext(width);
    973   return *this;
    974 }
    975 
    976 APInt APInt::sextOrSelf(unsigned width) const {
    977   if (BitWidth < width)
    978     return sext(width);
    979   return *this;
    980 }
    981 
    982 /// Arithmetic right-shift this APInt by shiftAmt.
    983 /// Arithmetic right-shift function.
    984 void APInt::ashrInPlace(const APInt &shiftAmt) {
    985   ashrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
    986 }
    987 
    988 /// Arithmetic right-shift this APInt by shiftAmt.
    989 /// Arithmetic right-shift function.
    990 void APInt::ashrSlowCase(unsigned ShiftAmt) {
    991   // Don't bother performing a no-op shift.
    992   if (!ShiftAmt)
    993     return;
    994 
    995   // Save the original sign bit for later.
    996   bool Negative = isNegative();
    997 
    998   // WordShift is the inter-part shift; BitShift is intra-part shift.
    999   unsigned WordShift = ShiftAmt / APINT_BITS_PER_WORD;
   1000   unsigned BitShift = ShiftAmt % APINT_BITS_PER_WORD;
   1001 
   1002   unsigned WordsToMove = getNumWords() - WordShift;
   1003   if (WordsToMove != 0) {
   1004     // Sign extend the last word to fill in the unused bits.
   1005     U.pVal[getNumWords() - 1] = SignExtend64(
   1006         U.pVal[getNumWords() - 1], ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1);
   1007 
   1008     // Fastpath for moving by whole words.
   1009     if (BitShift == 0) {
   1010       std::memmove(U.pVal, U.pVal + WordShift, WordsToMove * APINT_WORD_SIZE);
   1011     } else {
   1012       // Move the words containing significant bits.
   1013       for (unsigned i = 0; i != WordsToMove - 1; ++i)
   1014         U.pVal[i] = (U.pVal[i + WordShift] >> BitShift) |
   1015                     (U.pVal[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift));
   1016 
   1017       // Handle the last word which has no high bits to copy.
   1018       U.pVal[WordsToMove - 1] = U.pVal[WordShift + WordsToMove - 1] >> BitShift;
   1019       // Sign extend one more time.
   1020       U.pVal[WordsToMove - 1] =
   1021           SignExtend64(U.pVal[WordsToMove - 1], APINT_BITS_PER_WORD - BitShift);
   1022     }
   1023   }
   1024 
   1025   // Fill in the remainder based on the original sign.
   1026   std::memset(U.pVal + WordsToMove, Negative ? -1 : 0,
   1027               WordShift * APINT_WORD_SIZE);
   1028   clearUnusedBits();
   1029 }
   1030 
   1031 /// Logical right-shift this APInt by shiftAmt.
   1032 /// Logical right-shift function.
   1033 void APInt::lshrInPlace(const APInt &shiftAmt) {
   1034   lshrInPlace((unsigned)shiftAmt.getLimitedValue(BitWidth));
   1035 }
   1036 
   1037 /// Logical right-shift this APInt by shiftAmt.
   1038 /// Logical right-shift function.
   1039 void APInt::lshrSlowCase(unsigned ShiftAmt) {
   1040   tcShiftRight(U.pVal, getNumWords(), ShiftAmt);
   1041 }
   1042 
   1043 /// Left-shift this APInt by shiftAmt.
   1044 /// Left-shift function.
   1045 APInt &APInt::operator<<=(const APInt &shiftAmt) {
   1046   // It's undefined behavior in C to shift by BitWidth or greater.
   1047   *this <<= (unsigned)shiftAmt.getLimitedValue(BitWidth);
   1048   return *this;
   1049 }
   1050 
   1051 void APInt::shlSlowCase(unsigned ShiftAmt) {
   1052   tcShiftLeft(U.pVal, getNumWords(), ShiftAmt);
   1053   clearUnusedBits();
   1054 }
   1055 
   1056 // Calculate the rotate amount modulo the bit width.
   1057 static unsigned rotateModulo(unsigned BitWidth, const APInt &rotateAmt) {
   1058   unsigned rotBitWidth = rotateAmt.getBitWidth();
   1059   APInt rot = rotateAmt;
   1060   if (rotBitWidth < BitWidth) {
   1061     // Extend the rotate APInt, so that the urem doesn't divide by 0.
   1062     // e.g. APInt(1, 32) would give APInt(1, 0).
   1063     rot = rotateAmt.zext(BitWidth);
   1064   }
   1065   rot = rot.urem(APInt(rot.getBitWidth(), BitWidth));
   1066   return rot.getLimitedValue(BitWidth);
   1067 }
   1068 
   1069 APInt APInt::rotl(const APInt &rotateAmt) const {
   1070   return rotl(rotateModulo(BitWidth, rotateAmt));
   1071 }
   1072 
   1073 APInt APInt::rotl(unsigned rotateAmt) const {
   1074   rotateAmt %= BitWidth;
   1075   if (rotateAmt == 0)
   1076     return *this;
   1077   return shl(rotateAmt) | lshr(BitWidth - rotateAmt);
   1078 }
   1079 
   1080 APInt APInt::rotr(const APInt &rotateAmt) const {
   1081   return rotr(rotateModulo(BitWidth, rotateAmt));
   1082 }
   1083 
   1084 APInt APInt::rotr(unsigned rotateAmt) const {
   1085   rotateAmt %= BitWidth;
   1086   if (rotateAmt == 0)
   1087     return *this;
   1088   return lshr(rotateAmt) | shl(BitWidth - rotateAmt);
   1089 }
   1090 
   1091 // Square Root - this method computes and returns the square root of "this".
   1092 // Three mechanisms are used for computation. For small values (<= 5 bits),
   1093 // a table lookup is done. This gets some performance for common cases. For
   1094 // values using less than 52 bits, the value is converted to double and then
   1095 // the libc sqrt function is called. The result is rounded and then converted
   1096 // back to a uint64_t which is then used to construct the result. Finally,
   1097 // the Babylonian method for computing square roots is used.
   1098 APInt APInt::sqrt() const {
   1099 
   1100   // Determine the magnitude of the value.
   1101   unsigned magnitude = getActiveBits();
   1102 
   1103   // Use a fast table for some small values. This also gets rid of some
   1104   // rounding errors in libc sqrt for small values.
   1105   if (magnitude <= 5) {
   1106     static const uint8_t results[32] = {
   1107       /*     0 */ 0,
   1108       /*  1- 2 */ 1, 1,
   1109       /*  3- 6 */ 2, 2, 2, 2,
   1110       /*  7-12 */ 3, 3, 3, 3, 3, 3,
   1111       /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4,
   1112       /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
   1113       /*    31 */ 6
   1114     };
   1115     return APInt(BitWidth, results[ (isSingleWord() ? U.VAL : U.pVal[0]) ]);
   1116   }
   1117 
   1118   // If the magnitude of the value fits in less than 52 bits (the precision of
   1119   // an IEEE double precision floating point value), then we can use the
   1120   // libc sqrt function which will probably use a hardware sqrt computation.
   1121   // This should be faster than the algorithm below.
   1122   if (magnitude < 52) {
   1123     return APInt(BitWidth,
   1124                  uint64_t(::round(::sqrt(double(isSingleWord() ? U.VAL
   1125                                                                : U.pVal[0])))));
   1126   }
   1127 
   1128   // Okay, all the short cuts are exhausted. We must compute it. The following
   1129   // is a classical Babylonian method for computing the square root. This code
   1130   // was adapted to APInt from a wikipedia article on such computations.
   1131   // See http://www.wikipedia.org/ and go to the page named
   1132   // Calculate_an_integer_square_root.
   1133   unsigned nbits = BitWidth, i = 4;
   1134   APInt testy(BitWidth, 16);
   1135   APInt x_old(BitWidth, 1);
   1136   APInt x_new(BitWidth, 0);
   1137   APInt two(BitWidth, 2);
   1138 
   1139   // Select a good starting value using binary logarithms.
   1140   for (;; i += 2, testy = testy.shl(2))
   1141     if (i >= nbits || this->ule(testy)) {
   1142       x_old = x_old.shl(i / 2);
   1143       break;
   1144     }
   1145 
   1146   // Use the Babylonian method to arrive at the integer square root:
   1147   for (;;) {
   1148     x_new = (this->udiv(x_old) + x_old).udiv(two);
   1149     if (x_old.ule(x_new))
   1150       break;
   1151     x_old = x_new;
   1152   }
   1153 
   1154   // Make sure we return the closest approximation
   1155   // NOTE: The rounding calculation below is correct. It will produce an
   1156   // off-by-one discrepancy with results from pari/gp. That discrepancy has been
   1157   // determined to be a rounding issue with pari/gp as it begins to use a
   1158   // floating point representation after 192 bits. There are no discrepancies
   1159   // between this algorithm and pari/gp for bit widths < 192 bits.
   1160   APInt square(x_old * x_old);
   1161   APInt nextSquare((x_old + 1) * (x_old +1));
   1162   if (this->ult(square))
   1163     return x_old;
   1164   assert(this->ule(nextSquare) && "Error in APInt::sqrt computation");
   1165   APInt midpoint((nextSquare - square).udiv(two));
   1166   APInt offset(*this - square);
   1167   if (offset.ult(midpoint))
   1168     return x_old;
   1169   return x_old + 1;
   1170 }
   1171 
   1172 /// Computes the multiplicative inverse of this APInt for a given modulo. The
   1173 /// iterative extended Euclidean algorithm is used to solve for this value,
   1174 /// however we simplify it to speed up calculating only the inverse, and take
   1175 /// advantage of div+rem calculations. We also use some tricks to avoid copying
   1176 /// (potentially large) APInts around.
   1177 /// WARNING: a value of '0' may be returned,
   1178 ///          signifying that no multiplicative inverse exists!
   1179 APInt APInt::multiplicativeInverse(const APInt& modulo) const {
   1180   assert(ult(modulo) && "This APInt must be smaller than the modulo");
   1181 
   1182   // Using the properties listed at the following web page (accessed 06/21/08):
   1183   //   http://www.numbertheory.org/php/euclid.html
   1184   // (especially the properties numbered 3, 4 and 9) it can be proved that
   1185   // BitWidth bits suffice for all the computations in the algorithm implemented
   1186   // below. More precisely, this number of bits suffice if the multiplicative
   1187   // inverse exists, but may not suffice for the general extended Euclidean
   1188   // algorithm.
   1189 
   1190   APInt r[2] = { modulo, *this };
   1191   APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) };
   1192   APInt q(BitWidth, 0);
   1193 
   1194   unsigned i;
   1195   for (i = 0; r[i^1] != 0; i ^= 1) {
   1196     // An overview of the math without the confusing bit-flipping:
   1197     // q = r[i-2] / r[i-1]
   1198     // r[i] = r[i-2] % r[i-1]
   1199     // t[i] = t[i-2] - t[i-1] * q
   1200     udivrem(r[i], r[i^1], q, r[i]);
   1201     t[i] -= t[i^1] * q;
   1202   }
   1203 
   1204   // If this APInt and the modulo are not coprime, there is no multiplicative
   1205   // inverse, so return 0. We check this by looking at the next-to-last
   1206   // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean
   1207   // algorithm.
   1208   if (r[i] != 1)
   1209     return APInt(BitWidth, 0);
   1210 
   1211   // The next-to-last t is the multiplicative inverse.  However, we are
   1212   // interested in a positive inverse. Calculate a positive one from a negative
   1213   // one if necessary. A simple addition of the modulo suffices because
   1214   // abs(t[i]) is known to be less than *this/2 (see the link above).
   1215   if (t[i].isNegative())
   1216     t[i] += modulo;
   1217 
   1218   return std::move(t[i]);
   1219 }
   1220 
   1221 /// Calculate the magic numbers required to implement a signed integer division
   1222 /// by a constant as a sequence of multiplies, adds and shifts.  Requires that
   1223 /// the divisor not be 0, 1, or -1.  Taken from "Hacker's Delight", Henry S.
   1224 /// Warren, Jr., chapter 10.
   1225 APInt::ms APInt::magic() const {
   1226   const APInt& d = *this;
   1227   unsigned p;
   1228   APInt ad, anc, delta, q1, r1, q2, r2, t;
   1229   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
   1230   struct ms mag;
   1231 
   1232   ad = d.abs();
   1233   t = signedMin + (d.lshr(d.getBitWidth() - 1));
   1234   anc = t - 1 - t.urem(ad);   // absolute value of nc
   1235   p = d.getBitWidth() - 1;    // initialize p
   1236   q1 = signedMin.udiv(anc);   // initialize q1 = 2p/abs(nc)
   1237   r1 = signedMin - q1*anc;    // initialize r1 = rem(2p,abs(nc))
   1238   q2 = signedMin.udiv(ad);    // initialize q2 = 2p/abs(d)
   1239   r2 = signedMin - q2*ad;     // initialize r2 = rem(2p,abs(d))
   1240   do {
   1241     p = p + 1;
   1242     q1 = q1<<1;          // update q1 = 2p/abs(nc)
   1243     r1 = r1<<1;          // update r1 = rem(2p/abs(nc))
   1244     if (r1.uge(anc)) {  // must be unsigned comparison
   1245       q1 = q1 + 1;
   1246       r1 = r1 - anc;
   1247     }
   1248     q2 = q2<<1;          // update q2 = 2p/abs(d)
   1249     r2 = r2<<1;          // update r2 = rem(2p/abs(d))
   1250     if (r2.uge(ad)) {   // must be unsigned comparison
   1251       q2 = q2 + 1;
   1252       r2 = r2 - ad;
   1253     }
   1254     delta = ad - r2;
   1255   } while (q1.ult(delta) || (q1 == delta && r1 == 0));
   1256 
   1257   mag.m = q2 + 1;
   1258   if (d.isNegative()) mag.m = -mag.m;   // resulting magic number
   1259   mag.s = p - d.getBitWidth();          // resulting shift
   1260   return mag;
   1261 }
   1262 
   1263 /// Calculate the magic numbers required to implement an unsigned integer
   1264 /// division by a constant as a sequence of multiplies, adds and shifts.
   1265 /// Requires that the divisor not be 0.  Taken from "Hacker's Delight", Henry
   1266 /// S. Warren, Jr., chapter 10.
   1267 /// LeadingZeros can be used to simplify the calculation if the upper bits
   1268 /// of the divided value are known zero.
   1269 APInt::mu APInt::magicu(unsigned LeadingZeros) const {
   1270   const APInt& d = *this;
   1271   unsigned p;
   1272   APInt nc, delta, q1, r1, q2, r2;
   1273   struct mu magu;
   1274   magu.a = 0;               // initialize "add" indicator
   1275   APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros);
   1276   APInt signedMin = APInt::getSignedMinValue(d.getBitWidth());
   1277   APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth());
   1278 
   1279   nc = allOnes - (allOnes - d).urem(d);
   1280   p = d.getBitWidth() - 1;  // initialize p
   1281   q1 = signedMin.udiv(nc);  // initialize q1 = 2p/nc
   1282   r1 = signedMin - q1*nc;   // initialize r1 = rem(2p,nc)
   1283   q2 = signedMax.udiv(d);   // initialize q2 = (2p-1)/d
   1284   r2 = signedMax - q2*d;    // initialize r2 = rem((2p-1),d)
   1285   do {
   1286     p = p + 1;
   1287     if (r1.uge(nc - r1)) {
   1288       q1 = q1 + q1 + 1;  // update q1
   1289       r1 = r1 + r1 - nc; // update r1
   1290     }
   1291     else {
   1292       q1 = q1+q1; // update q1
   1293       r1 = r1+r1; // update r1
   1294     }
   1295     if ((r2 + 1).uge(d - r2)) {
   1296       if (q2.uge(signedMax)) magu.a = 1;
   1297       q2 = q2+q2 + 1;     // update q2
   1298       r2 = r2+r2 + 1 - d; // update r2
   1299     }
   1300     else {
   1301       if (q2.uge(signedMin)) magu.a = 1;
   1302       q2 = q2+q2;     // update q2
   1303       r2 = r2+r2 + 1; // update r2
   1304     }
   1305     delta = d - 1 - r2;
   1306   } while (p < d.getBitWidth()*2 &&
   1307            (q1.ult(delta) || (q1 == delta && r1 == 0)));
   1308   magu.m = q2 + 1; // resulting magic number
   1309   magu.s = p - d.getBitWidth();  // resulting shift
   1310   return magu;
   1311 }
   1312 
   1313 /// Implementation of Knuth's Algorithm D (Division of nonnegative integers)
   1314 /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The
   1315 /// variables here have the same names as in the algorithm. Comments explain
   1316 /// the algorithm and any deviation from it.
   1317 static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
   1318                      unsigned m, unsigned n) {
   1319   assert(u && "Must provide dividend");
   1320   assert(v && "Must provide divisor");
   1321   assert(q && "Must provide quotient");
   1322   assert(u != v && u != q && v != q && "Must use different memory");
   1323   assert(n>1 && "n must be > 1");
   1324 
   1325   // b denotes the base of the number system. In our case b is 2^32.
   1326   const uint64_t b = uint64_t(1) << 32;
   1327 
   1328 // The DEBUG macros here tend to be spam in the debug output if you're not
   1329 // debugging this code. Disable them unless KNUTH_DEBUG is defined.
   1330 #ifdef KNUTH_DEBUG
   1331 #define DEBUG_KNUTH(X) LLVM_DEBUG(X)
   1332 #else
   1333 #define DEBUG_KNUTH(X) do {} while(false)
   1334 #endif
   1335 
   1336   DEBUG_KNUTH(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
   1337   DEBUG_KNUTH(dbgs() << "KnuthDiv: original:");
   1338   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
   1339   DEBUG_KNUTH(dbgs() << " by");
   1340   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
   1341   DEBUG_KNUTH(dbgs() << '\n');
   1342   // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of
   1343   // u and v by d. Note that we have taken Knuth's advice here to use a power
   1344   // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of
   1345   // 2 allows us to shift instead of multiply and it is easy to determine the
   1346   // shift amount from the leading zeros.  We are basically normalizing the u
   1347   // and v so that its high bits are shifted to the top of v's range without
   1348   // overflow. Note that this can require an extra word in u so that u must
   1349   // be of length m+n+1.
   1350   unsigned shift = countLeadingZeros(v[n-1]);
   1351   uint32_t v_carry = 0;
   1352   uint32_t u_carry = 0;
   1353   if (shift) {
   1354     for (unsigned i = 0; i < m+n; ++i) {
   1355       uint32_t u_tmp = u[i] >> (32 - shift);
   1356       u[i] = (u[i] << shift) | u_carry;
   1357       u_carry = u_tmp;
   1358     }
   1359     for (unsigned i = 0; i < n; ++i) {
   1360       uint32_t v_tmp = v[i] >> (32 - shift);
   1361       v[i] = (v[i] << shift) | v_carry;
   1362       v_carry = v_tmp;
   1363     }
   1364   }
   1365   u[m+n] = u_carry;
   1366 
   1367   DEBUG_KNUTH(dbgs() << "KnuthDiv:   normal:");
   1368   DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
   1369   DEBUG_KNUTH(dbgs() << " by");
   1370   DEBUG_KNUTH(for (int i = n; i > 0; i--) dbgs() << " " << v[i - 1]);
   1371   DEBUG_KNUTH(dbgs() << '\n');
   1372 
   1373   // D2. [Initialize j.]  Set j to m. This is the loop counter over the places.
   1374   int j = m;
   1375   do {
   1376     DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient digit #" << j << '\n');
   1377     // D3. [Calculate q'.].
   1378     //     Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q')
   1379     //     Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r')
   1380     // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease
   1381     // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test
   1382     // on v[n-2] determines at high speed most of the cases in which the trial
   1383     // value qp is one too large, and it eliminates all cases where qp is two
   1384     // too large.
   1385     uint64_t dividend = Make_64(u[j+n], u[j+n-1]);
   1386     DEBUG_KNUTH(dbgs() << "KnuthDiv: dividend == " << dividend << '\n');
   1387     uint64_t qp = dividend / v[n-1];
   1388     uint64_t rp = dividend % v[n-1];
   1389     if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) {
   1390       qp--;
   1391       rp += v[n-1];
   1392       if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2]))
   1393         qp--;
   1394     }
   1395     DEBUG_KNUTH(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n');
   1396 
   1397     // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with
   1398     // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation
   1399     // consists of a simple multiplication by a one-place number, combined with
   1400     // a subtraction.
   1401     // The digits (u[j+n]...u[j]) should be kept positive; if the result of
   1402     // this step is actually negative, (u[j+n]...u[j]) should be left as the
   1403     // true value plus b**(n+1), namely as the b's complement of
   1404     // the true value, and a "borrow" to the left should be remembered.
   1405     int64_t borrow = 0;
   1406     for (unsigned i = 0; i < n; ++i) {
   1407       uint64_t p = uint64_t(qp) * uint64_t(v[i]);
   1408       int64_t subres = int64_t(u[j+i]) - borrow - Lo_32(p);
   1409       u[j+i] = Lo_32(subres);
   1410       borrow = Hi_32(p) - Hi_32(subres);
   1411       DEBUG_KNUTH(dbgs() << "KnuthDiv: u[j+i] = " << u[j + i]
   1412                         << ", borrow = " << borrow << '\n');
   1413     }
   1414     bool isNeg = u[j+n] < borrow;
   1415     u[j+n] -= Lo_32(borrow);
   1416 
   1417     DEBUG_KNUTH(dbgs() << "KnuthDiv: after subtraction:");
   1418     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
   1419     DEBUG_KNUTH(dbgs() << '\n');
   1420 
   1421     // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was
   1422     // negative, go to step D6; otherwise go on to step D7.
   1423     q[j] = Lo_32(qp);
   1424     if (isNeg) {
   1425       // D6. [Add back]. The probability that this step is necessary is very
   1426       // small, on the order of only 2/b. Make sure that test data accounts for
   1427       // this possibility. Decrease q[j] by 1
   1428       q[j]--;
   1429       // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]).
   1430       // A carry will occur to the left of u[j+n], and it should be ignored
   1431       // since it cancels with the borrow that occurred in D4.
   1432       bool carry = false;
   1433       for (unsigned i = 0; i < n; i++) {
   1434         uint32_t limit = std::min(u[j+i],v[i]);
   1435         u[j+i] += v[i] + carry;
   1436         carry = u[j+i] < limit || (carry && u[j+i] == limit);
   1437       }
   1438       u[j+n] += carry;
   1439     }
   1440     DEBUG_KNUTH(dbgs() << "KnuthDiv: after correction:");
   1441     DEBUG_KNUTH(for (int i = m + n; i >= 0; i--) dbgs() << " " << u[i]);
   1442     DEBUG_KNUTH(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n');
   1443 
   1444     // D7. [Loop on j.]  Decrease j by one. Now if j >= 0, go back to D3.
   1445   } while (--j >= 0);
   1446 
   1447   DEBUG_KNUTH(dbgs() << "KnuthDiv: quotient:");
   1448   DEBUG_KNUTH(for (int i = m; i >= 0; i--) dbgs() << " " << q[i]);
   1449   DEBUG_KNUTH(dbgs() << '\n');
   1450 
   1451   // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired
   1452   // remainder may be obtained by dividing u[...] by d. If r is non-null we
   1453   // compute the remainder (urem uses this).
   1454   if (r) {
   1455     // The value d is expressed by the "shift" value above since we avoided
   1456     // multiplication by d by using a shift left. So, all we have to do is
   1457     // shift right here.
   1458     if (shift) {
   1459       uint32_t carry = 0;
   1460       DEBUG_KNUTH(dbgs() << "KnuthDiv: remainder:");
   1461       for (int i = n-1; i >= 0; i--) {
   1462         r[i] = (u[i] >> shift) | carry;
   1463         carry = u[i] << (32 - shift);
   1464         DEBUG_KNUTH(dbgs() << " " << r[i]);
   1465       }
   1466     } else {
   1467       for (int i = n-1; i >= 0; i--) {
   1468         r[i] = u[i];
   1469         DEBUG_KNUTH(dbgs() << " " << r[i]);
   1470       }
   1471     }
   1472     DEBUG_KNUTH(dbgs() << '\n');
   1473   }
   1474   DEBUG_KNUTH(dbgs() << '\n');
   1475 }
   1476 
   1477 void APInt::divide(const WordType *LHS, unsigned lhsWords, const WordType *RHS,
   1478                    unsigned rhsWords, WordType *Quotient, WordType *Remainder) {
   1479   assert(lhsWords >= rhsWords && "Fractional result");
   1480 
   1481   // First, compose the values into an array of 32-bit words instead of
   1482   // 64-bit words. This is a necessity of both the "short division" algorithm
   1483   // and the Knuth "classical algorithm" which requires there to be native
   1484   // operations for +, -, and * on an m bit value with an m*2 bit result. We
   1485   // can't use 64-bit operands here because we don't have native results of
   1486   // 128-bits. Furthermore, casting the 64-bit values to 32-bit values won't
   1487   // work on large-endian machines.
   1488   unsigned n = rhsWords * 2;
   1489   unsigned m = (lhsWords * 2) - n;
   1490 
   1491   // Allocate space for the temporary values we need either on the stack, if
   1492   // it will fit, or on the heap if it won't.
   1493   uint32_t SPACE[128];
   1494   uint32_t *U = nullptr;
   1495   uint32_t *V = nullptr;
   1496   uint32_t *Q = nullptr;
   1497   uint32_t *R = nullptr;
   1498   if ((Remainder?4:3)*n+2*m+1 <= 128) {
   1499     U = &SPACE[0];
   1500     V = &SPACE[m+n+1];
   1501     Q = &SPACE[(m+n+1) + n];
   1502     if (Remainder)
   1503       R = &SPACE[(m+n+1) + n + (m+n)];
   1504   } else {
   1505     U = new uint32_t[m + n + 1];
   1506     V = new uint32_t[n];
   1507     Q = new uint32_t[m+n];
   1508     if (Remainder)
   1509       R = new uint32_t[n];
   1510   }
   1511 
   1512   // Initialize the dividend
   1513   memset(U, 0, (m+n+1)*sizeof(uint32_t));
   1514   for (unsigned i = 0; i < lhsWords; ++i) {
   1515     uint64_t tmp = LHS[i];
   1516     U[i * 2] = Lo_32(tmp);
   1517     U[i * 2 + 1] = Hi_32(tmp);
   1518   }
   1519   U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm.
   1520 
   1521   // Initialize the divisor
   1522   memset(V, 0, (n)*sizeof(uint32_t));
   1523   for (unsigned i = 0; i < rhsWords; ++i) {
   1524     uint64_t tmp = RHS[i];
   1525     V[i * 2] = Lo_32(tmp);
   1526     V[i * 2 + 1] = Hi_32(tmp);
   1527   }
   1528 
   1529   // initialize the quotient and remainder
   1530   memset(Q, 0, (m+n) * sizeof(uint32_t));
   1531   if (Remainder)
   1532     memset(R, 0, n * sizeof(uint32_t));
   1533 
   1534   // Now, adjust m and n for the Knuth division. n is the number of words in
   1535   // the divisor. m is the number of words by which the dividend exceeds the
   1536   // divisor (i.e. m+n is the length of the dividend). These sizes must not
   1537   // contain any zero words or the Knuth algorithm fails.
   1538   for (unsigned i = n; i > 0 && V[i-1] == 0; i--) {
   1539     n--;
   1540     m++;
   1541   }
   1542   for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--)
   1543     m--;
   1544 
   1545   // If we're left with only a single word for the divisor, Knuth doesn't work
   1546   // so we implement the short division algorithm here. This is much simpler
   1547   // and faster because we are certain that we can divide a 64-bit quantity
   1548   // by a 32-bit quantity at hardware speed and short division is simply a
   1549   // series of such operations. This is just like doing short division but we
   1550   // are using base 2^32 instead of base 10.
   1551   assert(n != 0 && "Divide by zero?");
   1552   if (n == 1) {
   1553     uint32_t divisor = V[0];
   1554     uint32_t remainder = 0;
   1555     for (int i = m; i >= 0; i--) {
   1556       uint64_t partial_dividend = Make_64(remainder, U[i]);
   1557       if (partial_dividend == 0) {
   1558         Q[i] = 0;
   1559         remainder = 0;
   1560       } else if (partial_dividend < divisor) {
   1561         Q[i] = 0;
   1562         remainder = Lo_32(partial_dividend);
   1563       } else if (partial_dividend == divisor) {
   1564         Q[i] = 1;
   1565         remainder = 0;
   1566       } else {
   1567         Q[i] = Lo_32(partial_dividend / divisor);
   1568         remainder = Lo_32(partial_dividend - (Q[i] * divisor));
   1569       }
   1570     }
   1571     if (R)
   1572       R[0] = remainder;
   1573   } else {
   1574     // Now we're ready to invoke the Knuth classical divide algorithm. In this
   1575     // case n > 1.
   1576     KnuthDiv(U, V, Q, R, m, n);
   1577   }
   1578 
   1579   // If the caller wants the quotient
   1580   if (Quotient) {
   1581     for (unsigned i = 0; i < lhsWords; ++i)
   1582       Quotient[i] = Make_64(Q[i*2+1], Q[i*2]);
   1583   }
   1584 
   1585   // If the caller wants the remainder
   1586   if (Remainder) {
   1587     for (unsigned i = 0; i < rhsWords; ++i)
   1588       Remainder[i] = Make_64(R[i*2+1], R[i*2]);
   1589   }
   1590 
   1591   // Clean up the memory we allocated.
   1592   if (U != &SPACE[0]) {
   1593     delete [] U;
   1594     delete [] V;
   1595     delete [] Q;
   1596     delete [] R;
   1597   }
   1598 }
   1599 
   1600 APInt APInt::udiv(const APInt &RHS) const {
   1601   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   1602 
   1603   // First, deal with the easy case
   1604   if (isSingleWord()) {
   1605     assert(RHS.U.VAL != 0 && "Divide by zero?");
   1606     return APInt(BitWidth, U.VAL / RHS.U.VAL);
   1607   }
   1608 
   1609   // Get some facts about the LHS and RHS number of bits and words
   1610   unsigned lhsWords = getNumWords(getActiveBits());
   1611   unsigned rhsBits  = RHS.getActiveBits();
   1612   unsigned rhsWords = getNumWords(rhsBits);
   1613   assert(rhsWords && "Divided by zero???");
   1614 
   1615   // Deal with some degenerate cases
   1616   if (!lhsWords)
   1617     // 0 / X ===> 0
   1618     return APInt(BitWidth, 0);
   1619   if (rhsBits == 1)
   1620     // X / 1 ===> X
   1621     return *this;
   1622   if (lhsWords < rhsWords || this->ult(RHS))
   1623     // X / Y ===> 0, iff X < Y
   1624     return APInt(BitWidth, 0);
   1625   if (*this == RHS)
   1626     // X / X ===> 1
   1627     return APInt(BitWidth, 1);
   1628   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
   1629     // All high words are zero, just use native divide
   1630     return APInt(BitWidth, this->U.pVal[0] / RHS.U.pVal[0]);
   1631 
   1632   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
   1633   APInt Quotient(BitWidth, 0); // to hold result.
   1634   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal, nullptr);
   1635   return Quotient;
   1636 }
   1637 
   1638 APInt APInt::udiv(uint64_t RHS) const {
   1639   assert(RHS != 0 && "Divide by zero?");
   1640 
   1641   // First, deal with the easy case
   1642   if (isSingleWord())
   1643     return APInt(BitWidth, U.VAL / RHS);
   1644 
   1645   // Get some facts about the LHS words.
   1646   unsigned lhsWords = getNumWords(getActiveBits());
   1647 
   1648   // Deal with some degenerate cases
   1649   if (!lhsWords)
   1650     // 0 / X ===> 0
   1651     return APInt(BitWidth, 0);
   1652   if (RHS == 1)
   1653     // X / 1 ===> X
   1654     return *this;
   1655   if (this->ult(RHS))
   1656     // X / Y ===> 0, iff X < Y
   1657     return APInt(BitWidth, 0);
   1658   if (*this == RHS)
   1659     // X / X ===> 1
   1660     return APInt(BitWidth, 1);
   1661   if (lhsWords == 1) // rhsWords is 1 if lhsWords is 1.
   1662     // All high words are zero, just use native divide
   1663     return APInt(BitWidth, this->U.pVal[0] / RHS);
   1664 
   1665   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
   1666   APInt Quotient(BitWidth, 0); // to hold result.
   1667   divide(U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, nullptr);
   1668   return Quotient;
   1669 }
   1670 
   1671 APInt APInt::sdiv(const APInt &RHS) const {
   1672   if (isNegative()) {
   1673     if (RHS.isNegative())
   1674       return (-(*this)).udiv(-RHS);
   1675     return -((-(*this)).udiv(RHS));
   1676   }
   1677   if (RHS.isNegative())
   1678     return -(this->udiv(-RHS));
   1679   return this->udiv(RHS);
   1680 }
   1681 
   1682 APInt APInt::sdiv(int64_t RHS) const {
   1683   if (isNegative()) {
   1684     if (RHS < 0)
   1685       return (-(*this)).udiv(-RHS);
   1686     return -((-(*this)).udiv(RHS));
   1687   }
   1688   if (RHS < 0)
   1689     return -(this->udiv(-RHS));
   1690   return this->udiv(RHS);
   1691 }
   1692 
   1693 APInt APInt::urem(const APInt &RHS) const {
   1694   assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
   1695   if (isSingleWord()) {
   1696     assert(RHS.U.VAL != 0 && "Remainder by zero?");
   1697     return APInt(BitWidth, U.VAL % RHS.U.VAL);
   1698   }
   1699 
   1700   // Get some facts about the LHS
   1701   unsigned lhsWords = getNumWords(getActiveBits());
   1702 
   1703   // Get some facts about the RHS
   1704   unsigned rhsBits = RHS.getActiveBits();
   1705   unsigned rhsWords = getNumWords(rhsBits);
   1706   assert(rhsWords && "Performing remainder operation by zero ???");
   1707 
   1708   // Check the degenerate cases
   1709   if (lhsWords == 0)
   1710     // 0 % Y ===> 0
   1711     return APInt(BitWidth, 0);
   1712   if (rhsBits == 1)
   1713     // X % 1 ===> 0
   1714     return APInt(BitWidth, 0);
   1715   if (lhsWords < rhsWords || this->ult(RHS))
   1716     // X % Y ===> X, iff X < Y
   1717     return *this;
   1718   if (*this == RHS)
   1719     // X % X == 0;
   1720     return APInt(BitWidth, 0);
   1721   if (lhsWords == 1)
   1722     // All high words are zero, just use native remainder
   1723     return APInt(BitWidth, U.pVal[0] % RHS.U.pVal[0]);
   1724 
   1725   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
   1726   APInt Remainder(BitWidth, 0);
   1727   divide(U.pVal, lhsWords, RHS.U.pVal, rhsWords, nullptr, Remainder.U.pVal);
   1728   return Remainder;
   1729 }
   1730 
   1731 uint64_t APInt::urem(uint64_t RHS) const {
   1732   assert(RHS != 0 && "Remainder by zero?");
   1733 
   1734   if (isSingleWord())
   1735     return U.VAL % RHS;
   1736 
   1737   // Get some facts about the LHS
   1738   unsigned lhsWords = getNumWords(getActiveBits());
   1739 
   1740   // Check the degenerate cases
   1741   if (lhsWords == 0)
   1742     // 0 % Y ===> 0
   1743     return 0;
   1744   if (RHS == 1)
   1745     // X % 1 ===> 0
   1746     return 0;
   1747   if (this->ult(RHS))
   1748     // X % Y ===> X, iff X < Y
   1749     return getZExtValue();
   1750   if (*this == RHS)
   1751     // X % X == 0;
   1752     return 0;
   1753   if (lhsWords == 1)
   1754     // All high words are zero, just use native remainder
   1755     return U.pVal[0] % RHS;
   1756 
   1757   // We have to compute it the hard way. Invoke the Knuth divide algorithm.
   1758   uint64_t Remainder;
   1759   divide(U.pVal, lhsWords, &RHS, 1, nullptr, &Remainder);
   1760   return Remainder;
   1761 }
   1762 
   1763 APInt APInt::srem(const APInt &RHS) const {
   1764   if (isNegative()) {
   1765     if (RHS.isNegative())
   1766       return -((-(*this)).urem(-RHS));
   1767     return -((-(*this)).urem(RHS));
   1768   }
   1769   if (RHS.isNegative())
   1770     return this->urem(-RHS);
   1771   return this->urem(RHS);
   1772 }
   1773 
   1774 int64_t APInt::srem(int64_t RHS) const {
   1775   if (isNegative()) {
   1776     if (RHS < 0)
   1777       return -((-(*this)).urem(-RHS));
   1778     return -((-(*this)).urem(RHS));
   1779   }
   1780   if (RHS < 0)
   1781     return this->urem(-RHS);
   1782   return this->urem(RHS);
   1783 }
   1784 
   1785 void APInt::udivrem(const APInt &LHS, const APInt &RHS,
   1786                     APInt &Quotient, APInt &Remainder) {
   1787   assert(LHS.BitWidth == RHS.BitWidth && "Bit widths must be the same");
   1788   unsigned BitWidth = LHS.BitWidth;
   1789 
   1790   // First, deal with the easy case
   1791   if (LHS.isSingleWord()) {
   1792     assert(RHS.U.VAL != 0 && "Divide by zero?");
   1793     uint64_t QuotVal = LHS.U.VAL / RHS.U.VAL;
   1794     uint64_t RemVal = LHS.U.VAL % RHS.U.VAL;
   1795     Quotient = APInt(BitWidth, QuotVal);
   1796     Remainder = APInt(BitWidth, RemVal);
   1797     return;
   1798   }
   1799 
   1800   // Get some size facts about the dividend and divisor
   1801   unsigned lhsWords = getNumWords(LHS.getActiveBits());
   1802   unsigned rhsBits  = RHS.getActiveBits();
   1803   unsigned rhsWords = getNumWords(rhsBits);
   1804   assert(rhsWords && "Performing divrem operation by zero ???");
   1805 
   1806   // Check the degenerate cases
   1807   if (lhsWords == 0) {
   1808     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
   1809     Remainder = APInt(BitWidth, 0);   // 0 % Y ===> 0
   1810     return;
   1811   }
   1812 
   1813   if (rhsBits == 1) {
   1814     Quotient = LHS;                   // X / 1 ===> X
   1815     Remainder = APInt(BitWidth, 0);   // X % 1 ===> 0
   1816   }
   1817 
   1818   if (lhsWords < rhsWords || LHS.ult(RHS)) {
   1819     Remainder = LHS;                  // X % Y ===> X, iff X < Y
   1820     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
   1821     return;
   1822   }
   1823 
   1824   if (LHS == RHS) {
   1825     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
   1826     Remainder = APInt(BitWidth, 0);   // X % X ===> 0;
   1827     return;
   1828   }
   1829 
   1830   // Make sure there is enough space to hold the results.
   1831   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
   1832   // change the size. This is necessary if Quotient or Remainder is aliased
   1833   // with LHS or RHS.
   1834   Quotient.reallocate(BitWidth);
   1835   Remainder.reallocate(BitWidth);
   1836 
   1837   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
   1838     // There is only one word to consider so use the native versions.
   1839     uint64_t lhsValue = LHS.U.pVal[0];
   1840     uint64_t rhsValue = RHS.U.pVal[0];
   1841     Quotient = lhsValue / rhsValue;
   1842     Remainder = lhsValue % rhsValue;
   1843     return;
   1844   }
   1845 
   1846   // Okay, lets do it the long way
   1847   divide(LHS.U.pVal, lhsWords, RHS.U.pVal, rhsWords, Quotient.U.pVal,
   1848          Remainder.U.pVal);
   1849   // Clear the rest of the Quotient and Remainder.
   1850   std::memset(Quotient.U.pVal + lhsWords, 0,
   1851               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
   1852   std::memset(Remainder.U.pVal + rhsWords, 0,
   1853               (getNumWords(BitWidth) - rhsWords) * APINT_WORD_SIZE);
   1854 }
   1855 
   1856 void APInt::udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
   1857                     uint64_t &Remainder) {
   1858   assert(RHS != 0 && "Divide by zero?");
   1859   unsigned BitWidth = LHS.BitWidth;
   1860 
   1861   // First, deal with the easy case
   1862   if (LHS.isSingleWord()) {
   1863     uint64_t QuotVal = LHS.U.VAL / RHS;
   1864     Remainder = LHS.U.VAL % RHS;
   1865     Quotient = APInt(BitWidth, QuotVal);
   1866     return;
   1867   }
   1868 
   1869   // Get some size facts about the dividend and divisor
   1870   unsigned lhsWords = getNumWords(LHS.getActiveBits());
   1871 
   1872   // Check the degenerate cases
   1873   if (lhsWords == 0) {
   1874     Quotient = APInt(BitWidth, 0);    // 0 / Y ===> 0
   1875     Remainder = 0;                    // 0 % Y ===> 0
   1876     return;
   1877   }
   1878 
   1879   if (RHS == 1) {
   1880     Quotient = LHS;                   // X / 1 ===> X
   1881     Remainder = 0;                    // X % 1 ===> 0
   1882     return;
   1883   }
   1884 
   1885   if (LHS.ult(RHS)) {
   1886     Remainder = LHS.getZExtValue();   // X % Y ===> X, iff X < Y
   1887     Quotient = APInt(BitWidth, 0);    // X / Y ===> 0, iff X < Y
   1888     return;
   1889   }
   1890 
   1891   if (LHS == RHS) {
   1892     Quotient  = APInt(BitWidth, 1);   // X / X ===> 1
   1893     Remainder = 0;                    // X % X ===> 0;
   1894     return;
   1895   }
   1896 
   1897   // Make sure there is enough space to hold the results.
   1898   // NOTE: This assumes that reallocate won't affect any bits if it doesn't
   1899   // change the size. This is necessary if Quotient is aliased with LHS.
   1900   Quotient.reallocate(BitWidth);
   1901 
   1902   if (lhsWords == 1) { // rhsWords is 1 if lhsWords is 1.
   1903     // There is only one word to consider so use the native versions.
   1904     uint64_t lhsValue = LHS.U.pVal[0];
   1905     Quotient = lhsValue / RHS;
   1906     Remainder = lhsValue % RHS;
   1907     return;
   1908   }
   1909 
   1910   // Okay, lets do it the long way
   1911   divide(LHS.U.pVal, lhsWords, &RHS, 1, Quotient.U.pVal, &Remainder);
   1912   // Clear the rest of the Quotient.
   1913   std::memset(Quotient.U.pVal + lhsWords, 0,
   1914               (getNumWords(BitWidth) - lhsWords) * APINT_WORD_SIZE);
   1915 }
   1916 
   1917 void APInt::sdivrem(const APInt &LHS, const APInt &RHS,
   1918                     APInt &Quotient, APInt &Remainder) {
   1919   if (LHS.isNegative()) {
   1920     if (RHS.isNegative())
   1921       APInt::udivrem(-LHS, -RHS, Quotient, Remainder);
   1922     else {
   1923       APInt::udivrem(-LHS, RHS, Quotient, Remainder);
   1924       Quotient.negate();
   1925     }
   1926     Remainder.negate();
   1927   } else if (RHS.isNegative()) {
   1928     APInt::udivrem(LHS, -RHS, Quotient, Remainder);
   1929     Quotient.negate();
   1930   } else {
   1931     APInt::udivrem(LHS, RHS, Quotient, Remainder);
   1932   }
   1933 }
   1934 
   1935 void APInt::sdivrem(const APInt &LHS, int64_t RHS,
   1936                     APInt &Quotient, int64_t &Remainder) {
   1937   uint64_t R = Remainder;
   1938   if (LHS.isNegative()) {
   1939     if (RHS < 0)
   1940       APInt::udivrem(-LHS, -RHS, Quotient, R);
   1941     else {
   1942       APInt::udivrem(-LHS, RHS, Quotient, R);
   1943       Quotient.negate();
   1944     }
   1945     R = -R;
   1946   } else if (RHS < 0) {
   1947     APInt::udivrem(LHS, -RHS, Quotient, R);
   1948     Quotient.negate();
   1949   } else {
   1950     APInt::udivrem(LHS, RHS, Quotient, R);
   1951   }
   1952   Remainder = R;
   1953 }
   1954 
   1955 APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const {
   1956   APInt Res = *this+RHS;
   1957   Overflow = isNonNegative() == RHS.isNonNegative() &&
   1958              Res.isNonNegative() != isNonNegative();
   1959   return Res;
   1960 }
   1961 
   1962 APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const {
   1963   APInt Res = *this+RHS;
   1964   Overflow = Res.ult(RHS);
   1965   return Res;
   1966 }
   1967 
   1968 APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const {
   1969   APInt Res = *this - RHS;
   1970   Overflow = isNonNegative() != RHS.isNonNegative() &&
   1971              Res.isNonNegative() != isNonNegative();
   1972   return Res;
   1973 }
   1974 
   1975 APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const {
   1976   APInt Res = *this-RHS;
   1977   Overflow = Res.ugt(*this);
   1978   return Res;
   1979 }
   1980 
   1981 APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const {
   1982   // MININT/-1  -->  overflow.
   1983   Overflow = isMinSignedValue() && RHS.isAllOnesValue();
   1984   return sdiv(RHS);
   1985 }
   1986 
   1987 APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
   1988   APInt Res = *this * RHS;
   1989 
   1990   if (*this != 0 && RHS != 0)
   1991     Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS;
   1992   else
   1993     Overflow = false;
   1994   return Res;
   1995 }
   1996 
   1997 APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
   1998   if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
   1999     Overflow = true;
   2000     return *this * RHS;
   2001   }
   2002 
   2003   APInt Res = lshr(1) * RHS;
   2004   Overflow = Res.isNegative();
   2005   Res <<= 1;
   2006   if ((*this)[0]) {
   2007     Res += RHS;
   2008     if (Res.ult(RHS))
   2009       Overflow = true;
   2010   }
   2011   return Res;
   2012 }
   2013 
   2014 APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
   2015   Overflow = ShAmt.uge(getBitWidth());
   2016   if (Overflow)
   2017     return APInt(BitWidth, 0);
   2018 
   2019   if (isNonNegative()) // Don't allow sign change.
   2020     Overflow = ShAmt.uge(countLeadingZeros());
   2021   else
   2022     Overflow = ShAmt.uge(countLeadingOnes());
   2023 
   2024   return *this << ShAmt;
   2025 }
   2026 
   2027 APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
   2028   Overflow = ShAmt.uge(getBitWidth());
   2029   if (Overflow)
   2030     return APInt(BitWidth, 0);
   2031 
   2032   Overflow = ShAmt.ugt(countLeadingZeros());
   2033 
   2034   return *this << ShAmt;
   2035 }
   2036 
   2037 APInt APInt::sadd_sat(const APInt &RHS) const {
   2038   bool Overflow;
   2039   APInt Res = sadd_ov(RHS, Overflow);
   2040   if (!Overflow)
   2041     return Res;
   2042 
   2043   return isNegative() ? APInt::getSignedMinValue(BitWidth)
   2044                       : APInt::getSignedMaxValue(BitWidth);
   2045 }
   2046 
   2047 APInt APInt::uadd_sat(const APInt &RHS) const {
   2048   bool Overflow;
   2049   APInt Res = uadd_ov(RHS, Overflow);
   2050   if (!Overflow)
   2051     return Res;
   2052 
   2053   return APInt::getMaxValue(BitWidth);
   2054 }
   2055 
   2056 APInt APInt::ssub_sat(const APInt &RHS) const {
   2057   bool Overflow;
   2058   APInt Res = ssub_ov(RHS, Overflow);
   2059   if (!Overflow)
   2060     return Res;
   2061 
   2062   return isNegative() ? APInt::getSignedMinValue(BitWidth)
   2063                       : APInt::getSignedMaxValue(BitWidth);
   2064 }
   2065 
   2066 APInt APInt::usub_sat(const APInt &RHS) const {
   2067   bool Overflow;
   2068   APInt Res = usub_ov(RHS, Overflow);
   2069   if (!Overflow)
   2070     return Res;
   2071 
   2072   return APInt(BitWidth, 0);
   2073 }
   2074 
   2075 APInt APInt::smul_sat(const APInt &RHS) const {
   2076   bool Overflow;
   2077   APInt Res = smul_ov(RHS, Overflow);
   2078   if (!Overflow)
   2079     return Res;
   2080 
   2081   // The result is negative if one and only one of inputs is negative.
   2082   bool ResIsNegative = isNegative() ^ RHS.isNegative();
   2083 
   2084   return ResIsNegative ? APInt::getSignedMinValue(BitWidth)
   2085                        : APInt::getSignedMaxValue(BitWidth);
   2086 }
   2087 
   2088 APInt APInt::umul_sat(const APInt &RHS) const {
   2089   bool Overflow;
   2090   APInt Res = umul_ov(RHS, Overflow);
   2091   if (!Overflow)
   2092     return Res;
   2093 
   2094   return APInt::getMaxValue(BitWidth);
   2095 }
   2096 
   2097 APInt APInt::sshl_sat(const APInt &RHS) const {
   2098   bool Overflow;
   2099   APInt Res = sshl_ov(RHS, Overflow);
   2100   if (!Overflow)
   2101     return Res;
   2102 
   2103   return isNegative() ? APInt::getSignedMinValue(BitWidth)
   2104                       : APInt::getSignedMaxValue(BitWidth);
   2105 }
   2106 
   2107 APInt APInt::ushl_sat(const APInt &RHS) const {
   2108   bool Overflow;
   2109   APInt Res = ushl_ov(RHS, Overflow);
   2110   if (!Overflow)
   2111     return Res;
   2112 
   2113   return APInt::getMaxValue(BitWidth);
   2114 }
   2115 
   2116 void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
   2117   // Check our assumptions here
   2118   assert(!str.empty() && "Invalid string length");
   2119   assert((radix == 10 || radix == 8 || radix == 16 || radix == 2 ||
   2120           radix == 36) &&
   2121          "Radix should be 2, 8, 10, 16, or 36!");
   2122 
   2123   StringRef::iterator p = str.begin();
   2124   size_t slen = str.size();
   2125   bool isNeg = *p == '-';
   2126   if (*p == '-' || *p == '+') {
   2127     p++;
   2128     slen--;
   2129     assert(slen && "String is only a sign, needs a value.");
   2130   }
   2131   assert((slen <= numbits || radix != 2) && "Insufficient bit width");
   2132   assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
   2133   assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
   2134   assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
   2135          "Insufficient bit width");
   2136 
   2137   // Allocate memory if needed
   2138   if (isSingleWord())
   2139     U.VAL = 0;
   2140   else
   2141     U.pVal = getClearedMemory(getNumWords());
   2142 
   2143   // Figure out if we can shift instead of multiply
   2144   unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0);
   2145 
   2146   // Enter digit traversal loop
   2147   for (StringRef::iterator e = str.end(); p != e; ++p) {
   2148     unsigned digit = getDigit(*p, radix);
   2149     assert(digit < radix && "Invalid character in digit string");
   2150 
   2151     // Shift or multiply the value by the radix
   2152     if (slen > 1) {
   2153       if (shift)
   2154         *this <<= shift;
   2155       else
   2156         *this *= radix;
   2157     }
   2158 
   2159     // Add in the digit we just interpreted
   2160     *this += digit;
   2161   }
   2162   // If its negative, put it in two's complement form
   2163   if (isNeg)
   2164     this->negate();
   2165 }
   2166 
   2167 void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
   2168                      bool Signed, bool formatAsCLiteral) const {
   2169   assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
   2170           Radix == 36) &&
   2171          "Radix should be 2, 8, 10, 16, or 36!");
   2172 
   2173   const char *Prefix = "";
   2174   if (formatAsCLiteral) {
   2175     switch (Radix) {
   2176       case 2:
   2177         // Binary literals are a non-standard extension added in gcc 4.3:
   2178         // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
   2179         Prefix = "0b";
   2180         break;
   2181       case 8:
   2182         Prefix = "0";
   2183         break;
   2184       case 10:
   2185         break; // No prefix
   2186       case 16:
   2187         Prefix = "0x";
   2188         break;
   2189       default:
   2190         llvm_unreachable("Invalid radix!");
   2191     }
   2192   }
   2193 
   2194   // First, check for a zero value and just short circuit the logic below.
   2195   if (*this == 0) {
   2196     while (*Prefix) {
   2197       Str.push_back(*Prefix);
   2198       ++Prefix;
   2199     };
   2200     Str.push_back('0');
   2201     return;
   2202   }
   2203 
   2204   static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
   2205 
   2206   if (isSingleWord()) {
   2207     char Buffer[65];
   2208     char *BufPtr = std::end(Buffer);
   2209 
   2210     uint64_t N;
   2211     if (!Signed) {
   2212       N = getZExtValue();
   2213     } else {
   2214       int64_t I = getSExtValue();
   2215       if (I >= 0) {
   2216         N = I;
   2217       } else {
   2218         Str.push_back('-');
   2219         N = -(uint64_t)I;
   2220       }
   2221     }
   2222 
   2223     while (*Prefix) {
   2224       Str.push_back(*Prefix);
   2225       ++Prefix;
   2226     };
   2227 
   2228     while (N) {
   2229       *--BufPtr = Digits[N % Radix];
   2230       N /= Radix;
   2231     }
   2232     Str.append(BufPtr, std::end(Buffer));
   2233     return;
   2234   }
   2235 
   2236   APInt Tmp(*this);
   2237 
   2238   if (Signed && isNegative()) {
   2239     // They want to print the signed version and it is a negative value
   2240     // Flip the bits and add one to turn it into the equivalent positive
   2241     // value and put a '-' in the result.
   2242     Tmp.negate();
   2243     Str.push_back('-');
   2244   }
   2245 
   2246   while (*Prefix) {
   2247     Str.push_back(*Prefix);
   2248     ++Prefix;
   2249   };
   2250 
   2251   // We insert the digits backward, then reverse them to get the right order.
   2252   unsigned StartDig = Str.size();
   2253 
   2254   // For the 2, 8 and 16 bit cases, we can just shift instead of divide
   2255   // because the number of bits per digit (1, 3 and 4 respectively) divides
   2256   // equally.  We just shift until the value is zero.
   2257   if (Radix == 2 || Radix == 8 || Radix == 16) {
   2258     // Just shift tmp right for each digit width until it becomes zero
   2259     unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1));
   2260     unsigned MaskAmt = Radix - 1;
   2261 
   2262     while (Tmp.getBoolValue()) {
   2263       unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt;
   2264       Str.push_back(Digits[Digit]);
   2265       Tmp.lshrInPlace(ShiftAmt);
   2266     }
   2267   } else {
   2268     while (Tmp.getBoolValue()) {
   2269       uint64_t Digit;
   2270       udivrem(Tmp, Radix, Tmp, Digit);
   2271       assert(Digit < Radix && "divide failed");
   2272       Str.push_back(Digits[Digit]);
   2273     }
   2274   }
   2275 
   2276   // Reverse the digits before returning.
   2277   std::reverse(Str.begin()+StartDig, Str.end());
   2278 }
   2279 
   2280 /// Returns the APInt as a std::string. Note that this is an inefficient method.
   2281 /// It is better to pass in a SmallVector/SmallString to the methods above.
   2282 std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
   2283   SmallString<40> S;
   2284   toString(S, Radix, Signed, /* formatAsCLiteral = */false);
   2285   return std::string(S.str());
   2286 }
   2287 
   2288 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   2289 LLVM_DUMP_METHOD void APInt::dump() const {
   2290   SmallString<40> S, U;
   2291   this->toStringUnsigned(U);
   2292   this->toStringSigned(S);
   2293   dbgs() << "APInt(" << BitWidth << "b, "
   2294          << U << "u " << S << "s)\n";
   2295 }
   2296 #endif
   2297 
   2298 void APInt::print(raw_ostream &OS, bool isSigned) const {
   2299   SmallString<40> S;
   2300   this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
   2301   OS << S;
   2302 }
   2303 
   2304 // This implements a variety of operations on a representation of
   2305 // arbitrary precision, two's-complement, bignum integer values.
   2306 
   2307 // Assumed by lowHalf, highHalf, partMSB and partLSB.  A fairly safe
   2308 // and unrestricting assumption.
   2309 static_assert(APInt::APINT_BITS_PER_WORD % 2 == 0,
   2310               "Part width must be divisible by 2!");
   2311 
   2312 /* Some handy functions local to this file.  */
   2313 
   2314 /* Returns the integer part with the least significant BITS set.
   2315    BITS cannot be zero.  */
   2316 static inline APInt::WordType lowBitMask(unsigned bits) {
   2317   assert(bits != 0 && bits <= APInt::APINT_BITS_PER_WORD);
   2318 
   2319   return ~(APInt::WordType) 0 >> (APInt::APINT_BITS_PER_WORD - bits);
   2320 }
   2321 
   2322 /* Returns the value of the lower half of PART.  */
   2323 static inline APInt::WordType lowHalf(APInt::WordType part) {
   2324   return part & lowBitMask(APInt::APINT_BITS_PER_WORD / 2);
   2325 }
   2326 
   2327 /* Returns the value of the upper half of PART.  */
   2328 static inline APInt::WordType highHalf(APInt::WordType part) {
   2329   return part >> (APInt::APINT_BITS_PER_WORD / 2);
   2330 }
   2331 
   2332 /* Returns the bit number of the most significant set bit of a part.
   2333    If the input number has no bits set -1U is returned.  */
   2334 static unsigned partMSB(APInt::WordType value) {
   2335   return findLastSet(value, ZB_Max);
   2336 }
   2337 
   2338 /* Returns the bit number of the least significant set bit of a
   2339    part.  If the input number has no bits set -1U is returned.  */
   2340 static unsigned partLSB(APInt::WordType value) {
   2341   return findFirstSet(value, ZB_Max);
   2342 }
   2343 
   2344 /* Sets the least significant part of a bignum to the input value, and
   2345    zeroes out higher parts.  */
   2346 void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
   2347   assert(parts > 0);
   2348 
   2349   dst[0] = part;
   2350   for (unsigned i = 1; i < parts; i++)
   2351     dst[i] = 0;
   2352 }
   2353 
   2354 /* Assign one bignum to another.  */
   2355 void APInt::tcAssign(WordType *dst, const WordType *src, unsigned parts) {
   2356   for (unsigned i = 0; i < parts; i++)
   2357     dst[i] = src[i];
   2358 }
   2359 
   2360 /* Returns true if a bignum is zero, false otherwise.  */
   2361 bool APInt::tcIsZero(const WordType *src, unsigned parts) {
   2362   for (unsigned i = 0; i < parts; i++)
   2363     if (src[i])
   2364       return false;
   2365 
   2366   return true;
   2367 }
   2368 
   2369 /* Extract the given bit of a bignum; returns 0 or 1.  */
   2370 int APInt::tcExtractBit(const WordType *parts, unsigned bit) {
   2371   return (parts[whichWord(bit)] & maskBit(bit)) != 0;
   2372 }
   2373 
   2374 /* Set the given bit of a bignum. */
   2375 void APInt::tcSetBit(WordType *parts, unsigned bit) {
   2376   parts[whichWord(bit)] |= maskBit(bit);
   2377 }
   2378 
   2379 /* Clears the given bit of a bignum. */
   2380 void APInt::tcClearBit(WordType *parts, unsigned bit) {
   2381   parts[whichWord(bit)] &= ~maskBit(bit);
   2382 }
   2383 
   2384 /* Returns the bit number of the least significant set bit of a
   2385    number.  If the input number has no bits set -1U is returned.  */
   2386 unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
   2387   for (unsigned i = 0; i < n; i++) {
   2388     if (parts[i] != 0) {
   2389       unsigned lsb = partLSB(parts[i]);
   2390 
   2391       return lsb + i * APINT_BITS_PER_WORD;
   2392     }
   2393   }
   2394 
   2395   return -1U;
   2396 }
   2397 
   2398 /* Returns the bit number of the most significant set bit of a number.
   2399    If the input number has no bits set -1U is returned.  */
   2400 unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
   2401   do {
   2402     --n;
   2403 
   2404     if (parts[n] != 0) {
   2405       unsigned msb = partMSB(parts[n]);
   2406 
   2407       return msb + n * APINT_BITS_PER_WORD;
   2408     }
   2409   } while (n);
   2410 
   2411   return -1U;
   2412 }
   2413 
   2414 /* Copy the bit vector of width srcBITS from SRC, starting at bit
   2415    srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes
   2416    the least significant bit of DST.  All high bits above srcBITS in
   2417    DST are zero-filled.  */
   2418 void
   2419 APInt::tcExtract(WordType *dst, unsigned dstCount, const WordType *src,
   2420                  unsigned srcBits, unsigned srcLSB) {
   2421   unsigned dstParts = (srcBits + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
   2422   assert(dstParts <= dstCount);
   2423 
   2424   unsigned firstSrcPart = srcLSB / APINT_BITS_PER_WORD;
   2425   tcAssign (dst, src + firstSrcPart, dstParts);
   2426 
   2427   unsigned shift = srcLSB % APINT_BITS_PER_WORD;
   2428   tcShiftRight (dst, dstParts, shift);
   2429 
   2430   /* We now have (dstParts * APINT_BITS_PER_WORD - shift) bits from SRC
   2431      in DST.  If this is less that srcBits, append the rest, else
   2432      clear the high bits.  */
   2433   unsigned n = dstParts * APINT_BITS_PER_WORD - shift;
   2434   if (n < srcBits) {
   2435     WordType mask = lowBitMask (srcBits - n);
   2436     dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask)
   2437                           << n % APINT_BITS_PER_WORD);
   2438   } else if (n > srcBits) {
   2439     if (srcBits % APINT_BITS_PER_WORD)
   2440       dst[dstParts - 1] &= lowBitMask (srcBits % APINT_BITS_PER_WORD);
   2441   }
   2442 
   2443   /* Clear high parts.  */
   2444   while (dstParts < dstCount)
   2445     dst[dstParts++] = 0;
   2446 }
   2447 
   2448 /* DST += RHS + C where C is zero or one.  Returns the carry flag.  */
   2449 APInt::WordType APInt::tcAdd(WordType *dst, const WordType *rhs,
   2450                              WordType c, unsigned parts) {
   2451   assert(c <= 1);
   2452 
   2453   for (unsigned i = 0; i < parts; i++) {
   2454     WordType l = dst[i];
   2455     if (c) {
   2456       dst[i] += rhs[i] + 1;
   2457       c = (dst[i] <= l);
   2458     } else {
   2459       dst[i] += rhs[i];
   2460       c = (dst[i] < l);
   2461     }
   2462   }
   2463 
   2464   return c;
   2465 }
   2466 
   2467 /// This function adds a single "word" integer, src, to the multiple
   2468 /// "word" integer array, dst[]. dst[] is modified to reflect the addition and
   2469 /// 1 is returned if there is a carry out, otherwise 0 is returned.
   2470 /// @returns the carry of the addition.
   2471 APInt::WordType APInt::tcAddPart(WordType *dst, WordType src,
   2472                                  unsigned parts) {
   2473   for (unsigned i = 0; i < parts; ++i) {
   2474     dst[i] += src;
   2475     if (dst[i] >= src)
   2476       return 0; // No need to carry so exit early.
   2477     src = 1; // Carry one to next digit.
   2478   }
   2479 
   2480   return 1;
   2481 }
   2482 
   2483 /* DST -= RHS + C where C is zero or one.  Returns the carry flag.  */
   2484 APInt::WordType APInt::tcSubtract(WordType *dst, const WordType *rhs,
   2485                                   WordType c, unsigned parts) {
   2486   assert(c <= 1);
   2487 
   2488   for (unsigned i = 0; i < parts; i++) {
   2489     WordType l = dst[i];
   2490     if (c) {
   2491       dst[i] -= rhs[i] + 1;
   2492       c = (dst[i] >= l);
   2493     } else {
   2494       dst[i] -= rhs[i];
   2495       c = (dst[i] > l);
   2496     }
   2497   }
   2498 
   2499   return c;
   2500 }
   2501 
   2502 /// This function subtracts a single "word" (64-bit word), src, from
   2503 /// the multi-word integer array, dst[], propagating the borrowed 1 value until
   2504 /// no further borrowing is needed or it runs out of "words" in dst.  The result
   2505 /// is 1 if "borrowing" exhausted the digits in dst, or 0 if dst was not
   2506 /// exhausted. In other words, if src > dst then this function returns 1,
   2507 /// otherwise 0.
   2508 /// @returns the borrow out of the subtraction
   2509 APInt::WordType APInt::tcSubtractPart(WordType *dst, WordType src,
   2510                                       unsigned parts) {
   2511   for (unsigned i = 0; i < parts; ++i) {
   2512     WordType Dst = dst[i];
   2513     dst[i] -= src;
   2514     if (src <= Dst)
   2515       return 0; // No need to borrow so exit early.
   2516     src = 1; // We have to "borrow 1" from next "word"
   2517   }
   2518 
   2519   return 1;
   2520 }
   2521 
   2522 /* Negate a bignum in-place.  */
   2523 void APInt::tcNegate(WordType *dst, unsigned parts) {
   2524   tcComplement(dst, parts);
   2525   tcIncrement(dst, parts);
   2526 }
   2527 
   2528 /*  DST += SRC * MULTIPLIER + CARRY   if add is true
   2529     DST  = SRC * MULTIPLIER + CARRY   if add is false
   2530 
   2531     Requires 0 <= DSTPARTS <= SRCPARTS + 1.  If DST overlaps SRC
   2532     they must start at the same point, i.e. DST == SRC.
   2533 
   2534     If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
   2535     returned.  Otherwise DST is filled with the least significant
   2536     DSTPARTS parts of the result, and if all of the omitted higher
   2537     parts were zero return zero, otherwise overflow occurred and
   2538     return one.  */
   2539 int APInt::tcMultiplyPart(WordType *dst, const WordType *src,
   2540                           WordType multiplier, WordType carry,
   2541                           unsigned srcParts, unsigned dstParts,
   2542                           bool add) {
   2543   /* Otherwise our writes of DST kill our later reads of SRC.  */
   2544   assert(dst <= src || dst >= src + srcParts);
   2545   assert(dstParts <= srcParts + 1);
   2546 
   2547   /* N loops; minimum of dstParts and srcParts.  */
   2548   unsigned n = std::min(dstParts, srcParts);
   2549 
   2550   for (unsigned i = 0; i < n; i++) {
   2551     WordType low, mid, high, srcPart;
   2552 
   2553       /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
   2554 
   2555          This cannot overflow, because
   2556 
   2557          (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1)
   2558 
   2559          which is less than n^2.  */
   2560 
   2561     srcPart = src[i];
   2562 
   2563     if (multiplier == 0 || srcPart == 0) {
   2564       low = carry;
   2565       high = 0;
   2566     } else {
   2567       low = lowHalf(srcPart) * lowHalf(multiplier);
   2568       high = highHalf(srcPart) * highHalf(multiplier);
   2569 
   2570       mid = lowHalf(srcPart) * highHalf(multiplier);
   2571       high += highHalf(mid);
   2572       mid <<= APINT_BITS_PER_WORD / 2;
   2573       if (low + mid < low)
   2574         high++;
   2575       low += mid;
   2576 
   2577       mid = highHalf(srcPart) * lowHalf(multiplier);
   2578       high += highHalf(mid);
   2579       mid <<= APINT_BITS_PER_WORD / 2;
   2580       if (low + mid < low)
   2581         high++;
   2582       low += mid;
   2583 
   2584       /* Now add carry.  */
   2585       if (low + carry < low)
   2586         high++;
   2587       low += carry;
   2588     }
   2589 
   2590     if (add) {
   2591       /* And now DST[i], and store the new low part there.  */
   2592       if (low + dst[i] < low)
   2593         high++;
   2594       dst[i] += low;
   2595     } else
   2596       dst[i] = low;
   2597 
   2598     carry = high;
   2599   }
   2600 
   2601   if (srcParts < dstParts) {
   2602     /* Full multiplication, there is no overflow.  */
   2603     assert(srcParts + 1 == dstParts);
   2604     dst[srcParts] = carry;
   2605     return 0;
   2606   }
   2607 
   2608   /* We overflowed if there is carry.  */
   2609   if (carry)
   2610     return 1;
   2611 
   2612   /* We would overflow if any significant unwritten parts would be
   2613      non-zero.  This is true if any remaining src parts are non-zero
   2614      and the multiplier is non-zero.  */
   2615   if (multiplier)
   2616     for (unsigned i = dstParts; i < srcParts; i++)
   2617       if (src[i])
   2618         return 1;
   2619 
   2620   /* We fitted in the narrow destination.  */
   2621   return 0;
   2622 }
   2623 
   2624 /* DST = LHS * RHS, where DST has the same width as the operands and
   2625    is filled with the least significant parts of the result.  Returns
   2626    one if overflow occurred, otherwise zero.  DST must be disjoint
   2627    from both operands.  */
   2628 int APInt::tcMultiply(WordType *dst, const WordType *lhs,
   2629                       const WordType *rhs, unsigned parts) {
   2630   assert(dst != lhs && dst != rhs);
   2631 
   2632   int overflow = 0;
   2633   tcSet(dst, 0, parts);
   2634 
   2635   for (unsigned i = 0; i < parts; i++)
   2636     overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
   2637                                parts - i, true);
   2638 
   2639   return overflow;
   2640 }
   2641 
   2642 /// DST = LHS * RHS, where DST has width the sum of the widths of the
   2643 /// operands. No overflow occurs. DST must be disjoint from both operands.
   2644 void APInt::tcFullMultiply(WordType *dst, const WordType *lhs,
   2645                            const WordType *rhs, unsigned lhsParts,
   2646                            unsigned rhsParts) {
   2647   /* Put the narrower number on the LHS for less loops below.  */
   2648   if (lhsParts > rhsParts)
   2649     return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts);
   2650 
   2651   assert(dst != lhs && dst != rhs);
   2652 
   2653   tcSet(dst, 0, rhsParts);
   2654 
   2655   for (unsigned i = 0; i < lhsParts; i++)
   2656     tcMultiplyPart(&dst[i], rhs, lhs[i], 0, rhsParts, rhsParts + 1, true);
   2657 }
   2658 
   2659 /* If RHS is zero LHS and REMAINDER are left unchanged, return one.
   2660    Otherwise set LHS to LHS / RHS with the fractional part discarded,
   2661    set REMAINDER to the remainder, return zero.  i.e.
   2662 
   2663    OLD_LHS = RHS * LHS + REMAINDER
   2664 
   2665    SCRATCH is a bignum of the same size as the operands and result for
   2666    use by the routine; its contents need not be initialized and are
   2667    destroyed.  LHS, REMAINDER and SCRATCH must be distinct.
   2668 */
   2669 int APInt::tcDivide(WordType *lhs, const WordType *rhs,
   2670                     WordType *remainder, WordType *srhs,
   2671                     unsigned parts) {
   2672   assert(lhs != remainder && lhs != srhs && remainder != srhs);
   2673 
   2674   unsigned shiftCount = tcMSB(rhs, parts) + 1;
   2675   if (shiftCount == 0)
   2676     return true;
   2677 
   2678   shiftCount = parts * APINT_BITS_PER_WORD - shiftCount;
   2679   unsigned n = shiftCount / APINT_BITS_PER_WORD;
   2680   WordType mask = (WordType) 1 << (shiftCount % APINT_BITS_PER_WORD);
   2681 
   2682   tcAssign(srhs, rhs, parts);
   2683   tcShiftLeft(srhs, parts, shiftCount);
   2684   tcAssign(remainder, lhs, parts);
   2685   tcSet(lhs, 0, parts);
   2686 
   2687   /* Loop, subtracting SRHS if REMAINDER is greater and adding that to
   2688      the total.  */
   2689   for (;;) {
   2690     int compare = tcCompare(remainder, srhs, parts);
   2691     if (compare >= 0) {
   2692       tcSubtract(remainder, srhs, 0, parts);
   2693       lhs[n] |= mask;
   2694     }
   2695 
   2696     if (shiftCount == 0)
   2697       break;
   2698     shiftCount--;
   2699     tcShiftRight(srhs, parts, 1);
   2700     if ((mask >>= 1) == 0) {
   2701       mask = (WordType) 1 << (APINT_BITS_PER_WORD - 1);
   2702       n--;
   2703     }
   2704   }
   2705 
   2706   return false;
   2707 }
   2708 
   2709 /// Shift a bignum left Cound bits in-place. Shifted in bits are zero. There are
   2710 /// no restrictions on Count.
   2711 void APInt::tcShiftLeft(WordType *Dst, unsigned Words, unsigned Count) {
   2712   // Don't bother performing a no-op shift.
   2713   if (!Count)
   2714     return;
   2715 
   2716   // WordShift is the inter-part shift; BitShift is the intra-part shift.
   2717   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
   2718   unsigned BitShift = Count % APINT_BITS_PER_WORD;
   2719 
   2720   // Fastpath for moving by whole words.
   2721   if (BitShift == 0) {
   2722     std::memmove(Dst + WordShift, Dst, (Words - WordShift) * APINT_WORD_SIZE);
   2723   } else {
   2724     while (Words-- > WordShift) {
   2725       Dst[Words] = Dst[Words - WordShift] << BitShift;
   2726       if (Words > WordShift)
   2727         Dst[Words] |=
   2728           Dst[Words - WordShift - 1] >> (APINT_BITS_PER_WORD - BitShift);
   2729     }
   2730   }
   2731 
   2732   // Fill in the remainder with 0s.
   2733   std::memset(Dst, 0, WordShift * APINT_WORD_SIZE);
   2734 }
   2735 
   2736 /// Shift a bignum right Count bits in-place. Shifted in bits are zero. There
   2737 /// are no restrictions on Count.
   2738 void APInt::tcShiftRight(WordType *Dst, unsigned Words, unsigned Count) {
   2739   // Don't bother performing a no-op shift.
   2740   if (!Count)
   2741     return;
   2742 
   2743   // WordShift is the inter-part shift; BitShift is the intra-part shift.
   2744   unsigned WordShift = std::min(Count / APINT_BITS_PER_WORD, Words);
   2745   unsigned BitShift = Count % APINT_BITS_PER_WORD;
   2746 
   2747   unsigned WordsToMove = Words - WordShift;
   2748   // Fastpath for moving by whole words.
   2749   if (BitShift == 0) {
   2750     std::memmove(Dst, Dst + WordShift, WordsToMove * APINT_WORD_SIZE);
   2751   } else {
   2752     for (unsigned i = 0; i != WordsToMove; ++i) {
   2753       Dst[i] = Dst[i + WordShift] >> BitShift;
   2754       if (i + 1 != WordsToMove)
   2755         Dst[i] |= Dst[i + WordShift + 1] << (APINT_BITS_PER_WORD - BitShift);
   2756     }
   2757   }
   2758 
   2759   // Fill in the remainder with 0s.
   2760   std::memset(Dst + WordsToMove, 0, WordShift * APINT_WORD_SIZE);
   2761 }
   2762 
   2763 /* Bitwise and of two bignums.  */
   2764 void APInt::tcAnd(WordType *dst, const WordType *rhs, unsigned parts) {
   2765   for (unsigned i = 0; i < parts; i++)
   2766     dst[i] &= rhs[i];
   2767 }
   2768 
   2769 /* Bitwise inclusive or of two bignums.  */
   2770 void APInt::tcOr(WordType *dst, const WordType *rhs, unsigned parts) {
   2771   for (unsigned i = 0; i < parts; i++)
   2772     dst[i] |= rhs[i];
   2773 }
   2774 
   2775 /* Bitwise exclusive or of two bignums.  */
   2776 void APInt::tcXor(WordType *dst, const WordType *rhs, unsigned parts) {
   2777   for (unsigned i = 0; i < parts; i++)
   2778     dst[i] ^= rhs[i];
   2779 }
   2780 
   2781 /* Complement a bignum in-place.  */
   2782 void APInt::tcComplement(WordType *dst, unsigned parts) {
   2783   for (unsigned i = 0; i < parts; i++)
   2784     dst[i] = ~dst[i];
   2785 }
   2786 
   2787 /* Comparison (unsigned) of two bignums.  */
   2788 int APInt::tcCompare(const WordType *lhs, const WordType *rhs,
   2789                      unsigned parts) {
   2790   while (parts) {
   2791     parts--;
   2792     if (lhs[parts] != rhs[parts])
   2793       return (lhs[parts] > rhs[parts]) ? 1 : -1;
   2794   }
   2795 
   2796   return 0;
   2797 }
   2798 
   2799 /* Set the least significant BITS bits of a bignum, clear the
   2800    rest.  */
   2801 void APInt::tcSetLeastSignificantBits(WordType *dst, unsigned parts,
   2802                                       unsigned bits) {
   2803   unsigned i = 0;
   2804   while (bits > APINT_BITS_PER_WORD) {
   2805     dst[i++] = ~(WordType) 0;
   2806     bits -= APINT_BITS_PER_WORD;
   2807   }
   2808 
   2809   if (bits)
   2810     dst[i++] = ~(WordType) 0 >> (APINT_BITS_PER_WORD - bits);
   2811 
   2812   while (i < parts)
   2813     dst[i++] = 0;
   2814 }
   2815 
   2816 APInt llvm::APIntOps::RoundingUDiv(const APInt &A, const APInt &B,
   2817                                    APInt::Rounding RM) {
   2818   // Currently udivrem always rounds down.
   2819   switch (RM) {
   2820   case APInt::Rounding::DOWN:
   2821   case APInt::Rounding::TOWARD_ZERO:
   2822     return A.udiv(B);
   2823   case APInt::Rounding::UP: {
   2824     APInt Quo, Rem;
   2825     APInt::udivrem(A, B, Quo, Rem);
   2826     if (Rem == 0)
   2827       return Quo;
   2828     return Quo + 1;
   2829   }
   2830   }
   2831   llvm_unreachable("Unknown APInt::Rounding enum");
   2832 }
   2833 
   2834 APInt llvm::APIntOps::RoundingSDiv(const APInt &A, const APInt &B,
   2835                                    APInt::Rounding RM) {
   2836   switch (RM) {
   2837   case APInt::Rounding::DOWN:
   2838   case APInt::Rounding::UP: {
   2839     APInt Quo, Rem;
   2840     APInt::sdivrem(A, B, Quo, Rem);
   2841     if (Rem == 0)
   2842       return Quo;
   2843     // This algorithm deals with arbitrary rounding mode used by sdivrem.
   2844     // We want to check whether the non-integer part of the mathematical value
   2845     // is negative or not. If the non-integer part is negative, we need to round
   2846     // down from Quo; otherwise, if it's positive or 0, we return Quo, as it's
   2847     // already rounded down.
   2848     if (RM == APInt::Rounding::DOWN) {
   2849       if (Rem.isNegative() != B.isNegative())
   2850         return Quo - 1;
   2851       return Quo;
   2852     }
   2853     if (Rem.isNegative() != B.isNegative())
   2854       return Quo;
   2855     return Quo + 1;
   2856   }
   2857   // Currently sdiv rounds towards zero.
   2858   case APInt::Rounding::TOWARD_ZERO:
   2859     return A.sdiv(B);
   2860   }
   2861   llvm_unreachable("Unknown APInt::Rounding enum");
   2862 }
   2863 
   2864 Optional<APInt>
   2865 llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
   2866                                            unsigned RangeWidth) {
   2867   unsigned CoeffWidth = A.getBitWidth();
   2868   assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth());
   2869   assert(RangeWidth <= CoeffWidth &&
   2870          "Value range width should be less than coefficient width");
   2871   assert(RangeWidth > 1 && "Value range bit width should be > 1");
   2872 
   2873   LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B
   2874                     << "x + " << C << ", rw:" << RangeWidth << '\n');
   2875 
   2876   // Identify 0 as a (non)solution immediately.
   2877   if (C.sextOrTrunc(RangeWidth).isNullValue() ) {
   2878     LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n");
   2879     return APInt(CoeffWidth, 0);
   2880   }
   2881 
   2882   // The result of APInt arithmetic has the same bit width as the operands,
   2883   // so it can actually lose high bits. A product of two n-bit integers needs
   2884   // 2n-1 bits to represent the full value.
   2885   // The operation done below (on quadratic coefficients) that can produce
   2886   // the largest value is the evaluation of the equation during bisection,
   2887   // which needs 3 times the bitwidth of the coefficient, so the total number
   2888   // of required bits is 3n.
   2889   //
   2890   // The purpose of this extension is to simulate the set Z of all integers,
   2891   // where n+1 > n for all n in Z. In Z it makes sense to talk about positive
   2892   // and negative numbers (not so much in a modulo arithmetic). The method
   2893   // used to solve the equation is based on the standard formula for real
   2894   // numbers, and uses the concepts of "positive" and "negative" with their
   2895   // usual meanings.
   2896   CoeffWidth *= 3;
   2897   A = A.sext(CoeffWidth);
   2898   B = B.sext(CoeffWidth);
   2899   C = C.sext(CoeffWidth);
   2900 
   2901   // Make A > 0 for simplicity. Negate cannot overflow at this point because
   2902   // the bit width has increased.
   2903   if (A.isNegative()) {
   2904     A.negate();
   2905     B.negate();
   2906     C.negate();
   2907   }
   2908 
   2909   // Solving an equation q(x) = 0 with coefficients in modular arithmetic
   2910   // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ...,
   2911   // and R = 2^BitWidth.
   2912   // Since we're trying not only to find exact solutions, but also values
   2913   // that "wrap around", such a set will always have a solution, i.e. an x
   2914   // that satisfies at least one of the equations, or such that |q(x)|
   2915   // exceeds kR, while |q(x-1)| for the same k does not.
   2916   //
   2917   // We need to find a value k, such that Ax^2 + Bx + C = kR will have a
   2918   // positive solution n (in the above sense), and also such that the n
   2919   // will be the least among all solutions corresponding to k = 0, 1, ...
   2920   // (more precisely, the least element in the set
   2921   //   { n(k) | k is such that a solution n(k) exists }).
   2922   //
   2923   // Consider the parabola (over real numbers) that corresponds to the
   2924   // quadratic equation. Since A > 0, the arms of the parabola will point
   2925   // up. Picking different values of k will shift it up and down by R.
   2926   //
   2927   // We want to shift the parabola in such a way as to reduce the problem
   2928   // of solving q(x) = kR to solving shifted_q(x) = 0.
   2929   // (The interesting solutions are the ceilings of the real number
   2930   // solutions.)
   2931   APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth);
   2932   APInt TwoA = 2 * A;
   2933   APInt SqrB = B * B;
   2934   bool PickLow;
   2935 
   2936   auto RoundUp = [] (const APInt &V, const APInt &A) -> APInt {
   2937     assert(A.isStrictlyPositive());
   2938     APInt T = V.abs().urem(A);
   2939     if (T.isNullValue())
   2940       return V;
   2941     return V.isNegative() ? V+T : V+(A-T);
   2942   };
   2943 
   2944   // The vertex of the parabola is at -B/2A, but since A > 0, it's negative
   2945   // iff B is positive.
   2946   if (B.isNonNegative()) {
   2947     // If B >= 0, the vertex it at a negative location (or at 0), so in
   2948     // order to have a non-negative solution we need to pick k that makes
   2949     // C-kR negative. To satisfy all the requirements for the solution
   2950     // that we are looking for, it needs to be closest to 0 of all k.
   2951     C = C.srem(R);
   2952     if (C.isStrictlyPositive())
   2953       C -= R;
   2954     // Pick the greater solution.
   2955     PickLow = false;
   2956   } else {
   2957     // If B < 0, the vertex is at a positive location. For any solution
   2958     // to exist, the discriminant must be non-negative. This means that
   2959     // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a
   2960     // lower bound on values of k: kR >= C - B^2/4A.
   2961     APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0.
   2962     // Round LowkR up (towards +inf) to the nearest kR.
   2963     LowkR = RoundUp(LowkR, R);
   2964 
   2965     // If there exists k meeting the condition above, and such that
   2966     // C-kR > 0, there will be two positive real number solutions of
   2967     // q(x) = kR. Out of all such values of k, pick the one that makes
   2968     // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0).
   2969     // In other words, find maximum k such that LowkR <= kR < C.
   2970     if (C.sgt(LowkR)) {
   2971       // If LowkR < C, then such a k is guaranteed to exist because
   2972       // LowkR itself is a multiple of R.
   2973       C -= -RoundUp(-C, R);      // C = C - RoundDown(C, R)
   2974       // Pick the smaller solution.
   2975       PickLow = true;
   2976     } else {
   2977       // If C-kR < 0 for all potential k's, it means that one solution
   2978       // will be negative, while the other will be positive. The positive
   2979       // solution will shift towards 0 if the parabola is moved up.
   2980       // Pick the kR closest to the lower bound (i.e. make C-kR closest
   2981       // to 0, or in other words, out of all parabolas that have solutions,
   2982       // pick the one that is the farthest "up").
   2983       // Since LowkR is itself a multiple of R, simply take C-LowkR.
   2984       C -= LowkR;
   2985       // Pick the greater solution.
   2986       PickLow = false;
   2987     }
   2988   }
   2989 
   2990   LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + "
   2991                     << B << "x + " << C << ", rw:" << RangeWidth << '\n');
   2992 
   2993   APInt D = SqrB - 4*A*C;
   2994   assert(D.isNonNegative() && "Negative discriminant");
   2995   APInt SQ = D.sqrt();
   2996 
   2997   APInt Q = SQ * SQ;
   2998   bool InexactSQ = Q != D;
   2999   // The calculated SQ may actually be greater than the exact (non-integer)
   3000   // value. If that's the case, decrement SQ to get a value that is lower.
   3001   if (Q.sgt(D))
   3002     SQ -= 1;
   3003 
   3004   APInt X;
   3005   APInt Rem;
   3006 
   3007   // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact.
   3008   // When using the quadratic formula directly, the calculated low root
   3009   // may be greater than the exact one, since we would be subtracting SQ.
   3010   // To make sure that the calculated root is not greater than the exact
   3011   // one, subtract SQ+1 when calculating the low root (for inexact value
   3012   // of SQ).
   3013   if (PickLow)
   3014     APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem);
   3015   else
   3016     APInt::sdivrem(-B + SQ, TwoA, X, Rem);
   3017 
   3018   // The updated coefficients should be such that the (exact) solution is
   3019   // positive. Since APInt division rounds towards 0, the calculated one
   3020   // can be 0, but cannot be negative.
   3021   assert(X.isNonNegative() && "Solution should be non-negative");
   3022 
   3023   if (!InexactSQ && Rem.isNullValue()) {
   3024     LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n');
   3025     return X;
   3026   }
   3027 
   3028   assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D");
   3029   // The exact value of the square root of D should be between SQ and SQ+1.
   3030   // This implies that the solution should be between that corresponding to
   3031   // SQ (i.e. X) and that corresponding to SQ+1.
   3032   //
   3033   // The calculated X cannot be greater than the exact (real) solution.
   3034   // Actually it must be strictly less than the exact solution, while
   3035   // X+1 will be greater than or equal to it.
   3036 
   3037   APInt VX = (A*X + B)*X + C;
   3038   APInt VY = VX + TwoA*X + A + B;
   3039   bool SignChange = VX.isNegative() != VY.isNegative() ||
   3040                     VX.isNullValue() != VY.isNullValue();
   3041   // If the sign did not change between X and X+1, X is not a valid solution.
   3042   // This could happen when the actual (exact) roots don't have an integer
   3043   // between them, so they would both be contained between X and X+1.
   3044   if (!SignChange) {
   3045     LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n");
   3046     return None;
   3047   }
   3048 
   3049   X += 1;
   3050   LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n');
   3051   return X;
   3052 }
   3053 
   3054 Optional<unsigned>
   3055 llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
   3056   assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
   3057   if (A == B)
   3058     return llvm::None;
   3059   return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
   3060 }
   3061 
   3062 /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
   3063 /// with the integer held in IntVal.
   3064 void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
   3065                             unsigned StoreBytes) {
   3066   assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!");
   3067   const uint8_t *Src = (const uint8_t *)IntVal.getRawData();
   3068 
   3069   if (sys::IsLittleEndianHost) {
   3070     // Little-endian host - the source is ordered from LSB to MSB.  Order the
   3071     // destination from LSB to MSB: Do a straight copy.
   3072     memcpy(Dst, Src, StoreBytes);
   3073   } else {
   3074     // Big-endian host - the source is an array of 64 bit words ordered from
   3075     // LSW to MSW.  Each word is ordered from MSB to LSB.  Order the destination
   3076     // from MSB to LSB: Reverse the word order, but not the bytes in a word.
   3077     while (StoreBytes > sizeof(uint64_t)) {
   3078       StoreBytes -= sizeof(uint64_t);
   3079       // May not be aligned so use memcpy.
   3080       memcpy(Dst + StoreBytes, Src, sizeof(uint64_t));
   3081       Src += sizeof(uint64_t);
   3082     }
   3083 
   3084     memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes);
   3085   }
   3086 }
   3087 
   3088 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
   3089 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
   3090 void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
   3091                              unsigned LoadBytes) {
   3092   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
   3093   uint8_t *Dst = reinterpret_cast<uint8_t *>(
   3094                    const_cast<uint64_t *>(IntVal.getRawData()));
   3095 
   3096   if (sys::IsLittleEndianHost)
   3097     // Little-endian host - the destination must be ordered from LSB to MSB.
   3098     // The source is ordered from LSB to MSB: Do a straight copy.
   3099     memcpy(Dst, Src, LoadBytes);
   3100   else {
   3101     // Big-endian - the destination is an array of 64 bit words ordered from
   3102     // LSW to MSW.  Each word must be ordered from MSB to LSB.  The source is
   3103     // ordered from MSB to LSB: Reverse the word order, but not the bytes in
   3104     // a word.
   3105     while (LoadBytes > sizeof(uint64_t)) {
   3106       LoadBytes -= sizeof(uint64_t);
   3107       // May not be aligned so use memcpy.
   3108       memcpy(Dst, Src + LoadBytes, sizeof(uint64_t));
   3109       Dst += sizeof(uint64_t);
   3110     }
   3111 
   3112     memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes);
   3113   }
   3114 }
   3115