1 //===----------------------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // Copyright (c) Microsoft Corporation. 10 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 11 12 // Copyright 2018 Ulf Adams 13 // Copyright (c) Microsoft Corporation. All rights reserved. 14 15 // Boost Software License - Version 1.0 - August 17th, 2003 16 17 // Permission is hereby granted, free of charge, to any person or organization 18 // obtaining a copy of the software and accompanying documentation covered by 19 // this license (the "Software") to use, reproduce, display, distribute, 20 // execute, and transmit the Software, and to prepare derivative works of the 21 // Software, and to permit third-parties to whom the Software is furnished to 22 // do so, all subject to the following: 23 24 // The copyright notices in the Software and this entire statement, including 25 // the above license grant, this restriction and the following disclaimer, 26 // must be included in all copies of the Software, in whole or in part, and 27 // all derivative works of the Software, unless such copies or derivative 28 // works are solely in the form of machine-executable object code generated by 29 // a source language processor. 30 31 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 32 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 33 // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 34 // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 35 // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 36 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 37 // DEALINGS IN THE SOFTWARE. 38 39 #ifndef _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H 40 #define _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H 41 42 // Avoid formatting to keep the changes with the original code minimal. 43 // clang-format off 44 45 #include "__config" 46 47 _LIBCPP_BEGIN_NAMESPACE_STD 48 49 #if defined(_M_X64) && defined(_MSC_VER) 50 #define _LIBCPP_INTRINSIC128 1 51 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) { 52 return _umul128(__a, __b, __productHi); 53 } 54 55 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) { 56 // For the __shiftright128 intrinsic, the shift value is always 57 // modulo 64. 58 // In the current implementation of the double-precision version 59 // of Ryu, the shift value is always < 64. 60 // (The shift value is in the range [49, 58].) 61 // Check this here in case a future change requires larger shift 62 // values. In this case this function needs to be adjusted. 63 _LIBCPP_ASSERT(__dist < 64, ""); 64 return __shiftright128(__lo, __hi, static_cast<unsigned char>(__dist)); 65 } 66 67 // ^^^ intrinsics available ^^^ / vvv __int128 available vvv 68 #elif defined(__SIZEOF_INT128__) && ( \ 69 (defined(__clang__) && !defined(_MSC_VER)) || \ 70 (defined(__GNUC__) && !defined(__clang__) && !defined(__CUDACC__))) 71 #define _LIBCPP_INTRINSIC128 1 72 // We have __uint128 support in clang or gcc 73 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) { 74 auto __temp = __a * (unsigned __int128)__b; 75 *__productHi = __temp >> 64; 76 return static_cast<uint64_t>(__temp); 77 } 78 79 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) { 80 // In the current implementation of the double-precision version 81 // of Ryu, the shift value is always < 64. 82 // (The shift value is in the range [49, 58].) 83 // Check this here in case a future change requires larger shift 84 // values. In this case this function needs to be adjusted. 85 _LIBCPP_ASSERT(__dist < 64, ""); 86 auto __temp = __lo | ((unsigned __int128)__hi << 64); 87 // For x64 128-bit shfits using the `shrd` instruction and two 64-bit 88 // registers, the shift value is modulo 64. Thus the `& 63` is free. 89 return static_cast<uint64_t>(__temp >> (__dist & 63)); 90 } 91 #else // ^^^ __int128 available ^^^ / vvv intrinsics unavailable vvv 92 93 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_ALWAYS_INLINE uint64_t __ryu_umul128(const uint64_t __a, const uint64_t __b, uint64_t* const __productHi) { 94 // TRANSITION, VSO-634761 95 // The casts here help MSVC to avoid calls to the __allmul library function. 96 const uint32_t __aLo = static_cast<uint32_t>(__a); 97 const uint32_t __aHi = static_cast<uint32_t>(__a >> 32); 98 const uint32_t __bLo = static_cast<uint32_t>(__b); 99 const uint32_t __bHi = static_cast<uint32_t>(__b >> 32); 100 101 const uint64_t __b00 = static_cast<uint64_t>(__aLo) * __bLo; 102 const uint64_t __b01 = static_cast<uint64_t>(__aLo) * __bHi; 103 const uint64_t __b10 = static_cast<uint64_t>(__aHi) * __bLo; 104 const uint64_t __b11 = static_cast<uint64_t>(__aHi) * __bHi; 105 106 const uint32_t __b00Lo = static_cast<uint32_t>(__b00); 107 const uint32_t __b00Hi = static_cast<uint32_t>(__b00 >> 32); 108 109 const uint64_t __mid1 = __b10 + __b00Hi; 110 const uint32_t __mid1Lo = static_cast<uint32_t>(__mid1); 111 const uint32_t __mid1Hi = static_cast<uint32_t>(__mid1 >> 32); 112 113 const uint64_t __mid2 = __b01 + __mid1Lo; 114 const uint32_t __mid2Lo = static_cast<uint32_t>(__mid2); 115 const uint32_t __mid2Hi = static_cast<uint32_t>(__mid2 >> 32); 116 117 const uint64_t __pHi = __b11 + __mid1Hi + __mid2Hi; 118 const uint64_t __pLo = (static_cast<uint64_t>(__mid2Lo) << 32) | __b00Lo; 119 120 *__productHi = __pHi; 121 return __pLo; 122 } 123 124 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __ryu_shiftright128(const uint64_t __lo, const uint64_t __hi, const uint32_t __dist) { 125 // We don't need to handle the case __dist >= 64 here (see above). 126 _LIBCPP_ASSERT(__dist < 64, ""); 127 #ifdef _LIBCPP_64_BIT 128 _LIBCPP_ASSERT(__dist > 0, ""); 129 return (__hi << (64 - __dist)) | (__lo >> __dist); 130 #else // ^^^ 64-bit ^^^ / vvv 32-bit vvv 131 // Avoid a 64-bit shift by taking advantage of the range of shift values. 132 _LIBCPP_ASSERT(__dist >= 32, ""); 133 return (__hi << (64 - __dist)) | (static_cast<uint32_t>(__lo >> 32) >> (__dist - 32)); 134 #endif // ^^^ 32-bit ^^^ 135 } 136 137 #endif // ^^^ intrinsics unavailable ^^^ 138 139 #ifndef _LIBCPP_64_BIT 140 141 // Returns the high 64 bits of the 128-bit product of __a and __b. 142 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umulh(const uint64_t __a, const uint64_t __b) { 143 // Reuse the __ryu_umul128 implementation. 144 // Optimizers will likely eliminate the instructions used to compute the 145 // low part of the product. 146 uint64_t __hi; 147 (void) __ryu_umul128(__a, __b, &__hi); 148 return __hi; 149 } 150 151 // On 32-bit platforms, compilers typically generate calls to library 152 // functions for 64-bit divisions, even if the divisor is a constant. 153 // 154 // TRANSITION, LLVM-37932 155 // 156 // The functions here perform division-by-constant using multiplications 157 // in the same way as 64-bit compilers would do. 158 // 159 // NB: 160 // The multipliers and shift values are the ones generated by clang x64 161 // for expressions like x/5, x/10, etc. 162 163 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) { 164 return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 2; 165 } 166 167 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) { 168 return __umulh(__x, 0xCCCCCCCCCCCCCCCDu) >> 3; 169 } 170 171 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) { 172 return __umulh(__x >> 2, 0x28F5C28F5C28F5C3u) >> 2; 173 } 174 175 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) { 176 return __umulh(__x, 0xABCC77118461CEFDu) >> 26; 177 } 178 179 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) { 180 return __umulh(__x >> 9, 0x44B82FA09B5A53u) >> 11; 181 } 182 183 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) { 184 // Avoid 64-bit math as much as possible. 185 // Returning static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)) would 186 // perform 32x64-bit multiplication and 64-bit subtraction. 187 // __x and 1000000000 * __div1e9(__x) are guaranteed to differ by 188 // less than 10^9, so their highest 32 bits must be identical, 189 // so we can truncate both sides to uint32_t before subtracting. 190 // We can also simplify static_cast<uint32_t>(1000000000 * __div1e9(__x)). 191 // We can truncate before multiplying instead of after, as multiplying 192 // the highest 32 bits of __div1e9(__x) can't affect the lowest 32 bits. 193 return static_cast<uint32_t>(__x) - 1000000000 * static_cast<uint32_t>(__div1e9(__x)); 194 } 195 196 #else // ^^^ 32-bit ^^^ / vvv 64-bit vvv 197 198 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div5(const uint64_t __x) { 199 return __x / 5; 200 } 201 202 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div10(const uint64_t __x) { 203 return __x / 10; 204 } 205 206 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div100(const uint64_t __x) { 207 return __x / 100; 208 } 209 210 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e8(const uint64_t __x) { 211 return __x / 100000000; 212 } 213 214 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __div1e9(const uint64_t __x) { 215 return __x / 1000000000; 216 } 217 218 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mod1e9(const uint64_t __x) { 219 return static_cast<uint32_t>(__x - 1000000000 * __div1e9(__x)); 220 } 221 222 #endif // ^^^ 64-bit ^^^ 223 224 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow5Factor(uint64_t __value) { 225 uint32_t __count = 0; 226 for (;;) { 227 _LIBCPP_ASSERT(__value != 0, ""); 228 const uint64_t __q = __div5(__value); 229 const uint32_t __r = static_cast<uint32_t>(__value) - 5 * static_cast<uint32_t>(__q); 230 if (__r != 0) { 231 break; 232 } 233 __value = __q; 234 ++__count; 235 } 236 return __count; 237 } 238 239 // Returns true if __value is divisible by 5^__p. 240 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf5(const uint64_t __value, const uint32_t __p) { 241 // I tried a case distinction on __p, but there was no performance difference. 242 return __pow5Factor(__value) >= __p; 243 } 244 245 // Returns true if __value is divisible by 2^__p. 246 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) { 247 _LIBCPP_ASSERT(__value != 0, ""); 248 _LIBCPP_ASSERT(__p < 64, ""); 249 // __builtin_ctzll doesn't appear to be faster here. 250 return (__value & ((1ull << __p) - 1)) == 0; 251 } 252 253 _LIBCPP_END_NAMESPACE_STD 254 255 // clang-format on 256 257 #endif // _LIBCPP_SRC_INCLUDE_RYU_DS2_INTRINSICS_H 258