// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
 *
 * Based on former do_div() implementation from asm-parisc/div64.h:
 *	Copyright (C) 1999 Hewlett-Packard Co
 *	Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * Generic C version of 64bit/32bit division and modulo, with
 * 64bit result and 32bit remainder.
 *
 * The fast case for (n>>32 == 0) is handled inline by do_div().
 *
 * Code generated for this function might be very inefficient
 * for some CPUs. __div64_32() can be overridden by linking arch-specific
 * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
 * or by defining a preprocessor macro in arch/include/asm/div64.h.
 */

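/*
 * Illustrative example (not part of the original file): do_div() divides
 * a u64 in place and returns the 32bit remainder, e.g.:
 *
 *	u64 ns = 2500000000ULL;
 *	u32 rem = do_div(ns, 1000000000);
 *
 * leaves ns == 2 and rem == 500000000.
 */
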
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/math.h>
#include <linux/math64.h>
#include <linux/minmax.h>
#include <linux/log2.h>

/* Not needed on 64bit architectures */
#if BITS_PER_LONG == 32

#ifndef __div64_32
uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
{
	uint64_t rem = *n;
	uint64_t b = base;
	uint64_t res, d = 1;
	uint32_t high = rem >> 32;

	/* Reduce the dividend: divide out whatever base fits in the high word */
	res = 0;
	if (high >= base) {
		high /= base;
		res = (uint64_t) high << 32;
		rem -= (uint64_t) (high*base) << 32;
	}

	/* Scale the divisor up to just below the remainder, avoiding overflow */
	while ((int64_t)b > 0 && b < rem) {
		b = b+b;
		d = d+d;
	}

	/* Classic shift-and-subtract long division */
	do {
		if (rem >= b) {
			rem -= b;
			res += d;
		}
		b >>= 1;
		d >>= 1;
	} while (d);

	*n = res;
	return rem;
}
EXPORT_SYMBOL(__div64_32);
#endif
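
/*
 * Worked example (illustrative): for *n == 0x300000002 and base == 3,
 * high == 3 >= 3, so res = (3/3) << 32 and rem drops to 2; the
 * shift-and-subtract loop then finds no further multiples of 3, leaving
 * quotient 0x100000000 and remainder 2.
 */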

#ifndef div_s64_rem
/*
 * Signed 64bit divide with 32bit divisor and remainder: the quotient
 * truncates toward zero and the remainder takes the sign of the dividend.
 */
s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
{
	u64 quotient;

	if (dividend < 0) {
		quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
		*remainder = -*remainder;
		if (divisor > 0)
			quotient = -quotient;
	} else {
		quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
		if (divisor < 0)
			quotient = -quotient;
	}
	return quotient;
}
EXPORT_SYMBOL(div_s64_rem);
#endif
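
/*
 * Example (illustrative): div_s64_rem(-7, 2, &r) returns -3 with r == -1,
 * matching C's truncating division semantics.
 */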

/*
 * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
 * @dividend:	64bit dividend
 * @divisor:	64bit divisor
 * @remainder:	64bit remainder
 *
 * This implementation is comparable to the algorithm used by div64_u64.
 * But this operation, which includes math for calculating the remainder,
 * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
 * systems.
 */
#ifndef div64_u64_rem
u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
{
	u32 high = divisor >> 32;
	u64 quot;

	if (high == 0) {
		u32 rem32;
		quot = div_u64_rem(dividend, divisor, &rem32);
		*remainder = rem32;
	} else {
		/* Shift both operands right so the divisor fits in 32 bits */
		int n = fls(high);
		quot = div_u64(dividend >> n, divisor >> n);

		/* The estimate can be one too high; decrement, then fix up */
		if (quot != 0)
			quot--;

		*remainder = dividend - quot * divisor;
		if (*remainder >= divisor) {
			quot++;
			*remainder -= divisor;
		}
	}

	return quot;
}
EXPORT_SYMBOL(div64_u64_rem);
#endif
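
/*
 * Worked example (illustrative): dividend = 1 << 33, divisor = 1 << 32.
 * fls(1) == 1, so the estimate is (1 << 32) / (1 << 31) == 2; the
 * defensive decrement makes it 1, and the remainder check (remainder
 * 1 << 32 >= divisor) restores quot to 2 with remainder 0.
 */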

/*
 * div64_u64 - unsigned 64bit divide with 64bit divisor
 * @dividend:	64bit dividend
 * @divisor:	64bit divisor
 *
 * This implementation is a modified version of the algorithm proposed
 * in the book 'Hacker's Delight'. The original source and full proof
 * can be found here and is available for use without restriction.
 *
 * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
 */
#ifndef div64_u64
u64 div64_u64(u64 dividend, u64 divisor)
{
	u32 high = divisor >> 32;
	u64 quot;

	if (high == 0) {
		quot = div_u64(dividend, divisor);
	} else {
		int n = fls(high);
		quot = div_u64(dividend >> n, divisor >> n);

		if (quot != 0)
			quot--;
		if ((dividend - quot * divisor) >= divisor)
			quot++;
	}

	return quot;
}
EXPORT_SYMBOL(div64_u64);
#endif

#ifndef div64_s64
s64 div64_s64(s64 dividend, s64 divisor)
{
	s64 quot, t;

	quot = div64_u64(abs(dividend), abs(divisor));
	/* t is 0 or -1 depending on the sign of the result */
	t = (dividend ^ divisor) >> 63;

	/* (quot ^ t) - t conditionally negates: a two's-complement identity */
	return (quot ^ t) - t;
}
EXPORT_SYMBOL(div64_s64);
#endif
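
/*
 * Example (illustrative): div64_s64(-7, 2): quot = 7/2 = 3,
 * t = (-7 ^ 2) >> 63 = -1, and (3 ^ -1) - (-1) = -4 + 1 = -3.
 */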

#endif /* BITS_PER_LONG == 32 */

/*
 * Iterative div/mod for use when dividend is not expected to be much
 * bigger than divisor.
 */
#ifndef iter_div_u64_rem
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
{
	return __iter_div_u64_rem(dividend, divisor, remainder);
}
EXPORT_SYMBOL(iter_div_u64_rem);
#endif
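
/*
 * Illustrative use: __iter_div_u64_rem() subtracts the divisor in a loop,
 * so iter_div_u64_rem(5000000100ULL, 1000000000, &rem) does five
 * subtractions and returns 5 with rem == 100.
 */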

#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)

#define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c)

#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)
static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
	/* native 64x64=128 bits multiplication */
	u128 prod = (u128)a * b + c;

	*p_lo = prod;
	return prod >> 64;
}
#else
static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
	/* perform a 64x64=128 bits multiplication in 32bit chunks */
	u64 x, y, z;

	/*
	 * Since (x-1)*(x-1) + 2*(x-1) == x*x - 1, two u32 values can be
	 * added to the u64 product of two u32 values without overflow.
	 */
	x = mul_add(a, b, c);
	y = mul_add(a, b >> 32, c >> 32);
	y = add_u64_u32(y, x >> 32);
	z = mul_add(a >> 32, b >> 32, y >> 32);
	y = mul_add(a >> 32, b, y);
	*p_lo = (y << 32) + (u32)x;
	return add_u64_u32(z, y >> 32);
}
#endif
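
/*
 * Sanity check of that identity with x = 2^32 (illustrative):
 * (2^32 - 1)^2 + 2*(2^32 - 1) == 2^64 - 1, so 0xffffffff * 0xffffffff
 * plus two u32 carries still fits in a u64.
 */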

#ifndef BITS_PER_ITER
#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
#endif

#if BITS_PER_ITER == 32
#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
#define add_u64_long(a, b) ((a) + (b))
#else
#undef BITS_PER_ITER
#define BITS_PER_ITER 16
static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
{
	u64 n_lo = mul_add(a, b, c);
	u64 n_med = mul_add(a >> 32, b, c >> 32);

	n_med = add_u64_u32(n_med, n_lo >> 32);
	*p_lo = n_med << 32 | (u32)n_lo;
	return n_med >> 32;
}

#define add_u64_long(a, b) add_u64_u32(a, b)
#endif
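
/*
 * Illustrative note (an inference, not from the original file): 16bit
 * digits are used on 32bit machines so that the per-digit estimate
 * divide in the loop below is a native unsigned long (32bit) division
 * rather than a 64bit one.
 */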

u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
{
	unsigned long d_msig, q_digit;
	unsigned int reps, d_z_hi;
	u64 quotient, n_lo, n_hi;
	u32 overflow;

	n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);

	if (!n_hi)
		return div64_u64(n_lo, d);

	if (unlikely(n_hi >= d)) {
		/* trigger runtime exception if divisor is zero */
		if (d == 0) {
			unsigned long zero = 0;

			OPTIMIZER_HIDE_VAR(zero);
			return ~0UL/zero;
		}
		/* overflow: result is unrepresentable in a u64 */
		return ~0ULL;
	}

	/* Left align the divisor, shifting the dividend to match */
	d_z_hi = __builtin_clzll(d);
	if (d_z_hi) {
		d <<= d_z_hi;
		n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
		n_lo <<= d_z_hi;
	}

	reps = 64 / BITS_PER_ITER;
	/* Optimise loop count for small dividends */
	if (!(u32)(n_hi >> 32)) {
		reps -= 32 / BITS_PER_ITER;
		n_hi = n_hi << 32 | n_lo >> 32;
		n_lo <<= 32;
	}
#if BITS_PER_ITER == 16
	if (!(u32)(n_hi >> 48)) {
		reps--;
		n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
		n_lo <<= 16;
	}
#endif

	/* Invert the dividend so we can use add instead of subtract. */
	n_lo = ~n_lo;
	n_hi = ~n_hi;

	/*
	 * Get the most significant BITS_PER_ITER bits of the divisor.
	 * This is used to get a low 'guesstimate' of the quotient digit.
	 */
	d_msig = (d >> (64 - BITS_PER_ITER)) + 1;

	/*
	 * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
	 * The 'guess' quotient digit can be low, and can be up to
	 * BITS_PER_ITER+1 bits wide. The worst case is dividing ~0 by
	 * 0x8000, which requires two subtracts.
	 */
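	/*
	 * Illustrative bound: with BITS_PER_ITER == 32, the estimate below
	 * is ~n_hi / ((d >> 32) + 1); dividing by a value just above the
	 * true top digit of d means the guess never exceeds the true
	 * quotient digit.
	 */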
	quotient = 0;
	while (reps--) {
		q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
		/* Shift 'n' left to align with the product q_digit * d */
		overflow = n_hi >> (64 - BITS_PER_ITER);
		n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
		n_lo <<= BITS_PER_ITER;
		/* Add the product to the negated dividend */
		overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
		/* Adjust for the q_digit 'guesstimate' being low */
		while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
			q_digit++;
			n_hi += d;
			overflow += n_hi < d;
		}
		quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
	}

	/*
	 * The above only ensures the remainder doesn't overflow;
	 * it may still be possible to add (aka subtract) another copy
	 * of the divisor.
	 */
	if ((n_hi + d) > n_hi)
		quotient++;
	return quotient;
}
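
/*
 * Example (illustrative): mul_u64_add_u64_div_u64(1ULL << 32, 1ULL << 32, 5, 3)
 * evaluates (2^64 + 5) / 3 == 6148914691236517207 exactly, without losing
 * the 128bit intermediate product.
 */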
#if !defined(test_mul_u64_add_u64_div_u64)
EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
#endif
#endif