xref: /linux/lib/math/div64.c (revision 7b8e9264f55a9c320f398e337d215e68cca50131)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
4  *
5  * Based on former do_div() implementation from asm-parisc/div64.h:
6  *	Copyright (C) 1999 Hewlett-Packard Co
7  *	Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
8  *
9  *
10  * Generic C version of 64bit/32bit division and modulo, with
11  * 64bit result and 32bit remainder.
12  *
13  * The fast case for (n>>32 == 0) is handled inline by do_div().
14  *
15  * Code generated for this function might be very inefficient
16  * for some CPUs. __div64_32() can be overridden by linking arch-specific
17  * assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
18  * or by defining a preprocessor macro in arch/include/asm/div64.h.
19  */
20 
21 #include <linux/bitops.h>
22 #include <linux/export.h>
23 #include <linux/math.h>
24 #include <linux/math64.h>
25 #include <linux/minmax.h>
26 #include <linux/log2.h>
27 
28 /* Not needed on 64bit architectures */
29 #if BITS_PER_LONG == 32
30 
31 #ifndef __div64_32
32 uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
33 {
34 	uint64_t rem = *n;
35 	uint64_t b = base;
36 	uint64_t res, d = 1;
37 	uint32_t high = rem >> 32;
38 
39 	/* Reduce the thing a bit first */
40 	res = 0;
41 	if (high >= base) {
42 		high /= base;
43 		res = (uint64_t) high << 32;
44 		rem -= (uint64_t) (high*base) << 32;
45 	}
46 
47 	while ((int64_t)b > 0 && b < rem) {
48 		b = b+b;
49 		d = d+d;
50 	}
51 
52 	do {
53 		if (rem >= b) {
54 			rem -= b;
55 			res += d;
56 		}
57 		b >>= 1;
58 		d >>= 1;
59 	} while (d);
60 
61 	*n = res;
62 	return rem;
63 }
64 EXPORT_SYMBOL(__div64_32);
65 #endif
66 
67 #ifndef div_s64_rem
68 s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
69 {
70 	u64 quotient;
71 
72 	if (dividend < 0) {
73 		quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
74 		*remainder = -*remainder;
75 		if (divisor > 0)
76 			quotient = -quotient;
77 	} else {
78 		quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
79 		if (divisor < 0)
80 			quotient = -quotient;
81 	}
82 	return quotient;
83 }
84 EXPORT_SYMBOL(div_s64_rem);
85 #endif
86 
87 /*
88  * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
89  * @dividend:	64bit dividend
90  * @divisor:	64bit divisor
91  * @remainder:  64bit remainder
92  *
93  * This implementation is a comparable to algorithm used by div64_u64.
94  * But this operation, which includes math for calculating the remainder,
95  * is kept distinct to avoid slowing down the div64_u64 operation on 32bit
96  * systems.
97  */
98 #ifndef div64_u64_rem
99 u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
100 {
101 	u32 high = divisor >> 32;
102 	u64 quot;
103 
104 	if (high == 0) {
105 		u32 rem32;
106 		quot = div_u64_rem(dividend, divisor, &rem32);
107 		*remainder = rem32;
108 	} else {
109 		int n = fls(high);
110 		quot = div_u64(dividend >> n, divisor >> n);
111 
112 		if (quot != 0)
113 			quot--;
114 
115 		*remainder = dividend - quot * divisor;
116 		if (*remainder >= divisor) {
117 			quot++;
118 			*remainder -= divisor;
119 		}
120 	}
121 
122 	return quot;
123 }
124 EXPORT_SYMBOL(div64_u64_rem);
125 #endif
126 
127 /*
128  * div64_u64 - unsigned 64bit divide with 64bit divisor
129  * @dividend:	64bit dividend
130  * @divisor:	64bit divisor
131  *
132  * This implementation is a modified version of the algorithm proposed
133  * by the book 'Hacker's Delight'.  The original source and full proof
134  * can be found here and is available for use without restriction.
135  *
136  * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
137  */
138 #ifndef div64_u64
139 u64 div64_u64(u64 dividend, u64 divisor)
140 {
141 	u32 high = divisor >> 32;
142 	u64 quot;
143 
144 	if (high == 0) {
145 		quot = div_u64(dividend, divisor);
146 	} else {
147 		int n = fls(high);
148 		quot = div_u64(dividend >> n, divisor >> n);
149 
150 		if (quot != 0)
151 			quot--;
152 		if ((dividend - quot * divisor) >= divisor)
153 			quot++;
154 	}
155 
156 	return quot;
157 }
158 EXPORT_SYMBOL(div64_u64);
159 #endif
160 
161 #ifndef div64_s64
162 s64 div64_s64(s64 dividend, s64 divisor)
163 {
164 	s64 quot, t;
165 
166 	quot = div64_u64(abs(dividend), abs(divisor));
167 	t = (dividend ^ divisor) >> 63;
168 
169 	return (quot ^ t) - t;
170 }
171 EXPORT_SYMBOL(div64_s64);
172 #endif
173 
174 #endif /* BITS_PER_LONG == 32 */
175 
176 /*
177  * Iterative div/mod for use when dividend is not expected to be much
178  * bigger than divisor.
179  */
180 #ifndef iter_div_u64_rem
181 u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
182 {
183 	return __iter_div_u64_rem(dividend, divisor, remainder);
184 }
185 EXPORT_SYMBOL(iter_div_u64_rem);
186 #endif
187 
188 #if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64)
189 
190 #define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c)
191 
192 #if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64)
193 static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
194 {
195 	/* native 64x64=128 bits multiplication */
196 	u128 prod = (u128)a * b + c;
197 
198 	*p_lo = prod;
199 	return prod >> 64;
200 }
201 #else
202 static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
203 {
204 	/* perform a 64x64=128 bits multiplication in 32bit chunks */
205 	u64 x, y, z;
206 
207 	/* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
208 	x = mul_add(a, b, c);
209 	y = mul_add(a, b >> 32, c >> 32);
210 	y = add_u64_u32(y, x >> 32);
211 	z = mul_add(a >> 32, b >> 32, y >> 32);
212 	y = mul_add(a >> 32, b, y);
213 	*p_lo = (y << 32) + (u32)x;
214 	return add_u64_u32(z, y >> 32);
215 }
216 #endif
217 
218 #ifndef BITS_PER_ITER
219 #define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
220 #endif
221 
222 #if BITS_PER_ITER == 32
223 #define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
224 #define add_u64_long(a, b) ((a) + (b))
225 #else
226 #undef BITS_PER_ITER
227 #define BITS_PER_ITER 16
228 static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
229 {
230 	u64 n_lo = mul_add(a, b, c);
231 	u64 n_med = mul_add(a >> 32, b, c >> 32);
232 
233 	n_med = add_u64_u32(n_med, n_lo >> 32);
234 	*p_lo = n_med << 32 | (u32)n_lo;
235 	return n_med >> 32;
236 }
237 
238 #define add_u64_long(a, b) add_u64_u32(a, b)
239 #endif
240 
241 u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
242 {
243 	unsigned long d_msig, q_digit;
244 	unsigned int reps, d_z_hi;
245 	u64 quotient, n_lo, n_hi;
246 	u32 overflow;
247 
248 	n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);
249 
250 	if (!n_hi)
251 		return div64_u64(n_lo, d);
252 
253 	if (unlikely(n_hi >= d)) {
254 		/* trigger runtime exception if divisor is zero */
255 		if (d == 0) {
256 			unsigned long zero = 0;
257 
258 			OPTIMIZER_HIDE_VAR(zero);
259 			return ~0UL/zero;
260 		}
261 		/* overflow: result is unrepresentable in a u64 */
262 		return ~0ULL;
263 	}
264 
265 	/* Left align the divisor, shifting the dividend to match */
266 	d_z_hi = __builtin_clzll(d);
267 	if (d_z_hi) {
268 		d <<= d_z_hi;
269 		n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
270 		n_lo <<= d_z_hi;
271 	}
272 
273 	reps = 64 / BITS_PER_ITER;
274 	/* Optimise loop count for small dividends */
275 	if (!(u32)(n_hi >> 32)) {
276 		reps -= 32 / BITS_PER_ITER;
277 		n_hi = n_hi << 32 | n_lo >> 32;
278 		n_lo <<= 32;
279 	}
280 #if BITS_PER_ITER == 16
281 	if (!(u32)(n_hi >> 48)) {
282 		reps--;
283 		n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
284 		n_lo <<= 16;
285 	}
286 #endif
287 
288 	/* Invert the dividend so we can use add instead of subtract. */
289 	n_lo = ~n_lo;
290 	n_hi = ~n_hi;
291 
292 	/*
293 	 * Get the most significant BITS_PER_ITER bits of the divisor.
294 	 * This is used to get a low 'guestimate' of the quotient digit.
295 	 */
296 	d_msig = (d >> (64 - BITS_PER_ITER)) + 1;
297 
298 	/*
299 	 * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
300 	 * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
301 	 * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
302 	 */
303 	quotient = 0;
304 	while (reps--) {
305 		q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig;
306 		/* Shift 'n' left to align with the product q_digit * d */
307 		overflow = n_hi >> (64 - BITS_PER_ITER);
308 		n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
309 		n_lo <<= BITS_PER_ITER;
310 		/* Add product to negated divisor */
311 		overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
312 		/* Adjust for the q_digit 'guestimate' being low */
313 		while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) {
314 			q_digit++;
315 			n_hi += d;
316 			overflow += n_hi < d;
317 		}
318 		quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit);
319 	}
320 
321 	/*
322 	 * The above only ensures the remainder doesn't overflow,
323 	 * it can still be possible to add (aka subtract) another copy
324 	 * of the divisor.
325 	 */
326 	if ((n_hi + d) > n_hi)
327 		quotient++;
328 	return quotient;
329 }
330 #if !defined(test_mul_u64_add_u64_div_u64)
331 EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
332 #endif
333 #endif
334