1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC. 4 * Copyright (C) 2007 The Regents of the University of California. 5 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 6 * Written by Brian Behlendorf <behlendorf1@llnl.gov>. 7 * UCRL-CODE-235197 8 * 9 * This file is part of the SPL, Solaris Porting Layer. 10 * 11 * The SPL is free software; you can redistribute it and/or modify it 12 * under the terms of the GNU General Public License as published by the 13 * Free Software Foundation; either version 2 of the License, or (at your 14 * option) any later version. 15 * 16 * The SPL is distributed in the hope that it will be useful, but WITHOUT 17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 19 * for more details. 20 * 21 * You should have received a copy of the GNU General Public License along 22 * with the SPL. If not, see <http://www.gnu.org/licenses/>. 23 * 24 * Solaris Porting Layer (SPL) Generic Implementation. 25 */ 26 27 #include <sys/isa_defs.h> 28 #include <sys/sysmacros.h> 29 30 /* 31 * 64-bit math support for 32-bit platforms. Compilers will generatee 32 * references to the functions here if required. 33 */ 34 35 #if BITS_PER_LONG == 32 36 37 /* 38 * Support 64/64 => 64 division on a 32-bit platform. While the kernel 39 * provides a div64_u64() function for this we do not use it because the 40 * implementation is flawed. There are cases which return incorrect 41 * results as late as linux-2.6.35. Until this is fixed upstream the 42 * spl must provide its own implementation. 43 * 44 * This implementation is a slightly modified version of the algorithm 45 * proposed by the book 'Hacker's Delight'. The original source can be 46 * found here and is available for use without restriction. 47 * 48 * http://www.hackersdelight.org/HDcode/newCode/divDouble.c 49 */ 50 51 /* 52 * Calculate number of leading of zeros for a 64-bit value. 53 */ 54 static int 55 nlz64(uint64_t x) 56 { 57 register int n = 0; 58 59 if (x == 0) 60 return (64); 61 62 if (x <= 0x00000000FFFFFFFFULL) { n = n + 32; x = x << 32; } 63 if (x <= 0x0000FFFFFFFFFFFFULL) { n = n + 16; x = x << 16; } 64 if (x <= 0x00FFFFFFFFFFFFFFULL) { n = n + 8; x = x << 8; } 65 if (x <= 0x0FFFFFFFFFFFFFFFULL) { n = n + 4; x = x << 4; } 66 if (x <= 0x3FFFFFFFFFFFFFFFULL) { n = n + 2; x = x << 2; } 67 if (x <= 0x7FFFFFFFFFFFFFFFULL) { n = n + 1; } 68 69 return (n); 70 } 71 72 /* 73 * Newer kernels have a div_u64() function but we define our own 74 * to simplify portability between kernel versions. 75 */ 76 static inline uint64_t 77 __div_u64(uint64_t u, uint32_t v) 78 { 79 (void) do_div(u, v); 80 return (u); 81 } 82 83 /* 84 * Implementation of 64-bit unsigned division for 32-bit machines. 85 * 86 * First the procedure takes care of the case in which the divisor is a 87 * 32-bit quantity. There are two subcases: (1) If the left half of the 88 * dividend is less than the divisor, one execution of do_div() is all that 89 * is required (overflow is not possible). (2) Otherwise it does two 90 * divisions, using the grade school method. 91 */ 92 uint64_t 93 __udivdi3(uint64_t u, uint64_t v) 94 { 95 uint64_t u0, u1, v1, q0, q1, k; 96 int n; 97 98 if (v >> 32 == 0) { // If v < 2**32: 99 if (u >> 32 < v) { // If u/v cannot overflow, 100 return (__div_u64(u, v)); // just do one division. 101 } else { // If u/v would overflow: 102 u1 = u >> 32; // Break u into two halves. 103 u0 = u & 0xFFFFFFFF; 104 q1 = __div_u64(u1, v); // First quotient digit. 105 k = u1 - q1 * v; // First remainder, < v. 106 u0 += (k << 32); 107 q0 = __div_u64(u0, v); // Seconds quotient digit. 108 return ((q1 << 32) + q0); 109 } 110 } else { // If v >= 2**32: 111 n = nlz64(v); // 0 <= n <= 31. 112 v1 = (v << n) >> 32; // Normalize divisor, MSB is 1. 113 u1 = u >> 1; // To ensure no overflow. 114 q1 = __div_u64(u1, v1); // Get quotient from 115 q0 = (q1 << n) >> 31; // Undo normalization and 116 // division of u by 2. 117 if (q0 != 0) // Make q0 correct or 118 q0 = q0 - 1; // too small by 1. 119 if ((u - q0 * v) >= v) 120 q0 = q0 + 1; // Now q0 is correct. 121 122 return (q0); 123 } 124 } 125 EXPORT_SYMBOL(__udivdi3); 126 127 #ifndef abs64 128 /* CSTYLED */ 129 #define abs64(x) ({ uint64_t t = (x) >> 63; ((x) ^ t) - t; }) 130 #endif 131 132 /* 133 * Implementation of 64-bit signed division for 32-bit machines. 134 */ 135 int64_t 136 __divdi3(int64_t u, int64_t v) 137 { 138 int64_t q, t; 139 q = __udivdi3(abs64(u), abs64(v)); 140 t = (u ^ v) >> 63; // If u, v have different 141 return ((q ^ t) - t); // signs, negate q. 142 } 143 EXPORT_SYMBOL(__divdi3); 144 145 /* 146 * Implementation of 64-bit unsigned modulo for 32-bit machines. 147 */ 148 uint64_t 149 __umoddi3(uint64_t dividend, uint64_t divisor) 150 { 151 return (dividend - (divisor * __udivdi3(dividend, divisor))); 152 } 153 EXPORT_SYMBOL(__umoddi3); 154 155 /* 64-bit signed modulo for 32-bit machines. */ 156 int64_t 157 __moddi3(int64_t n, int64_t d) 158 { 159 int64_t q; 160 boolean_t nn = B_FALSE; 161 162 if (n < 0) { 163 nn = B_TRUE; 164 n = -n; 165 } 166 if (d < 0) 167 d = -d; 168 169 q = __umoddi3(n, d); 170 171 return (nn ? -q : q); 172 } 173 EXPORT_SYMBOL(__moddi3); 174 175 /* 176 * Implementation of 64-bit unsigned division/modulo for 32-bit machines. 177 */ 178 uint64_t 179 __udivmoddi4(uint64_t n, uint64_t d, uint64_t *r) 180 { 181 uint64_t q = __udivdi3(n, d); 182 if (r) 183 *r = n - d * q; 184 return (q); 185 } 186 EXPORT_SYMBOL(__udivmoddi4); 187 188 /* 189 * Implementation of 64-bit signed division/modulo for 32-bit machines. 190 */ 191 int64_t 192 __divmoddi4(int64_t n, int64_t d, int64_t *r) 193 { 194 int64_t q, rr; 195 boolean_t nn = B_FALSE; 196 boolean_t nd = B_FALSE; 197 if (n < 0) { 198 nn = B_TRUE; 199 n = -n; 200 } 201 if (d < 0) { 202 nd = B_TRUE; 203 d = -d; 204 } 205 206 q = __udivmoddi4(n, d, (uint64_t *)&rr); 207 208 if (nn != nd) 209 q = -q; 210 if (nn) 211 rr = -rr; 212 if (r) 213 *r = rr; 214 return (q); 215 } 216 EXPORT_SYMBOL(__divmoddi4); 217 218 #if defined(__arm) || defined(__arm__) 219 /* 220 * Implementation of 64-bit (un)signed division for 32-bit arm machines. 221 * 222 * Run-time ABI for the ARM Architecture (page 20). A pair of (unsigned) 223 * long longs is returned in {{r0, r1}, {r2,r3}}, the quotient in {r0, r1}, 224 * and the remainder in {r2, r3}. The return type is specifically left 225 * set to 'void' to ensure the compiler does not overwrite these registers 226 * during the return. All results are in registers as per ABI 227 */ 228 void 229 __aeabi_uldivmod(uint64_t u, uint64_t v) 230 { 231 uint64_t res; 232 uint64_t mod; 233 234 res = __udivdi3(u, v); 235 mod = __umoddi3(u, v); 236 { 237 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); 238 register uint32_t r1 asm("r1") = (res >> 32); 239 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); 240 register uint32_t r3 asm("r3") = (mod >> 32); 241 242 asm volatile("" 243 : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */ 244 : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ 245 246 return; /* r0; */ 247 } 248 } 249 EXPORT_SYMBOL(__aeabi_uldivmod); 250 251 void 252 __aeabi_ldivmod(int64_t u, int64_t v) 253 { 254 int64_t res; 255 uint64_t mod; 256 257 res = __divdi3(u, v); 258 mod = __umoddi3(u, v); 259 { 260 register uint32_t r0 asm("r0") = (res & 0xFFFFFFFF); 261 register uint32_t r1 asm("r1") = (res >> 32); 262 register uint32_t r2 asm("r2") = (mod & 0xFFFFFFFF); 263 register uint32_t r3 asm("r3") = (mod >> 32); 264 265 asm volatile("" 266 : "+r"(r0), "+r"(r1), "+r"(r2), "+r"(r3) /* output */ 267 : "r"(r0), "r"(r1), "r"(r2), "r"(r3)); /* input */ 268 269 return; /* r0; */ 270 } 271 } 272 EXPORT_SYMBOL(__aeabi_ldivmod); 273 #endif /* __arm || __arm__ */ 274 275 #endif /* BITS_PER_LONG */ 276