/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"_div64.s"

#include "SYS.h"

/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */

/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the System V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi.  Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 *  - counting the number of leading zeros in a word
 *  - multiplying two 32-bit numbers giving a 64-bit result
 *  - dividing a 64-bit number by a 32-bit number, giving both quotient
 *    and remainder
 *  - subtracting two 64-bit results
 */
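/*
 * Illustrative only (not part of the original source): a 32-bit
 * compiler that targets these helpers lowers ordinary C division
 * expressions to calls into this file, for example:
 */
/ #include <stdint.h>
/
/ uint64_t
/ example(uint64_t x, uint64_t y, uint64_t *r)
/ {
/	*r = x % y;		/* becomes a call to __urem64 */
/	return (x / y);		/* becomes a call to __udiv64 */
/ }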
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/	"g"((uint32_t)(bh)))
/
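/ For example (illustrative), when hi < d, A_DIV32(lo, hi, d, q, r)
/ computes q = HILO(hi, lo) / d and r = HILO(hi, lo) % d with a single
/ divl; the hi < d precondition keeps the quotient within 32 bits, so
/ the divl instruction cannot fault.  The code below arranges its
/ operands to maintain that precondition.
/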
/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
/ {
/	/* simple cases: y is a single uint32_t */
/	if (HI(y) == 0) {
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return remainder */
/		*pmod = div_rem;
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return remainder */
/		*pmod = x;
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		uint32_t	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				A_SUB2(y0, y1, x0, x1);
/			} else {
/				q0 = 0;
/			}
/
/			/* return remainder */
/			*pmod = HILO(x1, x0);
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need to
/			 * normalize
/			 */
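/			/*
/			 * Worked example (illustrative, not in the
/			 * original): x = 0x0000000700000001 and
/			 * y = 0x0000000300000000 give HIBIT(HI(y)) == 1,
/			 * so normshift == 30.  After shifting,
/			 * y1 == 0xC0000000 and x2:x1:x0 ==
/			 * 1:0xC0000000:0x40000000; A_DIV32 estimates
/			 * q0 == 2, and the remainder 0x4000000040000000
/			 * shifts back down to 0x100000001 == x - 2*y.
/			 */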
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/				A_SUB2(y0, y1, t0, t1);
/			}
/			/* return remainder */
/			/* subtract product from x to get remainder */
/			A_SUB2(t0, t1, x0, x1);
/			*pmod = (HILO(x1, x0) >> normshift);
/
/			/* return result */
/			return (q0);
/		}
/	}
/ }
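/*
 * Register usage (inferred from the callers below, not stated in the
 * original): UDivRem receives its first argument x in %edx:%eax, takes
 * y and pmod on the stack, and returns the quotient in %edx:%eax.
 */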
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi,%edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi,%esi	/ y0, x0
	sbbl	%ecx,%edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
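/*
 * The shldl/sall pairs below implement 64-bit left shifts.  Because
 * x86 32-bit shifts truncate their count to five bits, the compiler
 * emitted "andl $32" fix-ups (.LL23, and the branch around .LL18)
 * that handle a count with bit 5 set by moving the low word into the
 * high word and zeroing the low word.
 */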
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax,%esi	/ t0, x0
	sbbl	%edx,%edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp,%eax	/ y0, t0
	sbbl	24(%esp),%edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)

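/*
 * UDiv below is the same algorithm as UDivRem with the remainder
 * computation elided; functionally it matches UDivRem(x, y, &tmp)
 * with tmp discarded.
 */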
/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/	if (HI(y) == 0) {
/		/* simple cases: y is a single uint32_t */
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		unsigned	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				/* A_SUB2(y0, y1, x0, x1); */
/			} else {
/				q0 = 0;
/			}
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need to
/			 * normalize
/			 */
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/			}
/			/* return result */
/			return (q0);
/		}
/	}
/ }
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax,%ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx	/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx	/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)

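/*
 * The __* entry points that follow pop their own arguments with
 * "ret $16" (four 32-bit words), departing from the System V ABI as
 * noted at the top of this file; they are reached through
 * compiler-generated calls rather than ordinary C function calls.
 */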
/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret	$16
	SET_SIZE(__udiv64)

/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 */
	ENTRY(__urem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

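/ The signed routines below implement C truncating division: e.g.
/ (illustrative) __div64(-7, 2) == -3 and __rem64(-7, 2) == -1.  The
/ quotient is negated when exactly one operand is negative, while the
/ remainder takes the sign of the dividend x alone, preserving the
/ identity q * y + r == x.
/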
/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/	int		negative;
/	uint64_t	xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx	/ yt
	pushl	%eax	/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
	negl	%eax	/ r
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax	/ x
	adcl	$0, %edx	/, x
	negl	%edx	/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax	/ yt
	adcl	$0, %edx	/, yt
	negl	%edx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

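/*
 * The negl/adcl/negl triples in the code here negate a 64-bit value
 * held in a register pair: negating the low word sets the carry flag
 * iff the low word was nonzero, so the pair becomes -lo in the low
 * word and -(hi + carry) in the high word, that is, the two's
 * complement of hi:lo.
 */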
/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/	uint64_t	xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax	/ tmp66
	pushl	%edi	/ yt
	pushl	%esi	/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi	/ yt
	adcl	$0, %edi	/, yt
	negl	%edi	/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax	/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx	/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

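/ When a caller needs both results of a single division, e.g.
/ (illustrative)
/	q = x / y;
/	r = x % y;
/ the compiler can emit one call to __udivrem64 below, reading the
/ quotient from %edx:%eax and the remainder from %ecx:%esi.
/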
/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
	ENTRY(__udivrem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)

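/*
 * As with UDivRem, SDivRem takes x in %edx:%eax with y and pmod on the
 * stack (inferred from __divrem64 below, which loads x into %edx:%eax
 * and then moves the stored remainder into %ecx:%esi).
 */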
/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
/ {
/	int		negative;
/	uint64_t	xt, yt, r, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDivRem(xt, yt, &rem);
/	*pmod = (x < 0 ? (int64_t) - rem : rem);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax	/ tmp70
	pushl	%ecx	/ yt
	pushl	%edx	/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem,* pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax	/ r
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx	/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx	/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx	/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)

/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
	ENTRY(__divrem64)
	subl	$20, %esp
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp),%esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)