/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#if !defined(lint)
	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"muldiv.s"
#endif

#if defined(__i386) && !defined(__amd64)

/*
 * Helper routines for 32-bit compilers to perform 64-bit math.
 * These are used both by the Sun and GCC compilers.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>


#if defined(__lint)
#include <sys/types.h>

/* ARGSUSED */
int64_t
__mul64(int64_t a, int64_t b)
{
	return (0);
}

#else	/* __lint */

/
/ function __mul64(A,B:Longint):Longint;
/	{Overflow is not checked}
/
/ We essentially do multiply by longhand, using base 2**32 digits.
/		a	b	parameter A
/	x	c	d	parameter B
/		---------
/		ad	bd
/	ac	bc
/	-----------------
/	ac	ad+bc	bd
/
/ We can ignore ac and top 32 bits of ad+bc: if <> 0, overflow happened.
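/
/ As a sketch of the same arithmetic in C (illustration only, not part
/ of this file's interface; the 32-bit products below keep just their
/ low halves, which is exactly the truncation described above):
/
/	uint64_t
/	mul64_sketch(uint64_t A, uint64_t B)
/	{
/		uint32_t a = A >> 32, b = (uint32_t)A;
/		uint32_t c = B >> 32, d = (uint32_t)B;
/		uint64_t bd = (uint64_t)b * d;
/		uint32_t ad_bc = a * d + b * c;	/* low 32 bits of ad+bc */
/		return (bd + ((uint64_t)ad_bc << 32));
/	}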
/
	ENTRY(__mul64)
	push	%ebp
	mov	%esp,%ebp
	pushl	%esi
	mov	12(%ebp),%eax	/ A.hi (a)
	mull	16(%ebp)	/ Multiply A.hi by B.lo (produces ad)
	xchg	%ecx,%eax	/ ecx = bottom half of ad.
	movl	8(%ebp),%eax	/ A.lo (b)
	movl	%eax,%esi	/ Save A.lo for later
	mull	16(%ebp)	/ Multiply A.lo by B.lo (dx:ax = bd.)
	addl	%edx,%ecx	/ cx is ad
	xchg	%eax,%esi	/ esi is bd, eax = A.lo (d)
	mull	20(%ebp)	/ Multiply A.lo * B.hi (producing bc)
	addl	%ecx,%eax	/ Produce ad+bc
	movl	%esi,%edx
	xchg	%eax,%edx
	popl	%esi
	movl	%ebp,%esp
	popl	%ebp
	ret	$16
	SET_SIZE(__mul64)

#endif	/* __lint */

/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */
#if defined(__lint)

/* ARGSUSED */
uint64_t
__udiv64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__urem64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__div64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__rem64(int64_t a, int64_t b)
{ return (0); }

#else	/* __lint */

/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t *pmod)
/ {
/	/* simple cases: y is a single uint32_t */
/	if (HI(y) == 0) {
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return remainder */
/		*pmod = div_rem;
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return remainder */
/		*pmod = x;
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		uint32_t	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				A_SUB2(y0, y1, x0, x1);
/			} else {
/				q0 = 0;
/			}
/
/			/* return remainder */
/			*pmod = HILO(x1, x0);
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need
/			 * to normalize
/			 */
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/				A_SUB2(y0, y1, t0, t1);
/			}
/			/* return remainder */
/			/* subtract product from x to get remainder */
/			A_SUB2(t0, t1, x0, x1);
/			*pmod = (HILO(x1, x0) >> normshift);
/
/			/* return result */
/			return (q0);
/		}
/	}
/ }
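/
/ For example (hypothetical values, shown only to pin down the
/ interface): UDivRem(100000000000, 7, &rem) returns 14285714285
/ and stores rem = 5, since 7 * 14285714285 + 5 == 100000000000.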
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi, %edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi, %esi	/ y0, x0
	sbbl	%ecx, %edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax, %esi	/ t0, x0
	sbbl	%edx, %edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp, %eax	/ y0, t0
	sbbl	24(%esp), %edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)
/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/	if (HI(y) == 0) {
/		/* simple cases: y is a single uint32_t */
/		uint32_t	div_hi, div_rem;
/		uint32_t	q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t	y0, y1;
/		uint32_t	x1, x0;
/		uint32_t	q0;
/		unsigned	normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				/* A_SUB2(y0, y1, x0, x1); */
/			} else {
/				q0 = 0;
/			}
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need
/			 * to normalize
/			 */
/			uint64_t	dt;
/			uint32_t	t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/			}
/			/* return result */
/			return (q0);
/		}
/	}
/ }
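/
/ As a concrete check of the normalization path (values illustrative
/ only): x = 0xFFFFFFFFFFFFFFFF, y = 0x100000001 gives normshift = 31,
/ an estimated q0 of 0xFFFFFFFF, and no downward correction, which is
/ exact since (2^64 - 1) == (2^32 - 1) * (2^32 + 1).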
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax, %ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx	/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx	/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)
/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret	$16
	SET_SIZE(__udiv64)

/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 */
	ENTRY(__urem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/	int		negative;
/	uint64_t	xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
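/
/ This is C's truncating division; e.g. (illustrative values)
/ __div64(-7, 2) negates to 7 / 2 = 3 and returns -3, while
/ __div64(-7, -2) returns 3 because the two negations cancel.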
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx	/ yt
	pushl	%eax	/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
	negl	%eax	/ r
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax	/ x
	adcl	$0, %edx	/, x
	negl	%edx	/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax	/ yt
	adcl	$0, %edx	/, yt
	negl	%edx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/	uint64_t	xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax	/ tmp66
	pushl	%edi	/ yt
	pushl	%esi	/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi	/ yt
	adcl	$0, %edi	/, yt
	negl	%edi	/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax	/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx	/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

#endif	/* __lint */
#if defined(__lint)

/*
 * C support for 64-bit modulo and division.
 * GNU routines callable from C (though generated by the compiler).
 * Hand-customized compiler output - see comments for details.
 */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{ return (0); }

/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{ return (0); }

/* ARGSUSED */
int64_t
__divrem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__udivrem64(uint64_t a, uint64_t b)
{ return (0); }

#else	/* __lint */

/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the SYS V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi.  Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 *  - counting the number of leading zeros in a word
 *  - multiplying two 32-bit numbers giving a 64-bit result
 *  - dividing a 64-bit number by a 32-bit number, giving both quotient
 *    and remainder
 *  - subtracting two 64-bit results
 */
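/ For illustration, a hypothetical non-GCC caller of __udivrem64
/ (not part of this file): arguments are pushed right to left, the
/ callee pops them with ret $16, and on return the quotient is in
/ %edx:%eax and the remainder in %ecx:%esi:
/
/	pushl	y_hi
/	pushl	y_lo
/	pushl	x_hi
/	pushl	x_lo
/	call	__udivrem64
/	/ here %edx:%eax == x / y and %ecx:%esi == x % y
/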
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/	"g"((uint32_t)(bh)))
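/
/ A sketch of how these compose (hypothetical snippet, not part of
/ this file): divide HILO(hi, lo) by b when the quotient fits in 32
/ bits, i.e. hi < b; otherwise divl would fault on overflow:
/
/	uint32_t q, r;
/	A_DIV32(lo, hi, b, q, r);	/* q = HILO(hi, lo) / b */
/					/* r = HILO(hi, lo) % b */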
/*
 * __udivdi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
	ENTRY(__udivdi3)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret
	SET_SIZE(__udivdi3)

/*
 * __umoddi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
	ENTRY(__umoddi3)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret
	SET_SIZE(__umoddi3)
/*
 * __divdi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
/ int64_t
/ __divdi3(int64_t x, int64_t y)
/ {
/	int		negative;
/	uint64_t	xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL55
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL56
.LL53:
	pushl	%edx	/ yt
	pushl	%eax	/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL54
	negl	%eax	/ r
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL55:
	negl	%eax	/ x
	adcl	$0, %edx	/, x
	negl	%edx	/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL53
	.align	16
.LL56:
	negl	%eax	/ yt
	adcl	$0, %edx	/, yt
	negl	%edx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)
/*
 * __moddi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
/ int64_t
/ __moddi3(int64_t x, int64_t y)
/ {
/	uint64_t	xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL63
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL64
.LL61:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax	/ tmp66
	pushl	%edi	/ yt
	pushl	%esi	/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret
	.align	16
.LL63:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL61
	.align	16
.LL64:
	negl	%esi	/ yt
	adcl	$0, %edi	/, yt
	negl	%edi	/ yt
	jmp	.LL61
	.align	16
.LL65:
	negl	%eax	/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx	/ rem
	popl	%edi
	ret
	SET_SIZE(__moddi3)

/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
	ENTRY(__udivrem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)
/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t *pmod)
/ {
/	int		negative;
/	uint64_t	xt, yt, r, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDivRem(xt, yt, &rem);
/	*pmod = (x < 0 ? (int64_t) - rem : rem);
/	return (negative ? (int64_t) - r : r);
/ }
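/
/ For example (illustrative values): SDivRem(-7, 2, &m) divides 7 by 2
/ to get r = 3 and rem = 1, then returns -3 and stores m = -1; the
/ quotient sign follows the signs of both operands and the remainder
/ sign follows the dividend, as in C.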
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax	/ tmp70
	pushl	%ecx	/ yt
	pushl	%edx	/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem, *pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax	/ r
	adcl	$0, %edx	/, r
	negl	%edx	/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx	/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx	/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx	/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx	/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)
/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
	ENTRY(__divrem64)
	subl	$20, %esp
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx	/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp), %esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)


#endif	/* __lint */

#endif	/* defined(__i386) && !defined(__amd64) */