/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#if !defined(lint)
	.ident	"%Z%%M%	%I%	%E% SMI"

	.file	"muldiv.s"
#endif

#if defined(__i386) && !defined(__amd64)

/*
 * Helper routines for 32-bit compilers to perform 64-bit math.
 * These are used both by the Sun and GCC compilers.
 */

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>


#if defined(__lint)
#include <sys/types.h>

/* ARGSUSED */
int64_t
__mul64(int64_t a, int64_t b)
{
	return (0);
}

#else	/* __lint */

/
/ function __mul64(A,B:Longint):Longint;
/	{Overflow is not checked}
/
/ We essentially do the multiply by longhand, using base 2**32 digits.
/		a	b	parameter A
/	x	c	d	parameter B
/		---------
/		ad	bd
/	ac	bc
/	-----------------
/	ac	ad+bc	bd
/
/ We can ignore ac and the top 32 bits of ad+bc: if either is non-zero,
/ the multiply has overflowed.
/
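/ A minimal C sketch of the same longhand scheme (illustration only, not
/ part of the original source; mul64_model and its locals are hypothetical
/ names, and two's-complement wrap-around is assumed):
/
/	#include <stdint.h>
/
/	int64_t
/	mul64_model(int64_t A, int64_t B)
/	{
/		uint32_t a = (uint32_t)((uint64_t)A >> 32);	/* A.hi */
/		uint32_t b = (uint32_t)A;			/* A.lo */
/		uint32_t c = (uint32_t)((uint64_t)B >> 32);	/* B.hi */
/		uint32_t d = (uint32_t)B;			/* B.lo */
/		uint64_t bd = (uint64_t)b * d;
/		uint32_t ad = (uint32_t)((uint64_t)a * d);	/* low half */
/		uint32_t bc = (uint32_t)((uint64_t)b * c);	/* low half */
/
/		/* ac and any carry out of ad+bc would be overflow */
/		return ((int64_t)(bd + ((uint64_t)(ad + bc) << 32)));
/	}
/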
	ENTRY(__mul64)
	push	%ebp
	mov	%esp, %ebp
	pushl	%esi
	mov	12(%ebp), %eax	/ A.hi (a)
	mull	16(%ebp)	/ Multiply A.hi by B.lo (produces ad)
	xchg	%ecx, %eax	/ ecx = bottom half of ad
	movl	8(%ebp), %eax	/ A.lo (b)
	movl	%eax, %esi	/ Save A.lo for later
	mull	16(%ebp)	/ Multiply A.lo by B.lo (edx:eax = bd)
	addl	%edx, %ecx	/ ecx = LO(ad) + HI(bd)
	xchg	%eax, %esi	/ esi = LO(bd), eax = A.lo (b)
	mull	20(%ebp)	/ Multiply A.lo by B.hi (producing bc)
	addl	%ecx, %eax	/ Produce ad+bc (plus carry from bd)
	movl	%esi, %edx
	xchg	%eax, %edx	/ edx:eax = result
	popl	%esi
	movl	%ebp, %esp
	popl	%ebp
	ret	$16
	SET_SIZE(__mul64)

#endif	/* __lint */

/*
 * C support for 64-bit modulo and division.
 * Hand-customized compiler output - see comments for details.
 */
#if defined(__lint)

/* ARGSUSED */
uint64_t
__udiv64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__urem64(uint64_t a, uint64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__div64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
int64_t
__rem64(int64_t a, int64_t b)
{ return (0); }

#else	/* __lint */

/ /*
/  * Unsigned division with remainder.
/  * Divide two uint64_ts, and calculate remainder.
/  */
/ uint64_t
/ UDivRem(uint64_t x, uint64_t y, uint64_t * pmod)
/ {
/	/* simple cases: y is a single uint32_t */
/	if (HI(y) == 0) {
/		uint32_t div_hi, div_rem;
/		uint32_t q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return remainder */
/		*pmod = div_rem;
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return remainder */
/		*pmod = x;
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t y0, y1;
/		uint32_t x1, x0;
/		uint32_t q0;
/		uint32_t normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				A_SUB2(y0, y1, x0, x1);
/			} else {
/				q0 = 0;
/			}
/
/			/* return remainder */
/			*pmod = HILO(x1, x0);
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need
/			 * to normalize
/			 */
/			uint64_t dt;
/			uint32_t t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/				A_SUB2(y0, y1, t0, t1);
/			}
/			/* return remainder */
/			/* subtract product from x to get remainder */
/			A_SUB2(t0, t1, x0, x1);
/			*pmod = (HILO(x1, x0) >> normshift);
/
/			/* return result */
/			return (q0);
/		}
/	}
/ }
	ENTRY(UDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$48, %esp
	movl	68(%esp), %edi	/ y,
	testl	%edi, %edi	/ tmp63
	movl	%eax, 40(%esp)	/ x, x
	movl	%edx, 44(%esp)	/ x, x
	movl	%edi, %esi	/, tmp62
	movl	%edi, %ecx	/ tmp62, tmp63
	jne	.LL2
	movl	%edx, %eax	/, tmp68
	cmpl	64(%esp), %eax	/ y, tmp68
	jae	.LL21
.LL4:
	movl	72(%esp), %ebp	/ pmod,
	xorl	%esi, %esi	/ <result>
	movl	40(%esp), %eax	/ x, q0
	movl	%ecx, %edi	/ <result>, <result>
	divl	64(%esp)	/ y
	movl	%edx, (%ebp)	/ div_rem,
	xorl	%edx, %edx	/ q0
	addl	%eax, %esi	/ q0, <result>
	movl	$0, 4(%ebp)
	adcl	%edx, %edi	/ q0, <result>
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL2:
	movl	44(%esp), %eax	/ x,
	xorl	%edx, %edx
	cmpl	%esi, %eax	/ tmp62, tmp5
	movl	%eax, 32(%esp)	/ tmp5,
	movl	%edx, 36(%esp)
	jae	.LL6
	movl	72(%esp), %esi	/ pmod,
	movl	40(%esp), %ebp	/ x,
	movl	44(%esp), %ecx	/ x,
	movl	%ebp, (%esi)
	movl	%ecx, 4(%esi)
	xorl	%edi, %edi	/ <result>
	xorl	%esi, %esi	/ <result>
.LL22:
	addl	$48, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL21:
	movl	%edi, %edx	/ tmp63, div_hi
	divl	64(%esp)	/ y
	movl	%eax, %ecx	/, q1
	jmp	.LL4
	.align	16
.LL6:
	movl	$31, %edi	/, tmp87
	bsrl	%esi, %edx	/ tmp62, normshift
	subl	%edx, %edi	/ normshift, tmp87
	movl	%edi, 28(%esp)	/ tmp87,
	jne	.LL8
	movl	32(%esp), %edx	/, x1
	cmpl	%ecx, %edx	/ y1, x1
	movl	64(%esp), %edi	/ y, y0
	movl	40(%esp), %esi	/ x, x0
	ja	.LL10
	xorl	%ebp, %ebp	/ q0
	cmpl	%edi, %esi	/ y0, x0
	jb	.LL11
.LL10:
	movl	$1, %ebp	/, q0
	subl	%edi, %esi	/ y0, x0
	sbbl	%ecx, %edx	/ tmp63, x1
.LL11:
	movl	%edx, %ecx	/ x1, x1
	xorl	%edx, %edx	/ x1
	xorl	%edi, %edi	/ x0
	addl	%esi, %edx	/ x0, x1
	adcl	%edi, %ecx	/ x0, x1
	movl	72(%esp), %esi	/ pmod,
	movl	%edx, (%esi)	/ x1,
	movl	%ecx, 4(%esi)	/ x1,
	xorl	%edi, %edi	/ <result>
	movl	%ebp, %esi	/ q0, <result>
	jmp	.LL22
	.align	16
.LL8:
	movb	28(%esp), %cl
	movl	64(%esp), %esi	/ y, dt
	movl	68(%esp), %edi	/ y, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	jne	.LL23
.LL17:
	movl	$32, %ecx	/, tmp102
	subl	28(%esp), %ecx	/, tmp102
	movl	%esi, %ebp	/ dt, y0
	movl	32(%esp), %esi
	shrl	%cl, %esi	/ tmp102,
	movl	%edi, 24(%esp)	/ tmp99,
	movb	28(%esp), %cl
	movl	%esi, 12(%esp)	/, x2
	movl	44(%esp), %edi	/ x, dt
	movl	40(%esp), %esi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	je	.LL18
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL18:
	movl	%edi, %ecx	/ dt,
	movl	%edi, %eax	/ tmp2,
	movl	%ecx, (%esp)
	movl	12(%esp), %edx	/ x2,
	divl	24(%esp)
	movl	%edx, %ecx	/, x1
	xorl	%edi, %edi
	movl	%eax, 20(%esp)
	movl	%ebp, %eax	/ y0, t0
	mull	20(%esp)
	cmpl	%ecx, %edx	/ x1, t1
	movl	%edi, 4(%esp)
	ja	.LL14
	je	.LL24
.LL15:
	movl	%ecx, %edi	/ x1,
	subl	%eax, %esi	/ t0, x0
	sbbl	%edx, %edi	/ t1,
	movl	%edi, %eax	/, x1
	movl	%eax, %edx	/ x1, x1
	xorl	%eax, %eax	/ x1
	xorl	%ebp, %ebp	/ x0
	addl	%esi, %eax	/ x0, x1
	adcl	%ebp, %edx	/ x0, x1
	movb	28(%esp), %cl
	shrdl	%edx, %eax	/, x1, x1
	shrl	%cl, %edx	/, x1
	andl	$32, %ecx
	je	.LL16
	movl	%edx, %eax	/ x1, x1
	xorl	%edx, %edx	/ x1
.LL16:
	movl	72(%esp), %ecx	/ pmod,
	movl	20(%esp), %esi	/, <result>
	xorl	%edi, %edi	/ <result>
	movl	%eax, (%ecx)	/ x1,
	movl	%edx, 4(%ecx)	/ x1,
	jmp	.LL22
	.align	16
.LL24:
	cmpl	%esi, %eax	/ x0, t0
	jbe	.LL15
.LL14:
	decl	20(%esp)
	subl	%ebp, %eax	/ y0, t0
	sbbl	24(%esp), %edx	/, t1
	jmp	.LL15
.LL23:
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
	jmp	.LL17
	SET_SIZE(UDivRem)
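/
/ A worked example of the normalization path above (illustrative values
/ only, not part of the original source):
/
/	x = 0x0000000500000000, y = 0x0000000300000000
/	HIBIT(HI(y)) = 1, so normshift = 31 - 1 = 30
/	y << 30 = 0xC000000000000000:	y1 = 0xC0000000, y0 = 0
/	x2 = HI(x) >> 2 = 1;  x << 30:	x1 = 0x40000000, x0 = 0
/	A_DIV32(x1, x2, y1, q0, x1):	0x140000000 / 0xC0000000
/	    gives q0 = 1 and x1 = 0x80000000
/	A_MUL32(y0, q0, t0, t1) = 0, so q0 needs no adjustment
/	*pmod = HILO(0x80000000, 0) >> 30 = 0x200000000
/	i.e. 0x500000000 / 0x300000000 = 1 remainder 0x200000000
/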
/*
 * Unsigned division without remainder.
 */
/ uint64_t
/ UDiv(uint64_t x, uint64_t y)
/ {
/	if (HI(y) == 0) {
/		/* simple cases: y is a single uint32_t */
/		uint32_t div_hi, div_rem;
/		uint32_t q0, q1;
/
/		/* calculate q1 */
/		if (HI(x) < LO(y)) {
/			/* result is a single uint32_t, use one division */
/			q1 = 0;
/			div_hi = HI(x);
/		} else {
/			/* result is a double uint32_t, use two divisions */
/			A_DIV32(HI(x), 0, LO(y), q1, div_hi);
/		}
/
/		/* calculate q0 and remainder */
/		A_DIV32(LO(x), div_hi, LO(y), q0, div_rem);
/
/		/* return result */
/		return (HILO(q1, q0));
/
/	} else if (HI(x) < HI(y)) {
/		/* HI(x) < HI(y) => x < y => result is 0 */
/
/		/* return result */
/		return (0);
/
/	} else {
/		/*
/		 * uint64_t by uint64_t division, resulting in a one-uint32_t
/		 * result
/		 */
/		uint32_t y0, y1;
/		uint32_t x1, x0;
/		uint32_t q0;
/		unsigned normshift;
/
/		/* normalize by shifting x and y so MSB(y) == 1 */
/		HIBIT(HI(y), normshift);	/* index of highest 1 bit */
/		normshift = 31 - normshift;
/
/		if (normshift == 0) {
/			/* no shifting needed, and x < 2*y so q <= 1 */
/			y1 = HI(y);
/			y0 = LO(y);
/			x1 = HI(x);
/			x0 = LO(x);
/
/			/* if x >= y then q = 1 (note x1 >= y1) */
/			if (x1 > y1 || x0 >= y0) {
/				q0 = 1;
/				/* subtract y from x to get remainder */
/				/* A_SUB2(y0, y1, x0, x1); */
/			} else {
/				q0 = 0;
/			}
/
/			/* return result */
/			return (q0);
/
/		} else {
/			/*
/			 * the last case: result is one uint32_t, but we need
/			 * to normalize
/			 */
/			uint64_t dt;
/			uint32_t t0, t1, x2;
/
/			/* normalize y */
/			dt = (y << normshift);
/			y1 = HI(dt);
/			y0 = LO(dt);
/
/			/* normalize x (we need 3 uint32_ts!!!) */
/			x2 = (HI(x) >> (32 - normshift));
/			dt = (x << normshift);
/			x1 = HI(dt);
/			x0 = LO(dt);
/
/			/* estimate q0, and reduce x to a two uint32_t value */
/			A_DIV32(x1, x2, y1, q0, x1);
/
/			/* adjust q0 down if too high */
/			/*
/			 * because of the limited range of x2 we can only be
/			 * one off
/			 */
/			A_MUL32(y0, q0, t0, t1);
/			if (t1 > x1 || (t1 == x1 && t0 > x0)) {
/				q0--;
/			}
/			/* return result */
/			return (q0);
/		}
/	}
/ }
	ENTRY(UDiv)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$40, %esp
	movl	%edx, 36(%esp)	/ x, x
	movl	60(%esp), %edx	/ y,
	testl	%edx, %edx	/ tmp62
	movl	%eax, 32(%esp)	/ x, x
	movl	%edx, %ecx	/ tmp61, tmp62
	movl	%edx, %eax	/, tmp61
	jne	.LL26
	movl	36(%esp), %esi	/ x,
	cmpl	56(%esp), %esi	/ y, tmp67
	movl	%esi, %eax	/, tmp67
	movl	%esi, %edx	/ tmp67, div_hi
	jb	.LL28
	movl	%ecx, %edx	/ tmp62, div_hi
	divl	56(%esp)	/ y
	movl	%eax, %ecx	/, q1
.LL28:
	xorl	%esi, %esi	/ <result>
	movl	%ecx, %edi	/ <result>, <result>
	movl	32(%esp), %eax	/ x, q0
	xorl	%ecx, %ecx	/ q0
	divl	56(%esp)	/ y
	addl	%eax, %esi	/ q0, <result>
	adcl	%ecx, %edi	/ q0, <result>
.LL25:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL26:
	movl	36(%esp), %esi	/ x,
	xorl	%edi, %edi
	movl	%esi, 24(%esp)	/ tmp1,
	movl	%edi, 28(%esp)
	xorl	%esi, %esi	/ <result>
	xorl	%edi, %edi	/ <result>
	cmpl	%eax, 24(%esp)	/ tmp61,
	jb	.LL25
	bsrl	%eax, %ebp	/ tmp61, normshift
	movl	$31, %eax	/, tmp85
	subl	%ebp, %eax	/ normshift, normshift
	jne	.LL32
	movl	24(%esp), %eax	/, x1
	cmpl	%ecx, %eax	/ tmp62, x1
	movl	56(%esp), %esi	/ y, y0
	movl	32(%esp), %edx	/ x, x0
	ja	.LL34
	xorl	%eax, %eax	/ q0
	cmpl	%esi, %edx	/ y0, x0
	jb	.LL35
.LL34:
	movl	$1, %eax	/, q0
.LL35:
	movl	%eax, %esi	/ q0, <result>
	xorl	%edi, %edi	/ <result>
.LL45:
	addl	$40, %esp
	movl	%esi, %eax	/ <result>, <result>
	popl	%esi
	movl	%edi, %edx	/ <result>, <result>
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL32:
	movb	%al, %cl
	movl	56(%esp), %esi	/ y,
	movl	60(%esp), %edi	/ y,
	shldl	%esi, %edi
	sall	%cl, %esi
	andl	$32, %ecx
	jne	.LL43
.LL40:
	movl	$32, %ecx	/, tmp96
	subl	%eax, %ecx	/ normshift, tmp96
	movl	%edi, %edx
	movl	%edi, 20(%esp)	/, dt
	movl	24(%esp), %ebp	/, x2
	xorl	%edi, %edi
	shrl	%cl, %ebp	/ tmp96, x2
	movl	%esi, 16(%esp)	/, dt
	movb	%al, %cl
	movl	32(%esp), %esi	/ x, dt
	movl	%edi, 12(%esp)
	movl	36(%esp), %edi	/ x, dt
	shldl	%esi, %edi	/, dt, dt
	sall	%cl, %esi	/, dt
	andl	$32, %ecx
	movl	%edx, 8(%esp)
	je	.LL41
	movl	%esi, %edi	/ dt, dt
	xorl	%esi, %esi	/ dt
.LL41:
	xorl	%ecx, %ecx
	movl	%edi, %eax	/ tmp1,
	movl	%ebp, %edx	/ x2,
	divl	8(%esp)
	movl	%edx, %ebp	/, x1
	movl	%ecx, 4(%esp)
	movl	%eax, %ecx	/, q0
	movl	16(%esp), %eax	/ dt,
	mull	%ecx		/ q0
	cmpl	%ebp, %edx	/ x1, t1
	movl	%edi, (%esp)
	movl	%esi, %edi	/ dt, x0
	ja	.LL38
	je	.LL44
.LL39:
	movl	%ecx, %esi	/ q0, <result>
.LL46:
	xorl	%edi, %edi	/ <result>
	jmp	.LL45
.LL44:
	cmpl	%edi, %eax	/ x0, t0
	jbe	.LL39
.LL38:
	decl	%ecx		/ q0
	movl	%ecx, %esi	/ q0, <result>
	jmp	.LL46
.LL43:
	movl	%esi, %edi
	xorl	%esi, %esi
	jmp	.LL40
	SET_SIZE(UDiv)

/*
 * __udiv64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.  __udiv64 pops the arguments on return.
 */
	ENTRY(__udiv64)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret	$16
	SET_SIZE(__udiv64)
/*
 * __urem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.  __urem64 pops the arguments on return.
 */
	ENTRY(__urem64)
	subl	$12, %esp	/ make room for the 64-bit remainder
	movl	%esp, %ecx	/, tmp65 (&rem)
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__urem64)

/*
 * __div64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.  __div64 pops the arguments on return.
 */
/ int64_t
/ __div64(int64_t x, int64_t y)
/ {
/	int negative;
/	uint64_t xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__div64)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL84
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL85
.LL82:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL83
	/ negate the 64-bit value in %edx:%eax: negating the low word
	/ sets the carry when it is non-zero; add the carry into the
	/ high word and negate that too
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL83:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret	$16
	.align	16
.LL84:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL82
	.align	16
.LL85:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL82
	SET_SIZE(__div64)

/*
 * __rem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.  __rem64 pops the arguments on return.
 */
/ int64_t
/ __rem64(int64_t x, int64_t y)
/ {
/	uint64_t xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__rem64)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL92
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL93
.LL90:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL94
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret	$16
	.align	16
.LL92:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL90
	.align	16
.LL93:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL90
	.align	16
.LL94:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret	$16
	SET_SIZE(__rem64)

#endif	/* __lint */

#if defined(__lint)

/*
 * C support for 64-bit modulo and division.
 * GNU routines callable from C (though generated by the compiler).
 * Hand-customized compiler output - see comments for details.
 */
/*ARGSUSED*/
unsigned long long
__udivdi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
unsigned long long
__umoddi3(unsigned long long a, unsigned long long b)
{ return (0); }

/*ARGSUSED*/
long long
__divdi3(long long a, long long b)
{ return (0); }

/*ARGSUSED*/
long long
__moddi3(long long a, long long b)
{ return (0); }

/* ARGSUSED */
int64_t
__divrem64(int64_t a, int64_t b)
{ return (0); }

/* ARGSUSED */
uint64_t
__udivrem64(uint64_t a, uint64_t b)
{ return (0); }

#else	/* __lint */

/*
 * int32_t/int64_t division/manipulation
 *
 * Hand-customized compiler output: the non-GCC entry points depart from
 * the SYS V ABI by requiring their arguments to be popped, and in the
 * [u]divrem64 cases returning the remainder in %ecx:%esi.  Note the
 * compiler-generated use of %edx:%eax for the first argument of
 * internal entry points.
 *
 * Inlines for speed:
 * - counting the number of leading zeros in a word
 * - multiplying two 32-bit numbers giving a 64-bit result
 * - dividing a 64-bit number by a 32-bit number, giving both quotient
 *   and remainder
 * - subtracting two 64-bit results
 */
/ #define	LO(X)		((uint32_t)(X) & 0xffffffff)
/ #define	HI(X)		((uint32_t)((X) >> 32) & 0xffffffff)
/ #define	HILO(H, L)	(((uint64_t)(H) << 32) + (L))
/
/ /* give index of highest bit */
/ #define	HIBIT(a, r) \
/     asm("bsrl %1,%0": "=r"((uint32_t)(r)) : "g" (a))
/
/ /* multiply two uint32_ts resulting in a uint64_t */
/ #define	A_MUL32(a, b, lo, hi) \
/     asm("mull %2" \
/	: "=a"((uint32_t)(lo)), "=d"((uint32_t)(hi)) : "g" (b), "0"(a))
/
/ /* divide a uint64_t by a uint32_t */
/ #define	A_DIV32(lo, hi, b, q, r) \
/     asm("divl %2" \
/	: "=a"((uint32_t)(q)), "=d"((uint32_t)(r)) \
/	: "g" (b), "0"((uint32_t)(lo)), "1"((uint32_t)hi))
/
/ /* subtract two uint64_ts (with borrow) */
/ #define	A_SUB2(bl, bh, al, ah) \
/     asm("subl %4,%0\n\tsbbl %5,%1" \
/	: "=&r"((uint32_t)(al)), "=r"((uint32_t)(ah)) \
/	: "0"((uint32_t)(al)), "1"((uint32_t)(ah)), "g"((uint32_t)(bl)), \
/	"g"((uint32_t)(bh)))
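/
/ For reference, plain-C equivalents of the two division/multiplication
/ inlines (a sketch only, assuming a 64-bit integer type is available;
/ these fragments are not part of the original source):
/
/	/* A_MUL32(a, b, lo, hi): 32x32 -> 64 multiply */
/	uint64_t p = (uint64_t)(a) * (b);
/	lo = (uint32_t)p;
/	hi = (uint32_t)(p >> 32);
/
/	/* A_DIV32(lo, hi, b, q, r): 64/32 divide; like divl, it
/	   requires hi < b so that the quotient fits in 32 bits */
/	uint64_t n = ((uint64_t)(hi) << 32) | (lo);
/	q = (uint32_t)(n / (b));
/	r = (uint32_t)(n % (b));
/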
/*
 * __udivdi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
	ENTRY(__udivdi3)
	movl	4(%esp), %eax	/ x, x
	movl	8(%esp), %edx	/ x, x
	pushl	16(%esp)	/ y
	pushl	16(%esp)
	call	UDiv
	addl	$8, %esp
	ret
	SET_SIZE(__udivdi3)

/*
 * __umoddi3
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
	ENTRY(__umoddi3)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp65
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp65
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	12(%esp), %eax	/ rem, rem
	movl	16(%esp), %edx	/ rem, rem
	addl	$24, %esp
	ret
	SET_SIZE(__umoddi3)

/*
 * __divdi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax.
 */
/ int64_t
/ __divdi3(int64_t x, int64_t y)
/ {
/	int negative;
/	uint64_t xt, yt, r;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDiv(xt, yt);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(__divdi3)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$8, %esp
	movl	28(%esp), %edx	/ x, x
	testl	%edx, %edx	/ x
	movl	24(%esp), %eax	/ x, x
	movl	32(%esp), %esi	/ y, y
	movl	36(%esp), %edi	/ y, y
	js	.LL55
	xorl	%ebp, %ebp	/ negative
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	js	.LL56
.LL53:
	pushl	%edx		/ yt
	pushl	%eax		/ yt
	movl	8(%esp), %eax	/ xt, xt
	movl	12(%esp), %edx	/ xt, xt
	call	UDiv
	popl	%ecx
	testl	%ebp, %ebp	/ negative
	popl	%esi
	je	.LL54
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL54:
	addl	$8, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL55:
	negl	%eax		/ x
	adcl	$0, %edx	/, x
	negl	%edx		/ x
	testl	%edi, %edi	/ y
	movl	%eax, (%esp)	/ x, xt
	movl	%edx, 4(%esp)	/ x, xt
	movl	$1, %ebp	/, negative
	movl	%esi, %eax	/ y, yt
	movl	%edi, %edx	/ y, yt
	jns	.LL53
	.align	16
.LL56:
	negl	%eax		/ yt
	adcl	$0, %edx	/, yt
	negl	%edx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL53
	SET_SIZE(__divdi3)

/*
 * __moddi3
 *
 * Perform division of two signed 64-bit quantities, returning the
 * remainder in %edx:%eax.
 */
/ int64_t
/ __moddi3(int64_t x, int64_t y)
/ {
/	uint64_t xt, yt, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/	} else {
/		xt = x;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/	} else {
/		yt = y;
/	}
/	(void) UDivRem(xt, yt, &rem);
/	return (x < 0 ? (int64_t) - rem : rem);
/ }
	ENTRY(__moddi3)
	pushl	%edi
	pushl	%esi
	subl	$20, %esp
	movl	36(%esp), %ecx	/ x,
	movl	32(%esp), %esi	/ x,
	movl	36(%esp), %edi	/ x,
	testl	%ecx, %ecx
	movl	40(%esp), %eax	/ y, y
	movl	44(%esp), %edx	/ y, y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	js	.LL63
	testl	%edx, %edx	/ y
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	js	.LL64
.LL61:
	leal	8(%esp), %eax	/, tmp66
	pushl	%eax		/ tmp66
	pushl	%edi		/ yt
	pushl	%esi		/ yt
	movl	12(%esp), %eax	/ xt, xt
	movl	16(%esp), %edx	/ xt, xt
	call	UDivRem
	addl	$12, %esp
	movl	36(%esp), %edi	/ x,
	testl	%edi, %edi
	movl	8(%esp), %eax	/ rem, rem
	movl	12(%esp), %edx	/ rem, rem
	js	.LL65
	addl	$20, %esp
	popl	%esi
	popl	%edi
	ret
	.align	16
.LL63:
	negl	%esi
	adcl	$0, %edi
	negl	%edi
	testl	%edx, %edx	/ y
	movl	%esi, (%esp)	/, xt
	movl	%edi, 4(%esp)	/, xt
	movl	%eax, %esi	/ y, yt
	movl	%edx, %edi	/ y, yt
	jns	.LL61
	.align	16
.LL64:
	negl	%esi		/ yt
	adcl	$0, %edi	/, yt
	negl	%edi		/ yt
	jmp	.LL61
	.align	16
.LL65:
	negl	%eax		/ rem
	adcl	$0, %edx	/, rem
	addl	$20, %esp
	popl	%esi
	negl	%edx		/ rem
	popl	%edi
	ret
	SET_SIZE(__moddi3)

/*
 * __udivrem64
 *
 * Perform division of two unsigned 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __udivrem64
 * pops the arguments on return.
 */
	ENTRY(__udivrem64)
	subl	$12, %esp
	movl	%esp, %ecx	/, tmp64
	movl	16(%esp), %eax	/ x, x
	movl	20(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	32(%esp)	/ y
	pushl	32(%esp)
	call	UDivRem
	movl	16(%esp), %ecx	/ rem, tmp63
	movl	12(%esp), %esi	/ rem
	addl	$24, %esp
	ret	$16
	SET_SIZE(__udivrem64)
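/
/ A sketch of a hypothetical call site, showing the non-standard
/ convention described above (x and y are illustrative memory labels,
/ not part of this file):
/
/	pushl	y+4		/ y.hi
/	pushl	y		/ y.lo
/	pushl	x+4		/ x.hi
/	pushl	x		/ x.lo
/	call	__udivrem64	/ quotient  -> %edx:%eax
/				/ remainder -> %ecx:%esi
/	/ no "addl $16, %esp" here: the callee pops its own arguments
/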
/*
 * Signed division with remainder.
 */
/ int64_t
/ SDivRem(int64_t x, int64_t y, int64_t * pmod)
/ {
/	int negative;
/	uint64_t xt, yt, r, rem;
/
/	if (x < 0) {
/		xt = -(uint64_t) x;
/		negative = 1;
/	} else {
/		xt = x;
/		negative = 0;
/	}
/	if (y < 0) {
/		yt = -(uint64_t) y;
/		negative ^= 1;
/	} else {
/		yt = y;
/	}
/	r = UDivRem(xt, yt, &rem);
/	*pmod = (x < 0 ? (int64_t) - rem : rem);
/	return (negative ? (int64_t) - r : r);
/ }
	ENTRY(SDivRem)
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	subl	$24, %esp
	testl	%edx, %edx	/ x
	movl	%edx, %edi	/ x, x
	js	.LL73
	movl	44(%esp), %esi	/ y,
	xorl	%ebp, %ebp	/ negative
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/ x, xt
	movl	%eax, 8(%esp)	/ x, xt
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	js	.LL74
.LL70:
	leal	16(%esp), %eax	/, tmp70
	pushl	%eax		/ tmp70
	pushl	%ecx		/ yt
	pushl	%edx		/ yt
	movl	20(%esp), %eax	/ xt, xt
	movl	24(%esp), %edx	/ xt, xt
	call	UDivRem
	movl	%edx, 16(%esp)	/, r
	movl	%eax, 12(%esp)	/, r
	addl	$12, %esp
	testl	%edi, %edi	/ x
	movl	16(%esp), %edx	/ rem, rem
	movl	20(%esp), %ecx	/ rem, rem
	js	.LL75
.LL71:
	movl	48(%esp), %edi	/ pmod, pmod
	testl	%ebp, %ebp	/ negative
	movl	%edx, (%edi)	/ rem, *pmod
	movl	%ecx, 4(%edi)	/ rem,
	movl	(%esp), %eax	/ r, r
	movl	4(%esp), %edx	/ r, r
	je	.LL72
	negl	%eax		/ r
	adcl	$0, %edx	/, r
	negl	%edx		/ r
.LL72:
	addl	$24, %esp
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
	.align	16
.LL73:
	negl	%eax
	adcl	$0, %edx
	movl	44(%esp), %esi	/ y,
	negl	%edx
	testl	%esi, %esi
	movl	%edx, 12(%esp)	/, xt
	movl	%eax, 8(%esp)	/, xt
	movl	$1, %ebp	/, negative
	movl	40(%esp), %edx	/ y, yt
	movl	44(%esp), %ecx	/ y, yt
	jns	.LL70
	.align	16
.LL74:
	negl	%edx		/ yt
	adcl	$0, %ecx	/, yt
	negl	%ecx		/ yt
	xorl	$1, %ebp	/, negative
	jmp	.LL70
	.align	16
.LL75:
	negl	%edx		/ rem
	adcl	$0, %ecx	/, rem
	negl	%ecx		/ rem
	jmp	.LL71
	SET_SIZE(SDivRem)

/*
 * __divrem64
 *
 * Perform division of two signed 64-bit quantities, returning the
 * quotient in %edx:%eax, and the remainder in %ecx:%esi.  __divrem64
 * pops the arguments on return.
 */
	ENTRY(__divrem64)
	subl	$20, %esp
	movl	%esp, %ecx	/, tmp64
	movl	24(%esp), %eax	/ x, x
	movl	28(%esp), %edx	/ x, x
	pushl	%ecx		/ tmp64
	pushl	40(%esp)	/ y
	pushl	40(%esp)
	call	SDivRem
	movl	16(%esp), %ecx
	movl	12(%esp), %esi	/ rem
	addl	$32, %esp
	ret	$16
	SET_SIZE(__divrem64)


#endif	/* __lint */

#endif	/* defined(__i386) && !defined(__amd64) */