/* arch/powerpc/boot/div64.S */
/*
 * Divide a 64-bit unsigned number by a 32-bit unsigned number.
 * This routine assumes that the top 32 bits of the dividend are
 * non-zero to start with.
 * On entry, r3 points to the dividend, which gets overwritten with
 * the 64-bit quotient, and r4 contains the divisor.
 * On exit, r3 contains the remainder.
 *
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include "ppc_asm.h"

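/*
 * In C terms this roughly corresponds to (a sketch of the calling
 * convention only, not a declaration taken from this file):
 *
 *	unsigned int __div64_32(unsigned long long *dividend,
 *				unsigned int divisor);
 *
 * where *dividend is replaced by the 64-bit quotient and the 32-bit
 * remainder is the return value.
 */
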
	.globl __div64_32
__div64_32:
	lwz	r5,0(r3)	# get the dividend into r5/r6
	lwz	r6,4(r3)
	cmplw	r5,r4
	li	r7,0
	li	r8,0
	blt	1f
	divwu	r7,r5,r4	# if dividend.hi >= divisor,
	mullw	r0,r7,r4	# quotient.hi = dividend.hi / divisor
	subf.	r5,r0,r5	# dividend.hi %= divisor
	beq	3f
1:	mr	r11,r5		# here dividend.hi != 0
	andis.	r0,r5,0xc000
	bne	2f
	cntlzw	r0,r5		# we are shifting the dividend right
	li	r10,-1		# to make it < 2^32, and shifting
	srw	r10,r10,r0	# the divisor right the same amount,
	addc	r9,r4,r10	# rounding up (so the estimate cannot
	andc	r11,r6,r10	# ever be too large, only too small)
	andc	r9,r9,r10
	addze	r9,r9
	or	r11,r5,r11
	rotlw	r9,r9,r0
	rotlw	r11,r11,r0
	divwu	r11,r11,r9	# then we divide the shifted quantities
2:	mullw	r10,r11,r4	# to get an estimate of the quotient,
	mulhwu	r9,r11,r4	# multiply the estimate by the divisor,
	subfc	r6,r10,r6	# take the product from the dividend,
	add	r8,r8,r11	# and add the estimate to the accumulated
	subfe.	r5,r9,r5	# quotient
	bne	1b
3:	cmplw	r6,r4
	blt	4f
	divwu	r0,r6,r4	# perform the remaining 32-bit division
	mullw	r10,r0,r4	# and get the remainder
	add	r8,r8,r0
	subf	r6,r10,r6
4:	stw	r7,0(r3)	# return the quotient in *r3
	stw	r8,4(r3)
	mr	r3,r6		# return the remainder in r3
	blr
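
/*
 * Reference sketch of the loop above in C (illustrative only, not
 * used by the wrapper; __builtin_clz stands in for cntlzw and the
 * helper name is made up):
 *
 *	unsigned int div64_32_sketch(unsigned long long *dividend,
 *				     unsigned int divisor)
 *	{
 *		unsigned long long n = *dividend, quot = 0;
 *		unsigned int hi = n >> 32;
 *		unsigned int est, s, nd, dv;
 *
 *		if (hi >= divisor) {	// quotient.hi = dividend.hi / divisor
 *			quot = (unsigned long long)(hi / divisor) << 32;
 *			hi %= divisor;	// dividend.hi %= divisor
 *			n = (unsigned long long)hi << 32 | (unsigned int)n;
 *		}
 *		while (n >> 32) {	// dividend.hi != 0
 *			hi = n >> 32;
 *			if (hi & 0xc0000000) {	// andis. r0,r5,0xc000
 *				est = hi;	// always a safe underestimate
 *			} else {
 *				// shift the dividend right until it fits in
 *				// 32 bits, shift the divisor right the same
 *				// amount rounding up, so the estimated digit
 *				// can never be too large, only too small
 *				s = 32 - __builtin_clz(hi);	// 1..30 here
 *				nd = n >> s;
 *				dv = (divisor >> s) +
 *					((divisor & ((1u << s) - 1)) != 0);
 *				est = nd / dv;
 *			}
 *			n -= (unsigned long long)est * divisor;
 *			quot += est;	// accumulate the quotient
 *		}
 *		quot += (unsigned int)n / divisor; // final 32-bit division
 *		*dividend = quot;
 *		return (unsigned int)n % divisor;
 *	}
 */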

/*
 * Extended precision shifts.
 *
 * Updated to be valid for shift counts from 0 to 63 inclusive.
 * -- Gabriel
 *
 * R3/R4 hold the 64-bit value
 * R5    holds the shift count
 * result in R3/R4
 *
 *  ashrdi3: arithmetic right shift (sign propagation)
 *  lshrdi3: logical right shift
 *  ashldi3: left shift
 */
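
/*
 * All three routines below rely on the same branch-free split of a
 * 64-bit shift into 32-bit pieces. It works because srw/slw take a
 * 6-bit shift count and simply produce 0 for counts 32-63, so the
 * out-of-range halves vanish without a branch. A C sketch of the
 * logical right shift (illustrative name; plain C has to spell out
 * the guards the hardware gives for free):
 *
 *	unsigned long long lshrdi3_sketch(unsigned long long v,
 *					  unsigned int count)
 *	{
 *		unsigned int msw = v >> 32, lsw = v;
 *		unsigned int lo, hi;
 *
 *		lo = count > 31 ? 0 : lsw >> count;		// srw
 *		lo |= count > 31 || count == 0
 *			? 0 : msw << (32 - count);		// slw, t1
 *		lo |= count < 32 ? 0 : msw >> (count - 32);	// srw, t2
 *		hi = count > 31 ? 0 : msw >> count;
 *		return (unsigned long long)hi << 32 | lo;
 *	}
 */
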
	.globl __ashrdi3
__ashrdi3:
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	rlwinm	r8,r7,0,32	# t3 = (count < 32) ? 32 : 0
	sraw	r7,r3,r7	# t2 = MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	slw	r7,r7,r8	# t2 = (count < 32) ? 0 : t2
	sraw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr
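
/*
 * Why the extra rlwinm/slw pair only in this routine: unlike srw/slw,
 * sraw does not produce 0 for shift counts 32-63; it produces 32
 * copies of the sign bit. For count < 32, "sraw r7,r3,r7" (a shift by
 * count+32) therefore yields sign-fill that must not reach the LSW,
 * so t3 selects a shift of 32 (which slw does turn into 0) to squash
 * t2. Roughly, in C (the first line shows what the hardware computes;
 * written literally in C it would be undefined for count < 32):
 *
 *	t2 = (int)msw >> (count - 32);	// sign-fill when count < 32
 *	t2 = count < 32 ? 0 : t2;	// slw t2 by (count < 32 ? 32 : 0)
 */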

	.globl __ashldi3
__ashldi3:
	subfic	r6,r5,32
	slw	r3,r3,r5	# MSW = count > 31 ? 0 : MSW << count
	addi	r7,r5,32	# could be xori, or addi with -32
	srw	r6,r4,r6	# t1 = count > 31 ? 0 : LSW >> (32-count)
	slw	r7,r4,r7	# t2 = count < 32 ? 0 : LSW << (count-32)
	or	r3,r3,r6	# MSW |= t1
	slw	r4,r4,r5	# LSW = LSW << count
	or	r3,r3,r7	# MSW |= t2
	blr

	.globl __lshrdi3
__lshrdi3:
	subfic	r6,r5,32
	srw	r4,r4,r5	# LSW = count > 31 ? 0 : LSW >> count
	addi	r7,r5,32	# could be xori, or addi with -32
	slw	r6,r3,r6	# t1 = count > 31 ? 0 : MSW << (32-count)
	srw	r7,r3,r7	# t2 = count < 32 ? 0 : MSW >> (count-32)
	or	r4,r4,r6	# LSW |= t1
	srw	r3,r3,r5	# MSW = MSW >> count
	or	r4,r4,r7	# LSW |= t2
	blr
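
/*
 * __ashldi3, __ashrdi3 and __lshrdi3 are the standard libgcc helper
 * names: on 32-bit PowerPC, gcc compiles 64-bit C shifts into calls
 * to them, so the boot wrapper must provide its own copies. A quick
 * host-side check of the sketch above (assumed test harness, not
 * part of the wrapper):
 *
 *	#include <assert.h>
 *
 *	int main(void)
 *	{
 *		unsigned long long v = 0x8000000000000001ULL;
 *		unsigned int c;
 *
 *		for (c = 0; c < 64; c++)
 *			assert(lshrdi3_sketch(v, c) == v >> c);
 *		return 0;
 *	}
 */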