1*0b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 2*0b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information. 3*0b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 4*0b57cec5SDimitry Andric 5*0b57cec5SDimitry Andric#include "../assembly.h" 6*0b57cec5SDimitry Andric 7*0b57cec5SDimitry Andric// du_int __udivdi3(du_int a, du_int b); 8*0b57cec5SDimitry Andric 9*0b57cec5SDimitry Andric// result = a / b. 10*0b57cec5SDimitry Andric// both inputs and the output are 64-bit unsigned integers. 11*0b57cec5SDimitry Andric// This will do whatever the underlying hardware is set to do on division by zero. 12*0b57cec5SDimitry Andric// No other exceptions are generated, as the divide cannot overflow. 13*0b57cec5SDimitry Andric// 14*0b57cec5SDimitry Andric// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 15*0b57cec5SDimitry Andric// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 16*0b57cec5SDimitry Andric// currently possible via simulation of integer divides on the x87 unit. 17*0b57cec5SDimitry Andric// 18*0b57cec5SDimitry Andric// Stephen Canon, December 2008 19*0b57cec5SDimitry Andric 20*0b57cec5SDimitry Andric#ifdef __i386__ 21*0b57cec5SDimitry Andric 22*0b57cec5SDimitry Andric.text 23*0b57cec5SDimitry Andric.balign 4 24*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__udivdi3) 25*0b57cec5SDimitry Andric 26*0b57cec5SDimitry Andric pushl %ebx 27*0b57cec5SDimitry Andric movl 20(%esp), %ebx // Find the index i of the leading bit in b. 28*0b57cec5SDimitry Andric bsrl %ebx, %ecx // If the high word of b is zero, jump to 29*0b57cec5SDimitry Andric jz 9f // the code to handle that special case [9]. 30*0b57cec5SDimitry Andric 31*0b57cec5SDimitry Andric // High word of b is known to be non-zero on this branch 32*0b57cec5SDimitry Andric 33*0b57cec5SDimitry Andric movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 34*0b57cec5SDimitry Andric 35*0b57cec5SDimitry Andric shrl %cl, %eax // Practically, this means that bhi is given by: 36*0b57cec5SDimitry Andric shrl %eax // 37*0b57cec5SDimitry Andric notl %ecx // bhi = (high word of b) << (31 - i) | 38*0b57cec5SDimitry Andric shll %cl, %ebx // (low word of b) >> (1 + i) 39*0b57cec5SDimitry Andric orl %eax, %ebx // 40*0b57cec5SDimitry Andric movl 12(%esp), %edx // Load the high and low words of a, and jump 41*0b57cec5SDimitry Andric movl 8(%esp), %eax // to [1] if the high word is larger than bhi 42*0b57cec5SDimitry Andric cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 43*0b57cec5SDimitry Andric jae 1f 44*0b57cec5SDimitry Andric 45*0b57cec5SDimitry Andric // High word of a is greater than or equal to (b >> (1 + i)) on this branch 46*0b57cec5SDimitry Andric 47*0b57cec5SDimitry Andric divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 48*0b57cec5SDimitry Andric 49*0b57cec5SDimitry Andric pushl %edi 50*0b57cec5SDimitry Andric notl %ecx 51*0b57cec5SDimitry Andric shrl %eax 52*0b57cec5SDimitry Andric shrl %cl, %eax // q = qs >> (1 + i) 53*0b57cec5SDimitry Andric movl %eax, %edi 54*0b57cec5SDimitry Andric mull 20(%esp) // q*blo 55*0b57cec5SDimitry Andric movl 12(%esp), %ebx 56*0b57cec5SDimitry Andric movl 16(%esp), %ecx // ECX:EBX = a 57*0b57cec5SDimitry Andric subl %eax, %ebx 58*0b57cec5SDimitry Andric sbbl %edx, %ecx // ECX:EBX = a - q*blo 59*0b57cec5SDimitry Andric movl 24(%esp), %eax 60*0b57cec5SDimitry Andric imull %edi, %eax // q*bhi 61*0b57cec5SDimitry Andric subl %eax, %ecx // ECX:EBX = a - q*b 62*0b57cec5SDimitry Andric sbbl $0, %edi // decrement q if remainder is negative 63*0b57cec5SDimitry Andric xorl %edx, %edx 64*0b57cec5SDimitry Andric movl %edi, %eax 65*0b57cec5SDimitry Andric popl %edi 66*0b57cec5SDimitry Andric popl %ebx 67*0b57cec5SDimitry Andric retl 68*0b57cec5SDimitry Andric 69*0b57cec5SDimitry Andric 70*0b57cec5SDimitry Andric1: // High word of a is greater than or equal to (b >> (1 + i)) on this branch 71*0b57cec5SDimitry Andric 72*0b57cec5SDimitry Andric subl %ebx, %edx // subtract bhi from ahi so that divide will not 73*0b57cec5SDimitry Andric divl %ebx // overflow, and find q and r such that 74*0b57cec5SDimitry Andric // 75*0b57cec5SDimitry Andric // ahi:alo = (1:q)*bhi + r 76*0b57cec5SDimitry Andric // 77*0b57cec5SDimitry Andric // Note that q is a number in (31-i).(1+i) 78*0b57cec5SDimitry Andric // fix point. 79*0b57cec5SDimitry Andric 80*0b57cec5SDimitry Andric pushl %edi 81*0b57cec5SDimitry Andric notl %ecx 82*0b57cec5SDimitry Andric shrl %eax 83*0b57cec5SDimitry Andric orl $0x80000000, %eax 84*0b57cec5SDimitry Andric shrl %cl, %eax // q = (1:qs) >> (1 + i) 85*0b57cec5SDimitry Andric movl %eax, %edi 86*0b57cec5SDimitry Andric mull 20(%esp) // q*blo 87*0b57cec5SDimitry Andric movl 12(%esp), %ebx 88*0b57cec5SDimitry Andric movl 16(%esp), %ecx // ECX:EBX = a 89*0b57cec5SDimitry Andric subl %eax, %ebx 90*0b57cec5SDimitry Andric sbbl %edx, %ecx // ECX:EBX = a - q*blo 91*0b57cec5SDimitry Andric movl 24(%esp), %eax 92*0b57cec5SDimitry Andric imull %edi, %eax // q*bhi 93*0b57cec5SDimitry Andric subl %eax, %ecx // ECX:EBX = a - q*b 94*0b57cec5SDimitry Andric sbbl $0, %edi // decrement q if remainder is negative 95*0b57cec5SDimitry Andric xorl %edx, %edx 96*0b57cec5SDimitry Andric movl %edi, %eax 97*0b57cec5SDimitry Andric popl %edi 98*0b57cec5SDimitry Andric popl %ebx 99*0b57cec5SDimitry Andric retl 100*0b57cec5SDimitry Andric 101*0b57cec5SDimitry Andric 102*0b57cec5SDimitry Andric9: // High word of b is zero on this branch 103*0b57cec5SDimitry Andric 104*0b57cec5SDimitry Andric movl 12(%esp), %eax // Find qhi and rhi such that 105*0b57cec5SDimitry Andric movl 16(%esp), %ecx // 106*0b57cec5SDimitry Andric xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 107*0b57cec5SDimitry Andric divl %ecx // 108*0b57cec5SDimitry Andric movl %eax, %ebx // 109*0b57cec5SDimitry Andric movl 8(%esp), %eax // Find qlo such that 110*0b57cec5SDimitry Andric divl %ecx // 111*0b57cec5SDimitry Andric movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 112*0b57cec5SDimitry Andric popl %ebx // 113*0b57cec5SDimitry Andric retl // and return qhi:qlo 114*0b57cec5SDimitry AndricEND_COMPILERRT_FUNCTION(__udivdi3) 115*0b57cec5SDimitry Andric 116*0b57cec5SDimitry Andric#endif // __i386__ 117*0b57cec5SDimitry Andric 118*0b57cec5SDimitry AndricNO_EXEC_STACK_DIRECTIVE 119*0b57cec5SDimitry Andric 120