1*0b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 2*0b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information. 3*0b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 4*0b57cec5SDimitry Andric 5*0b57cec5SDimitry Andric#include "../assembly.h" 6*0b57cec5SDimitry Andric 7*0b57cec5SDimitry Andric// du_int __umoddi3(du_int a, du_int b); 8*0b57cec5SDimitry Andric 9*0b57cec5SDimitry Andric// result = remainder of a / b. 10*0b57cec5SDimitry Andric// both inputs and the output are 64-bit unsigned integers. 11*0b57cec5SDimitry Andric// This will do whatever the underlying hardware is set to do on division by zero. 12*0b57cec5SDimitry Andric// No other exceptions are generated, as the divide cannot overflow. 13*0b57cec5SDimitry Andric// 14*0b57cec5SDimitry Andric// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 15*0b57cec5SDimitry Andric// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 16*0b57cec5SDimitry Andric// currently possible via simulation of integer divides on the x87 unit. 17*0b57cec5SDimitry Andric// 18*0b57cec5SDimitry Andric 19*0b57cec5SDimitry Andric// Stephen Canon, December 2008 20*0b57cec5SDimitry Andric 21*0b57cec5SDimitry Andric#ifdef __i386__ 22*0b57cec5SDimitry Andric 23*0b57cec5SDimitry Andric.text 24*0b57cec5SDimitry Andric.balign 4 25*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__umoddi3) 26*0b57cec5SDimitry Andric 27*0b57cec5SDimitry Andric pushl %ebx 28*0b57cec5SDimitry Andric movl 20(%esp), %ebx // Find the index i of the leading bit in b. 29*0b57cec5SDimitry Andric bsrl %ebx, %ecx // If the high word of b is zero, jump to 30*0b57cec5SDimitry Andric jz 9f // the code to handle that special case [9]. 31*0b57cec5SDimitry Andric 32*0b57cec5SDimitry Andric // High word of b is known to be non-zero on this branch 33*0b57cec5SDimitry Andric 34*0b57cec5SDimitry Andric movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 35*0b57cec5SDimitry Andric 36*0b57cec5SDimitry Andric shrl %cl, %eax // Practically, this means that bhi is given by: 37*0b57cec5SDimitry Andric shrl %eax // 38*0b57cec5SDimitry Andric notl %ecx // bhi = (high word of b) << (31 - i) | 39*0b57cec5SDimitry Andric shll %cl, %ebx // (low word of b) >> (1 + i) 40*0b57cec5SDimitry Andric orl %eax, %ebx // 41*0b57cec5SDimitry Andric movl 12(%esp), %edx // Load the high and low words of a, and jump 42*0b57cec5SDimitry Andric movl 8(%esp), %eax // to [2] if the high word is larger than bhi 43*0b57cec5SDimitry Andric cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 44*0b57cec5SDimitry Andric jae 2f 45*0b57cec5SDimitry Andric 46*0b57cec5SDimitry Andric // High word of a is greater than or equal to (b >> (1 + i)) on this branch 47*0b57cec5SDimitry Andric 48*0b57cec5SDimitry Andric divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 49*0b57cec5SDimitry Andric 50*0b57cec5SDimitry Andric pushl %edi 51*0b57cec5SDimitry Andric notl %ecx 52*0b57cec5SDimitry Andric shrl %eax 53*0b57cec5SDimitry Andric shrl %cl, %eax // q = qs >> (1 + i) 54*0b57cec5SDimitry Andric movl %eax, %edi 55*0b57cec5SDimitry Andric mull 20(%esp) // q*blo 56*0b57cec5SDimitry Andric movl 12(%esp), %ebx 57*0b57cec5SDimitry Andric movl 16(%esp), %ecx // ECX:EBX = a 58*0b57cec5SDimitry Andric subl %eax, %ebx 59*0b57cec5SDimitry Andric sbbl %edx, %ecx // ECX:EBX = a - q*blo 60*0b57cec5SDimitry Andric movl 24(%esp), %eax 61*0b57cec5SDimitry Andric imull %edi, %eax // q*bhi 62*0b57cec5SDimitry Andric subl %eax, %ecx // ECX:EBX = a - q*b 63*0b57cec5SDimitry Andric 64*0b57cec5SDimitry Andric jnc 1f // if positive, this is the result. 65*0b57cec5SDimitry Andric addl 20(%esp), %ebx // otherwise 66*0b57cec5SDimitry Andric adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result 67*0b57cec5SDimitry Andric1: movl %ebx, %eax 68*0b57cec5SDimitry Andric movl %ecx, %edx 69*0b57cec5SDimitry Andric 70*0b57cec5SDimitry Andric popl %edi 71*0b57cec5SDimitry Andric popl %ebx 72*0b57cec5SDimitry Andric retl 73*0b57cec5SDimitry Andric 74*0b57cec5SDimitry Andric 75*0b57cec5SDimitry Andric2: // High word of a is greater than or equal to (b >> (1 + i)) on this branch 76*0b57cec5SDimitry Andric 77*0b57cec5SDimitry Andric subl %ebx, %edx // subtract bhi from ahi so that divide will not 78*0b57cec5SDimitry Andric divl %ebx // overflow, and find q and r such that 79*0b57cec5SDimitry Andric // 80*0b57cec5SDimitry Andric // ahi:alo = (1:q)*bhi + r 81*0b57cec5SDimitry Andric // 82*0b57cec5SDimitry Andric // Note that q is a number in (31-i).(1+i) 83*0b57cec5SDimitry Andric // fix point. 84*0b57cec5SDimitry Andric 85*0b57cec5SDimitry Andric pushl %edi 86*0b57cec5SDimitry Andric notl %ecx 87*0b57cec5SDimitry Andric shrl %eax 88*0b57cec5SDimitry Andric orl $0x80000000, %eax 89*0b57cec5SDimitry Andric shrl %cl, %eax // q = (1:qs) >> (1 + i) 90*0b57cec5SDimitry Andric movl %eax, %edi 91*0b57cec5SDimitry Andric mull 20(%esp) // q*blo 92*0b57cec5SDimitry Andric movl 12(%esp), %ebx 93*0b57cec5SDimitry Andric movl 16(%esp), %ecx // ECX:EBX = a 94*0b57cec5SDimitry Andric subl %eax, %ebx 95*0b57cec5SDimitry Andric sbbl %edx, %ecx // ECX:EBX = a - q*blo 96*0b57cec5SDimitry Andric movl 24(%esp), %eax 97*0b57cec5SDimitry Andric imull %edi, %eax // q*bhi 98*0b57cec5SDimitry Andric subl %eax, %ecx // ECX:EBX = a - q*b 99*0b57cec5SDimitry Andric 100*0b57cec5SDimitry Andric jnc 3f // if positive, this is the result. 101*0b57cec5SDimitry Andric addl 20(%esp), %ebx // otherwise 102*0b57cec5SDimitry Andric adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result 103*0b57cec5SDimitry Andric3: movl %ebx, %eax 104*0b57cec5SDimitry Andric movl %ecx, %edx 105*0b57cec5SDimitry Andric 106*0b57cec5SDimitry Andric popl %edi 107*0b57cec5SDimitry Andric popl %ebx 108*0b57cec5SDimitry Andric retl 109*0b57cec5SDimitry Andric 110*0b57cec5SDimitry Andric 111*0b57cec5SDimitry Andric 112*0b57cec5SDimitry Andric9: // High word of b is zero on this branch 113*0b57cec5SDimitry Andric 114*0b57cec5SDimitry Andric movl 12(%esp), %eax // Find qhi and rhi such that 115*0b57cec5SDimitry Andric movl 16(%esp), %ecx // 116*0b57cec5SDimitry Andric xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 117*0b57cec5SDimitry Andric divl %ecx // 118*0b57cec5SDimitry Andric movl %eax, %ebx // 119*0b57cec5SDimitry Andric movl 8(%esp), %eax // Find rlo such that 120*0b57cec5SDimitry Andric divl %ecx // 121*0b57cec5SDimitry Andric movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 122*0b57cec5SDimitry Andric popl %ebx // 123*0b57cec5SDimitry Andric xorl %edx, %edx // and return 0:rlo 124*0b57cec5SDimitry Andric retl // 125*0b57cec5SDimitry AndricEND_COMPILERRT_FUNCTION(__umoddi3) 126*0b57cec5SDimitry Andric 127*0b57cec5SDimitry Andric#endif // __i386__ 128*0b57cec5SDimitry Andric 129*0b57cec5SDimitry AndricNO_EXEC_STACK_DIRECTIVE 130*0b57cec5SDimitry Andric 131