xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/i386/lshrdi3.S (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2*0b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
3*0b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4*0b57cec5SDimitry Andric
5*0b57cec5SDimitry Andric#include "../assembly.h"
6*0b57cec5SDimitry Andric
7*0b57cec5SDimitry Andric// di_int __lshrdi3(di_int input, int count);
8*0b57cec5SDimitry Andric
// This routine has some extra memory traffic: by default it loads the 64-bit
// input via two 32-bit loads and merges the halves in a register, instead of
// using a single 64-bit load.  This is to avoid a write-small, read-large stall
// when the caller stored the argument as two 32-bit halves.
// However, if callers of this routine can be safely assumed to store the argument
// via a 64-bit store, this is unnecessary memory traffic, and should be avoided.
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
15*0b57cec5SDimitry Andric
16*0b57cec5SDimitry Andric#ifdef __i386__
17*0b57cec5SDimitry Andric#ifdef __SSE2__
18*0b57cec5SDimitry Andric
.text
.balign 4
// di_int __lshrdi3(di_int input, int count)   -- SSE2 implementation
//
// Logical right shift of a 64-bit integer; vacated high bits are zero.
//
// In:   4(%esp)  = low 32 bits of input
//       8(%esp)  = high 32 bits of input
//       12(%esp) = count
// Out:  %edx:%eax = input >> count
// Uses: %xmm0, %xmm2, and %xmm1 unless TRUST_CALLERS_USE_64_BIT_STORES is
//       defined.  No general-purpose register other than %eax/%edx is written.
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
#ifdef TRUST_CALLERS_USE_64_BIT_STORES
	movq	4(%esp), %xmm0			// xmm0 = input, fetched with one 64-bit load
#else
	movd	4(%esp), %xmm0			// xmm0 = low half of input
	movd	8(%esp), %xmm1			// xmm1 = high half of input
	punpckldq %xmm1, %xmm0			// xmm0[63:0] = high:low
#endif
	movd	12(%esp), %xmm2			// xmm2 = count, zero-extended
	psrlq	%xmm2, %xmm0			// xmm0 = input >> count (64-bit logical shift)
	movd	%xmm0, %eax			// eax = low 32 bits of the result
	psrlq	$32, %xmm0			// bring the high 32 bits down
	movd	%xmm0, %edx			// edx = high 32 bits of the result
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)
36*0b57cec5SDimitry Andric
37*0b57cec5SDimitry Andric#else // Use GPRs instead of SSE2 instructions, if they aren't available.
38*0b57cec5SDimitry Andric
.text
.balign 4
// di_int __lshrdi3(di_int input, int count)   -- general-purpose-register
// implementation, used when SSE2 is not available.
//
// Logical right shift of a 64-bit integer; vacated high bits are zero.
//
// In:   4(%esp)  = low 32 bits of input
//       8(%esp)  = high 32 bits of input
//       12(%esp) = count
// Out:  %edx:%eax = input >> count
// Uses: %ecx and the flags.
DEFINE_COMPILERRT_FUNCTION(__lshrdi3)
	movl	4(%esp), %eax			// eax = low half of input
	movl	8(%esp), %edx			// edx = high half of input
	movl	12(%esp), %ecx			// ecx = count

	testl	$0x20, %ecx			// bit 5 set <=> count >= 32 (for counts in 0..63)
	jnz	1f

	// count < 32: both halves contribute to the result.
	shrdl	%cl, %edx, %eax			// low = (high:low) >> count, truncated to 32 bits
	shrl	%cl, %edx			// high = high >> count
	ret

1:	// count >= 32: the result comes entirely from the old high half.
	movl	%edx, %eax			// low = high
	xorl	%edx, %edx			// high = 0
	shrl	%cl, %eax			// %cl is taken mod 32, i.e. shift by count - 32
	ret
END_COMPILERRT_FUNCTION(__lshrdi3)
58*0b57cec5SDimitry Andric
59*0b57cec5SDimitry Andric#endif // __SSE2__
60*0b57cec5SDimitry Andric#endif // __i386__
61*0b57cec5SDimitry Andric
62*0b57cec5SDimitry AndricNO_EXEC_STACK_DIRECTIVE
63*0b57cec5SDimitry Andric
64