xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/i386/ashrdi3.S (revision 0b57cec536236d46e3dba9bd041533462f33dbb7)
1*0b57cec5SDimitry Andric// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2*0b57cec5SDimitry Andric// See https://llvm.org/LICENSE.txt for license information.
3*0b57cec5SDimitry Andric// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4*0b57cec5SDimitry Andric
5*0b57cec5SDimitry Andric#include "../assembly.h"
6*0b57cec5SDimitry Andric
7*0b57cec5SDimitry Andric// di_int __ashrdi3(di_int input, int count);
8*0b57cec5SDimitry Andric
9*0b57cec5SDimitry Andric#ifdef __i386__
10*0b57cec5SDimitry Andric#ifdef __SSE2__
11*0b57cec5SDimitry Andric
12*0b57cec5SDimitry Andric.text
13*0b57cec5SDimitry Andric.balign 4
// __ashrdi3, i386 SSE2 variant: arithmetic (sign-propagating) right shift of
// a 64-bit signed integer.
//
//   In:   4(%esp) = low  32 bits of input
//         8(%esp) = high 32 bits of input
//        12(%esp) = shift count
//   Out:  %edx:%eax = input >> count (high word in %edx, low word in %eax)
//   Uses: %eax, %edx, %xmm0, %xmm1, %xmm2, flags
//
// Strategy: SSE2 has no 64-bit arithmetic right shift, so the input is shifted
// logically with psrlq and, only when the input is negative, a mask holding
// `count` leading one bits is ORed into the result.
// The mask arithmetic below (63 - count via ~count & 0x3f) is exact for counts
// in the range 0..63.
14*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__ashrdi3)
15*0b57cec5SDimitry Andric	movd	  12(%esp),		%xmm2	// Load count (movd zero-extends into the full register)
16*0b57cec5SDimitry Andric	movl	   8(%esp),		%eax	// High word of input, kept for the sign test below
	// Default path: assemble the 64-bit input from two 32-bit loads.
	// With TRUST_CALLERS_USE_64_BIT_STORES defined, a single 64-bit load is used.
	// NOTE(review): presumably the split load avoids a store-forwarding stall when
	// the caller wrote the argument with two 32-bit stores -- confirm.
17*0b57cec5SDimitry Andric#ifndef TRUST_CALLERS_USE_64_BIT_STORES
18*0b57cec5SDimitry Andric	movd	   4(%esp),		%xmm0	// low word
19*0b57cec5SDimitry Andric	movd	   8(%esp),		%xmm1	// high word
20*0b57cec5SDimitry Andric	punpckldq	%xmm1,		%xmm0	// Load input: xmm0[63:0] = high:low
21*0b57cec5SDimitry Andric#else
22*0b57cec5SDimitry Andric	movq	   4(%esp),		%xmm0	// Load input
23*0b57cec5SDimitry Andric#endif
24*0b57cec5SDimitry Andric
25*0b57cec5SDimitry Andric	psrlq		%xmm2,		%xmm0	// unsigned (zero-filling) shift of input by count
26*0b57cec5SDimitry Andric
27*0b57cec5SDimitry Andric	testl		%eax,		%eax	// check the sign-bit of the input (bit 31 of the high word)
28*0b57cec5SDimitry Andric	jns			1f					// early out for non-negative inputs: logical == arithmetic shift
29*0b57cec5SDimitry Andric
30*0b57cec5SDimitry Andric	// If the input is negative, we need to construct the shifted sign bit
31*0b57cec5SDimitry Andric	// to or into the result, as xmm does not have a signed right shift.
32*0b57cec5SDimitry Andric	pcmpeqb		%xmm1,		%xmm1	// -1ULL (all bits set)
33*0b57cec5SDimitry Andric	psrlq		$58,		%xmm1	// 0x3f in each 64-bit lane
34*0b57cec5SDimitry Andric	pandn		%xmm1,		%xmm2	// xmm2 = ~count & 0x3f = 63 - count
35*0b57cec5SDimitry Andric	pcmpeqb		%xmm1,		%xmm1	// -1ULL
36*0b57cec5SDimitry Andric	psubq		%xmm1,		%xmm2	// (63 - count) - (-1) = 64 - count
	// For count == 0 the shift amount is 64; psllq with a count above 63 yields
	// zero, so no sign bits are ORed in and the input is returned unchanged.
37*0b57cec5SDimitry Andric	psllq		%xmm2,		%xmm1	// -1 << (64 - count) = leading sign bits
38*0b57cec5SDimitry Andric	por			%xmm1,		%xmm0	// merge sign bits into the logically shifted value
39*0b57cec5SDimitry Andric
40*0b57cec5SDimitry Andric	// Move the result back to the general purpose registers and return
41*0b57cec5SDimitry Andric1:	movd		%xmm0,		%eax	// low 32 bits of result
42*0b57cec5SDimitry Andric	psrlq		$32,		%xmm0	// bring high 32 bits down into the low dword
43*0b57cec5SDimitry Andric	movd		%xmm0,		%edx	// high 32 bits of result
44*0b57cec5SDimitry Andric	ret
45*0b57cec5SDimitry AndricEND_COMPILERRT_FUNCTION(__ashrdi3)
46*0b57cec5SDimitry Andric
47*0b57cec5SDimitry Andric#else // Use GPRs instead of SSE2 instructions, if they aren't available.
48*0b57cec5SDimitry Andric
49*0b57cec5SDimitry Andric.text
50*0b57cec5SDimitry Andric.balign 4
// __ashrdi3, i386 general-purpose-register variant: arithmetic
// (sign-propagating) right shift of a 64-bit signed integer.
//
//   In:   4(%esp) = low  32 bits of input
//         8(%esp) = high 32 bits of input
//        12(%esp) = shift count
//   Out:  %edx:%eax = input >> count (high word in %edx, low word in %eax)
//   Uses: %eax, %ecx, %edx, flags
//
// Two cases are selected by bit 5 of the count:
//   bit clear (count < 32 for counts in 0..63): double-precision shift of
//     low, arithmetic shift of high.
//   bit set (count >= 32): low = high >> (count - 32), high = sign fill.
51*0b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__ashrdi3)
52*0b57cec5SDimitry Andric	movl	  12(%esp),		%ecx	// Load count
53*0b57cec5SDimitry Andric	movl	   8(%esp),		%edx	// Load high
54*0b57cec5SDimitry Andric	movl	   4(%esp),		%eax	// Load low
55*0b57cec5SDimitry Andric
56*0b57cec5SDimitry Andric	testl		$0x20,		%ecx	// If bit 5 of count is set (count >= 32)
57*0b57cec5SDimitry Andric	jnz			1f					//    goto 1
58*0b57cec5SDimitry Andric
59*0b57cec5SDimitry Andric	shrdl		%cl, %edx,	%eax	// right shift low by count, shifting in bits from high
60*0b57cec5SDimitry Andric	sarl		%cl,		%edx	// arithmetic right shift high by count
61*0b57cec5SDimitry Andric	ret
62*0b57cec5SDimitry Andric
63*0b57cec5SDimitry Andric1:	movl		%edx,		%eax	// Move high to low (accounts for 32 bits of the shift)
64*0b57cec5SDimitry Andric	sarl		$31,		%edx	// fill high with copies of the sign bit (0 or -1)
	// sarl uses only the low 5 bits of %cl, so this shifts by count & 31,
	// which equals count - 32 on this path.
65*0b57cec5SDimitry Andric	sarl		%cl,		%eax	// arithmetic shift low by count - 32
66*0b57cec5SDimitry Andric	ret
67*0b57cec5SDimitry AndricEND_COMPILERRT_FUNCTION(__ashrdi3)
68*0b57cec5SDimitry Andric
69*0b57cec5SDimitry Andric#endif // __SSE2__
70*0b57cec5SDimitry Andric#endif // __i386__
71*0b57cec5SDimitry Andric
72*0b57cec5SDimitry AndricNO_EXEC_STACK_DIRECTIVE
73*0b57cec5SDimitry Andric
74