// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "../assembly.h"

// di_int __ashldi3(di_int input, int count);

// This routine has some extra memory traffic: it loads the 64-bit input via two
// 32-bit loads and merges the halves in a register rather than using a single
// 64-bit load. This avoids a write-small, read-large (store-forwarding) stall
// when the caller wrote the argument to the stack as two 32-bit stores.
// However, if callers of this routine can be safely assumed to store the argument
// via a 64-bit store, this extra memory traffic is unnecessary and should be avoided.
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.

#ifdef __i386__
#ifdef __SSE2__

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
	movd	12(%esp), %xmm2		// Load count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
	movd	4(%esp), %xmm0
	movd	8(%esp), %xmm1
	punpckldq	%xmm1, %xmm0	// Load input
#else
	movq	4(%esp), %xmm0		// Load input
#endif
	psllq	%xmm2, %xmm0		// shift input by count
	movd	%xmm0, %eax
	psrlq	$32, %xmm0
	movd	%xmm0, %edx
	ret
END_COMPILERRT_FUNCTION(__ashldi3)

#else // Use GPRs if SSE2 instructions aren't available.

.text
.balign 4
DEFINE_COMPILERRT_FUNCTION(__ashldi3)
	movl	12(%esp), %ecx		// Load count
	movl	8(%esp), %edx		// Load high
	movl	4(%esp), %eax		// Load low

	testl	$0x20, %ecx			// If count >= 32
	jnz	1f						//    goto 1
	shldl	%cl, %eax, %edx		// left shift high by count
	shll	%cl, %eax			// left shift low by count
	ret

1:	movl	%eax, %edx			// Move low to high
	xorl	%eax, %eax			// clear low
	shll	%cl, %edx			// shift high by count - 32 (shll masks %cl to 5 bits)
	ret
END_COMPILERRT_FUNCTION(__ashldi3)

#endif // __SSE2__
#endif // __i386__

NO_EXEC_STACK_DIRECTIVE
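
// For readers less familiar with the assembly above, the following is a minimal
// C sketch of what __ashldi3 computes, mirroring the GPR code path. It is
// illustrative only and not part of the build; the portable implementation in
// compiler-rt may differ.
//
//   typedef long long di_int; // 64-bit signed integer, matching di_int on i386
//
//   di_int __ashldi3(di_int input, int count) {
//     unsigned lo = (unsigned)input;
//     unsigned hi = (unsigned)((unsigned long long)input >> 32);
//     if (count & 32) {                           // count in [32, 63]
//       hi = lo << (count - 32);                  // low half becomes the high half
//       lo = 0;                                   // low half is cleared
//     } else if (count) {                         // count in [1, 31]
//       hi = (hi << count) | (lo >> (32 - count));
//       lo <<= count;
//     }                                           // count == 0: value unchanged
//     return (di_int)(((unsigned long long)hi << 32) | lo);
//   }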