xref: /titanic_51/usr/src/cmd/sgs/rtld/amd64/boot_elf.s (revision f3390f39074f3a68f54318e83a9801b156b0f5d3)
17c478bd9Sstevel@tonic-gate/*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
533b4ce79Sab196087 * Common Development and Distribution License (the "License").
633b4ce79Sab196087 * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
212c3cf7aaSbholler
227c478bd9Sstevel@tonic-gate/*
232c3cf7aaSbholler * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate * Use is subject to license terms.
25*f3390f39SRobert Mustacchi * Copyright (c) 2012 Joyent, Inc. All rights reserved.
267c478bd9Sstevel@tonic-gate */
272c3cf7aaSbholler
287c478bd9Sstevel@tonic-gate#if	defined(lint)
297c478bd9Sstevel@tonic-gate
307c478bd9Sstevel@tonic-gate#include	<sys/types.h>
317c478bd9Sstevel@tonic-gate#include	<_rtld.h>
327c478bd9Sstevel@tonic-gate#include	<_audit.h>
337c478bd9Sstevel@tonic-gate#include	<_elf.h>
347c478bd9Sstevel@tonic-gate#include	<sys/regset.h>
35*f3390f39SRobert Mustacchi#include	<sys/auxv_386.h>
367c478bd9Sstevel@tonic-gate
/*
 * Lint-only stand-in for the assembly elf_plt_trace entry point.  It exists
 * purely so that lint sees a definition of the symbol; it always yields 0.
 */
/* ARGSUSED0 */
int
elf_plt_trace()
{
	int	result = 0;

	return (result);
}
437c478bd9Sstevel@tonic-gate#else
447c478bd9Sstevel@tonic-gate
457c478bd9Sstevel@tonic-gate#include	<link.h>
467c478bd9Sstevel@tonic-gate#include	<_audit.h>
477c478bd9Sstevel@tonic-gate#include	<sys/asm_linkage.h>
48*f3390f39SRobert Mustacchi#include	<sys/auxv_386.h>
497c478bd9Sstevel@tonic-gate
507c478bd9Sstevel@tonic-gate	.file	"boot_elf.s"
517c478bd9Sstevel@tonic-gate	.text
527c478bd9Sstevel@tonic-gate
/*
 * elf_plt_trace: PLT auditing trampoline.
 *
 * Saves the argument registers, optionally calls audit_pltenter(), then
 * either jumps straight to the destination (no pltexit processing) or
 * calls the destination on a duplicated copy of the caller's stack
 * arguments and hands the return value to audit_pltexit().
 *
 * On entry the 'glue code' has already done the following:
 *
 *	pushq	%rbp
 *	movq	%rsp, %rbp
 *	subq	$0x10, %rsp
 *	leaq	trace_fields(%rip), %r11
 *	movq	%r11, -0x8(%rbp)
 *	movq	$elf_plt_trace, %r11
 *	jmp	*%r11
 *
 * so - -8(%rbp) contains the dyndata ptr
 *
 *	0x0	Addr		*reflmp
 *	0x8	Addr		*deflmp
 *	0x10	Word		symndx
 *	0x14	Word		sb_flags
 *	0x18	Sym		symdef.st_name
 *	0x1c			symdef.st_info
 *	0x1d			symdef.st_other
 *	0x1e			symdef.st_shndx
 *	0x20			symdef.st_value
 *	0x28			symdef.st_size
 *
 * Also note - on entry 16 bytes have already been subtracted
 * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
 * the second 8 bytes are to align the stack and are available
 * for use.
 */
#define	REFLMP_OFF		0x0
#define	DEFLMP_OFF		0x8
#define	SYMNDX_OFF		0x10
#define	SBFLAGS_OFF		0x14
#define	SYMDEF_OFF		0x18
#define	SYMDEF_VALUE_OFF	0x20
/*
 * Local stack space storage for elf_plt_trace is allocated
 * as follows:
 *
 *  First - before we got here - %rsp has been decremented
 *  by 0x10 to make space for the dyndata ptr (and another
 *  free word).  In addition to that, we create space
 *  for the following:
 *
 *	La_amd64_regs	    8 * 8:	64
 *	prev_stack_size	    8		 8
 *	alignment pad			16
 *	Saved regs:
 *	    %rdi			 8
 *	    %rsi			 8
 *	    %rdx			 8
 *	    %rcx			 8
 *	    %r8				 8
 *	    %r9				 8
 *	    %r10			 8
 *	    %r11			 8
 *	    %rax			 8
 *				    =======
 *			    Subtotal:	160 (32byte aligned)
 *
 *	Saved Media Regs (used to pass floating point args):
 *	    %xmm0 - %xmm7   32 * 8:	256
 *	    (each slot is 32 bytes so it can hold a full %ymm register)
 *				    =======
 *			    Total:	416 (32byte aligned)
 *
 *  The total must be a multiple of 32: %rsp is rounded down to a 32-byte
 *  boundary before the subtraction, and the media register slots are
 *  addressed as multiples of 32 from the resulting %rsp.
 *
 *  So - will subtract the following to create enough space
 *
 *	-8(%rbp)	store dyndata ptr
 *	-16(%rbp)	store call destination
 *	-80(%rbp)	space for La_amd64_regs
 *	-88(%rbp)	prev stack size
 *  The next %rbp offsets are only true if the caller had correct stack
 *  alignment (and %rbp - 16 was already 32-byte aligned).  See note above
 *  SPRDIOFF for why we use %rsp alignment to access these stack fields.
 *	-112(%rbp)	entering %rdi
 *	-120(%rbp)	entering %rsi
 *	-128(%rbp)	entering %rdx
 *	-136(%rbp)	entering %rcx
 *	-144(%rbp)	entering %r8
 *	-152(%rbp)	entering %r9
 *	-160(%rbp)	entering %r10
 *	-168(%rbp)	entering %r11
 *	-176(%rbp)	entering %rax
 *	-208(%rbp)	entering %xmm0
 *	-240(%rbp)	entering %xmm1
 *	-272(%rbp)	entering %xmm2
 *	-304(%rbp)	entering %xmm3
 *	-336(%rbp)	entering %xmm4
 *	-368(%rbp)	entering %xmm5
 *	-400(%rbp)	entering %xmm6
 *	-432(%rbp)	entering %xmm7
 *
 */
#define	SPDYNOFF    -8
#define	SPDESTOFF   -16
#define	SPLAREGOFF  -80
#define	SPPRVSTKOFF -88

/*
 * The next set of offsets are relative to %rsp.
 * We guarantee %rsp is 32-byte aligned.  This guarantees the
 * ymm registers are saved to 32-byte aligned addresses.
 * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code.
 */
#define	SPRDIOFF	320
#define	SPRSIOFF	312
#define	SPRDXOFF	304
#define	SPRCXOFF	296
#define	SPR8OFF		288
#define	SPR9OFF		280
#define	SPR10OFF	272
#define	SPR11OFF	264
#define	SPRAXOFF	256
#define	SPXMM0OFF	224
#define	SPXMM1OFF	192
#define	SPXMM2OFF	160
#define	SPXMM3OFF	128
#define	SPXMM4OFF	96
#define	SPXMM5OFF	64
#define	SPXMM6OFF	32
#define	SPXMM7OFF	0

	/* See elf_rtbndr for explanation behind org_scapset */
	.extern org_scapset
	.globl	elf_plt_trace
	.type	elf_plt_trace,@function
	.align 16
elf_plt_trace:
	/*
	 * Enforce 32-byte stack alignment here.  The andq rounds %rsp down
	 * to a 32-byte boundary and the frame size subtracted next is a
	 * multiple of 32, so every SPXMMnOFF(%rsp) slot is 32-byte aligned,
	 * which the aligned %ymm stores and loads below depend on.
	 */
	andq    $-32, %rsp	/* enforce ABI 32-byte stack alignment */
	subq	$416,%rsp	/ create some local storage (multiple of 32)

	movq	%rdi, SPRDIOFF(%rsp)
	movq	%rsi, SPRSIOFF(%rsp)
	movq	%rdx, SPRDXOFF(%rsp)
	movq	%rcx, SPRCXOFF(%rsp)
	movq	%r8, SPR8OFF(%rsp)
	movq	%r9, SPR9OFF(%rsp)
	movq	%r10, SPR10OFF(%rsp)
	movq	%r11, SPR11OFF(%rsp)
	movq	%rax, SPRAXOFF(%rsp)

	/*
	 * %r9 and %rdx were saved above, so they are free to use as scratch:
	 * %r9 = &org_scapset, then %r9 = org_scapset, then %edx = the first
	 * 32-bit word it points at, which is tested for AV_386_AVX.
	 */
	movq	org_scapset@GOTPCREL(%rip),%r9
	movq	(%r9),%r9
	movl	(%r9),%edx
	testl	$AV_386_AVX,%edx
	jne	.trace_save_ymm

.trace_save_xmm:
	movdqa	%xmm0, SPXMM0OFF(%rsp)
	movdqa	%xmm1, SPXMM1OFF(%rsp)
	movdqa	%xmm2, SPXMM2OFF(%rsp)
	movdqa	%xmm3, SPXMM3OFF(%rsp)
	movdqa	%xmm4, SPXMM4OFF(%rsp)
	movdqa	%xmm5, SPXMM5OFF(%rsp)
	movdqa	%xmm6, SPXMM6OFF(%rsp)
	movdqa	%xmm7, SPXMM7OFF(%rsp)
	jmp	.trace_save_finish

.trace_save_ymm:
	vmovdqa	%ymm0, SPXMM0OFF(%rsp)
	vmovdqa	%ymm1, SPXMM1OFF(%rsp)
	vmovdqa	%ymm2, SPXMM2OFF(%rsp)
	vmovdqa	%ymm3, SPXMM3OFF(%rsp)
	vmovdqa	%ymm4, SPXMM4OFF(%rsp)
	vmovdqa	%ymm5, SPXMM5OFF(%rsp)
	vmovdqa	%ymm6, SPXMM6OFF(%rsp)
	vmovdqa	%ymm7, SPXMM7OFF(%rsp)

.trace_save_finish:

	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
	je	.start_pltenter
	movq	SYMDEF_VALUE_OFF(%rax), %rdi
	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
	jmp	.end_pltenter

.start_pltenter:
	/*
	 * save all registers into La_amd64_regs
	 */
	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
	leaq	8(%rbp), %rdi
	movq	%rdi, 0(%rsi)		/ la_rsp
	movq	0(%rbp), %rdi
	movq	%rdi, 8(%rsi)		/ la_rbp
	movq	SPRDIOFF(%rsp), %rdi
	movq	%rdi, 16(%rsi)		/ la_rdi
	movq	SPRSIOFF(%rsp), %rdi
	movq	%rdi, 24(%rsi)		/ la_rsi
	movq	SPRDXOFF(%rsp), %rdi
	movq	%rdi, 32(%rsi)		/ la_rdx
	movq	SPRCXOFF(%rsp), %rdi
	movq	%rdi, 40(%rsi)		/ la_rcx
	movq	SPR8OFF(%rsp), %rdi
	movq	%rdi, 48(%rsi)		/ la_r8
	movq	SPR9OFF(%rsp), %rdi
	movq	%rdi, 56(%rsi)		/ la_r9

	/*
	 * prepare for call to la_pltenter
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
	call	audit_pltenter@PLT
	movq	%rax, SPDESTOFF(%rbp)		/ save calling address
.end_pltenter:

	/*
	 * If *no* la_pltexit() routines exist
	 * we do not need to keep the stack frame
	 * before we call the actual routine.  Instead we
	 * jump to it and remove our stack from the stack
	 * at the same time.
	 */
	movl	audit_flags(%rip), %eax
	andl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
	cmpl	$0, %eax
	je	.bypass_pltexit
	/*
	 * Has the *nopltexit* flag been set for this entry point
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
	je	.start_pltexit

.bypass_pltexit:
	/*
	 * No PLTEXIT processing required.
	 */
	movq	0(%rbp), %r11
	movq	%r11, -8(%rbp)			/ move prev %rbp
	movq	SPDESTOFF(%rbp), %r11		/ r11 == calling destination
	movq	%r11, 0(%rbp)			/ store destination at top

	/
	/ Restore registers.  %r9 and %rdx are used as scratch for the
	/ capability test and are reloaded with their saved values afterwards.
	/
	movq	org_scapset@GOTPCREL(%rip),%r9
	movq	(%r9),%r9
	movl	(%r9),%edx
	testl	$AV_386_AVX,%edx
	jne	.trace_restore_ymm

.trace_restore_xmm:
	movdqa	SPXMM0OFF(%rsp), %xmm0
	movdqa	SPXMM1OFF(%rsp), %xmm1
	movdqa	SPXMM2OFF(%rsp), %xmm2
	movdqa	SPXMM3OFF(%rsp), %xmm3
	movdqa	SPXMM4OFF(%rsp), %xmm4
	movdqa	SPXMM5OFF(%rsp), %xmm5
	movdqa	SPXMM6OFF(%rsp), %xmm6
	movdqa	SPXMM7OFF(%rsp), %xmm7
	jmp	.trace_restore_finish

.trace_restore_ymm:
	vmovdqa	SPXMM0OFF(%rsp), %ymm0
	vmovdqa	SPXMM1OFF(%rsp), %ymm1
	vmovdqa	SPXMM2OFF(%rsp), %ymm2
	vmovdqa	SPXMM3OFF(%rsp), %ymm3
	vmovdqa	SPXMM4OFF(%rsp), %ymm4
	vmovdqa	SPXMM5OFF(%rsp), %ymm5
	vmovdqa	SPXMM6OFF(%rsp), %ymm6
	vmovdqa	SPXMM7OFF(%rsp), %ymm7

.trace_restore_finish:
	movq	SPRDIOFF(%rsp), %rdi
	movq	SPRSIOFF(%rsp), %rsi
	movq	SPRDXOFF(%rsp), %rdx
	movq	SPRCXOFF(%rsp), %rcx
	movq	SPR8OFF(%rsp), %r8
	movq	SPR9OFF(%rsp), %r9
	movq	SPR10OFF(%rsp), %r10
	movq	SPR11OFF(%rsp), %r11
	movq	SPRAXOFF(%rsp), %rax

	subq	$8, %rbp			/ adjust %rbp for 'ret'
	movq	%rbp, %rsp			/
	/*
	 * At this point, after a little doctoring, we should
	 * have the following on the stack:
	 *
	 *	16(%rsp):  ret addr
	 *	8(%rsp):  dest_addr
	 *	0(%rsp):  Previous %rbp
	 *
	 * So - we pop the previous %rbp, and then
	 * ret to our final destination.
	 */
	popq	%rbp				/
	ret					/ jmp to final destination
						/ and clean up stack :)

.start_pltexit:
	/*
	 * In order to call the destination procedure and then return
	 * to audit_pltexit() for post analysis we must first grow
	 * our stack frame and then duplicate the original callers
	 * stack state.  This duplicates all of the arguments
	 * that were to be passed to the destination procedure.
	 */
	movq	%rbp, %rdi			/
	addq	$16, %rdi			/    %rdi = src
	movq	(%rbp), %rdx			/
	subq	%rdi, %rdx			/    %rdx == prev frame sz
	/*
	 * If audit_argcnt > 0 then we limit the number of
	 * arguments that will be duplicated to audit_argcnt.
	 *
	 * If (prev_stack_size > (audit_argcnt * 8))
	 *	prev_stack_size = audit_argcnt * 8;
	 */
	movl	audit_argcnt(%rip),%eax		/   %eax = audit_argcnt
	cmpl	$0, %eax
	jle	.grow_stack
	leaq	(,%rax,8), %rax			/    %rax = audit_argcnt * 8
	cmpq	%rax,%rdx
	jle	.grow_stack
	movq	%rax, %rdx
	/*
	 * Grow the stack and duplicate the arguments of the
	 * original caller.
	 *
	 * We save %rsp in %r11 since we need to use the current rsp for
	 * accessing the registers saved in our stack frame.
	 */
.grow_stack:
	movq	%rsp, %r11
	subq	%rdx, %rsp			/    grow the stack
	movq	%rdx, SPPRVSTKOFF(%rbp)		/    -88(%rbp) == prev frame sz
	movq	%rsp, %rcx			/    %rcx = dest
	addq	%rcx, %rdx			/    %rdx == tail of dest
.while_base:
	cmpq	%rdx, %rcx			/   while (dest < tail) {
	jge	.end_while			/
	movq	(%rdi), %rsi
	movq	%rsi,(%rcx)			/        *dest = *src
	addq	$8, %rdi			/	 src++
	addq	$8, %rcx			/        dest++
	jmp	.while_base			/    }

	/*
	 * The above stack is now a duplicate of the (possibly
	 * audit_argcnt limited) stack of the original calling procedure.
	 */
.end_while:
	/
	/ Restore registers using %r11 which contains our old %rsp value
	/ before growing the stack.
	/

	/ Yes, we have to do this dance again. Sorry.
	movq	org_scapset@GOTPCREL(%rip),%r9
	movq	(%r9),%r9
	movl	(%r9),%edx
	testl	$AV_386_AVX,%edx
	jne	.trace_r2_ymm

.trace_r2_xmm:
	movdqa	SPXMM0OFF(%r11), %xmm0
	movdqa	SPXMM1OFF(%r11), %xmm1
	movdqa	SPXMM2OFF(%r11), %xmm2
	movdqa	SPXMM3OFF(%r11), %xmm3
	movdqa	SPXMM4OFF(%r11), %xmm4
	movdqa	SPXMM5OFF(%r11), %xmm5
	movdqa	SPXMM6OFF(%r11), %xmm6
	movdqa	SPXMM7OFF(%r11), %xmm7
	jmp	.trace_r2_finish

.trace_r2_ymm:
	vmovdqa	SPXMM0OFF(%r11), %ymm0
	vmovdqa	SPXMM1OFF(%r11), %ymm1
	vmovdqa	SPXMM2OFF(%r11), %ymm2
	vmovdqa	SPXMM3OFF(%r11), %ymm3
	vmovdqa	SPXMM4OFF(%r11), %ymm4
	vmovdqa	SPXMM5OFF(%r11), %ymm5
	vmovdqa	SPXMM6OFF(%r11), %ymm6
	vmovdqa	SPXMM7OFF(%r11), %ymm7

.trace_r2_finish:
	movq	SPRDIOFF(%r11), %rdi
	movq	SPRSIOFF(%r11), %rsi
	movq	SPRDXOFF(%r11), %rdx
	movq	SPRCXOFF(%r11), %rcx
	movq	SPR8OFF(%r11), %r8
	movq	SPR9OFF(%r11), %r9
	movq	SPR10OFF(%r11), %r10
	movq	SPRAXOFF(%r11), %rax
	movq	SPR11OFF(%r11), %r11		/ restore %r11 last

	/*
	 * Call to destination function - we'll return here
	 * for pltexit monitoring.
	 */
	call	*SPDESTOFF(%rbp)

	addq	SPPRVSTKOFF(%rbp), %rsp	/ cleanup dupped stack

	/
	/ prepare for call to audit_pltexit()
	/
	/ symndx is a 32-bit Word, so it is loaded with movl (as on the
	/ pltenter path) rather than pulling sb_flags into the upper half.
	/
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	movl	SYMNDX_OFF(%r11), %r8d		/ arg5 (symndx)
	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
	movq	%rax, %rdi			/ arg1 (returnval)
	call	audit_pltexit@PLT

	/*
	 * Clean up after ourselves and return to the
	 * original calling procedure.
	 */

	/*
	 * Restore registers.
	 *
	 * NOTE(review): only the low 128 bits (%xmm) are reloaded here, even
	 * when the %ymm registers were saved on entry, and %rdx/%xmm0/%xmm1
	 * are reset to their entry values rather than whatever the callee
	 * returned in them.  Confirm that this is intended before relying on
	 * those registers after a pltexit-audited call.
	 */
	movq	SPRDIOFF(%rsp), %rdi
	movq	SPRSIOFF(%rsp), %rsi
	movq	SPRDXOFF(%rsp), %rdx
	movq	SPRCXOFF(%rsp), %rcx
	movq	SPR8OFF(%rsp), %r8
	movq	SPR9OFF(%rsp), %r9
	movq	SPR10OFF(%rsp), %r10
	movq	SPR11OFF(%rsp), %r11
	// rax already contains return value
	movdqa	SPXMM0OFF(%rsp), %xmm0
	movdqa	SPXMM1OFF(%rsp), %xmm1
	movdqa	SPXMM2OFF(%rsp), %xmm2
	movdqa	SPXMM3OFF(%rsp), %xmm3
	movdqa	SPXMM4OFF(%rsp), %xmm4
	movdqa	SPXMM5OFF(%rsp), %xmm5
	movdqa	SPXMM6OFF(%rsp), %xmm6
	movdqa	SPXMM7OFF(%rsp), %xmm7

	movq	%rbp, %rsp			/
	popq	%rbp				/
	ret					/ return to caller
	.size	elf_plt_trace, .-elf_plt_trace
5017c478bd9Sstevel@tonic-gate#endif
5027c478bd9Sstevel@tonic-gate
5037c478bd9Sstevel@tonic-gate/*
5047c478bd9Sstevel@tonic-gate * We got here because a call to a function resolved to a procedure
5057c478bd9Sstevel@tonic-gate * linkage table entry.  That entry did a JMPL to the first PLT entry, which
5067c478bd9Sstevel@tonic-gate * in turn did a call to elf_rtbndr.
5077c478bd9Sstevel@tonic-gate *
5087c478bd9Sstevel@tonic-gate * the code sequence that got us here was:
5097c478bd9Sstevel@tonic-gate *
5107c478bd9Sstevel@tonic-gate * .PLT0:
5117c478bd9Sstevel@tonic-gate *	pushq	GOT+8(%rip)	#GOT[1]
5127c478bd9Sstevel@tonic-gate *	jmp	*GOT+16(%rip)	#GOT[2]
5137c478bd9Sstevel@tonic-gate *	nop
5147c478bd9Sstevel@tonic-gate *	nop
5157c478bd9Sstevel@tonic-gate *	nop
5167c478bd9Sstevel@tonic-gate *	nop
5177c478bd9Sstevel@tonic-gate *	...
5187c478bd9Sstevel@tonic-gate * PLT entry for foo:
5197c478bd9Sstevel@tonic-gate *	jmp	*name1@GOTPCREL(%rip)
5207c478bd9Sstevel@tonic-gate *	pushl	$rel.plt.foo
5217c478bd9Sstevel@tonic-gate *	jmp	PLT0
5227c478bd9Sstevel@tonic-gate *
5237c478bd9Sstevel@tonic-gate * At entry, the stack looks like this:
5247c478bd9Sstevel@tonic-gate *
5257c478bd9Sstevel@tonic-gate *	return address			16(%rsp)
5267c478bd9Sstevel@tonic-gate *	$rel.plt.foo	(plt index)	8(%rsp)
5277c478bd9Sstevel@tonic-gate *	lmp				0(%rsp)
5287c478bd9Sstevel@tonic-gate *
5297c478bd9Sstevel@tonic-gate */
5307c478bd9Sstevel@tonic-gate#if defined(lint)
5317c478bd9Sstevel@tonic-gate
5327c478bd9Sstevel@tonic-gateextern unsigned long	elf_bndr(Rt_map *, unsigned long, caddr_t);
5337c478bd9Sstevel@tonic-gate
5347c478bd9Sstevel@tonic-gatevoid
5357c478bd9Sstevel@tonic-gateelf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
5367c478bd9Sstevel@tonic-gate{
5377c478bd9Sstevel@tonic-gate	(void) elf_bndr(lmp, reloc, pc);
5387c478bd9Sstevel@tonic-gate}
5397c478bd9Sstevel@tonic-gate
5407c478bd9Sstevel@tonic-gate#else
5412c3cf7aaSbholler
5422c3cf7aaSbholler/*
5432c3cf7aaSbholler * The PLT code that landed us here placed 2 arguments on the stack as
5442c3cf7aaSbholler * arguments to elf_rtbndr.
5452c3cf7aaSbholler * Additionally the pc of caller is below these 2 args.
5462c3cf7aaSbholler * Our stack will look like this after we establish a stack frame with
5472c3cf7aaSbholler * push %rbp; movq %rsp, %rbp sequence:
5482c3cf7aaSbholler *
5492c3cf7aaSbholler *	8(%rbp)			arg1 - *lmp
5502c3cf7aaSbholler *	16(%rbp), %rsi		arg2 - reloc index
5512c3cf7aaSbholler *	24(%rbp), %rdx		arg3 - pc of caller
5522c3cf7aaSbholler */
5532c3cf7aaSbholler#define	LBPLMPOFF	8	/* arg1 - *lmp */
5542c3cf7aaSbholler#define	LBPRELOCOFF	16	/* arg2 - reloc index */
5552c3cf7aaSbholler#define	LBRPCOFF	24	/* arg3 - pc of caller */
5562c3cf7aaSbholler
5572c3cf7aaSbholler/*
5582c3cf7aaSbholler * Possible arguments for the resolved function are in registers as per
5592c3cf7aaSbholler * the AMD64 ABI.  We must save on the local stack all possible register
5602c3cf7aaSbholler * arguments before interposing functions to resolve the called function.
5612c3cf7aaSbholler * Possible arguments must be restored before invoking the resolved function.
5622c3cf7aaSbholler *
563*f3390f39SRobert Mustacchi * Before the AVX instruction set enhancements to AMD64 there were no changes in
564*f3390f39SRobert Mustacchi * the set of registers and their sizes across different processors. With AVX,
565*f3390f39SRobert Mustacchi * the xmm registers became the lower 128 bits of the ymm registers. Because of
566*f3390f39SRobert Mustacchi * this, we need to conditionally save 256 bits instead of 128 bits. Regardless
567*f3390f39SRobert Mustacchi * of whether we have ymm registers or not, we're always going to push the stack
568*f3390f39SRobert Mustacchi * space assuming that we do to simplify the code.
569*f3390f39SRobert Mustacchi *
5702c3cf7aaSbholler * Local stack space storage for elf_rtbndr is allocated as follows:
5712c3cf7aaSbholler *
5722c3cf7aaSbholler *	Saved regs:
5732c3cf7aaSbholler *	    %rax			 8
5742c3cf7aaSbholler *	    %rdi			 8
5752c3cf7aaSbholler *	    %rsi			 8
5762c3cf7aaSbholler *	    %rdx			 8
5772c3cf7aaSbholler *	    %rcx			 8
5782c3cf7aaSbholler *	    %r8				 8
5792c3cf7aaSbholler *	    %r9				 8
5802c3cf7aaSbholler *	    %r10			 8
5812c3cf7aaSbholler *				    =======
582*f3390f39SRobert Mustacchi *			    Subtotal:   64 (32byte aligned)
5832c3cf7aaSbholler *
5842c3cf7aaSbholler *	Saved Media Regs (used to pass floating point args):
585*f3390f39SRobert Mustacchi *	    %ymm0 - %ymm7   32 * 8     256
5862c3cf7aaSbholler *				    =======
587*f3390f39SRobert Mustacchi *			    Total:     320 (32byte aligned)
5882c3cf7aaSbholler *
5892c3cf7aaSbholler *  So - will subtract the following to create enough space
5902c3cf7aaSbholler *
591942cd3bfSbholler *	0(%rsp)		save %rax
592942cd3bfSbholler *	8(%rsp)		save %rdi
593942cd3bfSbholler *	16(%rsp)	save %rsi
594942cd3bfSbholler *	24(%rsp)	save %rdx
595942cd3bfSbholler *	32(%rsp)	save %rcx
596942cd3bfSbholler *	40(%rsp)	save %r8
597942cd3bfSbholler *	48(%rsp)	save %r9
598942cd3bfSbholler *	56(%rsp)	save %r10
599*f3390f39SRobert Mustacchi *	64(%rsp)	save %ymm0
600*f3390f39SRobert Mustacchi *	96(%rsp)	save %ymm1
601*f3390f39SRobert Mustacchi *	128(%rsp)	save %ymm2
602*f3390f39SRobert Mustacchi *	160(%rsp)	save %ymm3
603*f3390f39SRobert Mustacchi *	192(%rsp)	save %ymm4
604*f3390f39SRobert Mustacchi *	224(%rsp)	save %ymm5
605*f3390f39SRobert Mustacchi *	256(%rsp)	save %ymm6
606*f3390f39SRobert Mustacchi *	288(%rsp)	save %ymm7
607942cd3bfSbholler *
608942cd3bfSbholler * Note: Some callers may use 8-byte stack alignment instead of the
609942cd3bfSbholler * ABI required 16-byte alignment.  We use %rsp offsets to save/restore
610942cd3bfSbholler * registers because %rbp may not be suitably aligned.  We guarantee %rsp
611942cd3bfSbholler * is 32-byte aligned in the function preamble.
6122c3cf7aaSbholler */
613*f3390f39SRobert Mustacchi/*
614*f3390f39SRobert Mustacchi * As the registers may either be xmm or ymm, we've left the name as xmm, but
615*f3390f39SRobert Mustacchi * increased the offset between them to always cover the xmm and ymm cases.
616*f3390f39SRobert Mustacchi */
617*f3390f39SRobert Mustacchi#define	LS_SIZE	$320	/* bytes of local stack for all saved args ('$' = immediate) */
618942cd3bfSbholler#define	LSRAXOFF	0	/* %rax: for SSE register count */
619942cd3bfSbholler#define	LSRDIOFF	8	/* %rdi: arg 0 ... */
620942cd3bfSbholler#define	LSRSIOFF	16	/* %rsi: arg 1 */
621942cd3bfSbholler#define	LSRDXOFF	24	/* %rdx: arg 2 */
622942cd3bfSbholler#define	LSRCXOFF	32	/* %rcx: arg 3 */
623942cd3bfSbholler#define	LSR8OFF		40	/* %r8: arg 4 */
624942cd3bfSbholler#define	LSR9OFF		48	/* %r9: ... arg 5 */
625942cd3bfSbholler#define	LSR10OFF	56	/* %r10: call chain reg */
626942cd3bfSbholler#define	LSXMM0OFF	64	/* SSE arg 0 ... (32-byte slots fit xmm or ymm) */
627*f3390f39SRobert Mustacchi#define	LSXMM1OFF	96	/* SSE arg 1 */
628*f3390f39SRobert Mustacchi#define	LSXMM2OFF	128	/* SSE arg 2 */
629*f3390f39SRobert Mustacchi#define	LSXMM3OFF	160	/* SSE arg 3 */
630*f3390f39SRobert Mustacchi#define	LSXMM4OFF	192	/* SSE arg 4 */
631*f3390f39SRobert Mustacchi#define	LSXMM5OFF	224	/* SSE arg 5 */
632*f3390f39SRobert Mustacchi#define	LSXMM6OFF	256	/* SSE arg 6 */
633*f3390f39SRobert Mustacchi#define	LSXMM7OFF	288	/* ... SSE arg 7 */
6342c3cf7aaSbholler
635*f3390f39SRobert Mustacchi	/*
636*f3390f39SRobert Mustacchi	 * The org_scapset is a global variable that is a part of rtld. It
637*f3390f39SRobert Mustacchi	 * contains the capabilities that the kernel has told us are supported
638*f3390f39SRobert Mustacchi	 * (auxv_hwcap). This is necessary for determining whether or not we
639*f3390f39SRobert Mustacchi	 * need to save and restore AVX registers or simple SSE registers. Note
640*f3390f39SRobert Mustacchi	 * that the field we care about is currently at offset 0; if that
641*f3390f39SRobert Mustacchi	 * changes, this code will have to be updated.
642*f3390f39SRobert Mustacchi	 */
643*f3390f39SRobert Mustacchi	.extern org_scapset	/* rtld global; its first word is tested for AV_386_AVX below */
6447c478bd9Sstevel@tonic-gate	.weak	_elf_rtbndr
6457c478bd9Sstevel@tonic-gate	_elf_rtbndr = elf_rtbndr	/* weak alias for elf_rtbndr */
6467c478bd9Sstevel@tonic-gate
6477c478bd9Sstevel@tonic-gate	ENTRY(elf_rtbndr)
6487c478bd9Sstevel@tonic-gate
6497c478bd9Sstevel@tonic-gate	pushq	%rbp		/* save caller's frame pointer */
6507c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp	/* %rbp anchors the LBP* offsets and the final %rsp restore */
6517c478bd9Sstevel@tonic-gate
652942cd3bfSbholler	/*
653942cd3bfSbholler	 * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack
654942cd3bfSbholler	 * alignment.  The aligned saves below (movdqa/vmovdqa) use slots at
655942cd3bfSbholler	 * 32-byte-multiple offsets from %rsp, so round %rsp down to a 32-byte
656942cd3bfSbholler	 * boundary here; that also satisfies the ABI's 16-byte requirement.
657942cd3bfSbholler	 */
658*f3390f39SRobert Mustacchi	andq	$-32, %rsp	/* round %rsp down to a 32-byte boundary */
659942cd3bfSbholler
6602c3cf7aaSbholler	subq	LS_SIZE, %rsp	/* reserve save area for all ABI defined argument registers */
6617c478bd9Sstevel@tonic-gate
662942cd3bfSbholler	movq	%rax, LSRAXOFF(%rsp)	/* for SSE register count */
663942cd3bfSbholler	movq	%rdi, LSRDIOFF(%rsp)	/*  arg 0 .. */
664942cd3bfSbholler	movq	%rsi, LSRSIOFF(%rsp)
665942cd3bfSbholler	movq	%rdx, LSRDXOFF(%rsp)
666942cd3bfSbholler	movq	%rcx, LSRCXOFF(%rsp)
667942cd3bfSbholler	movq	%r8, LSR8OFF(%rsp)
668942cd3bfSbholler	movq	%r9, LSR9OFF(%rsp)	/* .. arg 5 */
669942cd3bfSbholler	movq	%r10, LSR10OFF(%rsp)	/* call chain reg */
6702c3cf7aaSbholler
671*f3390f39SRobert Mustacchi	/*
672*f3390f39SRobert Mustacchi	 * Our xmm registers could secretly be ymm registers in disguise.
673*f3390f39SRobert Mustacchi	 */
674*f3390f39SRobert Mustacchi	movq	org_scapset@GOTPCREL(%rip),%r9	/* %r9 = &org_scapset via GOT (%r9, %rdx saved above) */
675*f3390f39SRobert Mustacchi	movq	(%r9),%r9	/* %r9 = value of org_scapset (a pointer) */
676*f3390f39SRobert Mustacchi	movl	(%r9),%edx	/* %edx = 32-bit word at offset 0 */
677*f3390f39SRobert Mustacchi	testl	$AV_386_AVX,%edx	/* AVX capability bit set? */
678*f3390f39SRobert Mustacchi	jne	.save_ymm	/* set: save the full 256-bit registers */
679*f3390f39SRobert Mustacchi
680*f3390f39SRobert Mustacchi.save_xmm:
681942cd3bfSbholler	movdqa	%xmm0, LSXMM0OFF(%rsp)	/* SSE arg 0 ... */
682942cd3bfSbholler	movdqa	%xmm1, LSXMM1OFF(%rsp)
683942cd3bfSbholler	movdqa	%xmm2, LSXMM2OFF(%rsp)
684942cd3bfSbholler	movdqa	%xmm3, LSXMM3OFF(%rsp)
685942cd3bfSbholler	movdqa	%xmm4, LSXMM4OFF(%rsp)
686942cd3bfSbholler	movdqa	%xmm5, LSXMM5OFF(%rsp)
687942cd3bfSbholler	movdqa	%xmm6, LSXMM6OFF(%rsp)
688942cd3bfSbholler	movdqa	%xmm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 */
689*f3390f39SRobert Mustacchi	jmp	.save_finish	/* skip the ymm variant */
6902c3cf7aaSbholler
691*f3390f39SRobert Mustacchi.save_ymm:
692*f3390f39SRobert Mustacchi	vmovdqa	%ymm0, LSXMM0OFF(%rsp)	/* SSE arg 0 (full ymm) ... */
693*f3390f39SRobert Mustacchi	vmovdqa	%ymm1, LSXMM1OFF(%rsp)
694*f3390f39SRobert Mustacchi	vmovdqa	%ymm2, LSXMM2OFF(%rsp)
695*f3390f39SRobert Mustacchi	vmovdqa	%ymm3, LSXMM3OFF(%rsp)
696*f3390f39SRobert Mustacchi	vmovdqa	%ymm4, LSXMM4OFF(%rsp)
697*f3390f39SRobert Mustacchi	vmovdqa	%ymm5, LSXMM5OFF(%rsp)
698*f3390f39SRobert Mustacchi	vmovdqa	%ymm6, LSXMM6OFF(%rsp)
699*f3390f39SRobert Mustacchi	vmovdqa	%ymm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 (full ymm) */
700*f3390f39SRobert Mustacchi
701*f3390f39SRobert Mustacchi.save_finish:
7022c3cf7aaSbholler	movq	LBPLMPOFF(%rbp), %rdi	/* arg1 - *lmp */
7032c3cf7aaSbholler	movq	LBPRELOCOFF(%rbp), %rsi	/* arg2 - reloc index */
7042c3cf7aaSbholler	movq	LBRPCOFF(%rbp), %rdx	/* arg3 - pc of caller */
7057c478bd9Sstevel@tonic-gate	call	elf_bndr@PLT		/* call elf_bndr(lmp, relndx, pc) */
7062c3cf7aaSbholler	movq	%rax, LBPRELOCOFF(%rbp)	/* store final destination over the reloc index slot */
7077c478bd9Sstevel@tonic-gate
708*f3390f39SRobert Mustacchi	/*
709*f3390f39SRobert Mustacchi	 * Restore possible arguments before invoking resolved function. Do the
710*f3390f39SRobert Mustacchi	 * xmm vs. ymm check first: it scratches %r9 and %rdx, restored below.
711*f3390f39SRobert Mustacchi	 */
712*f3390f39SRobert Mustacchi	movq	org_scapset@GOTPCREL(%rip),%r9	/* %r9 = &org_scapset via GOT */
713*f3390f39SRobert Mustacchi	movq	(%r9),%r9	/* %r9 = value of org_scapset (a pointer) */
714*f3390f39SRobert Mustacchi	movl	(%r9),%edx	/* %edx = 32-bit word at offset 0 */
715*f3390f39SRobert Mustacchi	testl	$AV_386_AVX,%edx	/* AVX capability bit set? */
716*f3390f39SRobert Mustacchi	jne	.restore_ymm	/* set: reload the full 256-bit registers */
7172c3cf7aaSbholler
718*f3390f39SRobert Mustacchi.restore_xmm:
719942cd3bfSbholler	movdqa	LSXMM0OFF(%rsp), %xmm0	/* SSE arg 0 ... */
720942cd3bfSbholler	movdqa	LSXMM1OFF(%rsp), %xmm1
721942cd3bfSbholler	movdqa	LSXMM2OFF(%rsp), %xmm2
722942cd3bfSbholler	movdqa	LSXMM3OFF(%rsp), %xmm3
723942cd3bfSbholler	movdqa	LSXMM4OFF(%rsp), %xmm4
724942cd3bfSbholler	movdqa	LSXMM5OFF(%rsp), %xmm5
725942cd3bfSbholler	movdqa	LSXMM6OFF(%rsp), %xmm6
726942cd3bfSbholler	movdqa	LSXMM7OFF(%rsp), %xmm7	/* ... SSE arg 7 */
727*f3390f39SRobert Mustacchi	jmp .restore_finish	/* skip the ymm variant */
728*f3390f39SRobert Mustacchi
729*f3390f39SRobert Mustacchi.restore_ymm:
730*f3390f39SRobert Mustacchi	vmovdqa	LSXMM0OFF(%rsp), %ymm0	/* SSE arg 0 (full ymm) ... */
731*f3390f39SRobert Mustacchi	vmovdqa	LSXMM1OFF(%rsp), %ymm1
732*f3390f39SRobert Mustacchi	vmovdqa	LSXMM2OFF(%rsp), %ymm2
733*f3390f39SRobert Mustacchi	vmovdqa	LSXMM3OFF(%rsp), %ymm3
734*f3390f39SRobert Mustacchi	vmovdqa	LSXMM4OFF(%rsp), %ymm4
735*f3390f39SRobert Mustacchi	vmovdqa	LSXMM5OFF(%rsp), %ymm5
736*f3390f39SRobert Mustacchi	vmovdqa	LSXMM6OFF(%rsp), %ymm6
737*f3390f39SRobert Mustacchi	vmovdqa	LSXMM7OFF(%rsp), %ymm7	/* ... SSE arg 7 (full ymm) */
738*f3390f39SRobert Mustacchi
739*f3390f39SRobert Mustacchi.restore_finish:
740*f3390f39SRobert Mustacchi	movq	LSRAXOFF(%rsp), %rax	/* SSE register count */
741*f3390f39SRobert Mustacchi	movq	LSRDIOFF(%rsp), %rdi	/* arg 0 .. */
742*f3390f39SRobert Mustacchi	movq	LSRSIOFF(%rsp), %rsi
743*f3390f39SRobert Mustacchi	movq	LSRDXOFF(%rsp), %rdx
744*f3390f39SRobert Mustacchi	movq	LSRCXOFF(%rsp), %rcx
745*f3390f39SRobert Mustacchi	movq	LSR8OFF(%rsp), %r8
746*f3390f39SRobert Mustacchi	movq	LSR9OFF(%rsp), %r9	/* .. arg 5 */
747*f3390f39SRobert Mustacchi	movq	LSR10OFF(%rsp), %r10	/* call chain reg */
7487c478bd9Sstevel@tonic-gate
7497c478bd9Sstevel@tonic-gate	movq	%rbp, %rsp	/* discard the aligned save area */
7507c478bd9Sstevel@tonic-gate	popq	%rbp		/* restore caller's frame pointer */
7517c478bd9Sstevel@tonic-gate
7527c478bd9Sstevel@tonic-gate	addq	$8, %rsp	/* pop 1st plt-pushed arg */
7537c478bd9Sstevel@tonic-gate				/* the second argument is used */
7547c478bd9Sstevel@tonic-gate				/* for the 'return' address to our */
7557c478bd9Sstevel@tonic-gate				/* final destination */
7567c478bd9Sstevel@tonic-gate
7577c478bd9Sstevel@tonic-gate	ret			/* invoke resolved function */
7587c478bd9Sstevel@tonic-gate	.size 	elf_rtbndr, .-elf_rtbndr
7597c478bd9Sstevel@tonic-gate#endif
760