xref: /titanic_52/usr/src/cmd/sgs/rtld/amd64/boot_elf.s (revision d0fa49b78d1f40d84ec76c363cdc38cf128511dd)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#if	defined(lint)
30
31#include	<sys/types.h>
32#include	<_rtld.h>
33#include	<_audit.h>
34#include	<_elf.h>
35#include	<sys/regset.h>
36
37/* ARGSUSED0 */
38int
39elf_plt_trace()	/* lint-only stub; the real routine is the assembly in the #else branch */
40{
41	return (0);	/* placeholder value, only ever seen by lint */
42}
43#else
44
45#include	<link.h>
46#include	<_audit.h>
47#include	<sys/asm_linkage.h>
48
49	.file	"boot_elf.s"
50	.text
51
52/*
53 * On entry the 'glue code' has already  done the following:
54 *
55 *	pushq	%rbp
56 *	movq	%rsp, %rbp
57 *	subq	$0x10, %rsp
58 *	leaq	trace_fields(%rip), %r11
59 *	movq	%r11, -0x8(%rbp)
60 *	movq	$elf_plt_trace, %r11
61 *	jmp	*%r11
62 *
63 * so - -8(%rbp) contains the dyndata ptr
64 *
65 *	0x0	Addr		*reflmp
66 *	0x8	Addr		*deflmp
67 *	0x10	Word		symndx
68 *	0x14	Word		sb_flags
69 *	0x18	Sym		symdef.st_name
70 *	0x1c			symdef.st_info
71 *	0x1d			symdef.st_other
72 *	0x1e			symdef.st_shndx
73 *	0x20			symdef.st_value
74 *	0x28			symdef.st_size
75 *
76 * Also note - on entry 16 bytes have already been subtracted
77 * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
78 * the second 8 bytes are to align the stack and are available
79 * for use.
80 */
81#define	REFLMP_OFF		0x0	/* Addr *reflmp */
82#define	DEFLMP_OFF		0x8	/* Addr *deflmp */
83#define	SYMNDX_OFF		0x10	/* Word symndx (32 bits) */
84#define	SBFLAGS_OFF		0x14	/* Word sb_flags (32 bits) */
85#define	SYMDEF_OFF		0x18	/* start of the embedded Sym (symdef.st_name) */
86#define	SYMDEF_VALUE_OFF	0x20	/* symdef.st_value */
87/*
88 * Local stack space storage for elf_plt_trace is allocated
89 * as follows:
90 *
91 *  First - before we got here - %rsp has been decremented
92 *  by 0x10 to make space for the dyndata ptr (and another
93 *  free word).  In addition to that, we create space
94 *  for the following:
95 *
96 *	La_amd64_regs	    8 * 8:	64
97 *	prev_stack_size	    8		 8
98 *	Saved regs:
99 *	    %rdi			 8
100 *	    %rsi			 8
101 *	    %rdx			 8
102 *	    %rcx			 8
103 *	    %r8				 8
104 *	    %r9				 8
105 *	    %r10			 8
106 *	    %r11			 8
107 *	    %rax			 8
108 *				    =======
109 *			    Subtotal:	144 (16byte aligned)
110 *
111 *	Saved Media Regs (used to pass floating point args):
112 *	    %xmm0 - %xmm7   16 * 8:	128
113 *				    =======
114 *			    Total:	272 (16byte aligned)
115 *
116 *  The resulting frame is laid out as follows (offsets from %rbp):
117 *
118 *	-8(%rbp)	store dyndata ptr
119 *	-16(%rbp)	store call destination
120 *	-80(%rbp)	space for La_amd64_regs
121 *	-88(%rbp)	prev stack size
122 *  The next %rbp offsets are only true if the caller had correct stack
123 *  alignment.  See note above SPRDIOFF for why we use %rsp alignment to
124 *  access these stack fields.
125 *	-96(%rbp)	entering %rdi
126 *	-104(%rbp)	entering %rsi
127 *	-112(%rbp)	entering %rdx
128 *	-120(%rbp)	entering %rcx
129 *	-128(%rbp)	entering %r8
130 *	-136(%rbp)	entering %r9
131 *	-144(%rbp)	entering %r10
132 *	-152(%rbp)	entering %r11
133 *	-160(%rbp)	entering %rax
134 *	-176(%rbp)	entering %xmm0
135 *	-192(%rbp)	entering %xmm1
136 *	-208(%rbp)	entering %xmm2
137 *	-224(%rbp)	entering %xmm3
138 *	-240(%rbp)	entering %xmm4
139 *	-256(%rbp)	entering %xmm5
140 *	-272(%rbp)	entering %xmm6
141 *	-288(%rbp)	entering %xmm7
142 *
143 */
144#define	SPDYNOFF    -8	/* dyndata ptr, stored by the glue code */
145#define	SPDESTOFF   -16	/* call destination */
146#define	SPLAREGOFF  -80	/* base of La_amd64_regs (8 * 8 bytes) */
147#define	SPPRVSTKOFF -88	/* prev stack size */
148
149/*
150 * The next set of offsets are relative to %rsp.
151 * We guarantee %rsp is ABI compliant 16-byte aligned.  This guarantees the
152 * xmm registers are saved to 16-byte aligned addresses.
153 * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code.
154 */
155#define	SPRDIOFF	192	/* entering %rdi */
156#define	SPRSIOFF	184	/* entering %rsi */
157#define	SPRDXOFF	176	/* entering %rdx */
158#define	SPRCXOFF	168	/* entering %rcx */
159#define	SPR8OFF		160	/* entering %r8 */
160#define	SPR9OFF		152	/* entering %r9 */
161#define	SPR10OFF	144	/* entering %r10 */
162#define	SPR11OFF	136	/* entering %r11 */
163#define	SPRAXOFF	128	/* entering %rax */
164#define	SPXMM0OFF	112	/* entering %xmm0 (16-byte slot) */
165#define	SPXMM1OFF	96	/* entering %xmm1 */
166#define	SPXMM2OFF	80	/* entering %xmm2 */
167#define	SPXMM3OFF	64	/* entering %xmm3 */
168#define	SPXMM4OFF	48	/* entering %xmm4 */
169#define	SPXMM5OFF	32	/* entering %xmm5 */
170#define	SPXMM6OFF	16	/* entering %xmm6 */
171#define	SPXMM7OFF	0	/* entering %xmm7 (lowest slot, at %rsp) */
172
173	.globl	elf_plt_trace
174	.type	elf_plt_trace,@function
175	.align 16
176elf_plt_trace:
177	/*
178	 * PLT auditing trampoline, entered by a jmp from the per-symbol glue code.
179	 * Enforce ABI 16-byte stack alignment first: the andq below subtracts 8
180	 * from %rsp when %rsp is only 8-byte aligned, keeping the movdqa saves legal.
181	 */
182	andq    $-16, %rsp	/* enforce ABI 16-byte stack alignment */
183	subq	$272,%rsp	/ create some local storage
184
185	movq	%rdi, SPRDIOFF(%rsp)
186	movq	%rsi, SPRSIOFF(%rsp)
187	movq	%rdx, SPRDXOFF(%rsp)
188	movq	%rcx, SPRCXOFF(%rsp)
189	movq	%r8, SPR8OFF(%rsp)
190	movq	%r9, SPR9OFF(%rsp)
191	movq	%r10, SPR10OFF(%rsp)
192	movq	%r11, SPR11OFF(%rsp)
193	movq	%rax, SPRAXOFF(%rsp)
194	movdqa	%xmm0, SPXMM0OFF(%rsp)
195	movdqa	%xmm1, SPXMM1OFF(%rsp)
196	movdqa	%xmm2, SPXMM2OFF(%rsp)
197	movdqa	%xmm3, SPXMM3OFF(%rsp)
198	movdqa	%xmm4, SPXMM4OFF(%rsp)
199	movdqa	%xmm5, SPXMM5OFF(%rsp)
200	movdqa	%xmm6, SPXMM6OFF(%rsp)
201	movdqa	%xmm7, SPXMM7OFF(%rsp)
202
203	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
204	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
205	je	.start_pltenter			/ flag clear: run audit_pltenter
206	movq	SYMDEF_VALUE_OFF(%rax), %rdi	/ flag set: use symdef.st_value
207	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
208	jmp	.end_pltenter
209
210.start_pltenter:
211	/*
212	 * save the caller's stack/frame pointers and integer args in La_amd64_regs
213	 */
214	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
215	leaq	8(%rbp), %rdi
216	movq	%rdi, 0(%rsi)		/ la_rsp
217	movq	0(%rbp), %rdi
218	movq	%rdi, 8(%rsi)		/ la_rbp
219	movq	SPRDIOFF(%rsp), %rdi
220	movq	%rdi, 16(%rsi)		/ la_rdi
221	movq	SPRSIOFF(%rsp), %rdi
222	movq	%rdi, 24(%rsi)		/ la_rsi
223	movq	SPRDXOFF(%rsp), %rdi
224	movq	%rdi, 32(%rsi)		/ la_rdx
225	movq	SPRCXOFF(%rsp), %rdi
226	movq	%rdi, 40(%rsi)		/ la_rcx
227	movq	SPR8OFF(%rsp), %rdi
228	movq	%rdi, 48(%rsi)		/ la_r8
229	movq	SPR9OFF(%rsp), %rdi
230	movq	%rdi, 56(%rsi)		/ la_r9
231
232	/*
233	 * prepare for call to audit_pltenter()
234	 */
235	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
236	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
237	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
238	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
239	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
240	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
241	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
242	call	audit_pltenter@PLT
243	movq	%rax, SPDESTOFF(%rbp)		/ save calling address
244.end_pltenter:
245
246	/*
247	 * If *no* la_pltexit() routines exist
248	 * we do not need to keep the stack frame
249	 * before we call the actual routine.  Instead we
250	 * jump to it and remove our stack from the stack
251	 * at the same time.
252	 */
253	movl	audit_flags(%rip), %eax
254	andl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
255	cmpl	$0, %eax
256	je	.bypass_pltexit
257	/*
258	 * Has the *nopltexit* flag been set for this entry point
259	 */
260	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
261	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
262	je	.start_pltexit			/ flag clear: pltexit processing wanted
263
264.bypass_pltexit:
265	/*
266	 * No PLTEXIT processing required.
267	 */
268	movq	0(%rbp), %r11
269	movq	%r11, -8(%rbp)			/ move prev %rbp
270	movq	SPDESTOFF(%rbp), %r11		/ r11 == calling destination
271	movq	%r11, 0(%rbp)			/ store destination at top
272
273	/
274	/ Restore registers
275	/
276	movq	SPRDIOFF(%rsp), %rdi
277	movq	SPRSIOFF(%rsp), %rsi
278	movq	SPRDXOFF(%rsp), %rdx
279	movq	SPRCXOFF(%rsp), %rcx
280	movq	SPR8OFF(%rsp), %r8
281	movq	SPR9OFF(%rsp), %r9
282	movq	SPR10OFF(%rsp), %r10
283	movq	SPR11OFF(%rsp), %r11
284	movq	SPRAXOFF(%rsp), %rax
285	movdqa	SPXMM0OFF(%rsp), %xmm0
286	movdqa	SPXMM1OFF(%rsp), %xmm1
287	movdqa	SPXMM2OFF(%rsp), %xmm2
288	movdqa	SPXMM3OFF(%rsp), %xmm3
289	movdqa	SPXMM4OFF(%rsp), %xmm4
290	movdqa	SPXMM5OFF(%rsp), %xmm5
291	movdqa	SPXMM6OFF(%rsp), %xmm6
292	movdqa	SPXMM7OFF(%rsp), %xmm7
293
294	subq	$8, %rbp			/ adjust %rbp for 'ret'
295	movq	%rbp, %rsp			/ %rsp -> relocated prev %rbp
296	/*
297	 * At this point, after a little doctoring, we should
298	 * have the following on the stack:
299	 *
300	 *	16(%rsp):  ret addr
301	 *	8(%rsp):  dest_addr
302	 *	0(%rsp):  Previous %rbp
303	 *
304	 * So - we pop the previous %rbp, and then
305	 * ret to our final destination.
306	 */
307	popq	%rbp				/
308	ret					/ jmp to final destination
309						/ and clean up stack :)
310
311.start_pltexit:
312	/*
313	 * In order to call the destination procedure and then return
314	 * to audit_pltexit() for post analysis we must first grow
315	 * our stack frame and then duplicate the original callers
316	 * stack state.  This duplicates all of the arguments
317	 * that were to be passed to the destination procedure.
318	 */
319	movq	%rbp, %rdi			/
320	addq	$16, %rdi			/    %rdi = src
321	movq	(%rbp), %rdx			/
322	subq	%rdi, %rdx			/    %rdx == prev frame sz
323	/*
324	 * If audit_argcnt > 0 then we limit the number of
325	 * arguments that will be duplicated to audit_argcnt.
326	 *
327	 * If (prev_stack_size > (audit_argcnt * 8))
328	 *	prev_stack_size = audit_argcnt * 8;
329	 */
330	movl	audit_argcnt(%rip),%eax		/   %eax = audit_argcnt
331	cmpl	$0, %eax
332	jle	.grow_stack
333	leaq	(,%rax,8), %rax			/    %rax = audit_argcnt * 8
334	cmpq	%rax,%rdx
335	jle	.grow_stack
336	movq	%rax, %rdx
337	/*
338	 * Grow the stack and duplicate the arguments of the
339	 * original caller.
340	 *
341	 * We save %rsp in %r11 since we need to use the current rsp for
342	 * accessing the registers saved in our stack frame.
343	 */
344.grow_stack:
345	movq	%rsp, %r11
346	subq	%rdx, %rsp			/    grow the stack
347	movq	%rdx, SPPRVSTKOFF(%rbp)		/    -88(%rbp) == prev frame sz
348	movq	%rsp, %rcx			/    %rcx = dest
349	addq	%rcx, %rdx			/    %rdx == tail of dest
350.while_base:
351	cmpq	%rdx, %rcx			/   while (dest < tail of dest) {
352	jge	.end_while			/
353	movq	(%rdi), %rsi
354	movq	%rsi,(%rcx)			/        *dest = *src
355	addq	$8, %rdi			/	 src++
356	addq	$8, %rcx			/        dest++
357	jmp	.while_base			/    }
358
359	/*
360	 * The stack above now holds a copy of the (possibly audit_argcnt
361	 * limited) stack of the original calling procedure.
362	 */
363.end_while:
364	/
365	/ Restore registers using %r11 which contains our old %rsp value
366	/ before growing the stack.
367	/
368	movq	SPRDIOFF(%r11), %rdi
369	movq	SPRSIOFF(%r11), %rsi
370	movq	SPRDXOFF(%r11), %rdx
371	movq	SPRCXOFF(%r11), %rcx
372	movq	SPR8OFF(%r11), %r8
373	movq	SPR9OFF(%r11), %r9
374	movq	SPR10OFF(%r11), %r10
375	movq	SPRAXOFF(%r11), %rax
376	movdqa	SPXMM0OFF(%r11), %xmm0
377	movdqa	SPXMM1OFF(%r11), %xmm1
378	movdqa	SPXMM2OFF(%r11), %xmm2
379	movdqa	SPXMM3OFF(%r11), %xmm3
380	movdqa	SPXMM4OFF(%r11), %xmm4
381	movdqa	SPXMM5OFF(%r11), %xmm5
382	movdqa	SPXMM6OFF(%r11), %xmm6
383	movdqa	SPXMM7OFF(%r11), %xmm7
384	movq	SPR11OFF(%r11), %r11		/ restore %r11 last (it is the base)
385
386	/*
387	 * Call to destination function - we'll return here
388	 * for pltexit monitoring.
389	 */
390	call	*SPDESTOFF(%rbp)
391
392	addq	SPPRVSTKOFF(%rbp), %rsp	/ cleanup dupped stack
393
394	/
395	/ prepare for call to audit_pltexit()
396	/
397	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
398	movl	SYMNDX_OFF(%r11), %r8d		/ arg5 (symndx, a 32-bit Word)
399	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
400	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
401	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
402	movq	%rax, %rdi			/ arg1 (returnval)
403	call	audit_pltexit@PLT
404
405	/*
406	 * Clean up after ourselves and return to the
407	 * original calling procedure.
408	 */
409
410	/ Restore the entry-time register values.  NOTE(review): this reloads
411	/ %rdx and %xmm0/%xmm1 too, so a floating-point or 128-bit return value
412	/ of the destination appears to be lost here - confirm before relying on it.
413	movq	SPRDIOFF(%rsp), %rdi
414	movq	SPRSIOFF(%rsp), %rsi
415	movq	SPRDXOFF(%rsp), %rdx
416	movq	SPRCXOFF(%rsp), %rcx
417	movq	SPR8OFF(%rsp), %r8
418	movq	SPR9OFF(%rsp), %r9
419	movq	SPR10OFF(%rsp), %r10
420	movq	SPR11OFF(%rsp), %r11
421	/ %rax is not reloaded: it holds the value returned by audit_pltexit
422	movdqa	SPXMM0OFF(%rsp), %xmm0
423	movdqa	SPXMM1OFF(%rsp), %xmm1
424	movdqa	SPXMM2OFF(%rsp), %xmm2
425	movdqa	SPXMM3OFF(%rsp), %xmm3
426	movdqa	SPXMM4OFF(%rsp), %xmm4
427	movdqa	SPXMM5OFF(%rsp), %xmm5
428	movdqa	SPXMM6OFF(%rsp), %xmm6
429	movdqa	SPXMM7OFF(%rsp), %xmm7
430
431	movq	%rbp, %rsp			/ discard our frame (and alignment pad)
432	popq	%rbp				/ caller's %rbp, pushed by the glue code
433	ret					/ return to caller
434	.size	elf_plt_trace, .-elf_plt_trace
435#endif
436
437/*
438 * We got here because a call to a function resolved to a procedure
439 * linkage table entry.  That entry jumped to the first PLT entry (.PLT0),
440 * which in turn jumped through GOT[2] to elf_rtbndr.
441 *
442 * the code sequence that got us here was:
443 *
444 * .PLT0:
445 *	pushq	GOT+8(%rip)	#GOT[1]
446 *	jmp	*GOT+16(%rip)	#GOT[2]
447 *	nop
448 *	nop
449 *	nop
450 *	nop
451 *	...
452 * PLT entry for foo:
453 *	jmp	*name1@GOTPCREL(%rip)
454 *	pushq	$rel.plt.foo
455 *	jmp	PLT0
456 *
457 * At entry, the stack looks like this:
458 *
459 *	return address			16(%rsp)
460 *	$rel.plt.foo	(plt index)	8(%rsp)
461 *	lmp				0(%rsp)
462 *
463 */
464#if defined(lint)
465
466extern unsigned long	elf_bndr(Rt_map *, unsigned long, caddr_t);	/* binder called by the stub below */
467
468void
469elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)	/* lint-only stand-in for the assembly routine */
470{
471	(void) elf_bndr(lmp, reloc, pc);	/* result deliberately ignored in the lint stub */
472}
473
474#else
475
476/*
477 * The PLT code that landed us here placed 2 arguments on the stack as
478 * arguments to elf_rtbndr.
479 * Additionally the pc of the caller (the return address) sits just above
480 * Our stack will look like this after we establish a stack frame with
481 * push %rbp; movq %rsp, %rbp sequence:
482 *
483 *	8(%rbp)			arg1 - *lmp
484 *	16(%rbp), %rsi		arg2 - reloc index
485 *	24(%rbp), %rdx		arg3 - pc of caller
486 */
487#define	LBPLMPOFF	8	/* arg1 - *lmp (pushed by .PLT0) */
488#define	LBPRELOCOFF	16	/* arg2 - reloc index; slot is reused for the final destination */
489#define	LBRPCOFF	24	/* arg3 - pc of caller (the return address) */
490
491/*
492 * Possible arguments for the resolved function are in registers as per
493 * the AMD64 ABI.  We must save on the local stack all possible register
494 * arguments before interposing functions to resolve the called function.
495 * Possible arguments must be restored before invoking the resolved function.
496 *
497 * Local stack space storage for elf_rtbndr is allocated as follows:
498 *
499 *	Saved regs:
500 *	    %rax			 8
501 *	    %rdi			 8
502 *	    %rsi			 8
503 *	    %rdx			 8
504 *	    %rcx			 8
505 *	    %r8				 8
506 *	    %r9				 8
507 *	    %r10			 8
508 *				    =======
509 *			    Subtotal:   64 (16byte aligned)
510 *
511 *	Saved Media Regs (used to pass floating point args):
512 *	    %xmm0 - %xmm7   16 * 8:    128
513 *				    =======
514 *			    Total:     192 (16byte aligned)
515 *
516 *  LS_SIZE bytes are subtracted from %rsp, giving the following layout:
517 *
518 *	0(%rsp)		save %rax
519 *	8(%rsp)		save %rdi
520 *	16(%rsp)	save %rsi
521 *	24(%rsp)	save %rdx
522 *	32(%rsp)	save %rcx
523 *	40(%rsp)	save %r8
524 *	48(%rsp)	save %r9
525 *	56(%rsp)	save %r10
526 *	64(%rsp)	save %xmm0
527 *	80(%rsp)	save %xmm1
528 *	96(%rsp)	save %xmm2
529 *	112(%rsp)	save %xmm3
530 *	128(%rsp)	save %xmm4
531 *	144(%rsp)	save %xmm5
532 *	160(%rsp)	save %xmm6
533 *	176(%rsp)	save %xmm7
534 *
535 * Note: Some callers may use 8-byte stack alignment instead of the
536 * ABI required 16-byte alignment.  We use %rsp offsets to save/restore
537 * registers because %rbp may not be 16-byte aligned.  We guarantee %rsp
538 * is 16-byte aligned in the function preamble.
539 */
540#define	LS_SIZE	$192	/* local stack space to save all possible arguments (immediate operand) */
541#define	LSRAXOFF	0	/* for SSE register count */
542#define	LSRDIOFF	8	/* arg 0 ... */
543#define	LSRSIOFF	16
544#define	LSRDXOFF	24
545#define	LSRCXOFF	32
546#define	LSR8OFF		40
547#define	LSR9OFF		48	/* ... arg 5 */
548#define	LSR10OFF	56	/* call chain reg */
549#define	LSXMM0OFF	64	/* SSE arg 0 ... */
550#define	LSXMM1OFF	80
551#define	LSXMM2OFF	96
552#define	LSXMM3OFF	112
553#define	LSXMM4OFF	128
554#define	LSXMM5OFF	144
555#define	LSXMM6OFF	160
556#define	LSXMM7OFF	176	/* ... SSE arg 7 */
557
558	.weak	_elf_rtbndr
559	_elf_rtbndr = elf_rtbndr	/* weak alias with a leading underscore */
560
561	ENTRY(elf_rtbndr)
562
563	pushq	%rbp			/* frame: PLT-pushed args now at 8/16(%rbp), caller pc at 24(%rbp) */
564	movq	%rsp, %rbp
565
566	/*
567	 * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack
568	 * alignment.  Enforce ABI 16-byte stack alignment here.
569	 * The next andq instruction does this pseudo code:
570	 * If %rsp is 8 byte aligned then subtract 8 from %rsp.
571	 */
572	andq	$-16, %rsp	/* enforce ABI 16-byte stack alignment */
573
574	subq	LS_SIZE, %rsp	/* save all ABI defined argument registers */
575
576	movq	%rax, LSRAXOFF(%rsp)	/* for SSE register count */
577	movq	%rdi, LSRDIOFF(%rsp)	/*  arg 0 .. */
578	movq	%rsi, LSRSIOFF(%rsp)
579	movq	%rdx, LSRDXOFF(%rsp)
580	movq	%rcx, LSRCXOFF(%rsp)
581	movq	%r8, LSR8OFF(%rsp)
582	movq	%r9, LSR9OFF(%rsp)	/* .. arg 5 */
583	movq	%r10, LSR10OFF(%rsp)	/* call chain reg */
584
585	movdqa	%xmm0, LSXMM0OFF(%rsp)	/* SSE arg 0 ... */
586	movdqa	%xmm1, LSXMM1OFF(%rsp)
587	movdqa	%xmm2, LSXMM2OFF(%rsp)
588	movdqa	%xmm3, LSXMM3OFF(%rsp)
589	movdqa	%xmm4, LSXMM4OFF(%rsp)
590	movdqa	%xmm5, LSXMM5OFF(%rsp)
591	movdqa	%xmm6, LSXMM6OFF(%rsp)
592	movdqa	%xmm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 */
593
594	movq	LBPLMPOFF(%rbp), %rdi	/* arg1 - *lmp */
595	movq	LBPRELOCOFF(%rbp), %rsi	/* arg2 - reloc index */
596	movq	LBRPCOFF(%rbp), %rdx	/* arg3 - pc of caller */
597	call	elf_bndr@PLT		/* call elf_bndr(lmp, relndx, pc) */
598	movq	%rax, LBPRELOCOFF(%rbp)	/* store final destination */
599
600	/* restore possible arguments before invoking resolved function */
601	movq	LSRAXOFF(%rsp), %rax
602	movq	LSRDIOFF(%rsp), %rdi
603	movq	LSRSIOFF(%rsp), %rsi
604	movq	LSRDXOFF(%rsp), %rdx
605	movq	LSRCXOFF(%rsp), %rcx
606	movq	LSR8OFF(%rsp), %r8
607	movq	LSR9OFF(%rsp), %r9
608	movq	LSR10OFF(%rsp), %r10
609
610	movdqa	LSXMM0OFF(%rsp), %xmm0
611	movdqa	LSXMM1OFF(%rsp), %xmm1
612	movdqa	LSXMM2OFF(%rsp), %xmm2
613	movdqa	LSXMM3OFF(%rsp), %xmm3
614	movdqa	LSXMM4OFF(%rsp), %xmm4
615	movdqa	LSXMM5OFF(%rsp), %xmm5
616	movdqa	LSXMM6OFF(%rsp), %xmm6
617	movdqa	LSXMM7OFF(%rsp), %xmm7
618
619	movq	%rbp, %rsp	/* drop the save area and any alignment padding */
620	popq	%rbp		/* %rsp now points at the PLT-pushed lmp */
621
622	addq	$8, %rsp	/* pop 1st plt-pushed args */
623				/* the second argument slot is used */
624				/* for the 'return' address to our */
625				/* final destination */
626
627	ret			/* invoke resolved function */
628	.size 	elf_rtbndr, .-elf_rtbndr
629#endif
630