xref: /titanic_50/usr/src/cmd/sgs/rtld/amd64/boot_elf.s (revision 2c3cf7aa0ae357193cc6e01cdecfdbcdfb00dfca)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#if	defined(lint)
30
31#include	<sys/types.h>
32#include	<_rtld.h>
33#include	<_audit.h>
34#include	<_elf.h>
35#include	<sys/regset.h>
36
/* lint stub only: the real elf_plt_trace is the assembly trampoline below */
37/* ARGSUSED0 */
38int
39elf_plt_trace()
40{
41	return (0);
42}
43#else
44
45#include	<link.h>
46#include	<_audit.h>
47#include	<sys/asm_linkage.h>
48
49	.file	"boot_elf.s"
50	.text
51
52/*
53 * On entry the 'glue code' has already  done the following:
54 *
55 *	pushq	%rbp
56 *	movq	%rsp, %rbp
57 *	subq	$0x10, %rsp
58 *	leaq	trace_fields(%rip), %r11
59 *	movq	%r11, -0x8(%rbp)
60 *	movq	$elf_plt_trace, %r11
61 *	jmp	*%r11
62 *
63 * so - -8(%rbp) contains the dyndata ptr
64 *
65 *	0x0	Addr		*reflmp
66 *	0x8	Addr		*deflmp
67 *	0x10	Word		symndx
68 *	0x14	Word		sb_flags
69 *	0x18	Sym		symdef.st_name
70 *	0x1c			symdef.st_info
71 *	0x1d			symdef.st_other
72 *	0x1e			symdef.st_shndx
73 *	0x20			symdef.st_value
74 *	0x28			symdef.st_size
75 *
76 * Also note - on entry 16 bytes have already been subtracted
77 * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
78 * the second 8 bytes are to align the stack and are available
79 * for use.
80 */
81#define	REFLMP_OFF		0x0
82#define	DEFLMP_OFF		0x8
83#define	SYMNDX_OFF		0x10
84#define	SBFLAGS_OFF		0x14
85#define	SYMDEF_OFF		0x18
86#define	SYMDEF_VALUE_OFF	0x20
87/*
88 * Local stack space storage for elf_plt_trace is allocated
89 * as follows:
90 *
91 *  First - before we got here - %rsp has been decremented
92 *  by 0x10 to make space for the dyndata ptr (and another
93 *  free word).  In addition to that, we create space
94 *  for the following:
95 *
96 *	La_amd64_regs	    8 * 8:	64
97 *	prev_stack_size	    8		 8
98 *	Saved regs:
99 *	    %rdi			 8
100 *	    %rsi			 8
101 *	    %rdx			 8
102 *	    %rcx			 8
103 *	    %r8				 8
104 *	    %r9				 8
105 *	    %r10			 8
106 *	    %r11			 8
107 *	    %rax			 8
108 *				    =======
109 *			    Subtotal:	144 (16byte aligned)
110 *
111 *	Saved Media Regs (used to pass floating point args):
112 *	    %xmm0 - %xmm7   16 * 8:	128
113 *				    =======
114 *			    Total:	272 (16byte aligned)
115 *
116 *  So - will subtract the following to create enough space
117 *
118 *	-8(%rbp)	store dyndata ptr
119 *	-16(%rbp)	store call destination
120 *	-80(%rbp)	space for La_amd64_regs
121 *	-88(%rbp)	prev stack size
122 *	-96(%rbp)	entering %rdi
123 *	-104(%rbp)	entering %rsi
124 *	-112(%rbp)	entering %rdx
125 *	-120(%rbp)	entering %rcx
126 *	-128(%rbp)	entering %r8
127 *	-136(%rbp)	entering %r9
128 *	-144(%rbp)	entering %r10
129 *	-152(%rbp)	entering %r11
130 *	-160(%rbp)	entering %rax
131 *	-176(%rbp)	entering %xmm0
132 *	-192(%rbp)	entering %xmm1
133 *	-208(%rbp)	entering %xmm2
134 *	-224(%rbp)	entering %xmm3
135 *	-240(%rbp)	entering %xmm4
136 *	-256(%rbp)	entering %xmm5
137 *	-272(%rbp)	entering %xmm6
138 *	-288(%rbp)	entering %xmm7
139 *
140 */
141#define	SPDYNOFF    -8
142#define	SPDESTOFF   -16
143#define	SPLAREGOFF  -80
144#define	SPPRVSTKOFF -88
145#define	SPRDIOFF    -96
146#define	SPRSIOFF    -104
147#define	SPRDXOFF    -112
148#define	SPRCXOFF    -120
149#define	SPR8OFF	    -128
150#define	SPR9OFF	    -136
151#define	SPR10OFF    -144
152#define	SPR11OFF    -152
153#define	SPRAXOFF    -160
154#define	SPXMM0OFF   -176
155#define	SPXMM1OFF   -192
156#define	SPXMM2OFF   -208
157#define	SPXMM3OFF   -224
158#define	SPXMM4OFF   -240
159#define	SPXMM5OFF   -256
160#define	SPXMM6OFF   -272
161#define	SPXMM7OFF   -288
162
/
/ elf_plt_trace() - audit trampoline for PLT calls (SysV AMD64).
/ Entered from the per-symbol "glue code" (see comment above) with
/ %rbp already framed and the dyndata pointer stored at -8(%rbp).
/ Saves every possible argument register, optionally calls
/ audit_pltenter(), then either tail-jumps to the resolved
/ destination (when no la_pltexit processing is needed) or
/ duplicates the caller's outgoing stack arguments, calls the
/ destination, and reports its return value via audit_pltexit().
/
163	.globl	elf_plt_trace
164	.type	elf_plt_trace,@function
165	.align 16
166elf_plt_trace:
167	subq	$272,%rsp	/ create some local storage
168	movq	%rdi, SPRDIOFF(%rbp)
169	movq	%rsi, SPRSIOFF(%rbp)
170	movq	%rdx, SPRDXOFF(%rbp)
171	movq	%rcx, SPRCXOFF(%rbp)
172	movq	%r8, SPR8OFF(%rbp)
173	movq	%r9, SPR9OFF(%rbp)
174	movq	%r10, SPR10OFF(%rbp)
175	movq	%r11, SPR11OFF(%rbp)
176	movq	%rax, SPRAXOFF(%rbp)
177	movdqa	%xmm0, SPXMM0OFF(%rbp)
178	movdqa	%xmm1, SPXMM1OFF(%rbp)
179	movdqa	%xmm2, SPXMM2OFF(%rbp)
180	movdqa	%xmm3, SPXMM3OFF(%rbp)
181	movdqa	%xmm4, SPXMM4OFF(%rbp)
182	movdqa	%xmm5, SPXMM5OFF(%rbp)
183	movdqa	%xmm6, SPXMM6OFF(%rbp)
184	movdqa	%xmm7, SPXMM7OFF(%rbp)
185
	/ If LA_SYMB_NOPLTENTER is set, skip audit_pltenter() and use the
	/ symbol value from dyndata as the call destination directly.
186	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
187	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
188	je	.start_pltenter
189	movq	SYMDEF_VALUE_OFF(%rax), %rdi
190	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
191	jmp	.end_pltenter
192
193.start_pltenter:
194	/*
195	 * save all registers into La_amd64_regs
196	 */
197	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
198	leaq	8(%rbp), %rdi
199	movq	%rdi, 0(%rsi)		/ la_rsp
200	movq	0(%rbp), %rdi
201	movq	%rdi, 8(%rsi)		/ la_rbp
202	movq	SPRDIOFF(%rbp), %rdi
203	movq	%rdi, 16(%rsi)		/ la_rdi
204	movq	SPRSIOFF(%rbp), %rdi
205	movq	%rdi, 24(%rsi)		/ la_rsi
206	movq	SPRDXOFF(%rbp), %rdi
207	movq	%rdi, 32(%rsi)		/ la_rdx
208	movq	SPRCXOFF(%rbp), %rdi
209	movq	%rdi, 40(%rsi)		/ la_rcx
210	movq	SPR8OFF(%rbp), %rdi
211	movq	%rdi, 48(%rsi)		/ la_r8
212	movq	SPR9OFF(%rbp), %rdi
213	movq	%rdi, 56(%rsi)		/ la_r9
214
215	/*
216	 * prepare for call to la_pltenter
217	 */
218	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
219	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
220	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
221	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
222	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
223	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
224	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
225	call	audit_pltenter@PLT
226	movq	%rax, SPDESTOFF(%rbp)		/ save calling address
227.end_pltenter:
228
229	/*
230	 * If *no* la_pltexit() routines exist
231	 * we do not need to keep the stack frame
232	 * before we call the actual routine.  Instead we
233	 * jump to it and remove our stack from the stack
234	 * at the same time.
235	 */
236	movl	audit_flags(%rip), %eax
237	andl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
238	cmpl	$0, %eax
239	je	.bypass_pltexit
240	/*
241	 * Has the *nopltexit* flag been set for this entry point
242	 */
243	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
244	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
245	je	.start_pltexit
246
247.bypass_pltexit:
248	/*
249	 * No PLTEXIT processing required.
250	 */
	/ Rewrite the frame so a single pop+ret lands in the destination:
	/ previous %rbp moves down one slot, destination takes its place.
251	movq	0(%rbp), %r11
252	movq	%r11, -8(%rbp)			/ move prev %rbp
253	movq	SPDESTOFF(%rbp), %r11		/ r11 == calling destination
254	movq	%r11, 0(%rbp)			/ store destination at top
255
256	/
257	/ Restore registers
258	/
259	movq	SPRDIOFF(%rbp), %rdi
260	movq	SPRSIOFF(%rbp), %rsi
261	movq	SPRDXOFF(%rbp), %rdx
262	movq	SPRCXOFF(%rbp), %rcx
263	movq	SPR8OFF(%rbp), %r8
264	movq	SPR9OFF(%rbp), %r9
265	movq	SPR10OFF(%rbp), %r10
266	movq	SPR11OFF(%rbp), %r11
267	movq	SPRAXOFF(%rbp), %rax
268	movdqa	SPXMM0OFF(%rbp), %xmm0
269	movdqa	SPXMM1OFF(%rbp), %xmm1
270	movdqa	SPXMM2OFF(%rbp), %xmm2
271	movdqa	SPXMM3OFF(%rbp), %xmm3
272	movdqa	SPXMM4OFF(%rbp), %xmm4
273	movdqa	SPXMM5OFF(%rbp), %xmm5
274	movdqa	SPXMM6OFF(%rbp), %xmm6
275	movdqa	SPXMM7OFF(%rbp), %xmm7
276
277	subq	$8, %rbp			/ adjust %rbp for 'ret'
278	movq	%rbp, %rsp			/
279	/*
280	 * At this point, after a little doctoring, we should
281	 * have the following on the stack:
282	 *
283	 *	16(%rsp):  ret addr
284	 *	8(%rsp):  dest_addr
285	 *	0(%rsp):  Previous %rbp
286	 *
287	 * So - we pop the previous %rbp, and then
288	 * ret to our final destination.
289	 */
290	popq	%rbp				/
291	ret					/ jmp to final destination
292						/ and clean up stack :)
293
294.start_pltexit:
295	/*
296	 * In order to call the destination procedure and then return
297	 * to audit_pltexit() for post analysis we must first grow
298	 * our stack frame and then duplicate the original callers
299	 * stack state.  This duplicates all of the arguments
300	 * that were to be passed to the destination procedure.
301	 */
302	movq	%rbp, %rdi			/
303	addq	$16, %rdi			/    %rdi = src
304	movq	(%rbp), %rdx			/
305	subq	%rdi, %rdx			/    %rdx == prev frame sz
306	/*
307	 * If audit_argcnt > 0 then we limit the number of
308	 * arguments that will be duplicated to audit_argcnt.
309	 *
310	 * If (prev_stack_size > (audit_argcnt * 8))
311	 *	prev_stack_size = audit_argcnt * 8;
312	 */
313	movl	audit_argcnt(%rip),%eax		/   %eax = audit_argcnt
314	cmpl	$0, %eax
315	jle	.grow_stack
316	leaq	(,%rax,8), %rax			/    %rax = %rax * 8
317	cmpq	%rax,%rdx
318	jle	.grow_stack
319	movq	%rax, %rdx
320	/*
321	 * Grow the stack and duplicate the arguments of the
322	 * original caller.
323	 */
324.grow_stack:
325	subq	%rdx, %rsp			/    grow the stack
326	movq	%rdx, SPPRVSTKOFF(%rbp)		/    -88(%rbp) == prev frame sz
327	movq	%rsp, %rcx			/    %rcx = dest
328	addq	%rcx, %rdx			/    %rdx == tail of dest
329.while_base:
330	cmpq	%rdx, %rcx			/   while (base+size >= src++) {
331	jge	.end_while			/
332	movq	(%rdi), %rsi
333	movq	%rsi,(%rcx)			/        *dest = *src
334	addq	$8, %rdi			/	 src++
335	addq	$8, %rcx			/        dest++
336	jmp	.while_base			/    }
337
338	/*
339	 * The above stack is now an exact duplicate of
340	 * the stack of the original calling procedure.
341	 */
342.end_while:
343	/
344	/ Restore registers
345	/
346	movq	SPRDIOFF(%rbp), %rdi
347	movq	SPRSIOFF(%rbp), %rsi
348	movq	SPRDXOFF(%rbp), %rdx
349	movq	SPRCXOFF(%rbp), %rcx
350	movq	SPR8OFF(%rbp), %r8
351	movq	SPR9OFF(%rbp), %r9
352	movq	SPR10OFF(%rbp), %r10
353	movq	SPR11OFF(%rbp), %r11
354	movq	SPRAXOFF(%rbp), %rax
355	movdqa	SPXMM0OFF(%rbp), %xmm0
356	movdqa	SPXMM1OFF(%rbp), %xmm1
357	movdqa	SPXMM2OFF(%rbp), %xmm2
358	movdqa	SPXMM3OFF(%rbp), %xmm3
359	movdqa	SPXMM4OFF(%rbp), %xmm4
360	movdqa	SPXMM5OFF(%rbp), %xmm5
361	movdqa	SPXMM6OFF(%rbp), %xmm6
362	movdqa	SPXMM7OFF(%rbp), %xmm7
363
364	/*
365	 * Call to destination function - we'll return here
366	 * for pltexit monitoring.
367	 */
368	call	*SPDESTOFF(%rbp)
369
370	addq	SPPRVSTKOFF(%rbp), %rsp	/ cleanup dupped stack
371
372	/
373	/ prepare for call to audit_pltexit()
374	/
375	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	/ NOTE(review): symndx is a 32-bit Word at offset 0x10; this movq also
	/ pulls sb_flags (0x14) into the upper half of %r8.  Harmless if the
	/ callee only reads %r8d, but "movl ..., %r8d" (as used on the
	/ pltenter path above) would be cleaner - confirm.
376	movq	SYMNDX_OFF(%r11), %r8		/ arg5 (symndx)
377	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
378	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
379	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
380	movq	%rax, %rdi			/ arg1 (returnval)
381	call	audit_pltexit@PLT
382
383	/*
384	 * Clean up after ourselves and return to the
385	 * original calling procedure.
386	 */
387
388	/
389	/ Restore registers
390	/
391	movq	SPRDIOFF(%rbp), %rdi
392	movq	SPRSIOFF(%rbp), %rsi
393	movq	SPRDXOFF(%rbp), %rdx
394	movq	SPRCXOFF(%rbp), %rcx
395	movq	SPR8OFF(%rbp), %r8
396	movq	SPR9OFF(%rbp), %r9
397	movq	SPR10OFF(%rbp), %r10
398	movq	SPR11OFF(%rbp), %r11
399	// rax already contains return value
400	movdqa	SPXMM0OFF(%rbp), %xmm0
401	movdqa	SPXMM1OFF(%rbp), %xmm1
402	movdqa	SPXMM2OFF(%rbp), %xmm2
403	movdqa	SPXMM3OFF(%rbp), %xmm3
404	movdqa	SPXMM4OFF(%rbp), %xmm4
405	movdqa	SPXMM5OFF(%rbp), %xmm5
406	movdqa	SPXMM6OFF(%rbp), %xmm6
407	movdqa	SPXMM7OFF(%rbp), %xmm7
408
409	movq	%rbp, %rsp			/
410	popq	%rbp				/
411	ret					/ return to caller
412	.size	elf_plt_trace, .-elf_plt_trace
413#endif
414
415/*
416 * We got here because a call to a function resolved to a procedure
417 * linkage table entry.  That entry did a JMPL to the first PLT entry, which
418 * in turn did a call to elf_rtbndr.
419 *
420 * the code sequence that got us here was:
421 *
422 * .PLT0:
423 *	pushq	GOT+8(%rip)	#GOT[1]
424 *	jmp	*GOT+16(%rip)	#GOT[2]
425 *	nop
426 *	nop
427 *	nop
428 *	nop
429 *	...
430 * PLT entry for foo:
431 *	jmp	*name1@GOTPCREL(%rip)
432 *	pushl	$rel.plt.foo
433 *	jmp	PLT0
434 *
435 * At entry, the stack looks like this:
436 *
437 *	return address			16(%rsp)
438 *	$rel.plt.foo	(plt index)	8(%rsp)
439 *	lmp				0(%rsp)
440 *
441 */
442#if defined(lint)
443
444extern unsigned long	elf_bndr(Rt_map *, unsigned long, caddr_t);
445
/* lint stub only: the real elf_rtbndr is the assembly trampoline below */
446void
447elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
448{
449	(void) elf_bndr(lmp, reloc, pc);
450}
451
452#else
453
454/*
455 * The PLT code that landed us here placed 2 arguments on the stack as
456 * arguments to elf_rtbndr.
457 * Additionally the pc of caller is below these 2 args.
458 * Our stack will look like this after we establish a stack frame with
459 * push %rbp; movq %rsp, %rbp sequence:
460 *
461 *	8(%rbp)			arg1 - *lmp
462 *	16(%rbp), %rsi		arg2 - reloc index
463 *	24(%rbp), %rdx		arg3 - pc of caller
464 */
465#define	LBPLMPOFF	8	/* arg1 - *lmp */
466#define	LBPRELOCOFF	16	/* arg2 - reloc index */
467#define	LBRPCOFF	24	/* arg3 - pc of caller */
468
469/*
470 * Possible arguments for the resolved function are in registers as per
471 * the AMD64 ABI.  We must save on the local stack all possible register
472 * arguments before interposing functions to resolve the called function.
473 * Possible arguments must be restored before invoking the resolved function.
474 *
475 * Local stack space storage for elf_rtbndr is allocated as follows:
476 *
477 *	Saved regs:
478 *	    %rax			 8
479 *	    %rdi			 8
480 *	    %rsi			 8
481 *	    %rdx			 8
482 *	    %rcx			 8
483 *	    %r8				 8
484 *	    %r9				 8
485 *	    %r10			 8
486 *				    =======
487 *			    Subtotal:   64 (16byte aligned)
488 *
489 *	Saved Media Regs (used to pass floating point args):
490 *	    %xmm0 - %xmm7   16 * 8:    128
491 *				    =======
492 *			    Total:     192 (16byte aligned)
493 *
494 *  So - will subtract the following to create enough space
495 *
496 *	-8(%rbp)	entering %rax
497 *	-16(%rbp)	entering %rdi
498 *	-24(%rbp)	entering %rsi
499 *	-32(%rbp)	entering %rdx
500 *	-40(%rbp)	entering %rcx
501 *	-48(%rbp)	entering %r8
502 *	-56(%rbp)	entering %r9
503 *	-64(%rbp)	entering %r10
504 *	-80(%rbp)	entering %xmm0
505 *	-96(%rbp)	entering %xmm1
506 *	-112(%rbp)	entering %xmm2
507 *	-128(%rbp)	entering %xmm3
508 *	-144(%rbp)	entering %xmm4
509 *	-160(%rbp)	entering %xmm5
510 *	-176(%rbp)	entering %xmm6
511 *	-192(%rbp)	entering %xmm7
512 */
513#define	LS_SIZE	$192	/* local stack space to save all possible arguments */
514#define	LSRAXOFF	-8	/* for SSE register count */
515#define	LSRDIOFF	-16	/* arg 0 ... */
516#define	LSRSIOFF	-24
517#define	LSRDXOFF	-32
518#define	LSRCXOFF	-40
519#define	LSR8OFF		-48
520#define	LSR9OFF		-56
521#define	LSR10OFF	-64	/* ... arg 5 */
522#define	LSXMM0OFF	-80	/* SSE arg 0 ... */
523#define	LSXMM1OFF	-96
524#define	LSXMM2OFF	-112
525#define	LSXMM3OFF	-128
526#define	LSXMM4OFF	-144
527#define	LSXMM5OFF	-160
528#define	LSXMM6OFF	-176
529#define	LSXMM7OFF	-192	/* ... SSE arg 7 */
530
/*
 * elf_rtbndr() - first-call PLT binding trampoline (SysV AMD64).
 * Entered from PLT0 with lmp and the relocation index on the stack
 * (layout described above).  Saves every possible argument register,
 * calls elf_bndr(lmp, relndx, pc) to resolve the target, restores the
 * registers, then "returns" into the resolved function with the
 * original caller's stack intact.
 */
531	.weak	_elf_rtbndr
532	_elf_rtbndr = elf_rtbndr
533
534	ENTRY(elf_rtbndr)
535
536	pushq	%rbp
537	movq	%rsp, %rbp
538
539	subq	LS_SIZE, %rsp	/* save all ABI defined argument registers */
540
541	movq	%rax, LSRAXOFF(%rbp)	/* for SSE register count */
542	movq	%rdi, LSRDIOFF(%rbp)	/*  arg 0 .. */
543	movq	%rsi, LSRSIOFF(%rbp)
544	movq	%rdx, LSRDXOFF(%rbp)
545	movq	%rcx, LSRCXOFF(%rbp)
546	movq	%r8, LSR8OFF(%rbp)
547	movq	%r9, LSR9OFF(%rbp)	/* .. arg 5 */
548	movq	%r10, LSR10OFF(%rbp)	/* call chain reg */
549
550	movdqa	%xmm0, LSXMM0OFF(%rbp)	/* SSE arg 0 ... */
551	movdqa	%xmm1, LSXMM1OFF(%rbp)
552	movdqa	%xmm2, LSXMM2OFF(%rbp)
553	movdqa	%xmm3, LSXMM3OFF(%rbp)
554	movdqa	%xmm4, LSXMM4OFF(%rbp)
555	movdqa	%xmm5, LSXMM5OFF(%rbp)
556	movdqa	%xmm6, LSXMM6OFF(%rbp)
557	movdqa	%xmm7, LSXMM7OFF(%rbp)	/* ... SSE arg 7 */
558
559	movq	LBPLMPOFF(%rbp), %rdi	/* arg1 - *lmp */
560	movq	LBPRELOCOFF(%rbp), %rsi	/* arg2 - reloc index */
561	movq	LBRPCOFF(%rbp), %rdx	/* arg3 - pc of caller */
562	call	elf_bndr@PLT		/* call elf_bndr(lmp, relndx, pc) */
	/* resolved address overwrites the reloc-index slot; the trailing */
	/* ret below uses it as its "return" address */
563	movq	%rax, LBPRELOCOFF(%rbp)	/* store final destination */
564
565	/* restore possible arguments before invoking resolved function */
566	movq	LSRAXOFF(%rbp), %rax
567	movq	LSRDIOFF(%rbp), %rdi
568	movq	LSRSIOFF(%rbp), %rsi
569	movq	LSRDXOFF(%rbp), %rdx
570	movq	LSRCXOFF(%rbp), %rcx
571	movq	LSR8OFF(%rbp), %r8
572	movq	LSR9OFF(%rbp), %r9
573	movq	LSR10OFF(%rbp), %r10
574
575	movdqa	LSXMM0OFF(%rbp), %xmm0
576	movdqa	LSXMM1OFF(%rbp), %xmm1
577	movdqa	LSXMM2OFF(%rbp), %xmm2
578	movdqa	LSXMM3OFF(%rbp), %xmm3
579	movdqa	LSXMM4OFF(%rbp), %xmm4
580	movdqa	LSXMM5OFF(%rbp), %xmm5
581	movdqa	LSXMM6OFF(%rbp), %xmm6
582	movdqa	LSXMM7OFF(%rbp), %xmm7
583
584	movq	%rbp, %rsp
585	popq	%rbp
586
587	addq	$8, %rsp	/* pop 1st plt-pushed args */
588				/* the second argument is used */
589				/* for the 'return' address to our */
590				/* final destination */
591
592	ret			/* invoke resolved function */
593	.size 	elf_rtbndr, .-elf_rtbndr
594#endif
595