xref: /titanic_41/usr/src/cmd/sgs/rtld/amd64/boot_elf.s (revision c2580b931007758eab8cb5ae8726ebe1588e259b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 *	Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 *	Use is subject to license terms.
24 */
25#pragma ident	"%Z%%M%	%I%	%E% SMI"
26
27#if	defined(lint)
28
29#include	<sys/types.h>
30#include	<_rtld.h>
31#include	<_audit.h>
32#include	<_elf.h>
33#include	<sys/regset.h>
34
/*
 * Lint-only stand-in for the assembly routine elf_plt_trace() defined in
 * the non-lint half of this file.  It exists solely so that lint sees a
 * definition of the symbol; it performs no work and always yields 0.
 */
/* ARGSUSED0 */
int
elf_plt_trace()
{
	return (0);
}
41#else
42
43#include	<link.h>
44#include	<_audit.h>
45#include	<sys/asm_linkage.h>
46
47	.file	"boot_elf.s"
48	.text
49
50/*
51 * On entry the 'glue code' has already  done the following:
52 *
53 *	pushq	%rbp
54 *	movq	%rsp, %rbp
55 *	subq	$0x10, %rsp
56 *	leaq	trace_fields(%rip), %r11
57 *	movq	%r11, -0x8(%rbp)
58 *	movq	$elf_plt_trace, %r11
59 *	jmp	*%r11
60 *
61 * so - -8(%rbp) contains the dyndata ptr
62 *
63 *	0x0	Addr		*reflmp
64 *	0x8	Addr		*deflmp
65 *	0x10	Word		symndx
66 *	0x14	Word		sb_flags
67 *	0x18	Sym		symdef.st_name
68 *	0x1c			symdef.st_info
69 *	0x1d			symdef.st_other
70 *	0x1e			symdef.st_shndx
71 *	0x20			symdef.st_value
72 *	0x28			symdef.st_size
73 *
74 * Also note - on entry 16 bytes have already been subtracted
75 * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
76 * the second 8 bytes are to align the stack and are available
77 * for use.
78 */
79#define	REFLMP_OFF		0x0
80#define	DEFLMP_OFF		0x8
81#define	SYMNDX_OFF		0x10
82#define	SBFLAGS_OFF		0x14
83#define	SYMDEF_OFF		0x18
84#define	SYMDEF_VALUE_OFF	0x20
/*
 * Local stack space storage for elf_plt_trace is allocated
 * as follows:
 *
 *  First - before we got here - %rsp has been decremented
 *  by 0x10 to make space for the dyndata ptr (and another
 *  free word).  In addition to that, we create space
 *  for the following:
 *
 *	La_amd64_regs	    8 * 8:	64
 *	prev_stack_size	    8		 8
 *	Saved regs:
 *	    %rdi			 8
 *	    %rsi			 8
 *	    %rdx			 8
 *	    %rcx			 8
 *	    %r8				 8
 *	    %r9				 8
 *	    %r10			 8
 *	    %r11			 8
 *	    %rax			 8
 *				    =======
 *			    Subtotal:	144 (16byte aligned)
 *
 *	Saved Media Regs (used to pass floating point args):
 *	    %xmm0 - %xmm7   16 * 8:	128
 *				    =======
 *			    Total:	272 (16byte aligned)
 *
 *  elf_plt_trace therefore subtracts 272 from %rsp.  Together with
 *  the 16 bytes already reserved by the glue code the frame spans
 *  288 bytes below %rbp, laid out as follows:
 *
 *	-8(%rbp)	store dyndata ptr
 *	-16(%rbp)	store call destination
 *	-80(%rbp)	space for La_amd64_regs
 *	-88(%rbp)	prev stack size
 *	-96(%rbp)	entering %rdi
 *	-104(%rbp)	entering %rsi
 *	-112(%rbp)	entering %rdx
 *	-120(%rbp)	entering %rcx
 *	-128(%rbp)	entering %r8
 *	-136(%rbp)	entering %r9
 *	-144(%rbp)	entering %r10
 *	-152(%rbp)	entering %r11
 *	-160(%rbp)	entering %rax
 *	-176(%rbp)	entering %xmm0
 *	-192(%rbp)	entering %xmm1
 *	-208(%rbp)	entering %xmm2
 *	-224(%rbp)	entering %xmm3
 *	-240(%rbp)	entering %xmm4
 *	-256(%rbp)	entering %xmm5
 *	-272(%rbp)	entering %xmm6
 *	-288(%rbp)	entering %xmm7
 *
 */
139#define	SPDYNOFF    -8
140#define	SPDESTOFF   -16
141#define	SPLAREGOFF  -80
142#define	SPPRVSTKOFF -88
143#define	SPRDIOFF    -96
144#define	SPRSIOFF    -104
145#define	SPRDXOFF    -112
146#define	SPRCXOFF    -120
147#define	SPR8OFF	    -128
148#define	SPR9OFF	    -136
149#define	SPR10OFF    -144
150#define	SPR11OFF    -152
151#define	SPRAXOFF    -160
152#define	SPXMM0OFF   -176
153#define	SPXMM1OFF   -192
154#define	SPXMM2OFF   -208
155#define	SPXMM3OFF   -224
156#define	SPXMM4OFF   -240
157#define	SPXMM5OFF   -256
158#define	SPXMM6OFF   -272
159#define	SPXMM7OFF   -288
160
161	.globl	elf_plt_trace
162	.type	elf_plt_trace,@function
163	.align 16
164elf_plt_trace:
165	subq	$272,%rsp	/ create some local storage
166	movq	%rdi, SPRDIOFF(%rbp)
167	movq	%rsi, SPRSIOFF(%rbp)
168	movq	%rdx, SPRDXOFF(%rbp)
169	movq	%rcx, SPRCXOFF(%rbp)
170	movq	%r8, SPR8OFF(%rbp)
171	movq	%r9, SPR9OFF(%rbp)
172	movq	%r10, SPR10OFF(%rbp)
173	movq	%r11, SPR11OFF(%rbp)
174	movq	%rax, SPRAXOFF(%rbp)
175	movdqa	%xmm0, SPXMM0OFF(%rbp)
176	movdqa	%xmm1, SPXMM1OFF(%rbp)
177	movdqa	%xmm2, SPXMM2OFF(%rbp)
178	movdqa	%xmm3, SPXMM3OFF(%rbp)
179	movdqa	%xmm4, SPXMM4OFF(%rbp)
180	movdqa	%xmm5, SPXMM5OFF(%rbp)
181	movdqa	%xmm6, SPXMM6OFF(%rbp)
182	movdqa	%xmm7, SPXMM7OFF(%rbp)
183
184	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
185	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
186	je	.start_pltenter
187	movq	SYMDEF_VALUE_OFF(%rax), %rdi
188	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
189	jmp	.end_pltenter
190
191.start_pltenter:
192	/*
193	 * save all registers into La_amd64_regs
194	 */
195	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
196	leaq	8(%rbp), %rdi
197	movq	%rdi, 0(%rsi)		/ la_rsp
198	movq	0(%rbp), %rdi
199	movq	%rdi, 8(%rsi)		/ la_rbp
200	movq	SPRDIOFF(%rbp), %rdi
201	movq	%rdi, 16(%rsi)		/ la_rdi
202	movq	SPRSIOFF(%rbp), %rdi
203	movq	%rdi, 24(%rsi)		/ la_rsi
204	movq	SPRDXOFF(%rbp), %rdi
205	movq	%rdi, 32(%rsi)		/ la_rdx
206	movq	SPRCXOFF(%rbp), %rdi
207	movq	%rdi, 40(%rsi)		/ la_rcx
208	movq	SPR8OFF(%rbp), %rdi
209	movq	%rdi, 48(%rsi)		/ la_r8
210	movq	SPR9OFF(%rbp), %rdi
211	movq	%rdi, 56(%rsi)		/ la_r9
212
213	/*
214	 * prepare for call to la_pltenter
215	 */
216	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
217	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
218	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
219	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
220	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
221	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
222	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
223	call	audit_pltenter@PLT
224	movq	%rax, SPDESTOFF(%rbp)		/ save calling address
225.end_pltenter:
226
227	/*
228	 * If *no* la_pltexit() routines exist
229	 * we do not need to keep the stack frame
230	 * before we call the actual routine.  Instead we
231	 * jump to it and remove our stack from the stack
232	 * at the same time.
233	 */
234	movl	audit_flags(%rip), %eax
235	andl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
236	cmpl	$0, %eax
237	je	.bypass_pltexit
238	/*
239	 * Has the *nopltexit* flag been set for this entry point
240	 */
241	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
242	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
243	je	.start_pltexit
244
245.bypass_pltexit:
246	/*
247	 * No PLTEXIT processing required.
248	 */
249	movq	0(%rbp), %r11
250	movq	%r11, -8(%rbp)			/ move prev %rbp
251	movq	SPDESTOFF(%rbp), %r11		/ r11 == calling destination
252	movq	%r11, 0(%rbp)			/ store destination at top
253
254	/
255	/ Restore registers
256	/
257	movq	SPRDIOFF(%rbp), %rdi
258	movq	SPRSIOFF(%rbp), %rsi
259	movq	SPRDXOFF(%rbp), %rdx
260	movq	SPRCXOFF(%rbp), %rcx
261	movq	SPR8OFF(%rbp), %r8
262	movq	SPR9OFF(%rbp), %r9
263	movq	SPR10OFF(%rbp), %r10
264	movq	SPR11OFF(%rbp), %r11
265	movq	SPRAXOFF(%rbp), %rax
266	movdqa	SPXMM0OFF(%rbp), %xmm0
267	movdqa	SPXMM1OFF(%rbp), %xmm1
268	movdqa	SPXMM2OFF(%rbp), %xmm2
269	movdqa	SPXMM3OFF(%rbp), %xmm3
270	movdqa	SPXMM4OFF(%rbp), %xmm4
271	movdqa	SPXMM5OFF(%rbp), %xmm5
272	movdqa	SPXMM6OFF(%rbp), %xmm6
273	movdqa	SPXMM7OFF(%rbp), %xmm7
274
275	subq	$8, %rbp			/ adjust %rbp for 'ret'
276	movq	%rbp, %rsp			/
277	/*
278	 * At this point, after a little doctoring, we should
279	 * have the following on the stack:
280	 *
281	 *	16(%rsp):  ret addr
282	 *	8(%rsp):  dest_addr
283	 *	0(%rsp):  Previous %rbp
284	 *
285	 * So - we pop the previous %rbp, and then
286	 * ret to our final destination.
287	 */
288	popq	%rbp				/
289	ret					/ jmp to final destination
290						/ and clean up stack :)
291
292.start_pltexit:
293	/*
294	 * In order to call the destination procedure and then return
295	 * to audit_pltexit() for post analysis we must first grow
296	 * our stack frame and then duplicate the original callers
297	 * stack state.  This duplicates all of the arguements
298	 * that were to be passed to the destination procedure.
299	 */
300	movq	%rbp, %rdi			/
301	addq	$16, %rdi			/    %rdi = src
302	movq	(%rbp), %rdx			/
303	subq	%rdi, %rdx			/    %rdx == prev frame sz
304	/*
305	 * If audit_argcnt > 0 then we limit the number of
306	 * arguements that will be duplicated to audit_argcnt.
307	 *
308	 * If (prev_stack_size > (audit_argcnt * 8))
309	 *	prev_stack_size = audit_argcnt * 8;
310	 */
311	movl	audit_argcnt(%rip),%eax		/   %eax = audit_argcnt
312	cmpl	$0, %eax
313	jle	.grow_stack
314	leaq	(,%rax,8), %rax			/    %eax = %eax * 4
315	cmpq	%rax,%rdx
316	jle	.grow_stack
317	movq	%rax, %rdx
318	/*
319	 * Grow the stack and duplicate the arguements of the
320	 * original caller.
321	 */
322.grow_stack:
323	subq	%rdx, %rsp			/    grow the stack
324	movq	%rdx, SPPRVSTKOFF(%rbp)		/    -88(%rbp) == prev frame sz
325	movq	%rsp, %rcx			/    %rcx = dest
326	addq	%rcx, %rdx			/    %rdx == tail of dest
327.while_base:
328	cmpq	%rdx, %rcx			/   while (base+size >= src++) {
329	jge	.end_while			/
330	movq	(%rdi), %rsi
331	movq	%rsi,(%rcx)			/        *dest = *src
332	addq	$8, %rdi			/	 src++
333	addq	$8, %rcx			/        dest++
334	jmp	.while_base			/    }
335
336	/*
337	 * The above stack is now an exact duplicate of
338	 * the stack of the original calling procedure.
339	 */
340.end_while:
341	/
342	/ Restore registers
343	/
344	movq	SPRDIOFF(%rbp), %rdi
345	movq	SPRSIOFF(%rbp), %rsi
346	movq	SPRDXOFF(%rbp), %rdx
347	movq	SPRCXOFF(%rbp), %rcx
348	movq	SPR8OFF(%rbp), %r8
349	movq	SPR9OFF(%rbp), %r9
350	movq	SPR10OFF(%rbp), %r10
351	movq	SPR11OFF(%rbp), %r11
352	movq	SPRAXOFF(%rbp), %rax
353	movdqa	SPXMM0OFF(%rbp), %xmm0
354	movdqa	SPXMM1OFF(%rbp), %xmm1
355	movdqa	SPXMM2OFF(%rbp), %xmm2
356	movdqa	SPXMM3OFF(%rbp), %xmm3
357	movdqa	SPXMM4OFF(%rbp), %xmm4
358	movdqa	SPXMM5OFF(%rbp), %xmm5
359	movdqa	SPXMM6OFF(%rbp), %xmm6
360	movdqa	SPXMM7OFF(%rbp), %xmm7
361
362	/*
363	 * Call to desitnation function - we'll return here
364	 * for pltexit monitoring.
365	 */
366	call	*SPDESTOFF(%rbp)
367
368	addq	SPPRVSTKOFF(%rbp), %rsp	/ cleanup dupped stack
369
370	/
371	/ prepare for call to audit_pltenter()
372	/
373	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
374	movq	SYMNDX_OFF(%r11), %r8		/ arg5 (symndx)
375	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
376	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
377	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
378	movq	%rax, %rdi			/ arg1 (returnval)
379	call	audit_pltexit@PLT
380
381	/*
382	 * Clean up after ourselves and return to the
383	 * original calling procedure.
384	 */
385
386	/
387	/ Restore registers
388	/
389	movq	SPRDIOFF(%rbp), %rdi
390	movq	SPRSIOFF(%rbp), %rsi
391	movq	SPRDXOFF(%rbp), %rdx
392	movq	SPRCXOFF(%rbp), %rcx
393	movq	SPR8OFF(%rbp), %r8
394	movq	SPR9OFF(%rbp), %r9
395	movq	SPR10OFF(%rbp), %r10
396	movq	SPR11OFF(%rbp), %r11
397	// rax already contains return value
398	movdqa	SPXMM0OFF(%rbp), %xmm0
399	movdqa	SPXMM1OFF(%rbp), %xmm1
400	movdqa	SPXMM2OFF(%rbp), %xmm2
401	movdqa	SPXMM3OFF(%rbp), %xmm3
402	movdqa	SPXMM4OFF(%rbp), %xmm4
403	movdqa	SPXMM5OFF(%rbp), %xmm5
404	movdqa	SPXMM6OFF(%rbp), %xmm6
405	movdqa	SPXMM7OFF(%rbp), %xmm7
406
407	movq	%rbp, %rsp			/
408	popq	%rbp				/
409	ret					/ return to caller
410	.size	elf_plt_trace, .-elf_plt_trace
411#endif
412
/*
 * We got here because a call to a function resolved to a procedure
 * linkage table entry.  That entry jumped to the first PLT entry (PLT0),
 * which in turn jumped through GOT[2] to elf_rtbndr.
 *
 * The code sequence that got us here was:
 *
 * .PLT0:
 *	pushq	GOT+8(%rip)	#GOT[1]
 *	jmp	*GOT+16(%rip)	#GOT[2]
 *	nop
 *	nop
 *	nop
 *	nop
 *	...
 * PLT entry for foo:
 *	jmp	*name1@GOTPCREL(%rip)
 *	pushq	$rel.plt.foo
 *	jmp	PLT0
 *
 * At entry, the stack looks like this:
 *
 *	return address			16(%rsp)
 *	$rel.plt.foo	(plt index)	8(%rsp)
 *	lmp				0(%rsp)
 *
 */
440#if defined(lint)
441
442extern unsigned long	elf_bndr(Rt_map *, unsigned long, caddr_t);
443
444void
445elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
446{
447	(void) elf_bndr(lmp, reloc, pc);
448}
449
450#else
451	.weak	_elf_rtbndr
452	_elf_rtbndr = elf_rtbndr
453
454	ENTRY(elf_rtbndr)
455
456	pushq	%rbp
457	movq	%rsp, %rbp
458
459	pushq	%rax		/* for SSE register count */
460	pushq	%rdi		/* arg 0 .. */
461	pushq	%rsi
462	pushq	%rdx
463	pushq	%rcx
464	pushq	%r8
465	pushq	%r9		/* .. arg 5 */
466	pushq	%r10		/* call chain reg */
467
468	movq	8(%rbp), %rdi	/* arg1 - *lmp */
469	movq	16(%rbp), %rsi	/* arg2 - reloc index */
470	movq	24(%rbp), %rdx	/* arg3 - pc of caller */
471	call	elf_bndr@PLT	/* call elf_rtbndr(lmp, relndx, pc) */
472	movq	%rax, 16(%rbp)	/* store final destination */
473
474	popq	%r10
475	popq	%r9
476	popq	%r8
477	popq	%rcx
478	popq	%rdx
479	popq	%rsi
480	popq	%rdi
481	popq	%rax
482
483	movq	%rbp, %rsp
484	popq	%rbp
485
486	addq	$8, %rsp	/* pop 1st plt-pushed args */
487				/* the second arguement is used */
488				/* for the 'return' address to our */
489				/* final destination */
490
491	ret			/* invoke resolved function */
492	.size 	elf_rtbndr, .-elf_rtbndr
493#endif
494