xref: /titanic_41/usr/src/cmd/sgs/rtld/amd64/boot_elf.s (revision 70025d765b044c6d8594bb965a2247a61e991a99)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 *	Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 *	Use is subject to license terms.
25 */
26#pragma ident	"%Z%%M%	%I%	%E% SMI"
27
28#if	defined(lint)
29
30#include	<sys/types.h>
31#include	<_rtld.h>
32#include	<_audit.h>
33#include	<_elf.h>
34#include	<sys/regset.h>
35
/*
 * Lint-only definition of elf_plt_trace.  It is compiled only when `lint'
 * is defined and exists so that lint has a C-level definition to check
 * references against; the working routine is the assembly version in the
 * non-lint half of this file.
 */
/* ARGSUSED0 */
int
elf_plt_trace()
{
	int	status = 0;	/* placeholder result, never meaningful */

	return (status);
}
42#else
43
44#include	<link.h>
45#include	<_audit.h>
46#include	<sys/asm_linkage.h>
47
48	.file	"boot_elf.s"
49	.text
50
51/*
52 * On entry the 'glue code' has already  done the following:
53 *
54 *	pushq	%rbp
55 *	movq	%rsp, %rbp
56 *	subq	$0x10, %rsp
57 *	leaq	trace_fields(%rip), %r11
58 *	movq	%r11, -0x8(%rbp)
59 *	movq	$elf_plt_trace, %r11
60 *	jmp	*%r11
61 *
62 * so - -8(%rbp) contains the dyndata ptr
63 *
64 *	0x0	Addr		*reflmp
65 *	0x8	Addr		*deflmp
66 *	0x10	Word		symndx
67 *	0x14	Word		sb_flags
68 *	0x18	Sym		symdef.st_name
69 *	0x1c			symdef.st_info
70 *	0x1d			symdef.st_other
71 *	0x1e			symdef.st_shndx
72 *	0x20			symdef.st_value
73 *	0x28			symdef.st_size
74 *
75 * Also note - on entry 16 bytes have already been subtracted
76 * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
77 * the second 8 bytes are to align the stack and are available
78 * for use.
79 */
/*
 * Byte offsets of the fields in the dyndata block that the glue code
 * stores at -8(%rbp).
 */
#define	REFLMP_OFF		0x00	/* Addr	*reflmp */
#define	DEFLMP_OFF		0x08	/* Addr	*deflmp */
#define	SYMNDX_OFF		0x10	/* Word	symndx */
#define	SBFLAGS_OFF		0x14	/* Word	sb_flags */
#define	SYMDEF_OFF		0x18	/* Sym	symdef (starts at st_name) */
#define	SYMDEF_VALUE_OFF	0x20	/*	symdef.st_value */
/*
 * Local stack space storage for elf_plt_trace is allocated
 * as follows:
 *
 *  First - before we got here - %rsp has been decremented
 *  by 0x10 to make space for the dyndata ptr (and another
 *  free word).  In addition to that, we create space
 *  for the following:
 *
 *	La_amd64_regs	    8 * 8:	64
 *	prev_stack_size	    8		 8
 *	Saved regs:
 *	    %rdi			 8
 *	    %rsi			 8
 *	    %rdx			 8
 *	    %rcx			 8
 *	    %r8				 8
 *	    %r9				 8
 *	    %r10			 8
 *	    %r11			 8
 *	    %rax			 8
 *				    =======
 *			    Total:	144 (16byte aligned)
 *
 *  So - after subtracting a further 144 bytes from %rsp, the
 *  frame is laid out as follows (all offsets relative to %rbp):
 *
 *	-8(%rbp)	store dyndata ptr
 *	-16(%rbp)	store call destination
 *	-80(%rbp)	space for La_amd64_regs
 *	-88(%rbp)	prev stack size
 *	-96(%rbp)	entering %rdi
 *	-104(%rbp)	entering %rsi
 *	-112(%rbp)	entering %rdx
 *	-120(%rbp)	entering %rcx
 *	-128(%rbp)	entering %r8
 *	-136(%rbp)	entering %r9
 *	-144(%rbp)	entering %r10
 *	-152(%rbp)	entering %r11
 *	-160(%rbp)	entering %rax
 *
 */
/*
 * %rbp-relative offsets of the elf_plt_trace stack slots.  The first two
 * live in the 0x10 bytes reserved by the glue code; the remainder live in
 * the 144 bytes that elf_plt_trace reserves itself.
 */
#define	SPDYNOFF	-8	/* dyndata pointer */
#define	SPDESTOFF	-16	/* address of the call destination */
#define	SPLAREGOFF	-80	/* La_amd64_regs (64 bytes, up to -17) */
#define	SPPRVSTKOFF	-88	/* size of the duplicated stack area */
#define	SPRDIOFF	-96	/* entering %rdi */
#define	SPRSIOFF	-104	/* entering %rsi */
#define	SPRDXOFF	-112	/* entering %rdx */
#define	SPRCXOFF	-120	/* entering %rcx */
#define	SPR8OFF		-128	/* entering %r8 */
#define	SPR9OFF		-136	/* entering %r9 */
#define	SPR10OFF	-144	/* entering %r10 */
#define	SPR11OFF	-152	/* entering %r11 */
#define	SPRAXOFF	-160	/* entering %rax */
140
/*
 * elf_plt_trace - auditing trampoline for a traced PLT entry.
 *
 * This routine is entered by a jump from the per-symbol glue code, never
 * by a call, so on entry:
 *
 *	8(%rbp)		return address of the original caller
 *	0(%rbp)		caller's %rbp (pushed by the glue code)
 *	-8(%rbp)	dyndata pointer (stored by the glue code)
 *	%rsp		%rbp - 0x10
 *
 * Processing:
 *
 *   1.	Save the six integer argument registers, %r10, %r11 and %rax.
 *
 *   2.	Unless LA_SYMB_NOPLTENTER is set in sb_flags, call
 *	audit_pltenter() and use its return value as the destination.
 *	Otherwise the destination is symdef.st_value.
 *
 *   3.	If AF_PLTEXIT is clear in audit_flags, or LA_SYMB_NOPLTEXIT is
 *	set in sb_flags, tear the frame down and `ret' into the
 *	destination, which then returns straight to the original caller.
 *
 *   4.	Otherwise duplicate the caller's stack arguments, call the
 *	destination, pass its %rax to audit_pltexit() and return whatever
 *	audit_pltexit() leaves in %rax.
 *
 * NOTE(review): %xmm0-%xmm7 are not saved around audit_pltenter(), and on
 * the pltexit path only %rax is carried back to the caller (%rdx is
 * reloaded with its entry value).  Confirm whether floating point
 * arguments and wide return values are expected to survive auditing.
 */
	.globl	elf_plt_trace
	.type	elf_plt_trace,@function
	.align 16
elf_plt_trace:
	subq	$144,%rsp	/ create some local storage
	movq	%rdi, SPRDIOFF(%rbp)
	movq	%rsi, SPRSIOFF(%rbp)
	movq	%rdx, SPRDXOFF(%rbp)
	movq	%rcx, SPRCXOFF(%rbp)
	movq	%r8, SPR8OFF(%rbp)
	movq	%r9, SPR9OFF(%rbp)
	movq	%r10, SPR10OFF(%rbp)
	movq	%r11, SPR11OFF(%rbp)
	movq	%rax, SPRAXOFF(%rbp)

	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
	je	.start_pltenter
	/*
	 * la_pltenter() is suppressed for this symbol: bind
	 * directly to the symbol value.
	 */
	movq	SYMDEF_VALUE_OFF(%rax), %rdi
	movq	%rdi, SPDESTOFF(%rbp)		/ save destination address
	jmp	.end_pltenter

.start_pltenter:
	/*
	 * save all registers into La_amd64_regs
	 */
	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
	leaq	8(%rbp), %rdi
	movq	%rdi, 0(%rsi)		/ la_rsp (address of the return addr)
	movq	0(%rbp), %rdi
	movq	%rdi, 8(%rsi)		/ la_rbp (rbp of the original caller)
	movq	SPRDIOFF(%rbp), %rdi
	movq	%rdi, 16(%rsi)		/ la_rdi
	movq	SPRSIOFF(%rbp), %rdi
	movq	%rdi, 24(%rsi)		/ la_rsi
	movq	SPRDXOFF(%rbp), %rdi
	movq	%rdi, 32(%rsi)		/ la_rdx
	movq	SPRCXOFF(%rbp), %rdi
	movq	%rdi, 40(%rsi)		/ la_rcx
	movq	SPR8OFF(%rbp), %rdi
	movq	%rdi, 48(%rsi)		/ la_r8
	movq	SPR9OFF(%rbp), %rdi
	movq	%rdi, 56(%rsi)		/ la_r9

	/*
	 * prepare for call to la_pltenter
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	leaq	SBFLAGS_OFF(%r11), %r9		/ arg6 (&sb_flags)
	leaq	SPLAREGOFF(%rbp), %r8		/ arg5 (&La_amd64_regs)
	movl	SYMNDX_OFF(%r11), %ecx		/ arg4 (symndx)
	leaq	SYMDEF_OFF(%r11), %rdx		/ arg3 (&Sym)
	movq	DEFLMP_OFF(%r11), %rsi		/ arg2 (dlmp)
	movq	REFLMP_OFF(%r11), %rdi		/ arg1 (rlmp)
	call	audit_pltenter@PLT
	movq	%rax, SPDESTOFF(%rbp)		/ save calling address
.end_pltenter:

	/*
	 * If *no* la_pltexit() routines exist
	 * we do not need to keep the stack frame
	 * before we call the actual routine.  Instead we
	 * jump to it and remove our stack from the stack
	 * at the same time.
	 */
	movl	audit_flags(%rip), %eax
	testl	$AF_PLTEXIT, %eax		/ value of audit.h:AF_PLTEXIT
	je	.bypass_pltexit
	/*
	 * Has the *nopltexit* flag been set for this entry point
	 */
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
	je	.start_pltexit

.bypass_pltexit:
	/*
	 * No PLTEXIT processing required.
	 *
	 * Rewrite the two words at the top of the frame so that a
	 * `popq %rbp; ret' pair restores the caller's %rbp and then
	 * transfers to the destination.  The dyndata slot is no longer
	 * needed and is reused for the saved %rbp.
	 */
	movq	0(%rbp), %r11
	movq	%r11, -8(%rbp)			/ move prev %rbp
	movq	SPDESTOFF(%rbp), %r11		/ r11 == calling destination
	movq	%r11, 0(%rbp)			/ store destination at top

	/
	/ Restore registers
	/
	movq	SPRDIOFF(%rbp), %rdi
	movq	SPRSIOFF(%rbp), %rsi
	movq	SPRDXOFF(%rbp), %rdx
	movq	SPRCXOFF(%rbp), %rcx
	movq	SPR8OFF(%rbp), %r8
	movq	SPR9OFF(%rbp), %r9
	movq	SPR10OFF(%rbp), %r10
	movq	SPR11OFF(%rbp), %r11
	movq	SPRAXOFF(%rbp), %rax

	subq	$8, %rbp			/ adjust %rbp for 'ret'
	movq	%rbp, %rsp			/
	/*
	 * At this point, after a little doctoring, we should
	 * have the following on the stack:
	 *
	 *	16(%rsp):  ret addr
	 *	8(%rsp):  dest_addr
	 *	0(%rsp):  Previous %rbp
	 *
	 * So - we pop the previous %rbp, and then
	 * ret to our final destination.
	 */
	popq	%rbp				/
	ret					/ jmp to final destination
						/ and clean up stack :)

.start_pltexit:
	/*
	 * In order to call the destination procedure and then return
	 * to audit_pltexit() for post analysis we must first grow
	 * our stack frame and then duplicate the original callers
	 * stack state.  This duplicates all of the arguments
	 * that were to be passed to the destination procedure.
	 *
	 * The source starts just above the saved %rbp and the return
	 * address; its size is taken to be the distance from there to
	 * the caller's own frame pointer.
	 */
	movq	%rbp, %rdi			/
	addq	$16, %rdi			/    %rdi = src
	movq	(%rbp), %rdx			/
	subq	%rdi, %rdx			/    %rdx == prev frame sz
	/*
	 * If audit_argcnt > 0 then we limit the number of
	 * arguments that will be duplicated to audit_argcnt.
	 *
	 * If (prev_stack_size > (audit_argcnt * 8))
	 *	prev_stack_size = audit_argcnt * 8;
	 */
	movl	audit_argcnt(%rip),%eax		/   %eax = audit_argcnt
	cmpl	$0, %eax
	jle	.grow_stack
	leaq	(,%rax,8), %rax			/    %rax = audit_argcnt * 8
	cmpq	%rax,%rdx
	jle	.grow_stack
	movq	%rax, %rdx
	/*
	 * Grow the stack and duplicate the arguments of the
	 * original caller.
	 *
	 * %rdx is the number of bytes to copy.  The amount actually
	 * reserved is %rdx rounded up to a multiple of 16 so that %rsp
	 * keeps its 16-byte alignment at the call to the destination;
	 * clamping to audit_argcnt * 8 can otherwise leave an odd number
	 * of 8-byte words.  Any padding lies above the copied arguments.
	 * The reserved amount is what is recorded for the later cleanup.
	 */
.grow_stack:
	leaq	15(%rdx), %rax			/    %rax = copy size rounded
	andq	$-16, %rax			/           up to 16 bytes
	subq	%rax, %rsp			/    grow the stack
	movq	%rax, SPPRVSTKOFF(%rbp)		/    -88(%rbp) == reserved sz
	movq	%rsp, %rcx			/    %rcx = dest
	addq	%rcx, %rdx			/    %rdx == tail of dest
.while_base:
	cmpq	%rdx, %rcx			/   while (dest < tail) {
	jae	.end_while			/     (addresses: unsigned)
	movq	(%rdi), %rsi
	movq	%rsi,(%rcx)			/        *dest = *src
	addq	$8, %rdi			/	 src++
	addq	$8, %rcx			/        dest++
	jmp	.while_base			/    }

	/*
	 * The words at the top of the stack are now a duplicate of
	 * the stack arguments of the original calling procedure.
	 */
.end_while:
	/
	/ Restore registers
	/
	movq	SPRDIOFF(%rbp), %rdi
	movq	SPRSIOFF(%rbp), %rsi
	movq	SPRDXOFF(%rbp), %rdx
	movq	SPRCXOFF(%rbp), %rcx
	movq	SPR8OFF(%rbp), %r8
	movq	SPR9OFF(%rbp), %r9
	movq	SPR10OFF(%rbp), %r10
	movq	SPR11OFF(%rbp), %r11
	movq	SPRAXOFF(%rbp), %rax

	/*
	 * Call to destination function - we'll return here
	 * for pltexit monitoring.
	 */
	call	*SPDESTOFF(%rbp)

	addq	SPPRVSTKOFF(%rbp), %rsp	/ cleanup dupped stack

	/
	/ prepare for call to audit_pltexit()
	/
	/ symndx is a 32-bit Word that is immediately followed by
	/ sb_flags, so it must be fetched with a 32-bit load.
	/
	movq	SPDYNOFF(%rbp), %r11		/ %r11 = &dyndata
	movl	SYMNDX_OFF(%r11), %r8d		/ arg5 (symndx)
	leaq	SYMDEF_OFF(%r11), %rcx		/ arg4 (&Sym)
	movq	DEFLMP_OFF(%r11), %rdx		/ arg3 (dlmp)
	movq	REFLMP_OFF(%r11), %rsi		/ arg2 (rlmp)
	movq	%rax, %rdi			/ arg1 (returnval)
	call	audit_pltexit@PLT

	/*
	 * Clean up after ourselves and return to the
	 * original calling procedure.
	 */

	/
	/ Restore registers
	/
	movq	SPRDIOFF(%rbp), %rdi
	movq	SPRSIOFF(%rbp), %rsi
	movq	SPRDXOFF(%rbp), %rdx
	movq	SPRCXOFF(%rbp), %rcx
	movq	SPR8OFF(%rbp), %r8
	movq	SPR9OFF(%rbp), %r9
	movq	SPR10OFF(%rbp), %r10
	movq	SPR11OFF(%rbp), %r11
	/ rax already contains return value

	movq	%rbp, %rsp			/
	popq	%rbp				/
	ret					/ return to caller
	.size	elf_plt_trace, .-elf_plt_trace
359#endif
360
/*
 * We got here because a call to a function resolved to a procedure
 * linkage table entry.  That entry pushed its relocation index and jumped
 * to the first PLT entry (.PLT0), which in turn pushed GOT[1] and jumped
 * through GOT[2] to elf_rtbndr.
 *
 * the code sequence that got us here was:
 *
 * .PLT0:
 *	pushq	GOT+8(%rip)	#GOT[1]
 *	jmp	*GOT+16(%rip)	#GOT[2]
 *	nop
 *	nop
 *	nop
 *	nop
 *	...
 * PLT entry for foo:
 *	jmp	*name1@GOTPCREL(%rip)
 *	pushq	$rel.plt.foo
 *	jmp	PLT0
 *
 * At entry, the stack looks like this:
 *
 *	return address			16(%rsp)
 *	$rel.plt.foo	(plt index)	8(%rsp)
 *	lmp				0(%rsp)
 *
 */
388#if defined(lint)
389
390extern unsigned long	elf_bndr(Rt_map *, unsigned long, caddr_t);
391
392void
393elf_rtbndr(Rt_map * lmp, unsigned long reloc, caddr_t pc)
394{
395	(void) elf_bndr(lmp, reloc, pc);
396}
397
398#else
/*
 * elf_rtbndr - lazy binding entry point, reached by a jump from .PLT0.
 *
 * On entry (see the stack picture above):
 *
 *	16(%rsp)	return address of the original caller
 *	8(%rsp)		relocation index pushed by the PLT entry
 *	0(%rsp)		lmp pushed by .PLT0
 *
 * Every register that can carry an argument to the function being bound
 * is preserved across the call to elf_bndr(): %rax (SSE register count
 * for variadic calls), %rdi, %rsi, %rdx, %rcx, %r8, %r9, %r10 and
 * %xmm0-%xmm7.  elf_bndr() is ordinary C code and is free to clobber all
 * of them.
 *
 * The address returned by elf_bndr() is written over the relocation
 * index slot, the lmp slot is discarded, and a `ret' then transfers to
 * the resolved function with the caller's return address on top of the
 * stack.
 */
#define	RTBNDR_XMM_SZ	128		/* 8 SSE registers * 16 bytes */

	.weak	_elf_rtbndr
	_elf_rtbndr = elf_rtbndr

	ENTRY(elf_rtbndr)

	pushq	%rbp
	movq	%rsp, %rbp

	pushq	%rax		/* for SSE register count */
	pushq	%rdi		/* arg 0 .. */
	pushq	%rsi
	pushq	%rdx
	pushq	%rcx
	pushq	%r8
	pushq	%r9		/* .. arg 5 */
	pushq	%r10		/* call chain reg */

	/*
	 * Save the floating point argument registers.  Unaligned moves
	 * are used so that the save does not itself depend on how the
	 * caller aligned its stack.
	 */
	subq	$RTBNDR_XMM_SZ, %rsp
	movdqu	%xmm0, 0(%rsp)
	movdqu	%xmm1, 16(%rsp)
	movdqu	%xmm2, 32(%rsp)
	movdqu	%xmm3, 48(%rsp)
	movdqu	%xmm4, 64(%rsp)
	movdqu	%xmm5, 80(%rsp)
	movdqu	%xmm6, 96(%rsp)
	movdqu	%xmm7, 112(%rsp)

	movq	8(%rbp), %rdi	/* arg1 - *lmp */
	movq	16(%rbp), %rsi	/* arg2 - reloc index */
	movq	24(%rbp), %rdx	/* arg3 - pc of caller */
	call	elf_bndr@PLT	/* call elf_bndr(lmp, relndx, pc) */
	movq	%rax, 16(%rbp)	/* store final destination */

	movdqu	0(%rsp), %xmm0
	movdqu	16(%rsp), %xmm1
	movdqu	32(%rsp), %xmm2
	movdqu	48(%rsp), %xmm3
	movdqu	64(%rsp), %xmm4
	movdqu	80(%rsp), %xmm5
	movdqu	96(%rsp), %xmm6
	movdqu	112(%rsp), %xmm7
	addq	$RTBNDR_XMM_SZ, %rsp

	popq	%r10
	popq	%r9
	popq	%r8
	popq	%rcx
	popq	%rdx
	popq	%rsi
	popq	%rdi
	popq	%rax

	movq	%rbp, %rsp
	popq	%rbp

	addq	$8, %rsp	/* pop 1st plt-pushed args */
				/* the second argument is used */
				/* for the 'return' address to our */
				/* final destination */

	ret			/* invoke resolved function */
	.size	elf_rtbndr, .-elf_rtbndr
441#endif
442