xref: /titanic_41/usr/src/cmd/sgs/rtld/sparcv9/boot_elf.s (revision 3db86aab554edbb4244c8d1a1c90f152eee768af)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 *	Copyright (c) 1988 AT&T
24 *	  All Rights Reserved
25 *
26 *
27 *	Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28 *	Use is subject to license terms.
29 */
30
31#pragma ident	"%Z%%M%	%I%	%E% SMI"
32
33#include	"machdep.h"
34#include	"_audit.h"
35#if	defined(lint)
36#include	<sys/types.h>
37#include	"_rtld.h"
38#else
39#include	<sys/stack.h>
40#include	<sys/asm_linkage.h>
41
42	.file	"%M%"
43	.seg	".text"
44#endif
45
46
47
48/*
49 * We got here because the initial call to a function resolved to a procedure
50 * linkage table entry.  That entry did a branch to the first PLT entry, which
51 * in turn did a call to elf_rtbndr (refer elf_plt_init()).
52 *
53 * the code sequence that got us here was:
54 *
55 * PLT entry for foo():
56 *	sethi	(.-PLT0), %g1
57 *	ba,a	.PLT0				! patched atomically 2nd
58 *	nop					! patched 1st
59 *	nop
60 *	nop
61 *	nop
62 *	nop
63 *	nop
64 *
 * Therefore on entry, %i7 has the address of the call, which will be added
 * to the offset to the plt entry in %g1 to calculate the plt entry address.
 * We must also subtract 4 because the address of PLT0 points to the
 * save instruction before the call.
69 *
70 * The PLT entry is rewritten in one of several ways.  For the full 64-bit
71 * span, the following sequence is generated:
72 *
73 *	nop
74 *	sethi	%hh(entry_pt), %g1
75 *	sethi	%lm(entry_pt), %g5
76 *	or	%g1, %hm(entry_pt), %g1
77 *	sllx	%g1, 32, %g1
78 *	or	%g1, %g5, %g5
79 *	jmpl	%g5 + %lo(entry_pt), %g0
80 *	nop
81 *
82 * Shorter code sequences are possible, depending on reachability
83 * constraints.  Note that 'call' is not as useful as it might seem in
84 * this context, because it is only capable of plus or minus 2Gbyte
85 * PC-relative jumps, and the rdpc instruction is very slow.
86 *
87 * At the time of writing, the present and future SPARC CPUs that will use
88 * this code are only capable of addressing the bottom 43-bits and top 43-bits
89 * of the address space.  And since shared libraries are placed at the top
90 * of the address space, the "top 44-bits" sequence will effectively always be
91 * used.  See elf_plt_write() below.  The "top 32-bits" are used when they
92 * can reach.
93 */
94
#if	defined(lint)

extern unsigned long	elf_bndr(Rt_map *, unsigned long, caddr_t);

/*
 * Lint-only model of the lazy binding trampoline.  The real routine is
 * entered from .PLT1 in a new register window, with %o0 holding the
 * result of the PLT entry's "sethi (. - .PLT0)" and %o1 holding the pc
 * of the jmpl inside .PLT1 that brought us here.  All of the actual
 * work is delegated to elf_bndr().
 */
void
elf_rtbndr(Rt_map *lmp, unsigned long pltoff, caddr_t from)
{
	(void) elf_bndr(lmp, pltoff, from);
}

#else
	.weak	_elf_rtbndr		! dbx likes to rummage around for
	_elf_rtbndr = elf_rtbndr	! our symbols - keep it happy

	/*
	 * elf_rtbndr - near (32-bit offset) lazy binding entry point.
	 *
	 * In (after our own save):
	 *	%i0	sethi-encoded offset of the calling PLT entry
	 *	%i1	pc of the jmpl instruction in .PLT1
	 *	%i3	return address of the original caller (copied from
	 *		the PLT window's %i7 before the window shift)
	 *
	 * Calls elf_bndr(lmp, pltoff, from) and then jumps to the address
	 * it returns, unwinding both our window and the one .PLT1 made.
	 */
	ENTRY(elf_rtbndr)
	mov	%i7, %o3		! caller's address; visible as %i3
					!   once we have our own window
	save	%sp, -SA(MINFRAME), %sp
	mov	%g4, %l5		! park %g4 in a local over the call

	!
	! The .PLT1 jmpl sits 0x38 bytes past .PLT0 and .PLT2 starts 0x40
	! bytes past .PLT0, so the link map pointer held in the first
	! doubleword of .PLT2 is 8 bytes beyond the jmpl's pc.
	!
	ldx	[%i1 + 0x8], %o0	! arg 1: lmp = .PLT2[0]
	srl	%i0, 10, %o1		! arg 2: undo the sethi shift
	call	elf_bndr		! returns function address in %o0
	mov	%i3, %o2		! (delay) arg 3: caller's address

	mov	%l5, %g4		! put %g4 back
	restore	%o0, %g0, %g1		! leave our window, carrying the
					!   bound address out in %g1
	jmp	%g1			! enter the bound function ...
	restore				! ... while leaving .PLT1's window
	SET_SIZE(elf_rtbndr)

#endif
131
132
#if	defined(lint)
/*
 * Lint-only model of the far binding trampoline; like elf_rtbndr() it
 * simply hands its arguments to elf_bndr().
 */
void
elf_rtbndr_far(Rt_map *lmp, unsigned long pltoff, caddr_t from)
{
	(void) elf_bndr(lmp, pltoff, from);
}
#else
/*
 * elf_rtbndr_far - lazy binding entry point reached through .PLT0.
 *
 * In (after our own save):
 *	%i0	value handed over by the calling PLT entry (a pc)
 *	%i1	pc of the jmpl instruction in .PLT0
 *	%i3	return address of the original caller
 *
 * The PLT offset passed to elf_bndr() is %i0 - .PLT0 - 0x10.
 */
ENTRY(elf_rtbndr_far)
	mov	%i7, %o3		! caller's address; %i3 after save
	save	%sp, -SA(MINFRAME), %sp
	mov	%g4, %l5		! preserve %g4 across the call

	sub	%i1, 0x18, %l0		! %l0 = .PLT0 (its jmpl is at +0x18)
	ldx	[%l0 + 0x40], %o0	! arg 1: lmp = .PLT2[0]
	add	%l0, 0x10, %l1		! %l1 = .PLT0 + 0x10
	sub	%i0, %l1, %o1		! arg 2: pltoff = pc - 0x10 - .PLT0
	call	elf_bndr		! returns function address in %o0
	mov	%i3, %o2		! (delay) arg 3: caller's address

	mov	%l5, %g4		! put %g4 back
	restore	%o0, %g0, %g1		! leave our window, carrying the
					!   bound address out in %g1
	jmp	%g1			! enter the bound function ...
	restore				! ... while leaving .PLT0's window
SET_SIZE(elf_rtbndr_far)
#endif
158
159
160/*
161 * Initialize a plt entry so that function calls go to 'bindfunc'
162 * (We parameterize the binding function here because we call this
163 * routine twice - once for PLT0 and once for PLT1 with different
164 * binding functions.)
165 *
166 * The plt entries (PLT0 and PLT1) look like:
167 *
168 *	save	%sp, -176, %sp
169 *	sethi	%hh(bindfunc), %l0
170 *	sethi	%lm(bindfunc), %l1
171 *	or	%l0, %hm(bindfunc), %l0
172 *	sllx	%l0, 32, %l0
173 *	or	%l0, %l1, %l0
174 *	jmpl	%l0 + %lo(bindfunc), %o1
175 *	mov	%g1, %o0
176 */
177
/*
 * Instruction templates for the reserved PLT entries, listed in the
 * order in which they appear in the entry.  Address fragments are or'ed
 * into the zeroed immediate fields by elf_plt_init().
 */
#define	M_SAVE_SP176SP	0x9de3bf50	/* [0]	save	%sp, -176, %sp */
#define	M_SETHI_L0	0x21000000	/* [1]	sethi	0x0, %l0 */
#define	M_SETHI_L1	0x23000000	/* [2]	sethi	0x0, %l1 */
#define	M_OR_L0L0	0xa0142000	/* [3]	or	%l0, 0x0, %l0 */
#define	M_SLLX_L032L0	0xa12c3020	/* [4]	sllx	%l0, 32, %l0 */
#define	M_OR_L0L1L0	0xa0140011	/* [5]	or	%l0, %l1, %l0 */
#define	M_JMPL_L0O1	0x93c42000	/* [6]	jmpl	%l0 + 0, %o1 */
#define	M_MOV_G1O0	0x90100001	/* [7]	or	%g0, %g1, %o0 */

#if	defined(lint)

/*
 * lint has no notion of the assembler's %hh/%lm/%hm/%lo operators, so
 * they are stubbed out here.
 */
#define	HH22(x)	0
#define	LM22(x)	0
#define	HM10(x)	0
#define	LO10(x)	0

/*
 * Lint-only model: fill the eight instruction slots of a reserved PLT
 * entry so that it transfers control to bindfunc.
 */
/* ARGSUSED */
void
elf_plt_init(void *plt, caddr_t bindfunc)
{
	uint_t	*insn = (uint_t *)plt;

	*insn++ = M_SAVE_SP176SP;
	*insn++ = M_SETHI_L0 | HH22(bindfunc);
	*insn++ = M_SETHI_L1 | LM22(bindfunc);
	*insn++ = M_OR_L0L0 | HM10(bindfunc);
	*insn++ = M_SLLX_L032L0;
	*insn++ = M_OR_L0L1L0;
	*insn++ = M_JMPL_L0O1 | LO10(bindfunc);
	*insn = M_MOV_G1O0;
}

#else
	/*
	 * elf_plt_init(plt, bindfunc)
	 *
	 * In (after save):
	 *	%i0	address of the eight-instruction PLT entry to fill
	 *	%i1	64-bit address of the binding function
	 *
	 * The entry is written from slot 0 upwards, and each doubleword
	 * is flushed as soon as both of its instructions are in place.
	 */
	ENTRY(elf_plt_init)
	save	%sp, -SA(MINFRAME), %sp

	! plt[0]: save %sp, -176, %sp
	setuw	M_SAVE_SP176SP, %l0
	st	%l0, [%i0]

	! plt[1]: sethi %hh(bindfunc), %l0
	srlx	%i1, 42, %l1		! address bits 63-42
	setuw	M_SETHI_L0, %l0
	or	%l0, %l1, %l0
	st	%l0, [%i0 + 0x4]
	iflush	%i0			! plt[0] and plt[1] are complete

	! plt[2]: sethi %lm(bindfunc), %l1
	srl	%i1, 10, %l1		! address bits 31-10 (srl zero
					!   fills the upper word)
	setuw	M_SETHI_L1, %l0
	or	%l0, %l1, %l0
	st	%l0, [%i0 + 0x8]

	! plt[3]: or %l0, %hm(bindfunc), %l0
	srlx	%i1, 32, %l1
	and	%l1, 0x3ff, %l1		! address bits 41-32
	setuw	M_OR_L0L0, %l0
	or	%l0, %l1, %l0
	st	%l0, [%i0 + 0xc]
	iflush	%i0 + 8			! plt[2] and plt[3] are complete

	! plt[4]: sllx %l0, 32, %l0
	setuw	M_SLLX_L032L0, %l0
	st	%l0, [%i0 + 0x10]

	! plt[5]: or %l0, %l1, %l0
	setuw	M_OR_L0L1L0, %l0
	st	%l0, [%i0 + 0x14]
	iflush	%i0 + 0x10		! plt[4] and plt[5] are complete

	! plt[6]: jmpl %l0 + %lo(bindfunc), %o1
	and	%i1, 0x3ff, %l1		! address bits 9-0
	setuw	M_JMPL_L0O1, %l0
	or	%l0, %l1, %l0
	st	%l0, [%i0 + 0x18]

	! plt[7]: mov %g1, %o0
	setuw	M_MOV_G1O0, %l0
	st	%l0, [%i0 + 0x1c]
	iflush	%i0 + 0x18		! plt[6] and plt[7] are complete

	ret
	restore
	SET_SIZE(elf_plt_init)
#endif
262
263
264
265
#if	defined(lint)
/*
 *  The V9 ABI assigns the link map identifier, the
 *  Rt_map pointer, to the start of .PLT2.  This is the
 *  lint-only model of that single 64-bit store.
 */
void
elf_plt2_init(unsigned int *plt2, Rt_map * lmp)
{
	/* LINTED */
	unsigned long	*slot = (unsigned long *)plt2;

	*slot = (unsigned long)lmp;
}
#else
	/*
	 * elf_plt2_init(plt2, lmp)
	 *
	 * In:	%o0	address of .PLT2
	 *	%o1	link map pointer to record there
	 *
	 * Stores lmp in the first doubleword of .PLT2 and flushes it.
	 */
	ENTRY(elf_plt2_init)
	stx	%o1, [%o0]		! .PLT2[0] = lmp
	iflush	%o0			! .. and flush
	retl
	nop
	SET_SIZE(elf_plt2_init)
#endif
284
285
286
287/*
288 * After the first call to a plt, elf_bndr() will have determined the true
289 * address of the function being bound.  The plt is now rewritten so that
290 * any subsequent calls go directly to the bound function.  If the library
291 * to which the function belongs is being profiled refer to _plt_cg_write.
292 *
293 * For complete 64-bit spanning, the new plt entry is:
294 *
295 *	nop
296 *	sethi	%hh(function address), %g1
297 *	sethi	%lm(function address), %g5
298 *	or	%g1, %hm(function address), %g1
299 *	sllx	%g1, 32, %g1
300 *	or	%g1, %g5, %g5
 *	jmpl	%g5 + %lo(function address), %g0
302 *	nop
303 *
304 * However, shorter instruction sequences are possible and useful.
305 * This version gets us anywhere in the top 44 bits of the
306 * address space - since this is where shared objects live most
307 * of the time, this case is worth optimizing.
308 *
309 *	nop
310 *	sethi	%h44(~function_address), %g5
311 *	xnor	%g5, %m44(~function address), %g1
312 *	sllx	%g1, 12, %g1
313 *	jmpl	%g1 + %l44(function address), %g0
314 *	nop
315 *	nop
316 *	nop
317 *
318 * This version gets anywhere in the top 32 bits:
319 *
320 *	nop
321 *	sethi	%hi(~function_address), %g5
322 *	xnor	%g5, %lo(~function_address), %g1
323 *	jmpl	%g1, %g0
324 *	nop
325 *	nop
326 *	nop
327 *	nop
328 *
 * This version gets us to a destination within
 * +- 8megs of the PLT's address:
331 *
332 *	nop
333 *	ba,a	<dest>
334 *	nop
335 *	nop
336 *	nop
337 *	nop
338 *	nop
339 *	nop
340 *
 * This version gets us to a destination within
 * +- 2megs of the PLT's address:
343 *
344 *	nop
345 *	ba,a,pt	%icc, <dest>
346 *	nop
347 *	nop
348 *	nop
349 *	nop
350 *	nop
351 *	nop
352 *
353 *
354 * The PLT is written in reverse order to ensure re-entrant behaviour.
355 * Note that the first two instructions must be overwritten with a
356 * single stx.
357 *
358 * Note that even in the 44-bit case, we deliberately use both %g5 and
359 * %g1 to prevent anyone accidentally relying on either of them being
360 * non-volatile across a function call.
361 */
362
/*
 * Instruction templates used when rewriting a bound PLT entry.  Each
 * has its immediate field zeroed; the plt_*() writers below or the
 * relevant address bits in.
 */

/* Shared by every sequence */
#define	M_NOP		0x01000000	/* sethi 0x0, %g0 */
#define	M_SETHI_G5	0x0b000000	/* sethi 0x0, %g5 */

/* Full 64-bit sequence */
#define	M_SETHI_G1	0x03000000	/* sethi 0x0, %g1 */
#define	M_OR_G1G1	0x82106000	/* or %g1, 0x0, %g1 */
#define	M_SLLX_G132G1	0x83287020	/* sllx %g1, 32, %g1 */
#define	M_OR_G1G5G5	0x8a104005	/* or %g1, %g5, %g5 */
#define	M_JMPL_G5G0	0x81c16000	/* jmpl %g5 + 0, %g0 */

/* Compact "top 44 bits" and "top 32 bits" sequences */
#define	M_XNOR_G5G1	0x82396000	/* xnor	%g5, 0, %g1 */
#define	M_SLLX_G112G1	0x8328700c	/* sllx %g1, 12, %g1 */
#define	M_JMPL_G1G0	0x81c06000	/* jmpl %g1 + 0, %g0 */

#if	defined(lint)

/*
 * C equivalents of the assembler's %h44/%m44/%l44 operators, for the
 * lint models only: BITS(v, u, l) extracts bits u..l of v.
 */
/* ARGSUSED */
#define	MASK(m)		((1ul << (m)) - 1ul)
#define	BITS(v, u, l)	(((v) >> (l)) & MASK((u) - (l) + 1))
#define	H44(v)		BITS(v, 43, 22)
#define	M44(v)		BITS(v, 21, 12)
#define	L44(v)		BITS(v, 11, 0)

#endif
385
#if	defined(lint)

/*
 * Lint-only model of the "top 32 bits" PLT rewrite: slots 3 and 2 are
 * written first, then slots 0 and 1 are replaced by one 64-bit store.
 */
void
/* ARGSUSED1 */
plt_upper_32(uintptr_t pc, uintptr_t symval)
{
	uint_t		*insn = (uint_t *)pc;
	ulong_t		*head = (ulong_t *)pc;
	/* LINTED */
	ulong_t		inverted = ~(ulong_t)symval;
	ulong_t		pair;

	insn[3] = M_JMPL_G1G0;
	insn[2] = (uint_t)(M_XNOR_G5G1 | LO10(inverted));

	pair = (ulong_t)M_NOP << 32;
	pair |= M_SETHI_G5 | LM22(inverted);
	*head = pair;
}

#else


	/*
	 * plt_upper_32(pc, symval)
	 *
	 * In:	%o0	address of the PLT entry to rewrite
	 *	%o1	address of the bound function
	 *
	 * Emits the compact sequence for a destination in the top 32
	 * bits of the address space:
	 *
	 *	nop
	 *	sethi	%hi(~symval), %g5
	 *	xnor	%g5, %lo(~symval), %g1
	 *	jmpl	%g1, %g0
	 *
	 * The entry is written back to front, and the first two slots
	 * are replaced together by a single stx.
	 */
	ENTRY(plt_upper_32)
	not	%o1, %o5		! %o5 = ~symval

	! plt[3]: jmpl %g1, %g0
	setuw	M_JMPL_G1G0, %o2
	st	%o2, [%o0 + 0xc]
	iflush	%o0 + 0xc

	! plt[2]: xnor %g5, %lo(~symval), %g1
	and	%o5, 0x3ff, %o3		! bits 9-0 of ~symval
	setuw	M_XNOR_G5G1, %o2
	or	%o2, %o3, %o2
	st	%o2, [%o0 + 0x8]
	iflush	%o0 + 0x8

	! plt[0], plt[1]: nop ; sethi %hi(~symval), %g5
	srl	%o5, 10, %o3		! bits 31-10 of ~symval
	setuw	M_SETHI_G5, %o2
	or	%o2, %o3, %o2		! %o2 = second instruction of pair
	setuw	M_NOP, %o4
	sllx	%o4, 32, %o4		! nop occupies the upper word
	or	%o4, %o2, %o2
	stx	%o2, [%o0]		! both slots change in one store
	retl
	iflush	%o0			! (delay) .. and flush
	SET_SIZE(plt_upper_32)
#endif	/* defined lint */
434
435
#if	defined(lint)

/*
 * Lint-only model of the "top 44 bits" PLT rewrite: slots 4, 3 and 2
 * are written first, then slots 0 and 1 are replaced by one 64-bit
 * store.
 */
void
/* ARGSUSED1 */
plt_upper_44(uintptr_t pc, uintptr_t symval)
{
	uint_t		*insn = (uint_t *)pc;
	ulong_t		*head = (ulong_t *)pc;
	ulong_t		target = (ulong_t)symval;
	ulong_t		inverted = ~target;
	ulong_t		pair;

	/* LINTED */
	insn[4] = (uint_t)(M_JMPL_G1G0 | L44(target));
	insn[3] = M_SLLX_G112G1;
	/* LINTED */
	insn[2] = (uint_t)(M_XNOR_G5G1 | M44(inverted));

	pair = (ulong_t)M_NOP << 32;
	pair |= M_SETHI_G5 | H44(inverted);
	*head = pair;
}

#else


	/*
	 * plt_upper_44(pc, symval)
	 *
	 * In:	%o0	address of the PLT entry to rewrite
	 *	%o1	address of the bound function
	 *
	 * Emits the compact sequence for a destination in the top 44
	 * bits of the address space:
	 *
	 *	nop
	 *	sethi	%h44(~symval), %g5
	 *	xnor	%g5, %m44(~symval), %g1
	 *	sllx	%g1, 12, %g1
	 *	jmpl	%g1 + %l44(symval), %g0
	 *
	 * The %h44 field is taken with an unmasked shift, so this relies
	 * on bits 63-44 of ~symval being zero.  The entry is written
	 * back to front, and the first two slots are replaced together
	 * by a single stx.
	 */
	ENTRY(plt_upper_44)
	not	%o1, %o5		! %o5 = ~symval

	! plt[4]: jmpl %g1 + %l44(symval), %g0
	and	%o1, 0xfff, %o3		! bits 11-0 of symval
	sethi	%hi(M_JMPL_G1G0), %o2
	or	%o2, %lo(M_JMPL_G1G0), %o2
	or	%o2, %o3, %o2
	st	%o2, [%o0 + 0x10]
	iflush	%o0 + 0x10

	! plt[3]: sllx %g1, 12, %g1
	sethi	%hi(M_SLLX_G112G1), %o2
	or	%o2, %lo(M_SLLX_G112G1), %o2
	st	%o2, [%o0 + 0xc]

	! plt[2]: xnor %g5, %m44(~symval), %g1
	srlx	%o5, 12, %o3
	and	%o3, 0x3ff, %o3		! bits 21-12 of ~symval
	sethi	%hi(M_XNOR_G5G1), %o2
	or	%o2, %lo(M_XNOR_G5G1), %o2
	or	%o2, %o3, %o2
	st	%o2, [%o0 + 8]
	iflush	%o0 + 8

	! plt[0], plt[1]: nop ; sethi %h44(~symval), %g5
	srlx	%o5, 22, %o3		! bits 43-22 of ~symval
	sethi	%hi(M_SETHI_G5), %o2
	or	%o2, %lo(M_SETHI_G5), %o2
	or	%o2, %o3, %o2		! %o2 = second instruction of pair
	sethi	%hi(M_NOP), %o4
	or	%o4, %lo(M_NOP), %o4
	sllx	%o4, 32, %o4		! nop occupies the upper word
	or	%o4, %o2, %o2
	stx	%o2, [%o0]		! both slots change in one store
	retl
	iflush	%o0			! (delay) .. and flush
	SET_SIZE(plt_upper_44)

#endif	/* defined(lint) */
492
493
#if	defined(lint)

/*
 * Lint-only model of the full 64-bit PLT rewrite: slots 6 down to 2
 * are written first, then slots 0 and 1 are replaced by one 64-bit
 * store.
 */
void
/* ARGSUSED1 */
plt_full_range(uintptr_t pc, uintptr_t symval)
{
	uint_t		*insn = (uint_t *)pc;
	ulong_t		*head = (ulong_t *)pc;
	ulong_t		pair;

	insn[6] = M_JMPL_G5G0 | LO10(symval);
	insn[5] = M_OR_G1G5G5;
	insn[4] = M_SLLX_G132G1;
	insn[3] = M_OR_G1G1 | HM10(symval);
	insn[2] = M_SETHI_G5 | LM22(symval);

	pair = (ulong_t)M_NOP << 32;
	pair |= M_SETHI_G1 | HH22(symval);
	*head = pair;
}

#else
	/*
	 * plt_full_range(pc, symval)
	 *
	 * In:	%o0	address of the PLT entry to rewrite
	 *	%o1	address of the bound function
	 *
	 * Emits the sequence that reaches anywhere in the 64-bit address
	 * space:
	 *
	 *	nop
	 *	sethi	%hh(symval), %g1
	 *	sethi	%lm(symval), %g5
	 *	or	%g1, %hm(symval), %g1
	 *	sllx	%g1, 32, %g1
	 *	or	%g1, %g5, %g5
	 *	jmpl	%g5 + %lo(symval), %g0
	 *
	 * The entry is written back to front, and the first two slots
	 * are replaced together by a single stx.
	 */
	ENTRY(plt_full_range)
	! plt[6]: jmpl %g5 + %lo(symval), %g0
	and	%o1, 0x3ff, %o3		! address bits 9-0
	setuw	M_JMPL_G5G0, %o2
	or	%o2, %o3, %o2
	st	%o2, [%o0 + 0x18]
	iflush	%o0 + 0x18

	! plt[5]: or %g1, %g5, %g5
	setuw	M_OR_G1G5G5, %o2
	st	%o2, [%o0 + 0x14]

	! plt[4]: sllx %g1, 32, %g1
	setuw	M_SLLX_G132G1, %o2
	st	%o2, [%o0 + 0x10]
	iflush	%o0 + 0x10

	! plt[3]: or %g1, %hm(symval), %g1
	srlx	%o1, 32, %o3
	and	%o3, 0x3ff, %o3		! address bits 41-32
	setuw	M_OR_G1G1, %o2
	or	%o2, %o3, %o2
	st	%o2, [%o0 + 0xc]

	! plt[2]: sethi %lm(symval), %g5
	srl	%o1, 10, %o3		! address bits 31-10 (srl zero
					!   fills the upper word)
	setuw	M_SETHI_G5, %o2
	or	%o2, %o3, %o2
	st	%o2, [%o0 + 0x8]
	iflush	%o0 + 8

	! plt[0], plt[1]: nop ; sethi %hh(symval), %g1
	srlx	%o1, 42, %o3		! address bits 63-42
	setuw	M_SETHI_G1, %o2
	or	%o2, %o3, %o2		! %o2 = second instruction of pair
	setuw	M_NOP, %o4
	sllx	%o4, 32, %o4		! nop occupies the upper word
	or	%o4, %o2, %o2
	stx	%o2, [%o0]		! both slots change in one store
	retl
	iflush	%o0			! (delay) .. and flush

	SET_SIZE(plt_full_range)

#endif	/* defined(lint) */
559
560/*
561 * performs the 'iflush' instruction on a range of memory.
562 */
#if	defined(lint)
/*
 * Lint-only model of iflush_range(): issue one iflush for every 8-byte
 * doubleword that overlaps [addr, addr + len).  A zero length flushes
 * nothing.
 */
void
iflush_range(caddr_t addr, size_t len)
{
	/* LINTED */
	uintptr_t base;

	if (len == 0)
		return;

	/*
	 * Measure the length from the aligned base rather than from addr;
	 * otherwise a range starting part way into a doubleword and
	 * running into the next one would lose its final doubleword.
	 */
	len += (uintptr_t)addr & 7;
	base = (uintptr_t)addr & ~7;	/* round down to 8 byte alignment */
	len = (len + 7) & ~7;		/* round up to multiple of 8 bytes */
	for (len -= 8; (long)len >= 0; len -= 8)
		/* iflush(base + len) */;
}
#else
	/*
	 * iflush_range(addr, len)
	 *
	 * In:	%o0	start address (any alignment)
	 *	%o1	length in bytes
	 *
	 * Flushes every doubleword overlapping [addr, addr + len), working
	 * from the highest doubleword down to the aligned base.  The length
	 * is extended by the misalignment of addr before it is rounded up,
	 * so that the doubleword holding the last byte is always included
	 * (e.g. addr = base + 4, len = 8 covers two doublewords).
	 */
	ENTRY(iflush_range)
	brz,pn	%o1, 2f			! empty range: nothing to flush
	and	%o0, 7, %o2		! (delay) %o2 = addr's offset within
					!   its doubleword
	add	%o1, %o2, %o1		! length as seen from aligned base
	add	%o1, 7, %o1
	andn	%o0, 7, %o0		! base = addr rounded down to 8
	andn	%o1, 7, %o1		! len rounded up to multiple of 8
1:	subcc	%o1, 8, %o1		! step to the next lower doubleword
	bge,a,pt %xcc, 1b
	iflush	%o0 + %o1		! (annulled once len goes negative)
2:	retl
	nop
	SET_SIZE(iflush_range)
#endif
587
588
#if	defined(lint)

/*
 * Lint-only placeholder; the real routine is the assembly below.
 */
ulong_t
elf_plt_trace()
{
	return (0);
}
#else
	.global	elf_plt_trace
	.type   elf_plt_trace, #function

/*
 * The dyn_plt that called us has already created a stack-frame for
 * us and placed the following entries in it:
 *
 *	[%fp + STACK_BIAS + -0x8]	* dyndata
 *	[%fp + STACK_BIAS + -0x10]	* prev stack size
 *
 * dyndata currently contains:
 *
 *	dyndata:
 *	0x0	Addr		*reflmp
 *	0x8	Addr		*deflmp
 *	0x10	Word		symndx
 *	0x14	Word		sb_flags
 *	0x18	Sym		symdef.st_name
 *	0x1c			symdef.st_info
 *	0x1d			symdef.st_other
 *	0x1e			symdef.st_shndx
 *	0x20			symdef.st_value
 *	0x28			symdef.st_size
 *
 * Register usage within elf_plt_trace:
 *
 *	%l0	address of the routine to enter (st_value, or whatever
 *		audit_pltenter() returned)
 *	%l1	dyndata pointer / scratch
 *	%l5	caller's %g4, captured on entry and put back on every
 *		path that hands control to the destination routine
 *	%l7	GOT pointer
 */
#define	REFLMP_OFF		0x0
#define	DEFLMP_OFF		0x8
#define	SYMNDX_OFF		0x10
#define	SBFLAGS_OFF		0x14
#define	SYMDEF_OFF		0x18
#define	SYMDEF_VALUE_OFF	0x20

#define	LAREGSSZ	0x40	/* sizeof (La_sparcv9_regs) */


elf_plt_trace:
1:	call	2f
	sethi	%hi(_GLOBAL_OFFSET_TABLE_ - (1b - .)), %l7
2:	or	%l7, %lo(_GLOBAL_OFFSET_TABLE_ - (1b - .)), %l7
	add	%l7, %o7, %l7

	!
	! Capture %g4 before any path can diverge.  Both .bypass_pltexit
	! and the call in .start_pltexit reload %g4 from %l5, including
	! when LA_SYMB_NOPLTENTER makes us skip .start_pltenter, so %l5
	! has to be valid on every path and not only after a pltenter
	! call.
	!
	mov	%g4, %l5		! save g4 (safe across function calls)

	ldx	[%fp + STACK_BIAS + -CLONGSIZE], %l1	! l1 = * dyndata
	lduw	[%l1 + SBFLAGS_OFF], %l2		! l2 = sb_flags
	andcc	%l2, LA_SYMB_NOPLTENTER, %g0
	be,pt	%icc, .start_pltenter
	ldx	[%l1 + SYMDEF_VALUE_OFF], %l0	! l0 =
						!  sym.st_value(calling address)
	ba,a,pt	%icc, .end_pltenter
	nop

	/*
	 * save all registers into La_sparcv9_regs
	 */
.start_pltenter:
	sub	%sp, LAREGSSZ, %sp	! create space for La_sparcv9_regs
					! storage on the stack.

	add	%fp, STACK_BIAS - (LAREGSSZ + (2 * CLONGSIZE)), %o4	! addr of new space.

	stx	%i0, [%o4 + 0x0]
	stx	%i1, [%o4 + 0x8]
	stx	%i2, [%o4 + 0x10]
	stx	%i3, [%o4 + 0x18]	! because a regwindow shift has
	stx	%i4, [%o4 + 0x20]	! already occurred our current %i*
	stx	%i5, [%o4 + 0x28]	! registers are the equivalent of
	stx	%i6, [%o4 + 0x30]	! the %o* registers that the final
	stx	%i7, [%o4 + 0x38]	! procedure shall see.


	ldx	[%fp + STACK_BIAS + -CLONGSIZE], %l1	! %l1 == * dyndata
	ldx	[%l1 + REFLMP_OFF], %o0		! %o0 = reflmp
	ldx	[%l1 + DEFLMP_OFF], %o1		! %o1 = deflmp
	add	%l1, SYMDEF_OFF, %o2		! %o2 = symp
	lduw	[%l1 + SYMNDX_OFF], %o3		! %o3 = symndx
						! %o4 = La_sparcv9_regs (above)
	call	audit_pltenter
	add	%l1, SBFLAGS_OFF, %o5		! %o5 = * sb_flags

	mov	%o0, %l0		! %l0 == calling address
	add	%sp, LAREGSSZ, %sp	! cleanup La_sparcv9_regs off
					! of the stack.

.end_pltenter:
	/*
	 * If *no* la_pltexit() routines exist we do not need
	 * to keep the stack frame before we call the actual
	 * routine.  Instead we jump to it and remove ourself
	 * from the stack at the same time.
	 */
	ldx	[%l7+audit_flags], %l3
	lduw	[%l3], %l3				! %l3 = audit_flags
	andcc	%l3, AF_PLTEXIT, %g0			! AF_PLTEXIT = 2
	be,pt	%icc, .bypass_pltexit
	ldx	[%fp + STACK_BIAS + -CLONGSIZE], %l1	! %l1 = * dyndata
	lduw	[%l1 + SBFLAGS_OFF], %l2		! %l2 = sb_flags
	andcc	%l2, LA_SYMB_NOPLTEXIT, %g0		! LA_SYMB_NOPLTEXIT = 2
	bne,a,pt	%icc, .bypass_pltexit
	nop

	ba,a,pt	%icc, .start_pltexit
	nop
.bypass_pltexit:
	mov	%l5, %g4		! restore g4
	jmpl	%l0, %g0
	restore

.start_pltexit:
	/*
	 * In order to call la_pltexit() we must duplicate the
	 * arguments from the 'callers' stack on our stack frame.
	 *
	 * First we check the size of the callers stack and grow
	 * our stack to hold any of the arguments that need
	 * duplicating (these are arguments 6->N), because the
	 * first 6 (0->5) are passed via register windows on sparc.
	 */

	/*
	 * The first calculation is to determine how large the
	 * argument passing area might be.  Since there is no
	 * way to distinguish between 'argument passing' and
	 * 'local storage' from the previous stack this amount must
	 * cover both.
	 */
	ldx	[%fp + STACK_BIAS + -(2 * CLONGSIZE)], %l1	! %l1 = callers
						!	stack size
	sub	%l1, MINFRAME, %l1		! %l1 = argument space on
						!	caller's stack
	/*
	 * Next we compare the prev. stack size against the audit_argcnt.  We
	 * copy at most 'audit_argcnt' arguments.  The default arg count is 64.
	 *
	 * NOTE: on sparc we always copy at least six args since these
	 *	 are in reg-windows and not on the stack.
	 *
	 * NOTE: Also note that we multiply (shift really) the arg count
	 *	 by 8 which is the 'word size' to calculate the amount
	 *	 of stack space needed.
	 */
	ldx	[%l7 + audit_argcnt], %l2
	lduw	[%l2], %l2			! %l2 = audit_argcnt
	cmp	%l2, 6
	ble,pn	%icc, .grow_stack
	sub	%l2, 6, %l2
	sllx	%l2, CLONGSHIFT, %l2		! arg count * 8
	cmp	%l1, %l2			!
	ble,a,pn	%icc, .grow_stack
	nop
	mov	%l2, %l1
.grow_stack:
	/*
	 * When duplicating the stack we skip the first SA(MINFRAME)
	 * bytes. This is the space on the stack reserved for preserving
	 * the register windows and such and do not need to be duplicated
	 * on this new stack frame.  We start duplicating at the portion
	 * of the stack reserved for arguments above 6.
	 */
	sub	%sp, %l1, %sp		! grow our stack by amount required.
	srax	%l1, CLONGSHIFT, %l1	! %l1 = %l1 / 8 (words to copy)
	mov	SA(MINFRAME), %l2	! %l2 = index into stack & frame

1:
	cmp	%l1, 0
	ble,a,pn	%icc, 2f
	nop

	add	%fp, %l2, %l4
	ldx	[%l4 + STACK_BIAS], %l3		! duplicate args from previous
	add	%sp, %l2, %l4
	stx	%l3, [%l4 + STACK_BIAS]		! stack onto current stack

	add	%l2, CLONGSIZE, %l2
	ba,pt	%icc, 1b
	sub	%l1, 0x1, %l1
2:
	mov	%i0, %o0		! copy ins to outs
	mov	%i1, %o1
	mov	%i2, %o2
	mov	%i3, %o3
	mov	%i4, %o4
	mov	%i5, %o5
	call	%l0			! call original routine
	mov	%l5, %g4		! restore g4
	mov	%o1, %l2		! l2 = second 1/2 of return value
					! for those 64 bit operations
					! like div64 - yuck...

					! %o0 = retval
	ldx	[%fp + STACK_BIAS + -CLONGSIZE], %l1
	ldx	[%l1 + REFLMP_OFF], %o1		! %o1 = reflmp
	ldx	[%l1 + DEFLMP_OFF], %o2		! %o2 = deflmp
	add	%l1, SYMDEF_OFF, %o3		! %o3 = symp
	call	audit_pltexit
	lduw	[%l1 + SYMNDX_OFF], %o4		! %o4 = symndx

	mov	%o0, %i0			! pass on return code
	mov	%l2, %i1
	ret
	restore
	.size	elf_plt_trace, . - elf_plt_trace

#endif
798
799