xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 2ee8325f42c08ba86267361d71817b035fc3d1e0)
1ea0fabbcSTim J. Robbins /*-
2ea0fabbcSTim J. Robbins  * Copyright (c) 2004 Tim J. Robbins
3ea0fabbcSTim J. Robbins  * Copyright (c) 2003 Peter Wemm
4ea0fabbcSTim J. Robbins  * Copyright (c) 2002 Doug Rabson
5ea0fabbcSTim J. Robbins  * Copyright (c) 1998-1999 Andrew Gallatin
6ea0fabbcSTim J. Robbins  * Copyright (c) 1994-1996 Søren Schmidt
7ea0fabbcSTim J. Robbins  * All rights reserved.
8ea0fabbcSTim J. Robbins  *
9ea0fabbcSTim J. Robbins  * Redistribution and use in source and binary forms, with or without
10ea0fabbcSTim J. Robbins  * modification, are permitted provided that the following conditions
11ea0fabbcSTim J. Robbins  * are met:
12ea0fabbcSTim J. Robbins  * 1. Redistributions of source code must retain the above copyright
13ea0fabbcSTim J. Robbins  *    notice, this list of conditions and the following disclaimer
14ea0fabbcSTim J. Robbins  *    in this position and unchanged.
15ea0fabbcSTim J. Robbins  * 2. Redistributions in binary form must reproduce the above copyright
16ea0fabbcSTim J. Robbins  *    notice, this list of conditions and the following disclaimer in the
17ea0fabbcSTim J. Robbins  *    documentation and/or other materials provided with the distribution.
18ea0fabbcSTim J. Robbins  * 3. The name of the author may not be used to endorse or promote products
19ea0fabbcSTim J. Robbins  *    derived from this software without specific prior written permission
20ea0fabbcSTim J. Robbins  *
21ea0fabbcSTim J. Robbins  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22ea0fabbcSTim J. Robbins  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23ea0fabbcSTim J. Robbins  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24ea0fabbcSTim J. Robbins  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25ea0fabbcSTim J. Robbins  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26ea0fabbcSTim J. Robbins  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27ea0fabbcSTim J. Robbins  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28ea0fabbcSTim J. Robbins  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29ea0fabbcSTim J. Robbins  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30ea0fabbcSTim J. Robbins  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31ea0fabbcSTim J. Robbins  */
32ea0fabbcSTim J. Robbins 
33ea0fabbcSTim J. Robbins #include <sys/cdefs.h>
34ea0fabbcSTim J. Robbins __FBSDID("$FreeBSD$");
35aefce619SRuslan Ermilov #include "opt_compat.h"
36ea0fabbcSTim J. Robbins 
37c680f6b1SDavid E. O'Brien #ifndef COMPAT_IA32
38ce55a234SDavid E. O'Brien #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39ea0fabbcSTim J. Robbins #endif
40ea0fabbcSTim J. Robbins 
41ea0fabbcSTim J. Robbins #define	__ELF_WORD_SIZE	32
42ea0fabbcSTim J. Robbins 
43ea0fabbcSTim J. Robbins #include <sys/param.h>
44ea0fabbcSTim J. Robbins #include <sys/systm.h>
45ea0fabbcSTim J. Robbins #include <sys/exec.h>
4648b05c3fSKonstantin Belousov #include <sys/fcntl.h>
47ea0fabbcSTim J. Robbins #include <sys/imgact.h>
48ea0fabbcSTim J. Robbins #include <sys/imgact_elf.h>
49ea0fabbcSTim J. Robbins #include <sys/kernel.h>
50ea0fabbcSTim J. Robbins #include <sys/lock.h>
51ea0fabbcSTim J. Robbins #include <sys/malloc.h>
52ea0fabbcSTim J. Robbins #include <sys/module.h>
53ea0fabbcSTim J. Robbins #include <sys/mutex.h>
54ea0fabbcSTim J. Robbins #include <sys/proc.h>
556004362eSDavid Schultz #include <sys/resourcevar.h>
56ea0fabbcSTim J. Robbins #include <sys/signalvar.h>
57ea0fabbcSTim J. Robbins #include <sys/sysctl.h>
58ea0fabbcSTim J. Robbins #include <sys/syscallsubr.h>
59ea0fabbcSTim J. Robbins #include <sys/sysent.h>
60ea0fabbcSTim J. Robbins #include <sys/sysproto.h>
61ea0fabbcSTim J. Robbins #include <sys/vnode.h>
627c09e6c0SAlexander Leidinger #include <sys/eventhandler.h>
63ea0fabbcSTim J. Robbins 
64ea0fabbcSTim J. Robbins #include <vm/vm.h>
65ea0fabbcSTim J. Robbins #include <vm/pmap.h>
66ea0fabbcSTim J. Robbins #include <vm/vm_extern.h>
67ea0fabbcSTim J. Robbins #include <vm/vm_map.h>
68ea0fabbcSTim J. Robbins #include <vm/vm_object.h>
69ea0fabbcSTim J. Robbins #include <vm/vm_page.h>
70ea0fabbcSTim J. Robbins #include <vm/vm_param.h>
71ea0fabbcSTim J. Robbins 
72ea0fabbcSTim J. Robbins #include <machine/cpu.h>
73ea0fabbcSTim J. Robbins #include <machine/md_var.h>
746004362eSDavid Schultz #include <machine/pcb.h>
75ea0fabbcSTim J. Robbins #include <machine/specialreg.h>
76ea0fabbcSTim J. Robbins 
77ea0fabbcSTim J. Robbins #include <amd64/linux32/linux.h>
78ea0fabbcSTim J. Robbins #include <amd64/linux32/linux32_proto.h>
797c09e6c0SAlexander Leidinger #include <compat/linux/linux_emul.h>
80ea0fabbcSTim J. Robbins #include <compat/linux/linux_mib.h>
814d7c2e8aSDmitry Chagin #include <compat/linux/linux_misc.h>
82ea0fabbcSTim J. Robbins #include <compat/linux/linux_signal.h>
83ea0fabbcSTim J. Robbins #include <compat/linux/linux_util.h>
84ea0fabbcSTim J. Robbins 
85ea0fabbcSTim J. Robbins MODULE_VERSION(linux, 1);
86ea0fabbcSTim J. Robbins 
87ea0fabbcSTim J. Robbins MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
88ea0fabbcSTim J. Robbins 
/*
 * Store one 32-bit ELF auxiliary-vector entry, i.e. the pair (id, val),
 * at the user address `pos' and advance `pos' past both words.
 *
 * `pos' must be a modifiable lvalue of pointer-to-32-bit type; it is
 * expanded (and incremented) twice.  All arguments are parenthesized so
 * that a non-trivial lvalue such as `*pp' advances the pointed-to cursor
 * rather than `pp' itself.  The suword32() results are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
94ea0fabbcSTim J. Robbins 
/*
 * The two bytes "#!" viewed as a 16-bit integer in host byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
100ea0fabbcSTim J. Robbins 
/*
 * The sendsig routines are not system calls, yet they want to use the
 * ldebug() facility.  Give both of them syscall number 0, which is
 * slightly less bogus than borrowing ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
109ea0fabbcSTim J. Robbins 
1104d7c2e8aSDmitry Chagin const char *linux_platform = "i686";
1114d7c2e8aSDmitry Chagin static int linux_szplatform;
112ea0fabbcSTim J. Robbins extern char linux_sigcode[];
113ea0fabbcSTim J. Robbins extern int linux_szsigcode;
114ea0fabbcSTim J. Robbins 
115ea0fabbcSTim J. Robbins extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
116ea0fabbcSTim J. Robbins 
117ea0fabbcSTim J. Robbins SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
118387196bfSDoug Ambrisko SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
119ea0fabbcSTim J. Robbins 
120ea0fabbcSTim J. Robbins static int	elf_linux_fixup(register_t **stack_base,
121ea0fabbcSTim J. Robbins 		    struct image_params *iparams);
122ea0fabbcSTim J. Robbins static register_t *linux_copyout_strings(struct image_params *imgp);
123ea0fabbcSTim J. Robbins static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
124ea0fabbcSTim J. Robbins 		    caddr_t *params);
1259104847fSDavid Xu static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126ea0fabbcSTim J. Robbins static void	exec_linux_setregs(struct thread *td, u_long entry,
127ea0fabbcSTim J. Robbins 				   u_long stack, u_long ps_strings);
12819059a13SJohn Baldwin static void	linux32_fixlimit(struct rlimit *rl, int which);
129ea0fabbcSTim J. Robbins 
1307c09e6c0SAlexander Leidinger extern LIST_HEAD(futex_list, futex) futex_list;
131bb59e63fSAlexander Leidinger extern struct sx futex_sx;
1327c09e6c0SAlexander Leidinger 
1337c09e6c0SAlexander Leidinger static eventhandler_tag linux_exit_tag;
1347c09e6c0SAlexander Leidinger static eventhandler_tag linux_schedtail_tag;
1357c09e6c0SAlexander Leidinger static eventhandler_tag linux_exec_tag;
1367c09e6c0SAlexander Leidinger 
137ea0fabbcSTim J. Robbins /*
138ea0fabbcSTim J. Robbins  * Linux syscalls return negative errno's, we do positive and map them
13950e422f0SAlexander Leidinger  * Reference:
14050e422f0SAlexander Leidinger  *   FreeBSD: src/sys/sys/errno.h
14150e422f0SAlexander Leidinger  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
14250e422f0SAlexander Leidinger  *            linux-2.6.17.8/include/asm-generic/errno.h
143ea0fabbcSTim J. Robbins  */
144ea0fabbcSTim J. Robbins static int bsd_to_linux_errno[ELAST + 1] = {
145ea0fabbcSTim J. Robbins 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
146ea0fabbcSTim J. Robbins 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
147ea0fabbcSTim J. Robbins 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
148ea0fabbcSTim J. Robbins 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
149ea0fabbcSTim J. Robbins 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
150ea0fabbcSTim J. Robbins 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
151ea0fabbcSTim J. Robbins 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
152ea0fabbcSTim J. Robbins 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
15350e422f0SAlexander Leidinger 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
15450e422f0SAlexander Leidinger 	 -72, -67, -71
155ea0fabbcSTim J. Robbins };
156ea0fabbcSTim J. Robbins 
157ea0fabbcSTim J. Robbins int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
158ea0fabbcSTim J. Robbins 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
159ea0fabbcSTim J. Robbins 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
160ea0fabbcSTim J. Robbins 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
161ea0fabbcSTim J. Robbins 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
162ea0fabbcSTim J. Robbins 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
163ea0fabbcSTim J. Robbins 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
164ea0fabbcSTim J. Robbins 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
165ea0fabbcSTim J. Robbins 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
166ea0fabbcSTim J. Robbins };
167ea0fabbcSTim J. Robbins 
168ea0fabbcSTim J. Robbins int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
169ea0fabbcSTim J. Robbins 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
170ea0fabbcSTim J. Robbins 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
171ea0fabbcSTim J. Robbins 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
172ea0fabbcSTim J. Robbins 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
173ea0fabbcSTim J. Robbins 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
174ea0fabbcSTim J. Robbins 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
175ea0fabbcSTim J. Robbins 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
176ea0fabbcSTim J. Robbins 	SIGIO, SIGURG, SIGSYS
177ea0fabbcSTim J. Robbins };
178ea0fabbcSTim J. Robbins 
/*
 * Trap number translation.  The table is indexed by the native trap
 * code; entries without a Linux counterpart hold LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	/*  0 */		LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT */	6,
	/*  2 */		LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT */	3,
	/*  4 */		LINUX_T_UNKNOWN,
	/*  5 */		LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP */	16,
	/*  7 T_ASTFLT */	254,
	/*  8 */		LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT */	13,
	/* 10 T_TRCTRAP */	1,
	/* 11 */		LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT */	14,
	/* 13 */		LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT */	17,
	/* 15 */		LINUX_T_UNKNOWN,
	/* 16 */		LINUX_T_UNKNOWN,
	/* 17 */		LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE */	0,
	/* 19 T_NMI */		2,
	/* 20 T_OFLOW */	4,
	/* 21 T_BOUND */	5,
	/* 22 T_DNA */		7,
	/* 23 T_DOUBLEFLT */	8,
	/* 24 T_FPOPFLT */	9,
	/* 25 T_TSSFLT */	10,
	/* 26 T_SEGNPFLT */	11,
	/* 27 T_STKFLT */	12,
	/* 28 T_MCHK */		18,
	/* 29 T_XMMFLT */	19,
	/* 30 T_RESERVED */	15
};

/*
 * Look up `code' in the table above.  Anything at or beyond the end of
 * the table yields LINUX_T_UNKNOWN; because the bound is a size_t, a
 * negative int code converts to a huge unsigned value and is rejected
 * as well.  `code' is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) < sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     _bsd_to_linux_trapcode[(code)] : \
     LINUX_T_UNKNOWN)
217ea0fabbcSTim J. Robbins 
218ea0fabbcSTim J. Robbins struct linux32_ps_strings {
219ea0fabbcSTim J. Robbins 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
220f2c7668eSDavid Schultz 	u_int ps_nargvstr;	/* the number of argument strings */
221ea0fabbcSTim J. Robbins 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
222f2c7668eSDavid Schultz 	u_int ps_nenvstr;	/* the number of environment strings */
223ea0fabbcSTim J. Robbins };
224ea0fabbcSTim J. Robbins 
225ea0fabbcSTim J. Robbins /*
226ea0fabbcSTim J. Robbins  * If FreeBSD & Linux have a difference of opinion about what a trap
227ea0fabbcSTim J. Robbins  * means, deal with it here.
228ea0fabbcSTim J. Robbins  *
229ea0fabbcSTim J. Robbins  * MPSAFE
230ea0fabbcSTim J. Robbins  */
231ea0fabbcSTim J. Robbins static int
232ea0fabbcSTim J. Robbins translate_traps(int signal, int trap_code)
233ea0fabbcSTim J. Robbins {
234ea0fabbcSTim J. Robbins 	if (signal != SIGBUS)
235ea0fabbcSTim J. Robbins 		return signal;
236ea0fabbcSTim J. Robbins 	switch (trap_code) {
237ea0fabbcSTim J. Robbins 	case T_PROTFLT:
238ea0fabbcSTim J. Robbins 	case T_TSSFLT:
239ea0fabbcSTim J. Robbins 	case T_DOUBLEFLT:
240ea0fabbcSTim J. Robbins 	case T_PAGEFLT:
241ea0fabbcSTim J. Robbins 		return SIGSEGV;
242ea0fabbcSTim J. Robbins 	default:
243ea0fabbcSTim J. Robbins 		return signal;
244ea0fabbcSTim J. Robbins 	}
245ea0fabbcSTim J. Robbins }
246ea0fabbcSTim J. Robbins 
247ea0fabbcSTim J. Robbins static int
248ea0fabbcSTim J. Robbins elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
249ea0fabbcSTim J. Robbins {
250ea0fabbcSTim J. Robbins 	Elf32_Auxargs *args;
251ea0fabbcSTim J. Robbins 	Elf32_Addr *base;
2524d7c2e8aSDmitry Chagin 	Elf32_Addr *pos, *uplatform;
2534d7c2e8aSDmitry Chagin 	struct linux32_ps_strings *arginfo;
2544d7c2e8aSDmitry Chagin 
2554d7c2e8aSDmitry Chagin 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
2564d7c2e8aSDmitry Chagin 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
2574d7c2e8aSDmitry Chagin 	    linux_szplatform);
258ea0fabbcSTim J. Robbins 
2596617724cSJeff Roberson 	KASSERT(curthread->td_proc == imgp->proc,
260ea0fabbcSTim J. Robbins 	    ("unsafe elf_linux_fixup(), should be curproc"));
261ea0fabbcSTim J. Robbins 	base = (Elf32_Addr *)*stack_base;
262ea0fabbcSTim J. Robbins 	args = (Elf32_Auxargs *)imgp->auxargs;
263610ecfe0SMaxim Sobolev 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
264ea0fabbcSTim J. Robbins 
2654d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
2664d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, hz);
267ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
268ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
269ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
270ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
271ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
272ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
273ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
2744d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
275ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
2794d7c2e8aSDmitry Chagin 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
2804d7c2e8aSDmitry Chagin 	if (args->execfd != -1)
2814d7c2e8aSDmitry Chagin 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
282ea0fabbcSTim J. Robbins 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
283ea0fabbcSTim J. Robbins 
284ea0fabbcSTim J. Robbins 	free(imgp->auxargs, M_TEMP);
285ea0fabbcSTim J. Robbins 	imgp->auxargs = NULL;
286ea0fabbcSTim J. Robbins 
287ea0fabbcSTim J. Robbins 	base--;
288610ecfe0SMaxim Sobolev 	suword32(base, (uint32_t)imgp->args->argc);
289ea0fabbcSTim J. Robbins 	*stack_base = (register_t *)base;
290ea0fabbcSTim J. Robbins 	return 0;
291ea0fabbcSTim J. Robbins }
292ea0fabbcSTim J. Robbins 
293ea0fabbcSTim J. Robbins extern int _ucodesel, _ucode32sel, _udatasel;
294ea0fabbcSTim J. Robbins extern unsigned long linux_sznonrtsigcode;
295ea0fabbcSTim J. Robbins 
296ea0fabbcSTim J. Robbins static void
2979104847fSDavid Xu linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
298ea0fabbcSTim J. Robbins {
299ea0fabbcSTim J. Robbins 	struct thread *td = curthread;
300ea0fabbcSTim J. Robbins 	struct proc *p = td->td_proc;
301ea0fabbcSTim J. Robbins 	struct sigacts *psp;
302ea0fabbcSTim J. Robbins 	struct trapframe *regs;
303ea0fabbcSTim J. Robbins 	struct l_rt_sigframe *fp, frame;
304ea0fabbcSTim J. Robbins 	int oonstack;
3059104847fSDavid Xu 	int sig;
3069104847fSDavid Xu 	int code;
307ea0fabbcSTim J. Robbins 
3089104847fSDavid Xu 	sig = ksi->ksi_signo;
3099104847fSDavid Xu 	code = ksi->ksi_code;
310ea0fabbcSTim J. Robbins 	PROC_LOCK_ASSERT(p, MA_OWNED);
311ea0fabbcSTim J. Robbins 	psp = p->p_sigacts;
312ea0fabbcSTim J. Robbins 	mtx_assert(&psp->ps_mtx, MA_OWNED);
313ea0fabbcSTim J. Robbins 	regs = td->td_frame;
314ea0fabbcSTim J. Robbins 	oonstack = sigonstack(regs->tf_rsp);
315ea0fabbcSTim J. Robbins 
316ea0fabbcSTim J. Robbins #ifdef DEBUG
317ea0fabbcSTim J. Robbins 	if (ldebug(rt_sendsig))
318728ef954SJohn Baldwin 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
319ea0fabbcSTim J. Robbins 		    catcher, sig, (void*)mask, code);
320ea0fabbcSTim J. Robbins #endif
321ea0fabbcSTim J. Robbins 	/*
322ea0fabbcSTim J. Robbins 	 * Allocate space for the signal handler context.
323ea0fabbcSTim J. Robbins 	 */
324ea0fabbcSTim J. Robbins 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
325ea0fabbcSTim J. Robbins 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
326ea0fabbcSTim J. Robbins 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
327ea0fabbcSTim J. Robbins 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
328ea0fabbcSTim J. Robbins 	} else
329ea0fabbcSTim J. Robbins 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
330ea0fabbcSTim J. Robbins 	mtx_unlock(&psp->ps_mtx);
331ea0fabbcSTim J. Robbins 
332ea0fabbcSTim J. Robbins 	/*
333ea0fabbcSTim J. Robbins 	 * Build the argument list for the signal handler.
334ea0fabbcSTim J. Robbins 	 */
335ea0fabbcSTim J. Robbins 	if (p->p_sysent->sv_sigtbl)
336ea0fabbcSTim J. Robbins 		if (sig <= p->p_sysent->sv_sigsize)
337ea0fabbcSTim J. Robbins 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
338ea0fabbcSTim J. Robbins 
339ea0fabbcSTim J. Robbins 	bzero(&frame, sizeof(frame));
340ea0fabbcSTim J. Robbins 
341ea0fabbcSTim J. Robbins 	frame.sf_handler = PTROUT(catcher);
342ea0fabbcSTim J. Robbins 	frame.sf_sig = sig;
343ea0fabbcSTim J. Robbins 	frame.sf_siginfo = PTROUT(&fp->sf_si);
344ea0fabbcSTim J. Robbins 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
345ea0fabbcSTim J. Robbins 
346ea0fabbcSTim J. Robbins 	/* Fill in POSIX parts */
347aa8b2011SKonstantin Belousov 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
348ea0fabbcSTim J. Robbins 
349ea0fabbcSTim J. Robbins 	/*
350ea0fabbcSTim J. Robbins 	 * Build the signal context to be used by sigreturn.
351ea0fabbcSTim J. Robbins 	 */
352ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
353ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
354ea0fabbcSTim J. Robbins 
355ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
356ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
357ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
358ea0fabbcSTim J. Robbins 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
359ea0fabbcSTim J. Robbins 	PROC_UNLOCK(p);
360ea0fabbcSTim J. Robbins 
361ea0fabbcSTim J. Robbins 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
362ea0fabbcSTim J. Robbins 
363ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_mask	= frame.sf_sc.uc_sigmask.__bits[0];
364ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
365ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_fs     = rfs();
366e6493bbeSDavid E. O'Brien 	__asm __volatile("mov %%es,%0" :
367ea0fabbcSTim J. Robbins 	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
368e6493bbeSDavid E. O'Brien 	__asm __volatile("mov %%ds,%0" :
369ea0fabbcSTim J. Robbins 	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
370ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
371ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
372ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
373ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
374ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
375ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
376ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
377ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
378ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
379ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
380ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
381ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
382ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
38396a2b635SKonstantin Belousov 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
384ea0fabbcSTim J. Robbins 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
385ea0fabbcSTim J. Robbins 
386ea0fabbcSTim J. Robbins #ifdef DEBUG
387ea0fabbcSTim J. Robbins 	if (ldebug(rt_sendsig))
388c680f6b1SDavid E. O'Brien 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
389ea0fabbcSTim J. Robbins 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
390ea0fabbcSTim J. Robbins 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
391ea0fabbcSTim J. Robbins #endif
392ea0fabbcSTim J. Robbins 
393ea0fabbcSTim J. Robbins 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
394ea0fabbcSTim J. Robbins 		/*
395ea0fabbcSTim J. Robbins 		 * Process has trashed its stack; give it an illegal
396ea0fabbcSTim J. Robbins 		 * instruction to halt it in its tracks.
397ea0fabbcSTim J. Robbins 		 */
398ea0fabbcSTim J. Robbins #ifdef DEBUG
399ea0fabbcSTim J. Robbins 		if (ldebug(rt_sendsig))
400ea0fabbcSTim J. Robbins 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
401ea0fabbcSTim J. Robbins 			    fp, oonstack);
402ea0fabbcSTim J. Robbins #endif
403ea0fabbcSTim J. Robbins 		PROC_LOCK(p);
404ea0fabbcSTim J. Robbins 		sigexit(td, SIGILL);
405ea0fabbcSTim J. Robbins 	}
406ea0fabbcSTim J. Robbins 
407ea0fabbcSTim J. Robbins 	/*
408ea0fabbcSTim J. Robbins 	 * Build context to run handler in.
409ea0fabbcSTim J. Robbins 	 */
410ea0fabbcSTim J. Robbins 	regs->tf_rsp = PTROUT(fp);
411ea0fabbcSTim J. Robbins 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
412ea0fabbcSTim J. Robbins 	    linux_sznonrtsigcode;
41322eca0bfSKonstantin Belousov 	regs->tf_rflags &= ~(PSL_T | PSL_D);
414ea0fabbcSTim J. Robbins 	regs->tf_cs = _ucode32sel;
415ea0fabbcSTim J. Robbins 	regs->tf_ss = _udatasel;
416ea0fabbcSTim J. Robbins 	load_ds(_udatasel);
417ea0fabbcSTim J. Robbins 	td->td_pcb->pcb_ds = _udatasel;
418ea0fabbcSTim J. Robbins 	load_es(_udatasel);
419ea0fabbcSTim J. Robbins 	td->td_pcb->pcb_es = _udatasel;
4209c5b213eSJung-uk Kim 	/* leave user %fs and %gs untouched */
421ea0fabbcSTim J. Robbins 	PROC_LOCK(p);
422ea0fabbcSTim J. Robbins 	mtx_lock(&psp->ps_mtx);
423ea0fabbcSTim J. Robbins }
424ea0fabbcSTim J. Robbins 
425ea0fabbcSTim J. Robbins 
426ea0fabbcSTim J. Robbins /*
427ea0fabbcSTim J. Robbins  * Send an interrupt to process.
428ea0fabbcSTim J. Robbins  *
429ea0fabbcSTim J. Robbins  * Stack is set up to allow sigcode stored
430ea0fabbcSTim J. Robbins  * in u. to call routine, followed by kcall
431ea0fabbcSTim J. Robbins  * to sigreturn routine below.  After sigreturn
432ea0fabbcSTim J. Robbins  * resets the signal mask, the stack, and the
433ea0fabbcSTim J. Robbins  * frame pointer, it returns to the user
434ea0fabbcSTim J. Robbins  * specified pc, psl.
435ea0fabbcSTim J. Robbins  */
436ea0fabbcSTim J. Robbins static void
4379104847fSDavid Xu linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
438ea0fabbcSTim J. Robbins {
439ea0fabbcSTim J. Robbins 	struct thread *td = curthread;
440ea0fabbcSTim J. Robbins 	struct proc *p = td->td_proc;
441ea0fabbcSTim J. Robbins 	struct sigacts *psp;
442ea0fabbcSTim J. Robbins 	struct trapframe *regs;
443ea0fabbcSTim J. Robbins 	struct l_sigframe *fp, frame;
444ea0fabbcSTim J. Robbins 	l_sigset_t lmask;
445ea0fabbcSTim J. Robbins 	int oonstack, i;
4469104847fSDavid Xu 	int sig, code;
447ea0fabbcSTim J. Robbins 
4489104847fSDavid Xu 	sig = ksi->ksi_signo;
4499104847fSDavid Xu 	code = ksi->ksi_code;
450ea0fabbcSTim J. Robbins 	PROC_LOCK_ASSERT(p, MA_OWNED);
451ea0fabbcSTim J. Robbins 	psp = p->p_sigacts;
452ea0fabbcSTim J. Robbins 	mtx_assert(&psp->ps_mtx, MA_OWNED);
453ea0fabbcSTim J. Robbins 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
454ea0fabbcSTim J. Robbins 		/* Signal handler installed with SA_SIGINFO. */
4559104847fSDavid Xu 		linux_rt_sendsig(catcher, ksi, mask);
456ea0fabbcSTim J. Robbins 		return;
457ea0fabbcSTim J. Robbins 	}
458ea0fabbcSTim J. Robbins 
459ea0fabbcSTim J. Robbins 	regs = td->td_frame;
460ea0fabbcSTim J. Robbins 	oonstack = sigonstack(regs->tf_rsp);
461ea0fabbcSTim J. Robbins 
462ea0fabbcSTim J. Robbins #ifdef DEBUG
463ea0fabbcSTim J. Robbins 	if (ldebug(sendsig))
464728ef954SJohn Baldwin 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
465ea0fabbcSTim J. Robbins 		    catcher, sig, (void*)mask, code);
466ea0fabbcSTim J. Robbins #endif
467ea0fabbcSTim J. Robbins 
468ea0fabbcSTim J. Robbins 	/*
469ea0fabbcSTim J. Robbins 	 * Allocate space for the signal handler context.
470ea0fabbcSTim J. Robbins 	 */
471ea0fabbcSTim J. Robbins 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
472ea0fabbcSTim J. Robbins 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
473ea0fabbcSTim J. Robbins 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
474ea0fabbcSTim J. Robbins 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
475ea0fabbcSTim J. Robbins 	} else
476ea0fabbcSTim J. Robbins 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
477ea0fabbcSTim J. Robbins 	mtx_unlock(&psp->ps_mtx);
478ea0fabbcSTim J. Robbins 	PROC_UNLOCK(p);
479ea0fabbcSTim J. Robbins 
480ea0fabbcSTim J. Robbins 	/*
481ea0fabbcSTim J. Robbins 	 * Build the argument list for the signal handler.
482ea0fabbcSTim J. Robbins 	 */
483ea0fabbcSTim J. Robbins 	if (p->p_sysent->sv_sigtbl)
484ea0fabbcSTim J. Robbins 		if (sig <= p->p_sysent->sv_sigsize)
485ea0fabbcSTim J. Robbins 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
486ea0fabbcSTim J. Robbins 
487ea0fabbcSTim J. Robbins 	bzero(&frame, sizeof(frame));
488ea0fabbcSTim J. Robbins 
489ea0fabbcSTim J. Robbins 	frame.sf_handler = PTROUT(catcher);
490ea0fabbcSTim J. Robbins 	frame.sf_sig = sig;
491ea0fabbcSTim J. Robbins 
492ea0fabbcSTim J. Robbins 	bsd_to_linux_sigset(mask, &lmask);
493ea0fabbcSTim J. Robbins 
494ea0fabbcSTim J. Robbins 	/*
495ea0fabbcSTim J. Robbins 	 * Build the signal context to be used by sigreturn.
496ea0fabbcSTim J. Robbins 	 */
497ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_mask   = lmask.__bits[0];
498ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_gs     = rgs();
499ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_fs     = rfs();
500e6493bbeSDavid E. O'Brien 	__asm __volatile("mov %%es,%0" : "=rm" (frame.sf_sc.sc_es));
501e6493bbeSDavid E. O'Brien 	__asm __volatile("mov %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
502ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_edi    = regs->tf_rdi;
503ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_esi    = regs->tf_rsi;
504ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
505ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
506ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_edx    = regs->tf_rdx;
507ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
508ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_eax    = regs->tf_rax;
509ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_eip    = regs->tf_rip;
510ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_cs     = regs->tf_cs;
511ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_eflags = regs->tf_rflags;
512ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
513ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_ss     = regs->tf_ss;
514ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_err    = regs->tf_err;
51596a2b635SKonstantin Belousov 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
516ea0fabbcSTim J. Robbins 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
517ea0fabbcSTim J. Robbins 
518ea0fabbcSTim J. Robbins 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
519ea0fabbcSTim J. Robbins 		frame.sf_extramask[i] = lmask.__bits[i+1];
520ea0fabbcSTim J. Robbins 
521ea0fabbcSTim J. Robbins 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
522ea0fabbcSTim J. Robbins 		/*
523ea0fabbcSTim J. Robbins 		 * Process has trashed its stack; give it an illegal
524ea0fabbcSTim J. Robbins 		 * instruction to halt it in its tracks.
525ea0fabbcSTim J. Robbins 		 */
526ea0fabbcSTim J. Robbins 		PROC_LOCK(p);
527ea0fabbcSTim J. Robbins 		sigexit(td, SIGILL);
528ea0fabbcSTim J. Robbins 	}
529ea0fabbcSTim J. Robbins 
530ea0fabbcSTim J. Robbins 	/*
531ea0fabbcSTim J. Robbins 	 * Build context to run handler in.
532ea0fabbcSTim J. Robbins 	 */
533ea0fabbcSTim J. Robbins 	regs->tf_rsp = PTROUT(fp);
534ea0fabbcSTim J. Robbins 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
53522eca0bfSKonstantin Belousov 	regs->tf_rflags &= ~(PSL_T | PSL_D);
536ea0fabbcSTim J. Robbins 	regs->tf_cs = _ucode32sel;
537ea0fabbcSTim J. Robbins 	regs->tf_ss = _udatasel;
538ea0fabbcSTim J. Robbins 	load_ds(_udatasel);
539ea0fabbcSTim J. Robbins 	td->td_pcb->pcb_ds = _udatasel;
540ea0fabbcSTim J. Robbins 	load_es(_udatasel);
541ea0fabbcSTim J. Robbins 	td->td_pcb->pcb_es = _udatasel;
5429c5b213eSJung-uk Kim 	/* leave user %fs and %gs untouched */
543ea0fabbcSTim J. Robbins 	PROC_LOCK(p);
544ea0fabbcSTim J. Robbins 	mtx_lock(&psp->ps_mtx);
545ea0fabbcSTim J. Robbins }
546ea0fabbcSTim J. Robbins 
547ea0fabbcSTim J. Robbins /*
548ea0fabbcSTim J. Robbins  * System call to cleanup state after a signal
549ea0fabbcSTim J. Robbins  * has been taken.  Reset signal mask and
550ea0fabbcSTim J. Robbins  * stack state from context left by sendsig (above).
551ea0fabbcSTim J. Robbins  * Return to previous pc and psl as specified by
552ea0fabbcSTim J. Robbins  * context left by sendsig. Check carefully to
553ea0fabbcSTim J. Robbins  * make sure that the user has not modified the
554ea0fabbcSTim J. Robbins  * psl to gain improper privileges or to cause
555ea0fabbcSTim J. Robbins  * a machine fault.
556ea0fabbcSTim J. Robbins  */
557ea0fabbcSTim J. Robbins int
558ea0fabbcSTim J. Robbins linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
559ea0fabbcSTim J. Robbins {
560ea0fabbcSTim J. Robbins 	struct proc *p = td->td_proc;
561ea0fabbcSTim J. Robbins 	struct l_sigframe frame;
562ea0fabbcSTim J. Robbins 	struct trapframe *regs;
563ea0fabbcSTim J. Robbins 	l_sigset_t lmask;
564ea0fabbcSTim J. Robbins 	int eflags, i;
5659104847fSDavid Xu 	ksiginfo_t ksi;
566ea0fabbcSTim J. Robbins 
567ea0fabbcSTim J. Robbins 	regs = td->td_frame;
568ea0fabbcSTim J. Robbins 
569ea0fabbcSTim J. Robbins #ifdef DEBUG
570ea0fabbcSTim J. Robbins 	if (ldebug(sigreturn))
571ea0fabbcSTim J. Robbins 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
572ea0fabbcSTim J. Robbins #endif
573ea0fabbcSTim J. Robbins 	/*
574ea0fabbcSTim J. Robbins 	 * The trampoline code hands us the sigframe.
575ea0fabbcSTim J. Robbins 	 * It is unsafe to keep track of it ourselves, in the event that a
576ea0fabbcSTim J. Robbins 	 * program jumps out of a signal handler.
577ea0fabbcSTim J. Robbins 	 */
578ea0fabbcSTim J. Robbins 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
579ea0fabbcSTim J. Robbins 		return (EFAULT);
580ea0fabbcSTim J. Robbins 
581ea0fabbcSTim J. Robbins 	/*
582ea0fabbcSTim J. Robbins 	 * Check for security violations.
583ea0fabbcSTim J. Robbins 	 */
584ea0fabbcSTim J. Robbins #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
585ea0fabbcSTim J. Robbins 	eflags = frame.sf_sc.sc_eflags;
586ea0fabbcSTim J. Robbins 	/*
587ea0fabbcSTim J. Robbins 	 * XXX do allow users to change the privileged flag PSL_RF.  The
588ea0fabbcSTim J. Robbins 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
589ea0fabbcSTim J. Robbins 	 * sometimes set it there too.  tf_eflags is kept in the signal
590ea0fabbcSTim J. Robbins 	 * context during signal handling and there is no other place
591ea0fabbcSTim J. Robbins 	 * to remember it, so the PSL_RF bit may be corrupted by the
592ea0fabbcSTim J. Robbins 	 * signal handler without us knowing.  Corruption of the PSL_RF
593ea0fabbcSTim J. Robbins 	 * bit at worst causes one more or one less debugger trap, so
594ea0fabbcSTim J. Robbins 	 * allowing it is fairly harmless.
595ea0fabbcSTim J. Robbins 	 */
596ea0fabbcSTim J. Robbins 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
597ea0fabbcSTim J. Robbins 		return(EINVAL);
598ea0fabbcSTim J. Robbins 
599ea0fabbcSTim J. Robbins 	/*
600ea0fabbcSTim J. Robbins 	 * Don't allow users to load a valid privileged %cs.  Let the
601ea0fabbcSTim J. Robbins 	 * hardware check for invalid selectors, excess privilege in
602ea0fabbcSTim J. Robbins 	 * other selectors, invalid %eip's and invalid %esp's.
603ea0fabbcSTim J. Robbins 	 */
604ea0fabbcSTim J. Robbins #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
605ea0fabbcSTim J. Robbins 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
6069104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
6079104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
6089104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
6099104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
6109104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_rip;
6119104847fSDavid Xu 		trapsignal(td, &ksi);
612ea0fabbcSTim J. Robbins 		return(EINVAL);
613ea0fabbcSTim J. Robbins 	}
614ea0fabbcSTim J. Robbins 
615ea0fabbcSTim J. Robbins 	lmask.__bits[0] = frame.sf_sc.sc_mask;
616ea0fabbcSTim J. Robbins 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
617ea0fabbcSTim J. Robbins 		lmask.__bits[i+1] = frame.sf_extramask[i];
618ea0fabbcSTim J. Robbins 	PROC_LOCK(p);
619ea0fabbcSTim J. Robbins 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
620ea0fabbcSTim J. Robbins 	SIG_CANTMASK(td->td_sigmask);
621ea0fabbcSTim J. Robbins 	signotify(td);
622ea0fabbcSTim J. Robbins 	PROC_UNLOCK(p);
623ea0fabbcSTim J. Robbins 
624ea0fabbcSTim J. Robbins 	/*
625ea0fabbcSTim J. Robbins 	 * Restore signal context.
626ea0fabbcSTim J. Robbins 	 */
627ea0fabbcSTim J. Robbins 	/* Selectors were restored by the trampoline. */
628ea0fabbcSTim J. Robbins 	regs->tf_rdi    = frame.sf_sc.sc_edi;
629ea0fabbcSTim J. Robbins 	regs->tf_rsi    = frame.sf_sc.sc_esi;
630ea0fabbcSTim J. Robbins 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
631ea0fabbcSTim J. Robbins 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
632ea0fabbcSTim J. Robbins 	regs->tf_rdx    = frame.sf_sc.sc_edx;
633ea0fabbcSTim J. Robbins 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
634ea0fabbcSTim J. Robbins 	regs->tf_rax    = frame.sf_sc.sc_eax;
635ea0fabbcSTim J. Robbins 	regs->tf_rip    = frame.sf_sc.sc_eip;
636ea0fabbcSTim J. Robbins 	regs->tf_cs     = frame.sf_sc.sc_cs;
637ea0fabbcSTim J. Robbins 	regs->tf_rflags = eflags;
638ea0fabbcSTim J. Robbins 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
639ea0fabbcSTim J. Robbins 	regs->tf_ss     = frame.sf_sc.sc_ss;
640ea0fabbcSTim J. Robbins 
641ea0fabbcSTim J. Robbins 	return (EJUSTRETURN);
642ea0fabbcSTim J. Robbins }
643ea0fabbcSTim J. Robbins 
644ea0fabbcSTim J. Robbins /*
645ea0fabbcSTim J. Robbins  * System call to cleanup state after a signal
646ea0fabbcSTim J. Robbins  * has been taken.  Reset signal mask and
647ea0fabbcSTim J. Robbins  * stack state from context left by rt_sendsig (above).
648ea0fabbcSTim J. Robbins  * Return to previous pc and psl as specified by
649ea0fabbcSTim J. Robbins  * context left by sendsig. Check carefully to
650ea0fabbcSTim J. Robbins  * make sure that the user has not modified the
651ea0fabbcSTim J. Robbins  * psl to gain improper privileges or to cause
652ea0fabbcSTim J. Robbins  * a machine fault.
653ea0fabbcSTim J. Robbins  */
654ea0fabbcSTim J. Robbins int
655ea0fabbcSTim J. Robbins linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
656ea0fabbcSTim J. Robbins {
657ea0fabbcSTim J. Robbins 	struct proc *p = td->td_proc;
658ea0fabbcSTim J. Robbins 	struct l_ucontext uc;
659ea0fabbcSTim J. Robbins 	struct l_sigcontext *context;
660ea0fabbcSTim J. Robbins 	l_stack_t *lss;
661ea0fabbcSTim J. Robbins 	stack_t ss;
662ea0fabbcSTim J. Robbins 	struct trapframe *regs;
663ea0fabbcSTim J. Robbins 	int eflags;
6649104847fSDavid Xu 	ksiginfo_t ksi;
665ea0fabbcSTim J. Robbins 
666ea0fabbcSTim J. Robbins 	regs = td->td_frame;
667ea0fabbcSTim J. Robbins 
668ea0fabbcSTim J. Robbins #ifdef DEBUG
669ea0fabbcSTim J. Robbins 	if (ldebug(rt_sigreturn))
670ea0fabbcSTim J. Robbins 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
671ea0fabbcSTim J. Robbins #endif
672ea0fabbcSTim J. Robbins 	/*
673ea0fabbcSTim J. Robbins 	 * The trampoline code hands us the ucontext.
674ea0fabbcSTim J. Robbins 	 * It is unsafe to keep track of it ourselves, in the event that a
675ea0fabbcSTim J. Robbins 	 * program jumps out of a signal handler.
676ea0fabbcSTim J. Robbins 	 */
677ea0fabbcSTim J. Robbins 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
678ea0fabbcSTim J. Robbins 		return (EFAULT);
679ea0fabbcSTim J. Robbins 
680ea0fabbcSTim J. Robbins 	context = &uc.uc_mcontext;
681ea0fabbcSTim J. Robbins 
682ea0fabbcSTim J. Robbins 	/*
683ea0fabbcSTim J. Robbins 	 * Check for security violations.
684ea0fabbcSTim J. Robbins 	 */
685ea0fabbcSTim J. Robbins #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686ea0fabbcSTim J. Robbins 	eflags = context->sc_eflags;
687ea0fabbcSTim J. Robbins 	/*
688ea0fabbcSTim J. Robbins 	 * XXX do allow users to change the privileged flag PSL_RF.  The
689ea0fabbcSTim J. Robbins 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
690ea0fabbcSTim J. Robbins 	 * sometimes set it there too.  tf_eflags is kept in the signal
691ea0fabbcSTim J. Robbins 	 * context during signal handling and there is no other place
692ea0fabbcSTim J. Robbins 	 * to remember it, so the PSL_RF bit may be corrupted by the
693ea0fabbcSTim J. Robbins 	 * signal handler without us knowing.  Corruption of the PSL_RF
694ea0fabbcSTim J. Robbins 	 * bit at worst causes one more or one less debugger trap, so
695ea0fabbcSTim J. Robbins 	 * allowing it is fairly harmless.
696ea0fabbcSTim J. Robbins 	 */
697ea0fabbcSTim J. Robbins 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
698ea0fabbcSTim J. Robbins 		return(EINVAL);
699ea0fabbcSTim J. Robbins 
700ea0fabbcSTim J. Robbins 	/*
701ea0fabbcSTim J. Robbins 	 * Don't allow users to load a valid privileged %cs.  Let the
702ea0fabbcSTim J. Robbins 	 * hardware check for invalid selectors, excess privilege in
703ea0fabbcSTim J. Robbins 	 * other selectors, invalid %eip's and invalid %esp's.
704ea0fabbcSTim J. Robbins 	 */
705ea0fabbcSTim J. Robbins #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
706ea0fabbcSTim J. Robbins 	if (!CS_SECURE(context->sc_cs)) {
7079104847fSDavid Xu 		ksiginfo_init_trap(&ksi);
7089104847fSDavid Xu 		ksi.ksi_signo = SIGBUS;
7099104847fSDavid Xu 		ksi.ksi_code = BUS_OBJERR;
7109104847fSDavid Xu 		ksi.ksi_trapno = T_PROTFLT;
7119104847fSDavid Xu 		ksi.ksi_addr = (void *)regs->tf_rip;
7129104847fSDavid Xu 		trapsignal(td, &ksi);
713ea0fabbcSTim J. Robbins 		return(EINVAL);
714ea0fabbcSTim J. Robbins 	}
715ea0fabbcSTim J. Robbins 
716ea0fabbcSTim J. Robbins 	PROC_LOCK(p);
717ea0fabbcSTim J. Robbins 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
718ea0fabbcSTim J. Robbins 	SIG_CANTMASK(td->td_sigmask);
719ea0fabbcSTim J. Robbins 	signotify(td);
720ea0fabbcSTim J. Robbins 	PROC_UNLOCK(p);
721ea0fabbcSTim J. Robbins 
722ea0fabbcSTim J. Robbins 	/*
723ea0fabbcSTim J. Robbins 	 * Restore signal context
724ea0fabbcSTim J. Robbins 	 */
725ea0fabbcSTim J. Robbins 	/* Selectors were restored by the trampoline. */
726ea0fabbcSTim J. Robbins 	regs->tf_rdi    = context->sc_edi;
727ea0fabbcSTim J. Robbins 	regs->tf_rsi    = context->sc_esi;
728ea0fabbcSTim J. Robbins 	regs->tf_rbp    = context->sc_ebp;
729ea0fabbcSTim J. Robbins 	regs->tf_rbx    = context->sc_ebx;
730ea0fabbcSTim J. Robbins 	regs->tf_rdx    = context->sc_edx;
731ea0fabbcSTim J. Robbins 	regs->tf_rcx    = context->sc_ecx;
732ea0fabbcSTim J. Robbins 	regs->tf_rax    = context->sc_eax;
733ea0fabbcSTim J. Robbins 	regs->tf_rip    = context->sc_eip;
734ea0fabbcSTim J. Robbins 	regs->tf_cs     = context->sc_cs;
735ea0fabbcSTim J. Robbins 	regs->tf_rflags = eflags;
736ea0fabbcSTim J. Robbins 	regs->tf_rsp    = context->sc_esp_at_signal;
737ea0fabbcSTim J. Robbins 	regs->tf_ss     = context->sc_ss;
738ea0fabbcSTim J. Robbins 
739ea0fabbcSTim J. Robbins 	/*
740ea0fabbcSTim J. Robbins 	 * call sigaltstack & ignore results..
741ea0fabbcSTim J. Robbins 	 */
742ea0fabbcSTim J. Robbins 	lss = &uc.uc_stack;
743ea0fabbcSTim J. Robbins 	ss.ss_sp = PTRIN(lss->ss_sp);
744ea0fabbcSTim J. Robbins 	ss.ss_size = lss->ss_size;
745ea0fabbcSTim J. Robbins 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
746ea0fabbcSTim J. Robbins 
747ea0fabbcSTim J. Robbins #ifdef DEBUG
748ea0fabbcSTim J. Robbins 	if (ldebug(rt_sigreturn))
749c680f6b1SDavid E. O'Brien 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
750ea0fabbcSTim J. Robbins 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
751ea0fabbcSTim J. Robbins #endif
752ea0fabbcSTim J. Robbins 	(void)kern_sigaltstack(td, &ss, NULL);
753ea0fabbcSTim J. Robbins 
754ea0fabbcSTim J. Robbins 	return (EJUSTRETURN);
755ea0fabbcSTim J. Robbins }
756ea0fabbcSTim J. Robbins 
757ea0fabbcSTim J. Robbins /*
758ea0fabbcSTim J. Robbins  * MPSAFE
759ea0fabbcSTim J. Robbins  */
760ea0fabbcSTim J. Robbins static void
761ea0fabbcSTim J. Robbins linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
762ea0fabbcSTim J. Robbins {
763ea0fabbcSTim J. Robbins 	args[0] = tf->tf_rbx;
764ea0fabbcSTim J. Robbins 	args[1] = tf->tf_rcx;
765ea0fabbcSTim J. Robbins 	args[2] = tf->tf_rdx;
766ea0fabbcSTim J. Robbins 	args[3] = tf->tf_rsi;
767ea0fabbcSTim J. Robbins 	args[4] = tf->tf_rdi;
768ea0fabbcSTim J. Robbins 	args[5] = tf->tf_rbp;	/* Unconfirmed */
769ea0fabbcSTim J. Robbins 	*params = NULL;		/* no copyin */
770ea0fabbcSTim J. Robbins }
771ea0fabbcSTim J. Robbins 
772ea0fabbcSTim J. Robbins /*
773ea0fabbcSTim J. Robbins  * If a linux binary is exec'ing something, try this image activator
774ea0fabbcSTim J. Robbins  * first.  We override standard shell script execution in order to
775ea0fabbcSTim J. Robbins  * be able to modify the interpreter path.  We only do this if a linux
776ea0fabbcSTim J. Robbins  * binary is doing the exec, so we do not create an EXEC module for it.
777ea0fabbcSTim J. Robbins  */
778ea0fabbcSTim J. Robbins static int	exec_linux_imgact_try(struct image_params *iparams);
779ea0fabbcSTim J. Robbins 
780ea0fabbcSTim J. Robbins static int
781ea0fabbcSTim J. Robbins exec_linux_imgact_try(struct image_params *imgp)
782ea0fabbcSTim J. Robbins {
783ea0fabbcSTim J. Robbins 	const char *head = (const char *)imgp->image_header;
7841d15fdd9SJohn Baldwin 	char *rpath;
7851d15fdd9SJohn Baldwin 	int error = -1, len;
786ea0fabbcSTim J. Robbins 
787ea0fabbcSTim J. Robbins 	/*
788ea0fabbcSTim J. Robbins 	* The interpreter for shell scripts run from a linux binary needs
789ea0fabbcSTim J. Robbins 	* to be located in /compat/linux if possible in order to recursively
790ea0fabbcSTim J. Robbins 	* maintain linux path emulation.
791ea0fabbcSTim J. Robbins 	*/
792ea0fabbcSTim J. Robbins 	if (((const short *)head)[0] == SHELLMAGIC) {
793ea0fabbcSTim J. Robbins 		/*
794ea0fabbcSTim J. Robbins 		* Run our normal shell image activator.  If it succeeds attempt
795d065e13dSDavid E. O'Brien 		* to use the alternate path for the interpreter.  If an
796d065e13dSDavid E. O'Brien 		* alternate * path is found, use our stringspace to store it.
797ea0fabbcSTim J. Robbins 		*/
798ea0fabbcSTim J. Robbins 		if ((error = exec_shell_imgact(imgp)) == 0) {
7991d15fdd9SJohn Baldwin 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
800d065e13dSDavid E. O'Brien 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
801d065e13dSDavid E. O'Brien 			    AT_FDCWD);
8021d15fdd9SJohn Baldwin 			if (rpath != NULL) {
8031d15fdd9SJohn Baldwin 				len = strlen(rpath) + 1;
804ea0fabbcSTim J. Robbins 
805ea0fabbcSTim J. Robbins 				if (len <= MAXSHELLCMDLEN) {
806d065e13dSDavid E. O'Brien 					memcpy(imgp->interpreter_name, rpath,
807d065e13dSDavid E. O'Brien 					    len);
808ea0fabbcSTim J. Robbins 				}
809ea0fabbcSTim J. Robbins 				free(rpath, M_TEMP);
810ea0fabbcSTim J. Robbins 			}
811ea0fabbcSTim J. Robbins 		}
812ea0fabbcSTim J. Robbins 	}
813ea0fabbcSTim J. Robbins 	return(error);
814ea0fabbcSTim J. Robbins }
815ea0fabbcSTim J. Robbins 
816ea0fabbcSTim J. Robbins /*
817ea0fabbcSTim J. Robbins  * Clear registers on exec
818ea0fabbcSTim J. Robbins  * XXX copied from ia32_signal.c.
819ea0fabbcSTim J. Robbins  */
820ea0fabbcSTim J. Robbins static void
821ea0fabbcSTim J. Robbins exec_linux_setregs(td, entry, stack, ps_strings)
822ea0fabbcSTim J. Robbins 	struct thread *td;
823ea0fabbcSTim J. Robbins 	u_long entry;
824ea0fabbcSTim J. Robbins 	u_long stack;
825ea0fabbcSTim J. Robbins 	u_long ps_strings;
826ea0fabbcSTim J. Robbins {
827ea0fabbcSTim J. Robbins 	struct trapframe *regs = td->td_frame;
828ea0fabbcSTim J. Robbins 	struct pcb *pcb = td->td_pcb;
829ea0fabbcSTim J. Robbins 
8309c5b213eSJung-uk Kim 	critical_enter();
831ea0fabbcSTim J. Robbins 	wrmsr(MSR_FSBASE, 0);
832ea0fabbcSTim J. Robbins 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
833ea0fabbcSTim J. Robbins 	pcb->pcb_fsbase = 0;
834ea0fabbcSTim J. Robbins 	pcb->pcb_gsbase = 0;
8359c5b213eSJung-uk Kim 	critical_exit();
836ea0fabbcSTim J. Robbins 	load_ds(_udatasel);
837ea0fabbcSTim J. Robbins 	load_es(_udatasel);
838ea0fabbcSTim J. Robbins 	load_fs(_udatasel);
8399c5b213eSJung-uk Kim 	load_gs(_udatasel);
840ea0fabbcSTim J. Robbins 	pcb->pcb_ds = _udatasel;
841ea0fabbcSTim J. Robbins 	pcb->pcb_es = _udatasel;
842ea0fabbcSTim J. Robbins 	pcb->pcb_fs = _udatasel;
8439c5b213eSJung-uk Kim 	pcb->pcb_gs = _udatasel;
8442ee8325fSJohn Baldwin 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
845ea0fabbcSTim J. Robbins 
846ea0fabbcSTim J. Robbins 	bzero((char *)regs, sizeof(struct trapframe));
847ea0fabbcSTim J. Robbins 	regs->tf_rip = entry;
848ea0fabbcSTim J. Robbins 	regs->tf_rsp = stack;
849ea0fabbcSTim J. Robbins 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
850ea0fabbcSTim J. Robbins 	regs->tf_ss = _udatasel;
851ea0fabbcSTim J. Robbins 	regs->tf_cs = _ucode32sel;
852ea0fabbcSTim J. Robbins 	regs->tf_rbx = ps_strings;
853ea0fabbcSTim J. Robbins 	load_cr0(rcr0() | CR0_MP | CR0_TS);
8542a988f7cSStephan Uphoff 	fpstate_drop(td);
855ea0fabbcSTim J. Robbins 
856ea0fabbcSTim J. Robbins 	/* Return via doreti so that we can change to a different %cs */
857f1f0dd9eSKonstantin Belousov 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
858f1f0dd9eSKonstantin Belousov 	pcb->pcb_flags &= ~PCB_GS32BIT;
859ea0fabbcSTim J. Robbins 	td->td_retval[1] = 0;
860ea0fabbcSTim J. Robbins }
861ea0fabbcSTim J. Robbins 
862ea0fabbcSTim J. Robbins /*
863ea0fabbcSTim J. Robbins  * XXX copied from ia32_sysvec.c.
864ea0fabbcSTim J. Robbins  */
865ea0fabbcSTim J. Robbins static register_t *
866ea0fabbcSTim J. Robbins linux_copyout_strings(struct image_params *imgp)
867ea0fabbcSTim J. Robbins {
868ea0fabbcSTim J. Robbins 	int argc, envc;
869ea0fabbcSTim J. Robbins 	u_int32_t *vectp;
870ea0fabbcSTim J. Robbins 	char *stringp, *destp;
871ea0fabbcSTim J. Robbins 	u_int32_t *stack_base;
872ea0fabbcSTim J. Robbins 	struct linux32_ps_strings *arginfo;
873ea0fabbcSTim J. Robbins 
874ea0fabbcSTim J. Robbins 	/*
875ea0fabbcSTim J. Robbins 	 * Calculate string base and vector table pointers.
876ea0fabbcSTim J. Robbins 	 * Also deal with signal trampoline code for this exec type.
877ea0fabbcSTim J. Robbins 	 */
878ea0fabbcSTim J. Robbins 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
8794d7c2e8aSDmitry Chagin 	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
8804d7c2e8aSDmitry Chagin 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
8814d7c2e8aSDmitry Chagin 	    sizeof(char *));
882ea0fabbcSTim J. Robbins 
883ea0fabbcSTim J. Robbins 	/*
884ea0fabbcSTim J. Robbins 	 * install sigcode
885ea0fabbcSTim J. Robbins 	 */
886ea0fabbcSTim J. Robbins 	copyout(imgp->proc->p_sysent->sv_sigcode,
8874d7c2e8aSDmitry Chagin 	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
8884d7c2e8aSDmitry Chagin 
8894d7c2e8aSDmitry Chagin 	/*
8904d7c2e8aSDmitry Chagin 	 * Install LINUX_PLATFORM
8914d7c2e8aSDmitry Chagin 	 */
8924d7c2e8aSDmitry Chagin 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
8934d7c2e8aSDmitry Chagin 	    linux_szplatform), linux_szplatform);
894ea0fabbcSTim J. Robbins 
895ea0fabbcSTim J. Robbins 	/*
896ea0fabbcSTim J. Robbins 	 * If we have a valid auxargs ptr, prepare some room
897ea0fabbcSTim J. Robbins 	 * on the stack.
898ea0fabbcSTim J. Robbins 	 */
899ea0fabbcSTim J. Robbins 	if (imgp->auxargs) {
900ea0fabbcSTim J. Robbins 		/*
901ea0fabbcSTim J. Robbins 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
902ea0fabbcSTim J. Robbins 		 * lower compatibility.
903ea0fabbcSTim J. Robbins 		 */
904d065e13dSDavid E. O'Brien 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
9054d7c2e8aSDmitry Chagin 		    (LINUX_AT_COUNT * 2);
906ea0fabbcSTim J. Robbins 		/*
907ea0fabbcSTim J. Robbins 		 * The '+ 2' is for the null pointers at the end of each of
908ea0fabbcSTim J. Robbins 		 * the arg and env vector sets,and imgp->auxarg_size is room
909ea0fabbcSTim J. Robbins 		 * for argument of Runtime loader.
910ea0fabbcSTim J. Robbins 		 */
911d065e13dSDavid E. O'Brien 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
912d065e13dSDavid E. O'Brien 		    imgp->args->envc + 2 + imgp->auxarg_size) *
913d065e13dSDavid E. O'Brien 		    sizeof(u_int32_t));
914ea0fabbcSTim J. Robbins 
915ea0fabbcSTim J. Robbins 	} else
916ea0fabbcSTim J. Robbins 		/*
917ea0fabbcSTim J. Robbins 		 * The '+ 2' is for the null pointers at the end of each of
918ea0fabbcSTim J. Robbins 		 * the arg and env vector sets
919ea0fabbcSTim J. Robbins 		 */
920d065e13dSDavid E. O'Brien 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
921d065e13dSDavid E. O'Brien 		    imgp->args->envc + 2) * sizeof(u_int32_t));
922ea0fabbcSTim J. Robbins 
923ea0fabbcSTim J. Robbins 	/*
924ea0fabbcSTim J. Robbins 	 * vectp also becomes our initial stack base
925ea0fabbcSTim J. Robbins 	 */
926ea0fabbcSTim J. Robbins 	stack_base = vectp;
927ea0fabbcSTim J. Robbins 
928610ecfe0SMaxim Sobolev 	stringp = imgp->args->begin_argv;
929610ecfe0SMaxim Sobolev 	argc = imgp->args->argc;
930610ecfe0SMaxim Sobolev 	envc = imgp->args->envc;
931ea0fabbcSTim J. Robbins 	/*
932ea0fabbcSTim J. Robbins 	 * Copy out strings - arguments and environment.
933ea0fabbcSTim J. Robbins 	 */
934610ecfe0SMaxim Sobolev 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
935ea0fabbcSTim J. Robbins 
936ea0fabbcSTim J. Robbins 	/*
937ea0fabbcSTim J. Robbins 	 * Fill in "ps_strings" struct for ps, w, etc.
938ea0fabbcSTim J. Robbins 	 */
9394d7c2e8aSDmitry Chagin 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
940ea0fabbcSTim J. Robbins 	suword32(&arginfo->ps_nargvstr, argc);
941ea0fabbcSTim J. Robbins 
942ea0fabbcSTim J. Robbins 	/*
943ea0fabbcSTim J. Robbins 	 * Fill in argument portion of vector table.
944ea0fabbcSTim J. Robbins 	 */
945ea0fabbcSTim J. Robbins 	for (; argc > 0; --argc) {
9464d7c2e8aSDmitry Chagin 		suword32(vectp++, (uint32_t)(intptr_t)destp);
947ea0fabbcSTim J. Robbins 		while (*stringp++ != 0)
948ea0fabbcSTim J. Robbins 			destp++;
949ea0fabbcSTim J. Robbins 		destp++;
950ea0fabbcSTim J. Robbins 	}
951ea0fabbcSTim J. Robbins 
952ea0fabbcSTim J. Robbins 	/* a null vector table pointer separates the argp's from the envp's */
953ea0fabbcSTim J. Robbins 	suword32(vectp++, 0);
954ea0fabbcSTim J. Robbins 
9554d7c2e8aSDmitry Chagin 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
956ea0fabbcSTim J. Robbins 	suword32(&arginfo->ps_nenvstr, envc);
957ea0fabbcSTim J. Robbins 
958ea0fabbcSTim J. Robbins 	/*
959ea0fabbcSTim J. Robbins 	 * Fill in environment portion of vector table.
960ea0fabbcSTim J. Robbins 	 */
961ea0fabbcSTim J. Robbins 	for (; envc > 0; --envc) {
9624d7c2e8aSDmitry Chagin 		suword32(vectp++, (uint32_t)(intptr_t)destp);
963ea0fabbcSTim J. Robbins 		while (*stringp++ != 0)
964ea0fabbcSTim J. Robbins 			destp++;
965ea0fabbcSTim J. Robbins 		destp++;
966ea0fabbcSTim J. Robbins 	}
967ea0fabbcSTim J. Robbins 
968ea0fabbcSTim J. Robbins 	/* end of vector table is a null pointer */
969ea0fabbcSTim J. Robbins 	suword32(vectp, 0);
970ea0fabbcSTim J. Robbins 
971ea0fabbcSTim J. Robbins 	return ((register_t *)stack_base);
972ea0fabbcSTim J. Robbins }
973ea0fabbcSTim J. Robbins 
974ea0fabbcSTim J. Robbins SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
975ea0fabbcSTim J. Robbins     "32-bit Linux emulation");
976ea0fabbcSTim J. Robbins 
977ea0fabbcSTim J. Robbins static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
978ea0fabbcSTim J. Robbins SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
979ea0fabbcSTim J. Robbins     &linux32_maxdsiz, 0, "");
980ea0fabbcSTim J. Robbins static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
981ea0fabbcSTim J. Robbins SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
982ea0fabbcSTim J. Robbins     &linux32_maxssiz, 0, "");
983ea0fabbcSTim J. Robbins static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
984ea0fabbcSTim J. Robbins SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
985ea0fabbcSTim J. Robbins     &linux32_maxvmem, 0, "");
986ea0fabbcSTim J. Robbins 
987ea0fabbcSTim J. Robbins static void
98819059a13SJohn Baldwin linux32_fixlimit(struct rlimit *rl, int which)
989ea0fabbcSTim J. Robbins {
990ea0fabbcSTim J. Robbins 
99119059a13SJohn Baldwin 	switch (which) {
99219059a13SJohn Baldwin 	case RLIMIT_DATA:
993ea0fabbcSTim J. Robbins 		if (linux32_maxdsiz != 0) {
99419059a13SJohn Baldwin 			if (rl->rlim_cur > linux32_maxdsiz)
99519059a13SJohn Baldwin 				rl->rlim_cur = linux32_maxdsiz;
99619059a13SJohn Baldwin 			if (rl->rlim_max > linux32_maxdsiz)
99719059a13SJohn Baldwin 				rl->rlim_max = linux32_maxdsiz;
998ea0fabbcSTim J. Robbins 		}
99919059a13SJohn Baldwin 		break;
100019059a13SJohn Baldwin 	case RLIMIT_STACK:
1001ea0fabbcSTim J. Robbins 		if (linux32_maxssiz != 0) {
100219059a13SJohn Baldwin 			if (rl->rlim_cur > linux32_maxssiz)
100319059a13SJohn Baldwin 				rl->rlim_cur = linux32_maxssiz;
100419059a13SJohn Baldwin 			if (rl->rlim_max > linux32_maxssiz)
100519059a13SJohn Baldwin 				rl->rlim_max = linux32_maxssiz;
1006ea0fabbcSTim J. Robbins 		}
100719059a13SJohn Baldwin 		break;
100819059a13SJohn Baldwin 	case RLIMIT_VMEM:
1009ea0fabbcSTim J. Robbins 		if (linux32_maxvmem != 0) {
101019059a13SJohn Baldwin 			if (rl->rlim_cur > linux32_maxvmem)
101119059a13SJohn Baldwin 				rl->rlim_cur = linux32_maxvmem;
101219059a13SJohn Baldwin 			if (rl->rlim_max > linux32_maxvmem)
101319059a13SJohn Baldwin 				rl->rlim_max = linux32_maxvmem;
1014ea0fabbcSTim J. Robbins 		}
101519059a13SJohn Baldwin 		break;
101619059a13SJohn Baldwin 	}
1017ea0fabbcSTim J. Robbins }
1018ea0fabbcSTim J. Robbins 
1019ea0fabbcSTim J. Robbins struct sysentvec elf_linux_sysvec = {
1020a8d403e1SKonstantin Belousov 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1021a8d403e1SKonstantin Belousov 	.sv_table	= linux_sysent,
1022a8d403e1SKonstantin Belousov 	.sv_mask	= 0,
1023a8d403e1SKonstantin Belousov 	.sv_sigsize	= LINUX_SIGTBLSZ,
1024a8d403e1SKonstantin Belousov 	.sv_sigtbl	= bsd_to_linux_signal,
1025a8d403e1SKonstantin Belousov 	.sv_errsize	= ELAST + 1,
1026a8d403e1SKonstantin Belousov 	.sv_errtbl	= bsd_to_linux_errno,
1027a8d403e1SKonstantin Belousov 	.sv_transtrap	= translate_traps,
1028a8d403e1SKonstantin Belousov 	.sv_fixup	= elf_linux_fixup,
1029a8d403e1SKonstantin Belousov 	.sv_sendsig	= linux_sendsig,
1030a8d403e1SKonstantin Belousov 	.sv_sigcode	= linux_sigcode,
1031a8d403e1SKonstantin Belousov 	.sv_szsigcode	= &linux_szsigcode,
1032a8d403e1SKonstantin Belousov 	.sv_prepsyscall	= linux_prepsyscall,
1033a8d403e1SKonstantin Belousov 	.sv_name	= "Linux ELF32",
1034a8d403e1SKonstantin Belousov 	.sv_coredump	= elf32_coredump,
1035a8d403e1SKonstantin Belousov 	.sv_imgact_try	= exec_linux_imgact_try,
1036a8d403e1SKonstantin Belousov 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1037a8d403e1SKonstantin Belousov 	.sv_pagesize	= PAGE_SIZE,
1038a8d403e1SKonstantin Belousov 	.sv_minuser	= VM_MIN_ADDRESS,
1039a8d403e1SKonstantin Belousov 	.sv_maxuser	= LINUX32_USRSTACK,
1040a8d403e1SKonstantin Belousov 	.sv_usrstack	= LINUX32_USRSTACK,
1041a8d403e1SKonstantin Belousov 	.sv_psstrings	= LINUX32_PS_STRINGS,
1042a8d403e1SKonstantin Belousov 	.sv_stackprot	= VM_PROT_ALL,
1043a8d403e1SKonstantin Belousov 	.sv_copyout_strings = linux_copyout_strings,
1044a8d403e1SKonstantin Belousov 	.sv_setregs	= exec_linux_setregs,
1045a8d403e1SKonstantin Belousov 	.sv_fixlimit	= linux32_fixlimit,
1046a8d403e1SKonstantin Belousov 	.sv_maxssiz	= &linux32_maxssiz,
1047b4cf0e62SKonstantin Belousov 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1048ea0fabbcSTim J. Robbins };
1049ea0fabbcSTim J. Robbins 
1050ea0fabbcSTim J. Robbins static Elf32_Brandinfo linux_brand = {
1051a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1052a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1053a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1054a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1055a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.1",
1056a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1057a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
1058a8d403e1SKonstantin Belousov 	.flags		= BI_CAN_EXEC_DYN,
1059ea0fabbcSTim J. Robbins };
1060ea0fabbcSTim J. Robbins 
1061ea0fabbcSTim J. Robbins static Elf32_Brandinfo linux_glibc2brand = {
1062a8d403e1SKonstantin Belousov 	.brand		= ELFOSABI_LINUX,
1063a8d403e1SKonstantin Belousov 	.machine	= EM_386,
1064a8d403e1SKonstantin Belousov 	.compat_3_brand	= "Linux",
1065a8d403e1SKonstantin Belousov 	.emul_path	= "/compat/linux",
1066a8d403e1SKonstantin Belousov 	.interp_path	= "/lib/ld-linux.so.2",
1067a8d403e1SKonstantin Belousov 	.sysvec		= &elf_linux_sysvec,
1068a8d403e1SKonstantin Belousov 	.interp_newpath	= NULL,
1069a8d403e1SKonstantin Belousov 	.flags		= BI_CAN_EXEC_DYN,
1070ea0fabbcSTim J. Robbins };
1071ea0fabbcSTim J. Robbins 
1072ea0fabbcSTim J. Robbins Elf32_Brandinfo *linux_brandlist[] = {
1073ea0fabbcSTim J. Robbins 	&linux_brand,
1074ea0fabbcSTim J. Robbins 	&linux_glibc2brand,
1075ea0fabbcSTim J. Robbins 	NULL
1076ea0fabbcSTim J. Robbins };
1077ea0fabbcSTim J. Robbins 
1078ea0fabbcSTim J. Robbins static int
1079ea0fabbcSTim J. Robbins linux_elf_modevent(module_t mod, int type, void *data)
1080ea0fabbcSTim J. Robbins {
1081ea0fabbcSTim J. Robbins 	Elf32_Brandinfo **brandinfo;
1082ea0fabbcSTim J. Robbins 	int error;
1083ea0fabbcSTim J. Robbins 	struct linux_ioctl_handler **lihp;
1084387196bfSDoug Ambrisko 	struct linux_device_handler **ldhp;
1085ea0fabbcSTim J. Robbins 
1086ea0fabbcSTim J. Robbins 	error = 0;
1087ea0fabbcSTim J. Robbins 
1088ea0fabbcSTim J. Robbins 	switch(type) {
1089ea0fabbcSTim J. Robbins 	case MOD_LOAD:
1090ea0fabbcSTim J. Robbins 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1091ea0fabbcSTim J. Robbins 		     ++brandinfo)
1092ea0fabbcSTim J. Robbins 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1093ea0fabbcSTim J. Robbins 				error = EINVAL;
1094ea0fabbcSTim J. Robbins 		if (error == 0) {
1095ea0fabbcSTim J. Robbins 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1096ea0fabbcSTim J. Robbins 				linux_ioctl_register_handler(*lihp);
1097387196bfSDoug Ambrisko 			SET_FOREACH(ldhp, linux_device_handler_set)
1098387196bfSDoug Ambrisko 				linux_device_register_handler(*ldhp);
1099357afa71SJung-uk Kim 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
11007c09e6c0SAlexander Leidinger 			sx_init(&emul_shared_lock, "emuldata->shared lock");
11017c09e6c0SAlexander Leidinger 			LIST_INIT(&futex_list);
1102bb59e63fSAlexander Leidinger 			sx_init(&futex_sx, "futex protection lock");
1103d065e13dSDavid E. O'Brien 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1104d065e13dSDavid E. O'Brien 			    linux_proc_exit, NULL, 1000);
1105d065e13dSDavid E. O'Brien 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1106d065e13dSDavid E. O'Brien 			    linux_schedtail, NULL, 1000);
1107d065e13dSDavid E. O'Brien 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1108d065e13dSDavid E. O'Brien 			    linux_proc_exec, NULL, 1000);
11094d7c2e8aSDmitry Chagin 			linux_szplatform = roundup(strlen(linux_platform) + 1,
11104d7c2e8aSDmitry Chagin 			    sizeof(char *));
1111ea0fabbcSTim J. Robbins 			if (bootverbose)
1112ea0fabbcSTim J. Robbins 				printf("Linux ELF exec handler installed\n");
1113ea0fabbcSTim J. Robbins 		} else
1114ea0fabbcSTim J. Robbins 			printf("cannot insert Linux ELF brand handler\n");
1115ea0fabbcSTim J. Robbins 		break;
1116ea0fabbcSTim J. Robbins 	case MOD_UNLOAD:
1117ea0fabbcSTim J. Robbins 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1118ea0fabbcSTim J. Robbins 		     ++brandinfo)
1119ea0fabbcSTim J. Robbins 			if (elf32_brand_inuse(*brandinfo))
1120ea0fabbcSTim J. Robbins 				error = EBUSY;
1121ea0fabbcSTim J. Robbins 		if (error == 0) {
1122ea0fabbcSTim J. Robbins 			for (brandinfo = &linux_brandlist[0];
1123ea0fabbcSTim J. Robbins 			     *brandinfo != NULL; ++brandinfo)
1124ea0fabbcSTim J. Robbins 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1125ea0fabbcSTim J. Robbins 					error = EINVAL;
1126ea0fabbcSTim J. Robbins 		}
1127ea0fabbcSTim J. Robbins 		if (error == 0) {
1128ea0fabbcSTim J. Robbins 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1129ea0fabbcSTim J. Robbins 				linux_ioctl_unregister_handler(*lihp);
1130387196bfSDoug Ambrisko 			SET_FOREACH(ldhp, linux_device_handler_set)
1131387196bfSDoug Ambrisko 				linux_device_unregister_handler(*ldhp);
1132357afa71SJung-uk Kim 			mtx_destroy(&emul_lock);
11337c09e6c0SAlexander Leidinger 			sx_destroy(&emul_shared_lock);
1134bb59e63fSAlexander Leidinger 			sx_destroy(&futex_sx);
11357c09e6c0SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
11367c09e6c0SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
11377c09e6c0SAlexander Leidinger 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1138ea0fabbcSTim J. Robbins 			if (bootverbose)
1139ea0fabbcSTim J. Robbins 				printf("Linux ELF exec handler removed\n");
1140ea0fabbcSTim J. Robbins 		} else
1141ea0fabbcSTim J. Robbins 			printf("Could not deinstall ELF interpreter entry\n");
1142ea0fabbcSTim J. Robbins 		break;
1143ea0fabbcSTim J. Robbins 	default:
1144786e4fc4SAlexander Leidinger 		return EOPNOTSUPP;
1145ea0fabbcSTim J. Robbins 	}
1146ea0fabbcSTim J. Robbins 	return error;
1147ea0fabbcSTim J. Robbins }
1148ea0fabbcSTim J. Robbins 
1149ea0fabbcSTim J. Robbins static moduledata_t linux_elf_mod = {
1150ea0fabbcSTim J. Robbins 	"linuxelf",
1151ea0fabbcSTim J. Robbins 	linux_elf_modevent,
1152ea0fabbcSTim J. Robbins 	0
1153ea0fabbcSTim J. Robbins };
1154ea0fabbcSTim J. Robbins 
1155ea0fabbcSTim J. Robbins DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1156