xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 96a65baf76f204e48f2f94554a0b9e2792f6682b)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
 * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_futex.h>
80 #include <compat/linux/linux_emul.h>
81 #include <compat/linux/linux_mib.h>
82 #include <compat/linux/linux_misc.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
85 
/* Declare version 1 of the module named "linux". */
MODULE_VERSION(linux, 1);

/*
 * Define the M_LINUX malloc type (short name "linux") under which the
 * emulator's own allocations are accounted.
 */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
89 
/*
 * Emit one 32-bit ELF auxiliary-vector entry: store `id` and then `val`
 * through `pos` with suword32(), advancing `pos` by one element after
 * each store (two elements in total).
 *
 * `pos` must be a modifiable lvalue and is evaluated twice.  All three
 * parameters are parenthesized so that an lvalue expression such as
 * `*pp` increments the intended object instead of expanding to `*pp++`.
 * The results of suword32() are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
95 
/*
 * The two characters "#!" ('#' is 0x23, '!' is 0x21) expressed as the
 * value they produce when read as a single 16-bit quantity in host byte
 * order.  exec_linux_imgact_try() compares this against the first short
 * of the image header to recognize interpreter scripts.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
101 
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 *
 * NOTE(review): ldebug(name) is defined outside this file; it presumably
 * looks up LINUX_SYS_linux_<name>, which is why these two constants are
 * needed -- confirm against its definition.
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
110 
/*
 * Platform string copied onto the new image's stack by
 * linux_copyout_strings(); elf_linux_fixup() publishes its address
 * through the LINUX_AT_PLATFORM aux entry.
 */
const char *linux_platform = "i686";
/*
 * Number of bytes reserved for, and copied from, linux_platform.
 * NOTE(review): assigned outside the portion of the file shown here.
 */
static int linux_szplatform;
/* Signal trampoline code and its size; defined elsewhere. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* System call table for this ABI; defined elsewhere. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets of ioctl and device handlers. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of the file-local routines defined below. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
static void	linux32_fixlimit(struct rlimit *rl, int which);

/*
 * Event handler registration tags.
 * NOTE(review): presumably filled in when the module registers its
 * process exit/schedtail/exec hooks; that code is not visible here.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
134 
/*
 * Linux syscalls return negative errno's, we do positive and map them
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is indexed by the (positive) FreeBSD errno value, 0..ELAST,
 * and each slot holds the already-negated Linux value.  Most low numbers
 * are identical on both systems; where the numbering diverges the slot
 * holds the Linux number instead (e.g. index 11 yields -35 and index 35
 * yields -11).  Several FreeBSD values share one Linux value, so the
 * mapping is not reversible.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
	 -72, -67, -71
};
154 
/*
 * FreeBSD -> Linux signal number translation.  The signal-delivery code
 * below indexes a signal table with _SIG_IDX(sig); the first slot
 * corresponds to SIGHUP.  A 0 entry marks a FreeBSD signal for which
 * this table provides no Linux signal (the 7th and 29th slots;
 * presumably SIGEMT and SIGINFO -- confirm against <sys/signal.h>).
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
165 
/*
 * Linux -> FreeBSD signal number translation, laid out like
 * bsd_to_linux_signal[] but indexed by the Linux signal.  Every slot
 * holds a FreeBSD signal, so this direction has no "unmapped" marker;
 * some FreeBSD signals (SIGBUS, SIGURG) therefore appear twice.
 * NOTE(review): the duplicated slots (16th and 30th) presumably stand in
 * for Linux signals with no FreeBSD counterpart -- confirm against the
 * LINUX_SIG* definitions.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
176 
/* Value reported when a FreeBSD trap code has no Linux equivalent. */
#define LINUX_T_UNKNOWN  255

/*
 * Linux trap numbers indexed by FreeBSD trap code.  Slots for FreeBSD
 * codes without a Linux counterpart hold LINUX_T_UNKNOWN.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code into the Linux trap number.
 *
 * Returns the table entry for `code`, or LINUX_T_UNKNOWN when `code` is
 * negative or lies beyond the end of the table.  This is a function
 * rather than a macro so that the argument is evaluated exactly once and
 * the range check does not depend on an implicit signed-to-unsigned
 * conversion.
 */
static inline int
bsd_to_linux_trapcode(int code)
{
	const size_t ntraps = sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]);

	if (code < 0 || (size_t)code >= ntraps)
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
215 
/*
 * 32-bit image of the ps_strings record located at LINUX32_PS_STRINGS.
 * Every member is filled in with suword32() by linux_copyout_strings(),
 * so each one is declared with an explicit 32-bit type to keep the
 * layout independent of the width of the kernel's native types.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* user address of the argument vector */
	u_int32_t ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* user address of the environment vector */
	u_int32_t ps_nenvstr;	/* the number of environment strings */
};
222 
223 /*
224  * If FreeBSD & Linux have a difference of opinion about what a trap
225  * means, deal with it here.
226  *
227  * MPSAFE
228  */
229 static int
230 translate_traps(int signal, int trap_code)
231 {
232 	if (signal != SIGBUS)
233 		return signal;
234 	switch (trap_code) {
235 	case T_PROTFLT:
236 	case T_TSSFLT:
237 	case T_DOUBLEFLT:
238 	case T_PAGEFLT:
239 		return SIGSEGV;
240 	default:
241 		return signal;
242 	}
243 }
244 
245 static int
246 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
247 {
248 	Elf32_Auxargs *args;
249 	Elf32_Addr *base;
250 	Elf32_Addr *pos, *uplatform;
251 	struct linux32_ps_strings *arginfo;
252 
253 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 	    linux_szplatform);
256 
257 	KASSERT(curthread->td_proc == imgp->proc,
258 	    ("unsafe elf_linux_fixup(), should be curproc"));
259 	base = (Elf32_Addr *)*stack_base;
260 	args = (Elf32_Auxargs *)imgp->auxargs;
261 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
262 
263 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
264 
265 	/*
266 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
267 	 * as it has appeared in the 2.4.0-rc7 first time.
268 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
269 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
270 	 * is not present.
271 	 * Also see linux_times() implementation.
272 	 */
273 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
274 		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
275 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
276 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
277 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
278 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
279 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
280 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
281 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
282 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
283 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
284 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
285 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
286 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
287 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
288 	if (args->execfd != -1)
289 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
290 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
291 
292 	free(imgp->auxargs, M_TEMP);
293 	imgp->auxargs = NULL;
294 
295 	base--;
296 	suword32(base, (uint32_t)imgp->args->argc);
297 	*stack_base = (register_t *)base;
298 	return 0;
299 }
300 
/*
 * Defined elsewhere; added to the trampoline base address to locate the
 * entry point used for rt (SA_SIGINFO) signal delivery.
 */
extern unsigned long linux_sznonrtsigcode;

/*
 * Deliver a signal to a handler installed with SA_SIGINFO.
 *
 * Builds a struct l_rt_sigframe (handler arguments, Linux siginfo and a
 * Linux ucontext holding the interrupted register state), copies it to
 * the user stack -- or to the alternate signal stack when one is enabled,
 * requested for this signal and not already in use -- and redirects the
 * thread to the signal trampoline.
 *
 *   catcher  user handler address, recorded in the frame
 *   ksi      kernel signal information (number, code, fault address)
 *   mask     signal mask to be restored by rt_sigreturn
 *
 * Locking: entered with the process lock and the sigacts mutex held
 * (both asserted).  Both are dropped while the frame is built and copied
 * out and both are held again on return.  If the copyout fails the
 * process is terminated via sigexit(td, SIGILL).
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;
	int sig;
	int code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we were already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		/* Place the frame at the top of the alternate stack. */
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		/* Place the frame directly below the current stack pointer. */
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	/* psp is not consulted again until it is re-locked on return. */
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 */
	/* Translate the signal number through the ABI's table, if any. */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so unset fields and padding are 0. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	/* These point into the user-space copy of the frame. */
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* Drop the process lock for the remaining frame setup and copyout. */
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Save the interrupted register state (stored into Linux fields). */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 */
	regs->tf_rsp = PTROUT(fp);
	/*
	 * The trampoline is installed immediately below LINUX32_PS_STRINGS
	 * (see linux_copyout_strings()); the rt entry point lies
	 * linux_sznonrtsigcode bytes into it.
	 */
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	/* Enter the handler with the trace and direction flags clear. */
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	td->td_pcb->pcb_full_iret = 1;
	/* Re-take both locks, as required by the caller. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
430 
431 
/*
 * Deliver a signal to the current thread's process.
 *
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig().
 * Otherwise a struct l_sigframe holding the handler address, the
 * (translated) signal number and the interrupted register state is
 * copied to the user stack -- or to the alternate signal stack when one
 * is enabled, requested for this signal and not already in use -- and
 * the thread is redirected to the signal trampoline.  The trampoline
 * calls the handler and then the sigreturn system call below, which
 * restores the signal mask and register state from the frame.
 *
 *   catcher  user handler address, recorded in the frame
 *   ksi      kernel signal information (number, code, fault address)
 *   mask     signal mask to be restored by sigreturn
 *
 * Locking: entered with the process lock and the sigacts mutex held
 * (both asserted).  Both are dropped while the frame is built and copied
 * out and both are held again on return.  If the copyout fails the
 * process is terminated via sigexit(td, SIGILL).
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;
	int sig, code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we were already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		/* Place the frame at the top of the alternate stack. */
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		/* Place the frame directly below the current stack pointer. */
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	/* Drop both locks while the frame is built and copied out. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 */
	/* Translate the signal number through the ABI's table, if any. */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so unset fields and padding are 0. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = regs->tf_gs;
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first travel in sf_extramask[]. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 */
	regs->tf_rsp = PTROUT(fp);
	/*
	 * The trampoline is installed immediately below LINUX32_PS_STRINGS
	 * (see linux_copyout_strings()); this handler type starts at its
	 * first byte.
	 */
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	/* Enter the handler with the trace and direction flags clear. */
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	td->td_pcb->pcb_full_iret = 1;
	/* Re-take both locks, as required by the caller. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
553 
/*
 * System call to clean up state after a signal has been taken.
 *
 * Reads back the struct l_sigframe written by linux_sendsig() from the
 * user address args->sfp, validates it, restores the thread's signal
 * mask and reloads the saved register state into the trap frame.
 *
 * Because the frame lives in user memory it may have been modified, so
 * the saved flags and code segment are checked to make sure the user has
 * not altered them to gain improper privileges or to cause a machine
 * fault.
 *
 * Returns:
 *   EFAULT       the frame could not be copied in
 *   EINVAL       the saved flags differ from the current ones in bits
 *                the user may not change, or the saved %cs is not a
 *                user-privilege selector (SIGBUS is also posted then)
 *   EJUSTRETURN  success; the trap frame has been fully rewritten
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations.
	 */
	/* True when ef and oef differ only in user-changeable flag bits. */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_rflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_rflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		/* Report the attempt as a protection fault at the current pc. */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Reassemble the Linux mask from sc_mask plus sf_extramask[]. */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
	/* Strip signals that may never be blocked from the restored mask. */
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	regs->tf_rdi    = frame.sf_sc.sc_edi;
	regs->tf_rsi    = frame.sf_sc.sc_esi;
	regs->tf_rbp    = frame.sf_sc.sc_ebp;
	regs->tf_rbx    = frame.sf_sc.sc_ebx;
	regs->tf_rdx    = frame.sf_sc.sc_edx;
	regs->tf_rcx    = frame.sf_sc.sc_ecx;
	regs->tf_rax    = frame.sf_sc.sc_eax;
	regs->tf_rip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_gs     = frame.sf_sc.sc_gs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;
	td->td_pcb->pcb_full_iret = 1;

	/* The frame already holds the return state; skip the normal path. */
	return (EJUSTRETURN);
}
654 
/*
 * System call to clean up state after an SA_SIGINFO signal has been
 * taken.
 *
 * Reads back the struct l_ucontext written by linux_rt_sendsig() from
 * the user address args->ucp, validates it, restores the thread's signal
 * mask and register state, and re-applies the saved alternate signal
 * stack settings.
 *
 * Because the context lives in user memory it may have been modified, so
 * the saved flags and code segment are checked to make sure the user has
 * not altered them to gain improper privileges or to cause a machine
 * fault.
 *
 * Returns:
 *   EFAULT       the context could not be copied in
 *   EINVAL       the saved flags differ from the current ones in bits
 *                the user may not change, or the saved %cs is not a
 *                user-privilege selector (SIGBUS is also posted then)
 *   EJUSTRETURN  success; the trap frame has been fully rewritten
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.
	 */
	/* True when ef and oef differ only in user-changeable flag bits. */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_rflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_rflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		/* Report the attempt as a protection fault at the current pc. */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	/* Strip signals that may never be blocked from the restored mask. */
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	regs->tf_gs	= context->sc_gs;
	regs->tf_fs	= context->sc_fs;
	regs->tf_es	= context->sc_es;
	regs->tf_ds	= context->sc_ds;
	regs->tf_rdi    = context->sc_edi;
	regs->tf_rsi    = context->sc_esi;
	regs->tf_rbp    = context->sc_ebp;
	regs->tf_rbx    = context->sc_ebx;
	regs->tf_rdx    = context->sc_edx;
	regs->tf_rcx    = context->sc_ecx;
	regs->tf_rax    = context->sc_eax;
	regs->tf_rip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;
	td->td_pcb->pcb_full_iret = 1;

	/*
	 * call sigaltstack & ignore results..
	 */
	/* Convert the saved Linux stack description to the native form. */
	lss = &uc.uc_stack;
	ss.ss_sp = PTRIN(lss->ss_sp);
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	/* The frame already holds the return state; skip the normal path. */
	return (EJUSTRETURN);
}
771 
772 /*
773  * MPSAFE
774  */
775 static void
776 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
777 {
778 	args[0] = tf->tf_rbx;
779 	args[1] = tf->tf_rcx;
780 	args[2] = tf->tf_rdx;
781 	args[3] = tf->tf_rsi;
782 	args[4] = tf->tf_rdi;
783 	args[5] = tf->tf_rbp;	/* Unconfirmed */
784 	*params = NULL;		/* no copyin */
785 }
786 
787 /*
788  * If a linux binary is exec'ing something, try this image activator
789  * first.  We override standard shell script execution in order to
790  * be able to modify the interpreter path.  We only do this if a linux
791  * binary is doing the exec, so we do not create an EXEC module for it.
792  */
793 static int	exec_linux_imgact_try(struct image_params *iparams);
794 
795 static int
796 exec_linux_imgact_try(struct image_params *imgp)
797 {
798 	const char *head = (const char *)imgp->image_header;
799 	char *rpath;
800 	int error = -1, len;
801 
802 	/*
803 	* The interpreter for shell scripts run from a linux binary needs
804 	* to be located in /compat/linux if possible in order to recursively
805 	* maintain linux path emulation.
806 	*/
807 	if (((const short *)head)[0] == SHELLMAGIC) {
808 		/*
809 		* Run our normal shell image activator.  If it succeeds attempt
810 		* to use the alternate path for the interpreter.  If an
811 		* alternate * path is found, use our stringspace to store it.
812 		*/
813 		if ((error = exec_shell_imgact(imgp)) == 0) {
814 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
815 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
816 			    AT_FDCWD);
817 			if (rpath != NULL) {
818 				len = strlen(rpath) + 1;
819 
820 				if (len <= MAXSHELLCMDLEN) {
821 					memcpy(imgp->interpreter_name, rpath,
822 					    len);
823 				}
824 				free(rpath, M_TEMP);
825 			}
826 		}
827 	}
828 	return(error);
829 }
830 
831 /*
832  * Clear registers on exec
833  * XXX copied from ia32_signal.c.
834  */
835 static void
836 exec_linux_setregs(td, entry, stack, ps_strings)
837 	struct thread *td;
838 	u_long entry;
839 	u_long stack;
840 	u_long ps_strings;
841 {
842 	struct trapframe *regs = td->td_frame;
843 	struct pcb *pcb = td->td_pcb;
844 
845 	mtx_lock(&dt_lock);
846 	if (td->td_proc->p_md.md_ldt != NULL)
847 		user_ldt_free(td);
848 	else
849 		mtx_unlock(&dt_lock);
850 
851 	critical_enter();
852 	wrmsr(MSR_FSBASE, 0);
853 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
854 	pcb->pcb_fsbase = 0;
855 	pcb->pcb_gsbase = 0;
856 	critical_exit();
857 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
858 
859 	bzero((char *)regs, sizeof(struct trapframe));
860 	regs->tf_rip = entry;
861 	regs->tf_rsp = stack;
862 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
863 	regs->tf_gs = _ugssel;
864 	regs->tf_fs = _ufssel;
865 	regs->tf_es = _udatasel;
866 	regs->tf_ds = _udatasel;
867 	regs->tf_ss = _udatasel;
868 	regs->tf_flags = TF_HASSEGS;
869 	regs->tf_cs = _ucode32sel;
870 	regs->tf_rbx = ps_strings;
871 	td->td_pcb->pcb_full_iret = 1;
872 	load_cr0(rcr0() | CR0_MP | CR0_TS);
873 	fpstate_drop(td);
874 
875 	/* Return via doreti so that we can change to a different %cs */
876 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
877 	pcb->pcb_flags &= ~PCB_GS32BIT;
878 	td->td_retval[1] = 0;
879 }
880 
/*
 * Lay out the top of the new image's user stack and return the address
 * that becomes its initial stack base.
 * XXX copied from ia32_sysvec.c.
 *
 * From the ps_strings record at LINUX32_PS_STRINGS downward, the stack
 * receives: the signal trampoline, the platform string, SPARE_USRSPACE
 * bytes of slack, the argument and environment strings, and finally the
 * 32-bit pointer table (argv[], NULL, envp[], NULL, followed by room for
 * the ELF auxiliary vector when imgp->auxargs is set).  The ps_strings
 * record itself is filled in as well.
 *
 * The results of copyout() and suword32() are not checked, so a failed
 * store to user memory is not reported to the caller.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	u_int32_t *vectp;
	char *stringp, *destp;
	u_int32_t *stack_base;
	struct linux32_ps_strings *arginfo;

	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
	/*
	 * ARG_MAX - stringspace is the number of bytes of string data
	 * actually in use; it is rounded up to pointer size.
	 */
	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
	    sizeof(char *));

	/*
	 * install sigcode
	 */
	copyout(imgp->proc->p_sysent->sv_sigcode,
	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);

	/*
	 * Install LINUX_PLATFORM
	 */
	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
	    linux_szplatform), linux_szplatform);

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets,and imgp->auxarg_size is room
		 * for argument of Runtime loader.
		 */
		vectp = (u_int32_t *) (destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) *
		    sizeof(u_int32_t));

	} else
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (u_int32_t *)(destp - (imgp->args->argc +
		    imgp->args->envc + 2) * sizeof(u_int32_t));

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;
	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.
	 */
	for (; argc > 0; --argc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		/*
		 * Walk the kernel copy of the string to find its length,
		 * advancing the user-space address in step with it.
		 */
		while (*stringp++ != 0)
			destp++;
		/* Account for the terminating NUL. */
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword32(vectp++, 0);

	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		/* Same length walk as for the argument strings above. */
		while (*stringp++ != 0)
			destp++;
		/* Account for the terminating NUL. */
		destp++;
	}

	/* end of vector table is a null pointer */
	suword32(vectp, 0);

	return ((register_t *)stack_base);
}
992 
993 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
994     "32-bit Linux emulation");
995 
996 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
997 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
998     &linux32_maxdsiz, 0, "");
999 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
1000 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
1001     &linux32_maxssiz, 0, "");
1002 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
1003 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
1004     &linux32_maxvmem, 0, "");
1005 
1006 static void
1007 linux32_fixlimit(struct rlimit *rl, int which)
1008 {
1009 
1010 	switch (which) {
1011 	case RLIMIT_DATA:
1012 		if (linux32_maxdsiz != 0) {
1013 			if (rl->rlim_cur > linux32_maxdsiz)
1014 				rl->rlim_cur = linux32_maxdsiz;
1015 			if (rl->rlim_max > linux32_maxdsiz)
1016 				rl->rlim_max = linux32_maxdsiz;
1017 		}
1018 		break;
1019 	case RLIMIT_STACK:
1020 		if (linux32_maxssiz != 0) {
1021 			if (rl->rlim_cur > linux32_maxssiz)
1022 				rl->rlim_cur = linux32_maxssiz;
1023 			if (rl->rlim_max > linux32_maxssiz)
1024 				rl->rlim_max = linux32_maxssiz;
1025 		}
1026 		break;
1027 	case RLIMIT_VMEM:
1028 		if (linux32_maxvmem != 0) {
1029 			if (rl->rlim_cur > linux32_maxvmem)
1030 				rl->rlim_cur = linux32_maxvmem;
1031 			if (rl->rlim_max > linux32_maxvmem)
1032 				rl->rlim_max = linux32_maxvmem;
1033 		}
1034 		break;
1035 	}
1036 }
1037 
1038 struct sysentvec elf_linux_sysvec = {
1039 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1040 	.sv_table	= linux_sysent,
1041 	.sv_mask	= 0,
1042 	.sv_sigsize	= LINUX_SIGTBLSZ,
1043 	.sv_sigtbl	= bsd_to_linux_signal,
1044 	.sv_errsize	= ELAST + 1,
1045 	.sv_errtbl	= bsd_to_linux_errno,
1046 	.sv_transtrap	= translate_traps,
1047 	.sv_fixup	= elf_linux_fixup,
1048 	.sv_sendsig	= linux_sendsig,
1049 	.sv_sigcode	= linux_sigcode,
1050 	.sv_szsigcode	= &linux_szsigcode,
1051 	.sv_prepsyscall	= linux_prepsyscall,
1052 	.sv_name	= "Linux ELF32",
1053 	.sv_coredump	= elf32_coredump,
1054 	.sv_imgact_try	= exec_linux_imgact_try,
1055 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1056 	.sv_pagesize	= PAGE_SIZE,
1057 	.sv_minuser	= VM_MIN_ADDRESS,
1058 	.sv_maxuser	= LINUX32_USRSTACK,
1059 	.sv_usrstack	= LINUX32_USRSTACK,
1060 	.sv_psstrings	= LINUX32_PS_STRINGS,
1061 	.sv_stackprot	= VM_PROT_ALL,
1062 	.sv_copyout_strings = linux_copyout_strings,
1063 	.sv_setregs	= exec_linux_setregs,
1064 	.sv_fixlimit	= linux32_fixlimit,
1065 	.sv_maxssiz	= &linux32_maxssiz,
1066 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1067 };
1068 
1069 static char GNULINUX_ABI_VENDOR[] = "GNU";
1070 
1071 static Elf_Brandnote linux32_brandnote = {
1072 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1073 	.hdr.n_descsz	= 16,
1074 	.hdr.n_type	= 1,
1075 	.vendor		= GNULINUX_ABI_VENDOR,
1076 	.flags		= 0
1077 };
1078 
1079 static Elf32_Brandinfo linux_brand = {
1080 	.brand		= ELFOSABI_LINUX,
1081 	.machine	= EM_386,
1082 	.compat_3_brand	= "Linux",
1083 	.emul_path	= "/compat/linux",
1084 	.interp_path	= "/lib/ld-linux.so.1",
1085 	.sysvec		= &elf_linux_sysvec,
1086 	.interp_newpath	= NULL,
1087 	.brand_note	= &linux32_brandnote,
1088 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1089 };
1090 
1091 static Elf32_Brandinfo linux_glibc2brand = {
1092 	.brand		= ELFOSABI_LINUX,
1093 	.machine	= EM_386,
1094 	.compat_3_brand	= "Linux",
1095 	.emul_path	= "/compat/linux",
1096 	.interp_path	= "/lib/ld-linux.so.2",
1097 	.sysvec		= &elf_linux_sysvec,
1098 	.interp_newpath	= NULL,
1099 	.brand_note	= &linux32_brandnote,
1100 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1101 };
1102 
1103 Elf32_Brandinfo *linux_brandlist[] = {
1104 	&linux_brand,
1105 	&linux_glibc2brand,
1106 	NULL
1107 };
1108 
1109 static int
1110 linux_elf_modevent(module_t mod, int type, void *data)
1111 {
1112 	Elf32_Brandinfo **brandinfo;
1113 	int error;
1114 	struct linux_ioctl_handler **lihp;
1115 	struct linux_device_handler **ldhp;
1116 
1117 	error = 0;
1118 
1119 	switch(type) {
1120 	case MOD_LOAD:
1121 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1122 		     ++brandinfo)
1123 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1124 				error = EINVAL;
1125 		if (error == 0) {
1126 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1127 				linux_ioctl_register_handler(*lihp);
1128 			SET_FOREACH(ldhp, linux_device_handler_set)
1129 				linux_device_register_handler(*ldhp);
1130 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1131 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1132 			LIST_INIT(&futex_list);
1133 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1134 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1135 			    linux_proc_exit, NULL, 1000);
1136 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1137 			    linux_schedtail, NULL, 1000);
1138 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1139 			    linux_proc_exec, NULL, 1000);
1140 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1141 			    sizeof(char *));
1142 			linux_osd_jail_register();
1143 			stclohz = (stathz ? stathz : hz);
1144 			if (bootverbose)
1145 				printf("Linux ELF exec handler installed\n");
1146 		} else
1147 			printf("cannot insert Linux ELF brand handler\n");
1148 		break;
1149 	case MOD_UNLOAD:
1150 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1151 		     ++brandinfo)
1152 			if (elf32_brand_inuse(*brandinfo))
1153 				error = EBUSY;
1154 		if (error == 0) {
1155 			for (brandinfo = &linux_brandlist[0];
1156 			     *brandinfo != NULL; ++brandinfo)
1157 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1158 					error = EINVAL;
1159 		}
1160 		if (error == 0) {
1161 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1162 				linux_ioctl_unregister_handler(*lihp);
1163 			SET_FOREACH(ldhp, linux_device_handler_set)
1164 				linux_device_unregister_handler(*ldhp);
1165 			mtx_destroy(&emul_lock);
1166 			sx_destroy(&emul_shared_lock);
1167 			mtx_destroy(&futex_mtx);
1168 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1169 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1170 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1171 			linux_osd_jail_deregister();
1172 			if (bootverbose)
1173 				printf("Linux ELF exec handler removed\n");
1174 		} else
1175 			printf("Could not deinstall ELF interpreter entry\n");
1176 		break;
1177 	default:
1178 		return EOPNOTSUPP;
1179 	}
1180 	return error;
1181 }
1182 
1183 static moduledata_t linux_elf_mod = {
1184 	"linuxelf",
1185 	linux_elf_modevent,
1186 	0
1187 };
1188 
1189 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1190