xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 3de4046939a04576ede9d97f48f6a02d1a2ccc8c)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_futex.h>
80 #include <compat/linux/linux_emul.h>
81 #include <compat/linux/linux_mib.h>
82 #include <compat/linux/linux_misc.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
85 
86 MODULE_VERSION(linux, 1);
87 
88 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
89 
/*
 * Store one 32-bit ELF auxiliary vector entry at the user address `pos':
 * first the entry type `id', then its value `val', each written with
 * suword32().  `pos' must be a modifiable pointer lvalue; it is evaluated
 * twice and is left pointing just past the two words written.  The
 * results of suword32() are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
95 
/*
 * The two bytes "#!" that open an interpreter script, as they read when
 * fetched as a single 16-bit value.  '#' is 0x23 and '!' is 0x21, so the
 * constant depends on the host byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
101 
/*
 * The sendsig routines are not system calls, yet they want to use the
 * ldebug() facility.  Give them pseudo syscall numbers that alias
 * syscall 0; this is slightly less bogus than borrowing
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
110 
111 const char *linux_platform = "i686";
112 static int linux_szplatform;
113 extern char linux_sigcode[];
114 extern int linux_szsigcode;
115 
116 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
117 
118 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
119 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
120 
121 static int	elf_linux_fixup(register_t **stack_base,
122 		    struct image_params *iparams);
123 static register_t *linux_copyout_strings(struct image_params *imgp);
124 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
125 		    caddr_t *params);
126 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
127 static void	exec_linux_setregs(struct thread *td, u_long entry,
128 				   u_long stack, u_long ps_strings);
129 static void	linux32_fixlimit(struct rlimit *rl, int which);
130 
131 static eventhandler_tag linux_exit_tag;
132 static eventhandler_tag linux_schedtail_tag;
133 static eventhandler_tag linux_exec_tag;
134 
135 /*
136  * Linux syscalls return negative errno's, we do positive and map them
137  * Reference:
138  *   FreeBSD: src/sys/sys/errno.h
139  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
140  *            linux-2.6.17.8/include/asm-generic/errno.h
141  */
142 static int bsd_to_linux_errno[ELAST + 1] = {
143 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
144 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
145 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
146 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
147 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
148 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
149 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
150 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
151 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
152 	 -72, -67, -71
153 };
154 
155 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
156 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
157 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
158 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
159 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
160 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
161 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
162 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
163 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
164 };
165 
166 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
167 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
168 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
169 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
170 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
171 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
172 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
173 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
174 	SIGIO, SIGURG, SIGSYS
175 };
176 
/* Value reported for any trap code that has no entry below. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap code translation table, indexed by the FreeBSD trap code; each
 * slot holds the number reported to the Linux process.  The T_* names
 * in the comments are the FreeBSD codes as labelled by the original
 * author.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0              */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT  */ 6,
	/*  2              */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT     */ 3,
	/*  4              */ LINUX_T_UNKNOWN,
	/*  5              */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP  */ 16,
	/*  7 T_ASTFLT     */ 254,
	/*  8              */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT    */ 13,
	/* 10 T_TRCTRAP    */ 1,
	/* 11              */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT    */ 14,
	/* 13              */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT   */ 17,
	/* 15              */ LINUX_T_UNKNOWN,
	/* 16              */ LINUX_T_UNKNOWN,
	/* 17              */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE     */ 0,
	/* 19 T_NMI        */ 2,
	/* 20 T_OFLOW      */ 4,
	/* 21 T_BOUND      */ 5,
	/* 22 T_DNA        */ 7,
	/* 23 T_DOUBLEFLT  */ 8,
	/* 24 T_FPOPFLT    */ 9,
	/* 25 T_TSSFLT     */ 10,
	/* 26 T_SEGNPFLT   */ 11,
	/* 27 T_STKFLT     */ 12,
	/* 28 T_MCHK       */ 18,
	/* 29 T_XMMFLT     */ 19,
	/* 30 T_RESERVED   */ 15
};

/*
 * Bounds-checked lookup in the table above.  The index is compared
 * against an unsigned element count, so a negative `code' converts to a
 * huge value and also yields LINUX_T_UNKNOWN.  `code' is evaluated
 * twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) >= sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     LINUX_T_UNKNOWN : \
     _bsd_to_linux_trapcode[(code)])
215 
216 struct linux32_ps_strings {
217 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
218 	u_int ps_nargvstr;	/* the number of argument strings */
219 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
220 	u_int ps_nenvstr;	/* the number of environment strings */
221 };
222 
223 /*
224  * If FreeBSD & Linux have a difference of opinion about what a trap
225  * means, deal with it here.
226  *
227  * MPSAFE
228  */
229 static int
230 translate_traps(int signal, int trap_code)
231 {
232 	if (signal != SIGBUS)
233 		return signal;
234 	switch (trap_code) {
235 	case T_PROTFLT:
236 	case T_TSSFLT:
237 	case T_DOUBLEFLT:
238 	case T_PAGEFLT:
239 		return SIGSEGV;
240 	default:
241 		return signal;
242 	}
243 }
244 
245 static int
246 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
247 {
248 	Elf32_Auxargs *args;
249 	Elf32_Addr *base;
250 	Elf32_Addr *pos, *uplatform;
251 	struct linux32_ps_strings *arginfo;
252 
253 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 	    linux_szplatform);
256 
257 	KASSERT(curthread->td_proc == imgp->proc,
258 	    ("unsafe elf_linux_fixup(), should be curproc"));
259 	base = (Elf32_Addr *)*stack_base;
260 	args = (Elf32_Auxargs *)imgp->auxargs;
261 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
262 
263 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
264 
265 	/*
266 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
267 	 * as it has appeared in the 2.4.0-rc7 first time.
268 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
269 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
270 	 * is not present.
271 	 * Also see linux_times() implementation.
272 	 */
273 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
274 		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
275 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
276 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
277 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
278 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
279 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
280 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
281 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
282 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
283 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
284 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
285 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
286 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
287 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
288 	if (args->execfd != -1)
289 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
290 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
291 
292 	free(imgp->auxargs, M_TEMP);
293 	imgp->auxargs = NULL;
294 
295 	base--;
296 	suword32(base, (uint32_t)imgp->args->argc);
297 	*stack_base = (register_t *)base;
298 	return 0;
299 }
300 
301 extern unsigned long linux_sznonrtsigcode;
302 
303 static void
304 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
305 {
306 	struct thread *td = curthread;
307 	struct proc *p = td->td_proc;
308 	struct sigacts *psp;
309 	struct trapframe *regs;
310 	struct l_rt_sigframe *fp, frame;
311 	int oonstack;
312 	int sig;
313 	int code;
314 
315 	sig = ksi->ksi_signo;
316 	code = ksi->ksi_code;
317 	PROC_LOCK_ASSERT(p, MA_OWNED);
318 	psp = p->p_sigacts;
319 	mtx_assert(&psp->ps_mtx, MA_OWNED);
320 	regs = td->td_frame;
321 	oonstack = sigonstack(regs->tf_rsp);
322 
323 #ifdef DEBUG
324 	if (ldebug(rt_sendsig))
325 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
326 		    catcher, sig, (void*)mask, code);
327 #endif
328 	/*
329 	 * Allocate space for the signal handler context.
330 	 */
331 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
332 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
333 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
334 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
335 	} else
336 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
337 	mtx_unlock(&psp->ps_mtx);
338 
339 	/*
340 	 * Build the argument list for the signal handler.
341 	 */
342 	if (p->p_sysent->sv_sigtbl)
343 		if (sig <= p->p_sysent->sv_sigsize)
344 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
345 
346 	bzero(&frame, sizeof(frame));
347 
348 	frame.sf_handler = PTROUT(catcher);
349 	frame.sf_sig = sig;
350 	frame.sf_siginfo = PTROUT(&fp->sf_si);
351 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
352 
353 	/* Fill in POSIX parts */
354 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
355 
356 	/*
357 	 * Build the signal context to be used by sigreturn.
358 	 */
359 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
360 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
361 
362 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
363 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
364 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
365 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
366 	PROC_UNLOCK(p);
367 
368 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
369 
370 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
371 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
372 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
373 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
374 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
375 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
376 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
377 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
378 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
379 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
380 	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
381 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
382 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
383 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
384 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
385 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
386 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
387 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
388 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
389 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
390 
391 #ifdef DEBUG
392 	if (ldebug(rt_sendsig))
393 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
394 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
395 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
396 #endif
397 
398 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
399 		/*
400 		 * Process has trashed its stack; give it an illegal
401 		 * instruction to halt it in its tracks.
402 		 */
403 #ifdef DEBUG
404 		if (ldebug(rt_sendsig))
405 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
406 			    fp, oonstack);
407 #endif
408 		PROC_LOCK(p);
409 		sigexit(td, SIGILL);
410 	}
411 
412 	/*
413 	 * Build context to run handler in.
414 	 */
415 	regs->tf_rsp = PTROUT(fp);
416 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
417 	    linux_sznonrtsigcode;
418 	regs->tf_rflags &= ~(PSL_T | PSL_D);
419 	regs->tf_cs = _ucode32sel;
420 	regs->tf_ss = _udatasel;
421 	regs->tf_ds = _udatasel;
422 	regs->tf_es = _udatasel;
423 	regs->tf_fs = _ufssel;
424 	regs->tf_gs = _ugssel;
425 	regs->tf_flags = TF_HASSEGS;
426 	PROC_LOCK(p);
427 	mtx_lock(&psp->ps_mtx);
428 }
429 
430 
431 /*
432  * Send an interrupt to process.
433  *
434  * Stack is set up to allow sigcode stored
435  * in u. to call routine, followed by kcall
436  * to sigreturn routine below.  After sigreturn
437  * resets the signal mask, the stack, and the
438  * frame pointer, it returns to the user
439  * specified pc, psl.
440  */
441 static void
442 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
443 {
444 	struct thread *td = curthread;
445 	struct proc *p = td->td_proc;
446 	struct sigacts *psp;
447 	struct trapframe *regs;
448 	struct l_sigframe *fp, frame;
449 	l_sigset_t lmask;
450 	int oonstack, i;
451 	int sig, code;
452 
453 	sig = ksi->ksi_signo;
454 	code = ksi->ksi_code;
455 	PROC_LOCK_ASSERT(p, MA_OWNED);
456 	psp = p->p_sigacts;
457 	mtx_assert(&psp->ps_mtx, MA_OWNED);
458 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
459 		/* Signal handler installed with SA_SIGINFO. */
460 		linux_rt_sendsig(catcher, ksi, mask);
461 		return;
462 	}
463 
464 	regs = td->td_frame;
465 	oonstack = sigonstack(regs->tf_rsp);
466 
467 #ifdef DEBUG
468 	if (ldebug(sendsig))
469 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
470 		    catcher, sig, (void*)mask, code);
471 #endif
472 
473 	/*
474 	 * Allocate space for the signal handler context.
475 	 */
476 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
477 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
478 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
479 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
480 	} else
481 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
482 	mtx_unlock(&psp->ps_mtx);
483 	PROC_UNLOCK(p);
484 
485 	/*
486 	 * Build the argument list for the signal handler.
487 	 */
488 	if (p->p_sysent->sv_sigtbl)
489 		if (sig <= p->p_sysent->sv_sigsize)
490 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
491 
492 	bzero(&frame, sizeof(frame));
493 
494 	frame.sf_handler = PTROUT(catcher);
495 	frame.sf_sig = sig;
496 
497 	bsd_to_linux_sigset(mask, &lmask);
498 
499 	/*
500 	 * Build the signal context to be used by sigreturn.
501 	 */
502 	frame.sf_sc.sc_mask   = lmask.__bits[0];
503 	frame.sf_sc.sc_gs     = regs->tf_gs;
504 	frame.sf_sc.sc_fs     = regs->tf_fs;
505 	frame.sf_sc.sc_es     = regs->tf_es;
506 	frame.sf_sc.sc_ds     = regs->tf_ds;
507 	frame.sf_sc.sc_edi    = regs->tf_rdi;
508 	frame.sf_sc.sc_esi    = regs->tf_rsi;
509 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
510 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
511 	frame.sf_sc.sc_edx    = regs->tf_rdx;
512 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
513 	frame.sf_sc.sc_eax    = regs->tf_rax;
514 	frame.sf_sc.sc_eip    = regs->tf_rip;
515 	frame.sf_sc.sc_cs     = regs->tf_cs;
516 	frame.sf_sc.sc_eflags = regs->tf_rflags;
517 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
518 	frame.sf_sc.sc_ss     = regs->tf_ss;
519 	frame.sf_sc.sc_err    = regs->tf_err;
520 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
521 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
522 
523 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
524 		frame.sf_extramask[i] = lmask.__bits[i+1];
525 
526 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
527 		/*
528 		 * Process has trashed its stack; give it an illegal
529 		 * instruction to halt it in its tracks.
530 		 */
531 		PROC_LOCK(p);
532 		sigexit(td, SIGILL);
533 	}
534 
535 	/*
536 	 * Build context to run handler in.
537 	 */
538 	regs->tf_rsp = PTROUT(fp);
539 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
540 	regs->tf_rflags &= ~(PSL_T | PSL_D);
541 	regs->tf_cs = _ucode32sel;
542 	regs->tf_ss = _udatasel;
543 	regs->tf_ds = _udatasel;
544 	regs->tf_es = _udatasel;
545 	regs->tf_fs = _ufssel;
546 	regs->tf_gs = _ugssel;
547 	regs->tf_flags = TF_HASSEGS;
548 	PROC_LOCK(p);
549 	mtx_lock(&psp->ps_mtx);
550 }
551 
552 /*
553  * System call to cleanup state after a signal
554  * has been taken.  Reset signal mask and
555  * stack state from context left by sendsig (above).
556  * Return to previous pc and psl as specified by
557  * context left by sendsig. Check carefully to
558  * make sure that the user has not modified the
559  * psl to gain improper privileges or to cause
560  * a machine fault.
561  */
562 int
563 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
564 {
565 	struct proc *p = td->td_proc;
566 	struct l_sigframe frame;
567 	struct trapframe *regs;
568 	l_sigset_t lmask;
569 	int eflags, i;
570 	ksiginfo_t ksi;
571 
572 	regs = td->td_frame;
573 
574 #ifdef DEBUG
575 	if (ldebug(sigreturn))
576 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
577 #endif
578 	/*
579 	 * The trampoline code hands us the sigframe.
580 	 * It is unsafe to keep track of it ourselves, in the event that a
581 	 * program jumps out of a signal handler.
582 	 */
583 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
584 		return (EFAULT);
585 
586 	/*
587 	 * Check for security violations.
588 	 */
589 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
590 	eflags = frame.sf_sc.sc_eflags;
591 	/*
592 	 * XXX do allow users to change the privileged flag PSL_RF.  The
593 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
594 	 * sometimes set it there too.  tf_eflags is kept in the signal
595 	 * context during signal handling and there is no other place
596 	 * to remember it, so the PSL_RF bit may be corrupted by the
597 	 * signal handler without us knowing.  Corruption of the PSL_RF
598 	 * bit at worst causes one more or one less debugger trap, so
599 	 * allowing it is fairly harmless.
600 	 */
601 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
602 		return(EINVAL);
603 
604 	/*
605 	 * Don't allow users to load a valid privileged %cs.  Let the
606 	 * hardware check for invalid selectors, excess privilege in
607 	 * other selectors, invalid %eip's and invalid %esp's.
608 	 */
609 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
610 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
611 		ksiginfo_init_trap(&ksi);
612 		ksi.ksi_signo = SIGBUS;
613 		ksi.ksi_code = BUS_OBJERR;
614 		ksi.ksi_trapno = T_PROTFLT;
615 		ksi.ksi_addr = (void *)regs->tf_rip;
616 		trapsignal(td, &ksi);
617 		return(EINVAL);
618 	}
619 
620 	lmask.__bits[0] = frame.sf_sc.sc_mask;
621 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622 		lmask.__bits[i+1] = frame.sf_extramask[i];
623 	PROC_LOCK(p);
624 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
625 	SIG_CANTMASK(td->td_sigmask);
626 	signotify(td);
627 	PROC_UNLOCK(p);
628 
629 	/*
630 	 * Restore signal context.
631 	 */
632 	regs->tf_rdi    = frame.sf_sc.sc_edi;
633 	regs->tf_rsi    = frame.sf_sc.sc_esi;
634 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
635 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
636 	regs->tf_rdx    = frame.sf_sc.sc_edx;
637 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
638 	regs->tf_rax    = frame.sf_sc.sc_eax;
639 	regs->tf_rip    = frame.sf_sc.sc_eip;
640 	regs->tf_cs     = frame.sf_sc.sc_cs;
641 	regs->tf_ds     = frame.sf_sc.sc_ds;
642 	regs->tf_es     = frame.sf_sc.sc_es;
643 	regs->tf_fs     = frame.sf_sc.sc_fs;
644 	regs->tf_gs     = frame.sf_sc.sc_gs;
645 	regs->tf_rflags = eflags;
646 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
647 	regs->tf_ss     = frame.sf_sc.sc_ss;
648 
649 	return (EJUSTRETURN);
650 }
651 
652 /*
653  * System call to cleanup state after a signal
654  * has been taken.  Reset signal mask and
655  * stack state from context left by rt_sendsig (above).
656  * Return to previous pc and psl as specified by
657  * context left by sendsig. Check carefully to
658  * make sure that the user has not modified the
659  * psl to gain improper privileges or to cause
660  * a machine fault.
661  */
662 int
663 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
664 {
665 	struct proc *p = td->td_proc;
666 	struct l_ucontext uc;
667 	struct l_sigcontext *context;
668 	l_stack_t *lss;
669 	stack_t ss;
670 	struct trapframe *regs;
671 	int eflags;
672 	ksiginfo_t ksi;
673 
674 	regs = td->td_frame;
675 
676 #ifdef DEBUG
677 	if (ldebug(rt_sigreturn))
678 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
679 #endif
680 	/*
681 	 * The trampoline code hands us the ucontext.
682 	 * It is unsafe to keep track of it ourselves, in the event that a
683 	 * program jumps out of a signal handler.
684 	 */
685 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
686 		return (EFAULT);
687 
688 	context = &uc.uc_mcontext;
689 
690 	/*
691 	 * Check for security violations.
692 	 */
693 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
694 	eflags = context->sc_eflags;
695 	/*
696 	 * XXX do allow users to change the privileged flag PSL_RF.  The
697 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
698 	 * sometimes set it there too.  tf_eflags is kept in the signal
699 	 * context during signal handling and there is no other place
700 	 * to remember it, so the PSL_RF bit may be corrupted by the
701 	 * signal handler without us knowing.  Corruption of the PSL_RF
702 	 * bit at worst causes one more or one less debugger trap, so
703 	 * allowing it is fairly harmless.
704 	 */
705 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
706 		return(EINVAL);
707 
708 	/*
709 	 * Don't allow users to load a valid privileged %cs.  Let the
710 	 * hardware check for invalid selectors, excess privilege in
711 	 * other selectors, invalid %eip's and invalid %esp's.
712 	 */
713 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
714 	if (!CS_SECURE(context->sc_cs)) {
715 		ksiginfo_init_trap(&ksi);
716 		ksi.ksi_signo = SIGBUS;
717 		ksi.ksi_code = BUS_OBJERR;
718 		ksi.ksi_trapno = T_PROTFLT;
719 		ksi.ksi_addr = (void *)regs->tf_rip;
720 		trapsignal(td, &ksi);
721 		return(EINVAL);
722 	}
723 
724 	PROC_LOCK(p);
725 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
726 	SIG_CANTMASK(td->td_sigmask);
727 	signotify(td);
728 	PROC_UNLOCK(p);
729 
730 	/*
731 	 * Restore signal context
732 	 */
733 	regs->tf_gs	= context->sc_gs;
734 	regs->tf_fs	= context->sc_fs;
735 	regs->tf_es	= context->sc_es;
736 	regs->tf_ds	= context->sc_ds;
737 	regs->tf_rdi    = context->sc_edi;
738 	regs->tf_rsi    = context->sc_esi;
739 	regs->tf_rbp    = context->sc_ebp;
740 	regs->tf_rbx    = context->sc_ebx;
741 	regs->tf_rdx    = context->sc_edx;
742 	regs->tf_rcx    = context->sc_ecx;
743 	regs->tf_rax    = context->sc_eax;
744 	regs->tf_rip    = context->sc_eip;
745 	regs->tf_cs     = context->sc_cs;
746 	regs->tf_rflags = eflags;
747 	regs->tf_rsp    = context->sc_esp_at_signal;
748 	regs->tf_ss     = context->sc_ss;
749 
750 	/*
751 	 * call sigaltstack & ignore results..
752 	 */
753 	lss = &uc.uc_stack;
754 	ss.ss_sp = PTRIN(lss->ss_sp);
755 	ss.ss_size = lss->ss_size;
756 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
757 
758 #ifdef DEBUG
759 	if (ldebug(rt_sigreturn))
760 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
761 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
762 #endif
763 	(void)kern_sigaltstack(td, &ss, NULL);
764 
765 	return (EJUSTRETURN);
766 }
767 
768 /*
769  * MPSAFE
770  */
771 static void
772 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
773 {
774 	args[0] = tf->tf_rbx;
775 	args[1] = tf->tf_rcx;
776 	args[2] = tf->tf_rdx;
777 	args[3] = tf->tf_rsi;
778 	args[4] = tf->tf_rdi;
779 	args[5] = tf->tf_rbp;	/* Unconfirmed */
780 	*params = NULL;		/* no copyin */
781 }
782 
783 /*
784  * If a linux binary is exec'ing something, try this image activator
785  * first.  We override standard shell script execution in order to
786  * be able to modify the interpreter path.  We only do this if a linux
787  * binary is doing the exec, so we do not create an EXEC module for it.
788  */
789 static int	exec_linux_imgact_try(struct image_params *iparams);
790 
791 static int
792 exec_linux_imgact_try(struct image_params *imgp)
793 {
794 	const char *head = (const char *)imgp->image_header;
795 	char *rpath;
796 	int error = -1, len;
797 
798 	/*
799 	* The interpreter for shell scripts run from a linux binary needs
800 	* to be located in /compat/linux if possible in order to recursively
801 	* maintain linux path emulation.
802 	*/
803 	if (((const short *)head)[0] == SHELLMAGIC) {
804 		/*
805 		* Run our normal shell image activator.  If it succeeds attempt
806 		* to use the alternate path for the interpreter.  If an
807 		* alternate * path is found, use our stringspace to store it.
808 		*/
809 		if ((error = exec_shell_imgact(imgp)) == 0) {
810 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
811 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
812 			    AT_FDCWD);
813 			if (rpath != NULL) {
814 				len = strlen(rpath) + 1;
815 
816 				if (len <= MAXSHELLCMDLEN) {
817 					memcpy(imgp->interpreter_name, rpath,
818 					    len);
819 				}
820 				free(rpath, M_TEMP);
821 			}
822 		}
823 	}
824 	return(error);
825 }
826 
827 /*
828  * Clear registers on exec
829  * XXX copied from ia32_signal.c.
830  */
831 static void
832 exec_linux_setregs(td, entry, stack, ps_strings)
833 	struct thread *td;
834 	u_long entry;
835 	u_long stack;
836 	u_long ps_strings;
837 {
838 	struct trapframe *regs = td->td_frame;
839 	struct pcb *pcb = td->td_pcb;
840 
841 	mtx_lock(&dt_lock);
842 	if (td->td_proc->p_md.md_ldt != NULL)
843 		user_ldt_free(td);
844 	else
845 		mtx_unlock(&dt_lock);
846 
847 	critical_enter();
848 	wrmsr(MSR_FSBASE, 0);
849 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
850 	pcb->pcb_fsbase = 0;
851 	pcb->pcb_gsbase = 0;
852 	critical_exit();
853 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
854 
855 	bzero((char *)regs, sizeof(struct trapframe));
856 	regs->tf_rip = entry;
857 	regs->tf_rsp = stack;
858 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
859 	regs->tf_gs = _ugssel;
860 	regs->tf_fs = _ufssel;
861 	regs->tf_es = _udatasel;
862 	regs->tf_ds = _udatasel;
863 	regs->tf_ss = _udatasel;
864 	regs->tf_flags = TF_HASSEGS;
865 	regs->tf_cs = _ucode32sel;
866 	regs->tf_rbx = ps_strings;
867 	load_cr0(rcr0() | CR0_MP | CR0_TS);
868 	fpstate_drop(td);
869 
870 	/* Return via doreti so that we can change to a different %cs */
871 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
872 	pcb->pcb_flags &= ~PCB_GS32BIT;
873 	td->td_retval[1] = 0;
874 }
875 
876 /*
877  * XXX copied from ia32_sysvec.c.
878  */
879 static register_t *
880 linux_copyout_strings(struct image_params *imgp)
881 {
882 	int argc, envc;
883 	u_int32_t *vectp;
884 	char *stringp, *destp;
885 	u_int32_t *stack_base;
886 	struct linux32_ps_strings *arginfo;
887 
888 	/*
889 	 * Calculate string base and vector table pointers.
890 	 * Also deal with signal trampoline code for this exec type.
891 	 */
892 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
893 	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
894 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
895 	    sizeof(char *));
896 
897 	/*
898 	 * install sigcode
899 	 */
900 	copyout(imgp->proc->p_sysent->sv_sigcode,
901 	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
902 
903 	/*
904 	 * Install LINUX_PLATFORM
905 	 */
906 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
907 	    linux_szplatform), linux_szplatform);
908 
909 	/*
910 	 * If we have a valid auxargs ptr, prepare some room
911 	 * on the stack.
912 	 */
913 	if (imgp->auxargs) {
914 		/*
915 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
916 		 * lower compatibility.
917 		 */
918 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
919 		    (LINUX_AT_COUNT * 2);
920 		/*
921 		 * The '+ 2' is for the null pointers at the end of each of
922 		 * the arg and env vector sets,and imgp->auxarg_size is room
923 		 * for argument of Runtime loader.
924 		 */
925 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
926 		    imgp->args->envc + 2 + imgp->auxarg_size) *
927 		    sizeof(u_int32_t));
928 
929 	} else
930 		/*
931 		 * The '+ 2' is for the null pointers at the end of each of
932 		 * the arg and env vector sets
933 		 */
934 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
935 		    imgp->args->envc + 2) * sizeof(u_int32_t));
936 
937 	/*
938 	 * vectp also becomes our initial stack base
939 	 */
940 	stack_base = vectp;
941 
942 	stringp = imgp->args->begin_argv;
943 	argc = imgp->args->argc;
944 	envc = imgp->args->envc;
945 	/*
946 	 * Copy out strings - arguments and environment.
947 	 */
948 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
949 
950 	/*
951 	 * Fill in "ps_strings" struct for ps, w, etc.
952 	 */
953 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
954 	suword32(&arginfo->ps_nargvstr, argc);
955 
956 	/*
957 	 * Fill in argument portion of vector table.
958 	 */
959 	for (; argc > 0; --argc) {
960 		suword32(vectp++, (uint32_t)(intptr_t)destp);
961 		while (*stringp++ != 0)
962 			destp++;
963 		destp++;
964 	}
965 
966 	/* a null vector table pointer separates the argp's from the envp's */
967 	suword32(vectp++, 0);
968 
969 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
970 	suword32(&arginfo->ps_nenvstr, envc);
971 
972 	/*
973 	 * Fill in environment portion of vector table.
974 	 */
975 	for (; envc > 0; --envc) {
976 		suword32(vectp++, (uint32_t)(intptr_t)destp);
977 		while (*stringp++ != 0)
978 			destp++;
979 		destp++;
980 	}
981 
982 	/* end of vector table is a null pointer */
983 	suword32(vectp, 0);
984 
985 	return ((register_t *)stack_base);
986 }
987 
/*
 * Declare the "linux32" sysctl node under _compat.  The maxdsiz, maxssiz
 * and maxvmem knobs defined right after it are attached to this node
 * (they name _compat_linux32 as their parent).
 */
988 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
989     "32-bit Linux emulation");
990 
/*
 * Read/write (CTLFLAG_RW) caps used by linux32_fixlimit() to clamp the
 * resource limits of processes running under this sysentvec.  Each one
 * starts out at the corresponding LINUX32_MAX* constant; a value of 0
 * makes linux32_fixlimit() leave that limit untouched.
 */
/* Cap for RLIMIT_DATA. */
991 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
992 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
993     &linux32_maxdsiz, 0, "");
/* Cap for RLIMIT_STACK; also exported through elf_linux_sysvec.sv_maxssiz. */
994 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
995 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
996     &linux32_maxssiz, 0, "");
/* Cap for RLIMIT_VMEM. */
997 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
998 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
999     &linux32_maxvmem, 0, "");
1000 
1001 static void
1002 linux32_fixlimit(struct rlimit *rl, int which)
1003 {
1004 
1005 	switch (which) {
1006 	case RLIMIT_DATA:
1007 		if (linux32_maxdsiz != 0) {
1008 			if (rl->rlim_cur > linux32_maxdsiz)
1009 				rl->rlim_cur = linux32_maxdsiz;
1010 			if (rl->rlim_max > linux32_maxdsiz)
1011 				rl->rlim_max = linux32_maxdsiz;
1012 		}
1013 		break;
1014 	case RLIMIT_STACK:
1015 		if (linux32_maxssiz != 0) {
1016 			if (rl->rlim_cur > linux32_maxssiz)
1017 				rl->rlim_cur = linux32_maxssiz;
1018 			if (rl->rlim_max > linux32_maxssiz)
1019 				rl->rlim_max = linux32_maxssiz;
1020 		}
1021 		break;
1022 	case RLIMIT_VMEM:
1023 		if (linux32_maxvmem != 0) {
1024 			if (rl->rlim_cur > linux32_maxvmem)
1025 				rl->rlim_cur = linux32_maxvmem;
1026 			if (rl->rlim_max > linux32_maxvmem)
1027 				rl->rlim_max = linux32_maxvmem;
1028 		}
1029 		break;
1030 	}
1031 }
1032 
/*
 * System entry vector named "Linux ELF32".  It ties together the Linux
 * syscall table, the BSD-to-Linux signal and errno translation tables,
 * the signal trampoline (linux_sigcode), the 32-bit user address-space
 * layout (LINUX32_USRSTACK / LINUX32_PS_STRINGS) and the exec-time hooks
 * defined in this file.  Both brand entries below point at this vector.
 */
1033 struct sysentvec elf_linux_sysvec = {
	/* Syscall dispatch table and its size. */
1034 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1035 	.sv_table	= linux_sysent,
1036 	.sv_mask	= 0,
	/* Signal, errno and trap translation. */
1037 	.sv_sigsize	= LINUX_SIGTBLSZ,
1038 	.sv_sigtbl	= bsd_to_linux_signal,
1039 	.sv_errsize	= ELAST + 1,
1040 	.sv_errtbl	= bsd_to_linux_errno,
1041 	.sv_transtrap	= translate_traps,
	/* Exec fixup, signal delivery and the signal trampoline. */
1042 	.sv_fixup	= elf_linux_fixup,
1043 	.sv_sendsig	= linux_sendsig,
1044 	.sv_sigcode	= linux_sigcode,
1045 	.sv_szsigcode	= &linux_szsigcode,
1046 	.sv_prepsyscall	= linux_prepsyscall,
1047 	.sv_name	= "Linux ELF32",
1048 	.sv_coredump	= elf32_coredump,
1049 	.sv_imgact_try	= exec_linux_imgact_try,
1050 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	/* User address-space layout. */
1051 	.sv_pagesize	= PAGE_SIZE,
1052 	.sv_minuser	= VM_MIN_ADDRESS,
1053 	.sv_maxuser	= LINUX32_USRSTACK,
1054 	.sv_usrstack	= LINUX32_USRSTACK,
1055 	.sv_psstrings	= LINUX32_PS_STRINGS,
1056 	.sv_stackprot	= VM_PROT_ALL,
	/* Exec-time stack setup, register setup and limit clamping. */
1057 	.sv_copyout_strings = linux_copyout_strings,
1058 	.sv_setregs	= exec_linux_setregs,
1059 	.sv_fixlimit	= linux32_fixlimit,
1060 	.sv_maxssiz	= &linux32_maxssiz,
1061 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1062 };
1063 
/* Vendor name carried by linux32_brandnote; its sizeof gives n_namesz. */
1064 static char GNULINUX_ABI_VENDOR[] = "GNU";
1065 
/*
 * ELF note description attached to both Linux brand entries below: a note
 * whose name is GNULINUX_ABI_VENDOR (length includes the terminating NUL),
 * with a 16-byte descriptor and note type 1.
 * NOTE(review): this presumably corresponds to the GNU ABI-tag note that
 * Linux toolchains emit -- confirm against the ELF brand-note matcher.
 */
1066 static Elf_Brandnote linux32_brandnote = {
1067 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1068 	.hdr.n_descsz	= 16,
1069 	.hdr.n_type	= 1,
1070 	.vendor		= GNULINUX_ABI_VENDOR,
1071 	.flags		= 0
1072 };
1073 
/*
 * Brand entry for EM_386 Linux binaries whose interpreter is
 * /lib/ld-linux.so.1.  Uses /compat/linux as the emulation path, runs
 * under elf_linux_sysvec and may also be matched via linux32_brandnote
 * (BI_BRAND_NOTE).
 */
1074 static Elf32_Brandinfo linux_brand = {
1075 	.brand		= ELFOSABI_LINUX,
1076 	.machine	= EM_386,
1077 	.compat_3_brand	= "Linux",
1078 	.emul_path	= "/compat/linux",
1079 	.interp_path	= "/lib/ld-linux.so.1",
1080 	.sysvec		= &elf_linux_sysvec,
1081 	.interp_newpath	= NULL,
1082 	.brand_note	= &linux32_brandnote,
1083 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1084 };
1085 
/*
 * Brand entry for EM_386 Linux binaries whose interpreter is
 * /lib/ld-linux.so.2.  Identical to linux_brand except for interp_path.
 */
1086 static Elf32_Brandinfo linux_glibc2brand = {
1087 	.brand		= ELFOSABI_LINUX,
1088 	.machine	= EM_386,
1089 	.compat_3_brand	= "Linux",
1090 	.emul_path	= "/compat/linux",
1091 	.interp_path	= "/lib/ld-linux.so.2",
1092 	.sysvec		= &elf_linux_sysvec,
1093 	.interp_newpath	= NULL,
1094 	.brand_note	= &linux32_brandnote,
1095 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1096 };
1097 
/*
 * NULL-terminated list of the brand entries that linux_elf_modevent()
 * inserts on MOD_LOAD and checks/removes on MOD_UNLOAD.
 */
1098 Elf32_Brandinfo *linux_brandlist[] = {
1099 	&linux_brand,
1100 	&linux_glibc2brand,
1101 	NULL
1102 };
1103 
1104 static int
1105 linux_elf_modevent(module_t mod, int type, void *data)
1106 {
1107 	Elf32_Brandinfo **brandinfo;
1108 	int error;
1109 	struct linux_ioctl_handler **lihp;
1110 	struct linux_device_handler **ldhp;
1111 
1112 	error = 0;
1113 
1114 	switch(type) {
1115 	case MOD_LOAD:
1116 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1117 		     ++brandinfo)
1118 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1119 				error = EINVAL;
1120 		if (error == 0) {
1121 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1122 				linux_ioctl_register_handler(*lihp);
1123 			SET_FOREACH(ldhp, linux_device_handler_set)
1124 				linux_device_register_handler(*ldhp);
1125 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1126 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1127 			LIST_INIT(&futex_list);
1128 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1129 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1130 			    linux_proc_exit, NULL, 1000);
1131 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1132 			    linux_schedtail, NULL, 1000);
1133 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1134 			    linux_proc_exec, NULL, 1000);
1135 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1136 			    sizeof(char *));
1137 			linux_osd_jail_register();
1138 			stclohz = (stathz ? stathz : hz);
1139 			if (bootverbose)
1140 				printf("Linux ELF exec handler installed\n");
1141 		} else
1142 			printf("cannot insert Linux ELF brand handler\n");
1143 		break;
1144 	case MOD_UNLOAD:
1145 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1146 		     ++brandinfo)
1147 			if (elf32_brand_inuse(*brandinfo))
1148 				error = EBUSY;
1149 		if (error == 0) {
1150 			for (brandinfo = &linux_brandlist[0];
1151 			     *brandinfo != NULL; ++brandinfo)
1152 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1153 					error = EINVAL;
1154 		}
1155 		if (error == 0) {
1156 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1157 				linux_ioctl_unregister_handler(*lihp);
1158 			SET_FOREACH(ldhp, linux_device_handler_set)
1159 				linux_device_unregister_handler(*ldhp);
1160 			mtx_destroy(&emul_lock);
1161 			sx_destroy(&emul_shared_lock);
1162 			mtx_destroy(&futex_mtx);
1163 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1164 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1165 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1166 			linux_osd_jail_deregister();
1167 			if (bootverbose)
1168 				printf("Linux ELF exec handler removed\n");
1169 		} else
1170 			printf("Could not deinstall ELF interpreter entry\n");
1171 		break;
1172 	default:
1173 		return EOPNOTSUPP;
1174 	}
1175 	return error;
1176 }
1177 
/*
 * Module description for "linuxelf": names the module and routes its
 * events to linux_elf_modevent().  The last member is left as 0.
 */
1178 static moduledata_t linux_elf_mod = {
1179 	"linuxelf",
1180 	linux_elf_modevent,
1181 	0
1182 };
1183 
/* Register linux_elf_mod under SI_SUB_EXEC with SI_ORDER_ANY. */
1184 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1185