xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 10f0bcab61ef441cb5af32fb706688d8cbd55dc0)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/imgact.h>
47 #include <sys/imgact_elf.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/resourcevar.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/syscallsubr.h>
58 #include <sys/sysent.h>
59 #include <sys/sysproto.h>
60 #include <sys/vnode.h>
61 #include <sys/eventhandler.h>
62 
63 #include <vm/vm.h>
64 #include <vm/pmap.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_param.h>
70 
71 #include <machine/cpu.h>
72 #include <machine/md_var.h>
73 #include <machine/pcb.h>
74 #include <machine/specialreg.h>
75 
76 #include <amd64/linux32/linux.h>
77 #include <amd64/linux32/linux32_proto.h>
78 #include <compat/linux/linux_emul.h>
79 #include <compat/linux/linux_mib.h>
80 #include <compat/linux/linux_signal.h>
81 #include <compat/linux/linux_util.h>
82 
83 MODULE_VERSION(linux, 1);
84 
85 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
86 
/*
 * Emit one ELF auxiliary-vector entry as two consecutive 32-bit words
 * (id first, then val) through suword32(), post-incrementing `pos' after
 * each store.  `pos' must therefore be a modifiable lvalue.  The results
 * of the two suword32() calls are not checked.
 */
87 #define	AUXARGS_ENTRY_32(pos, id, val)	\
88 	do {				\
89 		suword32(pos++, id);	\
90 		suword32(pos++, val);	\
91 	} while (0)
92 
/*
 * The two characters "#!" as they appear when the start of an image
 * header is read as a native-endian short (see exec_linux_imgact_try()).
 */
93 #if BYTE_ORDER == LITTLE_ENDIAN
94 #define SHELLMAGIC      0x2123 /* #! */
95 #else
96 #define SHELLMAGIC      0x2321
97 #endif
98 
99 /*
100  * Allow the sendsig functions to use the ldebug() facility
101  * even though they are not syscalls themselves. Map them
102  * to syscall 0. This is slightly less bogus than using
103  * ldebug(sigreturn).
104  */
105 #define	LINUX_SYS_linux_rt_sendsig	0
106 #define	LINUX_SYS_linux_sendsig		0
107 
108 extern char linux_sigcode[];
109 extern int linux_szsigcode;
110 
111 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
112 
113 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
114 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
115 
116 static int	elf_linux_fixup(register_t **stack_base,
117 		    struct image_params *iparams);
118 static register_t *linux_copyout_strings(struct image_params *imgp);
119 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
120 		    caddr_t *params);
121 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
122 static void	exec_linux_setregs(struct thread *td, u_long entry,
123 				   u_long stack, u_long ps_strings);
124 static void	linux32_fixlimit(struct rlimit *rl, int which);
125 
126 extern LIST_HEAD(futex_list, futex) futex_list;
127 extern struct sx futex_sx;
128 
129 static eventhandler_tag linux_exit_tag;
130 static eventhandler_tag linux_schedtail_tag;
131 static eventhandler_tag linux_exec_tag;
132 
133 /*
134  * Linux syscalls return negative errno's, we do positive and map them
135  * Reference:
136  *   FreeBSD: src/sys/sys/errno.h
137  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
138  *            linux-2.6.17.8/include/asm-generic/errno.h
139  */
/* Indexed by FreeBSD errno value (0 .. ELAST); entries are negative. */
140 static int bsd_to_linux_errno[ELAST + 1] = {
141 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/* 0 - 9 */
142 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
143 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
144 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
145 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
146 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
147 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
148 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
149 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
150 	 -72, -67, -71						/* 90 - 92 */
151 };
152 
/*
 * FreeBSD -> Linux signal numbers.  The table is handed to elf_linux_sysvec
 * below and the sendsig routines index the sysvec signal table with
 * _SIG_IDX(sig), so entry 0 presumably corresponds to FreeBSD signal 1.
 * NOTE(review): the 0 entries look like "no Linux equivalent" -- confirm.
 */
153 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
154 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
155 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
156 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
157 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
158 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
159 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
160 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
161 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
162 };
163 
/*
 * Linux -> FreeBSD signal numbers; the reverse direction of
 * bsd_to_linux_signal[].  Not referenced elsewhere in this part of the
 * file; presumably indexed the same way (signal number minus one) --
 * TODO confirm against the users in compat/linux.
 */
164 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
165 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
166 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
167 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
168 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
169 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
170 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
171 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
172 	SIGIO, SIGURG, SIGSYS
173 };
174 
/*
 * Translation from FreeBSD trap type (T_*, used as the array index) to the
 * value the sendsig routines store in the Linux sigcontext sc_trapno field.
 * Slots with no mapping hold LINUX_T_UNKNOWN.
 */
175 #define LINUX_T_UNKNOWN  255
176 static int _bsd_to_linux_trapcode[] = {
177 	LINUX_T_UNKNOWN,	/* 0 */
178 	6,			/* 1  T_PRIVINFLT */
179 	LINUX_T_UNKNOWN,	/* 2 */
180 	3,			/* 3  T_BPTFLT */
181 	LINUX_T_UNKNOWN,	/* 4 */
182 	LINUX_T_UNKNOWN,	/* 5 */
183 	16,			/* 6  T_ARITHTRAP */
184 	254,			/* 7  T_ASTFLT */
185 	LINUX_T_UNKNOWN,	/* 8 */
186 	13,			/* 9  T_PROTFLT */
187 	1,			/* 10 T_TRCTRAP */
188 	LINUX_T_UNKNOWN,	/* 11 */
189 	14,			/* 12 T_PAGEFLT */
190 	LINUX_T_UNKNOWN,	/* 13 */
191 	17,			/* 14 T_ALIGNFLT */
192 	LINUX_T_UNKNOWN,	/* 15 */
193 	LINUX_T_UNKNOWN,	/* 16 */
194 	LINUX_T_UNKNOWN,	/* 17 */
195 	0,			/* 18 T_DIVIDE */
196 	2,			/* 19 T_NMI */
197 	4,			/* 20 T_OFLOW */
198 	5,			/* 21 T_BOUND */
199 	7,			/* 22 T_DNA */
200 	8,			/* 23 T_DOUBLEFLT */
201 	9,			/* 24 T_FPOPFLT */
202 	10,			/* 25 T_TSSFLT */
203 	11,			/* 26 T_SEGNPFLT */
204 	12,			/* 27 T_STKFLT */
205 	18,			/* 28 T_MCHK */
206 	19,			/* 29 T_XMMFLT */
207 	15			/* 30 T_RESERVED */
208 };
/*
 * Bounds-checked lookup: codes at or beyond the end of the table yield
 * LINUX_T_UNKNOWN.  Note that `code' is evaluated twice.
 */
209 #define bsd_to_linux_trapcode(code) \
210     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
211      _bsd_to_linux_trapcode[(code)]: \
212      LINUX_T_UNKNOWN)
213 
/*
 * Layout of the ps_strings block that linux_copyout_strings() fills in at
 * LINUX32_PS_STRINGS.  The two vector addresses are held as 32-bit values
 * rather than native pointers.
 */
214 struct linux32_ps_strings {
215 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
216 	u_int ps_nargvstr;	/* the number of argument strings */
217 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
218 	u_int ps_nenvstr;	/* the number of environment strings */
219 };
220 
221 /*
222  * If FreeBSD & Linux have a difference of opinion about what a trap
223  * means, deal with it here.
224  *
225  * MPSAFE
226  */
227 static int
228 translate_traps(int signal, int trap_code)
229 {
230 	if (signal != SIGBUS)
231 		return signal;
232 	switch (trap_code) {
233 	case T_PROTFLT:
234 	case T_TSSFLT:
235 	case T_DOUBLEFLT:
236 	case T_PAGEFLT:
237 		return SIGSEGV;
238 	default:
239 		return signal;
240 	}
241 }
242 
/*
 * Finish the initial user stack of a new Linux ELF image: write the ELF
 * auxiliary vector and push argc.
 *
 * stack_base	in: address of the argv vector on the user stack;
 *		out: lowered by one 32-bit slot, which now holds argc.
 * imgp		image being activated; imgp->auxargs is consumed (freed
 *		and set to NULL) here.
 *
 * Always returns 0; the suword32() stores are not checked.
 */
243 static int
244 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
245 {
246 	Elf32_Auxargs *args;
247 	Elf32_Addr *base;
248 	Elf32_Addr *pos;
249 
250 	KASSERT(curthread->td_proc == imgp->proc,
251 	    ("unsafe elf_linux_fixup(), should be curproc"));
252 	base = (Elf32_Addr *)*stack_base;
253 	args = (Elf32_Auxargs *)imgp->auxargs;
	/*
	 * Skip the argv and envp vectors plus their two terminating null
	 * slots (the layout built by linux_copyout_strings()).
	 */
254 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
255 
256 	if (args->trace)
257 		AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
258 	if (args->execfd != -1)
259 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
260 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
261 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
262 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
263 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
264 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
265 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
266 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
	/* NOTE(review): effective ids are taken from the saved ids -- confirm intent. */
267 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
268 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
269 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
270 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
271 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
272 
273 	free(imgp->auxargs, M_TEMP);
274 	imgp->auxargs = NULL;
275 
	/* Push argc immediately below argv[0] and report the new stack base. */
276 	base--;
277 	suword32(base, (uint32_t)imgp->args->argc);
278 	*stack_base = (register_t *)base;
279 	return 0;
280 }
281 
282 extern int _ucodesel, _ucode32sel, _udatasel;
283 extern unsigned long linux_sznonrtsigcode;
284 
/*
 * Deliver a signal using the Linux "rt" frame layout (struct l_rt_sigframe).
 * linux_sendsig() calls this for handlers registered in ps_siginfo.
 *
 * catcher	user handler address, stored in the frame
 * ksi		signal information (number, code, fault address)
 * mask		signal mask to record in the frame's ucontext
 *
 * Locking: entered with the process lock and ps_mtx held (asserted).  Both
 * are dropped while the frame is built and copied out, and both are taken
 * again before returning.  If the frame cannot be copied out the process
 * is terminated via sigexit(SIGILL).
 */
285 static void
286 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
287 {
288 	struct thread *td = curthread;
289 	struct proc *p = td->td_proc;
290 	struct sigacts *psp;
291 	struct trapframe *regs;
292 	struct l_rt_sigframe *fp, frame;
293 	int oonstack;
294 	int sig;
295 	int code;
296 
297 	sig = ksi->ksi_signo;
298 	code = ksi->ksi_code;
299 	PROC_LOCK_ASSERT(p, MA_OWNED);
300 	psp = p->p_sigacts;
301 	mtx_assert(&psp->ps_mtx, MA_OWNED);
302 	regs = td->td_frame;
	/* Evaluated against the interrupted stack pointer, before it is changed. */
303 	oonstack = sigonstack(regs->tf_rsp);
304 
305 #ifdef DEBUG
306 	if (ldebug(rt_sendsig))
307 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
308 		    catcher, sig, (void*)mask, code);
309 #endif
310 	/*
311 	 * Allocate space for the signal handler context.
312 	 */
313 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
314 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
315 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
316 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
317 	} else
318 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	/* No sigacts field is read past this point. */
319 	mtx_unlock(&psp->ps_mtx);
320 
321 	/*
322 	 * Build the argument list for the signal handler.
323 	 */
	/* From here on `sig' is the translated (emulation) signal number. */
324 	if (p->p_sysent->sv_sigtbl)
325 		if (sig <= p->p_sysent->sv_sigsize)
326 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
327 
328 	bzero(&frame, sizeof(frame));
329 
	/* The siginfo/ucontext pointers refer to the user-space copy at fp. */
330 	frame.sf_handler = PTROUT(catcher);
331 	frame.sf_sig = sig;
332 	frame.sf_siginfo = PTROUT(&fp->sf_si);
333 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
334 
335 	/* Fill in POSIX parts */
336 	frame.sf_si.lsi_signo = sig;
337 	frame.sf_si.lsi_code = code;
338 	frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr);
339 
340 	/*
341 	 * Build the signal context to be used by sigreturn.
342 	 */
343 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
344 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
345 
346 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
347 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
348 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
349 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* The process lock is released before the copyout() further down. */
350 	PROC_UNLOCK(p);
351 
352 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
353 
	/*
	 * Snapshot the interrupted register state.  %gs, %fs, %es and %ds
	 * are read from the CPU; everything else comes from the trapframe.
	 */
354 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
355         frame.sf_sc.uc_mcontext.sc_gs     = rgs();
356         frame.sf_sc.uc_mcontext.sc_fs     = rfs();
357         __asm __volatile("movl %%es,%0" :
358 	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
359         __asm __volatile("movl %%ds,%0" :
360 	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
361 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
362 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
363 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
364 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
365 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
366 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
367 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
368 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
369 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
370 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
371 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
372 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
373 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
374 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	/* NOTE(review): the lookup is keyed on ksi_code, not ksi_trapno -- confirm. */
375 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
376 
377 #ifdef DEBUG
378 	if (ldebug(rt_sendsig))
379 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
380 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
381 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
382 #endif
383 
384 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
385 		/*
386 		 * Process has trashed its stack; give it an illegal
387 		 * instruction to halt it in its tracks.
388 		 */
389 #ifdef DEBUG
390 		if (ldebug(rt_sendsig))
391 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
392 			    fp, oonstack);
393 #endif
394 		PROC_LOCK(p);
395 		sigexit(td, SIGILL);
396 	}
397 
398 	/*
399 	 * Build context to run handler in.
400 	 */
	/*
	 * linux_copyout_strings() places the sigcode directly below
	 * LINUX32_PS_STRINGS; the entry point used here is
	 * linux_sznonrtsigcode bytes into it (presumably the rt trampoline
	 * follows the non-rt one -- confirm against the sigcode source).
	 */
401 	regs->tf_rsp = PTROUT(fp);
402 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
403 	    linux_sznonrtsigcode;
404 	regs->tf_rflags &= ~(PSL_T | PSL_D);
405 	regs->tf_cs = _ucode32sel;
406 	regs->tf_ss = _udatasel;
407 	load_ds(_udatasel);
408 	td->td_pcb->pcb_ds = _udatasel;
409 	load_es(_udatasel);
410 	td->td_pcb->pcb_es = _udatasel;
411 	/* leave user %fs and %gs untouched */
	/* Re-acquire the locks that were held on entry. */
412 	PROC_LOCK(p);
413 	mtx_lock(&psp->ps_mtx);
414 }
415 
416 
417 /*
418  * Send an interrupt to process.
419  *
420  * Stack is set up to allow sigcode stored
421  * in u. to call routine, followed by kcall
422  * to sigreturn routine below.  After sigreturn
423  * resets the signal mask, the stack, and the
424  * frame pointer, it returns to the user
425  * specified pc, psl.
426  */
/*
 * Parameters: catcher is the user handler address, ksi carries the signal
 * number/code/fault address, mask is the signal mask saved into the frame.
 * Handlers registered in ps_siginfo are handed to linux_rt_sendsig();
 * all others get the classic struct l_sigframe built here.
 *
 * Locking: entered with the process lock and ps_mtx held (asserted); both
 * are dropped while the frame is built and copied out and are re-taken
 * before returning.
 */
427 static void
428 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
429 {
430 	struct thread *td = curthread;
431 	struct proc *p = td->td_proc;
432 	struct sigacts *psp;
433 	struct trapframe *regs;
434 	struct l_sigframe *fp, frame;
435 	l_sigset_t lmask;
436 	int oonstack, i;
437 	int sig, code;
438 
439 	sig = ksi->ksi_signo;
440 	code = ksi->ksi_code;
441 	PROC_LOCK_ASSERT(p, MA_OWNED);
442 	psp = p->p_sigacts;
443 	mtx_assert(&psp->ps_mtx, MA_OWNED);
444 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
445 		/* Signal handler installed with SA_SIGINFO. */
446 		linux_rt_sendsig(catcher, ksi, mask);
447 		return;
448 	}
449 
450 	regs = td->td_frame;
	/* Evaluated against the interrupted stack pointer, before it is changed. */
451 	oonstack = sigonstack(regs->tf_rsp);
452 
453 #ifdef DEBUG
454 	if (ldebug(sendsig))
455 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
456 		    catcher, sig, (void*)mask, code);
457 #endif
458 
459 	/*
460 	 * Allocate space for the signal handler context.
461 	 */
462 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
463 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
464 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
465 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
466 	} else
467 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	/* Both locks are released before the frame is built and copied out. */
468 	mtx_unlock(&psp->ps_mtx);
469 	PROC_UNLOCK(p);
470 
471 	/*
472 	 * Build the argument list for the signal handler.
473 	 */
	/* From here on `sig' is the translated (emulation) signal number. */
474 	if (p->p_sysent->sv_sigtbl)
475 		if (sig <= p->p_sysent->sv_sigsize)
476 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
477 
478 	bzero(&frame, sizeof(frame));
479 
480 	frame.sf_handler = PTROUT(catcher);
481 	frame.sf_sig = sig;
482 
483 	bsd_to_linux_sigset(mask, &lmask);
484 
485 	/*
486 	 * Build the signal context to be used by sigreturn.
487 	 */
	/*
	 * Word 0 of the Linux mask lives in the sigcontext; the remaining
	 * words are stored in sf_extramask[] further down.
	 */
488 	frame.sf_sc.sc_mask   = lmask.__bits[0];
489         frame.sf_sc.sc_gs     = rgs();
490         frame.sf_sc.sc_fs     = rfs();
491         __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
492         __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
493 	frame.sf_sc.sc_edi    = regs->tf_rdi;
494 	frame.sf_sc.sc_esi    = regs->tf_rsi;
495 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
496 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
497 	frame.sf_sc.sc_edx    = regs->tf_rdx;
498 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
499 	frame.sf_sc.sc_eax    = regs->tf_rax;
500 	frame.sf_sc.sc_eip    = regs->tf_rip;
501 	frame.sf_sc.sc_cs     = regs->tf_cs;
502 	frame.sf_sc.sc_eflags = regs->tf_rflags;
503 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
504 	frame.sf_sc.sc_ss     = regs->tf_ss;
505 	frame.sf_sc.sc_err    = regs->tf_err;
506 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	/* NOTE(review): the lookup is keyed on ksi_code, not ksi_trapno -- confirm. */
507 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
508 
509 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
510 		frame.sf_extramask[i] = lmask.__bits[i+1];
511 
512 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
513 		/*
514 		 * Process has trashed its stack; give it an illegal
515 		 * instruction to halt it in its tracks.
516 		 */
517 		PROC_LOCK(p);
518 		sigexit(td, SIGILL);
519 	}
520 
521 	/*
522 	 * Build context to run handler in.
523 	 */
	/*
	 * Execution resumes at the start of the sigcode, which
	 * linux_copyout_strings() places directly below LINUX32_PS_STRINGS.
	 */
524 	regs->tf_rsp = PTROUT(fp);
525 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
526 	regs->tf_rflags &= ~(PSL_T | PSL_D);
527 	regs->tf_cs = _ucode32sel;
528 	regs->tf_ss = _udatasel;
529 	load_ds(_udatasel);
530 	td->td_pcb->pcb_ds = _udatasel;
531 	load_es(_udatasel);
532 	td->td_pcb->pcb_es = _udatasel;
533 	/* leave user %fs and %gs untouched */
	/* Re-acquire the locks that were held on entry. */
534 	PROC_LOCK(p);
535 	mtx_lock(&psp->ps_mtx);
536 }
537 
538 /*
539  * System call to cleanup state after a signal
540  * has been taken.  Reset signal mask and
541  * stack state from context left by sendsig (above).
542  * Return to previous pc and psl as specified by
543  * context left by sendsig. Check carefully to
544  * make sure that the user has not modified the
545  * psl to gain improper privileges or to cause
546  * a machine fault.
547  */
/*
 * td	calling thread; its trapframe is overwritten on success.
 * args	args->sfp is the user address of the struct l_sigframe to restore.
 *
 * Returns EFAULT if the frame cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the checks below (a SIGBUS is also posted in the %cs
 * case), and EJUSTRETURN once the register state has been restored.
 */
548 int
549 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
550 {
551 	struct proc *p = td->td_proc;
552 	struct l_sigframe frame;
553 	struct trapframe *regs;
554 	l_sigset_t lmask;
555 	int eflags, i;
556 	ksiginfo_t ksi;
557 
558 	regs = td->td_frame;
559 
560 #ifdef DEBUG
561 	if (ldebug(sigreturn))
562 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
563 #endif
564 	/*
565 	 * The trampoline code hands us the sigframe.
566 	 * It is unsafe to keep track of it ourselves, in the event that a
567 	 * program jumps out of a signal handler.
568 	 */
569 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
570 		return (EFAULT);
571 
572 	/*
573 	 * Check for security violations.
574 	 */
	/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
575 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
576 	eflags = frame.sf_sc.sc_eflags;
577 	/*
578 	 * XXX do allow users to change the privileged flag PSL_RF.  The
579 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
580 	 * sometimes set it there too.  tf_eflags is kept in the signal
581 	 * context during signal handling and there is no other place
582 	 * to remember it, so the PSL_RF bit may be corrupted by the
583 	 * signal handler without us knowing.  Corruption of the PSL_RF
584 	 * bit at worst causes one more or one less debugger trap, so
585 	 * allowing it is fairly harmless.
586 	 */
587 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
588 		return(EINVAL);
589 
590 	/*
591 	 * Don't allow users to load a valid privileged %cs.  Let the
592 	 * hardware check for invalid selectors, excess privilege in
593 	 * other selectors, invalid %eip's and invalid %esp's.
594 	 */
595 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
596 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
597 		ksiginfo_init_trap(&ksi);
598 		ksi.ksi_signo = SIGBUS;
599 		ksi.ksi_code = BUS_OBJERR;
600 		ksi.ksi_trapno = T_PROTFLT;
601 		ksi.ksi_addr = (void *)regs->tf_rip;
602 		trapsignal(td, &ksi);
603 		return(EINVAL);
604 	}
605 
	/*
	 * Reassemble the Linux mask the way linux_sendsig() split it:
	 * word 0 from sc_mask, the remaining words from sf_extramask[].
	 */
606 	lmask.__bits[0] = frame.sf_sc.sc_mask;
607 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
608 		lmask.__bits[i+1] = frame.sf_extramask[i];
609 	PROC_LOCK(p);
610 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
611 	SIG_CANTMASK(td->td_sigmask);
612 	signotify(td);
613 	PROC_UNLOCK(p);
614 
615 	/*
616 	 * Restore signal context.
617 	 */
618 	/* Selectors were restored by the trampoline. */
619 	regs->tf_rdi    = frame.sf_sc.sc_edi;
620 	regs->tf_rsi    = frame.sf_sc.sc_esi;
621 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
622 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
623 	regs->tf_rdx    = frame.sf_sc.sc_edx;
624 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
625 	regs->tf_rax    = frame.sf_sc.sc_eax;
626 	regs->tf_rip    = frame.sf_sc.sc_eip;
627 	regs->tf_cs     = frame.sf_sc.sc_cs;
	/* Uses the value that passed EFLAGS_SECURE() above. */
628 	regs->tf_rflags = eflags;
629 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
630 	regs->tf_ss     = frame.sf_sc.sc_ss;
631 
632 	return (EJUSTRETURN);
633 }
634 
635 /*
636  * System call to cleanup state after a signal
637  * has been taken.  Reset signal mask and
638  * stack state from context left by rt_sendsig (above).
639  * Return to previous pc and psl as specified by
640  * context left by sendsig. Check carefully to
641  * make sure that the user has not modified the
642  * psl to gain improper privileges or to cause
643  * a machine fault.
644  */
/*
 * td	calling thread; its trapframe is overwritten on success.
 * args	args->ucp is the user address of the struct l_ucontext to restore.
 *
 * Returns EFAULT if the ucontext cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the checks below (a SIGBUS is also posted in the %cs
 * case), and EJUSTRETURN once the register state, the signal mask and the
 * alternate-stack settings have been applied.
 */
645 int
646 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
647 {
648 	struct proc *p = td->td_proc;
649 	struct l_ucontext uc;
650 	struct l_sigcontext *context;
651 	l_stack_t *lss;
652 	stack_t ss;
653 	struct trapframe *regs;
654 	int eflags;
655 	ksiginfo_t ksi;
656 
657 	regs = td->td_frame;
658 
659 #ifdef DEBUG
660 	if (ldebug(rt_sigreturn))
661 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
662 #endif
663 	/*
664 	 * The trampoline code hands us the ucontext.
665 	 * It is unsafe to keep track of it ourselves, in the event that a
666 	 * program jumps out of a signal handler.
667 	 */
668 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
669 		return (EFAULT);
670 
	/* All further accesses use the kernel copy in `uc'. */
671 	context = &uc.uc_mcontext;
672 
673 	/*
674 	 * Check for security violations.
675 	 */
	/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
676 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
677 	eflags = context->sc_eflags;
678 	/*
679 	 * XXX do allow users to change the privileged flag PSL_RF.  The
680 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
681 	 * sometimes set it there too.  tf_eflags is kept in the signal
682 	 * context during signal handling and there is no other place
683 	 * to remember it, so the PSL_RF bit may be corrupted by the
684 	 * signal handler without us knowing.  Corruption of the PSL_RF
685 	 * bit at worst causes one more or one less debugger trap, so
686 	 * allowing it is fairly harmless.
687 	 */
688 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
689 		return(EINVAL);
690 
691 	/*
692 	 * Don't allow users to load a valid privileged %cs.  Let the
693 	 * hardware check for invalid selectors, excess privilege in
694 	 * other selectors, invalid %eip's and invalid %esp's.
695 	 */
696 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
697 	if (!CS_SECURE(context->sc_cs)) {
698 		ksiginfo_init_trap(&ksi);
699 		ksi.ksi_signo = SIGBUS;
700 		ksi.ksi_code = BUS_OBJERR;
701 		ksi.ksi_trapno = T_PROTFLT;
702 		ksi.ksi_addr = (void *)regs->tf_rip;
703 		trapsignal(td, &ksi);
704 		return(EINVAL);
705 	}
706 
	/* The mask comes from uc_sigmask; sc_mask in the mcontext is not consulted. */
707 	PROC_LOCK(p);
708 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
709 	SIG_CANTMASK(td->td_sigmask);
710 	signotify(td);
711 	PROC_UNLOCK(p);
712 
713 	/*
714 	 * Restore signal context
715 	 */
716 	/* Selectors were restored by the trampoline. */
717 	regs->tf_rdi    = context->sc_edi;
718 	regs->tf_rsi    = context->sc_esi;
719 	regs->tf_rbp    = context->sc_ebp;
720 	regs->tf_rbx    = context->sc_ebx;
721 	regs->tf_rdx    = context->sc_edx;
722 	regs->tf_rcx    = context->sc_ecx;
723 	regs->tf_rax    = context->sc_eax;
724 	regs->tf_rip    = context->sc_eip;
725 	regs->tf_cs     = context->sc_cs;
	/* Uses the value that passed EFLAGS_SECURE() above. */
726 	regs->tf_rflags = eflags;
727 	regs->tf_rsp    = context->sc_esp_at_signal;
728 	regs->tf_ss     = context->sc_ss;
729 
730 	/*
731 	 * call sigaltstack & ignore results..
732 	 */
	/* Convert the 32-bit Linux stack description into a native stack_t. */
733 	lss = &uc.uc_stack;
734 	ss.ss_sp = PTRIN(lss->ss_sp);
735 	ss.ss_size = lss->ss_size;
736 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
737 
738 #ifdef DEBUG
739 	if (ldebug(rt_sigreturn))
740 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
741 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
742 #endif
743 	(void)kern_sigaltstack(td, &ss, NULL);
744 
745 	return (EJUSTRETURN);
746 }
747 
748 /*
749  * MPSAFE
750  */
751 static void
752 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
753 {
754 	args[0] = tf->tf_rbx;
755 	args[1] = tf->tf_rcx;
756 	args[2] = tf->tf_rdx;
757 	args[3] = tf->tf_rsi;
758 	args[4] = tf->tf_rdi;
759 	args[5] = tf->tf_rbp;	/* Unconfirmed */
760 	*params = NULL;		/* no copyin */
761 }
762 
763 /*
764  * If a linux binary is exec'ing something, try this image activator
765  * first.  We override standard shell script execution in order to
766  * be able to modify the interpreter path.  We only do this if a linux
767  * binary is doing the exec, so we do not create an EXEC module for it.
768  */
769 static int	exec_linux_imgact_try(struct image_params *iparams);
770 
771 static int
772 exec_linux_imgact_try(struct image_params *imgp)
773 {
774     const char *head = (const char *)imgp->image_header;
775     char *rpath;
776     int error = -1, len;
777 
778     /*
779      * The interpreter for shell scripts run from a linux binary needs
780      * to be located in /compat/linux if possible in order to recursively
781      * maintain linux path emulation.
782      */
783     if (((const short *)head)[0] == SHELLMAGIC) {
784 	    /*
785 	     * Run our normal shell image activator.  If it succeeds attempt
786 	     * to use the alternate path for the interpreter.  If an alternate
787 	     * path is found, use our stringspace to store it.
788 	     */
789 	    if ((error = exec_shell_imgact(imgp)) == 0) {
790 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
791 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
792 		    if (rpath != NULL) {
793 			    len = strlen(rpath) + 1;
794 
795 			    if (len <= MAXSHELLCMDLEN) {
796 				    memcpy(imgp->interpreter_name, rpath, len);
797 			    }
798 			    free(rpath, M_TEMP);
799 		    }
800 	    }
801     }
802     return(error);
803 }
804 
805 /*
806  * Clear registers on exec
807  * XXX copied from ia32_signal.c.
808  */
809 static void
810 exec_linux_setregs(td, entry, stack, ps_strings)
811 	struct thread *td;
812 	u_long entry;
813 	u_long stack;
814 	u_long ps_strings;
815 {
816 	struct trapframe *regs = td->td_frame;
817 	struct pcb *pcb = td->td_pcb;
818 
819 	critical_enter();
820 	wrmsr(MSR_FSBASE, 0);
821 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
822 	pcb->pcb_fsbase = 0;
823 	pcb->pcb_gsbase = 0;
824 	critical_exit();
825 	load_ds(_udatasel);
826 	load_es(_udatasel);
827 	load_fs(_udatasel);
828 	load_gs(_udatasel);
829 	pcb->pcb_ds = _udatasel;
830 	pcb->pcb_es = _udatasel;
831 	pcb->pcb_fs = _udatasel;
832 	pcb->pcb_gs = _udatasel;
833 
834 	bzero((char *)regs, sizeof(struct trapframe));
835 	regs->tf_rip = entry;
836 	regs->tf_rsp = stack;
837 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
838 	regs->tf_ss = _udatasel;
839 	regs->tf_cs = _ucode32sel;
840 	regs->tf_rbx = ps_strings;
841 	load_cr0(rcr0() | CR0_MP | CR0_TS);
842 	fpstate_drop(td);
843 
844 	/* Return via doreti so that we can change to a different %cs */
845 	pcb->pcb_flags |= PCB_FULLCTX;
846 	td->td_retval[1] = 0;
847 }
848 
849 /*
850  * XXX copied from ia32_sysvec.c.
851  */
/*
 * Lay out the top of the new image's user stack and return the initial
 * stack base (the address of the argv vector).
 *
 * From LINUX32_PS_STRINGS downwards this places: the signal trampoline
 * code, SPARE_USRSPACE bytes of slack, the argument/environment strings,
 * and then the 32-bit argv/envp pointer vectors (with extra room reserved
 * for the ELF aux vector when imgp->auxargs is set).  The ps_strings
 * block itself is filled in as well.
 *
 * None of the copyout()/suword32() results are checked.
 */
852 static register_t *
853 linux_copyout_strings(struct image_params *imgp)
854 {
855 	int argc, envc;
856 	u_int32_t *vectp;
857 	char *stringp, *destp;
858 	u_int32_t *stack_base;
859 	struct linux32_ps_strings *arginfo;
860 	int sigcodesz;
861 
862 	/*
863 	 * Calculate string base and vector table pointers.
864 	 * Also deal with signal trampoline code for this exec type.
865 	 */
866 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
867 	sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
	/* ARG_MAX - stringspace is the number of string bytes actually used. */
868 	destp =	(caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
869 		roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
870 
871 	/*
872 	 * install sigcode
873 	 */
874 	if (sigcodesz)
875 		copyout(imgp->proc->p_sysent->sv_sigcode,
876 			((caddr_t)arginfo - sigcodesz), sigcodesz);
877 
878 	/*
879 	 * If we have a valid auxargs ptr, prepare some room
880 	 * on the stack.
881 	 */
882 	if (imgp->auxargs) {
883 		/*
884 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
885 		 * lower compatibility.
886 		 */
887 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
888 			: (AT_COUNT * 2);
889 		/*
890 		 * The '+ 2' is for the null pointers at the end of each of
891 		 * the arg and env vector sets,and imgp->auxarg_size is room
892 		 * for argument of Runtime loader.
893 		 */
894 		vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
895 				       imgp->auxarg_size) * sizeof(u_int32_t));
896 
897 	} else
898 		/*
899 		 * The '+ 2' is for the null pointers at the end of each of
900 		 * the arg and env vector sets
901 		 */
902 		vectp = (u_int32_t *)
903 			(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
904 
905 	/*
906 	 * vectp also becomes our initial stack base
907 	 */
908 	stack_base = vectp;
909 
910 	stringp = imgp->args->begin_argv;
911 	argc = imgp->args->argc;
912 	envc = imgp->args->envc;
913 	/*
914 	 * Copy out strings - arguments and environment.
915 	 */
916 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
917 
918 	/*
919 	 * Fill in "ps_strings" struct for ps, w, etc.
920 	 */
921 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
922 	suword32(&arginfo->ps_nargvstr, argc);
923 
924 	/*
925 	 * Fill in argument portion of vector table.
926 	 */
	/*
	 * stringp walks the kernel copy of the strings while destp tracks
	 * the matching user address; each loop pass stores the user address
	 * of one string and then steps both pointers past its NUL.
	 */
927 	for (; argc > 0; --argc) {
928 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
929 		while (*stringp++ != 0)
930 			destp++;
931 		destp++;
932 	}
933 
934 	/* a null vector table pointer separates the argp's from the envp's */
935 	suword32(vectp++, 0);
936 
937 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
938 	suword32(&arginfo->ps_nenvstr, envc);
939 
940 	/*
941 	 * Fill in environment portion of vector table.
942 	 */
943 	for (; envc > 0; --envc) {
944 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
945 		while (*stringp++ != 0)
946 			destp++;
947 		destp++;
948 	}
949 
950 	/* end of vector table is a null pointer */
951 	suword32(vectp, 0);
952 
953 	return ((register_t *)stack_base);
954 }
955 
/*
 * compat.linux32.{maxdsiz,maxssiz,maxvmem}: writable upper bounds that
 * linux32_fixlimit() applies to the data, stack and vmem resource limits.
 * A value of 0 disables the corresponding clamp.  linux32_maxssiz is also
 * referenced from elf_linux_sysvec.
 */
956 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
957     "32-bit Linux emulation");
958 
959 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
960 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
961     &linux32_maxdsiz, 0, "");
962 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
963 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
964     &linux32_maxssiz, 0, "");
965 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
966 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
967     &linux32_maxvmem, 0, "");
968 
969 static void
970 linux32_fixlimit(struct rlimit *rl, int which)
971 {
972 
973 	switch (which) {
974 	case RLIMIT_DATA:
975 		if (linux32_maxdsiz != 0) {
976 			if (rl->rlim_cur > linux32_maxdsiz)
977 				rl->rlim_cur = linux32_maxdsiz;
978 			if (rl->rlim_max > linux32_maxdsiz)
979 				rl->rlim_max = linux32_maxdsiz;
980 		}
981 		break;
982 	case RLIMIT_STACK:
983 		if (linux32_maxssiz != 0) {
984 			if (rl->rlim_cur > linux32_maxssiz)
985 				rl->rlim_cur = linux32_maxssiz;
986 			if (rl->rlim_max > linux32_maxssiz)
987 				rl->rlim_max = linux32_maxssiz;
988 		}
989 		break;
990 	case RLIMIT_VMEM:
991 		if (linux32_maxvmem != 0) {
992 			if (rl->rlim_cur > linux32_maxvmem)
993 				rl->rlim_cur = linux32_maxvmem;
994 			if (rl->rlim_max > linux32_maxvmem)
995 				rl->rlim_max = linux32_maxvmem;
996 		}
997 		break;
998 	}
999 }
1000 
/*
 * System entry vector used for 32-bit Linux ELF binaries; both Linux
 * brand entries in this file point at it.
 *
 * The initializer is positional, so every entry must stay in the same
 * order as the members of struct sysentvec.  Check that declaration
 * before adding, removing or reordering anything here.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0,			/* NOTE(review): presumably "no syscall mask" -- confirm */
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	elf_linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux ELF32",
	elf32_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	LINUX32_USRSTACK,	/* NOTE(review): presumably the user address limit -- confirm */
	LINUX32_USRSTACK,	/* NOTE(review): presumably the stack top -- confirm */
	LINUX32_PS_STRINGS,
	VM_PROT_ALL,
	linux_copyout_strings,
	exec_linux_setregs,
	linux32_fixlimit,	/* clamps rlimits to the compat.linux32 tunables */
	&linux32_maxssiz,	/* run-time tunable stack size ceiling */
};
1030 
1031 static Elf32_Brandinfo linux_brand = {
1032 					ELFOSABI_LINUX,
1033 					EM_386,
1034 					"Linux",
1035 					"/compat/linux",
1036 					"/lib/ld-linux.so.1",
1037 					&elf_linux_sysvec,
1038 					NULL,
1039 					BI_CAN_EXEC_DYN,
1040 				 };
1041 
1042 static Elf32_Brandinfo linux_glibc2brand = {
1043 					ELFOSABI_LINUX,
1044 					EM_386,
1045 					"Linux",
1046 					"/compat/linux",
1047 					"/lib/ld-linux.so.2",
1048 					&elf_linux_sysvec,
1049 					NULL,
1050 					BI_CAN_EXEC_DYN,
1051 				 };
1052 
1053 Elf32_Brandinfo *linux_brandlist[] = {
1054 					&linux_brand,
1055 					&linux_glibc2brand,
1056 					NULL
1057 				};
1058 
1059 static int
1060 linux_elf_modevent(module_t mod, int type, void *data)
1061 {
1062 	Elf32_Brandinfo **brandinfo;
1063 	int error;
1064 	struct linux_ioctl_handler **lihp;
1065 	struct linux_device_handler **ldhp;
1066 
1067 	error = 0;
1068 
1069 	switch(type) {
1070 	case MOD_LOAD:
1071 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1072 		     ++brandinfo)
1073 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1074 				error = EINVAL;
1075 		if (error == 0) {
1076 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1077 				linux_ioctl_register_handler(*lihp);
1078 			SET_FOREACH(ldhp, linux_device_handler_set)
1079 				linux_device_register_handler(*ldhp);
1080 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1081 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1082 			LIST_INIT(&futex_list);
1083 			sx_init(&futex_sx, "futex protection lock");
1084 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1085 			      NULL, 1000);
1086 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1087 			      NULL, 1000);
1088 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1089 			      NULL, 1000);
1090 			if (bootverbose)
1091 				printf("Linux ELF exec handler installed\n");
1092 		} else
1093 			printf("cannot insert Linux ELF brand handler\n");
1094 		break;
1095 	case MOD_UNLOAD:
1096 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1097 		     ++brandinfo)
1098 			if (elf32_brand_inuse(*brandinfo))
1099 				error = EBUSY;
1100 		if (error == 0) {
1101 			for (brandinfo = &linux_brandlist[0];
1102 			     *brandinfo != NULL; ++brandinfo)
1103 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1104 					error = EINVAL;
1105 		}
1106 		if (error == 0) {
1107 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1108 				linux_ioctl_unregister_handler(*lihp);
1109 			SET_FOREACH(ldhp, linux_device_handler_set)
1110 				linux_device_unregister_handler(*ldhp);
1111 			mtx_destroy(&emul_lock);
1112 			sx_destroy(&emul_shared_lock);
1113 			sx_destroy(&futex_sx);
1114 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1115 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1116 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1117 			if (bootverbose)
1118 				printf("Linux ELF exec handler removed\n");
1119 		} else
1120 			printf("Could not deinstall ELF interpreter entry\n");
1121 		break;
1122 	default:
1123 		return EOPNOTSUPP;
1124 	}
1125 	return error;
1126 }
1127 
/*
 * Module descriptor: the module is named "linuxelf" and its events are
 * delivered to linux_elf_modevent().  The final 0 is the third member of
 * moduledata_t; linux_elf_modevent() ignores its data argument.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1135