xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/imgact.h>
47 #include <sys/imgact_elf.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/resourcevar.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/syscallsubr.h>
58 #include <sys/sysent.h>
59 #include <sys/sysproto.h>
60 #include <sys/vnode.h>
61 #include <sys/eventhandler.h>
62 
63 #include <vm/vm.h>
64 #include <vm/pmap.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_param.h>
70 
71 #include <machine/cpu.h>
72 #include <machine/md_var.h>
73 #include <machine/pcb.h>
74 #include <machine/specialreg.h>
75 
76 #include <amd64/linux32/linux.h>
77 #include <amd64/linux32/linux32_proto.h>
78 #include <compat/linux/linux_emul.h>
79 #include <compat/linux/linux_mib.h>
80 #include <compat/linux/linux_signal.h>
81 #include <compat/linux/linux_util.h>
82 
/* Declare this module as "linux", version 1. */
MODULE_VERSION(linux, 1);

/* Define the M_LINUX malloc type ("linux", "Linux mode structures"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
86 
/*
 * Store one 32-bit ELF auxiliary vector entry -- an id word followed by a
 * value word -- at pos using suword32(), advancing pos past both words.
 *
 * pos must be a modifiable lvalue; it is incremented twice.  The results
 * of the suword32() calls are not checked.  All arguments are
 * parenthesized so that arbitrary expressions may be passed safely.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
92 
/*
 * The two characters "#!" as seen when the first two bytes of an image
 * header are read as one 16-bit value in host byte order; compared
 * against the header in exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
98 
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 *
 * These names are only referenced from the "#ifdef DEBUG" sections of
 * linux_rt_sendsig() and linux_sendsig().
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
107 
/*
 * Signal trampoline image and its size; both are referenced from
 * elf_linux_sysvec and copied to user space by linux_copyout_strings().
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* System call table referenced from elf_linux_sysvec. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets of Linux ioctl and device handlers. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of the sysentvec hooks defined in this file. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
static void	linux32_fixlimit(struct rlimit *rl, int which);

/* Futex list and its sx lock, both defined outside this file. */
extern LIST_HEAD(futex_list, futex) futex_list;
extern struct sx futex_sx;

/*
 * Event handler tags for the exit, schedtail and exec hooks.
 * NOTE(review): not referenced in the part of the file covered here;
 * presumably registered and deregistered by the module event handler.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
131 static eventhandler_tag linux_exec_tag;
132 
/*
 * Linux syscalls return negative errno's, we do positive and map them
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is indexed by the FreeBSD errno value (0 .. ELAST) and each
 * entry is the already-negated Linux errno.  It is laid out ten entries
 * per row, so row r column c is FreeBSD errno 10 * r + c.  Not every
 * value maps to itself: for example entry 11 holds -35 and entry 35
 * holds -11.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
	 -72, -67, -71
};
152 
/*
 * FreeBSD -> Linux signal number translation.  The sendsig routines index
 * this table (through sysentvec's sv_sigtbl) with _SIG_IDX(sig).  The two
 * 0 entries mark positions that have no Linux signal assigned here.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
163 
/*
 * Linux -> FreeBSD signal number translation; the inverse direction of
 * bsd_to_linux_signal[].  Several Linux slots share a FreeBSD signal
 * (SIGBUS and SIGURG each appear twice), so the mapping is not one-to-one.
 * NOTE(review): presumably indexed by Linux signal number minus one, like
 * its counterpart -- confirm against the users in compat/linux.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
174 
/* Value reported to Linux when a FreeBSD trap code has no translation. */
#define LINUX_T_UNKNOWN  255

/*
 * Linux trap numbers, indexed by FreeBSD trap code.  The table is never
 * modified, hence const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code into the Linux trap number.
 *
 * Returns the table entry for codes inside the table and LINUX_T_UNKNOWN
 * for everything else, including negative values.  This is a function
 * rather than a macro so that the argument is evaluated exactly once and
 * the range check does not depend on an implicit signed-to-unsigned
 * conversion.
 */
static __inline int
bsd_to_linux_trapcode(int code)
{
	if (code < 0 || (size_t)code >=
	    sizeof(_bsd_to_linux_trapcode) / sizeof(_bsd_to_linux_trapcode[0]))
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
213 
/*
 * Layout of the ps_strings block as written for a 32-bit Linux process.
 * linux_copyout_strings() fills in every field with suword32() at the
 * user address LINUX32_PS_STRINGS.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
220 
221 /*
222  * If FreeBSD & Linux have a difference of opinion about what a trap
223  * means, deal with it here.
224  *
225  * MPSAFE
226  */
227 static int
228 translate_traps(int signal, int trap_code)
229 {
230 	if (signal != SIGBUS)
231 		return signal;
232 	switch (trap_code) {
233 	case T_PROTFLT:
234 	case T_TSSFLT:
235 	case T_DOUBLEFLT:
236 	case T_PAGEFLT:
237 		return SIGSEGV;
238 	default:
239 		return signal;
240 	}
241 }
242 
243 static int
244 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
245 {
246 	Elf32_Auxargs *args;
247 	Elf32_Addr *base;
248 	Elf32_Addr *pos;
249 
250 	KASSERT(curthread->td_proc == imgp->proc &&
251 	    (curthread->td_proc->p_flag & P_SA) == 0,
252 	    ("unsafe elf_linux_fixup(), should be curproc"));
253 	base = (Elf32_Addr *)*stack_base;
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	if (args->trace)
258 		AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
259 	if (args->execfd != -1)
260 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
261 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
262 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
263 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
264 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
265 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
266 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
267 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
268 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
269 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
270 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
271 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
272 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
273 
274 	free(imgp->auxargs, M_TEMP);
275 	imgp->auxargs = NULL;
276 
277 	base--;
278 	suword32(base, (uint32_t)imgp->args->argc);
279 	*stack_base = (register_t *)base;
280 	return 0;
281 }
282 
/*
 * Segment selectors defined outside this file.  _ucode32sel is loaded
 * into %cs and _udatasel into %ss/%ds/%es by the routines below.
 */
extern int _ucodesel, _ucode32sel, _udatasel;
/*
 * Added to the start address of the installed sigcode to form the handler
 * entry point used by linux_rt_sendsig().
 */
extern unsigned long linux_sznonrtsigcode;
285 
/*
 * Deliver a signal using the Linux "rt" signal frame (struct
 * l_rt_sigframe, which carries a siginfo and a ucontext).  linux_sendsig()
 * calls this for handlers installed with SA_SIGINFO.
 *
 * Must be entered with the process lock and the sigacts mutex held (both
 * are asserted).  Both locks are released while the frame is built and
 * copied out and are re-acquired before returning.  If the frame cannot
 * be copied to the user stack the process is killed with SIGILL.
 *
 * catcher: user address of the signal handler.
 * ksi:     signal information (number, code, faulting address).
 * mask:    signal mask to be stored in the frame for sigreturn.
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;	/* fp: user address; frame: kernel copy */
	int oonstack;
	int sig;
	int code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we were already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context.
	 * Use the top of the alternate stack when one is configured, we are
	 * not already on it, and this signal asked for it; otherwise carve
	 * the frame out just below the current user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	/* Done with sigacts; the process lock is still held. */
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 * Translate the signal number through the sysentvec signal table
	 * when one is present and the signal is within its range.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	/* These two pointers refer to the user-space copy of the frame. */
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* Drop the process lock before the copyout() below. */
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Save the interrupted register state, narrowed to the 32-bit fields. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
        frame.sf_sc.uc_mcontext.sc_gs     = rgs();
        frame.sf_sc.uc_mcontext.sc_fs     = rfs();
        __asm __volatile("movl %%es,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
        __asm __volatile("movl %%ds,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * The sigcode was installed at LINUX32_PS_STRINGS - szsigcode by
	 * linux_copyout_strings(); linux_sznonrtsigcode is added to that
	 * address to obtain the entry point used here.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_rflags &= ~PSL_T;	/* do not single-step into the handler */
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* leave user %fs and %gs untouched */
	/* Restore the lock state the caller entered with. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
416 
417 
/*
 * Send an interrupt to process.
 *
 * Builds a struct l_sigframe on the user stack (or on the alternate
 * signal stack) and redirects the thread to the signal trampoline that
 * linux_copyout_strings() installed just below LINUX32_PS_STRINGS.  The
 * saved context in the frame is what linux_sigreturn() later restores:
 * signal mask, stack pointer, registers, pc and flags.
 *
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig()
 * instead.
 *
 * Must be entered with the process lock and the sigacts mutex held (both
 * are asserted).  Both are released while the frame is built and copied
 * out and are re-acquired before returning.  If the frame cannot be
 * copied to the user stack the process is killed with SIGILL.
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;	/* fp: user address; frame: kernel copy */
	l_sigset_t lmask;
	int oonstack, i;
	int sig, code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we were already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 * Use the top of the alternate stack when one is configured, we are
	 * not already on it, and this signal asked for it; otherwise carve
	 * the frame out just below the current user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	/* Both locks are dropped for the rest of the frame setup. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 * Translate the signal number through the sysentvec signal table
	 * when one is present and the signal is within its range.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 * The first mask word goes into sc_mask; the remaining words are
	 * stored in sf_extramask[] further down.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
        frame.sf_sc.sc_gs     = rgs();
        frame.sf_sc.sc_fs     = rfs();
        __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
        __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * Execution resumes at the start of the installed sigcode.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_rflags &= ~PSL_T;	/* do not single-step into the handler */
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* leave user %fs and %gs untouched */
	/* Restore the lock state the caller entered with. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
538 
539 /*
540  * System call to cleanup state after a signal
541  * has been taken.  Reset signal mask and
542  * stack state from context left by sendsig (above).
543  * Return to previous pc and psl as specified by
544  * context left by sendsig. Check carefully to
545  * make sure that the user has not modified the
546  * psl to gain improper privileges or to cause
547  * a machine fault.
548  */
549 int
550 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
551 {
552 	struct proc *p = td->td_proc;
553 	struct l_sigframe frame;
554 	struct trapframe *regs;
555 	l_sigset_t lmask;
556 	int eflags, i;
557 	ksiginfo_t ksi;
558 
559 	regs = td->td_frame;
560 
561 #ifdef DEBUG
562 	if (ldebug(sigreturn))
563 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
564 #endif
565 	/*
566 	 * The trampoline code hands us the sigframe.
567 	 * It is unsafe to keep track of it ourselves, in the event that a
568 	 * program jumps out of a signal handler.
569 	 */
570 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
571 		return (EFAULT);
572 
573 	/*
574 	 * Check for security violations.
575 	 */
576 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
577 	eflags = frame.sf_sc.sc_eflags;
578 	/*
579 	 * XXX do allow users to change the privileged flag PSL_RF.  The
580 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
581 	 * sometimes set it there too.  tf_eflags is kept in the signal
582 	 * context during signal handling and there is no other place
583 	 * to remember it, so the PSL_RF bit may be corrupted by the
584 	 * signal handler without us knowing.  Corruption of the PSL_RF
585 	 * bit at worst causes one more or one less debugger trap, so
586 	 * allowing it is fairly harmless.
587 	 */
588 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
589 		return(EINVAL);
590 
591 	/*
592 	 * Don't allow users to load a valid privileged %cs.  Let the
593 	 * hardware check for invalid selectors, excess privilege in
594 	 * other selectors, invalid %eip's and invalid %esp's.
595 	 */
596 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
597 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
598 		ksiginfo_init_trap(&ksi);
599 		ksi.ksi_signo = SIGBUS;
600 		ksi.ksi_code = BUS_OBJERR;
601 		ksi.ksi_trapno = T_PROTFLT;
602 		ksi.ksi_addr = (void *)regs->tf_rip;
603 		trapsignal(td, &ksi);
604 		return(EINVAL);
605 	}
606 
607 	lmask.__bits[0] = frame.sf_sc.sc_mask;
608 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
609 		lmask.__bits[i+1] = frame.sf_extramask[i];
610 	PROC_LOCK(p);
611 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
612 	SIG_CANTMASK(td->td_sigmask);
613 	signotify(td);
614 	PROC_UNLOCK(p);
615 
616 	/*
617 	 * Restore signal context.
618 	 */
619 	/* Selectors were restored by the trampoline. */
620 	regs->tf_rdi    = frame.sf_sc.sc_edi;
621 	regs->tf_rsi    = frame.sf_sc.sc_esi;
622 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
623 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
624 	regs->tf_rdx    = frame.sf_sc.sc_edx;
625 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
626 	regs->tf_rax    = frame.sf_sc.sc_eax;
627 	regs->tf_rip    = frame.sf_sc.sc_eip;
628 	regs->tf_cs     = frame.sf_sc.sc_cs;
629 	regs->tf_rflags = eflags;
630 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
631 	regs->tf_ss     = frame.sf_sc.sc_ss;
632 
633 	return (EJUSTRETURN);
634 }
635 
636 /*
637  * System call to cleanup state after a signal
638  * has been taken.  Reset signal mask and
639  * stack state from context left by rt_sendsig (above).
640  * Return to previous pc and psl as specified by
641  * context left by sendsig. Check carefully to
642  * make sure that the user has not modified the
643  * psl to gain improper privileges or to cause
644  * a machine fault.
645  */
646 int
647 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
648 {
649 	struct proc *p = td->td_proc;
650 	struct l_ucontext uc;
651 	struct l_sigcontext *context;
652 	l_stack_t *lss;
653 	stack_t ss;
654 	struct trapframe *regs;
655 	int eflags;
656 	ksiginfo_t ksi;
657 
658 	regs = td->td_frame;
659 
660 #ifdef DEBUG
661 	if (ldebug(rt_sigreturn))
662 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
663 #endif
664 	/*
665 	 * The trampoline code hands us the ucontext.
666 	 * It is unsafe to keep track of it ourselves, in the event that a
667 	 * program jumps out of a signal handler.
668 	 */
669 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
670 		return (EFAULT);
671 
672 	context = &uc.uc_mcontext;
673 
674 	/*
675 	 * Check for security violations.
676 	 */
677 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
678 	eflags = context->sc_eflags;
679 	/*
680 	 * XXX do allow users to change the privileged flag PSL_RF.  The
681 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
682 	 * sometimes set it there too.  tf_eflags is kept in the signal
683 	 * context during signal handling and there is no other place
684 	 * to remember it, so the PSL_RF bit may be corrupted by the
685 	 * signal handler without us knowing.  Corruption of the PSL_RF
686 	 * bit at worst causes one more or one less debugger trap, so
687 	 * allowing it is fairly harmless.
688 	 */
689 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
690 		return(EINVAL);
691 
692 	/*
693 	 * Don't allow users to load a valid privileged %cs.  Let the
694 	 * hardware check for invalid selectors, excess privilege in
695 	 * other selectors, invalid %eip's and invalid %esp's.
696 	 */
697 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
698 	if (!CS_SECURE(context->sc_cs)) {
699 		ksiginfo_init_trap(&ksi);
700 		ksi.ksi_signo = SIGBUS;
701 		ksi.ksi_code = BUS_OBJERR;
702 		ksi.ksi_trapno = T_PROTFLT;
703 		ksi.ksi_addr = (void *)regs->tf_rip;
704 		trapsignal(td, &ksi);
705 		return(EINVAL);
706 	}
707 
708 	PROC_LOCK(p);
709 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
710 	SIG_CANTMASK(td->td_sigmask);
711 	signotify(td);
712 	PROC_UNLOCK(p);
713 
714 	/*
715 	 * Restore signal context
716 	 */
717 	/* Selectors were restored by the trampoline. */
718 	regs->tf_rdi    = context->sc_edi;
719 	regs->tf_rsi    = context->sc_esi;
720 	regs->tf_rbp    = context->sc_ebp;
721 	regs->tf_rbx    = context->sc_ebx;
722 	regs->tf_rdx    = context->sc_edx;
723 	regs->tf_rcx    = context->sc_ecx;
724 	regs->tf_rax    = context->sc_eax;
725 	regs->tf_rip    = context->sc_eip;
726 	regs->tf_cs     = context->sc_cs;
727 	regs->tf_rflags = eflags;
728 	regs->tf_rsp    = context->sc_esp_at_signal;
729 	regs->tf_ss     = context->sc_ss;
730 
731 	/*
732 	 * call sigaltstack & ignore results..
733 	 */
734 	lss = &uc.uc_stack;
735 	ss.ss_sp = PTRIN(lss->ss_sp);
736 	ss.ss_size = lss->ss_size;
737 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
738 
739 #ifdef DEBUG
740 	if (ldebug(rt_sigreturn))
741 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
742 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
743 #endif
744 	(void)kern_sigaltstack(td, &ss, NULL);
745 
746 	return (EJUSTRETURN);
747 }
748 
749 /*
750  * MPSAFE
751  */
752 static void
753 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
754 {
755 	args[0] = tf->tf_rbx;
756 	args[1] = tf->tf_rcx;
757 	args[2] = tf->tf_rdx;
758 	args[3] = tf->tf_rsi;
759 	args[4] = tf->tf_rdi;
760 	args[5] = tf->tf_rbp;	/* Unconfirmed */
761 	*params = NULL;		/* no copyin */
762 }
763 
764 /*
765  * If a linux binary is exec'ing something, try this image activator
766  * first.  We override standard shell script execution in order to
767  * be able to modify the interpreter path.  We only do this if a linux
768  * binary is doing the exec, so we do not create an EXEC module for it.
769  */
770 static int	exec_linux_imgact_try(struct image_params *iparams);
771 
772 static int
773 exec_linux_imgact_try(struct image_params *imgp)
774 {
775     const char *head = (const char *)imgp->image_header;
776     char *rpath;
777     int error = -1, len;
778 
779     /*
780      * The interpreter for shell scripts run from a linux binary needs
781      * to be located in /compat/linux if possible in order to recursively
782      * maintain linux path emulation.
783      */
784     if (((const short *)head)[0] == SHELLMAGIC) {
785 	    /*
786 	     * Run our normal shell image activator.  If it succeeds attempt
787 	     * to use the alternate path for the interpreter.  If an alternate
788 	     * path is found, use our stringspace to store it.
789 	     */
790 	    if ((error = exec_shell_imgact(imgp)) == 0) {
791 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
792 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
793 		    if (rpath != NULL) {
794 			    len = strlen(rpath) + 1;
795 
796 			    if (len <= MAXSHELLCMDLEN) {
797 				    memcpy(imgp->interpreter_name, rpath, len);
798 			    }
799 			    free(rpath, M_TEMP);
800 		    }
801 	    }
802     }
803     return(error);
804 }
805 
806 /*
807  * Clear registers on exec
808  * XXX copied from ia32_signal.c.
809  */
810 static void
811 exec_linux_setregs(td, entry, stack, ps_strings)
812 	struct thread *td;
813 	u_long entry;
814 	u_long stack;
815 	u_long ps_strings;
816 {
817 	struct trapframe *regs = td->td_frame;
818 	struct pcb *pcb = td->td_pcb;
819 
820 	critical_enter();
821 	wrmsr(MSR_FSBASE, 0);
822 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
823 	pcb->pcb_fsbase = 0;
824 	pcb->pcb_gsbase = 0;
825 	critical_exit();
826 	load_ds(_udatasel);
827 	load_es(_udatasel);
828 	load_fs(_udatasel);
829 	load_gs(_udatasel);
830 	pcb->pcb_ds = _udatasel;
831 	pcb->pcb_es = _udatasel;
832 	pcb->pcb_fs = _udatasel;
833 	pcb->pcb_gs = _udatasel;
834 
835 	bzero((char *)regs, sizeof(struct trapframe));
836 	regs->tf_rip = entry;
837 	regs->tf_rsp = stack;
838 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
839 	regs->tf_ss = _udatasel;
840 	regs->tf_cs = _ucode32sel;
841 	regs->tf_rbx = ps_strings;
842 	load_cr0(rcr0() | CR0_MP | CR0_TS);
843 	fpstate_drop(td);
844 
845 	/* Return via doreti so that we can change to a different %cs */
846 	pcb->pcb_flags |= PCB_FULLCTX;
847 	td->td_retval[1] = 0;
848 }
849 
850 /*
851  * XXX copied from ia32_sysvec.c.
852  */
853 static register_t *
854 linux_copyout_strings(struct image_params *imgp)
855 {
856 	int argc, envc;
857 	u_int32_t *vectp;
858 	char *stringp, *destp;
859 	u_int32_t *stack_base;
860 	struct linux32_ps_strings *arginfo;
861 	int sigcodesz;
862 
863 	/*
864 	 * Calculate string base and vector table pointers.
865 	 * Also deal with signal trampoline code for this exec type.
866 	 */
867 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
868 	sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
869 	destp =	(caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
870 		roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
871 
872 	/*
873 	 * install sigcode
874 	 */
875 	if (sigcodesz)
876 		copyout(imgp->proc->p_sysent->sv_sigcode,
877 			((caddr_t)arginfo - sigcodesz), sigcodesz);
878 
879 	/*
880 	 * If we have a valid auxargs ptr, prepare some room
881 	 * on the stack.
882 	 */
883 	if (imgp->auxargs) {
884 		/*
885 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
886 		 * lower compatibility.
887 		 */
888 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
889 			: (AT_COUNT * 2);
890 		/*
891 		 * The '+ 2' is for the null pointers at the end of each of
892 		 * the arg and env vector sets,and imgp->auxarg_size is room
893 		 * for argument of Runtime loader.
894 		 */
895 		vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
896 				       imgp->auxarg_size) * sizeof(u_int32_t));
897 
898 	} else
899 		/*
900 		 * The '+ 2' is for the null pointers at the end of each of
901 		 * the arg and env vector sets
902 		 */
903 		vectp = (u_int32_t *)
904 			(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
905 
906 	/*
907 	 * vectp also becomes our initial stack base
908 	 */
909 	stack_base = vectp;
910 
911 	stringp = imgp->args->begin_argv;
912 	argc = imgp->args->argc;
913 	envc = imgp->args->envc;
914 	/*
915 	 * Copy out strings - arguments and environment.
916 	 */
917 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
918 
919 	/*
920 	 * Fill in "ps_strings" struct for ps, w, etc.
921 	 */
922 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
923 	suword32(&arginfo->ps_nargvstr, argc);
924 
925 	/*
926 	 * Fill in argument portion of vector table.
927 	 */
928 	for (; argc > 0; --argc) {
929 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
930 		while (*stringp++ != 0)
931 			destp++;
932 		destp++;
933 	}
934 
935 	/* a null vector table pointer separates the argp's from the envp's */
936 	suword32(vectp++, 0);
937 
938 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
939 	suword32(&arginfo->ps_nenvstr, envc);
940 
941 	/*
942 	 * Fill in environment portion of vector table.
943 	 */
944 	for (; envc > 0; --envc) {
945 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
946 		while (*stringp++ != 0)
947 			destp++;
948 		destp++;
949 	}
950 
951 	/* end of vector table is a null pointer */
952 	suword32(vectp, 0);
953 
954 	return ((register_t *)stack_base);
955 }
956 
/* sysctl node "linux32" under the compat tree. */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read-write upper bounds applied by linux32_fixlimit() to the data size,
 * stack size and virtual memory resource limits.  A value of 0 turns the
 * corresponding clamp off.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
969 
970 static void
971 linux32_fixlimit(struct rlimit *rl, int which)
972 {
973 
974 	switch (which) {
975 	case RLIMIT_DATA:
976 		if (linux32_maxdsiz != 0) {
977 			if (rl->rlim_cur > linux32_maxdsiz)
978 				rl->rlim_cur = linux32_maxdsiz;
979 			if (rl->rlim_max > linux32_maxdsiz)
980 				rl->rlim_max = linux32_maxdsiz;
981 		}
982 		break;
983 	case RLIMIT_STACK:
984 		if (linux32_maxssiz != 0) {
985 			if (rl->rlim_cur > linux32_maxssiz)
986 				rl->rlim_cur = linux32_maxssiz;
987 			if (rl->rlim_max > linux32_maxssiz)
988 				rl->rlim_max = linux32_maxssiz;
989 		}
990 		break;
991 	case RLIMIT_VMEM:
992 		if (linux32_maxvmem != 0) {
993 			if (rl->rlim_cur > linux32_maxvmem)
994 				rl->rlim_cur = linux32_maxvmem;
995 			if (rl->rlim_max > linux32_maxvmem)
996 				rl->rlim_max = linux32_maxvmem;
997 		}
998 		break;
999 	}
1000 }
1001 
1002 struct sysentvec elf_linux_sysvec = {
1003 	LINUX_SYS_MAXSYSCALL,
1004 	linux_sysent,
1005 	0,
1006 	LINUX_SIGTBLSZ,
1007 	bsd_to_linux_signal,
1008 	ELAST + 1,
1009 	bsd_to_linux_errno,
1010 	translate_traps,
1011 	elf_linux_fixup,
1012 	linux_sendsig,
1013 	linux_sigcode,
1014 	&linux_szsigcode,
1015 	linux_prepsyscall,
1016 	"Linux ELF32",
1017 	elf32_coredump,
1018 	exec_linux_imgact_try,
1019 	LINUX_MINSIGSTKSZ,
1020 	PAGE_SIZE,
1021 	VM_MIN_ADDRESS,
1022 	LINUX32_USRSTACK,
1023 	LINUX32_USRSTACK,
1024 	LINUX32_PS_STRINGS,
1025 	VM_PROT_ALL,
1026 	linux_copyout_strings,
1027 	exec_linux_setregs,
1028 	linux32_fixlimit,
1029 	&linux32_maxssiz,
1030 };
1031 
1032 static Elf32_Brandinfo linux_brand = {
1033 					ELFOSABI_LINUX,
1034 					EM_386,
1035 					"Linux",
1036 					"/compat/linux",
1037 					"/lib/ld-linux.so.1",
1038 					&elf_linux_sysvec,
1039 					NULL,
1040 					BI_CAN_EXEC_DYN,
1041 				 };
1042 
1043 static Elf32_Brandinfo linux_glibc2brand = {
1044 					ELFOSABI_LINUX,
1045 					EM_386,
1046 					"Linux",
1047 					"/compat/linux",
1048 					"/lib/ld-linux.so.2",
1049 					&elf_linux_sysvec,
1050 					NULL,
1051 					BI_CAN_EXEC_DYN,
1052 				 };
1053 
1054 Elf32_Brandinfo *linux_brandlist[] = {
1055 					&linux_brand,
1056 					&linux_glibc2brand,
1057 					NULL
1058 				};
1059 
1060 static int
1061 linux_elf_modevent(module_t mod, int type, void *data)
1062 {
1063 	Elf32_Brandinfo **brandinfo;
1064 	int error;
1065 	struct linux_ioctl_handler **lihp;
1066 	struct linux_device_handler **ldhp;
1067 
1068 	error = 0;
1069 
1070 	switch(type) {
1071 	case MOD_LOAD:
1072 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1073 		     ++brandinfo)
1074 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1075 				error = EINVAL;
1076 		if (error == 0) {
1077 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1078 				linux_ioctl_register_handler(*lihp);
1079 			SET_FOREACH(ldhp, linux_device_handler_set)
1080 				linux_device_register_handler(*ldhp);
1081 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1082 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1083 			LIST_INIT(&futex_list);
1084 			sx_init(&futex_sx, "futex protection lock");
1085 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1086 			      NULL, 1000);
1087 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1088 			      NULL, 1000);
1089 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1090 			      NULL, 1000);
1091 			if (bootverbose)
1092 				printf("Linux ELF exec handler installed\n");
1093 		} else
1094 			printf("cannot insert Linux ELF brand handler\n");
1095 		break;
1096 	case MOD_UNLOAD:
1097 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1098 		     ++brandinfo)
1099 			if (elf32_brand_inuse(*brandinfo))
1100 				error = EBUSY;
1101 		if (error == 0) {
1102 			for (brandinfo = &linux_brandlist[0];
1103 			     *brandinfo != NULL; ++brandinfo)
1104 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1105 					error = EINVAL;
1106 		}
1107 		if (error == 0) {
1108 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1109 				linux_ioctl_unregister_handler(*lihp);
1110 			SET_FOREACH(ldhp, linux_device_handler_set)
1111 				linux_device_unregister_handler(*ldhp);
1112 			mtx_destroy(&emul_lock);
1113 			sx_destroy(&emul_shared_lock);
1114 			sx_destroy(&futex_sx);
1115 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1116 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1117 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1118 			if (bootverbose)
1119 				printf("Linux ELF exec handler removed\n");
1120 		} else
1121 			printf("Could not deinstall ELF interpreter entry\n");
1122 		break;
1123 	default:
1124 		return EOPNOTSUPP;
1125 	}
1126 	return error;
1127 }
1128 
1129 static moduledata_t linux_elf_mod = {
1130 	"linuxelf",
1131 	linux_elf_modevent,
1132 	0
1133 };
1134 
1135 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1136