xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision bfe691b2f75de2224c7ceb304ebcdef2b42d4179)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/imgact.h>
47 #include <sys/imgact_elf.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/resourcevar.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/syscallsubr.h>
58 #include <sys/sysent.h>
59 #include <sys/sysproto.h>
60 #include <sys/vnode.h>
61 #include <sys/eventhandler.h>
62 
63 #include <vm/vm.h>
64 #include <vm/pmap.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_param.h>
70 
71 #include <machine/cpu.h>
72 #include <machine/md_var.h>
73 #include <machine/pcb.h>
74 #include <machine/specialreg.h>
75 
76 #include <amd64/linux32/linux.h>
77 #include <amd64/linux32/linux32_proto.h>
78 #include <compat/linux/linux_emul.h>
79 #include <compat/linux/linux_mib.h>
80 #include <compat/linux/linux_signal.h>
81 #include <compat/linux/linux_util.h>
82 
83 MODULE_VERSION(linux, 1);
84 
85 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
86 
87 #define	AUXARGS_ENTRY_32(pos, id, val)	\
88 	do {				\
89 		suword32(pos++, id);	\
90 		suword32(pos++, val);	\
91 	} while (0)
92 
93 #if BYTE_ORDER == LITTLE_ENDIAN
94 #define SHELLMAGIC      0x2123 /* #! */
95 #else
96 #define SHELLMAGIC      0x2321
97 #endif
98 
99 /*
100  * Allow the sendsig functions to use the ldebug() facility
101  * even though they are not syscalls themselves. Map them
102  * to syscall 0. This is slightly less bogus than using
103  * ldebug(sigreturn).
104  */
105 #define	LINUX_SYS_linux_rt_sendsig	0
106 #define	LINUX_SYS_linux_sendsig		0
107 
108 extern char linux_sigcode[];
109 extern int linux_szsigcode;
110 
111 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
112 
113 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
114 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
115 
116 static int	elf_linux_fixup(register_t **stack_base,
117 		    struct image_params *iparams);
118 static register_t *linux_copyout_strings(struct image_params *imgp);
119 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
120 		    caddr_t *params);
121 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
122 static void	exec_linux_setregs(struct thread *td, u_long entry,
123 				   u_long stack, u_long ps_strings);
124 static void	linux32_fixlimits(struct proc *p);
125 
126 extern LIST_HEAD(futex_list, futex) futex_list;
127 extern struct sx futex_sx;
128 
129 static eventhandler_tag linux_exit_tag;
130 static eventhandler_tag linux_schedtail_tag;
131 static eventhandler_tag linux_exec_tag;
132 
133 /*
134  * Linux syscalls return negative errno's, we do positive and map them
135  * Reference:
136  *   FreeBSD: src/sys/sys/errno.h
137  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
138  *            linux-2.6.17.8/include/asm-generic/errno.h
139  */
140 static int bsd_to_linux_errno[ELAST + 1] = {
141 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
142 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
143 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
144 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
145 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
146 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
147 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
148 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
149 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
150 	 -72, -67, -71
151 };
152 
153 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
154 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
155 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
156 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
157 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
158 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
159 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
160 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
161 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
162 };
163 
164 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
165 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
166 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
167 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
168 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
169 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
170 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
171 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
172 	SIGIO, SIGURG, SIGSYS
173 };
174 
/* Value reported for any trap code that has no entry below. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap code -> Linux trap number, indexed by the FreeBSD code.
 * Use bsd_to_linux_trapcode() rather than indexing this directly.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0                */ LINUX_T_UNKNOWN,
	/*  1  T_PRIVINFLT   */ 6,
	/*  2                */ LINUX_T_UNKNOWN,
	/*  3  T_BPTFLT      */ 3,
	/*  4                */ LINUX_T_UNKNOWN,
	/*  5                */ LINUX_T_UNKNOWN,
	/*  6  T_ARITHTRAP   */ 16,
	/*  7  T_ASTFLT      */ 254,
	/*  8                */ LINUX_T_UNKNOWN,
	/*  9  T_PROTFLT     */ 13,
	/* 10  T_TRCTRAP     */ 1,
	/* 11                */ LINUX_T_UNKNOWN,
	/* 12  T_PAGEFLT     */ 14,
	/* 13                */ LINUX_T_UNKNOWN,
	/* 14  T_ALIGNFLT    */ 17,
	/* 15                */ LINUX_T_UNKNOWN,
	/* 16                */ LINUX_T_UNKNOWN,
	/* 17                */ LINUX_T_UNKNOWN,
	/* 18  T_DIVIDE      */ 0,
	/* 19  T_NMI         */ 2,
	/* 20  T_OFLOW       */ 4,
	/* 21  T_BOUND       */ 5,
	/* 22  T_DNA         */ 7,
	/* 23  T_DOUBLEFLT   */ 8,
	/* 24  T_FPOPFLT     */ 9,
	/* 25  T_TSSFLT      */ 10,
	/* 26  T_SEGNPFLT    */ 11,
	/* 27  T_STKFLT      */ 12,
	/* 28  T_MCHK        */ 18,
	/* 29  T_XMMFLT      */ 19,
	/* 30  T_RESERVED    */ 15
};

/*
 * Bounds-checked lookup.  The comparison is made against a sizeof
 * expression and is therefore unsigned, so negative codes as well as
 * codes past the end of the table yield LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) < sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
213 
214 struct linux32_ps_strings {
215 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
216 	u_int ps_nargvstr;	/* the number of argument strings */
217 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
218 	u_int ps_nenvstr;	/* the number of environment strings */
219 };
220 
221 /*
222  * If FreeBSD & Linux have a difference of opinion about what a trap
223  * means, deal with it here.
224  *
225  * MPSAFE
226  */
227 static int
228 translate_traps(int signal, int trap_code)
229 {
230 	if (signal != SIGBUS)
231 		return signal;
232 	switch (trap_code) {
233 	case T_PROTFLT:
234 	case T_TSSFLT:
235 	case T_DOUBLEFLT:
236 	case T_PAGEFLT:
237 		return SIGSEGV;
238 	default:
239 		return signal;
240 	}
241 }
242 
243 static int
244 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
245 {
246 	Elf32_Auxargs *args;
247 	Elf32_Addr *base;
248 	Elf32_Addr *pos;
249 
250 	KASSERT(curthread->td_proc == imgp->proc &&
251 	    (curthread->td_proc->p_flag & P_SA) == 0,
252 	    ("unsafe elf_linux_fixup(), should be curproc"));
253 	base = (Elf32_Addr *)*stack_base;
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	if (args->trace)
258 		AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
259 	if (args->execfd != -1)
260 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
261 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
262 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
263 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
264 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
265 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
266 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
267 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
268 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
269 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
270 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
271 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
272 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
273 
274 	free(imgp->auxargs, M_TEMP);
275 	imgp->auxargs = NULL;
276 
277 	base--;
278 	suword32(base, (uint32_t)imgp->args->argc);
279 	*stack_base = (register_t *)base;
280 	return 0;
281 }
282 
283 extern int _ucodesel, _ucode32sel, _udatasel;
284 extern unsigned long linux_sznonrtsigcode;
285 
286 static void
287 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
288 {
289 	struct thread *td = curthread;
290 	struct proc *p = td->td_proc;
291 	struct sigacts *psp;
292 	struct trapframe *regs;
293 	struct l_rt_sigframe *fp, frame;
294 	int oonstack;
295 	int sig;
296 	int code;
297 
298 	sig = ksi->ksi_signo;
299 	code = ksi->ksi_code;
300 	PROC_LOCK_ASSERT(p, MA_OWNED);
301 	psp = p->p_sigacts;
302 	mtx_assert(&psp->ps_mtx, MA_OWNED);
303 	regs = td->td_frame;
304 	oonstack = sigonstack(regs->tf_rsp);
305 
306 #ifdef DEBUG
307 	if (ldebug(rt_sendsig))
308 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
309 		    catcher, sig, (void*)mask, code);
310 #endif
311 	/*
312 	 * Allocate space for the signal handler context.
313 	 */
314 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
315 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
316 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
317 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
318 	} else
319 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
320 	mtx_unlock(&psp->ps_mtx);
321 
322 	/*
323 	 * Build the argument list for the signal handler.
324 	 */
325 	if (p->p_sysent->sv_sigtbl)
326 		if (sig <= p->p_sysent->sv_sigsize)
327 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
328 
329 	bzero(&frame, sizeof(frame));
330 
331 	frame.sf_handler = PTROUT(catcher);
332 	frame.sf_sig = sig;
333 	frame.sf_siginfo = PTROUT(&fp->sf_si);
334 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
335 
336 	/* Fill in POSIX parts */
337 	frame.sf_si.lsi_signo = sig;
338 	frame.sf_si.lsi_code = code;
339 	frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr);
340 
341 	/*
342 	 * Build the signal context to be used by sigreturn.
343 	 */
344 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
345 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
346 
347 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
348 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
349 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
350 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
351 	PROC_UNLOCK(p);
352 
353 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
354 
355 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
356         frame.sf_sc.uc_mcontext.sc_gs     = rgs();
357         frame.sf_sc.uc_mcontext.sc_fs     = rfs();
358         __asm __volatile("movl %%es,%0" :
359 	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
360         __asm __volatile("movl %%ds,%0" :
361 	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
362 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
363 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
364 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
365 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
366 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
367 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
368 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
369 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
370 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
371 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
372 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
373 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
374 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
375 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
376 
377 #ifdef DEBUG
378 	if (ldebug(rt_sendsig))
379 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
380 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
381 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
382 #endif
383 
384 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
385 		/*
386 		 * Process has trashed its stack; give it an illegal
387 		 * instruction to halt it in its tracks.
388 		 */
389 #ifdef DEBUG
390 		if (ldebug(rt_sendsig))
391 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
392 			    fp, oonstack);
393 #endif
394 		PROC_LOCK(p);
395 		sigexit(td, SIGILL);
396 	}
397 
398 	/*
399 	 * Build context to run handler in.
400 	 */
401 	regs->tf_rsp = PTROUT(fp);
402 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
403 	    linux_sznonrtsigcode;
404 	regs->tf_rflags &= ~PSL_T;
405 	regs->tf_cs = _ucode32sel;
406 	regs->tf_ss = _udatasel;
407 	load_ds(_udatasel);
408 	td->td_pcb->pcb_ds = _udatasel;
409 	load_es(_udatasel);
410 	td->td_pcb->pcb_es = _udatasel;
411 	/* leave user %fs and %gs untouched */
412 	PROC_LOCK(p);
413 	mtx_lock(&psp->ps_mtx);
414 }
415 
416 
417 /*
418  * Send an interrupt to process.
419  *
420  * Stack is set up to allow sigcode stored
421  * in u. to call routine, followed by kcall
422  * to sigreturn routine below.  After sigreturn
423  * resets the signal mask, the stack, and the
424  * frame pointer, it returns to the user
425  * specified pc, psl.
426  */
427 static void
428 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
429 {
430 	struct thread *td = curthread;
431 	struct proc *p = td->td_proc;
432 	struct sigacts *psp;
433 	struct trapframe *regs;
434 	struct l_sigframe *fp, frame;
435 	l_sigset_t lmask;
436 	int oonstack, i;
437 	int sig, code;
438 
439 	sig = ksi->ksi_signo;
440 	code = ksi->ksi_code;
441 	PROC_LOCK_ASSERT(p, MA_OWNED);
442 	psp = p->p_sigacts;
443 	mtx_assert(&psp->ps_mtx, MA_OWNED);
444 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
445 		/* Signal handler installed with SA_SIGINFO. */
446 		linux_rt_sendsig(catcher, ksi, mask);
447 		return;
448 	}
449 
450 	regs = td->td_frame;
451 	oonstack = sigonstack(regs->tf_rsp);
452 
453 #ifdef DEBUG
454 	if (ldebug(sendsig))
455 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
456 		    catcher, sig, (void*)mask, code);
457 #endif
458 
459 	/*
460 	 * Allocate space for the signal handler context.
461 	 */
462 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
463 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
464 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
465 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
466 	} else
467 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
468 	mtx_unlock(&psp->ps_mtx);
469 	PROC_UNLOCK(p);
470 
471 	/*
472 	 * Build the argument list for the signal handler.
473 	 */
474 	if (p->p_sysent->sv_sigtbl)
475 		if (sig <= p->p_sysent->sv_sigsize)
476 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
477 
478 	bzero(&frame, sizeof(frame));
479 
480 	frame.sf_handler = PTROUT(catcher);
481 	frame.sf_sig = sig;
482 
483 	bsd_to_linux_sigset(mask, &lmask);
484 
485 	/*
486 	 * Build the signal context to be used by sigreturn.
487 	 */
488 	frame.sf_sc.sc_mask   = lmask.__bits[0];
489         frame.sf_sc.sc_gs     = rgs();
490         frame.sf_sc.sc_fs     = rfs();
491         __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
492         __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
493 	frame.sf_sc.sc_edi    = regs->tf_rdi;
494 	frame.sf_sc.sc_esi    = regs->tf_rsi;
495 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
496 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
497 	frame.sf_sc.sc_edx    = regs->tf_rdx;
498 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
499 	frame.sf_sc.sc_eax    = regs->tf_rax;
500 	frame.sf_sc.sc_eip    = regs->tf_rip;
501 	frame.sf_sc.sc_cs     = regs->tf_cs;
502 	frame.sf_sc.sc_eflags = regs->tf_rflags;
503 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
504 	frame.sf_sc.sc_ss     = regs->tf_ss;
505 	frame.sf_sc.sc_err    = regs->tf_err;
506 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
507 
508 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
509 		frame.sf_extramask[i] = lmask.__bits[i+1];
510 
511 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
512 		/*
513 		 * Process has trashed its stack; give it an illegal
514 		 * instruction to halt it in its tracks.
515 		 */
516 		PROC_LOCK(p);
517 		sigexit(td, SIGILL);
518 	}
519 
520 	/*
521 	 * Build context to run handler in.
522 	 */
523 	regs->tf_rsp = PTROUT(fp);
524 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
525 	regs->tf_rflags &= ~PSL_T;
526 	regs->tf_cs = _ucode32sel;
527 	regs->tf_ss = _udatasel;
528 	load_ds(_udatasel);
529 	td->td_pcb->pcb_ds = _udatasel;
530 	load_es(_udatasel);
531 	td->td_pcb->pcb_es = _udatasel;
532 	/* leave user %fs and %gs untouched */
533 	PROC_LOCK(p);
534 	mtx_lock(&psp->ps_mtx);
535 }
536 
537 /*
538  * System call to cleanup state after a signal
539  * has been taken.  Reset signal mask and
540  * stack state from context left by sendsig (above).
541  * Return to previous pc and psl as specified by
542  * context left by sendsig. Check carefully to
543  * make sure that the user has not modified the
544  * psl to gain improper privileges or to cause
545  * a machine fault.
546  */
547 int
548 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
549 {
550 	struct proc *p = td->td_proc;
551 	struct l_sigframe frame;
552 	struct trapframe *regs;
553 	l_sigset_t lmask;
554 	int eflags, i;
555 	ksiginfo_t ksi;
556 
557 	regs = td->td_frame;
558 
559 #ifdef DEBUG
560 	if (ldebug(sigreturn))
561 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
562 #endif
563 	/*
564 	 * The trampoline code hands us the sigframe.
565 	 * It is unsafe to keep track of it ourselves, in the event that a
566 	 * program jumps out of a signal handler.
567 	 */
568 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
569 		return (EFAULT);
570 
571 	/*
572 	 * Check for security violations.
573 	 */
574 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
575 	eflags = frame.sf_sc.sc_eflags;
576 	/*
577 	 * XXX do allow users to change the privileged flag PSL_RF.  The
578 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
579 	 * sometimes set it there too.  tf_eflags is kept in the signal
580 	 * context during signal handling and there is no other place
581 	 * to remember it, so the PSL_RF bit may be corrupted by the
582 	 * signal handler without us knowing.  Corruption of the PSL_RF
583 	 * bit at worst causes one more or one less debugger trap, so
584 	 * allowing it is fairly harmless.
585 	 */
586 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
587 		return(EINVAL);
588 
589 	/*
590 	 * Don't allow users to load a valid privileged %cs.  Let the
591 	 * hardware check for invalid selectors, excess privilege in
592 	 * other selectors, invalid %eip's and invalid %esp's.
593 	 */
594 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
595 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
596 		ksiginfo_init_trap(&ksi);
597 		ksi.ksi_signo = SIGBUS;
598 		ksi.ksi_code = BUS_OBJERR;
599 		ksi.ksi_trapno = T_PROTFLT;
600 		ksi.ksi_addr = (void *)regs->tf_rip;
601 		trapsignal(td, &ksi);
602 		return(EINVAL);
603 	}
604 
605 	lmask.__bits[0] = frame.sf_sc.sc_mask;
606 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
607 		lmask.__bits[i+1] = frame.sf_extramask[i];
608 	PROC_LOCK(p);
609 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
610 	SIG_CANTMASK(td->td_sigmask);
611 	signotify(td);
612 	PROC_UNLOCK(p);
613 
614 	/*
615 	 * Restore signal context.
616 	 */
617 	/* Selectors were restored by the trampoline. */
618 	regs->tf_rdi    = frame.sf_sc.sc_edi;
619 	regs->tf_rsi    = frame.sf_sc.sc_esi;
620 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
621 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
622 	regs->tf_rdx    = frame.sf_sc.sc_edx;
623 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
624 	regs->tf_rax    = frame.sf_sc.sc_eax;
625 	regs->tf_rip    = frame.sf_sc.sc_eip;
626 	regs->tf_cs     = frame.sf_sc.sc_cs;
627 	regs->tf_rflags = eflags;
628 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
629 	regs->tf_ss     = frame.sf_sc.sc_ss;
630 
631 	return (EJUSTRETURN);
632 }
633 
634 /*
635  * System call to cleanup state after a signal
636  * has been taken.  Reset signal mask and
637  * stack state from context left by rt_sendsig (above).
638  * Return to previous pc and psl as specified by
639  * context left by sendsig. Check carefully to
640  * make sure that the user has not modified the
641  * psl to gain improper privileges or to cause
642  * a machine fault.
643  */
644 int
645 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
646 {
647 	struct proc *p = td->td_proc;
648 	struct l_ucontext uc;
649 	struct l_sigcontext *context;
650 	l_stack_t *lss;
651 	stack_t ss;
652 	struct trapframe *regs;
653 	int eflags;
654 	ksiginfo_t ksi;
655 
656 	regs = td->td_frame;
657 
658 #ifdef DEBUG
659 	if (ldebug(rt_sigreturn))
660 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
661 #endif
662 	/*
663 	 * The trampoline code hands us the ucontext.
664 	 * It is unsafe to keep track of it ourselves, in the event that a
665 	 * program jumps out of a signal handler.
666 	 */
667 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
668 		return (EFAULT);
669 
670 	context = &uc.uc_mcontext;
671 
672 	/*
673 	 * Check for security violations.
674 	 */
675 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
676 	eflags = context->sc_eflags;
677 	/*
678 	 * XXX do allow users to change the privileged flag PSL_RF.  The
679 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
680 	 * sometimes set it there too.  tf_eflags is kept in the signal
681 	 * context during signal handling and there is no other place
682 	 * to remember it, so the PSL_RF bit may be corrupted by the
683 	 * signal handler without us knowing.  Corruption of the PSL_RF
684 	 * bit at worst causes one more or one less debugger trap, so
685 	 * allowing it is fairly harmless.
686 	 */
687 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
688 		return(EINVAL);
689 
690 	/*
691 	 * Don't allow users to load a valid privileged %cs.  Let the
692 	 * hardware check for invalid selectors, excess privilege in
693 	 * other selectors, invalid %eip's and invalid %esp's.
694 	 */
695 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
696 	if (!CS_SECURE(context->sc_cs)) {
697 		ksiginfo_init_trap(&ksi);
698 		ksi.ksi_signo = SIGBUS;
699 		ksi.ksi_code = BUS_OBJERR;
700 		ksi.ksi_trapno = T_PROTFLT;
701 		ksi.ksi_addr = (void *)regs->tf_rip;
702 		trapsignal(td, &ksi);
703 		return(EINVAL);
704 	}
705 
706 	PROC_LOCK(p);
707 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
708 	SIG_CANTMASK(td->td_sigmask);
709 	signotify(td);
710 	PROC_UNLOCK(p);
711 
712 	/*
713 	 * Restore signal context
714 	 */
715 	/* Selectors were restored by the trampoline. */
716 	regs->tf_rdi    = context->sc_edi;
717 	regs->tf_rsi    = context->sc_esi;
718 	regs->tf_rbp    = context->sc_ebp;
719 	regs->tf_rbx    = context->sc_ebx;
720 	regs->tf_rdx    = context->sc_edx;
721 	regs->tf_rcx    = context->sc_ecx;
722 	regs->tf_rax    = context->sc_eax;
723 	regs->tf_rip    = context->sc_eip;
724 	regs->tf_cs     = context->sc_cs;
725 	regs->tf_rflags = eflags;
726 	regs->tf_rsp    = context->sc_esp_at_signal;
727 	regs->tf_ss     = context->sc_ss;
728 
729 	/*
730 	 * call sigaltstack & ignore results..
731 	 */
732 	lss = &uc.uc_stack;
733 	ss.ss_sp = PTRIN(lss->ss_sp);
734 	ss.ss_size = lss->ss_size;
735 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
736 
737 #ifdef DEBUG
738 	if (ldebug(rt_sigreturn))
739 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
740 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
741 #endif
742 	(void)kern_sigaltstack(td, &ss, NULL);
743 
744 	return (EJUSTRETURN);
745 }
746 
747 /*
748  * MPSAFE
749  */
750 static void
751 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
752 {
753 	args[0] = tf->tf_rbx;
754 	args[1] = tf->tf_rcx;
755 	args[2] = tf->tf_rdx;
756 	args[3] = tf->tf_rsi;
757 	args[4] = tf->tf_rdi;
758 	args[5] = tf->tf_rbp;	/* Unconfirmed */
759 	*params = NULL;		/* no copyin */
760 }
761 
762 /*
763  * If a linux binary is exec'ing something, try this image activator
764  * first.  We override standard shell script execution in order to
765  * be able to modify the interpreter path.  We only do this if a linux
766  * binary is doing the exec, so we do not create an EXEC module for it.
767  */
768 static int	exec_linux_imgact_try(struct image_params *iparams);
769 
770 static int
771 exec_linux_imgact_try(struct image_params *imgp)
772 {
773     const char *head = (const char *)imgp->image_header;
774     char *rpath;
775     int error = -1, len;
776 
777     /*
778      * The interpreter for shell scripts run from a linux binary needs
779      * to be located in /compat/linux if possible in order to recursively
780      * maintain linux path emulation.
781      */
782     if (((const short *)head)[0] == SHELLMAGIC) {
783 	    /*
784 	     * Run our normal shell image activator.  If it succeeds attempt
785 	     * to use the alternate path for the interpreter.  If an alternate
786 	     * path is found, use our stringspace to store it.
787 	     */
788 	    if ((error = exec_shell_imgact(imgp)) == 0) {
789 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
790 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
791 		    if (rpath != NULL) {
792 			    len = strlen(rpath) + 1;
793 
794 			    if (len <= MAXSHELLCMDLEN) {
795 				    memcpy(imgp->interpreter_name, rpath, len);
796 			    }
797 			    free(rpath, M_TEMP);
798 		    }
799 	    }
800     }
801     return(error);
802 }
803 
804 /*
805  * Clear registers on exec
806  * XXX copied from ia32_signal.c.
807  */
808 static void
809 exec_linux_setregs(td, entry, stack, ps_strings)
810 	struct thread *td;
811 	u_long entry;
812 	u_long stack;
813 	u_long ps_strings;
814 {
815 	struct trapframe *regs = td->td_frame;
816 	struct pcb *pcb = td->td_pcb;
817 
818 	critical_enter();
819 	wrmsr(MSR_FSBASE, 0);
820 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
821 	pcb->pcb_fsbase = 0;
822 	pcb->pcb_gsbase = 0;
823 	critical_exit();
824 	load_ds(_udatasel);
825 	load_es(_udatasel);
826 	load_fs(_udatasel);
827 	load_gs(_udatasel);
828 	pcb->pcb_ds = _udatasel;
829 	pcb->pcb_es = _udatasel;
830 	pcb->pcb_fs = _udatasel;
831 	pcb->pcb_gs = _udatasel;
832 
833 	bzero((char *)regs, sizeof(struct trapframe));
834 	regs->tf_rip = entry;
835 	regs->tf_rsp = stack;
836 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
837 	regs->tf_ss = _udatasel;
838 	regs->tf_cs = _ucode32sel;
839 	regs->tf_rbx = ps_strings;
840 	load_cr0(rcr0() | CR0_MP | CR0_TS);
841 	fpstate_drop(td);
842 
843 	/* Return via doreti so that we can change to a different %cs */
844 	pcb->pcb_flags |= PCB_FULLCTX;
845 	td->td_retval[1] = 0;
846 }
847 
848 /*
849  * XXX copied from ia32_sysvec.c.
850  */
851 static register_t *
852 linux_copyout_strings(struct image_params *imgp)
853 {
854 	int argc, envc;
855 	u_int32_t *vectp;
856 	char *stringp, *destp;
857 	u_int32_t *stack_base;
858 	struct linux32_ps_strings *arginfo;
859 	int sigcodesz;
860 
861 	/*
862 	 * Calculate string base and vector table pointers.
863 	 * Also deal with signal trampoline code for this exec type.
864 	 */
865 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
866 	sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
867 	destp =	(caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
868 		roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
869 
870 	/*
871 	 * install sigcode
872 	 */
873 	if (sigcodesz)
874 		copyout(imgp->proc->p_sysent->sv_sigcode,
875 			((caddr_t)arginfo - sigcodesz), sigcodesz);
876 
877 	/*
878 	 * If we have a valid auxargs ptr, prepare some room
879 	 * on the stack.
880 	 */
881 	if (imgp->auxargs) {
882 		/*
883 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
884 		 * lower compatibility.
885 		 */
886 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
887 			: (AT_COUNT * 2);
888 		/*
889 		 * The '+ 2' is for the null pointers at the end of each of
890 		 * the arg and env vector sets,and imgp->auxarg_size is room
891 		 * for argument of Runtime loader.
892 		 */
893 		vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
894 				       imgp->auxarg_size) * sizeof(u_int32_t));
895 
896 	} else
897 		/*
898 		 * The '+ 2' is for the null pointers at the end of each of
899 		 * the arg and env vector sets
900 		 */
901 		vectp = (u_int32_t *)
902 			(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
903 
904 	/*
905 	 * vectp also becomes our initial stack base
906 	 */
907 	stack_base = vectp;
908 
909 	stringp = imgp->args->begin_argv;
910 	argc = imgp->args->argc;
911 	envc = imgp->args->envc;
912 	/*
913 	 * Copy out strings - arguments and environment.
914 	 */
915 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
916 
917 	/*
918 	 * Fill in "ps_strings" struct for ps, w, etc.
919 	 */
920 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
921 	suword32(&arginfo->ps_nargvstr, argc);
922 
923 	/*
924 	 * Fill in argument portion of vector table.
925 	 */
926 	for (; argc > 0; --argc) {
927 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
928 		while (*stringp++ != 0)
929 			destp++;
930 		destp++;
931 	}
932 
933 	/* a null vector table pointer separates the argp's from the envp's */
934 	suword32(vectp++, 0);
935 
936 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
937 	suword32(&arginfo->ps_nenvstr, envc);
938 
939 	/*
940 	 * Fill in environment portion of vector table.
941 	 */
942 	for (; envc > 0; --envc) {
943 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
944 		while (*stringp++ != 0)
945 			destp++;
946 		destp++;
947 	}
948 
949 	/* end of vector table is a null pointer */
950 	suword32(vectp, 0);
951 
952 	return ((register_t *)stack_base);
953 }
954 
955 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
956     "32-bit Linux emulation");
957 
958 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
959 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
960     &linux32_maxdsiz, 0, "");
961 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
962 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
963     &linux32_maxssiz, 0, "");
964 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
965 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
966     &linux32_maxvmem, 0, "");
967 
968 /*
969  * XXX copied from ia32_sysvec.c.
970  */
971 static void
972 linux32_fixlimits(struct proc *p)
973 {
974 	struct plimit *oldlim, *newlim;
975 
976 	if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 &&
977 	    linux32_maxvmem == 0)
978 		return;
979 	newlim = lim_alloc();
980 	PROC_LOCK(p);
981 	oldlim = p->p_limit;
982 	lim_copy(newlim, oldlim);
983 	if (linux32_maxdsiz != 0) {
984 		if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz)
985 		    newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz;
986 		if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz)
987 		    newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz;
988 	}
989 	if (linux32_maxssiz != 0) {
990 		if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz)
991 		    newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz;
992 		if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz)
993 		    newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz;
994 	}
995 	if (linux32_maxvmem != 0) {
996 		if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem)
997 		    newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem;
998 		if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem)
999 		    newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem;
1000 	}
1001 	p->p_limit = newlim;
1002 	PROC_UNLOCK(p);
1003 	lim_free(oldlim);
1004 }
1005 
1006 struct sysentvec elf_linux_sysvec = {
1007 	LINUX_SYS_MAXSYSCALL,
1008 	linux_sysent,
1009 	0,
1010 	LINUX_SIGTBLSZ,
1011 	bsd_to_linux_signal,
1012 	ELAST + 1,
1013 	bsd_to_linux_errno,
1014 	translate_traps,
1015 	elf_linux_fixup,
1016 	linux_sendsig,
1017 	linux_sigcode,
1018 	&linux_szsigcode,
1019 	linux_prepsyscall,
1020 	"Linux ELF32",
1021 	elf32_coredump,
1022 	exec_linux_imgact_try,
1023 	LINUX_MINSIGSTKSZ,
1024 	PAGE_SIZE,
1025 	VM_MIN_ADDRESS,
1026 	LINUX32_USRSTACK,
1027 	LINUX32_USRSTACK,
1028 	LINUX32_PS_STRINGS,
1029 	VM_PROT_ALL,
1030 	linux_copyout_strings,
1031 	exec_linux_setregs,
1032 	linux32_fixlimits
1033 };
1034 
1035 static Elf32_Brandinfo linux_brand = {
1036 					ELFOSABI_LINUX,
1037 					EM_386,
1038 					"Linux",
1039 					"/compat/linux",
1040 					"/lib/ld-linux.so.1",
1041 					&elf_linux_sysvec,
1042 					NULL,
1043 					BI_CAN_EXEC_DYN,
1044 				 };
1045 
1046 static Elf32_Brandinfo linux_glibc2brand = {
1047 					ELFOSABI_LINUX,
1048 					EM_386,
1049 					"Linux",
1050 					"/compat/linux",
1051 					"/lib/ld-linux.so.2",
1052 					&elf_linux_sysvec,
1053 					NULL,
1054 					BI_CAN_EXEC_DYN,
1055 				 };
1056 
1057 Elf32_Brandinfo *linux_brandlist[] = {
1058 					&linux_brand,
1059 					&linux_glibc2brand,
1060 					NULL
1061 				};
1062 
1063 static int
1064 linux_elf_modevent(module_t mod, int type, void *data)
1065 {
1066 	Elf32_Brandinfo **brandinfo;
1067 	int error;
1068 	struct linux_ioctl_handler **lihp;
1069 	struct linux_device_handler **ldhp;
1070 
1071 	error = 0;
1072 
1073 	switch(type) {
1074 	case MOD_LOAD:
1075 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1076 		     ++brandinfo)
1077 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1078 				error = EINVAL;
1079 		if (error == 0) {
1080 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1081 				linux_ioctl_register_handler(*lihp);
1082 			SET_FOREACH(ldhp, linux_device_handler_set)
1083 				linux_device_register_handler(*ldhp);
1084 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1085 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1086 			LIST_INIT(&futex_list);
1087 			sx_init(&futex_sx, "futex protection lock");
1088 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1089 			      NULL, 1000);
1090 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1091 			      NULL, 1000);
1092 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1093 			      NULL, 1000);
1094 			if (bootverbose)
1095 				printf("Linux ELF exec handler installed\n");
1096 		} else
1097 			printf("cannot insert Linux ELF brand handler\n");
1098 		break;
1099 	case MOD_UNLOAD:
1100 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1101 		     ++brandinfo)
1102 			if (elf32_brand_inuse(*brandinfo))
1103 				error = EBUSY;
1104 		if (error == 0) {
1105 			for (brandinfo = &linux_brandlist[0];
1106 			     *brandinfo != NULL; ++brandinfo)
1107 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1108 					error = EINVAL;
1109 		}
1110 		if (error == 0) {
1111 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1112 				linux_ioctl_unregister_handler(*lihp);
1113 			SET_FOREACH(ldhp, linux_device_handler_set)
1114 				linux_device_unregister_handler(*ldhp);
1115 			mtx_destroy(&emul_lock);
1116 			sx_destroy(&emul_shared_lock);
1117 			sx_destroy(&futex_sx);
1118 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1119 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1120 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1121 			if (bootverbose)
1122 				printf("Linux ELF exec handler removed\n");
1123 		} else
1124 			printf("Could not deinstall ELF interpreter entry\n");
1125 		break;
1126 	default:
1127 		return EOPNOTSUPP;
1128 	}
1129 	return error;
1130 }
1131 
1132 static moduledata_t linux_elf_mod = {
1133 	"linuxelf",
1134 	linux_elf_modevent,
1135 	0
1136 };
1137 
1138 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1139