xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 2b743a9e9ddc6736208dc8ca1ce06ce64ad20a19)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
 * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/imgact.h>
47 #include <sys/imgact_elf.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/malloc.h>
51 #include <sys/module.h>
52 #include <sys/mutex.h>
53 #include <sys/proc.h>
54 #include <sys/resourcevar.h>
55 #include <sys/signalvar.h>
56 #include <sys/sysctl.h>
57 #include <sys/syscallsubr.h>
58 #include <sys/sysent.h>
59 #include <sys/sysproto.h>
60 #include <sys/vnode.h>
61 #include <sys/eventhandler.h>
62 
63 #include <vm/vm.h>
64 #include <vm/pmap.h>
65 #include <vm/vm_extern.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_param.h>
70 
71 #include <machine/cpu.h>
72 #include <machine/md_var.h>
73 #include <machine/pcb.h>
74 #include <machine/specialreg.h>
75 
76 #include <amd64/linux32/linux.h>
77 #include <amd64/linux32/linux32_proto.h>
78 #include <compat/linux/linux_emul.h>
79 #include <compat/linux/linux_mib.h>
80 #include <compat/linux/linux_signal.h>
81 #include <compat/linux/linux_util.h>
82 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/* Malloc type M_LINUX, described as "Linux mode structures". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
86 
/*
 * Store one 32-bit ELF auxiliary vector entry, i.e. the pair (id, val),
 * through suword32() at `pos' and advance `pos' by two 32-bit slots.
 *
 * `pos' must be a modifiable pointer lvalue.  All arguments are
 * parenthesized so that an lvalue expression such as `*pp' advances the
 * pointer that was meant rather than `pp' itself.  The results of the two
 * suword32() calls are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
92 
/*
 * The two characters "#!" that open a script, expressed as the 16-bit
 * value obtained when they are read as one short in host byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
98 
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Signal trampoline code and its size; both are defined outside this file. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* System call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets of ioctl and device handlers. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of the static functions defined in this file. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
static void	linux32_fixlimits(struct proc *p);

/* Futex list and the sx lock declared alongside it; defined outside this file. */
extern LIST_HEAD(futex_list, futex) futex_list;
extern struct sx futex_sx;

/*
 * Event handler registration tags for the exit, schedtail and exec
 * hooks.  NOTE(review): they are not used in the part of the file
 * visible here; presumably set at module load -- confirm.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
132 
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 * The table is indexed by the FreeBSD errno value (0 through ELAST) and
 * each entry holds the matching Linux errno, already negated.
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
	 -72, -67, -71						/* 90 - 92 */
};
152 
/*
 * FreeBSD-to-Linux signal number translation table with LINUX_SIGTBLSZ
 * entries.  The sendsig routines in this file index the process's
 * signal table with _SIG_IDX(sig).
 * NOTE(review): a 0 entry presumably marks a FreeBSD signal that has no
 * Linux counterpart -- confirm.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
163 
/*
 * Linux-to-FreeBSD signal number translation table with LINUX_SIGTBLSZ
 * entries.
 * NOTE(review): presumably indexed by Linux signal number minus one, as
 * the reverse of bsd_to_linux_signal; it is not referenced in the part
 * of the file visible here -- confirm against its users.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
174 
/* Value reported for any trap code that has no entry below. */
#define LINUX_T_UNKNOWN  255

/*
 * Translation from FreeBSD trap codes (the array index, named in the
 * per-entry comments) to the trap numbers reported to Linux programs.
 * The table is only ever read, so it is const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Look up `code' in the table above; anything outside the table yields
 * LINUX_T_UNKNOWN.  The explicit size_t cast makes negative codes compare
 * as very large values, so they are rejected by the same bounds check.
 * Note that `code' is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((size_t)(code) < \
     sizeof(_bsd_to_linux_trapcode) / sizeof(_bsd_to_linux_trapcode[0]) ? \
     _bsd_to_linux_trapcode[(code)] : \
     LINUX_T_UNKNOWN)
213 
/*
 * The "ps_strings" record written by linux_copyout_strings() at
 * LINUX32_PS_STRINGS.  Every member is stored with suword32(), so the
 * pointer members hold 32-bit addresses.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
220 
221 /*
222  * If FreeBSD & Linux have a difference of opinion about what a trap
223  * means, deal with it here.
224  *
225  * MPSAFE
226  */
227 static int
228 translate_traps(int signal, int trap_code)
229 {
230 	if (signal != SIGBUS)
231 		return signal;
232 	switch (trap_code) {
233 	case T_PROTFLT:
234 	case T_TSSFLT:
235 	case T_DOUBLEFLT:
236 	case T_PAGEFLT:
237 		return SIGSEGV;
238 	default:
239 		return signal;
240 	}
241 }
242 
243 static int
244 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
245 {
246 	Elf32_Auxargs *args;
247 	Elf32_Addr *base;
248 	Elf32_Addr *pos;
249 
250 	KASSERT(curthread->td_proc == imgp->proc &&
251 	    (curthread->td_proc->p_flag & P_SA) == 0,
252 	    ("unsafe elf_linux_fixup(), should be curproc"));
253 	base = (Elf32_Addr *)*stack_base;
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	if (args->trace)
258 		AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
259 	if (args->execfd != -1)
260 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
261 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
262 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
263 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
264 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
265 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
266 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
267 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
268 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
269 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
270 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
271 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
272 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
273 
274 	free(imgp->auxargs, M_TEMP);
275 	imgp->auxargs = NULL;
276 
277 	base--;
278 	suword32(base, (uint32_t)imgp->args->argc);
279 	*stack_base = (register_t *)base;
280 	return 0;
281 }
282 
/*
 * Segment selectors loaded into the trap frame and segment registers by
 * the sendsig routines, and an offset added to the trampoline address
 * for the RT case.  All are defined outside this file.
 * NOTE(review): linux_sznonrtsigcode is presumably the size of the
 * non-RT part of the signal trampoline -- confirm.
 */
extern int _ucodesel, _ucode32sel, _udatasel;
extern unsigned long linux_sznonrtsigcode;

/*
 * Deliver a signal whose handler was installed with SA_SIGINFO (this is
 * the path linux_sendsig() takes for such handlers).  An l_rt_sigframe
 * is built in a local copy, copied out to the user stack or to the
 * alternate signal stack, and the trap frame is redirected so that the
 * thread resumes in the signal trampoline.
 *
 * catcher	user handler address, stored in the frame
 * ksi		signal information (signal number, code and address)
 * mask		signal mask to record in the frame
 *
 * Entered with the process lock and ps_mtx held (both asserted).  They
 * are dropped while the frame is built and copied out, and reacquired
 * before returning.
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;
	int sig;
	int code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we are already running on the signal stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack when one is enabled, not already in use and
	 * requested for this signal; otherwise just below the current
	 * user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.  From here on
	 * `sig' is the number translated through the sysent signal table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	/* These two point into the user copy of the frame, not the local one. */
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* The process lock is released here and retaken before returning. */
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Save the segment registers and the 32-bit view of the trap frame. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
        frame.sf_sc.uc_mcontext.sc_gs     = rgs();
        frame.sf_sc.uc_mcontext.sc_fs     = rfs();
        __asm __volatile("movl %%es,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
        __asm __volatile("movl %%ds,%0" :
	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 * NOTE(review): sigexit() presumably does not return;
		 * the code below assumes a successful copyout.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.  The new instruction pointer is
	 * the start of the signal code (which linux_copyout_strings()
	 * places just below LINUX32_PS_STRINGS) plus linux_sznonrtsigcode.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_rflags &= ~PSL_T;
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* Retake both locks, as the caller expects them held on return. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
414 
415 
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * catcher	user handler address, stored in the frame
 * ksi		signal information (signal number and code)
 * mask		signal mask to record in the frame
 *
 * Handlers installed with SA_SIGINFO are passed to linux_rt_sendsig().
 * Entered with the process lock and ps_mtx held (both asserted); they
 * are dropped while the frame is built and copied out, and reacquired
 * before returning.
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;
	int sig, code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we are already running on the signal stack. */
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack when one is enabled, not already in use and
	 * requested for this signal; otherwise just below the current
	 * user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.  From here on
	 * `sig' is the number translated through the sysent signal table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
        frame.sf_sc.sc_gs     = rgs();
        frame.sf_sc.sc_fs     = rfs();
        __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
        __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* The mask words beyond the first travel in sf_extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 * NOTE(review): sigexit() presumably does not return;
		 * the code below assumes a successful copyout.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.  The new instruction pointer is
	 * the start of the signal code, which linux_copyout_strings()
	 * places just below LINUX32_PS_STRINGS.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_rflags &= ~PSL_T;
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	load_ds(_udatasel);
	td->td_pcb->pcb_ds = _udatasel;
	load_es(_udatasel);
	td->td_pcb->pcb_es = _udatasel;
	/* Retake both locks, as the caller expects them held on return. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
534 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT when the frame at args->sfp cannot be copied in,
 * EINVAL when the saved eflags or %cs fail the checks below (a SIGBUS
 * is also posted in the %cs case), and EJUSTRETURN once the trap frame
 * has been restored.
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations: the saved and the current flags
	 * may differ only in bits covered by PSL_USERCHANGE.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/*
	 * Reassemble the Linux mask from sc_mask plus sf_extramask and
	 * install it as the thread's signal mask under the process lock.
	 */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	/* Selectors were restored by the trampoline. */
	regs->tf_rdi    = frame.sf_sc.sc_edi;
	regs->tf_rsi    = frame.sf_sc.sc_esi;
	regs->tf_rbp    = frame.sf_sc.sc_ebp;
	regs->tf_rbx    = frame.sf_sc.sc_ebx;
	regs->tf_rdx    = frame.sf_sc.sc_edx;
	regs->tf_rcx    = frame.sf_sc.sc_ecx;
	regs->tf_rax    = frame.sf_sc.sc_eax;
	regs->tf_rip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;

	return (EJUSTRETURN);
}
631 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT when the ucontext at args->ucp cannot be copied in,
 * EINVAL when the saved eflags or %cs fail the checks below (a SIGBUS
 * is also posted in the %cs case), and EJUSTRETURN once the trap frame
 * has been restored.  The result of restoring the alternate signal
 * stack is deliberately ignored.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations: the saved and the current flags
	 * may differ only in bits covered by PSL_USERCHANGE.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Install the saved mask as the thread's signal mask. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* Selectors were restored by the trampoline. */
	regs->tf_rdi    = context->sc_edi;
	regs->tf_rsi    = context->sc_esi;
	regs->tf_rbp    = context->sc_ebp;
	regs->tf_rbx    = context->sc_ebx;
	regs->tf_rdx    = context->sc_edx;
	regs->tf_rcx    = context->sc_ecx;
	regs->tf_rax    = context->sc_eax;
	regs->tf_rip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 * The Linux stack description from the ucontext is converted to
	 * a native stack_t first.
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = PTRIN(lss->ss_sp);
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
744 
745 /*
746  * MPSAFE
747  */
748 static void
749 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
750 {
751 	args[0] = tf->tf_rbx;
752 	args[1] = tf->tf_rcx;
753 	args[2] = tf->tf_rdx;
754 	args[3] = tf->tf_rsi;
755 	args[4] = tf->tf_rdi;
756 	args[5] = tf->tf_rbp;	/* Unconfirmed */
757 	*params = NULL;		/* no copyin */
758 }
759 
760 /*
761  * If a linux binary is exec'ing something, try this image activator
762  * first.  We override standard shell script execution in order to
763  * be able to modify the interpreter path.  We only do this if a linux
764  * binary is doing the exec, so we do not create an EXEC module for it.
765  */
766 static int	exec_linux_imgact_try(struct image_params *iparams);
767 
768 static int
769 exec_linux_imgact_try(struct image_params *imgp)
770 {
771     const char *head = (const char *)imgp->image_header;
772     char *rpath;
773     int error = -1, len;
774 
775     /*
776      * The interpreter for shell scripts run from a linux binary needs
777      * to be located in /compat/linux if possible in order to recursively
778      * maintain linux path emulation.
779      */
780     if (((const short *)head)[0] == SHELLMAGIC) {
781 	    /*
782 	     * Run our normal shell image activator.  If it succeeds attempt
783 	     * to use the alternate path for the interpreter.  If an alternate
784 	     * path is found, use our stringspace to store it.
785 	     */
786 	    if ((error = exec_shell_imgact(imgp)) == 0) {
787 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
788 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
789 		    if (rpath != NULL) {
790 			    len = strlen(rpath) + 1;
791 
792 			    if (len <= MAXSHELLCMDLEN) {
793 				    memcpy(imgp->interpreter_name, rpath, len);
794 			    }
795 			    free(rpath, M_TEMP);
796 		    }
797 	    }
798     }
799     return(error);
800 }
801 
802 /*
803  * Clear registers on exec
804  * XXX copied from ia32_signal.c.
805  */
806 static void
807 exec_linux_setregs(td, entry, stack, ps_strings)
808 	struct thread *td;
809 	u_long entry;
810 	u_long stack;
811 	u_long ps_strings;
812 {
813 	struct trapframe *regs = td->td_frame;
814 	struct pcb *pcb = td->td_pcb;
815 
816 	wrmsr(MSR_FSBASE, 0);
817 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
818 	pcb->pcb_fsbase = 0;
819 	pcb->pcb_gsbase = 0;
820 	load_ds(_udatasel);
821 	load_es(_udatasel);
822 	load_fs(_udatasel);
823 	load_gs(0);
824 	pcb->pcb_ds = _udatasel;
825 	pcb->pcb_es = _udatasel;
826 	pcb->pcb_fs = _udatasel;
827 	pcb->pcb_gs = 0;
828 
829 	bzero((char *)regs, sizeof(struct trapframe));
830 	regs->tf_rip = entry;
831 	regs->tf_rsp = stack;
832 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
833 	regs->tf_ss = _udatasel;
834 	regs->tf_cs = _ucode32sel;
835 	regs->tf_rbx = ps_strings;
836 	load_cr0(rcr0() | CR0_MP | CR0_TS);
837 	fpstate_drop(td);
838 
839 	/* Return via doreti so that we can change to a different %cs */
840 	pcb->pcb_flags |= PCB_FULLCTX;
841 	td->td_retval[1] = 0;
842 }
843 
/*
 * XXX copied from ia32_sysvec.c.
 *
 * Lay out the top of the new image's user stack below
 * LINUX32_PS_STRINGS: the signal trampoline, the argument and
 * environment strings, and the 32-bit argv/envp pointer vectors (with
 * room reserved for the ELF auxiliary vector when imgp->auxargs is
 * set).  The ps_strings record itself is filled in as well.
 *
 * Returns the address of the argv vector, which is also the initial
 * stack base.  The results of copyout() and suword32() are not checked.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	u_int32_t *vectp;
	char *stringp, *destp;
	u_int32_t *stack_base;
	struct linux32_ps_strings *arginfo;
	int sigcodesz;

	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 * The strings occupy ARG_MAX - stringspace bytes; that size is
	 * rounded up to a multiple of sizeof(char *).
	 */
	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
	sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
	destp =	(caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
		roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));

	/*
	 * install sigcode, immediately below the ps_strings record
	 */
	if (sigcodesz)
		copyout(imgp->proc->p_sysent->sv_sigcode,
			((caddr_t)arginfo - sigcodesz), sigcodesz);

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
			: (AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets,and imgp->auxarg_size is room
		 * for argument of Runtime loader.
		 */
		vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
				       imgp->auxarg_size) * sizeof(u_int32_t));

	} else
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (u_int32_t *)
			(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;
	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.  stringp walks the
	 * kernel copy of the strings while destp tracks the matching
	 * address in the copy made above; each pass skips one string and
	 * its terminating NUL.
	 */
	for (; argc > 0; --argc) {
		suword32(vectp++, (u_int32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword32(vectp++, 0);

	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table, continuing from
	 * where the argument strings ended.
	 */
	for (; envc > 0; --envc) {
		suword32(vectp++, (u_int32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword32(vectp, 0);

	return ((register_t *)stack_base);
}
950 
/* Sysctl node "linux32" under the compat tree. */
SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read/write caps that linux32_fixlimits() applies to RLIMIT_DATA,
 * RLIMIT_STACK and RLIMIT_VMEM respectively.  A value of 0 disables the
 * corresponding cap.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");
963 
964 /*
965  * XXX copied from ia32_sysvec.c.
966  */
967 static void
968 linux32_fixlimits(struct proc *p)
969 {
970 	struct plimit *oldlim, *newlim;
971 
972 	if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 &&
973 	    linux32_maxvmem == 0)
974 		return;
975 	newlim = lim_alloc();
976 	PROC_LOCK(p);
977 	oldlim = p->p_limit;
978 	lim_copy(newlim, oldlim);
979 	if (linux32_maxdsiz != 0) {
980 		if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz)
981 		    newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz;
982 		if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz)
983 		    newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz;
984 	}
985 	if (linux32_maxssiz != 0) {
986 		if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz)
987 		    newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz;
988 		if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz)
989 		    newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz;
990 	}
991 	if (linux32_maxvmem != 0) {
992 		if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem)
993 		    newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem;
994 		if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem)
995 		    newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem;
996 	}
997 	p->p_limit = newlim;
998 	PROC_UNLOCK(p);
999 	lim_free(oldlim);
1000 }
1001 
1002 struct sysentvec elf_linux_sysvec = {
1003 	LINUX_SYS_MAXSYSCALL,
1004 	linux_sysent,
1005 	0,
1006 	LINUX_SIGTBLSZ,
1007 	bsd_to_linux_signal,
1008 	ELAST + 1,
1009 	bsd_to_linux_errno,
1010 	translate_traps,
1011 	elf_linux_fixup,
1012 	linux_sendsig,
1013 	linux_sigcode,
1014 	&linux_szsigcode,
1015 	linux_prepsyscall,
1016 	"Linux ELF32",
1017 	elf32_coredump,
1018 	exec_linux_imgact_try,
1019 	LINUX_MINSIGSTKSZ,
1020 	PAGE_SIZE,
1021 	VM_MIN_ADDRESS,
1022 	LINUX32_USRSTACK,
1023 	LINUX32_USRSTACK,
1024 	LINUX32_PS_STRINGS,
1025 	VM_PROT_ALL,
1026 	linux_copyout_strings,
1027 	exec_linux_setregs,
1028 	linux32_fixlimits
1029 };
1030 
1031 static Elf32_Brandinfo linux_brand = {
1032 					ELFOSABI_LINUX,
1033 					EM_386,
1034 					"Linux",
1035 					"/compat/linux",
1036 					"/lib/ld-linux.so.1",
1037 					&elf_linux_sysvec,
1038 					NULL,
1039 					BI_CAN_EXEC_DYN,
1040 				 };
1041 
1042 static Elf32_Brandinfo linux_glibc2brand = {
1043 					ELFOSABI_LINUX,
1044 					EM_386,
1045 					"Linux",
1046 					"/compat/linux",
1047 					"/lib/ld-linux.so.2",
1048 					&elf_linux_sysvec,
1049 					NULL,
1050 					BI_CAN_EXEC_DYN,
1051 				 };
1052 
1053 Elf32_Brandinfo *linux_brandlist[] = {
1054 					&linux_brand,
1055 					&linux_glibc2brand,
1056 					NULL
1057 				};
1058 
1059 static int
1060 linux_elf_modevent(module_t mod, int type, void *data)
1061 {
1062 	Elf32_Brandinfo **brandinfo;
1063 	int error;
1064 	struct linux_ioctl_handler **lihp;
1065 	struct linux_device_handler **ldhp;
1066 
1067 	error = 0;
1068 
1069 	switch(type) {
1070 	case MOD_LOAD:
1071 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1072 		     ++brandinfo)
1073 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1074 				error = EINVAL;
1075 		if (error == 0) {
1076 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1077 				linux_ioctl_register_handler(*lihp);
1078 			SET_FOREACH(ldhp, linux_device_handler_set)
1079 				linux_device_register_handler(*ldhp);
1080 			sx_init(&emul_lock, "emuldata lock");
1081 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1082 			LIST_INIT(&futex_list);
1083 			sx_init(&futex_sx, "futex protection lock");
1084 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1085 			      NULL, 1000);
1086 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1087 			      NULL, 1000);
1088 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1089 			      NULL, 1000);
1090 			if (bootverbose)
1091 				printf("Linux ELF exec handler installed\n");
1092 		} else
1093 			printf("cannot insert Linux ELF brand handler\n");
1094 		break;
1095 	case MOD_UNLOAD:
1096 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1097 		     ++brandinfo)
1098 			if (elf32_brand_inuse(*brandinfo))
1099 				error = EBUSY;
1100 		if (error == 0) {
1101 			for (brandinfo = &linux_brandlist[0];
1102 			     *brandinfo != NULL; ++brandinfo)
1103 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1104 					error = EINVAL;
1105 		}
1106 		if (error == 0) {
1107 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1108 				linux_ioctl_unregister_handler(*lihp);
1109 			SET_FOREACH(ldhp, linux_device_handler_set)
1110 				linux_device_unregister_handler(*ldhp);
1111 			sx_destroy(&emul_lock);
1112 			sx_destroy(&emul_shared_lock);
1113 			sx_destroy(&futex_sx);
1114 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1115 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1116 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1117 			if (bootverbose)
1118 				printf("Linux ELF exec handler removed\n");
1119 		} else
1120 			printf("Could not deinstall ELF interpreter entry\n");
1121 		break;
1122 	default:
1123 		return EOPNOTSUPP;
1124 	}
1125 	return error;
1126 }
1127 
1128 static moduledata_t linux_elf_mod = {
1129 	"linuxelf",
1130 	linux_elf_modevent,
1131 	0
1132 };
1133 
1134 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1135