xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision dbdcb99498a0007fafce9b8f1b85de424b5d70d7)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_signal.h>
82 #include <compat/linux/linux_util.h>
83 
/* Module version tag and the M_LINUX malloc type ("Linux mode structures"). */
84 MODULE_VERSION(linux, 1);
85 
86 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
87 
/*
 * Store one (id, val) pair as two consecutive 32-bit words in user memory
 * at pos using suword32(), leaving pos advanced past both words.  pos is
 * expanded and incremented twice, so it must be a plain pointer lvalue
 * without side effects.  The results of suword32() are not checked.
 */
88 #define	AUXARGS_ENTRY_32(pos, id, val)	\
89 	do {				\
90 		suword32(pos++, id);	\
91 		suword32(pos++, val);	\
92 	} while (0)
93 
/*
 * The two characters "#!" viewed as one 16-bit integer in host byte order;
 * exec_linux_imgact_try() compares it with the first short of the image
 * header.
 */
94 #if BYTE_ORDER == LITTLE_ENDIAN
95 #define SHELLMAGIC      0x2123 /* #! */
96 #else
97 #define SHELLMAGIC      0x2321
98 #endif
99 
100 /*
101  * Allow the sendsig functions to use the ldebug() facility
102  * even though they are not syscalls themselves. Map them
103  * to syscall 0. This is slightly less bogus than using
104  * ldebug(sigreturn).
105  */
106 #define	LINUX_SYS_linux_rt_sendsig	0
107 #define	LINUX_SYS_linux_sendsig		0
108 
/*
 * Signal trampoline code and its size; both are defined outside this file
 * and are installed as sv_sigcode / sv_szsigcode in elf_linux_sysvec.
 */
109 extern char linux_sigcode[];
110 extern int linux_szsigcode;
111 
/* System call table, installed as sv_table in elf_linux_sysvec. */
112 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
113 
/*
 * Linker sets of ioctl and device handlers.  NOTE(review): not referenced
 * in the part of the file visible here; presumably walked at module
 * load/unload -- confirm.
 */
114 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
115 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
116 
/* Forward declarations for the static sysentvec callbacks defined below. */
117 static int	elf_linux_fixup(register_t **stack_base,
118 		    struct image_params *iparams);
119 static register_t *linux_copyout_strings(struct image_params *imgp);
120 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
121 		    caddr_t *params);
122 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
123 static void	exec_linux_setregs(struct thread *td, u_long entry,
124 				   u_long stack, u_long ps_strings);
125 static void	linux32_fixlimit(struct rlimit *rl, int which);
126 
/* Futex list head and its sx lock; both are defined outside this file. */
127 extern LIST_HEAD(futex_list, futex) futex_list;
128 extern struct sx futex_sx;
129 
/*
 * Event handler registration tags.  NOTE(review): not used in the part of
 * the file visible here; presumably filled in when the module registers
 * its exit/schedtail/exec handlers -- confirm.
 */
130 static eventhandler_tag linux_exit_tag;
131 static eventhandler_tag linux_schedtail_tag;
132 static eventhandler_tag linux_exec_tag;
133 
134 /*
135  * Linux syscalls return negative errno's, we do positive and map them
136  * Reference:
137  *   FreeBSD: src/sys/sys/errno.h
138  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
139  *            linux-2.6.17.8/include/asm-generic/errno.h
140  */
/*
 * Error translation table, installed as sv_errtbl in elf_linux_sysvec with
 * sv_errsize = ELAST + 1.  The array index is the FreeBSD errno value and
 * each entry is the corresponding Linux errno, already negated.  Rows hold
 * ten entries each, so an entry's index is (row * 10 + column).
 */
141 static int bsd_to_linux_errno[ELAST + 1] = {
142 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
143 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
150 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
151 	 -72, -67, -71
152 };
153 
/*
 * FreeBSD -> Linux signal number table, installed as sv_sigtbl in
 * elf_linux_sysvec and indexed with _SIG_IDX(sig) by the sendsig routines
 * in this file.  NOTE(review): the two 0 entries presumably mark FreeBSD
 * signals that have no Linux counterpart -- confirm against the consumers.
 */
154 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
155 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
156 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
157 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
158 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
159 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
160 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
161 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
162 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
163 };
164 
/*
 * Linux -> FreeBSD signal number table; the inverse direction of
 * bsd_to_linux_signal.  It is not static, so it can be used from other
 * files.  NOTE(review): presumably indexed by (Linux signal number - 1),
 * mirroring the _SIG_IDX() indexing of the other table -- confirm.  Note
 * that SIGBUS and SIGURG each appear twice, so the mapping is not
 * one-to-one.
 */
165 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
166 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
167 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
168 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
169 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
170 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
171 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
172 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
173 	SIGIO, SIGURG, SIGSYS
174 };
175 
#define LINUX_T_UNKNOWN  255

/*
 * Trap numbers reported to Linux signal handlers (sc_trapno), indexed by
 * FreeBSD trap code; the FreeBSD T_* name of each slot is given in the
 * per-entry comment.  Slots without a counterpart hold LINUX_T_UNKNOWN.
 * The table is never modified, hence const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a FreeBSD trap code into the value stored in sc_trapno.
 *
 * code:    FreeBSD trap code; any int is accepted.
 * returns: the table entry for code, or LINUX_T_UNKNOWN when code is
 *          negative or beyond the end of the table.
 *
 * This is a function rather than a macro so that the argument is
 * evaluated exactly once and the lower bound is checked explicitly
 * instead of through a signed-to-unsigned conversion.
 */
static inline int
bsd_to_linux_trapcode(int code)
{
	const int ntraps = (int)(sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]));

	if (code < 0 || code >= ntraps)
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
214 
/*
 * ps_strings record as seen by a 32-bit process: the pointer members are
 * 32-bit integers rather than native pointers.  linux_copyout_strings()
 * fills one in at user address LINUX32_PS_STRINGS with suword32().
 */
215 struct linux32_ps_strings {
216 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
217 	u_int ps_nargvstr;	/* the number of argument strings */
218 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
219 	u_int ps_nenvstr;	/* the number of environment strings */
220 };
221 
222 /*
223  * If FreeBSD & Linux have a difference of opinion about what a trap
224  * means, deal with it here.
225  *
226  * MPSAFE
227  */
228 static int
229 translate_traps(int signal, int trap_code)
230 {
231 	if (signal != SIGBUS)
232 		return signal;
233 	switch (trap_code) {
234 	case T_PROTFLT:
235 	case T_TSSFLT:
236 	case T_DOUBLEFLT:
237 	case T_PAGEFLT:
238 		return SIGSEGV;
239 	default:
240 		return signal;
241 	}
242 }
243 
244 static int
245 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
246 {
247 	Elf32_Auxargs *args;
248 	Elf32_Addr *base;
249 	Elf32_Addr *pos;
250 
251 	KASSERT(curthread->td_proc == imgp->proc,
252 	    ("unsafe elf_linux_fixup(), should be curproc"));
253 	base = (Elf32_Addr *)*stack_base;
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	if (args->execfd != -1)
258 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
259 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
260 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
261 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
262 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
263 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
264 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
265 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
266 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
267 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
268 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
269 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
270 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
271 
272 	free(imgp->auxargs, M_TEMP);
273 	imgp->auxargs = NULL;
274 
275 	base--;
276 	suword32(base, (uint32_t)imgp->args->argc);
277 	*stack_base = (register_t *)base;
278 	return 0;
279 }
280 
/* User segment selectors and the size of the non-rt part of the sigcode. */
281 extern int _ucodesel, _ucode32sel, _udatasel;
282 extern unsigned long linux_sznonrtsigcode;
283 
/*
 * Deliver a signal to the current thread using the Linux "rt" frame
 * (struct l_rt_sigframe: handler arguments, siginfo and ucontext).
 * Called from linux_sendsig() for handlers installed with SA_SIGINFO.
 *
 * catcher: user handler address, stored in the frame as sf_handler.
 * ksi:     signal information; ksi_signo, ksi_code and ksi_addr are used.
 * mask:    signal mask to record in the frame's ucontext.
 *
 * Locking: the process lock and ps_mtx must be held on entry (asserted).
 * ps_mtx is dropped once the frame address has been chosen, the process
 * lock once the alternate-stack state has been recorded; both are taken
 * again before returning.
 *
 * The frame is built in a local copy and written to user memory with a
 * single copyout(); if that fails the process is terminated via
 * sigexit(td, SIGILL).
 */
284 static void
285 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
286 {
287 	struct thread *td = curthread;
288 	struct proc *p = td->td_proc;
289 	struct sigacts *psp;
290 	struct trapframe *regs;
291 	struct l_rt_sigframe *fp, frame;
292 	int oonstack;
293 	int sig;
294 	int code;
295 
296 	sig = ksi->ksi_signo;
297 	code = ksi->ksi_code;
298 	PROC_LOCK_ASSERT(p, MA_OWNED);
299 	psp = p->p_sigacts;
300 	mtx_assert(&psp->ps_mtx, MA_OWNED);
301 	regs = td->td_frame;
302 	oonstack = sigonstack(regs->tf_rsp);
303 
304 #ifdef DEBUG
305 	if (ldebug(rt_sendsig))
306 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
307 		    catcher, sig, (void*)mask, code);
308 #endif
309 	/*
310 	 * Allocate space for the signal handler context.
311 	 */
	/*
	 * Use the top of the alternate stack only if one is configured, we
	 * are not already running on it, and this signal asked for it;
	 * otherwise place the frame directly below the current user %rsp.
	 */
312 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
313 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
314 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
315 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
316 	} else
317 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	/* psp is not read again until ps_mtx is re-taken at the end. */
318 	mtx_unlock(&psp->ps_mtx);
319 
320 	/*
321 	 * Build the argument list for the signal handler.
322 	 */
	/* From here on sig is the translated (sv_sigtbl) signal number. */
323 	if (p->p_sysent->sv_sigtbl)
324 		if (sig <= p->p_sysent->sv_sigsize)
325 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
326 
327 	bzero(&frame, sizeof(frame));
328 
329 	frame.sf_handler = PTROUT(catcher);
330 	frame.sf_sig = sig;
	/* These two point into the user-space copy of the frame, not ours. */
331 	frame.sf_siginfo = PTROUT(&fp->sf_si);
332 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
333 
334 	/* Fill in POSIX parts */
335 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
336 
337 	/*
338 	 * Build the signal context to be used by sigreturn.
339 	 */
340 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
341 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
342 
343 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
344 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
345 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
346 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* The process lock stays dropped across the copyout() below. */
347 	PROC_UNLOCK(p);
348 
349 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
350 
	/*
	 * Save the interrupted register state.  %gs, %fs, %es and %ds are
	 * read from the CPU; everything else comes from the trapframe and
	 * is stored into the (32-bit) Linux sigcontext fields.
	 */
351 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
352         frame.sf_sc.uc_mcontext.sc_gs     = rgs();
353         frame.sf_sc.uc_mcontext.sc_fs     = rfs();
354         __asm __volatile("movl %%es,%0" :
355 	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
356         __asm __volatile("movl %%ds,%0" :
357 	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
358 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
359 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
360 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
361 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
362 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
363 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
364 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
365 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
366 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
367 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
368 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
369 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
370 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
371 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	/* NOTE(review): the value translated here is ksi_code -- confirm intended. */
372 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
373 
374 #ifdef DEBUG
375 	if (ldebug(rt_sendsig))
376 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
377 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
378 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
379 #endif
380 
381 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
382 		/*
383 		 * Process has trashed its stack; give it an illegal
384 		 * instruction to halt it in its tracks.
385 		 */
386 #ifdef DEBUG
387 		if (ldebug(rt_sendsig))
388 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
389 			    fp, oonstack);
390 #endif
		/* NOTE(review): sigexit() presumably does not return -- confirm. */
391 		PROC_LOCK(p);
392 		sigexit(td, SIGILL);
393 	}
394 
395 	/*
396 	 * Build context to run handler in.
397 	 */
	/*
	 * linux_copyout_strings() places the sigcode so that it ends at
	 * LINUX32_PS_STRINGS.  Execution resumes linux_sznonrtsigcode bytes
	 * into it -- presumably the start of the rt trampoline; confirm
	 * against the sigcode source.
	 */
398 	regs->tf_rsp = PTROUT(fp);
399 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
400 	    linux_sznonrtsigcode;
401 	regs->tf_rflags &= ~(PSL_T | PSL_D);
402 	regs->tf_cs = _ucode32sel;
403 	regs->tf_ss = _udatasel;
404 	load_ds(_udatasel);
405 	td->td_pcb->pcb_ds = _udatasel;
406 	load_es(_udatasel);
407 	td->td_pcb->pcb_es = _udatasel;
408 	/* leave user %fs and %gs untouched */
	/* Restore the lock state that was asserted on entry. */
409 	PROC_LOCK(p);
410 	mtx_lock(&psp->ps_mtx);
411 }
412 
413 
414 /*
415  * Send an interrupt to process.
416  *
417  * Stack is set up to allow sigcode stored
418  * in u. to call routine, followed by kcall
419  * to sigreturn routine below.  After sigreturn
420  * resets the signal mask, the stack, and the
421  * frame pointer, it returns to the user
422  * specified pc, psl.
423  */
/*
 * sv_sendsig hook: deliver a signal to the current thread using the
 * classic Linux frame (struct l_sigframe).  Signals whose handler was
 * installed with SA_SIGINFO are handed to linux_rt_sendsig() instead.
 *
 * catcher: user handler address, stored in the frame as sf_handler.
 * ksi:     signal information; ksi_signo, ksi_code and ksi_addr are used.
 * mask:    signal mask to save; word 0 goes into sc_mask and the
 *          remaining words into sf_extramask[].
 *
 * Locking: the process lock and ps_mtx must be held on entry (asserted);
 * both are dropped while the frame is built and copied out, and both are
 * taken again before returning.
 *
 * If the frame cannot be copied to user memory the process is terminated
 * via sigexit(td, SIGILL).
 */
424 static void
425 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
426 {
427 	struct thread *td = curthread;
428 	struct proc *p = td->td_proc;
429 	struct sigacts *psp;
430 	struct trapframe *regs;
431 	struct l_sigframe *fp, frame;
432 	l_sigset_t lmask;
433 	int oonstack, i;
434 	int sig, code;
435 
436 	sig = ksi->ksi_signo;
437 	code = ksi->ksi_code;
438 	PROC_LOCK_ASSERT(p, MA_OWNED);
439 	psp = p->p_sigacts;
440 	mtx_assert(&psp->ps_mtx, MA_OWNED);
441 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
442 		/* Signal handler installed with SA_SIGINFO. */
443 		linux_rt_sendsig(catcher, ksi, mask);
444 		return;
445 	}
446 
447 	regs = td->td_frame;
448 	oonstack = sigonstack(regs->tf_rsp);
449 
450 #ifdef DEBUG
451 	if (ldebug(sendsig))
452 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
453 		    catcher, sig, (void*)mask, code);
454 #endif
455 
456 	/*
457 	 * Allocate space for the signal handler context.
458 	 */
	/*
	 * Use the top of the alternate stack only if one is configured, we
	 * are not already running on it, and this signal asked for it;
	 * otherwise place the frame directly below the current user %rsp.
	 */
459 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
460 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
461 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
462 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
463 	} else
464 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	/* Both locks stay dropped until the end of the function. */
465 	mtx_unlock(&psp->ps_mtx);
466 	PROC_UNLOCK(p);
467 
468 	/*
469 	 * Build the argument list for the signal handler.
470 	 */
	/* From here on sig is the translated (sv_sigtbl) signal number. */
471 	if (p->p_sysent->sv_sigtbl)
472 		if (sig <= p->p_sysent->sv_sigsize)
473 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
474 
475 	bzero(&frame, sizeof(frame));
476 
477 	frame.sf_handler = PTROUT(catcher);
478 	frame.sf_sig = sig;
479 
480 	bsd_to_linux_sigset(mask, &lmask);
481 
482 	/*
483 	 * Build the signal context to be used by sigreturn.
484 	 */
	/*
	 * %gs, %fs, %es and %ds are read from the CPU; everything else
	 * comes from the trapframe.
	 */
485 	frame.sf_sc.sc_mask   = lmask.__bits[0];
486         frame.sf_sc.sc_gs     = rgs();
487         frame.sf_sc.sc_fs     = rfs();
488         __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
489         __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
490 	frame.sf_sc.sc_edi    = regs->tf_rdi;
491 	frame.sf_sc.sc_esi    = regs->tf_rsi;
492 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
493 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
494 	frame.sf_sc.sc_edx    = regs->tf_rdx;
495 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
496 	frame.sf_sc.sc_eax    = regs->tf_rax;
497 	frame.sf_sc.sc_eip    = regs->tf_rip;
498 	frame.sf_sc.sc_cs     = regs->tf_cs;
499 	frame.sf_sc.sc_eflags = regs->tf_rflags;
500 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
501 	frame.sf_sc.sc_ss     = regs->tf_ss;
502 	frame.sf_sc.sc_err    = regs->tf_err;
503 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	/* NOTE(review): the value translated here is ksi_code -- confirm intended. */
504 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
505 
	/* Mask words beyond the first travel in sf_extramask[]. */
506 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
507 		frame.sf_extramask[i] = lmask.__bits[i+1];
508 
509 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
510 		/*
511 		 * Process has trashed its stack; give it an illegal
512 		 * instruction to halt it in its tracks.
513 		 */
		/* NOTE(review): sigexit() presumably does not return -- confirm. */
514 		PROC_LOCK(p);
515 		sigexit(td, SIGILL);
516 	}
517 
518 	/*
519 	 * Build context to run handler in.
520 	 */
	/*
	 * linux_copyout_strings() places the sigcode so that it ends at
	 * LINUX32_PS_STRINGS; the handler is entered at its first byte.
	 */
521 	regs->tf_rsp = PTROUT(fp);
522 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
523 	regs->tf_rflags &= ~(PSL_T | PSL_D);
524 	regs->tf_cs = _ucode32sel;
525 	regs->tf_ss = _udatasel;
526 	load_ds(_udatasel);
527 	td->td_pcb->pcb_ds = _udatasel;
528 	load_es(_udatasel);
529 	td->td_pcb->pcb_es = _udatasel;
530 	/* leave user %fs and %gs untouched */
	/* Restore the lock state that was asserted on entry. */
531 	PROC_LOCK(p);
532 	mtx_lock(&psp->ps_mtx);
533 }
534 
535 /*
536  * System call to cleanup state after a signal
537  * has been taken.  Reset signal mask and
538  * stack state from context left by sendsig (above).
539  * Return to previous pc and psl as specified by
540  * context left by sendsig. Check carefully to
541  * make sure that the user has not modified the
542  * psl to gain improper privileges or to cause
543  * a machine fault.
544  */
/*
 * sigreturn for frames built by linux_sendsig().
 *
 * td:   calling thread; its trapframe and signal mask are rewritten.
 * args: args->sfp is the user address of the struct l_sigframe.
 *
 * Returns:
 *   EFAULT      the frame could not be copied in;
 *   EINVAL      the saved eflags differ from the current ones in bits
 *               outside PSL_USERCHANGE (PSL_RF is ignored), or the saved
 *               %cs is not a user-privilege selector (in which case a
 *               SIGBUS/BUS_OBJERR is also posted via trapsignal());
 *   EJUSTRETURN on success, after the signal mask and the register state
 *               have been restored from the frame.
 */
545 int
546 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
547 {
548 	struct proc *p = td->td_proc;
549 	struct l_sigframe frame;
550 	struct trapframe *regs;
551 	l_sigset_t lmask;
552 	int eflags, i;
553 	ksiginfo_t ksi;
554 
555 	regs = td->td_frame;
556 
557 #ifdef DEBUG
558 	if (ldebug(sigreturn))
559 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
560 #endif
561 	/*
562 	 * The trampoline code hands us the sigframe.
563 	 * It is unsafe to keep track of it ourselves, in the event that a
564 	 * program jumps out of a signal handler.
565 	 */
566 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
567 		return (EFAULT);
568 
569 	/*
570 	 * Check for security violations.
571 	 */
	/* True when ef and oef agree in every bit outside PSL_USERCHANGE. */
572 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
573 	eflags = frame.sf_sc.sc_eflags;
574 	/*
575 	 * XXX do allow users to change the privileged flag PSL_RF.  The
576 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
577 	 * sometimes set it there too.  tf_eflags is kept in the signal
578 	 * context during signal handling and there is no other place
579 	 * to remember it, so the PSL_RF bit may be corrupted by the
580 	 * signal handler without us knowing.  Corruption of the PSL_RF
581 	 * bit at worst causes one more or one less debugger trap, so
582 	 * allowing it is fairly harmless.
583 	 */
584 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
585 		return(EINVAL);
586 
587 	/*
588 	 * Don't allow users to load a valid privileged %cs.  Let the
589 	 * hardware check for invalid selectors, excess privilege in
590 	 * other selectors, invalid %eip's and invalid %esp's.
591 	 */
592 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
593 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
594 		ksiginfo_init_trap(&ksi);
595 		ksi.ksi_signo = SIGBUS;
596 		ksi.ksi_code = BUS_OBJERR;
597 		ksi.ksi_trapno = T_PROTFLT;
598 		ksi.ksi_addr = (void *)regs->tf_rip;
599 		trapsignal(td, &ksi);
600 		return(EINVAL);
601 	}
602 
	/* Reassemble the saved mask from sc_mask plus sf_extramask[]. */
603 	lmask.__bits[0] = frame.sf_sc.sc_mask;
604 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
605 		lmask.__bits[i+1] = frame.sf_extramask[i];
606 	PROC_LOCK(p);
607 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
608 	SIG_CANTMASK(td->td_sigmask);
609 	signotify(td);
610 	PROC_UNLOCK(p);
611 
612 	/*
613 	 * Restore signal context.
614 	 */
615 	/* Selectors were restored by the trampoline. */
616 	regs->tf_rdi    = frame.sf_sc.sc_edi;
617 	regs->tf_rsi    = frame.sf_sc.sc_esi;
618 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
619 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
620 	regs->tf_rdx    = frame.sf_sc.sc_edx;
621 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
622 	regs->tf_rax    = frame.sf_sc.sc_eax;
623 	regs->tf_rip    = frame.sf_sc.sc_eip;
624 	regs->tf_cs     = frame.sf_sc.sc_cs;
625 	regs->tf_rflags = eflags;
626 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
627 	regs->tf_ss     = frame.sf_sc.sc_ss;
628 
629 	return (EJUSTRETURN);
630 }
631 
632 /*
633  * System call to cleanup state after a signal
634  * has been taken.  Reset signal mask and
635  * stack state from context left by rt_sendsig (above).
636  * Return to previous pc and psl as specified by
637  * context left by sendsig. Check carefully to
638  * make sure that the user has not modified the
639  * psl to gain improper privileges or to cause
640  * a machine fault.
641  */
/*
 * sigreturn for frames built by linux_rt_sendsig().
 *
 * td:   calling thread; its trapframe, signal mask and alternate signal
 *       stack settings are rewritten.
 * args: args->ucp is the user address of the struct l_ucontext.
 *
 * Returns:
 *   EFAULT      the ucontext could not be copied in;
 *   EINVAL      the saved eflags differ from the current ones in bits
 *               outside PSL_USERCHANGE (PSL_RF is ignored), or the saved
 *               %cs is not a user-privilege selector (in which case a
 *               SIGBUS/BUS_OBJERR is also posted via trapsignal());
 *   EJUSTRETURN on success.  The result of the final kern_sigaltstack()
 *               call is deliberately ignored.
 */
642 int
643 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
644 {
645 	struct proc *p = td->td_proc;
646 	struct l_ucontext uc;
647 	struct l_sigcontext *context;
648 	l_stack_t *lss;
649 	stack_t ss;
650 	struct trapframe *regs;
651 	int eflags;
652 	ksiginfo_t ksi;
653 
654 	regs = td->td_frame;
655 
656 #ifdef DEBUG
657 	if (ldebug(rt_sigreturn))
658 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
659 #endif
660 	/*
661 	 * The trampoline code hands us the ucontext.
662 	 * It is unsafe to keep track of it ourselves, in the event that a
663 	 * program jumps out of a signal handler.
664 	 */
665 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
666 		return (EFAULT);
667 
668 	context = &uc.uc_mcontext;
669 
670 	/*
671 	 * Check for security violations.
672 	 */
	/*
	 * Same definition as in linux_sigreturn(); the two must stay
	 * textually identical because the macro is defined twice in this
	 * file.
	 */
673 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
674 	eflags = context->sc_eflags;
675 	/*
676 	 * XXX do allow users to change the privileged flag PSL_RF.  The
677 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
678 	 * sometimes set it there too.  tf_eflags is kept in the signal
679 	 * context during signal handling and there is no other place
680 	 * to remember it, so the PSL_RF bit may be corrupted by the
681 	 * signal handler without us knowing.  Corruption of the PSL_RF
682 	 * bit at worst causes one more or one less debugger trap, so
683 	 * allowing it is fairly harmless.
684 	 */
685 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
686 		return(EINVAL);
687 
688 	/*
689 	 * Don't allow users to load a valid privileged %cs.  Let the
690 	 * hardware check for invalid selectors, excess privilege in
691 	 * other selectors, invalid %eip's and invalid %esp's.
692 	 */
693 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
694 	if (!CS_SECURE(context->sc_cs)) {
695 		ksiginfo_init_trap(&ksi);
696 		ksi.ksi_signo = SIGBUS;
697 		ksi.ksi_code = BUS_OBJERR;
698 		ksi.ksi_trapno = T_PROTFLT;
699 		ksi.ksi_addr = (void *)regs->tf_rip;
700 		trapsignal(td, &ksi);
701 		return(EINVAL);
702 	}
703 
	/* The full mask is taken from uc_sigmask; sc_mask is not consulted. */
704 	PROC_LOCK(p);
705 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
706 	SIG_CANTMASK(td->td_sigmask);
707 	signotify(td);
708 	PROC_UNLOCK(p);
709 
710 	/*
711 	 * Restore signal context
712 	 */
713 	/* Selectors were restored by the trampoline. */
714 	regs->tf_rdi    = context->sc_edi;
715 	regs->tf_rsi    = context->sc_esi;
716 	regs->tf_rbp    = context->sc_ebp;
717 	regs->tf_rbx    = context->sc_ebx;
718 	regs->tf_rdx    = context->sc_edx;
719 	regs->tf_rcx    = context->sc_ecx;
720 	regs->tf_rax    = context->sc_eax;
721 	regs->tf_rip    = context->sc_eip;
722 	regs->tf_cs     = context->sc_cs;
723 	regs->tf_rflags = eflags;
724 	regs->tf_rsp    = context->sc_esp_at_signal;
725 	regs->tf_ss     = context->sc_ss;
726 
727 	/*
728 	 * call sigaltstack & ignore results..
729 	 */
	/* Convert the saved Linux stack_t into the native form first. */
730 	lss = &uc.uc_stack;
731 	ss.ss_sp = PTRIN(lss->ss_sp);
732 	ss.ss_size = lss->ss_size;
733 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
734 
735 #ifdef DEBUG
736 	if (ldebug(rt_sigreturn))
737 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
738 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
739 #endif
740 	(void)kern_sigaltstack(td, &ss, NULL);
741 
742 	return (EJUSTRETURN);
743 }
744 
745 /*
746  * MPSAFE
747  */
748 static void
749 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
750 {
751 	args[0] = tf->tf_rbx;
752 	args[1] = tf->tf_rcx;
753 	args[2] = tf->tf_rdx;
754 	args[3] = tf->tf_rsi;
755 	args[4] = tf->tf_rdi;
756 	args[5] = tf->tf_rbp;	/* Unconfirmed */
757 	*params = NULL;		/* no copyin */
758 }
759 
760 /*
761  * If a linux binary is exec'ing something, try this image activator
762  * first.  We override standard shell script execution in order to
763  * be able to modify the interpreter path.  We only do this if a linux
764  * binary is doing the exec, so we do not create an EXEC module for it.
765  */
766 static int	exec_linux_imgact_try(struct image_params *iparams);
767 
768 static int
769 exec_linux_imgact_try(struct image_params *imgp)
770 {
771     const char *head = (const char *)imgp->image_header;
772     char *rpath;
773     int error = -1, len;
774 
775     /*
776      * The interpreter for shell scripts run from a linux binary needs
777      * to be located in /compat/linux if possible in order to recursively
778      * maintain linux path emulation.
779      */
780     if (((const short *)head)[0] == SHELLMAGIC) {
781 	    /*
782 	     * Run our normal shell image activator.  If it succeeds attempt
783 	     * to use the alternate path for the interpreter.  If an alternate
784 	     * path is found, use our stringspace to store it.
785 	     */
786 	    if ((error = exec_shell_imgact(imgp)) == 0) {
787 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
788 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
789 		    if (rpath != NULL) {
790 			    len = strlen(rpath) + 1;
791 
792 			    if (len <= MAXSHELLCMDLEN) {
793 				    memcpy(imgp->interpreter_name, rpath, len);
794 			    }
795 			    free(rpath, M_TEMP);
796 		    }
797 	    }
798     }
799     return(error);
800 }
801 
802 /*
803  * Clear registers on exec
804  * XXX copied from ia32_signal.c.
805  */
806 static void
807 exec_linux_setregs(td, entry, stack, ps_strings)
808 	struct thread *td;
809 	u_long entry;
810 	u_long stack;
811 	u_long ps_strings;
812 {
813 	struct trapframe *regs = td->td_frame;
814 	struct pcb *pcb = td->td_pcb;
815 
816 	critical_enter();
817 	wrmsr(MSR_FSBASE, 0);
818 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
819 	pcb->pcb_fsbase = 0;
820 	pcb->pcb_gsbase = 0;
821 	critical_exit();
822 	load_ds(_udatasel);
823 	load_es(_udatasel);
824 	load_fs(_udatasel);
825 	load_gs(_udatasel);
826 	pcb->pcb_ds = _udatasel;
827 	pcb->pcb_es = _udatasel;
828 	pcb->pcb_fs = _udatasel;
829 	pcb->pcb_gs = _udatasel;
830 
831 	bzero((char *)regs, sizeof(struct trapframe));
832 	regs->tf_rip = entry;
833 	regs->tf_rsp = stack;
834 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
835 	regs->tf_ss = _udatasel;
836 	regs->tf_cs = _ucode32sel;
837 	regs->tf_rbx = ps_strings;
838 	load_cr0(rcr0() | CR0_MP | CR0_TS);
839 	fpstate_drop(td);
840 
841 	/* Return via doreti so that we can change to a different %cs */
842 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
843 	pcb->pcb_flags &= ~PCB_GS32BIT;
844 	td->td_retval[1] = 0;
845 }
846 
847 /*
848  * XXX copied from ia32_sysvec.c.
849  */
850 static register_t *
851 linux_copyout_strings(struct image_params *imgp)
852 {
853 	int argc, envc;
854 	u_int32_t *vectp;
855 	char *stringp, *destp;
856 	u_int32_t *stack_base;
857 	struct linux32_ps_strings *arginfo;
858 	int sigcodesz;
859 
860 	/*
861 	 * Calculate string base and vector table pointers.
862 	 * Also deal with signal trampoline code for this exec type.
863 	 */
864 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
865 	sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
866 	destp =	(caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
867 		roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
868 
869 	/*
870 	 * install sigcode
871 	 */
872 	if (sigcodesz)
873 		copyout(imgp->proc->p_sysent->sv_sigcode,
874 			((caddr_t)arginfo - sigcodesz), sigcodesz);
875 
876 	/*
877 	 * If we have a valid auxargs ptr, prepare some room
878 	 * on the stack.
879 	 */
880 	if (imgp->auxargs) {
881 		/*
882 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
883 		 * lower compatibility.
884 		 */
885 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
886 			: (AT_COUNT * 2);
887 		/*
888 		 * The '+ 2' is for the null pointers at the end of each of
889 		 * the arg and env vector sets,and imgp->auxarg_size is room
890 		 * for argument of Runtime loader.
891 		 */
892 		vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
893 				       imgp->auxarg_size) * sizeof(u_int32_t));
894 
895 	} else
896 		/*
897 		 * The '+ 2' is for the null pointers at the end of each of
898 		 * the arg and env vector sets
899 		 */
900 		vectp = (u_int32_t *)
901 			(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
902 
903 	/*
904 	 * vectp also becomes our initial stack base
905 	 */
906 	stack_base = vectp;
907 
908 	stringp = imgp->args->begin_argv;
909 	argc = imgp->args->argc;
910 	envc = imgp->args->envc;
911 	/*
912 	 * Copy out strings - arguments and environment.
913 	 */
914 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
915 
916 	/*
917 	 * Fill in "ps_strings" struct for ps, w, etc.
918 	 */
919 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
920 	suword32(&arginfo->ps_nargvstr, argc);
921 
922 	/*
923 	 * Fill in argument portion of vector table.
924 	 */
925 	for (; argc > 0; --argc) {
926 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
927 		while (*stringp++ != 0)
928 			destp++;
929 		destp++;
930 	}
931 
932 	/* a null vector table pointer separates the argp's from the envp's */
933 	suword32(vectp++, 0);
934 
935 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
936 	suword32(&arginfo->ps_nenvstr, envc);
937 
938 	/*
939 	 * Fill in environment portion of vector table.
940 	 */
941 	for (; envc > 0; --envc) {
942 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
943 		while (*stringp++ != 0)
944 			destp++;
945 		destp++;
946 	}
947 
948 	/* end of vector table is a null pointer */
949 	suword32(vectp, 0);
950 
951 	return ((register_t *)stack_base);
952 }
953 
/*
 * compat.linux32 sysctl node and its three read-write knobs.  Each knob
 * is the ceiling linux32_fixlimit() applies to one resource limit
 * (maxdsiz: RLIMIT_DATA, maxssiz: RLIMIT_STACK, maxvmem: RLIMIT_VMEM);
 * a value of 0 turns clamping of that resource off.
 */
954 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
955     "32-bit Linux emulation");
956 
957 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
958 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
959     &linux32_maxdsiz, 0, "");
960 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
961 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
962     &linux32_maxssiz, 0, "");
963 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
964 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
965     &linux32_maxvmem, 0, "");
966 
967 static void
968 linux32_fixlimit(struct rlimit *rl, int which)
969 {
970 
971 	switch (which) {
972 	case RLIMIT_DATA:
973 		if (linux32_maxdsiz != 0) {
974 			if (rl->rlim_cur > linux32_maxdsiz)
975 				rl->rlim_cur = linux32_maxdsiz;
976 			if (rl->rlim_max > linux32_maxdsiz)
977 				rl->rlim_max = linux32_maxdsiz;
978 		}
979 		break;
980 	case RLIMIT_STACK:
981 		if (linux32_maxssiz != 0) {
982 			if (rl->rlim_cur > linux32_maxssiz)
983 				rl->rlim_cur = linux32_maxssiz;
984 			if (rl->rlim_max > linux32_maxssiz)
985 				rl->rlim_max = linux32_maxssiz;
986 		}
987 		break;
988 	case RLIMIT_VMEM:
989 		if (linux32_maxvmem != 0) {
990 			if (rl->rlim_cur > linux32_maxvmem)
991 				rl->rlim_cur = linux32_maxvmem;
992 			if (rl->rlim_max > linux32_maxvmem)
993 				rl->rlim_max = linux32_maxvmem;
994 		}
995 		break;
996 	}
997 }
998 
999 struct sysentvec elf_linux_sysvec = {
1000 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1001 	.sv_table	= linux_sysent,
1002 	.sv_mask	= 0,
1003 	.sv_sigsize	= LINUX_SIGTBLSZ,
1004 	.sv_sigtbl	= bsd_to_linux_signal,
1005 	.sv_errsize	= ELAST + 1,
1006 	.sv_errtbl	= bsd_to_linux_errno,
1007 	.sv_transtrap	= translate_traps,
1008 	.sv_fixup	= elf_linux_fixup,
1009 	.sv_sendsig	= linux_sendsig,
1010 	.sv_sigcode	= linux_sigcode,
1011 	.sv_szsigcode	= &linux_szsigcode,
1012 	.sv_prepsyscall	= linux_prepsyscall,
1013 	.sv_name	= "Linux ELF32",
1014 	.sv_coredump	= elf32_coredump,
1015 	.sv_imgact_try	= exec_linux_imgact_try,
1016 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1017 	.sv_pagesize	= PAGE_SIZE,
1018 	.sv_minuser	= VM_MIN_ADDRESS,
1019 	.sv_maxuser	= LINUX32_USRSTACK,
1020 	.sv_usrstack	= LINUX32_USRSTACK,
1021 	.sv_psstrings	= LINUX32_PS_STRINGS,
1022 	.sv_stackprot	= VM_PROT_ALL,
1023 	.sv_copyout_strings = linux_copyout_strings,
1024 	.sv_setregs	= exec_linux_setregs,
1025 	.sv_fixlimit	= linux32_fixlimit,
1026 	.sv_maxssiz	= &linux32_maxssiz,
1027 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1028 };
1029 
1030 static Elf32_Brandinfo linux_brand = {
1031 	.brand		= ELFOSABI_LINUX,
1032 	.machine	= EM_386,
1033 	.compat_3_brand	= "Linux",
1034 	.emul_path	= "/compat/linux",
1035 	.interp_path	= "/lib/ld-linux.so.1",
1036 	.sysvec		= &elf_linux_sysvec,
1037 	.interp_newpath	= NULL,
1038 	.flags		= BI_CAN_EXEC_DYN,
1039 };
1040 
1041 static Elf32_Brandinfo linux_glibc2brand = {
1042 	.brand		= ELFOSABI_LINUX,
1043 	.machine	= EM_386,
1044 	.compat_3_brand	= "Linux",
1045 	.emul_path	= "/compat/linux",
1046 	.interp_path	= "/lib/ld-linux.so.2",
1047 	.sysvec		= &elf_linux_sysvec,
1048 	.interp_newpath	= NULL,
1049 	.flags		= BI_CAN_EXEC_DYN,
1050 };
1051 
1052 Elf32_Brandinfo *linux_brandlist[] = {
1053 	&linux_brand,
1054 	&linux_glibc2brand,
1055 	NULL
1056 };
1057 
1058 static int
1059 linux_elf_modevent(module_t mod, int type, void *data)
1060 {
1061 	Elf32_Brandinfo **brandinfo;
1062 	int error;
1063 	struct linux_ioctl_handler **lihp;
1064 	struct linux_device_handler **ldhp;
1065 
1066 	error = 0;
1067 
1068 	switch(type) {
1069 	case MOD_LOAD:
1070 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1071 		     ++brandinfo)
1072 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1073 				error = EINVAL;
1074 		if (error == 0) {
1075 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1076 				linux_ioctl_register_handler(*lihp);
1077 			SET_FOREACH(ldhp, linux_device_handler_set)
1078 				linux_device_register_handler(*ldhp);
1079 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1080 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1081 			LIST_INIT(&futex_list);
1082 			sx_init(&futex_sx, "futex protection lock");
1083 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1084 			      NULL, 1000);
1085 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1086 			      NULL, 1000);
1087 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1088 			      NULL, 1000);
1089 			if (bootverbose)
1090 				printf("Linux ELF exec handler installed\n");
1091 		} else
1092 			printf("cannot insert Linux ELF brand handler\n");
1093 		break;
1094 	case MOD_UNLOAD:
1095 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1096 		     ++brandinfo)
1097 			if (elf32_brand_inuse(*brandinfo))
1098 				error = EBUSY;
1099 		if (error == 0) {
1100 			for (brandinfo = &linux_brandlist[0];
1101 			     *brandinfo != NULL; ++brandinfo)
1102 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1103 					error = EINVAL;
1104 		}
1105 		if (error == 0) {
1106 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1107 				linux_ioctl_unregister_handler(*lihp);
1108 			SET_FOREACH(ldhp, linux_device_handler_set)
1109 				linux_device_unregister_handler(*ldhp);
1110 			mtx_destroy(&emul_lock);
1111 			sx_destroy(&emul_shared_lock);
1112 			sx_destroy(&futex_sx);
1113 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1114 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1115 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1116 			if (bootverbose)
1117 				printf("Linux ELF exec handler removed\n");
1118 		} else
1119 			printf("Could not deinstall ELF interpreter entry\n");
1120 		break;
1121 	default:
1122 		return EOPNOTSUPP;
1123 	}
1124 	return error;
1125 }
1126 
1127 static moduledata_t linux_elf_mod = {
1128 	"linuxelf",
1129 	linux_elf_modevent,
1130 	0
1131 };
1132 
1133 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1134