xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 830940567b49bb0c08dfaed40418999e76616909)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_futex.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
73 
74 MODULE_VERSION(linux, 1);
75 
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
77 
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC      0x2123 /* #! */
80 #else
81 #define SHELLMAGIC      0x2321
82 #endif
83 
84 /*
85  * Allow the sendsig functions to use the ldebug() facility
86  * even though they are not syscalls themselves. Map them
87  * to syscall 0. This is slightly less bogus than using
88  * ldebug(sigreturn).
89  */
90 #define	LINUX_SYS_linux_rt_sendsig	0
91 #define	LINUX_SYS_linux_sendsig		0
92 
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
95 
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97 
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100 
101 static int	linux_fixup(register_t **stack_base,
102 		    struct image_params *iparams);
103 static int	elf_linux_fixup(register_t **stack_base,
104 		    struct image_params *iparams);
105 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
106 		    caddr_t *params);
107 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108 static void	exec_linux_setregs(struct thread *td, u_long entry,
109 				   u_long stack, u_long ps_strings);
110 static register_t *linux_copyout_strings(struct image_params *imgp);
111 
112 static int linux_szplatform;
113 const char *linux_platform;
114 
115 static eventhandler_tag linux_exit_tag;
116 static eventhandler_tag linux_schedtail_tag;
117 static eventhandler_tag linux_exec_tag;
118 
119 /*
120  * Linux syscalls return negative errno's, we do positive and map them
121  * Reference:
122  *   FreeBSD: src/sys/sys/errno.h
123  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
124  *            linux-2.6.17.8/include/asm-generic/errno.h
125  */
126 static int bsd_to_linux_errno[ELAST + 1] = {
127 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
128 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
129 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
130 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
131 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
132 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
133 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
134 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
135 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
136 	 -72, -67, -71
137 };
138 
139 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
140 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
141 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
142 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
143 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
144 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
145 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
146 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
147 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
148 };
149 
150 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
151 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
152 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
153 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
154 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
155 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
156 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
157 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
158 	SIGIO, SIGURG, SIGSYS
159 };
160 
/*
 * Native trap number -> Linux trap number.  Slots without a Linux
 * equivalent hold LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Bounds-checked lookup: anything at or beyond the end of the table
 * maps to LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)>=sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     LINUX_T_UNKNOWN: \
     _bsd_to_linux_trapcode[(code)])
199 
200 /*
201  * If FreeBSD & Linux have a difference of opinion about what a trap
202  * means, deal with it here.
203  *
204  * MPSAFE
205  */
206 static int
207 translate_traps(int signal, int trap_code)
208 {
209 	if (signal != SIGBUS)
210 		return signal;
211 	switch (trap_code) {
212 	case T_PROTFLT:
213 	case T_TSSFLT:
214 	case T_DOUBLEFLT:
215 	case T_PAGEFLT:
216 		return SIGSEGV;
217 	default:
218 		return signal;
219 	}
220 }
221 
222 static int
223 linux_fixup(register_t **stack_base, struct image_params *imgp)
224 {
225 	register_t *argv, *envp;
226 
227 	argv = *stack_base;
228 	envp = *stack_base + (imgp->args->argc + 1);
229 	(*stack_base)--;
230 	**stack_base = (intptr_t)(void *)envp;
231 	(*stack_base)--;
232 	**stack_base = (intptr_t)(void *)argv;
233 	(*stack_base)--;
234 	**stack_base = imgp->args->argc;
235 	return (0);
236 }
237 
238 static int
239 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
240 {
241 	struct proc *p;
242 	Elf32_Auxargs *args;
243 	Elf32_Addr *uplatform;
244 	struct ps_strings *arginfo;
245 	register_t *pos;
246 
247 	KASSERT(curthread->td_proc == imgp->proc,
248 	    ("unsafe elf_linux_fixup(), should be curproc"));
249 
250 	p = imgp->proc;
251 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
252 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
253 	    linux_szplatform);
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
258 
259 	/*
260 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
261 	 * as it has appeared in the 2.4.0-rc7 first time.
262 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
263 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
264 	 * is not present.
265 	 * Also see linux_times() implementation.
266 	 */
267 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
268 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
269 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
270 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
271 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
272 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
273 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
274 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
275 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
276 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
277 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
278 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
279 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
280 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
281 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
282 	if (args->execfd != -1)
283 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
284 	AUXARGS_ENTRY(pos, AT_NULL, 0);
285 
286 	free(imgp->auxargs, M_TEMP);
287 	imgp->auxargs = NULL;
288 
289 	(*stack_base)--;
290 	**stack_base = (register_t)imgp->args->argc;
291 	return (0);
292 }
293 
294 /*
295  * Copied from kern/kern_exec.c
296  */
297 static register_t *
298 linux_copyout_strings(struct image_params *imgp)
299 {
300 	int argc, envc;
301 	char **vectp;
302 	char *stringp, *destp;
303 	register_t *stack_base;
304 	struct ps_strings *arginfo;
305 	struct proc *p;
306 
307 	/*
308 	 * Calculate string base and vector table pointers.
309 	 * Also deal with signal trampoline code for this exec type.
310 	 */
311 	p = imgp->proc;
312 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
313 	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
314 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
315 	    sizeof(char *));
316 
317 	/*
318 	 * install sigcode
319 	 */
320 	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
321 	    linux_szsigcode), linux_szsigcode);
322 
323 	/*
324 	 * install LINUX_PLATFORM
325 	 */
326 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
327 	    linux_szplatform), linux_szplatform);
328 
329 	/*
330 	 * If we have a valid auxargs ptr, prepare some room
331 	 * on the stack.
332 	 */
333 	if (imgp->auxargs) {
334 		/*
335 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
336 		 * lower compatibility.
337 		 */
338 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
339 		    (LINUX_AT_COUNT * 2);
340 		/*
341 		 * The '+ 2' is for the null pointers at the end of each of
342 		 * the arg and env vector sets,and imgp->auxarg_size is room
343 		 * for argument of Runtime loader.
344 		 */
345 		vectp = (char **)(destp - (imgp->args->argc +
346 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
347 	} else {
348 		/*
349 		 * The '+ 2' is for the null pointers at the end of each of
350 		 * the arg and env vector sets
351 		 */
352 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
353 		    sizeof(char *));
354 	}
355 
356 	/*
357 	 * vectp also becomes our initial stack base
358 	 */
359 	stack_base = (register_t *)vectp;
360 
361 	stringp = imgp->args->begin_argv;
362 	argc = imgp->args->argc;
363 	envc = imgp->args->envc;
364 
365 	/*
366 	 * Copy out strings - arguments and environment.
367 	 */
368 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
369 
370 	/*
371 	 * Fill in "ps_strings" struct for ps, w, etc.
372 	 */
373 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
374 	suword(&arginfo->ps_nargvstr, argc);
375 
376 	/*
377 	 * Fill in argument portion of vector table.
378 	 */
379 	for (; argc > 0; --argc) {
380 		suword(vectp++, (long)(intptr_t)destp);
381 		while (*stringp++ != 0)
382 			destp++;
383 		destp++;
384 	}
385 
386 	/* a null vector table pointer separates the argp's from the envp's */
387 	suword(vectp++, 0);
388 
389 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
390 	suword(&arginfo->ps_nenvstr, envc);
391 
392 	/*
393 	 * Fill in environment portion of vector table.
394 	 */
395 	for (; envc > 0; --envc) {
396 		suword(vectp++, (long)(intptr_t)destp);
397 		while (*stringp++ != 0)
398 			destp++;
399 		destp++;
400 	}
401 
402 	/* end of vector table is a null pointer */
403 	suword(vectp, 0);
404 
405 	return (stack_base);
406 }
407 
408 
409 
410 extern int _ucodesel, _udatasel;
411 extern unsigned long linux_sznonrtsigcode;
412 
413 static void
414 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
415 {
416 	struct thread *td = curthread;
417 	struct proc *p = td->td_proc;
418 	struct sigacts *psp;
419 	struct trapframe *regs;
420 	struct l_rt_sigframe *fp, frame;
421 	int sig, code;
422 	int oonstack;
423 
424 	sig = ksi->ksi_signo;
425 	code = ksi->ksi_code;
426 	PROC_LOCK_ASSERT(p, MA_OWNED);
427 	psp = p->p_sigacts;
428 	mtx_assert(&psp->ps_mtx, MA_OWNED);
429 	regs = td->td_frame;
430 	oonstack = sigonstack(regs->tf_esp);
431 
432 #ifdef DEBUG
433 	if (ldebug(rt_sendsig))
434 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
435 		    catcher, sig, (void*)mask, code);
436 #endif
437 	/*
438 	 * Allocate space for the signal handler context.
439 	 */
440 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
441 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
442 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
443 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
444 	} else
445 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
446 	mtx_unlock(&psp->ps_mtx);
447 
448 	/*
449 	 * Build the argument list for the signal handler.
450 	 */
451 	if (p->p_sysent->sv_sigtbl)
452 		if (sig <= p->p_sysent->sv_sigsize)
453 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
454 
455 	bzero(&frame, sizeof(frame));
456 
457 	frame.sf_handler = catcher;
458 	frame.sf_sig = sig;
459 	frame.sf_siginfo = &fp->sf_si;
460 	frame.sf_ucontext = &fp->sf_sc;
461 
462 	/* Fill in POSIX parts */
463 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
464 
465 	/*
466 	 * Build the signal context to be used by sigreturn.
467 	 */
468 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
469 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
470 
471 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
472 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
473 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
474 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
475 	PROC_UNLOCK(p);
476 
477 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
478 
479 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
480 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
481 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
482 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
483 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
484 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
485 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
486 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
487 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
488 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
489 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
490 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
491 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
492 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
493 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
494 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
495 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
496 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
497 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
498 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
499 
500 #ifdef DEBUG
501 	if (ldebug(rt_sendsig))
502 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
503 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
504 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
505 #endif
506 
507 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
508 		/*
509 		 * Process has trashed its stack; give it an illegal
510 		 * instruction to halt it in its tracks.
511 		 */
512 #ifdef DEBUG
513 		if (ldebug(rt_sendsig))
514 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
515 			    fp, oonstack);
516 #endif
517 		PROC_LOCK(p);
518 		sigexit(td, SIGILL);
519 	}
520 
521 	/*
522 	 * Build context to run handler in.
523 	 */
524 	regs->tf_esp = (int)fp;
525 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
526 	    linux_sznonrtsigcode;
527 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
528 	regs->tf_cs = _ucodesel;
529 	regs->tf_ds = _udatasel;
530 	regs->tf_es = _udatasel;
531 	regs->tf_fs = _udatasel;
532 	regs->tf_ss = _udatasel;
533 	PROC_LOCK(p);
534 	mtx_lock(&psp->ps_mtx);
535 }
536 
537 
538 /*
539  * Send an interrupt to process.
540  *
541  * Stack is set up to allow sigcode stored
542  * in u. to call routine, followed by kcall
543  * to sigreturn routine below.  After sigreturn
544  * resets the signal mask, the stack, and the
545  * frame pointer, it returns to the user
546  * specified pc, psl.
547  */
548 static void
549 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
550 {
551 	struct thread *td = curthread;
552 	struct proc *p = td->td_proc;
553 	struct sigacts *psp;
554 	struct trapframe *regs;
555 	struct l_sigframe *fp, frame;
556 	l_sigset_t lmask;
557 	int sig, code;
558 	int oonstack, i;
559 
560 	PROC_LOCK_ASSERT(p, MA_OWNED);
561 	psp = p->p_sigacts;
562 	sig = ksi->ksi_signo;
563 	code = ksi->ksi_code;
564 	mtx_assert(&psp->ps_mtx, MA_OWNED);
565 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
566 		/* Signal handler installed with SA_SIGINFO. */
567 		linux_rt_sendsig(catcher, ksi, mask);
568 		return;
569 	}
570 	regs = td->td_frame;
571 	oonstack = sigonstack(regs->tf_esp);
572 
573 #ifdef DEBUG
574 	if (ldebug(sendsig))
575 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
576 		    catcher, sig, (void*)mask, code);
577 #endif
578 
579 	/*
580 	 * Allocate space for the signal handler context.
581 	 */
582 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
583 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
584 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
585 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
586 	} else
587 		fp = (struct l_sigframe *)regs->tf_esp - 1;
588 	mtx_unlock(&psp->ps_mtx);
589 	PROC_UNLOCK(p);
590 
591 	/*
592 	 * Build the argument list for the signal handler.
593 	 */
594 	if (p->p_sysent->sv_sigtbl)
595 		if (sig <= p->p_sysent->sv_sigsize)
596 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
597 
598 	bzero(&frame, sizeof(frame));
599 
600 	frame.sf_handler = catcher;
601 	frame.sf_sig = sig;
602 
603 	bsd_to_linux_sigset(mask, &lmask);
604 
605 	/*
606 	 * Build the signal context to be used by sigreturn.
607 	 */
608 	frame.sf_sc.sc_mask   = lmask.__bits[0];
609 	frame.sf_sc.sc_gs     = rgs();
610 	frame.sf_sc.sc_fs     = regs->tf_fs;
611 	frame.sf_sc.sc_es     = regs->tf_es;
612 	frame.sf_sc.sc_ds     = regs->tf_ds;
613 	frame.sf_sc.sc_edi    = regs->tf_edi;
614 	frame.sf_sc.sc_esi    = regs->tf_esi;
615 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
616 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
617 	frame.sf_sc.sc_edx    = regs->tf_edx;
618 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
619 	frame.sf_sc.sc_eax    = regs->tf_eax;
620 	frame.sf_sc.sc_eip    = regs->tf_eip;
621 	frame.sf_sc.sc_cs     = regs->tf_cs;
622 	frame.sf_sc.sc_eflags = regs->tf_eflags;
623 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
624 	frame.sf_sc.sc_ss     = regs->tf_ss;
625 	frame.sf_sc.sc_err    = regs->tf_err;
626 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
627 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
628 
629 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
630 		frame.sf_extramask[i] = lmask.__bits[i+1];
631 
632 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
633 		/*
634 		 * Process has trashed its stack; give it an illegal
635 		 * instruction to halt it in its tracks.
636 		 */
637 		PROC_LOCK(p);
638 		sigexit(td, SIGILL);
639 	}
640 
641 	/*
642 	 * Build context to run handler in.
643 	 */
644 	regs->tf_esp = (int)fp;
645 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
646 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
647 	regs->tf_cs = _ucodesel;
648 	regs->tf_ds = _udatasel;
649 	regs->tf_es = _udatasel;
650 	regs->tf_fs = _udatasel;
651 	regs->tf_ss = _udatasel;
652 	PROC_LOCK(p);
653 	mtx_lock(&psp->ps_mtx);
654 }
655 
656 /*
657  * System call to cleanup state after a signal
658  * has been taken.  Reset signal mask and
659  * stack state from context left by sendsig (above).
660  * Return to previous pc and psl as specified by
661  * context left by sendsig. Check carefully to
662  * make sure that the user has not modified the
663  * psl to gain improper privileges or to cause
664  * a machine fault.
665  */
666 int
667 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
668 {
669 	struct proc *p = td->td_proc;
670 	struct l_sigframe frame;
671 	struct trapframe *regs;
672 	l_sigset_t lmask;
673 	int eflags, i;
674 	ksiginfo_t ksi;
675 
676 	regs = td->td_frame;
677 
678 #ifdef DEBUG
679 	if (ldebug(sigreturn))
680 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
681 #endif
682 	/*
683 	 * The trampoline code hands us the sigframe.
684 	 * It is unsafe to keep track of it ourselves, in the event that a
685 	 * program jumps out of a signal handler.
686 	 */
687 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
688 		return (EFAULT);
689 
690 	/*
691 	 * Check for security violations.
692 	 */
693 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
694 	eflags = frame.sf_sc.sc_eflags;
695 	/*
696 	 * XXX do allow users to change the privileged flag PSL_RF.  The
697 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
698 	 * sometimes set it there too.  tf_eflags is kept in the signal
699 	 * context during signal handling and there is no other place
700 	 * to remember it, so the PSL_RF bit may be corrupted by the
701 	 * signal handler without us knowing.  Corruption of the PSL_RF
702 	 * bit at worst causes one more or one less debugger trap, so
703 	 * allowing it is fairly harmless.
704 	 */
705 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
706 		return(EINVAL);
707 
708 	/*
709 	 * Don't allow users to load a valid privileged %cs.  Let the
710 	 * hardware check for invalid selectors, excess privilege in
711 	 * other selectors, invalid %eip's and invalid %esp's.
712 	 */
713 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
714 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
715 		ksiginfo_init_trap(&ksi);
716 		ksi.ksi_signo = SIGBUS;
717 		ksi.ksi_code = BUS_OBJERR;
718 		ksi.ksi_trapno = T_PROTFLT;
719 		ksi.ksi_addr = (void *)regs->tf_eip;
720 		trapsignal(td, &ksi);
721 		return(EINVAL);
722 	}
723 
724 	lmask.__bits[0] = frame.sf_sc.sc_mask;
725 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
726 		lmask.__bits[i+1] = frame.sf_extramask[i];
727 	PROC_LOCK(p);
728 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
729 	SIG_CANTMASK(td->td_sigmask);
730 	signotify(td);
731 	PROC_UNLOCK(p);
732 
733 	/*
734 	 * Restore signal context.
735 	 */
736 	/* %gs was restored by the trampoline. */
737 	regs->tf_fs     = frame.sf_sc.sc_fs;
738 	regs->tf_es     = frame.sf_sc.sc_es;
739 	regs->tf_ds     = frame.sf_sc.sc_ds;
740 	regs->tf_edi    = frame.sf_sc.sc_edi;
741 	regs->tf_esi    = frame.sf_sc.sc_esi;
742 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
743 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
744 	regs->tf_edx    = frame.sf_sc.sc_edx;
745 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
746 	regs->tf_eax    = frame.sf_sc.sc_eax;
747 	regs->tf_eip    = frame.sf_sc.sc_eip;
748 	regs->tf_cs     = frame.sf_sc.sc_cs;
749 	regs->tf_eflags = eflags;
750 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
751 	regs->tf_ss     = frame.sf_sc.sc_ss;
752 
753 	return (EJUSTRETURN);
754 }
755 
756 /*
757  * System call to cleanup state after a signal
758  * has been taken.  Reset signal mask and
759  * stack state from context left by rt_sendsig (above).
760  * Return to previous pc and psl as specified by
761  * context left by sendsig. Check carefully to
762  * make sure that the user has not modified the
763  * psl to gain improper privileges or to cause
764  * a machine fault.
765  */
766 int
767 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
768 {
769 	struct proc *p = td->td_proc;
770 	struct l_ucontext uc;
771 	struct l_sigcontext *context;
772 	l_stack_t *lss;
773 	stack_t ss;
774 	struct trapframe *regs;
775 	int eflags;
776 	ksiginfo_t ksi;
777 
778 	regs = td->td_frame;
779 
780 #ifdef DEBUG
781 	if (ldebug(rt_sigreturn))
782 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
783 #endif
784 	/*
785 	 * The trampoline code hands us the ucontext.
786 	 * It is unsafe to keep track of it ourselves, in the event that a
787 	 * program jumps out of a signal handler.
788 	 */
789 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
790 		return (EFAULT);
791 
792 	context = &uc.uc_mcontext;
793 
794 	/*
795 	 * Check for security violations.
796 	 */
797 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
798 	eflags = context->sc_eflags;
799 	/*
800 	 * XXX do allow users to change the privileged flag PSL_RF.  The
801 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
802 	 * sometimes set it there too.  tf_eflags is kept in the signal
803 	 * context during signal handling and there is no other place
804 	 * to remember it, so the PSL_RF bit may be corrupted by the
805 	 * signal handler without us knowing.  Corruption of the PSL_RF
806 	 * bit at worst causes one more or one less debugger trap, so
807 	 * allowing it is fairly harmless.
808 	 */
809 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
810 		return(EINVAL);
811 
812 	/*
813 	 * Don't allow users to load a valid privileged %cs.  Let the
814 	 * hardware check for invalid selectors, excess privilege in
815 	 * other selectors, invalid %eip's and invalid %esp's.
816 	 */
817 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
818 	if (!CS_SECURE(context->sc_cs)) {
819 		ksiginfo_init_trap(&ksi);
820 		ksi.ksi_signo = SIGBUS;
821 		ksi.ksi_code = BUS_OBJERR;
822 		ksi.ksi_trapno = T_PROTFLT;
823 		ksi.ksi_addr = (void *)regs->tf_eip;
824 		trapsignal(td, &ksi);
825 		return(EINVAL);
826 	}
827 
828 	PROC_LOCK(p);
829 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
830 	SIG_CANTMASK(td->td_sigmask);
831 	signotify(td);
832 	PROC_UNLOCK(p);
833 
834 	/*
835 	 * Restore signal context
836 	 */
837 	/* %gs was restored by the trampoline. */
838 	regs->tf_fs     = context->sc_fs;
839 	regs->tf_es     = context->sc_es;
840 	regs->tf_ds     = context->sc_ds;
841 	regs->tf_edi    = context->sc_edi;
842 	regs->tf_esi    = context->sc_esi;
843 	regs->tf_ebp    = context->sc_ebp;
844 	regs->tf_ebx    = context->sc_ebx;
845 	regs->tf_edx    = context->sc_edx;
846 	regs->tf_ecx    = context->sc_ecx;
847 	regs->tf_eax    = context->sc_eax;
848 	regs->tf_eip    = context->sc_eip;
849 	regs->tf_cs     = context->sc_cs;
850 	regs->tf_eflags = eflags;
851 	regs->tf_esp    = context->sc_esp_at_signal;
852 	regs->tf_ss     = context->sc_ss;
853 
854 	/*
855 	 * call sigaltstack & ignore results..
856 	 */
857 	lss = &uc.uc_stack;
858 	ss.ss_sp = lss->ss_sp;
859 	ss.ss_size = lss->ss_size;
860 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
861 
862 #ifdef DEBUG
863 	if (ldebug(rt_sigreturn))
864 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
865 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
866 #endif
867 	(void)kern_sigaltstack(td, &ss, NULL);
868 
869 	return (EJUSTRETURN);
870 }
871 
872 /*
873  * MPSAFE
874  */
875 static void
876 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
877 {
878 	args[0] = tf->tf_ebx;
879 	args[1] = tf->tf_ecx;
880 	args[2] = tf->tf_edx;
881 	args[3] = tf->tf_esi;
882 	args[4] = tf->tf_edi;
883 	args[5] = tf->tf_ebp;	/* Unconfirmed */
884 	*params = NULL;		/* no copyin */
885 }
886 
887 /*
888  * If a linux binary is exec'ing something, try this image activator
889  * first.  We override standard shell script execution in order to
890  * be able to modify the interpreter path.  We only do this if a linux
891  * binary is doing the exec, so we do not create an EXEC module for it.
892  */
893 static int	exec_linux_imgact_try(struct image_params *iparams);
894 
895 static int
896 exec_linux_imgact_try(struct image_params *imgp)
897 {
898     const char *head = (const char *)imgp->image_header;
899     char *rpath;
900     int error = -1, len;
901 
902     /*
903      * The interpreter for shell scripts run from a linux binary needs
904      * to be located in /compat/linux if possible in order to recursively
905      * maintain linux path emulation.
906      */
907     if (((const short *)head)[0] == SHELLMAGIC) {
908 	    /*
909 	     * Run our normal shell image activator.  If it succeeds attempt
910 	     * to use the alternate path for the interpreter.  If an alternate
911 	     * path is found, use our stringspace to store it.
912 	     */
913 	    if ((error = exec_shell_imgact(imgp)) == 0) {
914 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
915 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
916 		    if (rpath != NULL) {
917 			    len = strlen(rpath) + 1;
918 
919 			    if (len <= MAXSHELLCMDLEN) {
920 				    memcpy(imgp->interpreter_name, rpath, len);
921 			    }
922 			    free(rpath, M_TEMP);
923 		    }
924 	    }
925     }
926     return(error);
927 }
928 
929 /*
930  * exec_setregs may initialize some registers differently than Linux
931  * does, thus potentially confusing Linux binaries. If necessary, we
932  * override the exec_setregs default(s) here.
933  */
934 static void
935 exec_linux_setregs(struct thread *td, u_long entry,
936 		   u_long stack, u_long ps_strings)
937 {
938 	struct pcb *pcb = td->td_pcb;
939 
940 	exec_setregs(td, entry, stack, ps_strings);
941 
942 	/* Linux sets %gs to 0, we default to _udatasel */
943 	pcb->pcb_gs = 0;
944 	load_gs(0);
945 
946 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
947 }
948 
949 static void
950 linux_get_machine(const char **dst)
951 {
952 
953 	switch (cpu_class) {
954 	case CPUCLASS_686:
955 		*dst = "i686";
956 		break;
957 	case CPUCLASS_586:
958 		*dst = "i586";
959 		break;
960 	case CPUCLASS_486:
961 		*dst = "i486";
962 		break;
963 	default:
964 		*dst = "i386";
965 	}
966 }
967 
968 struct sysentvec linux_sysvec = {
969 	.sv_size	= LINUX_SYS_MAXSYSCALL,
970 	.sv_table	= linux_sysent,
971 	.sv_mask	= 0,
972 	.sv_sigsize	= LINUX_SIGTBLSZ,
973 	.sv_sigtbl	= bsd_to_linux_signal,
974 	.sv_errsize	= ELAST + 1,
975 	.sv_errtbl	= bsd_to_linux_errno,
976 	.sv_transtrap	= translate_traps,
977 	.sv_fixup	= linux_fixup,
978 	.sv_sendsig	= linux_sendsig,
979 	.sv_sigcode	= linux_sigcode,
980 	.sv_szsigcode	= &linux_szsigcode,
981 	.sv_prepsyscall	= linux_prepsyscall,
982 	.sv_name	= "Linux a.out",
983 	.sv_coredump	= NULL,
984 	.sv_imgact_try	= exec_linux_imgact_try,
985 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
986 	.sv_pagesize	= PAGE_SIZE,
987 	.sv_minuser	= VM_MIN_ADDRESS,
988 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
989 	.sv_usrstack	= USRSTACK,
990 	.sv_psstrings	= PS_STRINGS,
991 	.sv_stackprot	= VM_PROT_ALL,
992 	.sv_copyout_strings = exec_copyout_strings,
993 	.sv_setregs	= exec_linux_setregs,
994 	.sv_fixlimit	= NULL,
995 	.sv_maxssiz	= NULL,
996 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
997 };
998 
999 struct sysentvec elf_linux_sysvec = {
1000 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1001 	.sv_table	= linux_sysent,
1002 	.sv_mask	= 0,
1003 	.sv_sigsize	= LINUX_SIGTBLSZ,
1004 	.sv_sigtbl	= bsd_to_linux_signal,
1005 	.sv_errsize	= ELAST + 1,
1006 	.sv_errtbl	= bsd_to_linux_errno,
1007 	.sv_transtrap	= translate_traps,
1008 	.sv_fixup	= elf_linux_fixup,
1009 	.sv_sendsig	= linux_sendsig,
1010 	.sv_sigcode	= linux_sigcode,
1011 	.sv_szsigcode	= &linux_szsigcode,
1012 	.sv_prepsyscall	= linux_prepsyscall,
1013 	.sv_name	= "Linux ELF",
1014 	.sv_coredump	= elf32_coredump,
1015 	.sv_imgact_try	= exec_linux_imgact_try,
1016 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1017 	.sv_pagesize	= PAGE_SIZE,
1018 	.sv_minuser	= VM_MIN_ADDRESS,
1019 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1020 	.sv_usrstack	= USRSTACK,
1021 	.sv_psstrings	= PS_STRINGS,
1022 	.sv_stackprot	= VM_PROT_ALL,
1023 	.sv_copyout_strings = linux_copyout_strings,
1024 	.sv_setregs	= exec_linux_setregs,
1025 	.sv_fixlimit	= NULL,
1026 	.sv_maxssiz	= NULL,
1027 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32
1028 };
1029 
1030 static char GNULINUX_ABI_VENDOR[] = "GNU";
1031 
1032 static Elf_Brandnote linux_brandnote = {
1033 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1034 	.hdr.n_descsz	= 16,
1035 	.hdr.n_type	= 1,
1036 	.vendor		= GNULINUX_ABI_VENDOR,
1037 	.flags		= 0
1038 };
1039 
1040 static Elf32_Brandinfo linux_brand = {
1041 	.brand		= ELFOSABI_LINUX,
1042 	.machine	= EM_386,
1043 	.compat_3_brand	= "Linux",
1044 	.emul_path	= "/compat/linux",
1045 	.interp_path	= "/lib/ld-linux.so.1",
1046 	.sysvec		= &elf_linux_sysvec,
1047 	.interp_newpath	= NULL,
1048 	.brand_note	= &linux_brandnote,
1049 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1050 };
1051 
1052 static Elf32_Brandinfo linux_glibc2brand = {
1053 	.brand		= ELFOSABI_LINUX,
1054 	.machine	= EM_386,
1055 	.compat_3_brand	= "Linux",
1056 	.emul_path	= "/compat/linux",
1057 	.interp_path	= "/lib/ld-linux.so.2",
1058 	.sysvec		= &elf_linux_sysvec,
1059 	.interp_newpath	= NULL,
1060 	.brand_note	= &linux_brandnote,
1061 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1062 };
1063 
1064 Elf32_Brandinfo *linux_brandlist[] = {
1065 	&linux_brand,
1066 	&linux_glibc2brand,
1067 	NULL
1068 };
1069 
1070 static int
1071 linux_elf_modevent(module_t mod, int type, void *data)
1072 {
1073 	Elf32_Brandinfo **brandinfo;
1074 	int error;
1075 	struct linux_ioctl_handler **lihp;
1076 	struct linux_device_handler **ldhp;
1077 
1078 	error = 0;
1079 
1080 	switch(type) {
1081 	case MOD_LOAD:
1082 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1083 		     ++brandinfo)
1084 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1085 				error = EINVAL;
1086 		if (error == 0) {
1087 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1088 				linux_ioctl_register_handler(*lihp);
1089 			SET_FOREACH(ldhp, linux_device_handler_set)
1090 				linux_device_register_handler(*ldhp);
1091 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1092 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1093 			LIST_INIT(&futex_list);
1094 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1095 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1096 			      NULL, 1000);
1097 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1098 			      NULL, 1000);
1099 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1100 			      NULL, 1000);
1101 			linux_get_machine(&linux_platform);
1102 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1103 			    sizeof(char *));
1104 			linux_osd_jail_register();
1105 			stclohz = (stathz ? stathz : hz);
1106 			if (bootverbose)
1107 				printf("Linux ELF exec handler installed\n");
1108 		} else
1109 			printf("cannot insert Linux ELF brand handler\n");
1110 		break;
1111 	case MOD_UNLOAD:
1112 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1113 		     ++brandinfo)
1114 			if (elf32_brand_inuse(*brandinfo))
1115 				error = EBUSY;
1116 		if (error == 0) {
1117 			for (brandinfo = &linux_brandlist[0];
1118 			     *brandinfo != NULL; ++brandinfo)
1119 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1120 					error = EINVAL;
1121 		}
1122 		if (error == 0) {
1123 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1124 				linux_ioctl_unregister_handler(*lihp);
1125 			SET_FOREACH(ldhp, linux_device_handler_set)
1126 				linux_device_unregister_handler(*ldhp);
1127 			mtx_destroy(&emul_lock);
1128 			sx_destroy(&emul_shared_lock);
1129 			mtx_destroy(&futex_mtx);
1130 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1131 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1132 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1133 			linux_osd_jail_deregister();
1134 			if (bootverbose)
1135 				printf("Linux ELF exec handler removed\n");
1136 		} else
1137 			printf("Could not deinstall ELF interpreter entry\n");
1138 		break;
1139 	default:
1140 		return EOPNOTSUPP;
1141 	}
1142 	return error;
1143 }
1144 
1145 static moduledata_t linux_elf_mod = {
1146 	"linuxelf",
1147 	linux_elf_modevent,
1148 	0
1149 };
1150 
1151 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1152