xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision efbbe93e563efe4f4ca8ec60fa97aa3d5b087aa3)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_mib.h>
69 #include <compat/linux/linux_misc.h>
70 #include <compat/linux/linux_signal.h>
71 #include <compat/linux/linux_util.h>
72 
73 MODULE_VERSION(linux, 1);
74 
75 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
76 
77 #if BYTE_ORDER == LITTLE_ENDIAN
78 #define SHELLMAGIC      0x2123 /* #! */
79 #else
80 #define SHELLMAGIC      0x2321
81 #endif
82 
83 /*
84  * Allow the sendsig functions to use the ldebug() facility
85  * even though they are not syscalls themselves. Map them
86  * to syscall 0. This is slightly less bogus than using
87  * ldebug(sigreturn).
88  */
89 #define	LINUX_SYS_linux_rt_sendsig	0
90 #define	LINUX_SYS_linux_sendsig		0
91 
92 extern char linux_sigcode[];
93 extern int linux_szsigcode;
94 
95 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
96 
97 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
98 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
99 
100 static int	linux_fixup(register_t **stack_base,
101 		    struct image_params *iparams);
102 static int	elf_linux_fixup(register_t **stack_base,
103 		    struct image_params *iparams);
104 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
105 		    caddr_t *params);
106 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
107 static void	exec_linux_setregs(struct thread *td, u_long entry,
108 				   u_long stack, u_long ps_strings);
109 static register_t *linux_copyout_strings(struct image_params *imgp);
110 
111 static int linux_szplatform;
112 const char *linux_platform;
113 
114 extern LIST_HEAD(futex_list, futex) futex_list;
115 extern struct sx futex_sx;
116 
117 static eventhandler_tag linux_exit_tag;
118 static eventhandler_tag linux_schedtail_tag;
119 static eventhandler_tag linux_exec_tag;
120 
121 /*
122  * Linux syscalls return negative errno's, we do positive and map them
123  * Reference:
124  *   FreeBSD: src/sys/sys/errno.h
125  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
126  *            linux-2.6.17.8/include/asm-generic/errno.h
127  */
128 static int bsd_to_linux_errno[ELAST + 1] = {
129 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
130 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
131 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
132 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
133 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
134 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
135 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
136 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
137 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
138 	 -72, -67, -71
139 };
140 
141 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
142 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
143 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
144 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
145 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
146 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
147 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
148 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
149 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
150 };
151 
152 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
153 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
154 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
155 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
156 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
157 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
158 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
159 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
160 	SIGIO, SIGURG, SIGSYS
161 };
162 
/* Value reported for any native trap code that has no table entry. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap number translation table, indexed by the native T_* trap code.
 * Each slot holds the number handed to the Linux program, or
 * LINUX_T_UNKNOWN where no mapping is defined.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0                */	LINUX_T_UNKNOWN,
	/*  1  T_PRIVINFLT   */	6,
	/*  2                */	LINUX_T_UNKNOWN,
	/*  3  T_BPTFLT      */	3,
	/*  4                */	LINUX_T_UNKNOWN,
	/*  5                */	LINUX_T_UNKNOWN,
	/*  6  T_ARITHTRAP   */	16,
	/*  7  T_ASTFLT      */	254,
	/*  8                */	LINUX_T_UNKNOWN,
	/*  9  T_PROTFLT     */	13,
	/* 10  T_TRCTRAP     */	1,
	/* 11                */	LINUX_T_UNKNOWN,
	/* 12  T_PAGEFLT     */	14,
	/* 13                */	LINUX_T_UNKNOWN,
	/* 14  T_ALIGNFLT    */	17,
	/* 15                */	LINUX_T_UNKNOWN,
	/* 16                */	LINUX_T_UNKNOWN,
	/* 17                */	LINUX_T_UNKNOWN,
	/* 18  T_DIVIDE      */	0,
	/* 19  T_NMI         */	2,
	/* 20  T_OFLOW       */	4,
	/* 21  T_BOUND       */	5,
	/* 22  T_DNA         */	7,
	/* 23  T_DOUBLEFLT   */	8,
	/* 24  T_FPOPFLT     */	9,
	/* 25  T_TSSFLT      */	10,
	/* 26  T_SEGNPFLT    */	11,
	/* 27  T_STKFLT      */	12,
	/* 28  T_MCHK        */	18,
	/* 29  T_XMMFLT      */	19,
	/* 30  T_RESERVED    */	15
};

/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[]; any code that lies
 * beyond the end of the table yields LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) < sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
201 
202 /*
203  * If FreeBSD & Linux have a difference of opinion about what a trap
204  * means, deal with it here.
205  *
206  * MPSAFE
207  */
208 static int
209 translate_traps(int signal, int trap_code)
210 {
211 	if (signal != SIGBUS)
212 		return signal;
213 	switch (trap_code) {
214 	case T_PROTFLT:
215 	case T_TSSFLT:
216 	case T_DOUBLEFLT:
217 	case T_PAGEFLT:
218 		return SIGSEGV;
219 	default:
220 		return signal;
221 	}
222 }
223 
224 static int
225 linux_fixup(register_t **stack_base, struct image_params *imgp)
226 {
227 	register_t *argv, *envp;
228 
229 	argv = *stack_base;
230 	envp = *stack_base + (imgp->args->argc + 1);
231 	(*stack_base)--;
232 	**stack_base = (intptr_t)(void *)envp;
233 	(*stack_base)--;
234 	**stack_base = (intptr_t)(void *)argv;
235 	(*stack_base)--;
236 	**stack_base = imgp->args->argc;
237 	return (0);
238 }
239 
240 static int
241 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
242 {
243 	struct proc *p;
244 	Elf32_Auxargs *args;
245 	Elf32_Addr *uplatform;
246 	struct ps_strings *arginfo;
247 	register_t *pos;
248 
249 	KASSERT(curthread->td_proc == imgp->proc,
250 	    ("unsafe elf_linux_fixup(), should be curproc"));
251 
252 	p = imgp->proc;
253 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
254 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 	    linux_szplatform);
256 	args = (Elf32_Auxargs *)imgp->auxargs;
257 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
258 
259 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
260 	AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, hz);
261 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
262 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
263 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
264 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
265 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
266 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
267 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
268 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
269 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
270 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
271 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
272 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
273 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
274 	if (args->execfd != -1)
275 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
276 	AUXARGS_ENTRY(pos, AT_NULL, 0);
277 
278 	free(imgp->auxargs, M_TEMP);
279 	imgp->auxargs = NULL;
280 
281 	(*stack_base)--;
282 	**stack_base = (register_t)imgp->args->argc;
283 	return (0);
284 }
285 
286 /*
287  * Copied from kern/kern_exec.c
288  */
289 static register_t *
290 linux_copyout_strings(struct image_params *imgp)
291 {
292 	int argc, envc;
293 	char **vectp;
294 	char *stringp, *destp;
295 	register_t *stack_base;
296 	struct ps_strings *arginfo;
297 	struct proc *p;
298 
299 	/*
300 	 * Calculate string base and vector table pointers.
301 	 * Also deal with signal trampoline code for this exec type.
302 	 */
303 	p = imgp->proc;
304 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
305 	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
306 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
307 	    sizeof(char *));
308 
309 	/*
310 	 * install sigcode
311 	 */
312 	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
313 	    linux_szsigcode), linux_szsigcode);
314 
315 	/*
316 	 * install LINUX_PLATFORM
317 	 */
318 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
319 	    linux_szplatform), linux_szplatform);
320 
321 	/*
322 	 * If we have a valid auxargs ptr, prepare some room
323 	 * on the stack.
324 	 */
325 	if (imgp->auxargs) {
326 		/*
327 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
328 		 * lower compatibility.
329 		 */
330 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
331 		    (LINUX_AT_COUNT * 2);
332 		/*
333 		 * The '+ 2' is for the null pointers at the end of each of
334 		 * the arg and env vector sets,and imgp->auxarg_size is room
335 		 * for argument of Runtime loader.
336 		 */
337 		vectp = (char **)(destp - (imgp->args->argc +
338 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
339 	} else {
340 		/*
341 		 * The '+ 2' is for the null pointers at the end of each of
342 		 * the arg and env vector sets
343 		 */
344 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
345 		    sizeof(char *));
346 	}
347 
348 	/*
349 	 * vectp also becomes our initial stack base
350 	 */
351 	stack_base = (register_t *)vectp;
352 
353 	stringp = imgp->args->begin_argv;
354 	argc = imgp->args->argc;
355 	envc = imgp->args->envc;
356 
357 	/*
358 	 * Copy out strings - arguments and environment.
359 	 */
360 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
361 
362 	/*
363 	 * Fill in "ps_strings" struct for ps, w, etc.
364 	 */
365 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
366 	suword(&arginfo->ps_nargvstr, argc);
367 
368 	/*
369 	 * Fill in argument portion of vector table.
370 	 */
371 	for (; argc > 0; --argc) {
372 		suword(vectp++, (long)(intptr_t)destp);
373 		while (*stringp++ != 0)
374 			destp++;
375 		destp++;
376 	}
377 
378 	/* a null vector table pointer separates the argp's from the envp's */
379 	suword(vectp++, 0);
380 
381 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
382 	suword(&arginfo->ps_nenvstr, envc);
383 
384 	/*
385 	 * Fill in environment portion of vector table.
386 	 */
387 	for (; envc > 0; --envc) {
388 		suword(vectp++, (long)(intptr_t)destp);
389 		while (*stringp++ != 0)
390 			destp++;
391 		destp++;
392 	}
393 
394 	/* end of vector table is a null pointer */
395 	suword(vectp, 0);
396 
397 	return (stack_base);
398 }
399 
400 
401 
402 extern int _ucodesel, _udatasel;
403 extern unsigned long linux_sznonrtsigcode;
404 
405 static void
406 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
407 {
408 	struct thread *td = curthread;
409 	struct proc *p = td->td_proc;
410 	struct sigacts *psp;
411 	struct trapframe *regs;
412 	struct l_rt_sigframe *fp, frame;
413 	int sig, code;
414 	int oonstack;
415 
416 	sig = ksi->ksi_signo;
417 	code = ksi->ksi_code;
418 	PROC_LOCK_ASSERT(p, MA_OWNED);
419 	psp = p->p_sigacts;
420 	mtx_assert(&psp->ps_mtx, MA_OWNED);
421 	regs = td->td_frame;
422 	oonstack = sigonstack(regs->tf_esp);
423 
424 #ifdef DEBUG
425 	if (ldebug(rt_sendsig))
426 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
427 		    catcher, sig, (void*)mask, code);
428 #endif
429 	/*
430 	 * Allocate space for the signal handler context.
431 	 */
432 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
433 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
434 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
435 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
436 	} else
437 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
438 	mtx_unlock(&psp->ps_mtx);
439 
440 	/*
441 	 * Build the argument list for the signal handler.
442 	 */
443 	if (p->p_sysent->sv_sigtbl)
444 		if (sig <= p->p_sysent->sv_sigsize)
445 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
446 
447 	bzero(&frame, sizeof(frame));
448 
449 	frame.sf_handler = catcher;
450 	frame.sf_sig = sig;
451 	frame.sf_siginfo = &fp->sf_si;
452 	frame.sf_ucontext = &fp->sf_sc;
453 
454 	/* Fill in POSIX parts */
455 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
456 
457 	/*
458 	 * Build the signal context to be used by sigreturn.
459 	 */
460 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
461 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
462 
463 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
464 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
465 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
466 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
467 	PROC_UNLOCK(p);
468 
469 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
470 
471 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
472 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
473 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
474 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
475 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
476 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
477 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
478 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
479 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
480 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
481 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
482 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
483 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
484 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
485 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
486 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
487 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
488 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
489 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
490 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
491 
492 #ifdef DEBUG
493 	if (ldebug(rt_sendsig))
494 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
495 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
496 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
497 #endif
498 
499 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
500 		/*
501 		 * Process has trashed its stack; give it an illegal
502 		 * instruction to halt it in its tracks.
503 		 */
504 #ifdef DEBUG
505 		if (ldebug(rt_sendsig))
506 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
507 			    fp, oonstack);
508 #endif
509 		PROC_LOCK(p);
510 		sigexit(td, SIGILL);
511 	}
512 
513 	/*
514 	 * Build context to run handler in.
515 	 */
516 	regs->tf_esp = (int)fp;
517 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
518 	    linux_sznonrtsigcode;
519 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520 	regs->tf_cs = _ucodesel;
521 	regs->tf_ds = _udatasel;
522 	regs->tf_es = _udatasel;
523 	regs->tf_fs = _udatasel;
524 	regs->tf_ss = _udatasel;
525 	PROC_LOCK(p);
526 	mtx_lock(&psp->ps_mtx);
527 }
528 
529 
530 /*
531  * Send an interrupt to process.
532  *
533  * Stack is set up to allow sigcode stored
534  * in u. to call routine, followed by kcall
535  * to sigreturn routine below.  After sigreturn
536  * resets the signal mask, the stack, and the
537  * frame pointer, it returns to the user
538  * specified pc, psl.
539  */
540 static void
541 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
542 {
543 	struct thread *td = curthread;
544 	struct proc *p = td->td_proc;
545 	struct sigacts *psp;
546 	struct trapframe *regs;
547 	struct l_sigframe *fp, frame;
548 	l_sigset_t lmask;
549 	int sig, code;
550 	int oonstack, i;
551 
552 	PROC_LOCK_ASSERT(p, MA_OWNED);
553 	psp = p->p_sigacts;
554 	sig = ksi->ksi_signo;
555 	code = ksi->ksi_code;
556 	mtx_assert(&psp->ps_mtx, MA_OWNED);
557 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
558 		/* Signal handler installed with SA_SIGINFO. */
559 		linux_rt_sendsig(catcher, ksi, mask);
560 		return;
561 	}
562 	regs = td->td_frame;
563 	oonstack = sigonstack(regs->tf_esp);
564 
565 #ifdef DEBUG
566 	if (ldebug(sendsig))
567 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
568 		    catcher, sig, (void*)mask, code);
569 #endif
570 
571 	/*
572 	 * Allocate space for the signal handler context.
573 	 */
574 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
575 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
576 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
577 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
578 	} else
579 		fp = (struct l_sigframe *)regs->tf_esp - 1;
580 	mtx_unlock(&psp->ps_mtx);
581 	PROC_UNLOCK(p);
582 
583 	/*
584 	 * Build the argument list for the signal handler.
585 	 */
586 	if (p->p_sysent->sv_sigtbl)
587 		if (sig <= p->p_sysent->sv_sigsize)
588 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
589 
590 	bzero(&frame, sizeof(frame));
591 
592 	frame.sf_handler = catcher;
593 	frame.sf_sig = sig;
594 
595 	bsd_to_linux_sigset(mask, &lmask);
596 
597 	/*
598 	 * Build the signal context to be used by sigreturn.
599 	 */
600 	frame.sf_sc.sc_mask   = lmask.__bits[0];
601 	frame.sf_sc.sc_gs     = rgs();
602 	frame.sf_sc.sc_fs     = regs->tf_fs;
603 	frame.sf_sc.sc_es     = regs->tf_es;
604 	frame.sf_sc.sc_ds     = regs->tf_ds;
605 	frame.sf_sc.sc_edi    = regs->tf_edi;
606 	frame.sf_sc.sc_esi    = regs->tf_esi;
607 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
608 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
609 	frame.sf_sc.sc_edx    = regs->tf_edx;
610 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
611 	frame.sf_sc.sc_eax    = regs->tf_eax;
612 	frame.sf_sc.sc_eip    = regs->tf_eip;
613 	frame.sf_sc.sc_cs     = regs->tf_cs;
614 	frame.sf_sc.sc_eflags = regs->tf_eflags;
615 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
616 	frame.sf_sc.sc_ss     = regs->tf_ss;
617 	frame.sf_sc.sc_err    = regs->tf_err;
618 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
619 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
620 
621 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622 		frame.sf_extramask[i] = lmask.__bits[i+1];
623 
624 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
625 		/*
626 		 * Process has trashed its stack; give it an illegal
627 		 * instruction to halt it in its tracks.
628 		 */
629 		PROC_LOCK(p);
630 		sigexit(td, SIGILL);
631 	}
632 
633 	/*
634 	 * Build context to run handler in.
635 	 */
636 	regs->tf_esp = (int)fp;
637 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
638 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
639 	regs->tf_cs = _ucodesel;
640 	regs->tf_ds = _udatasel;
641 	regs->tf_es = _udatasel;
642 	regs->tf_fs = _udatasel;
643 	regs->tf_ss = _udatasel;
644 	PROC_LOCK(p);
645 	mtx_lock(&psp->ps_mtx);
646 }
647 
648 /*
649  * System call to cleanup state after a signal
650  * has been taken.  Reset signal mask and
651  * stack state from context left by sendsig (above).
652  * Return to previous pc and psl as specified by
653  * context left by sendsig. Check carefully to
654  * make sure that the user has not modified the
655  * psl to gain improper privileges or to cause
656  * a machine fault.
657  */
658 int
659 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
660 {
661 	struct proc *p = td->td_proc;
662 	struct l_sigframe frame;
663 	struct trapframe *regs;
664 	l_sigset_t lmask;
665 	int eflags, i;
666 	ksiginfo_t ksi;
667 
668 	regs = td->td_frame;
669 
670 #ifdef DEBUG
671 	if (ldebug(sigreturn))
672 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
673 #endif
674 	/*
675 	 * The trampoline code hands us the sigframe.
676 	 * It is unsafe to keep track of it ourselves, in the event that a
677 	 * program jumps out of a signal handler.
678 	 */
679 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
680 		return (EFAULT);
681 
682 	/*
683 	 * Check for security violations.
684 	 */
685 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 	eflags = frame.sf_sc.sc_eflags;
687 	/*
688 	 * XXX do allow users to change the privileged flag PSL_RF.  The
689 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
690 	 * sometimes set it there too.  tf_eflags is kept in the signal
691 	 * context during signal handling and there is no other place
692 	 * to remember it, so the PSL_RF bit may be corrupted by the
693 	 * signal handler without us knowing.  Corruption of the PSL_RF
694 	 * bit at worst causes one more or one less debugger trap, so
695 	 * allowing it is fairly harmless.
696 	 */
697 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
698 		return(EINVAL);
699 
700 	/*
701 	 * Don't allow users to load a valid privileged %cs.  Let the
702 	 * hardware check for invalid selectors, excess privilege in
703 	 * other selectors, invalid %eip's and invalid %esp's.
704 	 */
705 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
706 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
707 		ksiginfo_init_trap(&ksi);
708 		ksi.ksi_signo = SIGBUS;
709 		ksi.ksi_code = BUS_OBJERR;
710 		ksi.ksi_trapno = T_PROTFLT;
711 		ksi.ksi_addr = (void *)regs->tf_eip;
712 		trapsignal(td, &ksi);
713 		return(EINVAL);
714 	}
715 
716 	lmask.__bits[0] = frame.sf_sc.sc_mask;
717 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
718 		lmask.__bits[i+1] = frame.sf_extramask[i];
719 	PROC_LOCK(p);
720 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
721 	SIG_CANTMASK(td->td_sigmask);
722 	signotify(td);
723 	PROC_UNLOCK(p);
724 
725 	/*
726 	 * Restore signal context.
727 	 */
728 	/* %gs was restored by the trampoline. */
729 	regs->tf_fs     = frame.sf_sc.sc_fs;
730 	regs->tf_es     = frame.sf_sc.sc_es;
731 	regs->tf_ds     = frame.sf_sc.sc_ds;
732 	regs->tf_edi    = frame.sf_sc.sc_edi;
733 	regs->tf_esi    = frame.sf_sc.sc_esi;
734 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
735 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
736 	regs->tf_edx    = frame.sf_sc.sc_edx;
737 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
738 	regs->tf_eax    = frame.sf_sc.sc_eax;
739 	regs->tf_eip    = frame.sf_sc.sc_eip;
740 	regs->tf_cs     = frame.sf_sc.sc_cs;
741 	regs->tf_eflags = eflags;
742 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
743 	regs->tf_ss     = frame.sf_sc.sc_ss;
744 
745 	return (EJUSTRETURN);
746 }
747 
748 /*
749  * System call to cleanup state after a signal
750  * has been taken.  Reset signal mask and
751  * stack state from context left by rt_sendsig (above).
752  * Return to previous pc and psl as specified by
753  * context left by sendsig. Check carefully to
754  * make sure that the user has not modified the
755  * psl to gain improper privileges or to cause
756  * a machine fault.
757  */
758 int
759 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
760 {
761 	struct proc *p = td->td_proc;
762 	struct l_ucontext uc;
763 	struct l_sigcontext *context;
764 	l_stack_t *lss;
765 	stack_t ss;
766 	struct trapframe *regs;
767 	int eflags;
768 	ksiginfo_t ksi;
769 
770 	regs = td->td_frame;
771 
772 #ifdef DEBUG
773 	if (ldebug(rt_sigreturn))
774 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
775 #endif
776 	/*
777 	 * The trampoline code hands us the ucontext.
778 	 * It is unsafe to keep track of it ourselves, in the event that a
779 	 * program jumps out of a signal handler.
780 	 */
781 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
782 		return (EFAULT);
783 
784 	context = &uc.uc_mcontext;
785 
786 	/*
787 	 * Check for security violations.
788 	 */
789 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
790 	eflags = context->sc_eflags;
791 	/*
792 	 * XXX do allow users to change the privileged flag PSL_RF.  The
793 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
794 	 * sometimes set it there too.  tf_eflags is kept in the signal
795 	 * context during signal handling and there is no other place
796 	 * to remember it, so the PSL_RF bit may be corrupted by the
797 	 * signal handler without us knowing.  Corruption of the PSL_RF
798 	 * bit at worst causes one more or one less debugger trap, so
799 	 * allowing it is fairly harmless.
800 	 */
801 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
802 		return(EINVAL);
803 
804 	/*
805 	 * Don't allow users to load a valid privileged %cs.  Let the
806 	 * hardware check for invalid selectors, excess privilege in
807 	 * other selectors, invalid %eip's and invalid %esp's.
808 	 */
809 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
810 	if (!CS_SECURE(context->sc_cs)) {
811 		ksiginfo_init_trap(&ksi);
812 		ksi.ksi_signo = SIGBUS;
813 		ksi.ksi_code = BUS_OBJERR;
814 		ksi.ksi_trapno = T_PROTFLT;
815 		ksi.ksi_addr = (void *)regs->tf_eip;
816 		trapsignal(td, &ksi);
817 		return(EINVAL);
818 	}
819 
820 	PROC_LOCK(p);
821 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
822 	SIG_CANTMASK(td->td_sigmask);
823 	signotify(td);
824 	PROC_UNLOCK(p);
825 
826 	/*
827 	 * Restore signal context
828 	 */
829 	/* %gs was restored by the trampoline. */
830 	regs->tf_fs     = context->sc_fs;
831 	regs->tf_es     = context->sc_es;
832 	regs->tf_ds     = context->sc_ds;
833 	regs->tf_edi    = context->sc_edi;
834 	regs->tf_esi    = context->sc_esi;
835 	regs->tf_ebp    = context->sc_ebp;
836 	regs->tf_ebx    = context->sc_ebx;
837 	regs->tf_edx    = context->sc_edx;
838 	regs->tf_ecx    = context->sc_ecx;
839 	regs->tf_eax    = context->sc_eax;
840 	regs->tf_eip    = context->sc_eip;
841 	regs->tf_cs     = context->sc_cs;
842 	regs->tf_eflags = eflags;
843 	regs->tf_esp    = context->sc_esp_at_signal;
844 	regs->tf_ss     = context->sc_ss;
845 
846 	/*
847 	 * call sigaltstack & ignore results..
848 	 */
849 	lss = &uc.uc_stack;
850 	ss.ss_sp = lss->ss_sp;
851 	ss.ss_size = lss->ss_size;
852 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
853 
854 #ifdef DEBUG
855 	if (ldebug(rt_sigreturn))
856 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
857 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
858 #endif
859 	(void)kern_sigaltstack(td, &ss, NULL);
860 
861 	return (EJUSTRETURN);
862 }
863 
864 /*
865  * MPSAFE
866  */
867 static void
868 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
869 {
870 	args[0] = tf->tf_ebx;
871 	args[1] = tf->tf_ecx;
872 	args[2] = tf->tf_edx;
873 	args[3] = tf->tf_esi;
874 	args[4] = tf->tf_edi;
875 	args[5] = tf->tf_ebp;	/* Unconfirmed */
876 	*params = NULL;		/* no copyin */
877 }
878 
879 /*
880  * If a linux binary is exec'ing something, try this image activator
881  * first.  We override standard shell script execution in order to
882  * be able to modify the interpreter path.  We only do this if a linux
883  * binary is doing the exec, so we do not create an EXEC module for it.
884  */
885 static int	exec_linux_imgact_try(struct image_params *iparams);
886 
887 static int
888 exec_linux_imgact_try(struct image_params *imgp)
889 {
890     const char *head = (const char *)imgp->image_header;
891     char *rpath;
892     int error = -1, len;
893 
894     /*
895      * The interpreter for shell scripts run from a linux binary needs
896      * to be located in /compat/linux if possible in order to recursively
897      * maintain linux path emulation.
898      */
899     if (((const short *)head)[0] == SHELLMAGIC) {
900 	    /*
901 	     * Run our normal shell image activator.  If it succeeds attempt
902 	     * to use the alternate path for the interpreter.  If an alternate
903 	     * path is found, use our stringspace to store it.
904 	     */
905 	    if ((error = exec_shell_imgact(imgp)) == 0) {
906 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
907 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
908 		    if (rpath != NULL) {
909 			    len = strlen(rpath) + 1;
910 
911 			    if (len <= MAXSHELLCMDLEN) {
912 				    memcpy(imgp->interpreter_name, rpath, len);
913 			    }
914 			    free(rpath, M_TEMP);
915 		    }
916 	    }
917     }
918     return(error);
919 }
920 
921 /*
922  * exec_setregs may initialize some registers differently than Linux
923  * does, thus potentially confusing Linux binaries. If necessary, we
924  * override the exec_setregs default(s) here.
925  */
926 static void
927 exec_linux_setregs(struct thread *td, u_long entry,
928 		   u_long stack, u_long ps_strings)
929 {
930 	struct pcb *pcb = td->td_pcb;
931 
932 	exec_setregs(td, entry, stack, ps_strings);
933 
934 	/* Linux sets %gs to 0, we default to _udatasel */
935 	pcb->pcb_gs = 0;
936 	load_gs(0);
937 
938 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
939 }
940 
941 static void
942 linux_get_machine(const char **dst)
943 {
944 
945 	switch (cpu_class) {
946 	case CPUCLASS_686:
947 		*dst = "i686";
948 		break;
949 	case CPUCLASS_586:
950 		*dst = "i586";
951 		break;
952 	case CPUCLASS_486:
953 		*dst = "i486";
954 		break;
955 	default:
956 		*dst = "i386";
957 	}
958 }
959 
960 struct sysentvec linux_sysvec = {
961 	.sv_size	= LINUX_SYS_MAXSYSCALL,
962 	.sv_table	= linux_sysent,
963 	.sv_mask	= 0,
964 	.sv_sigsize	= LINUX_SIGTBLSZ,
965 	.sv_sigtbl	= bsd_to_linux_signal,
966 	.sv_errsize	= ELAST + 1,
967 	.sv_errtbl	= bsd_to_linux_errno,
968 	.sv_transtrap	= translate_traps,
969 	.sv_fixup	= linux_fixup,
970 	.sv_sendsig	= linux_sendsig,
971 	.sv_sigcode	= linux_sigcode,
972 	.sv_szsigcode	= &linux_szsigcode,
973 	.sv_prepsyscall	= linux_prepsyscall,
974 	.sv_name	= "Linux a.out",
975 	.sv_coredump	= NULL,
976 	.sv_imgact_try	= exec_linux_imgact_try,
977 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
978 	.sv_pagesize	= PAGE_SIZE,
979 	.sv_minuser	= VM_MIN_ADDRESS,
980 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
981 	.sv_usrstack	= USRSTACK,
982 	.sv_psstrings	= PS_STRINGS,
983 	.sv_stackprot	= VM_PROT_ALL,
984 	.sv_copyout_strings = exec_copyout_strings,
985 	.sv_setregs	= exec_linux_setregs,
986 	.sv_fixlimit	= NULL,
987 	.sv_maxssiz	= NULL,
988 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
989 };
990 
991 struct sysentvec elf_linux_sysvec = {
992 	.sv_size	= LINUX_SYS_MAXSYSCALL,
993 	.sv_table	= linux_sysent,
994 	.sv_mask	= 0,
995 	.sv_sigsize	= LINUX_SIGTBLSZ,
996 	.sv_sigtbl	= bsd_to_linux_signal,
997 	.sv_errsize	= ELAST + 1,
998 	.sv_errtbl	= bsd_to_linux_errno,
999 	.sv_transtrap	= translate_traps,
1000 	.sv_fixup	= elf_linux_fixup,
1001 	.sv_sendsig	= linux_sendsig,
1002 	.sv_sigcode	= linux_sigcode,
1003 	.sv_szsigcode	= &linux_szsigcode,
1004 	.sv_prepsyscall	= linux_prepsyscall,
1005 	.sv_name	= "Linux ELF",
1006 	.sv_coredump	= elf32_coredump,
1007 	.sv_imgact_try	= exec_linux_imgact_try,
1008 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1009 	.sv_pagesize	= PAGE_SIZE,
1010 	.sv_minuser	= VM_MIN_ADDRESS,
1011 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1012 	.sv_usrstack	= USRSTACK,
1013 	.sv_psstrings	= PS_STRINGS,
1014 	.sv_stackprot	= VM_PROT_ALL,
1015 	.sv_copyout_strings = linux_copyout_strings,
1016 	.sv_setregs	= exec_linux_setregs,
1017 	.sv_fixlimit	= NULL,
1018 	.sv_maxssiz	= NULL,
1019 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32
1020 };
1021 
1022 static char GNULINUX_ABI_VENDOR[] = "GNU";
1023 
1024 static Elf_Brandnote linux_brandnote = {
1025 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1026 	.hdr.n_descsz	= 16,
1027 	.hdr.n_type	= 1,
1028 	.vendor		= GNULINUX_ABI_VENDOR,
1029 	.flags		= 0
1030 };
1031 
1032 static Elf32_Brandinfo linux_brand = {
1033 	.brand		= ELFOSABI_LINUX,
1034 	.machine	= EM_386,
1035 	.compat_3_brand	= "Linux",
1036 	.emul_path	= "/compat/linux",
1037 	.interp_path	= "/lib/ld-linux.so.1",
1038 	.sysvec		= &elf_linux_sysvec,
1039 	.interp_newpath	= NULL,
1040 	.brand_note	= &linux_brandnote,
1041 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1042 };
1043 
1044 static Elf32_Brandinfo linux_glibc2brand = {
1045 	.brand		= ELFOSABI_LINUX,
1046 	.machine	= EM_386,
1047 	.compat_3_brand	= "Linux",
1048 	.emul_path	= "/compat/linux",
1049 	.interp_path	= "/lib/ld-linux.so.2",
1050 	.sysvec		= &elf_linux_sysvec,
1051 	.interp_newpath	= NULL,
1052 	.brand_note	= &linux_brandnote,
1053 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1054 };
1055 
1056 Elf32_Brandinfo *linux_brandlist[] = {
1057 	&linux_brand,
1058 	&linux_glibc2brand,
1059 	NULL
1060 };
1061 
1062 static int
1063 linux_elf_modevent(module_t mod, int type, void *data)
1064 {
1065 	Elf32_Brandinfo **brandinfo;
1066 	int error;
1067 	struct linux_ioctl_handler **lihp;
1068 	struct linux_device_handler **ldhp;
1069 
1070 	error = 0;
1071 
1072 	switch(type) {
1073 	case MOD_LOAD:
1074 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1075 		     ++brandinfo)
1076 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1077 				error = EINVAL;
1078 		if (error == 0) {
1079 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1080 				linux_ioctl_register_handler(*lihp);
1081 			SET_FOREACH(ldhp, linux_device_handler_set)
1082 				linux_device_register_handler(*ldhp);
1083 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1084 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1085 			LIST_INIT(&futex_list);
1086 			sx_init(&futex_sx, "futex protection lock");
1087 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1088 			      NULL, 1000);
1089 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1090 			      NULL, 1000);
1091 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1092 			      NULL, 1000);
1093 			linux_get_machine(&linux_platform);
1094 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1095 			    sizeof(char *));
1096 			if (bootverbose)
1097 				printf("Linux ELF exec handler installed\n");
1098 		} else
1099 			printf("cannot insert Linux ELF brand handler\n");
1100 		break;
1101 	case MOD_UNLOAD:
1102 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1103 		     ++brandinfo)
1104 			if (elf32_brand_inuse(*brandinfo))
1105 				error = EBUSY;
1106 		if (error == 0) {
1107 			for (brandinfo = &linux_brandlist[0];
1108 			     *brandinfo != NULL; ++brandinfo)
1109 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1110 					error = EINVAL;
1111 		}
1112 		if (error == 0) {
1113 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1114 				linux_ioctl_unregister_handler(*lihp);
1115 			SET_FOREACH(ldhp, linux_device_handler_set)
1116 				linux_device_unregister_handler(*ldhp);
1117 			mtx_destroy(&emul_lock);
1118 			sx_destroy(&emul_shared_lock);
1119 			sx_destroy(&futex_sx);
1120 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1121 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1122 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1123 			if (bootverbose)
1124 				printf("Linux ELF exec handler removed\n");
1125 		} else
1126 			printf("Could not deinstall ELF interpreter entry\n");
1127 		break;
1128 	default:
1129 		return EOPNOTSUPP;
1130 	}
1131 	return error;
1132 }
1133 
1134 static moduledata_t linux_elf_mod = {
1135 	"linuxelf",
1136 	linux_elf_modevent,
1137 	0
1138 };
1139 
1140 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1141