xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 17dfbc1c43467f952aabe4c72744b33a258a6488)
1 /*-
 * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_futex.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
73 
74 MODULE_VERSION(linux, 1);
75 
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
77 
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC      0x2123 /* #! */
80 #else
81 #define SHELLMAGIC      0x2321
82 #endif
83 
84 /*
85  * Allow the sendsig functions to use the ldebug() facility
86  * even though they are not syscalls themselves. Map them
87  * to syscall 0. This is slightly less bogus than using
88  * ldebug(sigreturn).
89  */
90 #define	LINUX_SYS_linux_rt_sendsig	0
91 #define	LINUX_SYS_linux_sendsig		0
92 
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
95 
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97 
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100 
101 static int	linux_fixup(register_t **stack_base,
102 		    struct image_params *iparams);
103 static int	elf_linux_fixup(register_t **stack_base,
104 		    struct image_params *iparams);
105 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
106 		    caddr_t *params);
107 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108 static void	exec_linux_setregs(struct thread *td, u_long entry,
109 				   u_long stack, u_long ps_strings);
110 static register_t *linux_copyout_strings(struct image_params *imgp);
111 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
112 
/*
 * Platform string exported to Linux ELF images.  linux_copyout_strings()
 * copies linux_szplatform bytes from linux_platform onto the new user
 * stack, and elf_linux_fixup() passes that user address in the
 * LINUX_AT_PLATFORM aux entry.
 */
static int linux_szplatform;
const char *linux_platform;

/*
 * Event handler registration tags.
 * NOTE(review): not referenced in the visible part of this file;
 * presumably set up and torn down by the module event handler -- confirm.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
119 
/*
 * FreeBSD -> Linux errno translation, installed as sv_errtbl in the
 * sysentvecs below.
 *
 * Linux syscalls return negative errno values while FreeBSD uses positive
 * ones, so every entry is already negated.  The array index is the
 * FreeBSD errno (0 .. ELAST); the trailing comment on each row gives the
 * index range that row covers.
 *
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
	 -72, -67, -71						/* 90 - 92 */
};
139 
140 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
149 };
150 
151 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
153 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159 	SIGIO, SIGURG, SIGSYS
160 };
161 
/*
 * Translation from a FreeBSD trap type (the array index, named in each
 * row's comment) to the trap number stored in the sc_trapno field of the
 * signal frames built below.  Rows holding LINUX_T_UNKNOWN have no
 * translation in this table.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above; anything past the last row
 * yields LINUX_T_UNKNOWN.  The bound is a sizeof expression, so the
 * comparison is done unsigned and a negative int argument is rejected
 * as well.  Note that `code' is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
200 
201 /*
202  * If FreeBSD & Linux have a difference of opinion about what a trap
203  * means, deal with it here.
204  *
205  * MPSAFE
206  */
207 static int
208 translate_traps(int signal, int trap_code)
209 {
210 	if (signal != SIGBUS)
211 		return signal;
212 	switch (trap_code) {
213 	case T_PROTFLT:
214 	case T_TSSFLT:
215 	case T_DOUBLEFLT:
216 	case T_PAGEFLT:
217 		return SIGSEGV;
218 	default:
219 		return signal;
220 	}
221 }
222 
223 static int
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
225 {
226 	register_t *argv, *envp;
227 
228 	argv = *stack_base;
229 	envp = *stack_base + (imgp->args->argc + 1);
230 	(*stack_base)--;
231 	**stack_base = (intptr_t)(void *)envp;
232 	(*stack_base)--;
233 	**stack_base = (intptr_t)(void *)argv;
234 	(*stack_base)--;
235 	**stack_base = imgp->args->argc;
236 	return (0);
237 }
238 
/*
 * Stack fixup for the ELF sysentvec (sv_fixup of elf_linux_sysvec).
 *
 * Writes the ELF auxiliary vector immediately above the argv/envp
 * pointer arrays (argc + envc + 2 slots past *stack_base), releases
 * imgp->auxargs, and finally pushes argc below the vector table so that
 * *stack_base points at it.  Must be called in the context of the
 * process being exec'ed (asserted).  Always returns 0.
 */
static int
elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
{
	struct proc *p;
	Elf32_Auxargs *args;
	Elf32_Addr *uplatform;
	struct ps_strings *arginfo;
	register_t *pos;

	KASSERT(curthread->td_proc == imgp->proc,
	    ("unsafe elf_linux_fixup(), should be curproc"));

	p = imgp->proc;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	/*
	 * User address of the platform string; this is the same location
	 * linux_copyout_strings() copies linux_platform to.
	 */
	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
	    linux_szplatform);
	args = (Elf32_Auxargs *)imgp->auxargs;
	/* Skip argv[], its NULL, envp[] and its NULL. */
	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);

	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);

	/*
	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
	 * as it has appeared in the 2.4.0-rc7 first time.
	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
	 * is not present.
	 * Also see linux_times() implementation.
	 */
	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
	/*
	 * NOTE(review): AT_EUID/AT_EGID are filled from the saved ids
	 * (cr_svuid/cr_svgid) rather than the effective ids -- confirm
	 * this is intended.
	 */
	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	/* The aux arguments have been consumed; drop them. */
	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	/* Push argc just below the argv array. */
	(*stack_base)--;
	**stack_base = (register_t)imgp->args->argc;
	return (0);
}
294 
/*
 * Copy the argument and environment strings, the signal trampoline and
 * the platform string onto the new image's user stack, and build the
 * argv/envp pointer arrays.  Copied from kern/kern_exec.c.
 *
 * Layout, working downwards from the ps_strings structure:
 *   - signal trampoline (linux_szsigcode bytes)
 *   - platform string (linux_szplatform bytes)
 *   - SPARE_USRSPACE bytes of padding
 *   - argument/environment strings, rounded up to a pointer multiple
 *   - room for the aux vector (only when imgp->auxargs is set),
 *     plus the argv and envp pointer arrays with their NULL terminators
 *
 * Returns the address of the argv pointer array, which becomes the
 * initial stack base.
 *
 * NOTE(review): the return values of copyout() and suword() are not
 * checked anywhere in this function.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	char **vectp;
	char *stringp, *destp;
	register_t *stack_base;
	struct ps_strings *arginfo;
	struct proc *p;

	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	p = imgp->proc;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
	    sizeof(char *));

	/*
	 * Install the signal trampoline directly below ps_strings.
	 */
	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
	    linux_szsigcode), linux_szsigcode);

	/*
	 * Install the platform string directly below the trampoline.
	 */
	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
	    linux_szplatform), linux_szplatform);

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * Default to LINUX_AT_COUNT * 2 slots for the ELF aux
		 * vector when the image activator did not set a size.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets, and imgp->auxarg_size is room
		 * for the arguments of the runtime loader.
		 */
		vectp = (char **)(destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
	} else {
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
		    sizeof(char *));
	}

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = (register_t *)vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;

	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.  stringp walks the
	 * kernel copy of the strings while destp tracks the matching
	 * user address, so each pointer stored refers to the user copy.
	 */
	for (; argc > 0; --argc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword(vectp++, 0);

	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword(vectp, 0);

	return (stack_base);
}
408 
409 
410 
411 extern int _ucodesel, _udatasel;
412 extern unsigned long linux_sznonrtsigcode;
413 
414 static void
415 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
416 {
417 	struct thread *td = curthread;
418 	struct proc *p = td->td_proc;
419 	struct sigacts *psp;
420 	struct trapframe *regs;
421 	struct l_rt_sigframe *fp, frame;
422 	int sig, code;
423 	int oonstack;
424 
425 	sig = ksi->ksi_signo;
426 	code = ksi->ksi_code;
427 	PROC_LOCK_ASSERT(p, MA_OWNED);
428 	psp = p->p_sigacts;
429 	mtx_assert(&psp->ps_mtx, MA_OWNED);
430 	regs = td->td_frame;
431 	oonstack = sigonstack(regs->tf_esp);
432 
433 #ifdef DEBUG
434 	if (ldebug(rt_sendsig))
435 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
436 		    catcher, sig, (void*)mask, code);
437 #endif
438 	/*
439 	 * Allocate space for the signal handler context.
440 	 */
441 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
442 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
443 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
444 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
445 	} else
446 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
447 	mtx_unlock(&psp->ps_mtx);
448 
449 	/*
450 	 * Build the argument list for the signal handler.
451 	 */
452 	if (p->p_sysent->sv_sigtbl)
453 		if (sig <= p->p_sysent->sv_sigsize)
454 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
455 
456 	bzero(&frame, sizeof(frame));
457 
458 	frame.sf_handler = catcher;
459 	frame.sf_sig = sig;
460 	frame.sf_siginfo = &fp->sf_si;
461 	frame.sf_ucontext = &fp->sf_sc;
462 
463 	/* Fill in POSIX parts */
464 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
465 
466 	/*
467 	 * Build the signal context to be used by sigreturn.
468 	 */
469 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
470 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
471 
472 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
473 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
474 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
475 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
476 	PROC_UNLOCK(p);
477 
478 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
479 
480 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
481 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
482 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
483 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
484 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
485 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
486 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
487 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
488 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
489 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
490 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
491 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
492 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
493 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
494 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
495 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
496 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
497 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
498 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
499 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
500 
501 #ifdef DEBUG
502 	if (ldebug(rt_sendsig))
503 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
504 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
505 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
506 #endif
507 
508 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
509 		/*
510 		 * Process has trashed its stack; give it an illegal
511 		 * instruction to halt it in its tracks.
512 		 */
513 #ifdef DEBUG
514 		if (ldebug(rt_sendsig))
515 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
516 			    fp, oonstack);
517 #endif
518 		PROC_LOCK(p);
519 		sigexit(td, SIGILL);
520 	}
521 
522 	/*
523 	 * Build context to run handler in.
524 	 */
525 	regs->tf_esp = (int)fp;
526 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
527 	    linux_sznonrtsigcode;
528 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
529 	regs->tf_cs = _ucodesel;
530 	regs->tf_ds = _udatasel;
531 	regs->tf_es = _udatasel;
532 	regs->tf_fs = _udatasel;
533 	regs->tf_ss = _udatasel;
534 	PROC_LOCK(p);
535 	mtx_lock(&psp->ps_mtx);
536 }
537 
538 
/*
 * Send a signal to the current thread's process (sv_sendsig of both
 * Linux sysentvecs).
 *
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig().
 * Otherwise a classic Linux signal frame is built and copied to the
 * user stack -- or to the alternate signal stack when one is enabled,
 * not already in use and requested for this signal -- and the thread is
 * redirected to the start of the signal trampoline.  The trampoline
 * calls the handler and then the sigreturn routine below, which resets
 * the signal mask and the saved registers and returns to the
 * interrupted pc and psl.
 *
 * Called with the process lock and the sigacts mutex held.  Both are
 * dropped while the frame is built and copied out, and both are held
 * again on return.
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int sig, code;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_esp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 * The signal number is translated through the sysentvec's table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_edi;
	frame.sf_sc.sc_esi    = regs->tf_esi;
	frame.sf_sc.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.sc_edx    = regs->tf_edx;
	frame.sf_sc.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.sc_eax    = regs->tf_eax;
	frame.sf_sc.sc_eip    = regs->tf_eip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_eflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);

	/* The first mask word lives in sc_mask; the rest go here. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.  Execution resumes at the
	 * start of the trampoline, just below PS_STRINGS.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Return with the locks held, as on entry. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
656 
/*
 * System call to clean up state after a signal has been taken.
 *
 * Copies in the signal frame left by linux_sendsig() (above), validates
 * it, then restores the signal mask and the saved registers so the
 * thread returns to the pc and psl recorded in the frame.  The frame is
 * checked carefully so that the user cannot modify the psl to gain
 * improper privileges or to cause a machine fault.
 *
 * Returns EFAULT if the frame cannot be read, EINVAL if the saved
 * eflags or %cs fail validation (a bad %cs additionally posts SIGBUS),
 * and EJUSTRETURN on success.
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations: only the bits in PSL_USERCHANGE
	 * may differ from the current eflags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_eip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/*
	 * Reassemble the Linux signal mask (first word from sc_mask, the
	 * remainder from sf_extramask) and install it under the proc lock.
	 */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_edi    = frame.sf_sc.sc_edi;
	regs->tf_esi    = frame.sf_sc.sc_esi;
	regs->tf_ebp    = frame.sf_sc.sc_ebp;
	regs->tf_ebx    = frame.sf_sc.sc_ebx;
	regs->tf_edx    = frame.sf_sc.sc_edx;
	regs->tf_ecx    = frame.sf_sc.sc_ecx;
	regs->tf_eax    = frame.sf_sc.sc_eax;
	regs->tf_eip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;

	return (EJUSTRETURN);
}
756 
/*
 * System call to clean up state after an rt signal has been taken.
 *
 * Copies in the ucontext left by linux_rt_sendsig() (above), validates
 * it, then restores the signal mask, the saved registers and the
 * alternate signal stack settings so the thread returns to the pc and
 * psl recorded in the context.  The context is checked carefully so
 * that the user cannot modify the psl to gain improper privileges or
 * to cause a machine fault.
 *
 * Returns EFAULT if the ucontext cannot be read, EINVAL if the saved
 * eflags or %cs fail validation (a bad %cs additionally posts SIGBUS),
 * and EJUSTRETURN on success.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations: only the bits in PSL_USERCHANGE
	 * may differ from the current eflags.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_eip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Install the saved signal mask under the proc lock. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = context->sc_fs;
	regs->tf_es     = context->sc_es;
	regs->tf_ds     = context->sc_ds;
	regs->tf_edi    = context->sc_edi;
	regs->tf_esi    = context->sc_esi;
	regs->tf_ebp    = context->sc_ebp;
	regs->tf_ebx    = context->sc_ebx;
	regs->tf_edx    = context->sc_edx;
	regs->tf_ecx    = context->sc_ecx;
	regs->tf_eax    = context->sc_eax;
	regs->tf_eip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * Re-apply the alternate stack settings saved in the ucontext.
	 * The result of kern_sigaltstack() is deliberately ignored.
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = lss->ss_sp;
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
872 
873 /*
874  * MPSAFE
875  */
876 static void
877 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
878 {
879 	args[0] = tf->tf_ebx;
880 	args[1] = tf->tf_ecx;
881 	args[2] = tf->tf_edx;
882 	args[3] = tf->tf_esi;
883 	args[4] = tf->tf_edi;
884 	args[5] = tf->tf_ebp;	/* Unconfirmed */
885 	*params = NULL;		/* no copyin */
886 }
887 
888 /*
889  * If a linux binary is exec'ing something, try this image activator
890  * first.  We override standard shell script execution in order to
891  * be able to modify the interpreter path.  We only do this if a linux
892  * binary is doing the exec, so we do not create an EXEC module for it.
893  */
894 static int	exec_linux_imgact_try(struct image_params *iparams);
895 
896 static int
897 exec_linux_imgact_try(struct image_params *imgp)
898 {
899     const char *head = (const char *)imgp->image_header;
900     char *rpath;
901     int error = -1, len;
902 
903     /*
904      * The interpreter for shell scripts run from a linux binary needs
905      * to be located in /compat/linux if possible in order to recursively
906      * maintain linux path emulation.
907      */
908     if (((const short *)head)[0] == SHELLMAGIC) {
909 	    /*
910 	     * Run our normal shell image activator.  If it succeeds attempt
911 	     * to use the alternate path for the interpreter.  If an alternate
912 	     * path is found, use our stringspace to store it.
913 	     */
914 	    if ((error = exec_shell_imgact(imgp)) == 0) {
915 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
916 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
917 		    if (rpath != NULL) {
918 			    len = strlen(rpath) + 1;
919 
920 			    if (len <= MAXSHELLCMDLEN) {
921 				    memcpy(imgp->interpreter_name, rpath, len);
922 			    }
923 			    free(rpath, M_TEMP);
924 		    }
925 	    }
926     }
927     return(error);
928 }
929 
930 /*
931  * exec_setregs may initialize some registers differently than Linux
932  * does, thus potentially confusing Linux binaries. If necessary, we
933  * override the exec_setregs default(s) here.
934  */
935 static void
936 exec_linux_setregs(struct thread *td, u_long entry,
937 		   u_long stack, u_long ps_strings)
938 {
939 	struct pcb *pcb = td->td_pcb;
940 
941 	exec_setregs(td, entry, stack, ps_strings);
942 
943 	/* Linux sets %gs to 0, we default to _udatasel */
944 	pcb->pcb_gs = 0;
945 	load_gs(0);
946 
947 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
948 }
949 
950 static void
951 linux_get_machine(const char **dst)
952 {
953 
954 	switch (cpu_class) {
955 	case CPUCLASS_686:
956 		*dst = "i686";
957 		break;
958 	case CPUCLASS_586:
959 		*dst = "i586";
960 		break;
961 	case CPUCLASS_486:
962 		*dst = "i486";
963 		break;
964 	default:
965 		*dst = "i386";
966 	}
967 }
968 
969 struct sysentvec linux_sysvec = {
970 	.sv_size	= LINUX_SYS_MAXSYSCALL,
971 	.sv_table	= linux_sysent,
972 	.sv_mask	= 0,
973 	.sv_sigsize	= LINUX_SIGTBLSZ,
974 	.sv_sigtbl	= bsd_to_linux_signal,
975 	.sv_errsize	= ELAST + 1,
976 	.sv_errtbl	= bsd_to_linux_errno,
977 	.sv_transtrap	= translate_traps,
978 	.sv_fixup	= linux_fixup,
979 	.sv_sendsig	= linux_sendsig,
980 	.sv_sigcode	= linux_sigcode,
981 	.sv_szsigcode	= &linux_szsigcode,
982 	.sv_prepsyscall	= linux_prepsyscall,
983 	.sv_name	= "Linux a.out",
984 	.sv_coredump	= NULL,
985 	.sv_imgact_try	= exec_linux_imgact_try,
986 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
987 	.sv_pagesize	= PAGE_SIZE,
988 	.sv_minuser	= VM_MIN_ADDRESS,
989 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
990 	.sv_usrstack	= USRSTACK,
991 	.sv_psstrings	= PS_STRINGS,
992 	.sv_stackprot	= VM_PROT_ALL,
993 	.sv_copyout_strings = exec_copyout_strings,
994 	.sv_setregs	= exec_linux_setregs,
995 	.sv_fixlimit	= NULL,
996 	.sv_maxssiz	= NULL,
997 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
998 };
999 
1000 struct sysentvec elf_linux_sysvec = {
1001 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1002 	.sv_table	= linux_sysent,
1003 	.sv_mask	= 0,
1004 	.sv_sigsize	= LINUX_SIGTBLSZ,
1005 	.sv_sigtbl	= bsd_to_linux_signal,
1006 	.sv_errsize	= ELAST + 1,
1007 	.sv_errtbl	= bsd_to_linux_errno,
1008 	.sv_transtrap	= translate_traps,
1009 	.sv_fixup	= elf_linux_fixup,
1010 	.sv_sendsig	= linux_sendsig,
1011 	.sv_sigcode	= linux_sigcode,
1012 	.sv_szsigcode	= &linux_szsigcode,
1013 	.sv_prepsyscall	= linux_prepsyscall,
1014 	.sv_name	= "Linux ELF",
1015 	.sv_coredump	= elf32_coredump,
1016 	.sv_imgact_try	= exec_linux_imgact_try,
1017 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1018 	.sv_pagesize	= PAGE_SIZE,
1019 	.sv_minuser	= VM_MIN_ADDRESS,
1020 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1021 	.sv_usrstack	= USRSTACK,
1022 	.sv_psstrings	= PS_STRINGS,
1023 	.sv_stackprot	= VM_PROT_ALL,
1024 	.sv_copyout_strings = linux_copyout_strings,
1025 	.sv_setregs	= exec_linux_setregs,
1026 	.sv_fixlimit	= NULL,
1027 	.sv_maxssiz	= NULL,
1028 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32
1029 };
1030 
1031 static char GNU_ABI_VENDOR[] = "GNU";
1032 static int GNULINUX_ABI_DESC = 0;
1033 
1034 static boolean_t
1035 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1036 {
1037 	const Elf32_Word *desc;
1038 	uintptr_t p;
1039 
1040 	p = (uintptr_t)(note + 1);
1041 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1042 
1043 	desc = (const Elf32_Word *)p;
1044 	if (desc[0] != GNULINUX_ABI_DESC)
1045 		return (FALSE);
1046 
1047 	/*
1048 	 * For linux we encode osrel as follows (see linux_mib.c):
1049 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1050 	 */
1051 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1052 
1053 	return (TRUE);
1054 }
1055 
1056 static Elf_Brandnote linux_brandnote = {
1057 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1058 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1059 	.hdr.n_type	= 1,
1060 	.vendor		= GNU_ABI_VENDOR,
1061 	.flags		= BN_TRANSLATE_OSREL,
1062 	.trans_osrel	= linux_trans_osrel
1063 };
1064 
1065 static Elf32_Brandinfo linux_brand = {
1066 	.brand		= ELFOSABI_LINUX,
1067 	.machine	= EM_386,
1068 	.compat_3_brand	= "Linux",
1069 	.emul_path	= "/compat/linux",
1070 	.interp_path	= "/lib/ld-linux.so.1",
1071 	.sysvec		= &elf_linux_sysvec,
1072 	.interp_newpath	= NULL,
1073 	.brand_note	= &linux_brandnote,
1074 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1075 };
1076 
1077 static Elf32_Brandinfo linux_glibc2brand = {
1078 	.brand		= ELFOSABI_LINUX,
1079 	.machine	= EM_386,
1080 	.compat_3_brand	= "Linux",
1081 	.emul_path	= "/compat/linux",
1082 	.interp_path	= "/lib/ld-linux.so.2",
1083 	.sysvec		= &elf_linux_sysvec,
1084 	.interp_newpath	= NULL,
1085 	.brand_note	= &linux_brandnote,
1086 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1087 };
1088 
1089 Elf32_Brandinfo *linux_brandlist[] = {
1090 	&linux_brand,
1091 	&linux_glibc2brand,
1092 	NULL
1093 };
1094 
1095 static int
1096 linux_elf_modevent(module_t mod, int type, void *data)
1097 {
1098 	Elf32_Brandinfo **brandinfo;
1099 	int error;
1100 	struct linux_ioctl_handler **lihp;
1101 	struct linux_device_handler **ldhp;
1102 
1103 	error = 0;
1104 
1105 	switch(type) {
1106 	case MOD_LOAD:
1107 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1108 		     ++brandinfo)
1109 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1110 				error = EINVAL;
1111 		if (error == 0) {
1112 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1113 				linux_ioctl_register_handler(*lihp);
1114 			SET_FOREACH(ldhp, linux_device_handler_set)
1115 				linux_device_register_handler(*ldhp);
1116 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1117 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1118 			LIST_INIT(&futex_list);
1119 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1120 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1121 			      NULL, 1000);
1122 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1123 			      NULL, 1000);
1124 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1125 			      NULL, 1000);
1126 			linux_get_machine(&linux_platform);
1127 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1128 			    sizeof(char *));
1129 			linux_osd_jail_register();
1130 			stclohz = (stathz ? stathz : hz);
1131 			if (bootverbose)
1132 				printf("Linux ELF exec handler installed\n");
1133 		} else
1134 			printf("cannot insert Linux ELF brand handler\n");
1135 		break;
1136 	case MOD_UNLOAD:
1137 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1138 		     ++brandinfo)
1139 			if (elf32_brand_inuse(*brandinfo))
1140 				error = EBUSY;
1141 		if (error == 0) {
1142 			for (brandinfo = &linux_brandlist[0];
1143 			     *brandinfo != NULL; ++brandinfo)
1144 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1145 					error = EINVAL;
1146 		}
1147 		if (error == 0) {
1148 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1149 				linux_ioctl_unregister_handler(*lihp);
1150 			SET_FOREACH(ldhp, linux_device_handler_set)
1151 				linux_device_unregister_handler(*ldhp);
1152 			mtx_destroy(&emul_lock);
1153 			sx_destroy(&emul_shared_lock);
1154 			mtx_destroy(&futex_mtx);
1155 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1156 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1157 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1158 			linux_osd_jail_deregister();
1159 			if (bootverbose)
1160 				printf("Linux ELF exec handler removed\n");
1161 		} else
1162 			printf("Could not deinstall ELF interpreter entry\n");
1163 		break;
1164 	default:
1165 		return EOPNOTSUPP;
1166 	}
1167 	return error;
1168 }
1169 
1170 static moduledata_t linux_elf_mod = {
1171 	"linuxelf",
1172 	linux_elf_modevent,
1173 	0
1174 };
1175 
1176 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1177