xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 5686c6c38a3e1cc78804eaf5f880bda23dcf592f)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_ioctl.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 
75 MODULE_VERSION(linux, 1);
76 
77 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
78 
79 #if BYTE_ORDER == LITTLE_ENDIAN
80 #define SHELLMAGIC      0x2123 /* #! */
81 #else
82 #define SHELLMAGIC      0x2321
83 #endif
84 
85 /*
86  * Allow the sendsig functions to use the ldebug() facility
87  * even though they are not syscalls themselves. Map them
88  * to syscall 0. This is slightly less bogus than using
89  * ldebug(sigreturn).
90  */
91 #define	LINUX_SYS_linux_rt_sendsig	0
92 #define	LINUX_SYS_linux_sendsig		0
93 
94 #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
95 
96 extern char linux_sigcode[];
97 extern int linux_szsigcode;
98 
99 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
100 
101 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
102 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
103 
104 static int	linux_fixup(register_t **stack_base,
105 		    struct image_params *iparams);
106 static int	elf_linux_fixup(register_t **stack_base,
107 		    struct image_params *iparams);
108 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
109 static void	exec_linux_setregs(struct thread *td,
110 		    struct image_params *imgp, u_long stack);
111 static register_t *linux_copyout_strings(struct image_params *imgp);
112 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
113 
114 static int linux_szplatform;
115 const char *linux_platform;
116 
117 static eventhandler_tag linux_exit_tag;
118 static eventhandler_tag linux_exec_tag;
119 
120 /*
121  * Linux syscalls return negative errno's, we do positive and map them
122  * Reference:
123  *   FreeBSD: src/sys/sys/errno.h
124  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
125  *            linux-2.6.17.8/include/asm-generic/errno.h
126  */
127 static int bsd_to_linux_errno[ELAST + 1] = {
128 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
129 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
136 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
137 	 -72, -67, -71
138 };
139 
140 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
149 };
150 
151 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
153 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159 	SIGIO, SIGURG, SIGSYS
160 };
161 
/* Value reported to Linux when a FreeBSD trap code has no translation. */
#define	LINUX_T_UNKNOWN		255

/*
 * FreeBSD trap code -> Linux trap number, indexed by the FreeBSD code.
 * Use the bsd_to_linux_trapcode() wrapper below, which range-checks the
 * index, rather than reading the array directly.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0: (none) */
	6,			/*  1: T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2: (none) */
	3,			/*  3: T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4: (none) */
	LINUX_T_UNKNOWN,	/*  5: (none) */
	16,			/*  6: T_ARITHTRAP */
	254,			/*  7: T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8: (none) */
	13,			/*  9: T_PROTFLT */
	1,			/* 10: T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11: (none) */
	14,			/* 12: T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13: (none) */
	17,			/* 14: T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15: (none) */
	LINUX_T_UNKNOWN,	/* 16: (none) */
	LINUX_T_UNKNOWN,	/* 17: (none) */
	0,			/* 18: T_DIVIDE */
	2,			/* 19: T_NMI */
	4,			/* 20: T_OFLOW */
	5,			/* 21: T_BOUND */
	7,			/* 22: T_DNA */
	8,			/* 23: T_DOUBLEFLT */
	9,			/* 24: T_FPOPFLT */
	10,			/* 25: T_TSSFLT */
	11,			/* 26: T_SEGNPFLT */
	12,			/* 27: T_STKFLT */
	18,			/* 28: T_MCHK */
	19,			/* 29: T_XMMFLT */
	15			/* 30: T_RESERVED */
};

/*
 * Translate a FreeBSD trap code.  Codes past the end of the table yield
 * LINUX_T_UNKNOWN.  The comparison is made against an unsigned sizeof
 * expression, so a negative int argument also falls out of range.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
200 
201 /*
202  * If FreeBSD & Linux have a difference of opinion about what a trap
203  * means, deal with it here.
204  *
205  * MPSAFE
206  */
207 static int
208 translate_traps(int signal, int trap_code)
209 {
210 	if (signal != SIGBUS)
211 		return signal;
212 	switch (trap_code) {
213 	case T_PROTFLT:
214 	case T_TSSFLT:
215 	case T_DOUBLEFLT:
216 	case T_PAGEFLT:
217 		return SIGSEGV;
218 	default:
219 		return signal;
220 	}
221 }
222 
223 static int
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
225 {
226 	register_t *argv, *envp;
227 
228 	argv = *stack_base;
229 	envp = *stack_base + (imgp->args->argc + 1);
230 	(*stack_base)--;
231 	suword(*stack_base, (intptr_t)(void *)envp);
232 	(*stack_base)--;
233 	suword(*stack_base, (intptr_t)(void *)argv);
234 	(*stack_base)--;
235 	suword(*stack_base, imgp->args->argc);
236 	return (0);
237 }
238 
239 static int
240 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
241 {
242 	struct proc *p;
243 	Elf32_Auxargs *args;
244 	Elf32_Addr *uplatform;
245 	struct ps_strings *arginfo;
246 	register_t *pos;
247 
248 	KASSERT(curthread->td_proc == imgp->proc,
249 	    ("unsafe elf_linux_fixup(), should be curproc"));
250 
251 	p = imgp->proc;
252 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
253 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
258 
259 	/*
260 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
261 	 * as it has appeared in the 2.4.0-rc7 first time.
262 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
263 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
264 	 * is not present.
265 	 * Also see linux_times() implementation.
266 	 */
267 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
268 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
269 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
270 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
271 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
272 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
273 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
274 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
275 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
276 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
277 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
278 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
279 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
280 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
281 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
282 	if (args->execfd != -1)
283 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
284 	AUXARGS_ENTRY(pos, AT_NULL, 0);
285 
286 	free(imgp->auxargs, M_TEMP);
287 	imgp->auxargs = NULL;
288 
289 	(*stack_base)--;
290 	suword(*stack_base, (register_t)imgp->args->argc);
291 	return (0);
292 }
293 
294 /*
295  * Copied from kern/kern_exec.c
296  */
297 static register_t *
298 linux_copyout_strings(struct image_params *imgp)
299 {
300 	int argc, envc;
301 	char **vectp;
302 	char *stringp, *destp;
303 	register_t *stack_base;
304 	struct ps_strings *arginfo;
305 	struct proc *p;
306 
307 	/*
308 	 * Calculate string base and vector table pointers.
309 	 * Also deal with signal trampoline code for this exec type.
310 	 */
311 	p = imgp->proc;
312 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
313 	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
314 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
315 
316 	/*
317 	 * install LINUX_PLATFORM
318 	 */
319 	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
320 	    linux_szplatform);
321 
322 	/*
323 	 * If we have a valid auxargs ptr, prepare some room
324 	 * on the stack.
325 	 */
326 	if (imgp->auxargs) {
327 		/*
328 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
329 		 * lower compatibility.
330 		 */
331 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
332 		    (LINUX_AT_COUNT * 2);
333 		/*
334 		 * The '+ 2' is for the null pointers at the end of each of
335 		 * the arg and env vector sets,and imgp->auxarg_size is room
336 		 * for argument of Runtime loader.
337 		 */
338 		vectp = (char **)(destp - (imgp->args->argc +
339 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
340 	} else {
341 		/*
342 		 * The '+ 2' is for the null pointers at the end of each of
343 		 * the arg and env vector sets
344 		 */
345 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
346 		    sizeof(char *));
347 	}
348 
349 	/*
350 	 * vectp also becomes our initial stack base
351 	 */
352 	stack_base = (register_t *)vectp;
353 
354 	stringp = imgp->args->begin_argv;
355 	argc = imgp->args->argc;
356 	envc = imgp->args->envc;
357 
358 	/*
359 	 * Copy out strings - arguments and environment.
360 	 */
361 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
362 
363 	/*
364 	 * Fill in "ps_strings" struct for ps, w, etc.
365 	 */
366 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
367 	suword(&arginfo->ps_nargvstr, argc);
368 
369 	/*
370 	 * Fill in argument portion of vector table.
371 	 */
372 	for (; argc > 0; --argc) {
373 		suword(vectp++, (long)(intptr_t)destp);
374 		while (*stringp++ != 0)
375 			destp++;
376 		destp++;
377 	}
378 
379 	/* a null vector table pointer separates the argp's from the envp's */
380 	suword(vectp++, 0);
381 
382 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
383 	suword(&arginfo->ps_nenvstr, envc);
384 
385 	/*
386 	 * Fill in environment portion of vector table.
387 	 */
388 	for (; envc > 0; --envc) {
389 		suword(vectp++, (long)(intptr_t)destp);
390 		while (*stringp++ != 0)
391 			destp++;
392 		destp++;
393 	}
394 
395 	/* end of vector table is a null pointer */
396 	suword(vectp, 0);
397 
398 	return (stack_base);
399 }
400 
401 
402 
403 extern int _ucodesel, _udatasel;
404 extern unsigned long linux_sznonrtsigcode;
405 
406 static void
407 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
408 {
409 	struct thread *td = curthread;
410 	struct proc *p = td->td_proc;
411 	struct sigacts *psp;
412 	struct trapframe *regs;
413 	struct l_rt_sigframe *fp, frame;
414 	int sig, code;
415 	int oonstack;
416 
417 	sig = ksi->ksi_signo;
418 	code = ksi->ksi_code;
419 	PROC_LOCK_ASSERT(p, MA_OWNED);
420 	psp = p->p_sigacts;
421 	mtx_assert(&psp->ps_mtx, MA_OWNED);
422 	regs = td->td_frame;
423 	oonstack = sigonstack(regs->tf_esp);
424 
425 #ifdef DEBUG
426 	if (ldebug(rt_sendsig))
427 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
428 		    catcher, sig, (void*)mask, code);
429 #endif
430 	/*
431 	 * Allocate space for the signal handler context.
432 	 */
433 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
434 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
435 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
436 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
437 	} else
438 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
439 	mtx_unlock(&psp->ps_mtx);
440 
441 	/*
442 	 * Build the argument list for the signal handler.
443 	 */
444 	if (p->p_sysent->sv_sigtbl)
445 		if (sig <= p->p_sysent->sv_sigsize)
446 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
447 
448 	bzero(&frame, sizeof(frame));
449 
450 	frame.sf_handler = catcher;
451 	frame.sf_sig = sig;
452 	frame.sf_siginfo = &fp->sf_si;
453 	frame.sf_ucontext = &fp->sf_sc;
454 
455 	/* Fill in POSIX parts */
456 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
457 
458 	/*
459 	 * Build the signal context to be used by sigreturn.
460 	 */
461 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
462 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
463 
464 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
465 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
466 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
467 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
468 	PROC_UNLOCK(p);
469 
470 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
471 
472 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
473 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
474 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
475 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
476 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
477 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
478 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
479 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
480 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
481 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
482 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
483 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
484 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
485 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
486 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
487 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
488 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
489 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
490 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
491 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
492 
493 #ifdef DEBUG
494 	if (ldebug(rt_sendsig))
495 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
496 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
497 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
498 #endif
499 
500 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
501 		/*
502 		 * Process has trashed its stack; give it an illegal
503 		 * instruction to halt it in its tracks.
504 		 */
505 #ifdef DEBUG
506 		if (ldebug(rt_sendsig))
507 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
508 			    fp, oonstack);
509 #endif
510 		PROC_LOCK(p);
511 		sigexit(td, SIGILL);
512 	}
513 
514 	/*
515 	 * Build context to run handler in.
516 	 */
517 	regs->tf_esp = (int)fp;
518 	regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
519 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520 	regs->tf_cs = _ucodesel;
521 	regs->tf_ds = _udatasel;
522 	regs->tf_es = _udatasel;
523 	regs->tf_fs = _udatasel;
524 	regs->tf_ss = _udatasel;
525 	PROC_LOCK(p);
526 	mtx_lock(&psp->ps_mtx);
527 }
528 
529 
530 /*
531  * Send an interrupt to process.
532  *
533  * Stack is set up to allow sigcode stored
534  * in u. to call routine, followed by kcall
535  * to sigreturn routine below.  After sigreturn
536  * resets the signal mask, the stack, and the
537  * frame pointer, it returns to the user
538  * specified pc, psl.
539  */
540 static void
541 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
542 {
543 	struct thread *td = curthread;
544 	struct proc *p = td->td_proc;
545 	struct sigacts *psp;
546 	struct trapframe *regs;
547 	struct l_sigframe *fp, frame;
548 	l_sigset_t lmask;
549 	int sig, code;
550 	int oonstack, i;
551 
552 	PROC_LOCK_ASSERT(p, MA_OWNED);
553 	psp = p->p_sigacts;
554 	sig = ksi->ksi_signo;
555 	code = ksi->ksi_code;
556 	mtx_assert(&psp->ps_mtx, MA_OWNED);
557 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
558 		/* Signal handler installed with SA_SIGINFO. */
559 		linux_rt_sendsig(catcher, ksi, mask);
560 		return;
561 	}
562 	regs = td->td_frame;
563 	oonstack = sigonstack(regs->tf_esp);
564 
565 #ifdef DEBUG
566 	if (ldebug(sendsig))
567 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
568 		    catcher, sig, (void*)mask, code);
569 #endif
570 
571 	/*
572 	 * Allocate space for the signal handler context.
573 	 */
574 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
575 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
576 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
577 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
578 	} else
579 		fp = (struct l_sigframe *)regs->tf_esp - 1;
580 	mtx_unlock(&psp->ps_mtx);
581 	PROC_UNLOCK(p);
582 
583 	/*
584 	 * Build the argument list for the signal handler.
585 	 */
586 	if (p->p_sysent->sv_sigtbl)
587 		if (sig <= p->p_sysent->sv_sigsize)
588 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
589 
590 	bzero(&frame, sizeof(frame));
591 
592 	frame.sf_handler = catcher;
593 	frame.sf_sig = sig;
594 
595 	bsd_to_linux_sigset(mask, &lmask);
596 
597 	/*
598 	 * Build the signal context to be used by sigreturn.
599 	 */
600 	frame.sf_sc.sc_mask   = lmask.__bits[0];
601 	frame.sf_sc.sc_gs     = rgs();
602 	frame.sf_sc.sc_fs     = regs->tf_fs;
603 	frame.sf_sc.sc_es     = regs->tf_es;
604 	frame.sf_sc.sc_ds     = regs->tf_ds;
605 	frame.sf_sc.sc_edi    = regs->tf_edi;
606 	frame.sf_sc.sc_esi    = regs->tf_esi;
607 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
608 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
609 	frame.sf_sc.sc_edx    = regs->tf_edx;
610 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
611 	frame.sf_sc.sc_eax    = regs->tf_eax;
612 	frame.sf_sc.sc_eip    = regs->tf_eip;
613 	frame.sf_sc.sc_cs     = regs->tf_cs;
614 	frame.sf_sc.sc_eflags = regs->tf_eflags;
615 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
616 	frame.sf_sc.sc_ss     = regs->tf_ss;
617 	frame.sf_sc.sc_err    = regs->tf_err;
618 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
619 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
620 
621 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
622 		frame.sf_extramask[i] = lmask.__bits[i+1];
623 
624 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
625 		/*
626 		 * Process has trashed its stack; give it an illegal
627 		 * instruction to halt it in its tracks.
628 		 */
629 		PROC_LOCK(p);
630 		sigexit(td, SIGILL);
631 	}
632 
633 	/*
634 	 * Build context to run handler in.
635 	 */
636 	regs->tf_esp = (int)fp;
637 	regs->tf_eip = p->p_sysent->sv_sigcode_base;
638 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
639 	regs->tf_cs = _ucodesel;
640 	regs->tf_ds = _udatasel;
641 	regs->tf_es = _udatasel;
642 	regs->tf_fs = _udatasel;
643 	regs->tf_ss = _udatasel;
644 	PROC_LOCK(p);
645 	mtx_lock(&psp->ps_mtx);
646 }
647 
648 /*
649  * System call to cleanup state after a signal
650  * has been taken.  Reset signal mask and
651  * stack state from context left by sendsig (above).
652  * Return to previous pc and psl as specified by
653  * context left by sendsig. Check carefully to
654  * make sure that the user has not modified the
655  * psl to gain improper privileges or to cause
656  * a machine fault.
657  */
658 int
659 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
660 {
661 	struct l_sigframe frame;
662 	struct trapframe *regs;
663 	l_sigset_t lmask;
664 	sigset_t bmask;
665 	int eflags, i;
666 	ksiginfo_t ksi;
667 
668 	regs = td->td_frame;
669 
670 #ifdef DEBUG
671 	if (ldebug(sigreturn))
672 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
673 #endif
674 	/*
675 	 * The trampoline code hands us the sigframe.
676 	 * It is unsafe to keep track of it ourselves, in the event that a
677 	 * program jumps out of a signal handler.
678 	 */
679 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
680 		return (EFAULT);
681 
682 	/*
683 	 * Check for security violations.
684 	 */
685 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 	eflags = frame.sf_sc.sc_eflags;
687 	/*
688 	 * XXX do allow users to change the privileged flag PSL_RF.  The
689 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
690 	 * sometimes set it there too.  tf_eflags is kept in the signal
691 	 * context during signal handling and there is no other place
692 	 * to remember it, so the PSL_RF bit may be corrupted by the
693 	 * signal handler without us knowing.  Corruption of the PSL_RF
694 	 * bit at worst causes one more or one less debugger trap, so
695 	 * allowing it is fairly harmless.
696 	 */
697 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
698 		return(EINVAL);
699 
700 	/*
701 	 * Don't allow users to load a valid privileged %cs.  Let the
702 	 * hardware check for invalid selectors, excess privilege in
703 	 * other selectors, invalid %eip's and invalid %esp's.
704 	 */
705 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
706 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
707 		ksiginfo_init_trap(&ksi);
708 		ksi.ksi_signo = SIGBUS;
709 		ksi.ksi_code = BUS_OBJERR;
710 		ksi.ksi_trapno = T_PROTFLT;
711 		ksi.ksi_addr = (void *)regs->tf_eip;
712 		trapsignal(td, &ksi);
713 		return(EINVAL);
714 	}
715 
716 	lmask.__bits[0] = frame.sf_sc.sc_mask;
717 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
718 		lmask.__bits[i+1] = frame.sf_extramask[i];
719 	linux_to_bsd_sigset(&lmask, &bmask);
720 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
721 
722 	/*
723 	 * Restore signal context.
724 	 */
725 	/* %gs was restored by the trampoline. */
726 	regs->tf_fs     = frame.sf_sc.sc_fs;
727 	regs->tf_es     = frame.sf_sc.sc_es;
728 	regs->tf_ds     = frame.sf_sc.sc_ds;
729 	regs->tf_edi    = frame.sf_sc.sc_edi;
730 	regs->tf_esi    = frame.sf_sc.sc_esi;
731 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
732 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
733 	regs->tf_edx    = frame.sf_sc.sc_edx;
734 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
735 	regs->tf_eax    = frame.sf_sc.sc_eax;
736 	regs->tf_eip    = frame.sf_sc.sc_eip;
737 	regs->tf_cs     = frame.sf_sc.sc_cs;
738 	regs->tf_eflags = eflags;
739 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
740 	regs->tf_ss     = frame.sf_sc.sc_ss;
741 
742 	return (EJUSTRETURN);
743 }
744 
745 /*
746  * System call to cleanup state after a signal
747  * has been taken.  Reset signal mask and
748  * stack state from context left by rt_sendsig (above).
749  * Return to previous pc and psl as specified by
750  * context left by sendsig. Check carefully to
751  * make sure that the user has not modified the
752  * psl to gain improper privileges or to cause
753  * a machine fault.
754  */
755 int
756 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
757 {
758 	struct l_ucontext uc;
759 	struct l_sigcontext *context;
760 	sigset_t bmask;
761 	l_stack_t *lss;
762 	stack_t ss;
763 	struct trapframe *regs;
764 	int eflags;
765 	ksiginfo_t ksi;
766 
767 	regs = td->td_frame;
768 
769 #ifdef DEBUG
770 	if (ldebug(rt_sigreturn))
771 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
772 #endif
773 	/*
774 	 * The trampoline code hands us the ucontext.
775 	 * It is unsafe to keep track of it ourselves, in the event that a
776 	 * program jumps out of a signal handler.
777 	 */
778 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
779 		return (EFAULT);
780 
781 	context = &uc.uc_mcontext;
782 
783 	/*
784 	 * Check for security violations.
785 	 */
786 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
787 	eflags = context->sc_eflags;
788 	/*
789 	 * XXX do allow users to change the privileged flag PSL_RF.  The
790 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
791 	 * sometimes set it there too.  tf_eflags is kept in the signal
792 	 * context during signal handling and there is no other place
793 	 * to remember it, so the PSL_RF bit may be corrupted by the
794 	 * signal handler without us knowing.  Corruption of the PSL_RF
795 	 * bit at worst causes one more or one less debugger trap, so
796 	 * allowing it is fairly harmless.
797 	 */
798 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
799 		return(EINVAL);
800 
801 	/*
802 	 * Don't allow users to load a valid privileged %cs.  Let the
803 	 * hardware check for invalid selectors, excess privilege in
804 	 * other selectors, invalid %eip's and invalid %esp's.
805 	 */
806 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
807 	if (!CS_SECURE(context->sc_cs)) {
808 		ksiginfo_init_trap(&ksi);
809 		ksi.ksi_signo = SIGBUS;
810 		ksi.ksi_code = BUS_OBJERR;
811 		ksi.ksi_trapno = T_PROTFLT;
812 		ksi.ksi_addr = (void *)regs->tf_eip;
813 		trapsignal(td, &ksi);
814 		return(EINVAL);
815 	}
816 
817 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
818 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
819 
820 	/*
821 	 * Restore signal context
822 	 */
823 	/* %gs was restored by the trampoline. */
824 	regs->tf_fs     = context->sc_fs;
825 	regs->tf_es     = context->sc_es;
826 	regs->tf_ds     = context->sc_ds;
827 	regs->tf_edi    = context->sc_edi;
828 	regs->tf_esi    = context->sc_esi;
829 	regs->tf_ebp    = context->sc_ebp;
830 	regs->tf_ebx    = context->sc_ebx;
831 	regs->tf_edx    = context->sc_edx;
832 	regs->tf_ecx    = context->sc_ecx;
833 	regs->tf_eax    = context->sc_eax;
834 	regs->tf_eip    = context->sc_eip;
835 	regs->tf_cs     = context->sc_cs;
836 	regs->tf_eflags = eflags;
837 	regs->tf_esp    = context->sc_esp_at_signal;
838 	regs->tf_ss     = context->sc_ss;
839 
840 	/*
841 	 * call sigaltstack & ignore results..
842 	 */
843 	lss = &uc.uc_stack;
844 	ss.ss_sp = lss->ss_sp;
845 	ss.ss_size = lss->ss_size;
846 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
847 
848 #ifdef DEBUG
849 	if (ldebug(rt_sigreturn))
850 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
851 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
852 #endif
853 	(void)kern_sigaltstack(td, &ss, NULL);
854 
855 	return (EJUSTRETURN);
856 }
857 
858 static int
859 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
860 {
861 	struct proc *p;
862 	struct trapframe *frame;
863 
864 	p = td->td_proc;
865 	frame = td->td_frame;
866 
867 	sa->code = frame->tf_eax;
868 	sa->args[0] = frame->tf_ebx;
869 	sa->args[1] = frame->tf_ecx;
870 	sa->args[2] = frame->tf_edx;
871 	sa->args[3] = frame->tf_esi;
872 	sa->args[4] = frame->tf_edi;
873 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
874 
875 	if (sa->code >= p->p_sysent->sv_size)
876 		sa->callp = &p->p_sysent->sv_table[0];
877  	else
878  		sa->callp = &p->p_sysent->sv_table[sa->code];
879 	sa->narg = sa->callp->sy_narg;
880 
881 	td->td_retval[0] = 0;
882 	td->td_retval[1] = frame->tf_edx;
883 
884 	return (0);
885 }
886 
887 /*
888  * If a linux binary is exec'ing something, try this image activator
889  * first.  We override standard shell script execution in order to
890  * be able to modify the interpreter path.  We only do this if a linux
891  * binary is doing the exec, so we do not create an EXEC module for it.
892  */
893 static int	exec_linux_imgact_try(struct image_params *iparams);
894 
895 static int
896 exec_linux_imgact_try(struct image_params *imgp)
897 {
898     const char *head = (const char *)imgp->image_header;
899     char *rpath;
900     int error = -1;
901 
902     /*
903      * The interpreter for shell scripts run from a linux binary needs
904      * to be located in /compat/linux if possible in order to recursively
905      * maintain linux path emulation.
906      */
907     if (((const short *)head)[0] == SHELLMAGIC) {
908 	    /*
909 	     * Run our normal shell image activator.  If it succeeds attempt
910 	     * to use the alternate path for the interpreter.  If an alternate
911 	     * path is found, use our stringspace to store it.
912 	     */
913 	    if ((error = exec_shell_imgact(imgp)) == 0) {
914 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
915 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
916 		    if (rpath != NULL)
917 			    imgp->args->fname_buf =
918 				imgp->interpreter_name = rpath;
919 	    }
920     }
921     return (error);
922 }
923 
924 /*
925  * exec_setregs may initialize some registers differently than Linux
926  * does, thus potentially confusing Linux binaries. If necessary, we
927  * override the exec_setregs default(s) here.
928  */
929 static void
930 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
931 {
932 	struct pcb *pcb = td->td_pcb;
933 
934 	exec_setregs(td, imgp, stack);
935 
936 	/* Linux sets %gs to 0, we default to _udatasel */
937 	pcb->pcb_gs = 0;
938 	load_gs(0);
939 
940 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
941 }
942 
943 static void
944 linux_get_machine(const char **dst)
945 {
946 
947 	switch (cpu_class) {
948 	case CPUCLASS_686:
949 		*dst = "i686";
950 		break;
951 	case CPUCLASS_586:
952 		*dst = "i586";
953 		break;
954 	case CPUCLASS_486:
955 		*dst = "i486";
956 		break;
957 	default:
958 		*dst = "i386";
959 	}
960 }
961 
962 struct sysentvec linux_sysvec = {
963 	.sv_size	= LINUX_SYS_MAXSYSCALL,
964 	.sv_table	= linux_sysent,
965 	.sv_mask	= 0,
966 	.sv_sigsize	= LINUX_SIGTBLSZ,
967 	.sv_sigtbl	= bsd_to_linux_signal,
968 	.sv_errsize	= ELAST + 1,
969 	.sv_errtbl	= bsd_to_linux_errno,
970 	.sv_transtrap	= translate_traps,
971 	.sv_fixup	= linux_fixup,
972 	.sv_sendsig	= linux_sendsig,
973 	.sv_sigcode	= linux_sigcode,
974 	.sv_szsigcode	= &linux_szsigcode,
975 	.sv_prepsyscall	= NULL,
976 	.sv_name	= "Linux a.out",
977 	.sv_coredump	= NULL,
978 	.sv_imgact_try	= exec_linux_imgact_try,
979 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
980 	.sv_pagesize	= PAGE_SIZE,
981 	.sv_minuser	= VM_MIN_ADDRESS,
982 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
983 	.sv_usrstack	= LINUX_USRSTACK,
984 	.sv_psstrings	= PS_STRINGS,
985 	.sv_stackprot	= VM_PROT_ALL,
986 	.sv_copyout_strings = exec_copyout_strings,
987 	.sv_setregs	= exec_linux_setregs,
988 	.sv_fixlimit	= NULL,
989 	.sv_maxssiz	= NULL,
990 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
991 	.sv_set_syscall_retval = cpu_set_syscall_retval,
992 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
993 	.sv_syscallnames = NULL,
994 	.sv_shared_page_base = LINUX_SHAREDPAGE,
995 	.sv_shared_page_len = PAGE_SIZE,
996 	.sv_schedtail	= linux_schedtail,
997 };
998 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
999 
1000 struct sysentvec elf_linux_sysvec = {
1001 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1002 	.sv_table	= linux_sysent,
1003 	.sv_mask	= 0,
1004 	.sv_sigsize	= LINUX_SIGTBLSZ,
1005 	.sv_sigtbl	= bsd_to_linux_signal,
1006 	.sv_errsize	= ELAST + 1,
1007 	.sv_errtbl	= bsd_to_linux_errno,
1008 	.sv_transtrap	= translate_traps,
1009 	.sv_fixup	= elf_linux_fixup,
1010 	.sv_sendsig	= linux_sendsig,
1011 	.sv_sigcode	= linux_sigcode,
1012 	.sv_szsigcode	= &linux_szsigcode,
1013 	.sv_prepsyscall	= NULL,
1014 	.sv_name	= "Linux ELF",
1015 	.sv_coredump	= elf32_coredump,
1016 	.sv_imgact_try	= exec_linux_imgact_try,
1017 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1018 	.sv_pagesize	= PAGE_SIZE,
1019 	.sv_minuser	= VM_MIN_ADDRESS,
1020 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1021 	.sv_usrstack	= LINUX_USRSTACK,
1022 	.sv_psstrings	= LINUX_PS_STRINGS,
1023 	.sv_stackprot	= VM_PROT_ALL,
1024 	.sv_copyout_strings = linux_copyout_strings,
1025 	.sv_setregs	= exec_linux_setregs,
1026 	.sv_fixlimit	= NULL,
1027 	.sv_maxssiz	= NULL,
1028 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1029 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1030 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1031 	.sv_syscallnames = NULL,
1032 	.sv_shared_page_base = LINUX_SHAREDPAGE,
1033 	.sv_shared_page_len = PAGE_SIZE,
1034 	.sv_schedtail	= linux_schedtail,
1035 };
1036 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1037 
1038 static char GNU_ABI_VENDOR[] = "GNU";
1039 static int GNULINUX_ABI_DESC = 0;
1040 
1041 static boolean_t
1042 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1043 {
1044 	const Elf32_Word *desc;
1045 	uintptr_t p;
1046 
1047 	p = (uintptr_t)(note + 1);
1048 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1049 
1050 	desc = (const Elf32_Word *)p;
1051 	if (desc[0] != GNULINUX_ABI_DESC)
1052 		return (FALSE);
1053 
1054 	/*
1055 	 * For linux we encode osrel as follows (see linux_mib.c):
1056 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1057 	 */
1058 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1059 
1060 	return (TRUE);
1061 }
1062 
1063 static Elf_Brandnote linux_brandnote = {
1064 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1065 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1066 	.hdr.n_type	= 1,
1067 	.vendor		= GNU_ABI_VENDOR,
1068 	.flags		= BN_TRANSLATE_OSREL,
1069 	.trans_osrel	= linux_trans_osrel
1070 };
1071 
1072 static Elf32_Brandinfo linux_brand = {
1073 	.brand		= ELFOSABI_LINUX,
1074 	.machine	= EM_386,
1075 	.compat_3_brand	= "Linux",
1076 	.emul_path	= "/compat/linux",
1077 	.interp_path	= "/lib/ld-linux.so.1",
1078 	.sysvec		= &elf_linux_sysvec,
1079 	.interp_newpath	= NULL,
1080 	.brand_note	= &linux_brandnote,
1081 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1082 };
1083 
1084 static Elf32_Brandinfo linux_glibc2brand = {
1085 	.brand		= ELFOSABI_LINUX,
1086 	.machine	= EM_386,
1087 	.compat_3_brand	= "Linux",
1088 	.emul_path	= "/compat/linux",
1089 	.interp_path	= "/lib/ld-linux.so.2",
1090 	.sysvec		= &elf_linux_sysvec,
1091 	.interp_newpath	= NULL,
1092 	.brand_note	= &linux_brandnote,
1093 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1094 };
1095 
1096 Elf32_Brandinfo *linux_brandlist[] = {
1097 	&linux_brand,
1098 	&linux_glibc2brand,
1099 	NULL
1100 };
1101 
1102 static int
1103 linux_elf_modevent(module_t mod, int type, void *data)
1104 {
1105 	Elf32_Brandinfo **brandinfo;
1106 	int error;
1107 	struct linux_ioctl_handler **lihp;
1108 	struct linux_device_handler **ldhp;
1109 
1110 	error = 0;
1111 
1112 	switch(type) {
1113 	case MOD_LOAD:
1114 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1115 		     ++brandinfo)
1116 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1117 				error = EINVAL;
1118 		if (error == 0) {
1119 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1120 				linux_ioctl_register_handler(*lihp);
1121 			SET_FOREACH(ldhp, linux_device_handler_set)
1122 				linux_device_register_handler(*ldhp);
1123 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1124 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1125 			LIST_INIT(&futex_list);
1126 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1127 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1128 			      NULL, 1000);
1129 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1130 			      NULL, 1000);
1131 			linux_get_machine(&linux_platform);
1132 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1133 			    sizeof(char *));
1134 			linux_osd_jail_register();
1135 			stclohz = (stathz ? stathz : hz);
1136 			if (bootverbose)
1137 				printf("Linux ELF exec handler installed\n");
1138 		} else
1139 			printf("cannot insert Linux ELF brand handler\n");
1140 		break;
1141 	case MOD_UNLOAD:
1142 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143 		     ++brandinfo)
1144 			if (elf32_brand_inuse(*brandinfo))
1145 				error = EBUSY;
1146 		if (error == 0) {
1147 			for (brandinfo = &linux_brandlist[0];
1148 			     *brandinfo != NULL; ++brandinfo)
1149 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1150 					error = EINVAL;
1151 		}
1152 		if (error == 0) {
1153 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1154 				linux_ioctl_unregister_handler(*lihp);
1155 			SET_FOREACH(ldhp, linux_device_handler_set)
1156 				linux_device_unregister_handler(*ldhp);
1157 			mtx_destroy(&emul_lock);
1158 			sx_destroy(&emul_shared_lock);
1159 			mtx_destroy(&futex_mtx);
1160 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1161 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1162 			linux_osd_jail_deregister();
1163 			if (bootverbose)
1164 				printf("Linux ELF exec handler removed\n");
1165 		} else
1166 			printf("Could not deinstall ELF interpreter entry\n");
1167 		break;
1168 	default:
1169 		return EOPNOTSUPP;
1170 	}
1171 	return error;
1172 }
1173 
1174 static moduledata_t linux_elf_mod = {
1175 	"linuxelf",
1176 	linux_elf_modevent,
1177 	0
1178 };
1179 
1180 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1181