xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
 * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
73 
74 MODULE_VERSION(linux, 1);
75 
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
77 
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC      0x2123 /* #! */
80 #else
81 #define SHELLMAGIC      0x2321
82 #endif
83 
84 /*
85  * Allow the sendsig functions to use the ldebug() facility
86  * even though they are not syscalls themselves. Map them
87  * to syscall 0. This is slightly less bogus than using
88  * ldebug(sigreturn).
89  */
90 #define	LINUX_SYS_linux_rt_sendsig	0
91 #define	LINUX_SYS_linux_sendsig		0
92 
93 #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
94 
95 extern char linux_sigcode[];
96 extern int linux_szsigcode;
97 
98 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
99 
100 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
101 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
102 
103 static int	linux_fixup(register_t **stack_base,
104 		    struct image_params *iparams);
105 static int	elf_linux_fixup(register_t **stack_base,
106 		    struct image_params *iparams);
107 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108 static void	exec_linux_setregs(struct thread *td,
109 		    struct image_params *imgp, u_long stack);
110 static register_t *linux_copyout_strings(struct image_params *imgp);
111 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
112 
113 static int linux_szplatform;
114 const char *linux_platform;
115 
116 static eventhandler_tag linux_exit_tag;
117 static eventhandler_tag linux_exec_tag;
118 
119 /*
120  * Linux syscalls return negative errno's, we do positive and map them
121  * Reference:
122  *   FreeBSD: src/sys/sys/errno.h
123  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
124  *            linux-2.6.17.8/include/asm-generic/errno.h
125  */
126 static int bsd_to_linux_errno[ELAST + 1] = {
127 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
128 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
129 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
130 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
131 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
132 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
133 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
134 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
135 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
136 	 -72, -67, -71
137 };
138 
139 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
140 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
141 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
142 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
143 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
144 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
145 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
146 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
147 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
148 };
149 
150 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
151 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
152 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
153 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
154 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
155 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
156 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
157 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
158 	SIGIO, SIGURG, SIGSYS
159 };
160 
/* Value reported when a trap type has no entry in the table below. */
#define LINUX_T_UNKNOWN  255

/*
 * Linux trap numbers, indexed by the native trap type named in the
 * comment on each line.  The table is only ever read, hence const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a native trap type into the Linux trap number.
 *
 * Any value outside the table yields LINUX_T_UNKNOWN.  That includes
 * negative values: the explicit conversion to size_t turns them into
 * indices far beyond the end of the table, so the bounds check rejects
 * them.  The argument is evaluated more than once; do not pass an
 * expression with side effects.
 */
#define bsd_to_linux_trapcode(code) \
    ((size_t)(code) < \
     sizeof(_bsd_to_linux_trapcode) / sizeof(_bsd_to_linux_trapcode[0]) ? \
     _bsd_to_linux_trapcode[(size_t)(code)] : \
     LINUX_T_UNKNOWN)
199 
200 /*
201  * If FreeBSD & Linux have a difference of opinion about what a trap
202  * means, deal with it here.
203  *
204  * MPSAFE
205  */
206 static int
207 translate_traps(int signal, int trap_code)
208 {
209 	if (signal != SIGBUS)
210 		return signal;
211 	switch (trap_code) {
212 	case T_PROTFLT:
213 	case T_TSSFLT:
214 	case T_DOUBLEFLT:
215 	case T_PAGEFLT:
216 		return SIGSEGV;
217 	default:
218 		return signal;
219 	}
220 }
221 
222 static int
223 linux_fixup(register_t **stack_base, struct image_params *imgp)
224 {
225 	register_t *argv, *envp;
226 
227 	argv = *stack_base;
228 	envp = *stack_base + (imgp->args->argc + 1);
229 	(*stack_base)--;
230 	**stack_base = (intptr_t)(void *)envp;
231 	(*stack_base)--;
232 	**stack_base = (intptr_t)(void *)argv;
233 	(*stack_base)--;
234 	**stack_base = imgp->args->argc;
235 	return (0);
236 }
237 
238 static int
239 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
240 {
241 	struct proc *p;
242 	Elf32_Auxargs *args;
243 	Elf32_Addr *uplatform;
244 	struct ps_strings *arginfo;
245 	register_t *pos;
246 
247 	KASSERT(curthread->td_proc == imgp->proc,
248 	    ("unsafe elf_linux_fixup(), should be curproc"));
249 
250 	p = imgp->proc;
251 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
252 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
253 	args = (Elf32_Auxargs *)imgp->auxargs;
254 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
255 
256 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
257 
258 	/*
259 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
260 	 * as it has appeared in the 2.4.0-rc7 first time.
261 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
262 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
263 	 * is not present.
264 	 * Also see linux_times() implementation.
265 	 */
266 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
267 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
268 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
269 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
270 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
271 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
272 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
273 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
274 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
275 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
276 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
277 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
278 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
279 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
280 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
281 	if (args->execfd != -1)
282 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
283 	AUXARGS_ENTRY(pos, AT_NULL, 0);
284 
285 	free(imgp->auxargs, M_TEMP);
286 	imgp->auxargs = NULL;
287 
288 	(*stack_base)--;
289 	**stack_base = (register_t)imgp->args->argc;
290 	return (0);
291 }
292 
293 /*
294  * Copied from kern/kern_exec.c
295  */
296 static register_t *
297 linux_copyout_strings(struct image_params *imgp)
298 {
299 	int argc, envc;
300 	char **vectp;
301 	char *stringp, *destp;
302 	register_t *stack_base;
303 	struct ps_strings *arginfo;
304 	struct proc *p;
305 
306 	/*
307 	 * Calculate string base and vector table pointers.
308 	 * Also deal with signal trampoline code for this exec type.
309 	 */
310 	p = imgp->proc;
311 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
312 	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
313 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
314 
315 	/*
316 	 * install LINUX_PLATFORM
317 	 */
318 	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
319 	    linux_szplatform);
320 
321 	/*
322 	 * If we have a valid auxargs ptr, prepare some room
323 	 * on the stack.
324 	 */
325 	if (imgp->auxargs) {
326 		/*
327 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
328 		 * lower compatibility.
329 		 */
330 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
331 		    (LINUX_AT_COUNT * 2);
332 		/*
333 		 * The '+ 2' is for the null pointers at the end of each of
334 		 * the arg and env vector sets,and imgp->auxarg_size is room
335 		 * for argument of Runtime loader.
336 		 */
337 		vectp = (char **)(destp - (imgp->args->argc +
338 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
339 	} else {
340 		/*
341 		 * The '+ 2' is for the null pointers at the end of each of
342 		 * the arg and env vector sets
343 		 */
344 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
345 		    sizeof(char *));
346 	}
347 
348 	/*
349 	 * vectp also becomes our initial stack base
350 	 */
351 	stack_base = (register_t *)vectp;
352 
353 	stringp = imgp->args->begin_argv;
354 	argc = imgp->args->argc;
355 	envc = imgp->args->envc;
356 
357 	/*
358 	 * Copy out strings - arguments and environment.
359 	 */
360 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
361 
362 	/*
363 	 * Fill in "ps_strings" struct for ps, w, etc.
364 	 */
365 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
366 	suword(&arginfo->ps_nargvstr, argc);
367 
368 	/*
369 	 * Fill in argument portion of vector table.
370 	 */
371 	for (; argc > 0; --argc) {
372 		suword(vectp++, (long)(intptr_t)destp);
373 		while (*stringp++ != 0)
374 			destp++;
375 		destp++;
376 	}
377 
378 	/* a null vector table pointer separates the argp's from the envp's */
379 	suword(vectp++, 0);
380 
381 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
382 	suword(&arginfo->ps_nenvstr, envc);
383 
384 	/*
385 	 * Fill in environment portion of vector table.
386 	 */
387 	for (; envc > 0; --envc) {
388 		suword(vectp++, (long)(intptr_t)destp);
389 		while (*stringp++ != 0)
390 			destp++;
391 		destp++;
392 	}
393 
394 	/* end of vector table is a null pointer */
395 	suword(vectp, 0);
396 
397 	return (stack_base);
398 }
399 
400 
401 
402 extern int _ucodesel, _udatasel;
403 extern unsigned long linux_sznonrtsigcode;
404 
405 static void
406 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
407 {
408 	struct thread *td = curthread;
409 	struct proc *p = td->td_proc;
410 	struct sigacts *psp;
411 	struct trapframe *regs;
412 	struct l_rt_sigframe *fp, frame;
413 	int sig, code;
414 	int oonstack;
415 
416 	sig = ksi->ksi_signo;
417 	code = ksi->ksi_code;
418 	PROC_LOCK_ASSERT(p, MA_OWNED);
419 	psp = p->p_sigacts;
420 	mtx_assert(&psp->ps_mtx, MA_OWNED);
421 	regs = td->td_frame;
422 	oonstack = sigonstack(regs->tf_esp);
423 
424 #ifdef DEBUG
425 	if (ldebug(rt_sendsig))
426 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
427 		    catcher, sig, (void*)mask, code);
428 #endif
429 	/*
430 	 * Allocate space for the signal handler context.
431 	 */
432 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
433 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
434 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
435 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
436 	} else
437 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
438 	mtx_unlock(&psp->ps_mtx);
439 
440 	/*
441 	 * Build the argument list for the signal handler.
442 	 */
443 	if (p->p_sysent->sv_sigtbl)
444 		if (sig <= p->p_sysent->sv_sigsize)
445 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
446 
447 	bzero(&frame, sizeof(frame));
448 
449 	frame.sf_handler = catcher;
450 	frame.sf_sig = sig;
451 	frame.sf_siginfo = &fp->sf_si;
452 	frame.sf_ucontext = &fp->sf_sc;
453 
454 	/* Fill in POSIX parts */
455 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
456 
457 	/*
458 	 * Build the signal context to be used by sigreturn.
459 	 */
460 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
461 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
462 
463 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
464 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
465 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
466 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
467 	PROC_UNLOCK(p);
468 
469 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
470 
471 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
472 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
473 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
474 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
475 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
476 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
477 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
478 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
479 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
480 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
481 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
482 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
483 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
484 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
485 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
486 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
487 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
488 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
489 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
490 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
491 
492 #ifdef DEBUG
493 	if (ldebug(rt_sendsig))
494 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
495 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
496 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
497 #endif
498 
499 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
500 		/*
501 		 * Process has trashed its stack; give it an illegal
502 		 * instruction to halt it in its tracks.
503 		 */
504 #ifdef DEBUG
505 		if (ldebug(rt_sendsig))
506 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
507 			    fp, oonstack);
508 #endif
509 		PROC_LOCK(p);
510 		sigexit(td, SIGILL);
511 	}
512 
513 	/*
514 	 * Build context to run handler in.
515 	 */
516 	regs->tf_esp = (int)fp;
517 	regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
518 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
519 	regs->tf_cs = _ucodesel;
520 	regs->tf_ds = _udatasel;
521 	regs->tf_es = _udatasel;
522 	regs->tf_fs = _udatasel;
523 	regs->tf_ss = _udatasel;
524 	PROC_LOCK(p);
525 	mtx_lock(&psp->ps_mtx);
526 }
527 
528 
529 /*
530  * Send an interrupt to process.
531  *
532  * Stack is set up to allow sigcode stored
533  * in u. to call routine, followed by kcall
534  * to sigreturn routine below.  After sigreturn
535  * resets the signal mask, the stack, and the
536  * frame pointer, it returns to the user
537  * specified pc, psl.
538  */
539 static void
540 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
541 {
542 	struct thread *td = curthread;
543 	struct proc *p = td->td_proc;
544 	struct sigacts *psp;
545 	struct trapframe *regs;
546 	struct l_sigframe *fp, frame;
547 	l_sigset_t lmask;
548 	int sig, code;
549 	int oonstack, i;
550 
551 	PROC_LOCK_ASSERT(p, MA_OWNED);
552 	psp = p->p_sigacts;
553 	sig = ksi->ksi_signo;
554 	code = ksi->ksi_code;
555 	mtx_assert(&psp->ps_mtx, MA_OWNED);
556 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
557 		/* Signal handler installed with SA_SIGINFO. */
558 		linux_rt_sendsig(catcher, ksi, mask);
559 		return;
560 	}
561 	regs = td->td_frame;
562 	oonstack = sigonstack(regs->tf_esp);
563 
564 #ifdef DEBUG
565 	if (ldebug(sendsig))
566 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
567 		    catcher, sig, (void*)mask, code);
568 #endif
569 
570 	/*
571 	 * Allocate space for the signal handler context.
572 	 */
573 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
574 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
575 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
576 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
577 	} else
578 		fp = (struct l_sigframe *)regs->tf_esp - 1;
579 	mtx_unlock(&psp->ps_mtx);
580 	PROC_UNLOCK(p);
581 
582 	/*
583 	 * Build the argument list for the signal handler.
584 	 */
585 	if (p->p_sysent->sv_sigtbl)
586 		if (sig <= p->p_sysent->sv_sigsize)
587 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
588 
589 	bzero(&frame, sizeof(frame));
590 
591 	frame.sf_handler = catcher;
592 	frame.sf_sig = sig;
593 
594 	bsd_to_linux_sigset(mask, &lmask);
595 
596 	/*
597 	 * Build the signal context to be used by sigreturn.
598 	 */
599 	frame.sf_sc.sc_mask   = lmask.__bits[0];
600 	frame.sf_sc.sc_gs     = rgs();
601 	frame.sf_sc.sc_fs     = regs->tf_fs;
602 	frame.sf_sc.sc_es     = regs->tf_es;
603 	frame.sf_sc.sc_ds     = regs->tf_ds;
604 	frame.sf_sc.sc_edi    = regs->tf_edi;
605 	frame.sf_sc.sc_esi    = regs->tf_esi;
606 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
607 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
608 	frame.sf_sc.sc_edx    = regs->tf_edx;
609 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
610 	frame.sf_sc.sc_eax    = regs->tf_eax;
611 	frame.sf_sc.sc_eip    = regs->tf_eip;
612 	frame.sf_sc.sc_cs     = regs->tf_cs;
613 	frame.sf_sc.sc_eflags = regs->tf_eflags;
614 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
615 	frame.sf_sc.sc_ss     = regs->tf_ss;
616 	frame.sf_sc.sc_err    = regs->tf_err;
617 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
618 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
619 
620 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
621 		frame.sf_extramask[i] = lmask.__bits[i+1];
622 
623 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
624 		/*
625 		 * Process has trashed its stack; give it an illegal
626 		 * instruction to halt it in its tracks.
627 		 */
628 		PROC_LOCK(p);
629 		sigexit(td, SIGILL);
630 	}
631 
632 	/*
633 	 * Build context to run handler in.
634 	 */
635 	regs->tf_esp = (int)fp;
636 	regs->tf_eip = p->p_sysent->sv_sigcode_base;
637 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
638 	regs->tf_cs = _ucodesel;
639 	regs->tf_ds = _udatasel;
640 	regs->tf_es = _udatasel;
641 	regs->tf_fs = _udatasel;
642 	regs->tf_ss = _udatasel;
643 	PROC_LOCK(p);
644 	mtx_lock(&psp->ps_mtx);
645 }
646 
647 /*
648  * System call to cleanup state after a signal
649  * has been taken.  Reset signal mask and
650  * stack state from context left by sendsig (above).
651  * Return to previous pc and psl as specified by
652  * context left by sendsig. Check carefully to
653  * make sure that the user has not modified the
654  * psl to gain improper privileges or to cause
655  * a machine fault.
656  */
657 int
658 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
659 {
660 	struct l_sigframe frame;
661 	struct trapframe *regs;
662 	l_sigset_t lmask;
663 	sigset_t bmask;
664 	int eflags, i;
665 	ksiginfo_t ksi;
666 
667 	regs = td->td_frame;
668 
669 #ifdef DEBUG
670 	if (ldebug(sigreturn))
671 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
672 #endif
673 	/*
674 	 * The trampoline code hands us the sigframe.
675 	 * It is unsafe to keep track of it ourselves, in the event that a
676 	 * program jumps out of a signal handler.
677 	 */
678 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
679 		return (EFAULT);
680 
681 	/*
682 	 * Check for security violations.
683 	 */
684 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
685 	eflags = frame.sf_sc.sc_eflags;
686 	/*
687 	 * XXX do allow users to change the privileged flag PSL_RF.  The
688 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
689 	 * sometimes set it there too.  tf_eflags is kept in the signal
690 	 * context during signal handling and there is no other place
691 	 * to remember it, so the PSL_RF bit may be corrupted by the
692 	 * signal handler without us knowing.  Corruption of the PSL_RF
693 	 * bit at worst causes one more or one less debugger trap, so
694 	 * allowing it is fairly harmless.
695 	 */
696 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
697 		return(EINVAL);
698 
699 	/*
700 	 * Don't allow users to load a valid privileged %cs.  Let the
701 	 * hardware check for invalid selectors, excess privilege in
702 	 * other selectors, invalid %eip's and invalid %esp's.
703 	 */
704 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
705 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
706 		ksiginfo_init_trap(&ksi);
707 		ksi.ksi_signo = SIGBUS;
708 		ksi.ksi_code = BUS_OBJERR;
709 		ksi.ksi_trapno = T_PROTFLT;
710 		ksi.ksi_addr = (void *)regs->tf_eip;
711 		trapsignal(td, &ksi);
712 		return(EINVAL);
713 	}
714 
715 	lmask.__bits[0] = frame.sf_sc.sc_mask;
716 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
717 		lmask.__bits[i+1] = frame.sf_extramask[i];
718 	linux_to_bsd_sigset(&lmask, &bmask);
719 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
720 
721 	/*
722 	 * Restore signal context.
723 	 */
724 	/* %gs was restored by the trampoline. */
725 	regs->tf_fs     = frame.sf_sc.sc_fs;
726 	regs->tf_es     = frame.sf_sc.sc_es;
727 	regs->tf_ds     = frame.sf_sc.sc_ds;
728 	regs->tf_edi    = frame.sf_sc.sc_edi;
729 	regs->tf_esi    = frame.sf_sc.sc_esi;
730 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
731 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
732 	regs->tf_edx    = frame.sf_sc.sc_edx;
733 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
734 	regs->tf_eax    = frame.sf_sc.sc_eax;
735 	regs->tf_eip    = frame.sf_sc.sc_eip;
736 	regs->tf_cs     = frame.sf_sc.sc_cs;
737 	regs->tf_eflags = eflags;
738 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
739 	regs->tf_ss     = frame.sf_sc.sc_ss;
740 
741 	return (EJUSTRETURN);
742 }
743 
744 /*
745  * System call to cleanup state after a signal
746  * has been taken.  Reset signal mask and
747  * stack state from context left by rt_sendsig (above).
748  * Return to previous pc and psl as specified by
749  * context left by sendsig. Check carefully to
750  * make sure that the user has not modified the
751  * psl to gain improper privileges or to cause
752  * a machine fault.
753  */
754 int
755 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
756 {
757 	struct l_ucontext uc;
758 	struct l_sigcontext *context;
759 	sigset_t bmask;
760 	l_stack_t *lss;
761 	stack_t ss;
762 	struct trapframe *regs;
763 	int eflags;
764 	ksiginfo_t ksi;
765 
766 	regs = td->td_frame;
767 
768 #ifdef DEBUG
769 	if (ldebug(rt_sigreturn))
770 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
771 #endif
772 	/*
773 	 * The trampoline code hands us the ucontext.
774 	 * It is unsafe to keep track of it ourselves, in the event that a
775 	 * program jumps out of a signal handler.
776 	 */
777 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
778 		return (EFAULT);
779 
780 	context = &uc.uc_mcontext;
781 
782 	/*
783 	 * Check for security violations.
784 	 */
785 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
786 	eflags = context->sc_eflags;
787 	/*
788 	 * XXX do allow users to change the privileged flag PSL_RF.  The
789 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
790 	 * sometimes set it there too.  tf_eflags is kept in the signal
791 	 * context during signal handling and there is no other place
792 	 * to remember it, so the PSL_RF bit may be corrupted by the
793 	 * signal handler without us knowing.  Corruption of the PSL_RF
794 	 * bit at worst causes one more or one less debugger trap, so
795 	 * allowing it is fairly harmless.
796 	 */
797 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
798 		return(EINVAL);
799 
800 	/*
801 	 * Don't allow users to load a valid privileged %cs.  Let the
802 	 * hardware check for invalid selectors, excess privilege in
803 	 * other selectors, invalid %eip's and invalid %esp's.
804 	 */
805 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
806 	if (!CS_SECURE(context->sc_cs)) {
807 		ksiginfo_init_trap(&ksi);
808 		ksi.ksi_signo = SIGBUS;
809 		ksi.ksi_code = BUS_OBJERR;
810 		ksi.ksi_trapno = T_PROTFLT;
811 		ksi.ksi_addr = (void *)regs->tf_eip;
812 		trapsignal(td, &ksi);
813 		return(EINVAL);
814 	}
815 
816 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
817 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
818 
819 	/*
820 	 * Restore signal context
821 	 */
822 	/* %gs was restored by the trampoline. */
823 	regs->tf_fs     = context->sc_fs;
824 	regs->tf_es     = context->sc_es;
825 	regs->tf_ds     = context->sc_ds;
826 	regs->tf_edi    = context->sc_edi;
827 	regs->tf_esi    = context->sc_esi;
828 	regs->tf_ebp    = context->sc_ebp;
829 	regs->tf_ebx    = context->sc_ebx;
830 	regs->tf_edx    = context->sc_edx;
831 	regs->tf_ecx    = context->sc_ecx;
832 	regs->tf_eax    = context->sc_eax;
833 	regs->tf_eip    = context->sc_eip;
834 	regs->tf_cs     = context->sc_cs;
835 	regs->tf_eflags = eflags;
836 	regs->tf_esp    = context->sc_esp_at_signal;
837 	regs->tf_ss     = context->sc_ss;
838 
839 	/*
840 	 * call sigaltstack & ignore results..
841 	 */
842 	lss = &uc.uc_stack;
843 	ss.ss_sp = lss->ss_sp;
844 	ss.ss_size = lss->ss_size;
845 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
846 
847 #ifdef DEBUG
848 	if (ldebug(rt_sigreturn))
849 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
850 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
851 #endif
852 	(void)kern_sigaltstack(td, &ss, NULL);
853 
854 	return (EJUSTRETURN);
855 }
856 
857 static int
858 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
859 {
860 	struct proc *p;
861 	struct trapframe *frame;
862 
863 	p = td->td_proc;
864 	frame = td->td_frame;
865 
866 	sa->code = frame->tf_eax;
867 	sa->args[0] = frame->tf_ebx;
868 	sa->args[1] = frame->tf_ecx;
869 	sa->args[2] = frame->tf_edx;
870 	sa->args[3] = frame->tf_esi;
871 	sa->args[4] = frame->tf_edi;
872 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
873 
874 	if (sa->code >= p->p_sysent->sv_size)
875 		sa->callp = &p->p_sysent->sv_table[0];
876  	else
877  		sa->callp = &p->p_sysent->sv_table[sa->code];
878 	sa->narg = sa->callp->sy_narg;
879 
880 	td->td_retval[0] = 0;
881 	td->td_retval[1] = frame->tf_edx;
882 
883 	return (0);
884 }
885 
886 /*
887  * If a linux binary is exec'ing something, try this image activator
888  * first.  We override standard shell script execution in order to
889  * be able to modify the interpreter path.  We only do this if a linux
890  * binary is doing the exec, so we do not create an EXEC module for it.
891  */
892 static int	exec_linux_imgact_try(struct image_params *iparams);
893 
894 static int
895 exec_linux_imgact_try(struct image_params *imgp)
896 {
897     const char *head = (const char *)imgp->image_header;
898     char *rpath;
899     int error = -1;
900 
901     /*
902      * The interpreter for shell scripts run from a linux binary needs
903      * to be located in /compat/linux if possible in order to recursively
904      * maintain linux path emulation.
905      */
906     if (((const short *)head)[0] == SHELLMAGIC) {
907 	    /*
908 	     * Run our normal shell image activator.  If it succeeds attempt
909 	     * to use the alternate path for the interpreter.  If an alternate
910 	     * path is found, use our stringspace to store it.
911 	     */
912 	    if ((error = exec_shell_imgact(imgp)) == 0) {
913 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
914 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
915 		    if (rpath != NULL)
916 			    imgp->args->fname_buf =
917 				imgp->interpreter_name = rpath;
918 	    }
919     }
920     return (error);
921 }
922 
923 /*
924  * exec_setregs may initialize some registers differently than Linux
925  * does, thus potentially confusing Linux binaries. If necessary, we
926  * override the exec_setregs default(s) here.
927  */
928 static void
929 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
930 {
931 	struct pcb *pcb = td->td_pcb;
932 
933 	exec_setregs(td, imgp, stack);
934 
935 	/* Linux sets %gs to 0, we default to _udatasel */
936 	pcb->pcb_gs = 0;
937 	load_gs(0);
938 
939 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
940 }
941 
942 static void
943 linux_get_machine(const char **dst)
944 {
945 
946 	switch (cpu_class) {
947 	case CPUCLASS_686:
948 		*dst = "i686";
949 		break;
950 	case CPUCLASS_586:
951 		*dst = "i586";
952 		break;
953 	case CPUCLASS_486:
954 		*dst = "i486";
955 		break;
956 	default:
957 		*dst = "i386";
958 	}
959 }
960 
961 struct sysentvec linux_sysvec = {
962 	.sv_size	= LINUX_SYS_MAXSYSCALL,
963 	.sv_table	= linux_sysent,
964 	.sv_mask	= 0,
965 	.sv_sigsize	= LINUX_SIGTBLSZ,
966 	.sv_sigtbl	= bsd_to_linux_signal,
967 	.sv_errsize	= ELAST + 1,
968 	.sv_errtbl	= bsd_to_linux_errno,
969 	.sv_transtrap	= translate_traps,
970 	.sv_fixup	= linux_fixup,
971 	.sv_sendsig	= linux_sendsig,
972 	.sv_sigcode	= linux_sigcode,
973 	.sv_szsigcode	= &linux_szsigcode,
974 	.sv_prepsyscall	= NULL,
975 	.sv_name	= "Linux a.out",
976 	.sv_coredump	= NULL,
977 	.sv_imgact_try	= exec_linux_imgact_try,
978 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
979 	.sv_pagesize	= PAGE_SIZE,
980 	.sv_minuser	= VM_MIN_ADDRESS,
981 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
982 	.sv_usrstack	= LINUX_USRSTACK,
983 	.sv_psstrings	= PS_STRINGS,
984 	.sv_stackprot	= VM_PROT_ALL,
985 	.sv_copyout_strings = exec_copyout_strings,
986 	.sv_setregs	= exec_linux_setregs,
987 	.sv_fixlimit	= NULL,
988 	.sv_maxssiz	= NULL,
989 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
990 	.sv_set_syscall_retval = cpu_set_syscall_retval,
991 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
992 	.sv_syscallnames = NULL,
993 	.sv_shared_page_base = LINUX_SHAREDPAGE,
994 	.sv_shared_page_len = PAGE_SIZE,
995 	.sv_schedtail	= linux_schedtail,
996 };
997 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
998 
999 struct sysentvec elf_linux_sysvec = {
1000 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1001 	.sv_table	= linux_sysent,
1002 	.sv_mask	= 0,
1003 	.sv_sigsize	= LINUX_SIGTBLSZ,
1004 	.sv_sigtbl	= bsd_to_linux_signal,
1005 	.sv_errsize	= ELAST + 1,
1006 	.sv_errtbl	= bsd_to_linux_errno,
1007 	.sv_transtrap	= translate_traps,
1008 	.sv_fixup	= elf_linux_fixup,
1009 	.sv_sendsig	= linux_sendsig,
1010 	.sv_sigcode	= linux_sigcode,
1011 	.sv_szsigcode	= &linux_szsigcode,
1012 	.sv_prepsyscall	= NULL,
1013 	.sv_name	= "Linux ELF",
1014 	.sv_coredump	= elf32_coredump,
1015 	.sv_imgact_try	= exec_linux_imgact_try,
1016 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1017 	.sv_pagesize	= PAGE_SIZE,
1018 	.sv_minuser	= VM_MIN_ADDRESS,
1019 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1020 	.sv_usrstack	= LINUX_USRSTACK,
1021 	.sv_psstrings	= LINUX_PS_STRINGS,
1022 	.sv_stackprot	= VM_PROT_ALL,
1023 	.sv_copyout_strings = linux_copyout_strings,
1024 	.sv_setregs	= exec_linux_setregs,
1025 	.sv_fixlimit	= NULL,
1026 	.sv_maxssiz	= NULL,
1027 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1028 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1029 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1030 	.sv_syscallnames = NULL,
1031 	.sv_shared_page_base = LINUX_SHAREDPAGE,
1032 	.sv_shared_page_len = PAGE_SIZE,
1033 	.sv_schedtail	= linux_schedtail,
1034 };
1035 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1036 
1037 static char GNU_ABI_VENDOR[] = "GNU";
1038 static int GNULINUX_ABI_DESC = 0;
1039 
1040 static boolean_t
1041 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1042 {
1043 	const Elf32_Word *desc;
1044 	uintptr_t p;
1045 
1046 	p = (uintptr_t)(note + 1);
1047 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1048 
1049 	desc = (const Elf32_Word *)p;
1050 	if (desc[0] != GNULINUX_ABI_DESC)
1051 		return (FALSE);
1052 
1053 	/*
1054 	 * For linux we encode osrel as follows (see linux_mib.c):
1055 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1056 	 */
1057 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1058 
1059 	return (TRUE);
1060 }
1061 
1062 static Elf_Brandnote linux_brandnote = {
1063 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1064 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1065 	.hdr.n_type	= 1,
1066 	.vendor		= GNU_ABI_VENDOR,
1067 	.flags		= BN_TRANSLATE_OSREL,
1068 	.trans_osrel	= linux_trans_osrel
1069 };
1070 
1071 static Elf32_Brandinfo linux_brand = {
1072 	.brand		= ELFOSABI_LINUX,
1073 	.machine	= EM_386,
1074 	.compat_3_brand	= "Linux",
1075 	.emul_path	= "/compat/linux",
1076 	.interp_path	= "/lib/ld-linux.so.1",
1077 	.sysvec		= &elf_linux_sysvec,
1078 	.interp_newpath	= NULL,
1079 	.brand_note	= &linux_brandnote,
1080 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1081 };
1082 
1083 static Elf32_Brandinfo linux_glibc2brand = {
1084 	.brand		= ELFOSABI_LINUX,
1085 	.machine	= EM_386,
1086 	.compat_3_brand	= "Linux",
1087 	.emul_path	= "/compat/linux",
1088 	.interp_path	= "/lib/ld-linux.so.2",
1089 	.sysvec		= &elf_linux_sysvec,
1090 	.interp_newpath	= NULL,
1091 	.brand_note	= &linux_brandnote,
1092 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1093 };
1094 
1095 Elf32_Brandinfo *linux_brandlist[] = {
1096 	&linux_brand,
1097 	&linux_glibc2brand,
1098 	NULL
1099 };
1100 
1101 static int
1102 linux_elf_modevent(module_t mod, int type, void *data)
1103 {
1104 	Elf32_Brandinfo **brandinfo;
1105 	int error;
1106 	struct linux_ioctl_handler **lihp;
1107 	struct linux_device_handler **ldhp;
1108 
1109 	error = 0;
1110 
1111 	switch(type) {
1112 	case MOD_LOAD:
1113 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1114 		     ++brandinfo)
1115 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1116 				error = EINVAL;
1117 		if (error == 0) {
1118 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1119 				linux_ioctl_register_handler(*lihp);
1120 			SET_FOREACH(ldhp, linux_device_handler_set)
1121 				linux_device_register_handler(*ldhp);
1122 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1123 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1124 			LIST_INIT(&futex_list);
1125 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1126 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1127 			      NULL, 1000);
1128 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1129 			      NULL, 1000);
1130 			linux_get_machine(&linux_platform);
1131 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1132 			    sizeof(char *));
1133 			linux_osd_jail_register();
1134 			stclohz = (stathz ? stathz : hz);
1135 			if (bootverbose)
1136 				printf("Linux ELF exec handler installed\n");
1137 		} else
1138 			printf("cannot insert Linux ELF brand handler\n");
1139 		break;
1140 	case MOD_UNLOAD:
1141 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1142 		     ++brandinfo)
1143 			if (elf32_brand_inuse(*brandinfo))
1144 				error = EBUSY;
1145 		if (error == 0) {
1146 			for (brandinfo = &linux_brandlist[0];
1147 			     *brandinfo != NULL; ++brandinfo)
1148 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1149 					error = EINVAL;
1150 		}
1151 		if (error == 0) {
1152 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1153 				linux_ioctl_unregister_handler(*lihp);
1154 			SET_FOREACH(ldhp, linux_device_handler_set)
1155 				linux_device_unregister_handler(*ldhp);
1156 			mtx_destroy(&emul_lock);
1157 			sx_destroy(&emul_shared_lock);
1158 			mtx_destroy(&futex_mtx);
1159 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1160 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1161 			linux_osd_jail_deregister();
1162 			if (bootverbose)
1163 				printf("Linux ELF exec handler removed\n");
1164 		} else
1165 			printf("Could not deinstall ELF interpreter entry\n");
1166 		break;
1167 	default:
1168 		return EOPNOTSUPP;
1169 	}
1170 	return error;
1171 }
1172 
1173 static moduledata_t linux_elf_mod = {
1174 	"linuxelf",
1175 	linux_elf_modevent,
1176 	0
1177 };
1178 
1179 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1180