xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision af682d487b6c8ebba4858b2a6578b795c885e15b)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_ioctl.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/* Allocation type M_LINUX, described as "Linux mode structures". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two characters "#!" read as a single 16-bit value in host byte
 * order; exec_linux_imgact_try() compares the first short of the image
 * header against it.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/*
 * Address of struct ps_strings for ELF images: the structure ends at
 * LINUX_USRSTACK.  Used as sv_psstrings in elf_linux_sysvec.
 */
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))

/* Signal trampoline code and its size (sv_sigcode / sv_szsigcode). */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table (sv_table in both sysentvecs). */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Declarations of the ioctl handler and device handler sets. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of the file-local sysentvec callbacks. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td,
		    struct image_params *imgp, u_long stack);
static register_t *linux_copyout_strings(struct image_params *imgp);
static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);

/*
 * Platform string and its size in bytes; linux_copyout_strings() copies
 * linux_szplatform bytes of linux_platform to just below ps_strings.
 */
static int linux_szplatform;
const char *linux_platform;

/*
 * Eventhandler tags.  NOTE(review): presumably filled in where the exit,
 * exec and thread-destructor handlers are registered -- not visible in
 * this part of the file, confirm.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_exec_tag;
static eventhandler_tag linux_thread_dtor_tag;
120 
/*
 * Translation table from FreeBSD errno values to Linux errno values.
 * Linux syscalls return negative errnos while FreeBSD uses positive
 * ones, so every entry already holds the negated Linux value.  The
 * table is installed as sv_errtbl (with sv_errsize = ELAST + 1) in both
 * sysentvecs in this file.
 *
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * NOTE(review): the initializer lists 93 values (indices 0-92).  If
 * ELAST + 1 is larger than 93 the remaining entries are implicitly
 * zero -- confirm against sys/errno.h.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
	 -72, -67, -71						/* 90 - 92 */
};
140 
141 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
142 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
143 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
144 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
145 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
146 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
147 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
148 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
149 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
150 };
151 
152 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
153 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
154 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
155 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
156 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
157 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
158 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
159 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
160 	SIGIO, SIGURG, SIGSYS
161 };
162 
/* Value reported when a FreeBSD trap number has no mapping. */
#define	LINUX_T_UNKNOWN	255

/*
 * Trap number translation, indexed by the FreeBSD trap number (the T_*
 * name of each mapped slot is noted beside it).  The stored value is
 * what the sendsig routines place in sc_trapno.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/* Number of slots in _bsd_to_linux_trapcode[]. */
#define	LINUX_NTRAPCODES \
    (sizeof(_bsd_to_linux_trapcode) / sizeof(_bsd_to_linux_trapcode[0]))

/*
 * Look up a trap number; anything that does not index into the table
 * yields LINUX_T_UNKNOWN.  The comparison is done against an unsigned
 * element count, and the argument is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code) \
    ((code) >= LINUX_NTRAPCODES ? LINUX_T_UNKNOWN : \
     _bsd_to_linux_trapcode[(code)])
201 
202 /*
203  * If FreeBSD & Linux have a difference of opinion about what a trap
204  * means, deal with it here.
205  *
206  * MPSAFE
207  */
208 static int
209 translate_traps(int signal, int trap_code)
210 {
211 	if (signal != SIGBUS)
212 		return (signal);
213 	switch (trap_code) {
214 	case T_PROTFLT:
215 	case T_TSSFLT:
216 	case T_DOUBLEFLT:
217 	case T_PAGEFLT:
218 		return (SIGSEGV);
219 	default:
220 		return (signal);
221 	}
222 }
223 
224 static int
225 linux_fixup(register_t **stack_base, struct image_params *imgp)
226 {
227 	register_t *argv, *envp;
228 
229 	argv = *stack_base;
230 	envp = *stack_base + (imgp->args->argc + 1);
231 	(*stack_base)--;
232 	suword(*stack_base, (intptr_t)(void *)envp);
233 	(*stack_base)--;
234 	suword(*stack_base, (intptr_t)(void *)argv);
235 	(*stack_base)--;
236 	suword(*stack_base, imgp->args->argc);
237 	return (0);
238 }
239 
240 static int
241 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
242 {
243 	struct proc *p;
244 	Elf32_Auxargs *args;
245 	Elf32_Addr *uplatform;
246 	struct ps_strings *arginfo;
247 	register_t *pos;
248 
249 	KASSERT(curthread->td_proc == imgp->proc,
250 	    ("unsafe elf_linux_fixup(), should be curproc"));
251 
252 	p = imgp->proc;
253 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
254 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
255 	args = (Elf32_Auxargs *)imgp->auxargs;
256 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
257 
258 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
259 
260 	/*
261 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
262 	 * as it has appeared in the 2.4.0-rc7 first time.
263 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
264 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
265 	 * is not present.
266 	 * Also see linux_times() implementation.
267 	 */
268 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
269 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
270 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
271 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
272 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
273 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
274 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
275 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
276 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
277 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
278 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
279 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
280 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
281 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
282 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
283 	if (args->execfd != -1)
284 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
285 	AUXARGS_ENTRY(pos, AT_NULL, 0);
286 
287 	free(imgp->auxargs, M_TEMP);
288 	imgp->auxargs = NULL;
289 
290 	(*stack_base)--;
291 	suword(*stack_base, (register_t)imgp->args->argc);
292 	return (0);
293 }
294 
/*
 * Copy the exec argument and environment strings, the platform string
 * and the argv/envp pointer vectors out to the new user stack, and fill
 * in struct ps_strings.  Copied from kern/kern_exec.c.
 *
 * Layout, from high to low addresses: ps_strings, the platform string
 * (linux_szplatform bytes), SPARE_USRSPACE, the strings (rounded up to
 * a pointer size), then the vector table.  When imgp->auxargs is set
 * the vector table is sized to leave room for imgp->auxarg_size
 * additional pointer-sized slots.
 *
 * Returns the address of the vector table, which is also the initial
 * stack base.  The results of copyout() and suword() are not checked.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	char **vectp;
	char *stringp, *destp;
	register_t *stack_base;
	struct ps_strings *arginfo;
	struct proc *p;

	/*
	 * Calculate the string base: below ps_strings, the spare space
	 * and the platform string, leaving room for the strings in use
	 * (ARG_MAX minus the unused stringspace).
	 */
	p = imgp->proc;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));

	/*
	 * install LINUX_PLATFORM directly below ps_strings
	 */
	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
	    linux_szplatform);

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.  It is only used when no size has
		 * been set already.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets,and imgp->auxarg_size is room
		 * for argument of Runtime loader.
		 */
		vectp = (char **)(destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
	} else {
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
		    sizeof(char *));
	}

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = (register_t *)vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;

	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.  stringp walks the
	 * kernel copy of the strings to find each terminator while destp
	 * tracks the matching user address.
	 */
	for (; argc > 0; --argc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword(vectp++, 0);

	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword(vectp, 0);

	return (stack_base);
}
401 
402 
403 
404 extern unsigned long linux_sznonrtsigcode;
405 
406 static void
407 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
408 {
409 	struct thread *td = curthread;
410 	struct proc *p = td->td_proc;
411 	struct sigacts *psp;
412 	struct trapframe *regs;
413 	struct l_rt_sigframe *fp, frame;
414 	int sig, code;
415 	int oonstack;
416 
417 	sig = ksi->ksi_signo;
418 	code = ksi->ksi_code;
419 	PROC_LOCK_ASSERT(p, MA_OWNED);
420 	psp = p->p_sigacts;
421 	mtx_assert(&psp->ps_mtx, MA_OWNED);
422 	regs = td->td_frame;
423 	oonstack = sigonstack(regs->tf_esp);
424 
425 #ifdef DEBUG
426 	if (ldebug(rt_sendsig))
427 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
428 		    catcher, sig, (void*)mask, code);
429 #endif
430 	/*
431 	 * Allocate space for the signal handler context.
432 	 */
433 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
434 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
435 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
436 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
437 	} else
438 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
439 	mtx_unlock(&psp->ps_mtx);
440 
441 	/*
442 	 * Build the argument list for the signal handler.
443 	 */
444 	if (p->p_sysent->sv_sigtbl)
445 		if (sig <= p->p_sysent->sv_sigsize)
446 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
447 
448 	bzero(&frame, sizeof(frame));
449 
450 	frame.sf_handler = catcher;
451 	frame.sf_sig = sig;
452 	frame.sf_siginfo = &fp->sf_si;
453 	frame.sf_ucontext = &fp->sf_sc;
454 
455 	/* Fill in POSIX parts */
456 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
457 
458 	/*
459 	 * Build the signal context to be used by sigreturn.
460 	 */
461 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
462 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
463 
464 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
465 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
466 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
467 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
468 	PROC_UNLOCK(p);
469 
470 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
471 
472 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
473 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
474 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
475 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
476 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
477 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
478 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
479 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
480 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
481 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
482 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
483 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
484 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
485 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
486 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
487 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
488 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
489 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
490 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
491 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
492 
493 #ifdef DEBUG
494 	if (ldebug(rt_sendsig))
495 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
496 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
497 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
498 #endif
499 
500 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
501 		/*
502 		 * Process has trashed its stack; give it an illegal
503 		 * instruction to halt it in its tracks.
504 		 */
505 #ifdef DEBUG
506 		if (ldebug(rt_sendsig))
507 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
508 			    fp, oonstack);
509 #endif
510 		PROC_LOCK(p);
511 		sigexit(td, SIGILL);
512 	}
513 
514 	/*
515 	 * Build context to run handler in.
516 	 */
517 	regs->tf_esp = (int)fp;
518 	regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
519 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
520 	regs->tf_cs = _ucodesel;
521 	regs->tf_ds = _udatasel;
522 	regs->tf_es = _udatasel;
523 	regs->tf_fs = _udatasel;
524 	regs->tf_ss = _udatasel;
525 	PROC_LOCK(p);
526 	mtx_lock(&psp->ps_mtx);
527 }
528 
529 
/*
 * Send a signal to the current thread's process (sv_sendsig of both
 * sysentvecs).
 *
 * Handlers installed with SA_SIGINFO are handed to linux_rt_sendsig().
 * Otherwise a struct l_sigframe holding the handler address, the
 * translated signal number, the saved register context and the signal
 * mask is copied out to the user stack (or the alternate signal stack),
 * and the thread is redirected to the trampoline at sv_sigcode_base.
 * The trampoline calls the handler and then issues sigreturn, which
 * restores the mask and registers saved here.
 *
 * Locking: entered with the proc lock and ps_mtx held (asserted); both
 * are dropped while the frame is built and copied out, and re-taken
 * before returning.  If the copyout fails, sigexit(td, SIGILL) is
 * called.
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int sig, code;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: at the top of
	 * the alternate stack when it is enabled, not in use and wanted
	 * for this signal, otherwise just below the current stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_esp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.  The signal
	 * number is translated through the ABI's table when it is in range.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_edi;
	frame.sf_sc.sc_esi    = regs->tf_esi;
	frame.sf_sc.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.sc_edx    = regs->tf_edx;
	frame.sf_sc.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.sc_eax    = regs->tf_eax;
	frame.sf_sc.sc_eip    = regs->tf_eip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_eflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);

	/* Mask words beyond the first travel in sf_extramask[]. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = p->p_sysent->sv_sigcode_base;
	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Restore the lock state the caller expects. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
647 
648 /*
649  * System call to cleanup state after a signal
650  * has been taken.  Reset signal mask and
651  * stack state from context left by sendsig (above).
652  * Return to previous pc and psl as specified by
653  * context left by sendsig. Check carefully to
654  * make sure that the user has not modified the
655  * psl to gain improper privileges or to cause
656  * a machine fault.
657  */
658 int
659 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
660 {
661 	struct l_sigframe frame;
662 	struct trapframe *regs;
663 	l_sigset_t lmask;
664 	sigset_t bmask;
665 	int eflags, i;
666 	ksiginfo_t ksi;
667 
668 	regs = td->td_frame;
669 
670 #ifdef DEBUG
671 	if (ldebug(sigreturn))
672 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
673 #endif
674 	/*
675 	 * The trampoline code hands us the sigframe.
676 	 * It is unsafe to keep track of it ourselves, in the event that a
677 	 * program jumps out of a signal handler.
678 	 */
679 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
680 		return (EFAULT);
681 
682 	/*
683 	 * Check for security violations.
684 	 */
685 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 	eflags = frame.sf_sc.sc_eflags;
687 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
688 		return (EINVAL);
689 
690 	/*
691 	 * Don't allow users to load a valid privileged %cs.  Let the
692 	 * hardware check for invalid selectors, excess privilege in
693 	 * other selectors, invalid %eip's and invalid %esp's.
694 	 */
695 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
696 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
697 		ksiginfo_init_trap(&ksi);
698 		ksi.ksi_signo = SIGBUS;
699 		ksi.ksi_code = BUS_OBJERR;
700 		ksi.ksi_trapno = T_PROTFLT;
701 		ksi.ksi_addr = (void *)regs->tf_eip;
702 		trapsignal(td, &ksi);
703 		return (EINVAL);
704 	}
705 
706 	lmask.__bits[0] = frame.sf_sc.sc_mask;
707 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
708 		lmask.__bits[i+1] = frame.sf_extramask[i];
709 	linux_to_bsd_sigset(&lmask, &bmask);
710 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
711 
712 	/*
713 	 * Restore signal context.
714 	 */
715 	/* %gs was restored by the trampoline. */
716 	regs->tf_fs     = frame.sf_sc.sc_fs;
717 	regs->tf_es     = frame.sf_sc.sc_es;
718 	regs->tf_ds     = frame.sf_sc.sc_ds;
719 	regs->tf_edi    = frame.sf_sc.sc_edi;
720 	regs->tf_esi    = frame.sf_sc.sc_esi;
721 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
722 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
723 	regs->tf_edx    = frame.sf_sc.sc_edx;
724 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
725 	regs->tf_eax    = frame.sf_sc.sc_eax;
726 	regs->tf_eip    = frame.sf_sc.sc_eip;
727 	regs->tf_cs     = frame.sf_sc.sc_cs;
728 	regs->tf_eflags = eflags;
729 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
730 	regs->tf_ss     = frame.sf_sc.sc_ss;
731 
732 	return (EJUSTRETURN);
733 }
734 
735 /*
736  * System call to cleanup state after a signal
737  * has been taken.  Reset signal mask and
738  * stack state from context left by rt_sendsig (above).
739  * Return to previous pc and psl as specified by
740  * context left by sendsig. Check carefully to
741  * make sure that the user has not modified the
742  * psl to gain improper privileges or to cause
743  * a machine fault.
744  */
745 int
746 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
747 {
748 	struct l_ucontext uc;
749 	struct l_sigcontext *context;
750 	sigset_t bmask;
751 	l_stack_t *lss;
752 	stack_t ss;
753 	struct trapframe *regs;
754 	int eflags;
755 	ksiginfo_t ksi;
756 
757 	regs = td->td_frame;
758 
759 #ifdef DEBUG
760 	if (ldebug(rt_sigreturn))
761 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
762 #endif
763 	/*
764 	 * The trampoline code hands us the ucontext.
765 	 * It is unsafe to keep track of it ourselves, in the event that a
766 	 * program jumps out of a signal handler.
767 	 */
768 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
769 		return (EFAULT);
770 
771 	context = &uc.uc_mcontext;
772 
773 	/*
774 	 * Check for security violations.
775 	 */
776 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
777 	eflags = context->sc_eflags;
778 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
779 		return (EINVAL);
780 
781 	/*
782 	 * Don't allow users to load a valid privileged %cs.  Let the
783 	 * hardware check for invalid selectors, excess privilege in
784 	 * other selectors, invalid %eip's and invalid %esp's.
785 	 */
786 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
787 	if (!CS_SECURE(context->sc_cs)) {
788 		ksiginfo_init_trap(&ksi);
789 		ksi.ksi_signo = SIGBUS;
790 		ksi.ksi_code = BUS_OBJERR;
791 		ksi.ksi_trapno = T_PROTFLT;
792 		ksi.ksi_addr = (void *)regs->tf_eip;
793 		trapsignal(td, &ksi);
794 		return (EINVAL);
795 	}
796 
797 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
798 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
799 
800 	/*
801 	 * Restore signal context
802 	 */
803 	/* %gs was restored by the trampoline. */
804 	regs->tf_fs     = context->sc_fs;
805 	regs->tf_es     = context->sc_es;
806 	regs->tf_ds     = context->sc_ds;
807 	regs->tf_edi    = context->sc_edi;
808 	regs->tf_esi    = context->sc_esi;
809 	regs->tf_ebp    = context->sc_ebp;
810 	regs->tf_ebx    = context->sc_ebx;
811 	regs->tf_edx    = context->sc_edx;
812 	regs->tf_ecx    = context->sc_ecx;
813 	regs->tf_eax    = context->sc_eax;
814 	regs->tf_eip    = context->sc_eip;
815 	regs->tf_cs     = context->sc_cs;
816 	regs->tf_eflags = eflags;
817 	regs->tf_esp    = context->sc_esp_at_signal;
818 	regs->tf_ss     = context->sc_ss;
819 
820 	/*
821 	 * call sigaltstack & ignore results..
822 	 */
823 	lss = &uc.uc_stack;
824 	ss.ss_sp = lss->ss_sp;
825 	ss.ss_size = lss->ss_size;
826 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
827 
828 #ifdef DEBUG
829 	if (ldebug(rt_sigreturn))
830 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
831 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
832 #endif
833 	(void)kern_sigaltstack(td, &ss, NULL);
834 
835 	return (EJUSTRETURN);
836 }
837 
838 static int
839 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
840 {
841 	struct proc *p;
842 	struct trapframe *frame;
843 
844 	p = td->td_proc;
845 	frame = td->td_frame;
846 
847 	sa->code = frame->tf_eax;
848 	sa->args[0] = frame->tf_ebx;
849 	sa->args[1] = frame->tf_ecx;
850 	sa->args[2] = frame->tf_edx;
851 	sa->args[3] = frame->tf_esi;
852 	sa->args[4] = frame->tf_edi;
853 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
854 
855 	if (sa->code >= p->p_sysent->sv_size)
856 		sa->callp = &p->p_sysent->sv_table[0];
857  	else
858  		sa->callp = &p->p_sysent->sv_table[sa->code];
859 	sa->narg = sa->callp->sy_narg;
860 
861 	td->td_retval[0] = 0;
862 	td->td_retval[1] = frame->tf_edx;
863 
864 	return (0);
865 }
866 
867 /*
868  * If a linux binary is exec'ing something, try this image activator
869  * first.  We override standard shell script execution in order to
870  * be able to modify the interpreter path.  We only do this if a linux
871  * binary is doing the exec, so we do not create an EXEC module for it.
872  */
873 static int	exec_linux_imgact_try(struct image_params *iparams);
874 
875 static int
876 exec_linux_imgact_try(struct image_params *imgp)
877 {
878     const char *head = (const char *)imgp->image_header;
879     char *rpath;
880     int error = -1;
881 
882     /*
883      * The interpreter for shell scripts run from a linux binary needs
884      * to be located in /compat/linux if possible in order to recursively
885      * maintain linux path emulation.
886      */
887     if (((const short *)head)[0] == SHELLMAGIC) {
888 	    /*
889 	     * Run our normal shell image activator.  If it succeeds attempt
890 	     * to use the alternate path for the interpreter.  If an alternate
891 	     * path is found, use our stringspace to store it.
892 	     */
893 	    if ((error = exec_shell_imgact(imgp)) == 0) {
894 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
895 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
896 		    if (rpath != NULL)
897 			    imgp->args->fname_buf =
898 				imgp->interpreter_name = rpath;
899 	    }
900     }
901     return (error);
902 }
903 
904 /*
905  * exec_setregs may initialize some registers differently than Linux
906  * does, thus potentially confusing Linux binaries. If necessary, we
907  * override the exec_setregs default(s) here.
908  */
909 static void
910 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
911 {
912 	struct pcb *pcb = td->td_pcb;
913 
914 	exec_setregs(td, imgp, stack);
915 
916 	/* Linux sets %gs to 0, we default to _udatasel */
917 	pcb->pcb_gs = 0;
918 	load_gs(0);
919 
920 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
921 }
922 
923 static void
924 linux_get_machine(const char **dst)
925 {
926 
927 	switch (cpu_class) {
928 	case CPUCLASS_686:
929 		*dst = "i686";
930 		break;
931 	case CPUCLASS_586:
932 		*dst = "i586";
933 		break;
934 	case CPUCLASS_486:
935 		*dst = "i486";
936 		break;
937 	default:
938 		*dst = "i386";
939 	}
940 }
941 
942 struct sysentvec linux_sysvec = {
943 	.sv_size	= LINUX_SYS_MAXSYSCALL,
944 	.sv_table	= linux_sysent,
945 	.sv_mask	= 0,
946 	.sv_sigsize	= LINUX_SIGTBLSZ,
947 	.sv_sigtbl	= bsd_to_linux_signal,
948 	.sv_errsize	= ELAST + 1,
949 	.sv_errtbl	= bsd_to_linux_errno,
950 	.sv_transtrap	= translate_traps,
951 	.sv_fixup	= linux_fixup,
952 	.sv_sendsig	= linux_sendsig,
953 	.sv_sigcode	= linux_sigcode,
954 	.sv_szsigcode	= &linux_szsigcode,
955 	.sv_prepsyscall	= NULL,
956 	.sv_name	= "Linux a.out",
957 	.sv_coredump	= NULL,
958 	.sv_imgact_try	= exec_linux_imgact_try,
959 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
960 	.sv_pagesize	= PAGE_SIZE,
961 	.sv_minuser	= VM_MIN_ADDRESS,
962 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
963 	.sv_usrstack	= LINUX_USRSTACK,
964 	.sv_psstrings	= PS_STRINGS,
965 	.sv_stackprot	= VM_PROT_ALL,
966 	.sv_copyout_strings = exec_copyout_strings,
967 	.sv_setregs	= exec_linux_setregs,
968 	.sv_fixlimit	= NULL,
969 	.sv_maxssiz	= NULL,
970 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
971 	.sv_set_syscall_retval = cpu_set_syscall_retval,
972 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
973 	.sv_syscallnames = NULL,
974 	.sv_shared_page_base = LINUX_SHAREDPAGE,
975 	.sv_shared_page_len = PAGE_SIZE,
976 	.sv_schedtail	= linux_schedtail,
977 	.sv_thread_detach = linux_thread_detach,
978 };
979 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
980 
981 struct sysentvec elf_linux_sysvec = {
982 	.sv_size	= LINUX_SYS_MAXSYSCALL,
983 	.sv_table	= linux_sysent,
984 	.sv_mask	= 0,
985 	.sv_sigsize	= LINUX_SIGTBLSZ,
986 	.sv_sigtbl	= bsd_to_linux_signal,
987 	.sv_errsize	= ELAST + 1,
988 	.sv_errtbl	= bsd_to_linux_errno,
989 	.sv_transtrap	= translate_traps,
990 	.sv_fixup	= elf_linux_fixup,
991 	.sv_sendsig	= linux_sendsig,
992 	.sv_sigcode	= linux_sigcode,
993 	.sv_szsigcode	= &linux_szsigcode,
994 	.sv_prepsyscall	= NULL,
995 	.sv_name	= "Linux ELF",
996 	.sv_coredump	= elf32_coredump,
997 	.sv_imgact_try	= exec_linux_imgact_try,
998 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
999 	.sv_pagesize	= PAGE_SIZE,
1000 	.sv_minuser	= VM_MIN_ADDRESS,
1001 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1002 	.sv_usrstack	= LINUX_USRSTACK,
1003 	.sv_psstrings	= LINUX_PS_STRINGS,
1004 	.sv_stackprot	= VM_PROT_ALL,
1005 	.sv_copyout_strings = linux_copyout_strings,
1006 	.sv_setregs	= exec_linux_setregs,
1007 	.sv_fixlimit	= NULL,
1008 	.sv_maxssiz	= NULL,
1009 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1010 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1011 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1012 	.sv_syscallnames = NULL,
1013 	.sv_shared_page_base = LINUX_SHAREDPAGE,
1014 	.sv_shared_page_len = PAGE_SIZE,
1015 	.sv_schedtail	= linux_schedtail,
1016 	.sv_thread_detach = linux_thread_detach,
1017 };
1018 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1019 
/* Value linux_trans_osrel() requires in the first descriptor word. */
static int GNULINUX_ABI_DESC = 0;
/* Vendor string of linux_brandnote; its sizeof is used as hdr.n_namesz. */
static char GNU_ABI_VENDOR[] = "GNU";
1022 
1023 static boolean_t
1024 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1025 {
1026 	const Elf32_Word *desc;
1027 	uintptr_t p;
1028 
1029 	p = (uintptr_t)(note + 1);
1030 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1031 
1032 	desc = (const Elf32_Word *)p;
1033 	if (desc[0] != GNULINUX_ABI_DESC)
1034 		return (FALSE);
1035 
1036 	/*
1037 	 * For linux we encode osrel as follows (see linux_mib.c):
1038 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1039 	 */
1040 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1041 
1042 	return (TRUE);
1043 }
1044 
1045 static Elf_Brandnote linux_brandnote = {
1046 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1047 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1048 	.hdr.n_type	= 1,
1049 	.vendor		= GNU_ABI_VENDOR,
1050 	.flags		= BN_TRANSLATE_OSREL,
1051 	.trans_osrel	= linux_trans_osrel
1052 };
1053 
1054 static Elf32_Brandinfo linux_brand = {
1055 	.brand		= ELFOSABI_LINUX,
1056 	.machine	= EM_386,
1057 	.compat_3_brand	= "Linux",
1058 	.emul_path	= "/compat/linux",
1059 	.interp_path	= "/lib/ld-linux.so.1",
1060 	.sysvec		= &elf_linux_sysvec,
1061 	.interp_newpath	= NULL,
1062 	.brand_note	= &linux_brandnote,
1063 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1064 };
1065 
1066 static Elf32_Brandinfo linux_glibc2brand = {
1067 	.brand		= ELFOSABI_LINUX,
1068 	.machine	= EM_386,
1069 	.compat_3_brand	= "Linux",
1070 	.emul_path	= "/compat/linux",
1071 	.interp_path	= "/lib/ld-linux.so.2",
1072 	.sysvec		= &elf_linux_sysvec,
1073 	.interp_newpath	= NULL,
1074 	.brand_note	= &linux_brandnote,
1075 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1076 };
1077 
1078 Elf32_Brandinfo *linux_brandlist[] = {
1079 	&linux_brand,
1080 	&linux_glibc2brand,
1081 	NULL
1082 };
1083 
1084 static int
1085 linux_elf_modevent(module_t mod, int type, void *data)
1086 {
1087 	Elf32_Brandinfo **brandinfo;
1088 	int error;
1089 	struct linux_ioctl_handler **lihp;
1090 	struct linux_device_handler **ldhp;
1091 
1092 	error = 0;
1093 
1094 	switch(type) {
1095 	case MOD_LOAD:
1096 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1097 		     ++brandinfo)
1098 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1099 				error = EINVAL;
1100 		if (error == 0) {
1101 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1102 				linux_ioctl_register_handler(*lihp);
1103 			SET_FOREACH(ldhp, linux_device_handler_set)
1104 				linux_device_register_handler(*ldhp);
1105 			LIST_INIT(&futex_list);
1106 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1107 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1108 			      NULL, 1000);
1109 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1110 			      NULL, 1000);
1111 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1112 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1113 			linux_get_machine(&linux_platform);
1114 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1115 			    sizeof(char *));
1116 			linux_osd_jail_register();
1117 			stclohz = (stathz ? stathz : hz);
1118 			if (bootverbose)
1119 				printf("Linux ELF exec handler installed\n");
1120 		} else
1121 			printf("cannot insert Linux ELF brand handler\n");
1122 		break;
1123 	case MOD_UNLOAD:
1124 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1125 		     ++brandinfo)
1126 			if (elf32_brand_inuse(*brandinfo))
1127 				error = EBUSY;
1128 		if (error == 0) {
1129 			for (brandinfo = &linux_brandlist[0];
1130 			     *brandinfo != NULL; ++brandinfo)
1131 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1132 					error = EINVAL;
1133 		}
1134 		if (error == 0) {
1135 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1136 				linux_ioctl_unregister_handler(*lihp);
1137 			SET_FOREACH(ldhp, linux_device_handler_set)
1138 				linux_device_unregister_handler(*ldhp);
1139 			mtx_destroy(&futex_mtx);
1140 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1141 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1142 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1143 			linux_osd_jail_deregister();
1144 			if (bootverbose)
1145 				printf("Linux ELF exec handler removed\n");
1146 		} else
1147 			printf("Could not deinstall ELF interpreter entry\n");
1148 		break;
1149 	default:
1150 		return (EOPNOTSUPP);
1151 	}
1152 	return (error);
1153 }
1154 
1155 static moduledata_t linux_elf_mod = {
1156 	"linuxelf",
1157 	linux_elf_modevent,
1158 	0
1159 };
1160 
1161 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1162