xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision d7d3cbdadf840c3b43a54df7075d870c1e933b06)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_mib.h>
70 #include <compat/linux/linux_misc.h>
71 #include <compat/linux/linux_signal.h>
72 #include <compat/linux/linux_util.h>
73 
74 MODULE_VERSION(linux, 1);
75 
76 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
77 
78 #if BYTE_ORDER == LITTLE_ENDIAN
79 #define SHELLMAGIC      0x2123 /* #! */
80 #else
81 #define SHELLMAGIC      0x2321
82 #endif
83 
/*
 * The sendsig routines are not system calls, yet they want to use the
 * ldebug() facility, which is keyed by syscall number.  Give both of
 * them the number 0; that is slightly less bogus than borrowing
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
92 
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
95 
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97 
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100 
101 static int	linux_fixup(register_t **stack_base,
102 		    struct image_params *iparams);
103 static int	elf_linux_fixup(register_t **stack_base,
104 		    struct image_params *iparams);
105 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
106 static void	exec_linux_setregs(struct thread *td,
107 		    struct image_params *imgp, u_long stack);
108 static register_t *linux_copyout_strings(struct image_params *imgp);
109 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
110 
111 static int linux_szplatform;
112 const char *linux_platform;
113 
114 static eventhandler_tag linux_exit_tag;
115 static eventhandler_tag linux_exec_tag;
116 
117 /*
118  * Linux syscalls return negative errno's, we do positive and map them
119  * Reference:
120  *   FreeBSD: src/sys/sys/errno.h
121  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
122  *            linux-2.6.17.8/include/asm-generic/errno.h
123  */
124 static int bsd_to_linux_errno[ELAST + 1] = {
125 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
126 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
127 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
128 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
129 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
130 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
131 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
132 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
133 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
134 	 -72, -67, -71
135 };
136 
137 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
138 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
139 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
140 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
141 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
142 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
143 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
144 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
145 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
146 };
147 
148 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
149 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
150 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
151 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
152 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
153 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
154 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
155 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
156 	SIGIO, SIGURG, SIGSYS
157 };
158 
/* Reported for any FreeBSD trap number without a mapping below. */
#define	LINUX_T_UNKNOWN		255

/*
 * Trap numbers stored in the Linux signal context, indexed by FreeBSD
 * trap number.  Use bsd_to_linux_trapcode() rather than indexing the
 * array directly.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0: - */
	6,			/*  1: T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2: - */
	3,			/*  3: T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4: - */
	LINUX_T_UNKNOWN,	/*  5: - */
	16,			/*  6: T_ARITHTRAP */
	254,			/*  7: T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8: - */
	13,			/*  9: T_PROTFLT */
	1,			/* 10: T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11: - */
	14,			/* 12: T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13: - */
	17,			/* 14: T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15: - */
	LINUX_T_UNKNOWN,	/* 16: - */
	LINUX_T_UNKNOWN,	/* 17: - */
	0,			/* 18: T_DIVIDE */
	2,			/* 19: T_NMI */
	4,			/* 20: T_OFLOW */
	5,			/* 21: T_BOUND */
	7,			/* 22: T_DNA */
	8,			/* 23: T_DOUBLEFLT */
	9,			/* 24: T_FPOPFLT */
	10,			/* 25: T_TSSFLT */
	11,			/* 26: T_SEGNPFLT */
	12,			/* 27: T_STKFLT */
	18,			/* 28: T_MCHK */
	19,			/* 29: T_XMMFLT */
	15			/* 30: T_RESERVED */
};

/*
 * Bounds-checked lookup: values of `code' past the end of the table
 * yield LINUX_T_UNKNOWN.  `code' is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(*_bsd_to_linux_trapcode) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
197 
198 /*
199  * If FreeBSD & Linux have a difference of opinion about what a trap
200  * means, deal with it here.
201  *
202  * MPSAFE
203  */
204 static int
205 translate_traps(int signal, int trap_code)
206 {
207 	if (signal != SIGBUS)
208 		return signal;
209 	switch (trap_code) {
210 	case T_PROTFLT:
211 	case T_TSSFLT:
212 	case T_DOUBLEFLT:
213 	case T_PAGEFLT:
214 		return SIGSEGV;
215 	default:
216 		return signal;
217 	}
218 }
219 
220 static int
221 linux_fixup(register_t **stack_base, struct image_params *imgp)
222 {
223 	register_t *argv, *envp;
224 
225 	argv = *stack_base;
226 	envp = *stack_base + (imgp->args->argc + 1);
227 	(*stack_base)--;
228 	**stack_base = (intptr_t)(void *)envp;
229 	(*stack_base)--;
230 	**stack_base = (intptr_t)(void *)argv;
231 	(*stack_base)--;
232 	**stack_base = imgp->args->argc;
233 	return (0);
234 }
235 
236 static int
237 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
238 {
239 	struct proc *p;
240 	Elf32_Auxargs *args;
241 	Elf32_Addr *uplatform;
242 	struct ps_strings *arginfo;
243 	register_t *pos;
244 
245 	KASSERT(curthread->td_proc == imgp->proc,
246 	    ("unsafe elf_linux_fixup(), should be curproc"));
247 
248 	p = imgp->proc;
249 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
250 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
251 	    linux_szplatform);
252 	args = (Elf32_Auxargs *)imgp->auxargs;
253 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
254 
255 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
256 
257 	/*
258 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
259 	 * as it has appeared in the 2.4.0-rc7 first time.
260 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
261 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
262 	 * is not present.
263 	 * Also see linux_times() implementation.
264 	 */
265 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
266 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
267 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
268 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
269 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
270 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
271 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
272 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
273 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
274 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
275 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
279 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
280 	if (args->execfd != -1)
281 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
282 	AUXARGS_ENTRY(pos, AT_NULL, 0);
283 
284 	free(imgp->auxargs, M_TEMP);
285 	imgp->auxargs = NULL;
286 
287 	(*stack_base)--;
288 	**stack_base = (register_t)imgp->args->argc;
289 	return (0);
290 }
291 
292 /*
293  * Copied from kern/kern_exec.c
294  */
295 static register_t *
296 linux_copyout_strings(struct image_params *imgp)
297 {
298 	int argc, envc;
299 	char **vectp;
300 	char *stringp, *destp;
301 	register_t *stack_base;
302 	struct ps_strings *arginfo;
303 	struct proc *p;
304 
305 	/*
306 	 * Calculate string base and vector table pointers.
307 	 * Also deal with signal trampoline code for this exec type.
308 	 */
309 	p = imgp->proc;
310 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
311 	destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
312 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
313 	    sizeof(char *));
314 
315 	/*
316 	 * install sigcode
317 	 */
318 	copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo -
319 	    linux_szsigcode), linux_szsigcode);
320 
321 	/*
322 	 * install LINUX_PLATFORM
323 	 */
324 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
325 	    linux_szplatform), linux_szplatform);
326 
327 	/*
328 	 * If we have a valid auxargs ptr, prepare some room
329 	 * on the stack.
330 	 */
331 	if (imgp->auxargs) {
332 		/*
333 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
334 		 * lower compatibility.
335 		 */
336 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
337 		    (LINUX_AT_COUNT * 2);
338 		/*
339 		 * The '+ 2' is for the null pointers at the end of each of
340 		 * the arg and env vector sets,and imgp->auxarg_size is room
341 		 * for argument of Runtime loader.
342 		 */
343 		vectp = (char **)(destp - (imgp->args->argc +
344 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
345 	} else {
346 		/*
347 		 * The '+ 2' is for the null pointers at the end of each of
348 		 * the arg and env vector sets
349 		 */
350 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
351 		    sizeof(char *));
352 	}
353 
354 	/*
355 	 * vectp also becomes our initial stack base
356 	 */
357 	stack_base = (register_t *)vectp;
358 
359 	stringp = imgp->args->begin_argv;
360 	argc = imgp->args->argc;
361 	envc = imgp->args->envc;
362 
363 	/*
364 	 * Copy out strings - arguments and environment.
365 	 */
366 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
367 
368 	/*
369 	 * Fill in "ps_strings" struct for ps, w, etc.
370 	 */
371 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
372 	suword(&arginfo->ps_nargvstr, argc);
373 
374 	/*
375 	 * Fill in argument portion of vector table.
376 	 */
377 	for (; argc > 0; --argc) {
378 		suword(vectp++, (long)(intptr_t)destp);
379 		while (*stringp++ != 0)
380 			destp++;
381 		destp++;
382 	}
383 
384 	/* a null vector table pointer separates the argp's from the envp's */
385 	suword(vectp++, 0);
386 
387 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
388 	suword(&arginfo->ps_nenvstr, envc);
389 
390 	/*
391 	 * Fill in environment portion of vector table.
392 	 */
393 	for (; envc > 0; --envc) {
394 		suword(vectp++, (long)(intptr_t)destp);
395 		while (*stringp++ != 0)
396 			destp++;
397 		destp++;
398 	}
399 
400 	/* end of vector table is a null pointer */
401 	suword(vectp, 0);
402 
403 	return (stack_base);
404 }
405 
406 
407 
/* User code and data segment selectors loaded when entering a handler. */
extern int _ucodesel;
extern int _udatasel;
/* Offset added to the sigcode base to reach the entry used by rt frames. */
extern unsigned long linux_sznonrtsigcode;
410 
411 static void
412 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
413 {
414 	struct thread *td = curthread;
415 	struct proc *p = td->td_proc;
416 	struct sigacts *psp;
417 	struct trapframe *regs;
418 	struct l_rt_sigframe *fp, frame;
419 	int sig, code;
420 	int oonstack;
421 
422 	sig = ksi->ksi_signo;
423 	code = ksi->ksi_code;
424 	PROC_LOCK_ASSERT(p, MA_OWNED);
425 	psp = p->p_sigacts;
426 	mtx_assert(&psp->ps_mtx, MA_OWNED);
427 	regs = td->td_frame;
428 	oonstack = sigonstack(regs->tf_esp);
429 
430 #ifdef DEBUG
431 	if (ldebug(rt_sendsig))
432 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
433 		    catcher, sig, (void*)mask, code);
434 #endif
435 	/*
436 	 * Allocate space for the signal handler context.
437 	 */
438 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
439 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
440 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
441 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
442 	} else
443 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
444 	mtx_unlock(&psp->ps_mtx);
445 
446 	/*
447 	 * Build the argument list for the signal handler.
448 	 */
449 	if (p->p_sysent->sv_sigtbl)
450 		if (sig <= p->p_sysent->sv_sigsize)
451 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
452 
453 	bzero(&frame, sizeof(frame));
454 
455 	frame.sf_handler = catcher;
456 	frame.sf_sig = sig;
457 	frame.sf_siginfo = &fp->sf_si;
458 	frame.sf_ucontext = &fp->sf_sc;
459 
460 	/* Fill in POSIX parts */
461 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
462 
463 	/*
464 	 * Build the signal context to be used by sigreturn.
465 	 */
466 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
467 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
468 
469 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
470 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
471 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
472 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
473 	PROC_UNLOCK(p);
474 
475 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
476 
477 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
478 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
479 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
480 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
481 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
482 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
483 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
484 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
485 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
486 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
487 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
488 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
489 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
490 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
491 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
492 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
493 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
494 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
495 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
496 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
497 
498 #ifdef DEBUG
499 	if (ldebug(rt_sendsig))
500 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
501 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
502 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
503 #endif
504 
505 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
506 		/*
507 		 * Process has trashed its stack; give it an illegal
508 		 * instruction to halt it in its tracks.
509 		 */
510 #ifdef DEBUG
511 		if (ldebug(rt_sendsig))
512 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
513 			    fp, oonstack);
514 #endif
515 		PROC_LOCK(p);
516 		sigexit(td, SIGILL);
517 	}
518 
519 	/*
520 	 * Build context to run handler in.
521 	 */
522 	regs->tf_esp = (int)fp;
523 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
524 	    linux_sznonrtsigcode;
525 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
526 	regs->tf_cs = _ucodesel;
527 	regs->tf_ds = _udatasel;
528 	regs->tf_es = _udatasel;
529 	regs->tf_fs = _udatasel;
530 	regs->tf_ss = _udatasel;
531 	PROC_LOCK(p);
532 	mtx_lock(&psp->ps_mtx);
533 }
534 
535 
536 /*
537  * Send an interrupt to process.
538  *
539  * Stack is set up to allow sigcode stored
540  * in u. to call routine, followed by kcall
541  * to sigreturn routine below.  After sigreturn
542  * resets the signal mask, the stack, and the
543  * frame pointer, it returns to the user
544  * specified pc, psl.
545  */
546 static void
547 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
548 {
549 	struct thread *td = curthread;
550 	struct proc *p = td->td_proc;
551 	struct sigacts *psp;
552 	struct trapframe *regs;
553 	struct l_sigframe *fp, frame;
554 	l_sigset_t lmask;
555 	int sig, code;
556 	int oonstack, i;
557 
558 	PROC_LOCK_ASSERT(p, MA_OWNED);
559 	psp = p->p_sigacts;
560 	sig = ksi->ksi_signo;
561 	code = ksi->ksi_code;
562 	mtx_assert(&psp->ps_mtx, MA_OWNED);
563 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
564 		/* Signal handler installed with SA_SIGINFO. */
565 		linux_rt_sendsig(catcher, ksi, mask);
566 		return;
567 	}
568 	regs = td->td_frame;
569 	oonstack = sigonstack(regs->tf_esp);
570 
571 #ifdef DEBUG
572 	if (ldebug(sendsig))
573 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
574 		    catcher, sig, (void*)mask, code);
575 #endif
576 
577 	/*
578 	 * Allocate space for the signal handler context.
579 	 */
580 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
581 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
582 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
583 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
584 	} else
585 		fp = (struct l_sigframe *)regs->tf_esp - 1;
586 	mtx_unlock(&psp->ps_mtx);
587 	PROC_UNLOCK(p);
588 
589 	/*
590 	 * Build the argument list for the signal handler.
591 	 */
592 	if (p->p_sysent->sv_sigtbl)
593 		if (sig <= p->p_sysent->sv_sigsize)
594 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
595 
596 	bzero(&frame, sizeof(frame));
597 
598 	frame.sf_handler = catcher;
599 	frame.sf_sig = sig;
600 
601 	bsd_to_linux_sigset(mask, &lmask);
602 
603 	/*
604 	 * Build the signal context to be used by sigreturn.
605 	 */
606 	frame.sf_sc.sc_mask   = lmask.__bits[0];
607 	frame.sf_sc.sc_gs     = rgs();
608 	frame.sf_sc.sc_fs     = regs->tf_fs;
609 	frame.sf_sc.sc_es     = regs->tf_es;
610 	frame.sf_sc.sc_ds     = regs->tf_ds;
611 	frame.sf_sc.sc_edi    = regs->tf_edi;
612 	frame.sf_sc.sc_esi    = regs->tf_esi;
613 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
614 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
615 	frame.sf_sc.sc_edx    = regs->tf_edx;
616 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
617 	frame.sf_sc.sc_eax    = regs->tf_eax;
618 	frame.sf_sc.sc_eip    = regs->tf_eip;
619 	frame.sf_sc.sc_cs     = regs->tf_cs;
620 	frame.sf_sc.sc_eflags = regs->tf_eflags;
621 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
622 	frame.sf_sc.sc_ss     = regs->tf_ss;
623 	frame.sf_sc.sc_err    = regs->tf_err;
624 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
625 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
626 
627 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
628 		frame.sf_extramask[i] = lmask.__bits[i+1];
629 
630 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
631 		/*
632 		 * Process has trashed its stack; give it an illegal
633 		 * instruction to halt it in its tracks.
634 		 */
635 		PROC_LOCK(p);
636 		sigexit(td, SIGILL);
637 	}
638 
639 	/*
640 	 * Build context to run handler in.
641 	 */
642 	regs->tf_esp = (int)fp;
643 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
644 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
645 	regs->tf_cs = _ucodesel;
646 	regs->tf_ds = _udatasel;
647 	regs->tf_es = _udatasel;
648 	regs->tf_fs = _udatasel;
649 	regs->tf_ss = _udatasel;
650 	PROC_LOCK(p);
651 	mtx_lock(&psp->ps_mtx);
652 }
653 
654 /*
655  * System call to cleanup state after a signal
656  * has been taken.  Reset signal mask and
657  * stack state from context left by sendsig (above).
658  * Return to previous pc and psl as specified by
659  * context left by sendsig. Check carefully to
660  * make sure that the user has not modified the
661  * psl to gain improper privileges or to cause
662  * a machine fault.
663  */
664 int
665 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
666 {
667 	struct l_sigframe frame;
668 	struct trapframe *regs;
669 	l_sigset_t lmask;
670 	sigset_t bmask;
671 	int eflags, i;
672 	ksiginfo_t ksi;
673 
674 	regs = td->td_frame;
675 
676 #ifdef DEBUG
677 	if (ldebug(sigreturn))
678 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
679 #endif
680 	/*
681 	 * The trampoline code hands us the sigframe.
682 	 * It is unsafe to keep track of it ourselves, in the event that a
683 	 * program jumps out of a signal handler.
684 	 */
685 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
686 		return (EFAULT);
687 
688 	/*
689 	 * Check for security violations.
690 	 */
691 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
692 	eflags = frame.sf_sc.sc_eflags;
693 	/*
694 	 * XXX do allow users to change the privileged flag PSL_RF.  The
695 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
696 	 * sometimes set it there too.  tf_eflags is kept in the signal
697 	 * context during signal handling and there is no other place
698 	 * to remember it, so the PSL_RF bit may be corrupted by the
699 	 * signal handler without us knowing.  Corruption of the PSL_RF
700 	 * bit at worst causes one more or one less debugger trap, so
701 	 * allowing it is fairly harmless.
702 	 */
703 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
704 		return(EINVAL);
705 
706 	/*
707 	 * Don't allow users to load a valid privileged %cs.  Let the
708 	 * hardware check for invalid selectors, excess privilege in
709 	 * other selectors, invalid %eip's and invalid %esp's.
710 	 */
711 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
712 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
713 		ksiginfo_init_trap(&ksi);
714 		ksi.ksi_signo = SIGBUS;
715 		ksi.ksi_code = BUS_OBJERR;
716 		ksi.ksi_trapno = T_PROTFLT;
717 		ksi.ksi_addr = (void *)regs->tf_eip;
718 		trapsignal(td, &ksi);
719 		return(EINVAL);
720 	}
721 
722 	lmask.__bits[0] = frame.sf_sc.sc_mask;
723 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
724 		lmask.__bits[i+1] = frame.sf_extramask[i];
725 	linux_to_bsd_sigset(&lmask, &bmask);
726 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
727 
728 	/*
729 	 * Restore signal context.
730 	 */
731 	/* %gs was restored by the trampoline. */
732 	regs->tf_fs     = frame.sf_sc.sc_fs;
733 	regs->tf_es     = frame.sf_sc.sc_es;
734 	regs->tf_ds     = frame.sf_sc.sc_ds;
735 	regs->tf_edi    = frame.sf_sc.sc_edi;
736 	regs->tf_esi    = frame.sf_sc.sc_esi;
737 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
738 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
739 	regs->tf_edx    = frame.sf_sc.sc_edx;
740 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
741 	regs->tf_eax    = frame.sf_sc.sc_eax;
742 	regs->tf_eip    = frame.sf_sc.sc_eip;
743 	regs->tf_cs     = frame.sf_sc.sc_cs;
744 	regs->tf_eflags = eflags;
745 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
746 	regs->tf_ss     = frame.sf_sc.sc_ss;
747 
748 	return (EJUSTRETURN);
749 }
750 
751 /*
752  * System call to cleanup state after a signal
753  * has been taken.  Reset signal mask and
754  * stack state from context left by rt_sendsig (above).
755  * Return to previous pc and psl as specified by
756  * context left by sendsig. Check carefully to
757  * make sure that the user has not modified the
758  * psl to gain improper privileges or to cause
759  * a machine fault.
760  */
761 int
762 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
763 {
764 	struct l_ucontext uc;
765 	struct l_sigcontext *context;
766 	sigset_t bmask;
767 	l_stack_t *lss;
768 	stack_t ss;
769 	struct trapframe *regs;
770 	int eflags;
771 	ksiginfo_t ksi;
772 
773 	regs = td->td_frame;
774 
775 #ifdef DEBUG
776 	if (ldebug(rt_sigreturn))
777 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
778 #endif
779 	/*
780 	 * The trampoline code hands us the ucontext.
781 	 * It is unsafe to keep track of it ourselves, in the event that a
782 	 * program jumps out of a signal handler.
783 	 */
784 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
785 		return (EFAULT);
786 
787 	context = &uc.uc_mcontext;
788 
789 	/*
790 	 * Check for security violations.
791 	 */
792 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
793 	eflags = context->sc_eflags;
794 	/*
795 	 * XXX do allow users to change the privileged flag PSL_RF.  The
796 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
797 	 * sometimes set it there too.  tf_eflags is kept in the signal
798 	 * context during signal handling and there is no other place
799 	 * to remember it, so the PSL_RF bit may be corrupted by the
800 	 * signal handler without us knowing.  Corruption of the PSL_RF
801 	 * bit at worst causes one more or one less debugger trap, so
802 	 * allowing it is fairly harmless.
803 	 */
804 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
805 		return(EINVAL);
806 
807 	/*
808 	 * Don't allow users to load a valid privileged %cs.  Let the
809 	 * hardware check for invalid selectors, excess privilege in
810 	 * other selectors, invalid %eip's and invalid %esp's.
811 	 */
812 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
813 	if (!CS_SECURE(context->sc_cs)) {
814 		ksiginfo_init_trap(&ksi);
815 		ksi.ksi_signo = SIGBUS;
816 		ksi.ksi_code = BUS_OBJERR;
817 		ksi.ksi_trapno = T_PROTFLT;
818 		ksi.ksi_addr = (void *)regs->tf_eip;
819 		trapsignal(td, &ksi);
820 		return(EINVAL);
821 	}
822 
823 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
824 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
825 
826 	/*
827 	 * Restore signal context
828 	 */
829 	/* %gs was restored by the trampoline. */
830 	regs->tf_fs     = context->sc_fs;
831 	regs->tf_es     = context->sc_es;
832 	regs->tf_ds     = context->sc_ds;
833 	regs->tf_edi    = context->sc_edi;
834 	regs->tf_esi    = context->sc_esi;
835 	regs->tf_ebp    = context->sc_ebp;
836 	regs->tf_ebx    = context->sc_ebx;
837 	regs->tf_edx    = context->sc_edx;
838 	regs->tf_ecx    = context->sc_ecx;
839 	regs->tf_eax    = context->sc_eax;
840 	regs->tf_eip    = context->sc_eip;
841 	regs->tf_cs     = context->sc_cs;
842 	regs->tf_eflags = eflags;
843 	regs->tf_esp    = context->sc_esp_at_signal;
844 	regs->tf_ss     = context->sc_ss;
845 
846 	/*
847 	 * call sigaltstack & ignore results..
848 	 */
849 	lss = &uc.uc_stack;
850 	ss.ss_sp = lss->ss_sp;
851 	ss.ss_size = lss->ss_size;
852 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
853 
854 #ifdef DEBUG
855 	if (ldebug(rt_sigreturn))
856 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
857 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
858 #endif
859 	(void)kern_sigaltstack(td, &ss, NULL);
860 
861 	return (EJUSTRETURN);
862 }
863 
864 static int
865 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
866 {
867 	struct proc *p;
868 	struct trapframe *frame;
869 
870 	p = td->td_proc;
871 	frame = td->td_frame;
872 
873 	sa->code = frame->tf_eax;
874 	sa->args[0] = frame->tf_ebx;
875 	sa->args[1] = frame->tf_ecx;
876 	sa->args[2] = frame->tf_edx;
877 	sa->args[3] = frame->tf_esi;
878 	sa->args[4] = frame->tf_edi;
879 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
880 
881 	if (sa->code >= p->p_sysent->sv_size)
882 		sa->callp = &p->p_sysent->sv_table[0];
883  	else
884  		sa->callp = &p->p_sysent->sv_table[sa->code];
885 	sa->narg = sa->callp->sy_narg;
886 
887 	td->td_retval[0] = 0;
888 	td->td_retval[1] = frame->tf_edx;
889 
890 	return (0);
891 }
892 
893 /*
894  * If a linux binary is exec'ing something, try this image activator
895  * first.  We override standard shell script execution in order to
896  * be able to modify the interpreter path.  We only do this if a linux
897  * binary is doing the exec, so we do not create an EXEC module for it.
898  */
899 static int	exec_linux_imgact_try(struct image_params *iparams);
900 
901 static int
902 exec_linux_imgact_try(struct image_params *imgp)
903 {
904     const char *head = (const char *)imgp->image_header;
905     char *rpath;
906     int error = -1;
907 
908     /*
909      * The interpreter for shell scripts run from a linux binary needs
910      * to be located in /compat/linux if possible in order to recursively
911      * maintain linux path emulation.
912      */
913     if (((const short *)head)[0] == SHELLMAGIC) {
914 	    /*
915 	     * Run our normal shell image activator.  If it succeeds attempt
916 	     * to use the alternate path for the interpreter.  If an alternate
917 	     * path is found, use our stringspace to store it.
918 	     */
919 	    if ((error = exec_shell_imgact(imgp)) == 0) {
920 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
921 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
922 		    if (rpath != NULL)
923 			    imgp->args->fname_buf =
924 				imgp->interpreter_name = rpath;
925 	    }
926     }
927     return (error);
928 }
929 
930 /*
931  * exec_setregs may initialize some registers differently than Linux
932  * does, thus potentially confusing Linux binaries. If necessary, we
933  * override the exec_setregs default(s) here.
934  */
935 static void
936 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
937 {
938 	struct pcb *pcb = td->td_pcb;
939 
940 	exec_setregs(td, imgp, stack);
941 
942 	/* Linux sets %gs to 0, we default to _udatasel */
943 	pcb->pcb_gs = 0;
944 	load_gs(0);
945 
946 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
947 }
948 
949 static void
950 linux_get_machine(const char **dst)
951 {
952 
953 	switch (cpu_class) {
954 	case CPUCLASS_686:
955 		*dst = "i686";
956 		break;
957 	case CPUCLASS_586:
958 		*dst = "i586";
959 		break;
960 	case CPUCLASS_486:
961 		*dst = "i486";
962 		break;
963 	default:
964 		*dst = "i386";
965 	}
966 }
967 
968 struct sysentvec linux_sysvec = {
969 	.sv_size	= LINUX_SYS_MAXSYSCALL,
970 	.sv_table	= linux_sysent,
971 	.sv_mask	= 0,
972 	.sv_sigsize	= LINUX_SIGTBLSZ,
973 	.sv_sigtbl	= bsd_to_linux_signal,
974 	.sv_errsize	= ELAST + 1,
975 	.sv_errtbl	= bsd_to_linux_errno,
976 	.sv_transtrap	= translate_traps,
977 	.sv_fixup	= linux_fixup,
978 	.sv_sendsig	= linux_sendsig,
979 	.sv_sigcode	= linux_sigcode,
980 	.sv_szsigcode	= &linux_szsigcode,
981 	.sv_prepsyscall	= NULL,
982 	.sv_name	= "Linux a.out",
983 	.sv_coredump	= NULL,
984 	.sv_imgact_try	= exec_linux_imgact_try,
985 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
986 	.sv_pagesize	= PAGE_SIZE,
987 	.sv_minuser	= VM_MIN_ADDRESS,
988 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
989 	.sv_usrstack	= USRSTACK,
990 	.sv_psstrings	= PS_STRINGS,
991 	.sv_stackprot	= VM_PROT_ALL,
992 	.sv_copyout_strings = exec_copyout_strings,
993 	.sv_setregs	= exec_linux_setregs,
994 	.sv_fixlimit	= NULL,
995 	.sv_maxssiz	= NULL,
996 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
997 	.sv_set_syscall_retval = cpu_set_syscall_retval,
998 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
999 	.sv_syscallnames = NULL,
1000 	.sv_schedtail	= linux_schedtail,
1001 };
1002 
1003 struct sysentvec elf_linux_sysvec = {
1004 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1005 	.sv_table	= linux_sysent,
1006 	.sv_mask	= 0,
1007 	.sv_sigsize	= LINUX_SIGTBLSZ,
1008 	.sv_sigtbl	= bsd_to_linux_signal,
1009 	.sv_errsize	= ELAST + 1,
1010 	.sv_errtbl	= bsd_to_linux_errno,
1011 	.sv_transtrap	= translate_traps,
1012 	.sv_fixup	= elf_linux_fixup,
1013 	.sv_sendsig	= linux_sendsig,
1014 	.sv_sigcode	= linux_sigcode,
1015 	.sv_szsigcode	= &linux_szsigcode,
1016 	.sv_prepsyscall	= NULL,
1017 	.sv_name	= "Linux ELF",
1018 	.sv_coredump	= elf32_coredump,
1019 	.sv_imgact_try	= exec_linux_imgact_try,
1020 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1021 	.sv_pagesize	= PAGE_SIZE,
1022 	.sv_minuser	= VM_MIN_ADDRESS,
1023 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1024 	.sv_usrstack	= USRSTACK,
1025 	.sv_psstrings	= PS_STRINGS,
1026 	.sv_stackprot	= VM_PROT_ALL,
1027 	.sv_copyout_strings = linux_copyout_strings,
1028 	.sv_setregs	= exec_linux_setregs,
1029 	.sv_fixlimit	= NULL,
1030 	.sv_maxssiz	= NULL,
1031 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32,
1032 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1033 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1034 	.sv_syscallnames = NULL,
1035 	.sv_schedtail	= linux_schedtail,
1036 };
1037 
1038 static char GNU_ABI_VENDOR[] = "GNU";
1039 static int GNULINUX_ABI_DESC = 0;
1040 
1041 static boolean_t
1042 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1043 {
1044 	const Elf32_Word *desc;
1045 	uintptr_t p;
1046 
1047 	p = (uintptr_t)(note + 1);
1048 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1049 
1050 	desc = (const Elf32_Word *)p;
1051 	if (desc[0] != GNULINUX_ABI_DESC)
1052 		return (FALSE);
1053 
1054 	/*
1055 	 * For linux we encode osrel as follows (see linux_mib.c):
1056 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1057 	 */
1058 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1059 
1060 	return (TRUE);
1061 }
1062 
1063 static Elf_Brandnote linux_brandnote = {
1064 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1065 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1066 	.hdr.n_type	= 1,
1067 	.vendor		= GNU_ABI_VENDOR,
1068 	.flags		= BN_TRANSLATE_OSREL,
1069 	.trans_osrel	= linux_trans_osrel
1070 };
1071 
1072 static Elf32_Brandinfo linux_brand = {
1073 	.brand		= ELFOSABI_LINUX,
1074 	.machine	= EM_386,
1075 	.compat_3_brand	= "Linux",
1076 	.emul_path	= "/compat/linux",
1077 	.interp_path	= "/lib/ld-linux.so.1",
1078 	.sysvec		= &elf_linux_sysvec,
1079 	.interp_newpath	= NULL,
1080 	.brand_note	= &linux_brandnote,
1081 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1082 };
1083 
1084 static Elf32_Brandinfo linux_glibc2brand = {
1085 	.brand		= ELFOSABI_LINUX,
1086 	.machine	= EM_386,
1087 	.compat_3_brand	= "Linux",
1088 	.emul_path	= "/compat/linux",
1089 	.interp_path	= "/lib/ld-linux.so.2",
1090 	.sysvec		= &elf_linux_sysvec,
1091 	.interp_newpath	= NULL,
1092 	.brand_note	= &linux_brandnote,
1093 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1094 };
1095 
1096 Elf32_Brandinfo *linux_brandlist[] = {
1097 	&linux_brand,
1098 	&linux_glibc2brand,
1099 	NULL
1100 };
1101 
1102 static int
1103 linux_elf_modevent(module_t mod, int type, void *data)
1104 {
1105 	Elf32_Brandinfo **brandinfo;
1106 	int error;
1107 	struct linux_ioctl_handler **lihp;
1108 	struct linux_device_handler **ldhp;
1109 
1110 	error = 0;
1111 
1112 	switch(type) {
1113 	case MOD_LOAD:
1114 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1115 		     ++brandinfo)
1116 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1117 				error = EINVAL;
1118 		if (error == 0) {
1119 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1120 				linux_ioctl_register_handler(*lihp);
1121 			SET_FOREACH(ldhp, linux_device_handler_set)
1122 				linux_device_register_handler(*ldhp);
1123 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1124 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1125 			LIST_INIT(&futex_list);
1126 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1127 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1128 			      NULL, 1000);
1129 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1130 			      NULL, 1000);
1131 			linux_get_machine(&linux_platform);
1132 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1133 			    sizeof(char *));
1134 			linux_osd_jail_register();
1135 			stclohz = (stathz ? stathz : hz);
1136 			if (bootverbose)
1137 				printf("Linux ELF exec handler installed\n");
1138 		} else
1139 			printf("cannot insert Linux ELF brand handler\n");
1140 		break;
1141 	case MOD_UNLOAD:
1142 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1143 		     ++brandinfo)
1144 			if (elf32_brand_inuse(*brandinfo))
1145 				error = EBUSY;
1146 		if (error == 0) {
1147 			for (brandinfo = &linux_brandlist[0];
1148 			     *brandinfo != NULL; ++brandinfo)
1149 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1150 					error = EINVAL;
1151 		}
1152 		if (error == 0) {
1153 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1154 				linux_ioctl_unregister_handler(*lihp);
1155 			SET_FOREACH(ldhp, linux_device_handler_set)
1156 				linux_device_unregister_handler(*ldhp);
1157 			mtx_destroy(&emul_lock);
1158 			sx_destroy(&emul_shared_lock);
1159 			mtx_destroy(&futex_mtx);
1160 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1161 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1162 			linux_osd_jail_deregister();
1163 			if (bootverbose)
1164 				printf("Linux ELF exec handler removed\n");
1165 		} else
1166 			printf("Could not deinstall ELF interpreter entry\n");
1167 		break;
1168 	default:
1169 		return EOPNOTSUPP;
1170 	}
1171 	return error;
1172 }
1173 
1174 static moduledata_t linux_elf_mod = {
1175 	"linuxelf",
1176 	linux_elf_modevent,
1177 	0
1178 };
1179 
1180 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1181