xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 7e00348e7605b9906601438008341ffc37c00e2c)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/cputypes.h>
62 #include <machine/md_var.h>
63 #include <machine/pcb.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 #include <compat/linux/linux_emul.h>
68 #include <compat/linux/linux_futex.h>
69 #include <compat/linux/linux_ioctl.h>
70 #include <compat/linux/linux_mib.h>
71 #include <compat/linux/linux_misc.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 
75 MODULE_VERSION(linux, 1);
76 
77 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
78 
79 #if BYTE_ORDER == LITTLE_ENDIAN
80 #define SHELLMAGIC      0x2123 /* #! */
81 #else
82 #define SHELLMAGIC      0x2321
83 #endif
84 
85 /*
86  * Allow the sendsig functions to use the ldebug() facility
87  * even though they are not syscalls themselves. Map them
88  * to syscall 0. This is slightly less bogus than using
89  * ldebug(sigreturn).
90  */
91 #define	LINUX_SYS_linux_rt_sendsig	0
92 #define	LINUX_SYS_linux_sendsig		0
93 
94 #define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
95 
96 extern char linux_sigcode[];
97 extern int linux_szsigcode;
98 
99 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
100 
101 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
102 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
103 
104 static int	linux_fixup(register_t **stack_base,
105 		    struct image_params *iparams);
106 static int	elf_linux_fixup(register_t **stack_base,
107 		    struct image_params *iparams);
108 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
109 static void	exec_linux_setregs(struct thread *td,
110 		    struct image_params *imgp, u_long stack);
111 static register_t *linux_copyout_strings(struct image_params *imgp);
112 static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
113 
114 static int linux_szplatform;
115 const char *linux_platform;
116 
117 static eventhandler_tag linux_exit_tag;
118 static eventhandler_tag linux_exec_tag;
119 
120 /*
121  * Linux syscalls return negative errno's, we do positive and map them
122  * Reference:
123  *   FreeBSD: src/sys/sys/errno.h
124  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
125  *            linux-2.6.17.8/include/asm-generic/errno.h
126  */
127 static int bsd_to_linux_errno[ELAST + 1] = {
128 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
129 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
130 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
131 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
132 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
133 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
134 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
135 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
136 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
137 	 -72, -67, -71
138 };
139 
140 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
141 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
142 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
143 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
144 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
145 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
146 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
147 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
148 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
149 };
150 
151 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
152 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
153 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
154 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
155 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
156 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
157 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
158 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
159 	SIGIO, SIGURG, SIGSYS
160 };
161 
/* Value reported when a FreeBSD trap type has no entry in the table. */
#define	LINUX_T_UNKNOWN	255

/*
 * Trap number translation table, indexed by the FreeBSD trap type; each
 * entry is the trap number handed to the Linux signal context.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0              */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT  */ 6,
	/*  2              */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT     */ 3,
	/*  4              */ LINUX_T_UNKNOWN,
	/*  5              */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP  */ 16,
	/*  7 T_ASTFLT     */ 254,
	/*  8              */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT    */ 13,
	/* 10 T_TRCTRAP    */ 1,
	/* 11              */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT    */ 14,
	/* 13              */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT   */ 17,
	/* 15              */ LINUX_T_UNKNOWN,
	/* 16              */ LINUX_T_UNKNOWN,
	/* 17              */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE     */ 0,
	/* 19 T_NMI        */ 2,
	/* 20 T_OFLOW      */ 4,
	/* 21 T_BOUND      */ 5,
	/* 22 T_DNA        */ 7,
	/* 23 T_DOUBLEFLT  */ 8,
	/* 24 T_FPOPFLT    */ 9,
	/* 25 T_TSSFLT     */ 10,
	/* 26 T_SEGNPFLT   */ 11,
	/* 27 T_STKFLT     */ 12,
	/* 28 T_MCHK       */ 18,
	/* 29 T_XMMFLT     */ 19,
	/* 30 T_RESERVED   */ 15
};

/*
 * Bounds-checked table lookup: any code that does not index into the
 * table yields LINUX_T_UNKNOWN.  The comparison is made against an
 * unsigned element count, so a negative int code also falls out of
 * range.  Note that the argument is evaluated twice.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
200 
201 /*
202  * If FreeBSD & Linux have a difference of opinion about what a trap
203  * means, deal with it here.
204  *
205  * MPSAFE
206  */
207 static int
208 translate_traps(int signal, int trap_code)
209 {
210 	if (signal != SIGBUS)
211 		return signal;
212 	switch (trap_code) {
213 	case T_PROTFLT:
214 	case T_TSSFLT:
215 	case T_DOUBLEFLT:
216 	case T_PAGEFLT:
217 		return SIGSEGV;
218 	default:
219 		return signal;
220 	}
221 }
222 
223 static int
224 linux_fixup(register_t **stack_base, struct image_params *imgp)
225 {
226 	register_t *argv, *envp;
227 
228 	argv = *stack_base;
229 	envp = *stack_base + (imgp->args->argc + 1);
230 	(*stack_base)--;
231 	suword(*stack_base, (intptr_t)(void *)envp);
232 	(*stack_base)--;
233 	suword(*stack_base, (intptr_t)(void *)argv);
234 	(*stack_base)--;
235 	suword(*stack_base, imgp->args->argc);
236 	return (0);
237 }
238 
239 static int
240 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
241 {
242 	struct proc *p;
243 	Elf32_Auxargs *args;
244 	Elf32_Addr *uplatform;
245 	struct ps_strings *arginfo;
246 	register_t *pos;
247 
248 	KASSERT(curthread->td_proc == imgp->proc,
249 	    ("unsafe elf_linux_fixup(), should be curproc"));
250 
251 	p = imgp->proc;
252 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
253 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
258 
259 	/*
260 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
261 	 * as it has appeared in the 2.4.0-rc7 first time.
262 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
263 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
264 	 * is not present.
265 	 * Also see linux_times() implementation.
266 	 */
267 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
268 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
269 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
270 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
271 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
272 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
273 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
274 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
275 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
276 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0);
277 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
278 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
279 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
280 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
281 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
282 	if (args->execfd != -1)
283 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
284 	AUXARGS_ENTRY(pos, AT_NULL, 0);
285 
286 	free(imgp->auxargs, M_TEMP);
287 	imgp->auxargs = NULL;
288 
289 	(*stack_base)--;
290 	suword(*stack_base, (register_t)imgp->args->argc);
291 	return (0);
292 }
293 
294 /*
295  * Copied from kern/kern_exec.c
296  */
297 static register_t *
298 linux_copyout_strings(struct image_params *imgp)
299 {
300 	int argc, envc;
301 	char **vectp;
302 	char *stringp, *destp;
303 	register_t *stack_base;
304 	struct ps_strings *arginfo;
305 	struct proc *p;
306 
307 	/*
308 	 * Calculate string base and vector table pointers.
309 	 * Also deal with signal trampoline code for this exec type.
310 	 */
311 	p = imgp->proc;
312 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
313 	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
314 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
315 
316 	/*
317 	 * install LINUX_PLATFORM
318 	 */
319 	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
320 	    linux_szplatform);
321 
322 	/*
323 	 * If we have a valid auxargs ptr, prepare some room
324 	 * on the stack.
325 	 */
326 	if (imgp->auxargs) {
327 		/*
328 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
329 		 * lower compatibility.
330 		 */
331 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
332 		    (LINUX_AT_COUNT * 2);
333 		/*
334 		 * The '+ 2' is for the null pointers at the end of each of
335 		 * the arg and env vector sets,and imgp->auxarg_size is room
336 		 * for argument of Runtime loader.
337 		 */
338 		vectp = (char **)(destp - (imgp->args->argc +
339 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
340 	} else {
341 		/*
342 		 * The '+ 2' is for the null pointers at the end of each of
343 		 * the arg and env vector sets
344 		 */
345 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
346 		    sizeof(char *));
347 	}
348 
349 	/*
350 	 * vectp also becomes our initial stack base
351 	 */
352 	stack_base = (register_t *)vectp;
353 
354 	stringp = imgp->args->begin_argv;
355 	argc = imgp->args->argc;
356 	envc = imgp->args->envc;
357 
358 	/*
359 	 * Copy out strings - arguments and environment.
360 	 */
361 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
362 
363 	/*
364 	 * Fill in "ps_strings" struct for ps, w, etc.
365 	 */
366 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
367 	suword(&arginfo->ps_nargvstr, argc);
368 
369 	/*
370 	 * Fill in argument portion of vector table.
371 	 */
372 	for (; argc > 0; --argc) {
373 		suword(vectp++, (long)(intptr_t)destp);
374 		while (*stringp++ != 0)
375 			destp++;
376 		destp++;
377 	}
378 
379 	/* a null vector table pointer separates the argp's from the envp's */
380 	suword(vectp++, 0);
381 
382 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
383 	suword(&arginfo->ps_nenvstr, envc);
384 
385 	/*
386 	 * Fill in environment portion of vector table.
387 	 */
388 	for (; envc > 0; --envc) {
389 		suword(vectp++, (long)(intptr_t)destp);
390 		while (*stringp++ != 0)
391 			destp++;
392 		destp++;
393 	}
394 
395 	/* end of vector table is a null pointer */
396 	suword(vectp, 0);
397 
398 	return (stack_base);
399 }
400 
401 
402 
403 extern unsigned long linux_sznonrtsigcode;
404 
405 static void
406 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
407 {
408 	struct thread *td = curthread;
409 	struct proc *p = td->td_proc;
410 	struct sigacts *psp;
411 	struct trapframe *regs;
412 	struct l_rt_sigframe *fp, frame;
413 	int sig, code;
414 	int oonstack;
415 
416 	sig = ksi->ksi_signo;
417 	code = ksi->ksi_code;
418 	PROC_LOCK_ASSERT(p, MA_OWNED);
419 	psp = p->p_sigacts;
420 	mtx_assert(&psp->ps_mtx, MA_OWNED);
421 	regs = td->td_frame;
422 	oonstack = sigonstack(regs->tf_esp);
423 
424 #ifdef DEBUG
425 	if (ldebug(rt_sendsig))
426 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
427 		    catcher, sig, (void*)mask, code);
428 #endif
429 	/*
430 	 * Allocate space for the signal handler context.
431 	 */
432 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
433 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
434 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
435 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
436 	} else
437 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
438 	mtx_unlock(&psp->ps_mtx);
439 
440 	/*
441 	 * Build the argument list for the signal handler.
442 	 */
443 	if (p->p_sysent->sv_sigtbl)
444 		if (sig <= p->p_sysent->sv_sigsize)
445 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
446 
447 	bzero(&frame, sizeof(frame));
448 
449 	frame.sf_handler = catcher;
450 	frame.sf_sig = sig;
451 	frame.sf_siginfo = &fp->sf_si;
452 	frame.sf_ucontext = &fp->sf_sc;
453 
454 	/* Fill in POSIX parts */
455 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
456 
457 	/*
458 	 * Build the signal context to be used by sigreturn.
459 	 */
460 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
461 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
462 
463 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
464 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
465 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
466 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
467 	PROC_UNLOCK(p);
468 
469 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
470 
471 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
472 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
473 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
474 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
475 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
476 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
477 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
478 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
479 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
480 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
481 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
482 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
483 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
484 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
485 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
486 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
487 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
488 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
489 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
490 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
491 
492 #ifdef DEBUG
493 	if (ldebug(rt_sendsig))
494 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
495 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
496 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
497 #endif
498 
499 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
500 		/*
501 		 * Process has trashed its stack; give it an illegal
502 		 * instruction to halt it in its tracks.
503 		 */
504 #ifdef DEBUG
505 		if (ldebug(rt_sendsig))
506 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
507 			    fp, oonstack);
508 #endif
509 		PROC_LOCK(p);
510 		sigexit(td, SIGILL);
511 	}
512 
513 	/*
514 	 * Build context to run handler in.
515 	 */
516 	regs->tf_esp = (int)fp;
517 	regs->tf_eip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
518 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
519 	regs->tf_cs = _ucodesel;
520 	regs->tf_ds = _udatasel;
521 	regs->tf_es = _udatasel;
522 	regs->tf_fs = _udatasel;
523 	regs->tf_ss = _udatasel;
524 	PROC_LOCK(p);
525 	mtx_lock(&psp->ps_mtx);
526 }
527 
528 
529 /*
530  * Send an interrupt to process.
531  *
532  * Stack is set up to allow sigcode stored
533  * in u. to call routine, followed by kcall
534  * to sigreturn routine below.  After sigreturn
535  * resets the signal mask, the stack, and the
536  * frame pointer, it returns to the user
537  * specified pc, psl.
538  */
539 static void
540 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
541 {
542 	struct thread *td = curthread;
543 	struct proc *p = td->td_proc;
544 	struct sigacts *psp;
545 	struct trapframe *regs;
546 	struct l_sigframe *fp, frame;
547 	l_sigset_t lmask;
548 	int sig, code;
549 	int oonstack, i;
550 
551 	PROC_LOCK_ASSERT(p, MA_OWNED);
552 	psp = p->p_sigacts;
553 	sig = ksi->ksi_signo;
554 	code = ksi->ksi_code;
555 	mtx_assert(&psp->ps_mtx, MA_OWNED);
556 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
557 		/* Signal handler installed with SA_SIGINFO. */
558 		linux_rt_sendsig(catcher, ksi, mask);
559 		return;
560 	}
561 	regs = td->td_frame;
562 	oonstack = sigonstack(regs->tf_esp);
563 
564 #ifdef DEBUG
565 	if (ldebug(sendsig))
566 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
567 		    catcher, sig, (void*)mask, code);
568 #endif
569 
570 	/*
571 	 * Allocate space for the signal handler context.
572 	 */
573 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
574 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
575 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
576 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
577 	} else
578 		fp = (struct l_sigframe *)regs->tf_esp - 1;
579 	mtx_unlock(&psp->ps_mtx);
580 	PROC_UNLOCK(p);
581 
582 	/*
583 	 * Build the argument list for the signal handler.
584 	 */
585 	if (p->p_sysent->sv_sigtbl)
586 		if (sig <= p->p_sysent->sv_sigsize)
587 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
588 
589 	bzero(&frame, sizeof(frame));
590 
591 	frame.sf_handler = catcher;
592 	frame.sf_sig = sig;
593 
594 	bsd_to_linux_sigset(mask, &lmask);
595 
596 	/*
597 	 * Build the signal context to be used by sigreturn.
598 	 */
599 	frame.sf_sc.sc_mask   = lmask.__bits[0];
600 	frame.sf_sc.sc_gs     = rgs();
601 	frame.sf_sc.sc_fs     = regs->tf_fs;
602 	frame.sf_sc.sc_es     = regs->tf_es;
603 	frame.sf_sc.sc_ds     = regs->tf_ds;
604 	frame.sf_sc.sc_edi    = regs->tf_edi;
605 	frame.sf_sc.sc_esi    = regs->tf_esi;
606 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
607 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
608 	frame.sf_sc.sc_edx    = regs->tf_edx;
609 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
610 	frame.sf_sc.sc_eax    = regs->tf_eax;
611 	frame.sf_sc.sc_eip    = regs->tf_eip;
612 	frame.sf_sc.sc_cs     = regs->tf_cs;
613 	frame.sf_sc.sc_eflags = regs->tf_eflags;
614 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
615 	frame.sf_sc.sc_ss     = regs->tf_ss;
616 	frame.sf_sc.sc_err    = regs->tf_err;
617 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
618 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
619 
620 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
621 		frame.sf_extramask[i] = lmask.__bits[i+1];
622 
623 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
624 		/*
625 		 * Process has trashed its stack; give it an illegal
626 		 * instruction to halt it in its tracks.
627 		 */
628 		PROC_LOCK(p);
629 		sigexit(td, SIGILL);
630 	}
631 
632 	/*
633 	 * Build context to run handler in.
634 	 */
635 	regs->tf_esp = (int)fp;
636 	regs->tf_eip = p->p_sysent->sv_sigcode_base;
637 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
638 	regs->tf_cs = _ucodesel;
639 	regs->tf_ds = _udatasel;
640 	regs->tf_es = _udatasel;
641 	regs->tf_fs = _udatasel;
642 	regs->tf_ss = _udatasel;
643 	PROC_LOCK(p);
644 	mtx_lock(&psp->ps_mtx);
645 }
646 
647 /*
648  * System call to cleanup state after a signal
649  * has been taken.  Reset signal mask and
650  * stack state from context left by sendsig (above).
651  * Return to previous pc and psl as specified by
652  * context left by sendsig. Check carefully to
653  * make sure that the user has not modified the
654  * psl to gain improper privileges or to cause
655  * a machine fault.
656  */
657 int
658 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
659 {
660 	struct l_sigframe frame;
661 	struct trapframe *regs;
662 	l_sigset_t lmask;
663 	sigset_t bmask;
664 	int eflags, i;
665 	ksiginfo_t ksi;
666 
667 	regs = td->td_frame;
668 
669 #ifdef DEBUG
670 	if (ldebug(sigreturn))
671 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
672 #endif
673 	/*
674 	 * The trampoline code hands us the sigframe.
675 	 * It is unsafe to keep track of it ourselves, in the event that a
676 	 * program jumps out of a signal handler.
677 	 */
678 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
679 		return (EFAULT);
680 
681 	/*
682 	 * Check for security violations.
683 	 */
684 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
685 	eflags = frame.sf_sc.sc_eflags;
686 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
687 		return(EINVAL);
688 
689 	/*
690 	 * Don't allow users to load a valid privileged %cs.  Let the
691 	 * hardware check for invalid selectors, excess privilege in
692 	 * other selectors, invalid %eip's and invalid %esp's.
693 	 */
694 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
695 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
696 		ksiginfo_init_trap(&ksi);
697 		ksi.ksi_signo = SIGBUS;
698 		ksi.ksi_code = BUS_OBJERR;
699 		ksi.ksi_trapno = T_PROTFLT;
700 		ksi.ksi_addr = (void *)regs->tf_eip;
701 		trapsignal(td, &ksi);
702 		return(EINVAL);
703 	}
704 
705 	lmask.__bits[0] = frame.sf_sc.sc_mask;
706 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
707 		lmask.__bits[i+1] = frame.sf_extramask[i];
708 	linux_to_bsd_sigset(&lmask, &bmask);
709 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
710 
711 	/*
712 	 * Restore signal context.
713 	 */
714 	/* %gs was restored by the trampoline. */
715 	regs->tf_fs     = frame.sf_sc.sc_fs;
716 	regs->tf_es     = frame.sf_sc.sc_es;
717 	regs->tf_ds     = frame.sf_sc.sc_ds;
718 	regs->tf_edi    = frame.sf_sc.sc_edi;
719 	regs->tf_esi    = frame.sf_sc.sc_esi;
720 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
721 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
722 	regs->tf_edx    = frame.sf_sc.sc_edx;
723 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
724 	regs->tf_eax    = frame.sf_sc.sc_eax;
725 	regs->tf_eip    = frame.sf_sc.sc_eip;
726 	regs->tf_cs     = frame.sf_sc.sc_cs;
727 	regs->tf_eflags = eflags;
728 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
729 	regs->tf_ss     = frame.sf_sc.sc_ss;
730 
731 	return (EJUSTRETURN);
732 }
733 
734 /*
735  * System call to cleanup state after a signal
736  * has been taken.  Reset signal mask and
737  * stack state from context left by rt_sendsig (above).
738  * Return to previous pc and psl as specified by
739  * context left by sendsig. Check carefully to
740  * make sure that the user has not modified the
741  * psl to gain improper privileges or to cause
742  * a machine fault.
743  */
744 int
745 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
746 {
747 	struct l_ucontext uc;
748 	struct l_sigcontext *context;
749 	sigset_t bmask;
750 	l_stack_t *lss;
751 	stack_t ss;
752 	struct trapframe *regs;
753 	int eflags;
754 	ksiginfo_t ksi;
755 
756 	regs = td->td_frame;
757 
758 #ifdef DEBUG
759 	if (ldebug(rt_sigreturn))
760 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
761 #endif
762 	/*
763 	 * The trampoline code hands us the ucontext.
764 	 * It is unsafe to keep track of it ourselves, in the event that a
765 	 * program jumps out of a signal handler.
766 	 */
767 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
768 		return (EFAULT);
769 
770 	context = &uc.uc_mcontext;
771 
772 	/*
773 	 * Check for security violations.
774 	 */
775 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
776 	eflags = context->sc_eflags;
777 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
778 		return(EINVAL);
779 
780 	/*
781 	 * Don't allow users to load a valid privileged %cs.  Let the
782 	 * hardware check for invalid selectors, excess privilege in
783 	 * other selectors, invalid %eip's and invalid %esp's.
784 	 */
785 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
786 	if (!CS_SECURE(context->sc_cs)) {
787 		ksiginfo_init_trap(&ksi);
788 		ksi.ksi_signo = SIGBUS;
789 		ksi.ksi_code = BUS_OBJERR;
790 		ksi.ksi_trapno = T_PROTFLT;
791 		ksi.ksi_addr = (void *)regs->tf_eip;
792 		trapsignal(td, &ksi);
793 		return(EINVAL);
794 	}
795 
796 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
797 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
798 
799 	/*
800 	 * Restore signal context
801 	 */
802 	/* %gs was restored by the trampoline. */
803 	regs->tf_fs     = context->sc_fs;
804 	regs->tf_es     = context->sc_es;
805 	regs->tf_ds     = context->sc_ds;
806 	regs->tf_edi    = context->sc_edi;
807 	regs->tf_esi    = context->sc_esi;
808 	regs->tf_ebp    = context->sc_ebp;
809 	regs->tf_ebx    = context->sc_ebx;
810 	regs->tf_edx    = context->sc_edx;
811 	regs->tf_ecx    = context->sc_ecx;
812 	regs->tf_eax    = context->sc_eax;
813 	regs->tf_eip    = context->sc_eip;
814 	regs->tf_cs     = context->sc_cs;
815 	regs->tf_eflags = eflags;
816 	regs->tf_esp    = context->sc_esp_at_signal;
817 	regs->tf_ss     = context->sc_ss;
818 
819 	/*
820 	 * call sigaltstack & ignore results..
821 	 */
822 	lss = &uc.uc_stack;
823 	ss.ss_sp = lss->ss_sp;
824 	ss.ss_size = lss->ss_size;
825 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
826 
827 #ifdef DEBUG
828 	if (ldebug(rt_sigreturn))
829 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
830 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
831 #endif
832 	(void)kern_sigaltstack(td, &ss, NULL);
833 
834 	return (EJUSTRETURN);
835 }
836 
837 static int
838 linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
839 {
840 	struct proc *p;
841 	struct trapframe *frame;
842 
843 	p = td->td_proc;
844 	frame = td->td_frame;
845 
846 	sa->code = frame->tf_eax;
847 	sa->args[0] = frame->tf_ebx;
848 	sa->args[1] = frame->tf_ecx;
849 	sa->args[2] = frame->tf_edx;
850 	sa->args[3] = frame->tf_esi;
851 	sa->args[4] = frame->tf_edi;
852 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
853 
854 	if (sa->code >= p->p_sysent->sv_size)
855 		sa->callp = &p->p_sysent->sv_table[0];
856  	else
857  		sa->callp = &p->p_sysent->sv_table[sa->code];
858 	sa->narg = sa->callp->sy_narg;
859 
860 	td->td_retval[0] = 0;
861 	td->td_retval[1] = frame->tf_edx;
862 
863 	return (0);
864 }
865 
866 /*
867  * If a linux binary is exec'ing something, try this image activator
868  * first.  We override standard shell script execution in order to
869  * be able to modify the interpreter path.  We only do this if a linux
870  * binary is doing the exec, so we do not create an EXEC module for it.
871  */
872 static int	exec_linux_imgact_try(struct image_params *iparams);
873 
874 static int
875 exec_linux_imgact_try(struct image_params *imgp)
876 {
877     const char *head = (const char *)imgp->image_header;
878     char *rpath;
879     int error = -1;
880 
881     /*
882      * The interpreter for shell scripts run from a linux binary needs
883      * to be located in /compat/linux if possible in order to recursively
884      * maintain linux path emulation.
885      */
886     if (((const short *)head)[0] == SHELLMAGIC) {
887 	    /*
888 	     * Run our normal shell image activator.  If it succeeds attempt
889 	     * to use the alternate path for the interpreter.  If an alternate
890 	     * path is found, use our stringspace to store it.
891 	     */
892 	    if ((error = exec_shell_imgact(imgp)) == 0) {
893 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
894 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
895 		    if (rpath != NULL)
896 			    imgp->args->fname_buf =
897 				imgp->interpreter_name = rpath;
898 	    }
899     }
900     return (error);
901 }
902 
903 /*
904  * exec_setregs may initialize some registers differently than Linux
905  * does, thus potentially confusing Linux binaries. If necessary, we
906  * override the exec_setregs default(s) here.
907  */
908 static void
909 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
910 {
911 	struct pcb *pcb = td->td_pcb;
912 
913 	exec_setregs(td, imgp, stack);
914 
915 	/* Linux sets %gs to 0, we default to _udatasel */
916 	pcb->pcb_gs = 0;
917 	load_gs(0);
918 
919 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
920 }
921 
922 static void
923 linux_get_machine(const char **dst)
924 {
925 
926 	switch (cpu_class) {
927 	case CPUCLASS_686:
928 		*dst = "i686";
929 		break;
930 	case CPUCLASS_586:
931 		*dst = "i586";
932 		break;
933 	case CPUCLASS_486:
934 		*dst = "i486";
935 		break;
936 	default:
937 		*dst = "i386";
938 	}
939 }
940 
941 struct sysentvec linux_sysvec = {
942 	.sv_size	= LINUX_SYS_MAXSYSCALL,
943 	.sv_table	= linux_sysent,
944 	.sv_mask	= 0,
945 	.sv_sigsize	= LINUX_SIGTBLSZ,
946 	.sv_sigtbl	= bsd_to_linux_signal,
947 	.sv_errsize	= ELAST + 1,
948 	.sv_errtbl	= bsd_to_linux_errno,
949 	.sv_transtrap	= translate_traps,
950 	.sv_fixup	= linux_fixup,
951 	.sv_sendsig	= linux_sendsig,
952 	.sv_sigcode	= linux_sigcode,
953 	.sv_szsigcode	= &linux_szsigcode,
954 	.sv_prepsyscall	= NULL,
955 	.sv_name	= "Linux a.out",
956 	.sv_coredump	= NULL,
957 	.sv_imgact_try	= exec_linux_imgact_try,
958 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
959 	.sv_pagesize	= PAGE_SIZE,
960 	.sv_minuser	= VM_MIN_ADDRESS,
961 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
962 	.sv_usrstack	= LINUX_USRSTACK,
963 	.sv_psstrings	= PS_STRINGS,
964 	.sv_stackprot	= VM_PROT_ALL,
965 	.sv_copyout_strings = exec_copyout_strings,
966 	.sv_setregs	= exec_linux_setregs,
967 	.sv_fixlimit	= NULL,
968 	.sv_maxssiz	= NULL,
969 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
970 	.sv_set_syscall_retval = cpu_set_syscall_retval,
971 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
972 	.sv_syscallnames = NULL,
973 	.sv_shared_page_base = LINUX_SHAREDPAGE,
974 	.sv_shared_page_len = PAGE_SIZE,
975 	.sv_schedtail	= linux_schedtail,
976 };
977 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
978 
979 struct sysentvec elf_linux_sysvec = {
980 	.sv_size	= LINUX_SYS_MAXSYSCALL,
981 	.sv_table	= linux_sysent,
982 	.sv_mask	= 0,
983 	.sv_sigsize	= LINUX_SIGTBLSZ,
984 	.sv_sigtbl	= bsd_to_linux_signal,
985 	.sv_errsize	= ELAST + 1,
986 	.sv_errtbl	= bsd_to_linux_errno,
987 	.sv_transtrap	= translate_traps,
988 	.sv_fixup	= elf_linux_fixup,
989 	.sv_sendsig	= linux_sendsig,
990 	.sv_sigcode	= linux_sigcode,
991 	.sv_szsigcode	= &linux_szsigcode,
992 	.sv_prepsyscall	= NULL,
993 	.sv_name	= "Linux ELF",
994 	.sv_coredump	= elf32_coredump,
995 	.sv_imgact_try	= exec_linux_imgact_try,
996 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
997 	.sv_pagesize	= PAGE_SIZE,
998 	.sv_minuser	= VM_MIN_ADDRESS,
999 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
1000 	.sv_usrstack	= LINUX_USRSTACK,
1001 	.sv_psstrings	= LINUX_PS_STRINGS,
1002 	.sv_stackprot	= VM_PROT_ALL,
1003 	.sv_copyout_strings = linux_copyout_strings,
1004 	.sv_setregs	= exec_linux_setregs,
1005 	.sv_fixlimit	= NULL,
1006 	.sv_maxssiz	= NULL,
1007 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
1008 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1009 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
1010 	.sv_syscallnames = NULL,
1011 	.sv_shared_page_base = LINUX_SHAREDPAGE,
1012 	.sv_shared_page_len = PAGE_SIZE,
1013 	.sv_schedtail	= linux_schedtail,
1014 };
1015 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1016 
/*
 * Constants used when matching the ELF brand note.  GNU_ABI_VENDOR is the
 * vendor string of linux_brandnote; its sizeof (which counts the
 * terminating NUL) is used there as the expected n_namesz.
 * GNULINUX_ABI_DESC is the value linux_trans_osrel() requires in the first
 * word of the note descriptor.
 */
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;
1019 
1020 static boolean_t
1021 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1022 {
1023 	const Elf32_Word *desc;
1024 	uintptr_t p;
1025 
1026 	p = (uintptr_t)(note + 1);
1027 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1028 
1029 	desc = (const Elf32_Word *)p;
1030 	if (desc[0] != GNULINUX_ABI_DESC)
1031 		return (FALSE);
1032 
1033 	/*
1034 	 * For linux we encode osrel as follows (see linux_mib.c):
1035 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1036 	 */
1037 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1038 
1039 	return (TRUE);
1040 }
1041 
1042 static Elf_Brandnote linux_brandnote = {
1043 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1044 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1045 	.hdr.n_type	= 1,
1046 	.vendor		= GNU_ABI_VENDOR,
1047 	.flags		= BN_TRANSLATE_OSREL,
1048 	.trans_osrel	= linux_trans_osrel
1049 };
1050 
1051 static Elf32_Brandinfo linux_brand = {
1052 	.brand		= ELFOSABI_LINUX,
1053 	.machine	= EM_386,
1054 	.compat_3_brand	= "Linux",
1055 	.emul_path	= "/compat/linux",
1056 	.interp_path	= "/lib/ld-linux.so.1",
1057 	.sysvec		= &elf_linux_sysvec,
1058 	.interp_newpath	= NULL,
1059 	.brand_note	= &linux_brandnote,
1060 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1061 };
1062 
1063 static Elf32_Brandinfo linux_glibc2brand = {
1064 	.brand		= ELFOSABI_LINUX,
1065 	.machine	= EM_386,
1066 	.compat_3_brand	= "Linux",
1067 	.emul_path	= "/compat/linux",
1068 	.interp_path	= "/lib/ld-linux.so.2",
1069 	.sysvec		= &elf_linux_sysvec,
1070 	.interp_newpath	= NULL,
1071 	.brand_note	= &linux_brandnote,
1072 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1073 };
1074 
1075 Elf32_Brandinfo *linux_brandlist[] = {
1076 	&linux_brand,
1077 	&linux_glibc2brand,
1078 	NULL
1079 };
1080 
1081 static int
1082 linux_elf_modevent(module_t mod, int type, void *data)
1083 {
1084 	Elf32_Brandinfo **brandinfo;
1085 	int error;
1086 	struct linux_ioctl_handler **lihp;
1087 	struct linux_device_handler **ldhp;
1088 
1089 	error = 0;
1090 
1091 	switch(type) {
1092 	case MOD_LOAD:
1093 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1094 		     ++brandinfo)
1095 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1096 				error = EINVAL;
1097 		if (error == 0) {
1098 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1099 				linux_ioctl_register_handler(*lihp);
1100 			SET_FOREACH(ldhp, linux_device_handler_set)
1101 				linux_device_register_handler(*ldhp);
1102 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1103 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1104 			LIST_INIT(&futex_list);
1105 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1106 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1107 			      NULL, 1000);
1108 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1109 			      NULL, 1000);
1110 			linux_get_machine(&linux_platform);
1111 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1112 			    sizeof(char *));
1113 			linux_osd_jail_register();
1114 			stclohz = (stathz ? stathz : hz);
1115 			if (bootverbose)
1116 				printf("Linux ELF exec handler installed\n");
1117 		} else
1118 			printf("cannot insert Linux ELF brand handler\n");
1119 		break;
1120 	case MOD_UNLOAD:
1121 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1122 		     ++brandinfo)
1123 			if (elf32_brand_inuse(*brandinfo))
1124 				error = EBUSY;
1125 		if (error == 0) {
1126 			for (brandinfo = &linux_brandlist[0];
1127 			     *brandinfo != NULL; ++brandinfo)
1128 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1129 					error = EINVAL;
1130 		}
1131 		if (error == 0) {
1132 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1133 				linux_ioctl_unregister_handler(*lihp);
1134 			SET_FOREACH(ldhp, linux_device_handler_set)
1135 				linux_device_unregister_handler(*ldhp);
1136 			mtx_destroy(&emul_lock);
1137 			sx_destroy(&emul_shared_lock);
1138 			mtx_destroy(&futex_mtx);
1139 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1140 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1141 			linux_osd_jail_deregister();
1142 			if (bootverbose)
1143 				printf("Linux ELF exec handler removed\n");
1144 		} else
1145 			printf("Could not deinstall ELF interpreter entry\n");
1146 		break;
1147 	default:
1148 		return EOPNOTSUPP;
1149 	}
1150 	return error;
1151 }
1152 
1153 static moduledata_t linux_elf_mod = {
1154 	"linuxelf",
1155 	linux_elf_modevent,
1156 	0
1157 };
1158 
1159 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1160