xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 60fde7ce5d7bf5d94290720ea53db5701ab406a8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1996 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysctl.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 #include <sys/vnode.h>
51 #include <sys/eventhandler.h>
52 
53 #include <vm/vm.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_param.h>
60 
61 #include <machine/cpu.h>
62 #include <machine/cputypes.h>
63 #include <machine/md_var.h>
64 #include <machine/pcb.h>
65 
66 #include <i386/linux/linux.h>
67 #include <i386/linux/linux_proto.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_futex.h>
70 #include <compat/linux/linux_ioctl.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_misc.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 #include <compat/linux/linux_vdso.h>
76 
77 MODULE_VERSION(linux, 1);
78 
/*
 * The two characters "#!" that open an interpreter script, expressed as a
 * host-endian 16-bit value.  linux_exec_imgact_try() compares the first
 * short of the image header against this constant.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
84 
#if defined(DEBUG)
/*
 * Only built when DEBUG is defined: a read/write string-typed "debug" OID
 * below the _compat_linux sysctl node, serviced by linux_sysctl_debug().
 */
SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
    linux_sysctl_debug, "A", "Linux debugging control");
#endif
89 
/*
 * Allow the sendsig functions to use the ldebug() facility even though they
 * are not syscalls themselves.  Map them to syscall 0.  This is slightly less
 * bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/*
 * User address of struct ps_strings: it sits directly below LINUX_USRSTACK.
 * Used as sv_psstrings by the ELF sysentvec (elf_linux_sysvec).
 */
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
99 
/*
 * Size of the linux_locore.o blob (end symbol minus start symbol); computed
 * by linux_vdso_install() and exported through sv_szsigcode.
 */
static int linux_szsigcode;
/* VM object and kernel mapping of the shared page, set up in linux_vdso_install(). */
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
/* Bounds of the embedded linux_locore.o image (presumably emitted by the build; TODO confirm). */
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;

/* Linux syscall table, installed as sv_table in both sysentvecs below. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set of ioctl handlers; not referenced in this part of the file. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations for the static hooks referenced by the sysentvecs. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	linux_fixup_elf(register_t **stack_base,
		    struct image_params *iparams);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static int	linux_exec_imgact_try(struct image_params *iparams);
static void	linux_exec_setregs(struct thread *td,
		    struct image_params *imgp, u_long stack);
static register_t *linux_copyout_strings(struct image_params *imgp);
static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void	linux_vdso_install(void *param);
static void	linux_vdso_deinstall(void *param);

/*
 * Platform string and its size as copied to the user stack by
 * linux_copyout_strings() and advertised via LINUX_AT_PLATFORM.  Where they
 * are initialised is not visible in this part of the file.
 */
static int linux_szplatform;
const char *linux_kplatform;

/*
 * Event handler registration tags; presumably filled in when the module
 * registers its process exit/exec and thread destructor hooks (TODO confirm,
 * the registration code is outside this part of the file).
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_exec_tag;
static eventhandler_tag linux_thread_dtor_tag;
129 
/* Linux trap number reported when a FreeBSD trap has no Linux equivalent. */
#define LINUX_T_UNKNOWN  255

/*
 * Translation table from FreeBSD trap numbers (T_*, used as the array
 * index) to the trap numbers stored in sc_trapno for Linux signal
 * handlers.  The table is read-only, so it is declared const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Bounds-checked table lookup: indexes past the end of the table yield
 * LINUX_T_UNKNOWN.  With the usual sizeof-based nitems() the comparison is
 * unsigned, so a negative code is rejected as well.
 *
 * NOTE: the argument is evaluated twice; do not pass an expression with
 * side effects.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<nitems(_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
168 
/*
 * Symbols declared through LINUX_VDSO_SYM_INTPTR (the macro itself is
 * defined elsewhere).  In this file linux_sigcode and linux_rt_sigcode are
 * loaded into tf_eip when a signal handler frame is set up, and
 * linux_vsyscall is exported to user space as LINUX_AT_SYSINFO.
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
172 
173 /*
174  * If FreeBSD & Linux have a difference of opinion about what a trap
175  * means, deal with it here.
176  *
177  * MPSAFE
178  */
179 static int
180 linux_translate_traps(int signal, int trap_code)
181 {
182 	if (signal != SIGBUS)
183 		return (signal);
184 	switch (trap_code) {
185 	case T_PROTFLT:
186 	case T_TSSFLT:
187 	case T_DOUBLEFLT:
188 	case T_PAGEFLT:
189 		return (SIGSEGV);
190 	default:
191 		return (signal);
192 	}
193 }
194 
195 static int
196 linux_fixup(register_t **stack_base, struct image_params *imgp)
197 {
198 	register_t *argv, *envp;
199 
200 	argv = *stack_base;
201 	envp = *stack_base + (imgp->args->argc + 1);
202 	(*stack_base)--;
203 	suword(*stack_base, (intptr_t)(void *)envp);
204 	(*stack_base)--;
205 	suword(*stack_base, (intptr_t)(void *)argv);
206 	(*stack_base)--;
207 	suword(*stack_base, imgp->args->argc);
208 	return (0);
209 }
210 
211 static int
212 linux_fixup_elf(register_t **stack_base, struct image_params *imgp)
213 {
214 	struct proc *p;
215 	Elf32_Auxargs *args;
216 	Elf32_Addr *uplatform;
217 	struct ps_strings *arginfo;
218 	register_t *pos;
219 	int issetugid;
220 
221 	KASSERT(curthread->td_proc == imgp->proc,
222 	    ("unsafe linux_fixup_elf(), should be curproc"));
223 
224 	p = imgp->proc;
225 	issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
226 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
227 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
228 	args = (Elf32_Auxargs *)imgp->auxargs;
229 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
230 
231 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
232 	    imgp->proc->p_sysent->sv_shared_page_base);
233 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
234 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
235 
236 	/*
237 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
238 	 * as it has appeared in the 2.4.0-rc7 first time.
239 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
240 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
241 	 * is not present.
242 	 * Also see linux_times() implementation.
243 	 */
244 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
245 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
246 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
247 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
248 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
249 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
250 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
251 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
252 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
253 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
254 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
255 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
256 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
257 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
258 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
259 	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary);
260 	if (imgp->execpathp != 0)
261 		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp);
262 	if (args->execfd != -1)
263 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
264 	AUXARGS_ENTRY(pos, AT_NULL, 0);
265 
266 	free(imgp->auxargs, M_TEMP);
267 	imgp->auxargs = NULL;
268 
269 	(*stack_base)--;
270 	suword(*stack_base, (register_t)imgp->args->argc);
271 	return (0);
272 }
273 
274 /*
275  * Copied from kern/kern_exec.c
276  */
277 static register_t *
278 linux_copyout_strings(struct image_params *imgp)
279 {
280 	int argc, envc;
281 	char **vectp;
282 	char *stringp, *destp;
283 	register_t *stack_base;
284 	struct ps_strings *arginfo;
285 	char canary[LINUX_AT_RANDOM_LEN];
286 	size_t execpath_len;
287 	struct proc *p;
288 
289 	/* Calculate string base and vector table pointers. */
290 	p = imgp->proc;
291 	if (imgp->execpath != NULL && imgp->auxargs != NULL)
292 		execpath_len = strlen(imgp->execpath) + 1;
293 	else
294 		execpath_len = 0;
295 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
296 	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
297 	    roundup(sizeof(canary), sizeof(char *)) -
298 	    roundup(execpath_len, sizeof(char *)) -
299 	    roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *));
300 
301 	/* Install LINUX_PLATFORM. */
302 	copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
303 	    linux_szplatform);
304 
305 	if (execpath_len != 0) {
306 		imgp->execpathp = (uintptr_t)arginfo -
307 		linux_szplatform - execpath_len;
308 		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
309 	}
310 
311 	/* Prepare the canary for SSP. */
312 	arc4rand(canary, sizeof(canary), 0);
313 	imgp->canary = (uintptr_t)arginfo - linux_szplatform -
314 	    roundup(execpath_len, sizeof(char *)) -
315 	    roundup(sizeof(canary), sizeof(char *));
316 	copyout(canary, (void *)imgp->canary, sizeof(canary));
317 
318 	/* If we have a valid auxargs ptr, prepare some room on the stack. */
319 	if (imgp->auxargs) {
320 		/*
321 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
322 		 * lower compatibility.
323 		 */
324 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
325 		    (LINUX_AT_COUNT * 2);
326 		/*
327 		 * The '+ 2' is for the null pointers at the end of each of
328 		 * the arg and env vector sets,and imgp->auxarg_size is room
329 		 * for argument of Runtime loader.
330 		 */
331 		vectp = (char **)(destp - (imgp->args->argc +
332 		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
333 	} else {
334 		/*
335 		 * The '+ 2' is for the null pointers at the end of each of
336 		 * the arg and env vector sets
337 		 */
338 		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
339 		    sizeof(char *));
340 	}
341 
342 	/* vectp also becomes our initial stack base. */
343 	stack_base = (register_t *)vectp;
344 
345 	stringp = imgp->args->begin_argv;
346 	argc = imgp->args->argc;
347 	envc = imgp->args->envc;
348 
349 	/* Copy out strings - arguments and environment. */
350 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
351 
352 	/* Fill in "ps_strings" struct for ps, w, etc. */
353 	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
354 	suword(&arginfo->ps_nargvstr, argc);
355 
356 	/* Fill in argument portion of vector table. */
357 	for (; argc > 0; --argc) {
358 		suword(vectp++, (long)(intptr_t)destp);
359 		while (*stringp++ != 0)
360 			destp++;
361 		destp++;
362 	}
363 
364 	/* A null vector table pointer separates the argp's from the envp's. */
365 	suword(vectp++, 0);
366 
367 	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
368 	suword(&arginfo->ps_nenvstr, envc);
369 
370 	/* Fill in environment portion of vector table. */
371 	for (; envc > 0; --envc) {
372 		suword(vectp++, (long)(intptr_t)destp);
373 		while (*stringp++ != 0)
374 			destp++;
375 		destp++;
376 	}
377 
378 	/* The end of the vector table is a null pointer. */
379 	suword(vectp, 0);
380 
381 	return (stack_base);
382 }
383 
384 static void
385 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
386 {
387 	struct thread *td = curthread;
388 	struct proc *p = td->td_proc;
389 	struct sigacts *psp;
390 	struct trapframe *regs;
391 	struct l_rt_sigframe *fp, frame;
392 	int sig, code;
393 	int oonstack;
394 
395 	sig = ksi->ksi_signo;
396 	code = ksi->ksi_code;
397 	PROC_LOCK_ASSERT(p, MA_OWNED);
398 	psp = p->p_sigacts;
399 	mtx_assert(&psp->ps_mtx, MA_OWNED);
400 	regs = td->td_frame;
401 	oonstack = sigonstack(regs->tf_esp);
402 
403 #ifdef DEBUG
404 	if (ldebug(rt_sendsig))
405 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
406 		    catcher, sig, (void*)mask, code);
407 #endif
408 	/* Allocate space for the signal handler context. */
409 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
410 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
411 		fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
412 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
413 	} else
414 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
415 	mtx_unlock(&psp->ps_mtx);
416 
417 	/* Build the argument list for the signal handler. */
418 	sig = bsd_to_linux_signal(sig);
419 
420 	bzero(&frame, sizeof(frame));
421 
422 	frame.sf_handler = catcher;
423 	frame.sf_sig = sig;
424 	frame.sf_siginfo = &fp->sf_si;
425 	frame.sf_ucontext = &fp->sf_sc;
426 
427 	/* Fill in POSIX parts. */
428 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
429 
430 	/* Build the signal context to be used by sigreturn. */
431 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
432 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
433 
434 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
435 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
436 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
437 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
438 	PROC_UNLOCK(p);
439 
440 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
441 
442 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__mask;
443 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
444 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
445 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
446 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
447 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
448 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
449 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
450 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
451 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_esp;
452 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
453 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
454 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
455 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
456 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
457 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
458 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
459 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
460 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
461 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
462 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
463 
464 #ifdef DEBUG
465 	if (ldebug(rt_sendsig))
466 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
467 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
468 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
469 #endif
470 
471 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
472 		/*
473 		 * Process has trashed its stack; give it an illegal
474 		 * instruction to halt it in its tracks.
475 		 */
476 #ifdef DEBUG
477 		if (ldebug(rt_sendsig))
478 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
479 			    fp, oonstack);
480 #endif
481 		PROC_LOCK(p);
482 		sigexit(td, SIGILL);
483 	}
484 
485 	/* Build context to run handler in. */
486 	regs->tf_esp = (int)fp;
487 	regs->tf_eip = linux_rt_sigcode;
488 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
489 	regs->tf_cs = _ucodesel;
490 	regs->tf_ds = _udatasel;
491 	regs->tf_es = _udatasel;
492 	regs->tf_fs = _udatasel;
493 	regs->tf_ss = _udatasel;
494 	PROC_LOCK(p);
495 	mtx_lock(&psp->ps_mtx);
496 }
497 
498 
499 /*
500  * Send an interrupt to process.
501  *
502  * Stack is set up to allow sigcode stored
503  * in u. to call routine, followed by kcall
504  * to sigreturn routine below.  After sigreturn
505  * resets the signal mask, the stack, and the
506  * frame pointer, it returns to the user
507  * specified pc, psl.
508  */
509 static void
510 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
511 {
512 	struct thread *td = curthread;
513 	struct proc *p = td->td_proc;
514 	struct sigacts *psp;
515 	struct trapframe *regs;
516 	struct l_sigframe *fp, frame;
517 	l_sigset_t lmask;
518 	int sig, code;
519 	int oonstack;
520 
521 	PROC_LOCK_ASSERT(p, MA_OWNED);
522 	psp = p->p_sigacts;
523 	sig = ksi->ksi_signo;
524 	code = ksi->ksi_code;
525 	mtx_assert(&psp->ps_mtx, MA_OWNED);
526 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
527 		/* Signal handler installed with SA_SIGINFO. */
528 		linux_rt_sendsig(catcher, ksi, mask);
529 		return;
530 	}
531 	regs = td->td_frame;
532 	oonstack = sigonstack(regs->tf_esp);
533 
534 #ifdef DEBUG
535 	if (ldebug(sendsig))
536 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
537 		    catcher, sig, (void*)mask, code);
538 #endif
539 
540 	/* Allocate space for the signal handler context. */
541 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
542 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
543 		fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
544 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
545 	} else
546 		fp = (struct l_sigframe *)regs->tf_esp - 1;
547 	mtx_unlock(&psp->ps_mtx);
548 	PROC_UNLOCK(p);
549 
550 	/* Build the argument list for the signal handler. */
551 	sig = bsd_to_linux_signal(sig);
552 
553 	bzero(&frame, sizeof(frame));
554 
555 	frame.sf_handler = catcher;
556 	frame.sf_sig = sig;
557 
558 	bsd_to_linux_sigset(mask, &lmask);
559 
560 	/* Build the signal context to be used by sigreturn. */
561 	frame.sf_sc.sc_mask   = lmask.__mask;
562 	frame.sf_sc.sc_gs     = rgs();
563 	frame.sf_sc.sc_fs     = regs->tf_fs;
564 	frame.sf_sc.sc_es     = regs->tf_es;
565 	frame.sf_sc.sc_ds     = regs->tf_ds;
566 	frame.sf_sc.sc_edi    = regs->tf_edi;
567 	frame.sf_sc.sc_esi    = regs->tf_esi;
568 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
569 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
570 	frame.sf_sc.sc_esp    = regs->tf_esp;
571 	frame.sf_sc.sc_edx    = regs->tf_edx;
572 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
573 	frame.sf_sc.sc_eax    = regs->tf_eax;
574 	frame.sf_sc.sc_eip    = regs->tf_eip;
575 	frame.sf_sc.sc_cs     = regs->tf_cs;
576 	frame.sf_sc.sc_eflags = regs->tf_eflags;
577 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
578 	frame.sf_sc.sc_ss     = regs->tf_ss;
579 	frame.sf_sc.sc_err    = regs->tf_err;
580 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
581 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
582 
583 	frame.sf_extramask[0] = lmask.__mask;
584 
585 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
586 		/*
587 		 * Process has trashed its stack; give it an illegal
588 		 * instruction to halt it in its tracks.
589 		 */
590 		PROC_LOCK(p);
591 		sigexit(td, SIGILL);
592 	}
593 
594 	/* Build context to run handler in. */
595 	regs->tf_esp = (int)fp;
596 	regs->tf_eip = linux_sigcode;
597 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
598 	regs->tf_cs = _ucodesel;
599 	regs->tf_ds = _udatasel;
600 	regs->tf_es = _udatasel;
601 	regs->tf_fs = _udatasel;
602 	regs->tf_ss = _udatasel;
603 	PROC_LOCK(p);
604 	mtx_lock(&psp->ps_mtx);
605 }
606 
607 /*
608  * System call to cleanup state after a signal
609  * has been taken.  Reset signal mask and
610  * stack state from context left by sendsig (above).
611  * Return to previous pc and psl as specified by
612  * context left by sendsig. Check carefully to
613  * make sure that the user has not modified the
614  * psl to gain improper privileges or to cause
615  * a machine fault.
616  */
617 int
618 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
619 {
620 	struct l_sigframe frame;
621 	struct trapframe *regs;
622 	l_sigset_t lmask;
623 	sigset_t bmask;
624 	int eflags;
625 	ksiginfo_t ksi;
626 
627 	regs = td->td_frame;
628 
629 #ifdef DEBUG
630 	if (ldebug(sigreturn))
631 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
632 #endif
633 	/*
634 	 * The trampoline code hands us the sigframe.
635 	 * It is unsafe to keep track of it ourselves, in the event that a
636 	 * program jumps out of a signal handler.
637 	 */
638 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
639 		return (EFAULT);
640 
641 	/* Check for security violations. */
642 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
643 	eflags = frame.sf_sc.sc_eflags;
644 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
645 		return (EINVAL);
646 
647 	/*
648 	 * Don't allow users to load a valid privileged %cs.  Let the
649 	 * hardware check for invalid selectors, excess privilege in
650 	 * other selectors, invalid %eip's and invalid %esp's.
651 	 */
652 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
653 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
654 		ksiginfo_init_trap(&ksi);
655 		ksi.ksi_signo = SIGBUS;
656 		ksi.ksi_code = BUS_OBJERR;
657 		ksi.ksi_trapno = T_PROTFLT;
658 		ksi.ksi_addr = (void *)regs->tf_eip;
659 		trapsignal(td, &ksi);
660 		return (EINVAL);
661 	}
662 
663 	lmask.__mask = frame.sf_sc.sc_mask;
664 	linux_to_bsd_sigset(&lmask, &bmask);
665 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
666 
667 	/* Restore signal context. */
668 	/* %gs was restored by the trampoline. */
669 	regs->tf_fs     = frame.sf_sc.sc_fs;
670 	regs->tf_es     = frame.sf_sc.sc_es;
671 	regs->tf_ds     = frame.sf_sc.sc_ds;
672 	regs->tf_edi    = frame.sf_sc.sc_edi;
673 	regs->tf_esi    = frame.sf_sc.sc_esi;
674 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
675 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
676 	regs->tf_edx    = frame.sf_sc.sc_edx;
677 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
678 	regs->tf_eax    = frame.sf_sc.sc_eax;
679 	regs->tf_eip    = frame.sf_sc.sc_eip;
680 	regs->tf_cs     = frame.sf_sc.sc_cs;
681 	regs->tf_eflags = eflags;
682 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
683 	regs->tf_ss     = frame.sf_sc.sc_ss;
684 
685 	return (EJUSTRETURN);
686 }
687 
688 /*
689  * System call to cleanup state after a signal
690  * has been taken.  Reset signal mask and
691  * stack state from context left by rt_sendsig (above).
692  * Return to previous pc and psl as specified by
693  * context left by sendsig. Check carefully to
694  * make sure that the user has not modified the
695  * psl to gain improper privileges or to cause
696  * a machine fault.
697  */
698 int
699 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
700 {
701 	struct l_ucontext uc;
702 	struct l_sigcontext *context;
703 	sigset_t bmask;
704 	l_stack_t *lss;
705 	stack_t ss;
706 	struct trapframe *regs;
707 	int eflags;
708 	ksiginfo_t ksi;
709 
710 	regs = td->td_frame;
711 
712 #ifdef DEBUG
713 	if (ldebug(rt_sigreturn))
714 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
715 #endif
716 	/*
717 	 * The trampoline code hands us the ucontext.
718 	 * It is unsafe to keep track of it ourselves, in the event that a
719 	 * program jumps out of a signal handler.
720 	 */
721 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
722 		return (EFAULT);
723 
724 	context = &uc.uc_mcontext;
725 
726 	/* Check for security violations. */
727 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
728 	eflags = context->sc_eflags;
729 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
730 		return (EINVAL);
731 
732 	/*
733 	 * Don't allow users to load a valid privileged %cs.  Let the
734 	 * hardware check for invalid selectors, excess privilege in
735 	 * other selectors, invalid %eip's and invalid %esp's.
736 	 */
737 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
738 	if (!CS_SECURE(context->sc_cs)) {
739 		ksiginfo_init_trap(&ksi);
740 		ksi.ksi_signo = SIGBUS;
741 		ksi.ksi_code = BUS_OBJERR;
742 		ksi.ksi_trapno = T_PROTFLT;
743 		ksi.ksi_addr = (void *)regs->tf_eip;
744 		trapsignal(td, &ksi);
745 		return (EINVAL);
746 	}
747 
748 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
749 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
750 
751 	/* Restore signal context. */
752 	/* %gs was restored by the trampoline. */
753 	regs->tf_fs     = context->sc_fs;
754 	regs->tf_es     = context->sc_es;
755 	regs->tf_ds     = context->sc_ds;
756 	regs->tf_edi    = context->sc_edi;
757 	regs->tf_esi    = context->sc_esi;
758 	regs->tf_ebp    = context->sc_ebp;
759 	regs->tf_ebx    = context->sc_ebx;
760 	regs->tf_edx    = context->sc_edx;
761 	regs->tf_ecx    = context->sc_ecx;
762 	regs->tf_eax    = context->sc_eax;
763 	regs->tf_eip    = context->sc_eip;
764 	regs->tf_cs     = context->sc_cs;
765 	regs->tf_eflags = eflags;
766 	regs->tf_esp    = context->sc_esp_at_signal;
767 	regs->tf_ss     = context->sc_ss;
768 
769 	/* Call sigaltstack & ignore results. */
770 	lss = &uc.uc_stack;
771 	ss.ss_sp = lss->ss_sp;
772 	ss.ss_size = lss->ss_size;
773 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
774 
775 #ifdef DEBUG
776 	if (ldebug(rt_sigreturn))
777 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
778 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
779 #endif
780 	(void)kern_sigaltstack(td, &ss, NULL);
781 
782 	return (EJUSTRETURN);
783 }
784 
785 static int
786 linux_fetch_syscall_args(struct thread *td)
787 {
788 	struct proc *p;
789 	struct trapframe *frame;
790 	struct syscall_args *sa;
791 
792 	p = td->td_proc;
793 	frame = td->td_frame;
794 	sa = &td->td_sa;
795 
796 	sa->code = frame->tf_eax;
797 	sa->args[0] = frame->tf_ebx;
798 	sa->args[1] = frame->tf_ecx;
799 	sa->args[2] = frame->tf_edx;
800 	sa->args[3] = frame->tf_esi;
801 	sa->args[4] = frame->tf_edi;
802 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
803 
804 	if (sa->code >= p->p_sysent->sv_size)
805 		/* nosys */
806 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
807 	else
808 		sa->callp = &p->p_sysent->sv_table[sa->code];
809 	sa->narg = sa->callp->sy_narg;
810 
811 	td->td_retval[0] = 0;
812 	td->td_retval[1] = frame->tf_edx;
813 
814 	return (0);
815 }
816 
817 /*
818  * If a Linux binary is exec'ing something, try this image activator
819  * first.  We override standard shell script execution in order to
820  * be able to modify the interpreter path.  We only do this if a Linux
821  * binary is doing the exec, so we do not create an EXEC module for it.
822  */
823 static int
824 linux_exec_imgact_try(struct image_params *imgp)
825 {
826 	const char *head = (const char *)imgp->image_header;
827 	char *rpath;
828 	int error = -1;
829 
830 	/*
831 	 * The interpreter for shell scripts run from a Linux binary needs
832 	 * to be located in /compat/linux if possible in order to recursively
833 	 * maintain Linux path emulation.
834 	 */
835 	if (((const short *)head)[0] == SHELLMAGIC) {
836 		/*
837 		 * Run our normal shell image activator.  If it succeeds then
838 		 * attempt to use the alternate path for the interpreter.  If
839 		 * an alternate path is found, use our stringspace to store it.
840 		 */
841 		if ((error = exec_shell_imgact(imgp)) == 0) {
842 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
843 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
844 			    AT_FDCWD);
845 			if (rpath != NULL)
846 				imgp->args->fname_buf =
847 				    imgp->interpreter_name = rpath;
848 		}
849 	}
850 	return (error);
851 }
852 
853 /*
854  * exec_setregs may initialize some registers differently than Linux
855  * does, thus potentially confusing Linux binaries. If necessary, we
856  * override the exec_setregs default(s) here.
857  */
858 static void
859 linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
860 {
861 	struct pcb *pcb = td->td_pcb;
862 
863 	exec_setregs(td, imgp, stack);
864 
865 	/* Linux sets %gs to 0, we default to _udatasel. */
866 	pcb->pcb_gs = 0;
867 	load_gs(0);
868 
869 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
870 }
871 
872 static void
873 linux_get_machine(const char **dst)
874 {
875 
876 	switch (cpu_class) {
877 	case CPUCLASS_686:
878 		*dst = "i686";
879 		break;
880 	case CPUCLASS_586:
881 		*dst = "i586";
882 		break;
883 	case CPUCLASS_486:
884 		*dst = "i486";
885 		break;
886 	default:
887 		*dst = "i386";
888 	}
889 }
890 
891 struct sysentvec linux_sysvec = {
892 	.sv_size	= LINUX_SYS_MAXSYSCALL,
893 	.sv_table	= linux_sysent,
894 	.sv_mask	= 0,
895 	.sv_errsize	= ELAST + 1,
896 	.sv_errtbl	= linux_errtbl,
897 	.sv_transtrap	= linux_translate_traps,
898 	.sv_fixup	= linux_fixup,
899 	.sv_sendsig	= linux_sendsig,
900 	.sv_sigcode	= &_binary_linux_locore_o_start,
901 	.sv_szsigcode	= &linux_szsigcode,
902 	.sv_name	= "Linux a.out",
903 	.sv_coredump	= NULL,
904 	.sv_imgact_try	= linux_exec_imgact_try,
905 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
906 	.sv_pagesize	= PAGE_SIZE,
907 	.sv_minuser	= VM_MIN_ADDRESS,
908 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
909 	.sv_usrstack	= LINUX_USRSTACK,
910 	.sv_psstrings	= PS_STRINGS,
911 	.sv_stackprot	= VM_PROT_ALL,
912 	.sv_copyout_strings = exec_copyout_strings,
913 	.sv_setregs	= linux_exec_setregs,
914 	.sv_fixlimit	= NULL,
915 	.sv_maxssiz	= NULL,
916 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
917 	.sv_set_syscall_retval = cpu_set_syscall_retval,
918 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
919 	.sv_syscallnames = NULL,
920 	.sv_shared_page_base = LINUX_SHAREDPAGE,
921 	.sv_shared_page_len = PAGE_SIZE,
922 	.sv_schedtail	= linux_schedtail,
923 	.sv_thread_detach = linux_thread_detach,
924 	.sv_trap	= NULL,
925 };
926 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
927 
/*
 * System entry vector for Linux ELF binaries (sv_name "Linux ELF").
 *
 * The three ELF brands defined later in this file (linux_brand,
 * linux_glibc2brand and linux_muslbrand) all reference this vector
 * through their .sysvec member.
 *
 * sv_shared_page_obj is not initialized here; linux_vdso_install()
 * assigns it after the shared page has been created.  sv_szsigcode
 * points at linux_szsigcode, which linux_vdso_install() also computes.
 */
928 struct sysentvec elf_linux_sysvec = {
929 	.sv_size	= LINUX_SYS_MAXSYSCALL,
930 	.sv_table	= linux_sysent,
931 	.sv_mask	= 0,
932 	.sv_errsize	= ELAST + 1,
933 	.sv_errtbl	= linux_errtbl,
934 	.sv_transtrap	= linux_translate_traps,
935 	.sv_fixup	= linux_fixup_elf,
936 	.sv_sendsig	= linux_sendsig,
937 	.sv_sigcode	= &_binary_linux_locore_o_start,
938 	.sv_szsigcode	= &linux_szsigcode,
939 	.sv_name	= "Linux ELF",
940 	.sv_coredump	= elf32_coredump,
941 	.sv_imgact_try	= linux_exec_imgact_try,
942 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
943 	.sv_pagesize	= PAGE_SIZE,
944 	.sv_minuser	= VM_MIN_ADDRESS,
945 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
946 	.sv_usrstack	= LINUX_USRSTACK,
947 	.sv_psstrings	= LINUX_PS_STRINGS,
948 	.sv_stackprot	= VM_PROT_ALL,
949 	.sv_copyout_strings = linux_copyout_strings,
950 	.sv_setregs	= linux_exec_setregs,
951 	.sv_fixlimit	= NULL,
952 	.sv_maxssiz	= NULL,
953 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
954 	.sv_set_syscall_retval = cpu_set_syscall_retval,
955 	.sv_fetch_syscall_args = linux_fetch_syscall_args,
956 	.sv_syscallnames = NULL,
957 	.sv_shared_page_base = LINUX_SHAREDPAGE,
	/* linux_vdso_install() panics if the sigcode image exceeds this. */
958 	.sv_shared_page_len = PAGE_SIZE,
959 	.sv_schedtail	= linux_schedtail,
960 	.sv_thread_detach = linux_thread_detach,
961 	.sv_trap	= NULL,
962 };
963 
964 static void
965 linux_vdso_install(void *param)
966 {
967 
968 	linux_szsigcode = (&_binary_linux_locore_o_end -
969 	    &_binary_linux_locore_o_start);
970 
971 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
972 		panic("Linux invalid vdso size\n");
973 
974 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
975 
976 	linux_shared_page_obj = __elfN(linux_shared_page_init)
977 	    (&linux_shared_page_mapping);
978 
979 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec);
980 
981 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
982 	    linux_szsigcode);
983 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
984 }
985 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
986     linux_vdso_install, NULL);
987 
988 static void
989 linux_vdso_deinstall(void *param)
990 {
991 
992 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
993 }
994 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
995     linux_vdso_deinstall, NULL);
996 
997 static char GNU_ABI_VENDOR[] = "GNU";
998 static int GNULINUX_ABI_DESC = 0;
999 
1000 static bool
1001 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1002 {
1003 	const Elf32_Word *desc;
1004 	uintptr_t p;
1005 
1006 	p = (uintptr_t)(note + 1);
1007 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1008 
1009 	desc = (const Elf32_Word *)p;
1010 	if (desc[0] != GNULINUX_ABI_DESC)
1011 		return (false);
1012 
1013 	/*
1014 	 * For Linux we encode osrel as follows (see linux_mib.c):
1015 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1016 	 */
1017 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1018 
1019 	return (true);
1020 }
1021 
1022 static Elf_Brandnote linux_brandnote = {
1023 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1024 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1025 	.hdr.n_type	= 1,
1026 	.vendor		= GNU_ABI_VENDOR,
1027 	.flags		= BN_TRANSLATE_OSREL,
1028 	.trans_osrel	= linux_trans_osrel
1029 };
1030 
/*
 * Linux ELF brand for EM_386 images whose interpreter path is
 * "/lib/ld-linux.so.1".  Uses elf_linux_sysvec and linux_brandnote,
 * like the other Linux brands in this file, and is registered through
 * linux_brandlist by linux_elf_modevent().
 */
1031 static Elf32_Brandinfo linux_brand = {
1032 	.brand		= ELFOSABI_LINUX,
1033 	.machine	= EM_386,
1034 	.compat_3_brand	= "Linux",
1035 	.emul_path	= "/compat/linux",
1036 	.interp_path	= "/lib/ld-linux.so.1",
1037 	.sysvec		= &elf_linux_sysvec,
1038 	.interp_newpath	= NULL,
1039 	.brand_note	= &linux_brandnote,
1040 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1041 };
1042 
/*
 * Linux ELF brand for EM_386 images whose interpreter path is
 * "/lib/ld-linux.so.2".  Apart from interp_path it is identical to
 * linux_brand; it is registered through linux_brandlist by
 * linux_elf_modevent().
 */
1043 static Elf32_Brandinfo linux_glibc2brand = {
1044 	.brand		= ELFOSABI_LINUX,
1045 	.machine	= EM_386,
1046 	.compat_3_brand	= "Linux",
1047 	.emul_path	= "/compat/linux",
1048 	.interp_path	= "/lib/ld-linux.so.2",
1049 	.sysvec		= &elf_linux_sysvec,
1050 	.interp_newpath	= NULL,
1051 	.brand_note	= &linux_brandnote,
1052 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1053 };
1054 
/*
 * Linux ELF brand for EM_386 images whose interpreter path is
 * "/lib/ld-musl-i386.so.1".  Apart from interp_path it is identical
 * to linux_brand; it is registered through linux_brandlist by
 * linux_elf_modevent().
 */
1055 static Elf32_Brandinfo linux_muslbrand = {
1056 	.brand		= ELFOSABI_LINUX,
1057 	.machine	= EM_386,
1058 	.compat_3_brand	= "Linux",
1059 	.emul_path	= "/compat/linux",
1060 	.interp_path	= "/lib/ld-musl-i386.so.1",
1061 	.sysvec		= &elf_linux_sysvec,
1062 	.interp_newpath	= NULL,
1063 	.brand_note	= &linux_brandnote,
1064 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1065 };
1066 
/*
 * NULL-terminated list of the Linux ELF brands.  linux_elf_modevent()
 * walks it to insert the entries on MOD_LOAD and to check/remove them
 * on MOD_UNLOAD; the terminating NULL ends those loops.
 */
1067 Elf32_Brandinfo *linux_brandlist[] = {
1068 	&linux_brand,
1069 	&linux_glibc2brand,
1070 	&linux_muslbrand,
1071 	NULL
1072 };
1073 
1074 static int
1075 linux_elf_modevent(module_t mod, int type, void *data)
1076 {
1077 	Elf32_Brandinfo **brandinfo;
1078 	int error;
1079 	struct linux_ioctl_handler **lihp;
1080 
1081 	error = 0;
1082 
1083 	switch(type) {
1084 	case MOD_LOAD:
1085 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1086 		     ++brandinfo)
1087 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1088 				error = EINVAL;
1089 		if (error == 0) {
1090 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1091 				linux_ioctl_register_handler(*lihp);
1092 			LIST_INIT(&futex_list);
1093 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1094 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1095 			      NULL, 1000);
1096 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1097 			      NULL, 1000);
1098 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1099 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1100 			linux_get_machine(&linux_kplatform);
1101 			linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1102 			    sizeof(char *));
1103 			linux_osd_jail_register();
1104 			stclohz = (stathz ? stathz : hz);
1105 			if (bootverbose)
1106 				printf("Linux ELF exec handler installed\n");
1107 		} else
1108 			printf("cannot insert Linux ELF brand handler\n");
1109 		break;
1110 	case MOD_UNLOAD:
1111 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1112 		     ++brandinfo)
1113 			if (elf32_brand_inuse(*brandinfo))
1114 				error = EBUSY;
1115 		if (error == 0) {
1116 			for (brandinfo = &linux_brandlist[0];
1117 			     *brandinfo != NULL; ++brandinfo)
1118 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1119 					error = EINVAL;
1120 		}
1121 		if (error == 0) {
1122 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1123 				linux_ioctl_unregister_handler(*lihp);
1124 			mtx_destroy(&futex_mtx);
1125 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1126 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1127 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1128 			linux_osd_jail_deregister();
1129 			if (bootverbose)
1130 				printf("Linux ELF exec handler removed\n");
1131 		} else
1132 			printf("Could not deinstall ELF interpreter entry\n");
1133 		break;
1134 	default:
1135 		return (EOPNOTSUPP);
1136 	}
1137 	return (error);
1138 }
1139 
1140 static moduledata_t linux_elf_mod = {
1141 	"linuxelf",
1142 	linux_elf_modevent,
1143 	0
1144 };
1145 
1146 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1147 FEATURE(linux, "Linux 32bit support");
1148