xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 6e481f83f7ff59b62539d608ba87818588fd5797)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1996 Søren Schmidt
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysctl.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 #include <sys/vnode.h>
51 #include <sys/eventhandler.h>
52 
53 #include <vm/vm.h>
54 #include <vm/pmap.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_map.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_param.h>
60 
61 #include <machine/cpu.h>
62 #include <machine/cputypes.h>
63 #include <machine/md_var.h>
64 #include <machine/pcb.h>
65 
66 #include <i386/linux/linux.h>
67 #include <i386/linux/linux_proto.h>
68 #include <compat/linux/linux_emul.h>
69 #include <compat/linux/linux_futex.h>
70 #include <compat/linux/linux_ioctl.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_misc.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 #include <compat/linux/linux_vdso.h>
76 
77 MODULE_VERSION(linux, 1);
78 
/*
 * The two characters "#!" that open an interpreter script, expressed as the
 * 16-bit value they form in host byte order.  exec_linux_imgact_try()
 * compares the first short of the image header against this constant.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
84 
85 #if defined(DEBUG)
86 SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
87     linux_sysctl_debug, "A", "Linux debugging control");
88 #endif
89 
/*
 * Allow the sendsig functions to use the ldebug() facility even though they
 * are not syscalls themselves.  Map them to syscall 0.  This is slightly less
 * bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/*
 * User address of struct ps_strings for Linux ELF processes: the structure
 * sits directly below LINUX_USRSTACK.  Used as sv_psstrings in
 * elf_linux_sysvec.
 */
#define	LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))
99 
/*
 * Signal trampoline / vDSO image state.  linux_szsigcode is the byte length
 * of the embedded linux_locore object (computed in linux_vdso_install() from
 * the two _binary_* bounds below); linux_shared_page_obj and
 * linux_shared_page_mapping are filled in by linux_vdso_install().
 */
static int linux_szsigcode;
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;

/* Linux system call table, installed as sv_table in both sysentvecs. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations for the sysentvec hooks defined in this file. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td,
		    struct image_params *imgp, u_long stack);
static register_t *linux_copyout_strings(struct image_params *imgp);
static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void	linux_vdso_install(void *param);
static void	linux_vdso_deinstall(void *param);

/*
 * Platform string and its size in bytes; linux_copyout_strings() copies
 * linux_szplatform bytes of linux_kplatform onto the new process stack.
 * NOTE(review): both are assigned outside the portion of the file shown
 * here -- confirm where they are initialized.
 */
static int linux_szplatform;
const char *linux_kplatform;

/*
 * Event handler registrations.
 * NOTE(review): presumably registered/deregistered by the module event
 * handler further down in this file -- not visible here, confirm.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_exec_tag;
static eventhandler_tag linux_thread_dtor_tag;
/*
 * Translation table from FreeBSD trap numbers to the trap numbers reported
 * to Linux programs in the signal context (sc_trapno).  The array index is
 * the FreeBSD trap number, as named in the per-entry comments; entries with
 * no Linux counterpart hold LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above: a code at or beyond the end of
 * the table yields LINUX_T_UNKNOWN.  Note that the argument is evaluated
 * more than once.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<nitems(_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
167 
/*
 * Symbols exported by the Linux vDSO image.  In this file linux_sigcode and
 * linux_rt_sigcode are loaded into %eip when a signal handler is started,
 * and linux_vsyscall is handed to the process as the LINUX_AT_SYSINFO
 * auxiliary vector entry.
 * NOTE(review): the macro itself is defined in linux_vdso.h -- presumably
 * it declares an integer-typed address variable per symbol; confirm there.
 */
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);
171 
172 /*
173  * If FreeBSD & Linux have a difference of opinion about what a trap
174  * means, deal with it here.
175  *
176  * MPSAFE
177  */
178 static int
179 translate_traps(int signal, int trap_code)
180 {
181 	if (signal != SIGBUS)
182 		return (signal);
183 	switch (trap_code) {
184 	case T_PROTFLT:
185 	case T_TSSFLT:
186 	case T_DOUBLEFLT:
187 	case T_PAGEFLT:
188 		return (SIGSEGV);
189 	default:
190 		return (signal);
191 	}
192 }
193 
194 static int
195 linux_fixup(register_t **stack_base, struct image_params *imgp)
196 {
197 	register_t *argv, *envp;
198 
199 	argv = *stack_base;
200 	envp = *stack_base + (imgp->args->argc + 1);
201 	(*stack_base)--;
202 	suword(*stack_base, (intptr_t)(void *)envp);
203 	(*stack_base)--;
204 	suword(*stack_base, (intptr_t)(void *)argv);
205 	(*stack_base)--;
206 	suword(*stack_base, imgp->args->argc);
207 	return (0);
208 }
209 
210 static int
211 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
212 {
213 	struct proc *p;
214 	Elf32_Auxargs *args;
215 	Elf32_Addr *uplatform;
216 	struct ps_strings *arginfo;
217 	register_t *pos;
218 	int issetugid;
219 
220 	KASSERT(curthread->td_proc == imgp->proc,
221 	    ("unsafe elf_linux_fixup(), should be curproc"));
222 
223 	p = imgp->proc;
224 	issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0;
225 	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
226 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
227 	args = (Elf32_Auxargs *)imgp->auxargs;
228 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
229 
230 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR,
231 	    imgp->proc->p_sysent->sv_shared_page_base);
232 	AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall);
233 	AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature);
234 
235 	/*
236 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
237 	 * as it has appeared in the 2.4.0-rc7 first time.
238 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
239 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
240 	 * is not present.
241 	 * Also see linux_times() implementation.
242 	 */
243 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
244 		AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz);
245 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
246 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
247 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
248 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
249 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
250 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
251 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
252 	AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid);
253 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
254 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
255 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
256 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
257 	AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
258 	AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary);
259 	if (imgp->execpathp != 0)
260 		AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp);
261 	if (args->execfd != -1)
262 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
263 	AUXARGS_ENTRY(pos, AT_NULL, 0);
264 
265 	free(imgp->auxargs, M_TEMP);
266 	imgp->auxargs = NULL;
267 
268 	(*stack_base)--;
269 	suword(*stack_base, (register_t)imgp->args->argc);
270 	return (0);
271 }
272 
/*
 * Copied from kern/kern_exec.c
 *
 * Lay out the top of a new Linux ELF process stack (sv_copyout_strings
 * hook) and return the address that becomes the initial stack base.
 *
 * Working downwards from the ps_strings structure the function places:
 *   - the platform string (linux_szplatform bytes of linux_kplatform),
 *   - the executable path, when both imgp->execpath and imgp->auxargs are
 *     set (its user address is saved in imgp->execpathp),
 *   - LINUX_AT_RANDOM_LEN random bytes (user address saved in imgp->canary),
 *   - the argument and environment strings, at destp,
 *   - the argv/envp pointer table, at vectp, with extra room reserved after
 *     it for the auxiliary vector when imgp->auxargs is set.
 * It also fills in the argv/envp fields of ps_strings.
 *
 * NOTE(review): the results of copyout() and suword() are not checked, so a
 * fault while writing the user stack goes unreported here.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	char **vectp;
	char *stringp, *destp;
	register_t *stack_base;
	struct ps_strings *arginfo;
	char canary[LINUX_AT_RANDOM_LEN];
	size_t execpath_len;
	struct proc *p;

	/* Calculate string base and vector table pointers. */
	p = imgp->proc;
	/* The path (with its NUL) is only exported alongside an aux vector. */
	if (imgp->execpath != NULL && imgp->auxargs != NULL)
		execpath_len = strlen(imgp->execpath) + 1;
	else
		execpath_len = 0;
	arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings;
	/*
	 * destp is where the argument/environment strings start: below
	 * ps_strings, leaving room for SPARE_USRSPACE, the platform string,
	 * the canary, the executable path and the strings themselves, each
	 * of the last three rounded up to pointer size.
	 */
	destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
	    roundup(sizeof(canary), sizeof(char *)) -
	    roundup(execpath_len, sizeof(char *)) -
	    roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *));

	/* Install LINUX_PLATFORM. */
	copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform),
	    linux_szplatform);

	/* The executable path goes directly below the platform string. */
	if (execpath_len != 0) {
		imgp->execpathp = (uintptr_t)arginfo -
		linux_szplatform - execpath_len;
		copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len);
	}

	/* Prepare the canary for SSP. */
	arc4rand(canary, sizeof(canary), 0);
	/* The canary sits below the (rounded-up) executable path area. */
	imgp->canary = (uintptr_t)arginfo - linux_szplatform -
	    roundup(execpath_len, sizeof(char *)) -
	    roundup(sizeof(canary), sizeof(char *));
	copyout(canary, (void *)imgp->canary, sizeof(canary));

	/* If we have a valid auxargs ptr, prepare some room on the stack. */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets, and imgp->auxarg_size is room
		 * for the arguments of the runtime loader.
		 */
		vectp = (char **)(destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *));
	} else {
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets.
		 */
		vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) *
		    sizeof(char *));
	}

	/* vectp also becomes our initial stack base. */
	stack_base = (register_t *)vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;

	/* Copy out strings - arguments and environment. */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/* Fill in "ps_strings" struct for ps, w, etc. */
	suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.  stringp walks the
	 * kernel copy of the strings while destp tracks the matching user
	 * address, so each table slot receives the user address of the
	 * start of one string.
	 */
	for (; argc > 0; --argc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* A null vector table pointer separates the argp's from the envp's. */
	suword(vectp++, 0);

	suword(&arginfo->ps_envstr, (long)(intptr_t)vectp);
	suword(&arginfo->ps_nenvstr, envc);

	/* Fill in environment portion of vector table. */
	for (; envc > 0; --envc) {
		suword(vectp++, (long)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* The end of the vector table is a null pointer. */
	suword(vectp, 0);

	return (stack_base);
}
382 
383 static void
384 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
385 {
386 	struct thread *td = curthread;
387 	struct proc *p = td->td_proc;
388 	struct sigacts *psp;
389 	struct trapframe *regs;
390 	struct l_rt_sigframe *fp, frame;
391 	int sig, code;
392 	int oonstack;
393 
394 	sig = ksi->ksi_signo;
395 	code = ksi->ksi_code;
396 	PROC_LOCK_ASSERT(p, MA_OWNED);
397 	psp = p->p_sigacts;
398 	mtx_assert(&psp->ps_mtx, MA_OWNED);
399 	regs = td->td_frame;
400 	oonstack = sigonstack(regs->tf_esp);
401 
402 #ifdef DEBUG
403 	if (ldebug(rt_sendsig))
404 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
405 		    catcher, sig, (void*)mask, code);
406 #endif
407 	/* Allocate space for the signal handler context. */
408 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
409 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
410 		fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
411 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
412 	} else
413 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
414 	mtx_unlock(&psp->ps_mtx);
415 
416 	/* Build the argument list for the signal handler. */
417 	sig = bsd_to_linux_signal(sig);
418 
419 	bzero(&frame, sizeof(frame));
420 
421 	frame.sf_handler = catcher;
422 	frame.sf_sig = sig;
423 	frame.sf_siginfo = &fp->sf_si;
424 	frame.sf_ucontext = &fp->sf_sc;
425 
426 	/* Fill in POSIX parts. */
427 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
428 
429 	/* Build the signal context to be used by sigreturn. */
430 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
431 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
432 
433 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
434 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
435 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
436 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
437 	PROC_UNLOCK(p);
438 
439 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
440 
441 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__mask;
442 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
443 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
444 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
445 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
446 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
447 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
448 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
449 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
450 	frame.sf_sc.uc_mcontext.sc_esp    = regs->tf_esp;
451 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
452 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
453 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
454 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
455 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
456 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
457 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
458 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
459 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
460 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
461 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
462 
463 #ifdef DEBUG
464 	if (ldebug(rt_sendsig))
465 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
466 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
467 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
468 #endif
469 
470 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
471 		/*
472 		 * Process has trashed its stack; give it an illegal
473 		 * instruction to halt it in its tracks.
474 		 */
475 #ifdef DEBUG
476 		if (ldebug(rt_sendsig))
477 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
478 			    fp, oonstack);
479 #endif
480 		PROC_LOCK(p);
481 		sigexit(td, SIGILL);
482 	}
483 
484 	/* Build context to run handler in. */
485 	regs->tf_esp = (int)fp;
486 	regs->tf_eip = linux_rt_sigcode;
487 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
488 	regs->tf_cs = _ucodesel;
489 	regs->tf_ds = _udatasel;
490 	regs->tf_es = _udatasel;
491 	regs->tf_fs = _udatasel;
492 	regs->tf_ss = _udatasel;
493 	PROC_LOCK(p);
494 	mtx_lock(&psp->ps_mtx);
495 }
496 
497 
498 /*
499  * Send an interrupt to process.
500  *
501  * Stack is set up to allow sigcode stored
502  * in u. to call routine, followed by kcall
503  * to sigreturn routine below.  After sigreturn
504  * resets the signal mask, the stack, and the
505  * frame pointer, it returns to the user
506  * specified pc, psl.
507  */
508 static void
509 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
510 {
511 	struct thread *td = curthread;
512 	struct proc *p = td->td_proc;
513 	struct sigacts *psp;
514 	struct trapframe *regs;
515 	struct l_sigframe *fp, frame;
516 	l_sigset_t lmask;
517 	int sig, code;
518 	int oonstack;
519 
520 	PROC_LOCK_ASSERT(p, MA_OWNED);
521 	psp = p->p_sigacts;
522 	sig = ksi->ksi_signo;
523 	code = ksi->ksi_code;
524 	mtx_assert(&psp->ps_mtx, MA_OWNED);
525 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
526 		/* Signal handler installed with SA_SIGINFO. */
527 		linux_rt_sendsig(catcher, ksi, mask);
528 		return;
529 	}
530 	regs = td->td_frame;
531 	oonstack = sigonstack(regs->tf_esp);
532 
533 #ifdef DEBUG
534 	if (ldebug(sendsig))
535 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
536 		    catcher, sig, (void*)mask, code);
537 #endif
538 
539 	/* Allocate space for the signal handler context. */
540 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
541 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
542 		fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
543 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
544 	} else
545 		fp = (struct l_sigframe *)regs->tf_esp - 1;
546 	mtx_unlock(&psp->ps_mtx);
547 	PROC_UNLOCK(p);
548 
549 	/* Build the argument list for the signal handler. */
550 	sig = bsd_to_linux_signal(sig);
551 
552 	bzero(&frame, sizeof(frame));
553 
554 	frame.sf_handler = catcher;
555 	frame.sf_sig = sig;
556 
557 	bsd_to_linux_sigset(mask, &lmask);
558 
559 	/* Build the signal context to be used by sigreturn. */
560 	frame.sf_sc.sc_mask   = lmask.__mask;
561 	frame.sf_sc.sc_gs     = rgs();
562 	frame.sf_sc.sc_fs     = regs->tf_fs;
563 	frame.sf_sc.sc_es     = regs->tf_es;
564 	frame.sf_sc.sc_ds     = regs->tf_ds;
565 	frame.sf_sc.sc_edi    = regs->tf_edi;
566 	frame.sf_sc.sc_esi    = regs->tf_esi;
567 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
568 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
569 	frame.sf_sc.sc_esp    = regs->tf_esp;
570 	frame.sf_sc.sc_edx    = regs->tf_edx;
571 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
572 	frame.sf_sc.sc_eax    = regs->tf_eax;
573 	frame.sf_sc.sc_eip    = regs->tf_eip;
574 	frame.sf_sc.sc_cs     = regs->tf_cs;
575 	frame.sf_sc.sc_eflags = regs->tf_eflags;
576 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
577 	frame.sf_sc.sc_ss     = regs->tf_ss;
578 	frame.sf_sc.sc_err    = regs->tf_err;
579 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
580 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
581 
582 	frame.sf_extramask[0] = lmask.__mask;
583 
584 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
585 		/*
586 		 * Process has trashed its stack; give it an illegal
587 		 * instruction to halt it in its tracks.
588 		 */
589 		PROC_LOCK(p);
590 		sigexit(td, SIGILL);
591 	}
592 
593 	/* Build context to run handler in. */
594 	regs->tf_esp = (int)fp;
595 	regs->tf_eip = linux_sigcode;
596 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
597 	regs->tf_cs = _ucodesel;
598 	regs->tf_ds = _udatasel;
599 	regs->tf_es = _udatasel;
600 	regs->tf_fs = _udatasel;
601 	regs->tf_ss = _udatasel;
602 	PROC_LOCK(p);
603 	mtx_lock(&psp->ps_mtx);
604 }
605 
606 /*
607  * System call to cleanup state after a signal
608  * has been taken.  Reset signal mask and
609  * stack state from context left by sendsig (above).
610  * Return to previous pc and psl as specified by
611  * context left by sendsig. Check carefully to
612  * make sure that the user has not modified the
613  * psl to gain improper privileges or to cause
614  * a machine fault.
615  */
616 int
617 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
618 {
619 	struct l_sigframe frame;
620 	struct trapframe *regs;
621 	l_sigset_t lmask;
622 	sigset_t bmask;
623 	int eflags;
624 	ksiginfo_t ksi;
625 
626 	regs = td->td_frame;
627 
628 #ifdef DEBUG
629 	if (ldebug(sigreturn))
630 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
631 #endif
632 	/*
633 	 * The trampoline code hands us the sigframe.
634 	 * It is unsafe to keep track of it ourselves, in the event that a
635 	 * program jumps out of a signal handler.
636 	 */
637 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
638 		return (EFAULT);
639 
640 	/* Check for security violations. */
641 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
642 	eflags = frame.sf_sc.sc_eflags;
643 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
644 		return (EINVAL);
645 
646 	/*
647 	 * Don't allow users to load a valid privileged %cs.  Let the
648 	 * hardware check for invalid selectors, excess privilege in
649 	 * other selectors, invalid %eip's and invalid %esp's.
650 	 */
651 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
652 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
653 		ksiginfo_init_trap(&ksi);
654 		ksi.ksi_signo = SIGBUS;
655 		ksi.ksi_code = BUS_OBJERR;
656 		ksi.ksi_trapno = T_PROTFLT;
657 		ksi.ksi_addr = (void *)regs->tf_eip;
658 		trapsignal(td, &ksi);
659 		return (EINVAL);
660 	}
661 
662 	lmask.__mask = frame.sf_sc.sc_mask;
663 	linux_to_bsd_sigset(&lmask, &bmask);
664 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
665 
666 	/* Restore signal context. */
667 	/* %gs was restored by the trampoline. */
668 	regs->tf_fs     = frame.sf_sc.sc_fs;
669 	regs->tf_es     = frame.sf_sc.sc_es;
670 	regs->tf_ds     = frame.sf_sc.sc_ds;
671 	regs->tf_edi    = frame.sf_sc.sc_edi;
672 	regs->tf_esi    = frame.sf_sc.sc_esi;
673 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
674 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
675 	regs->tf_edx    = frame.sf_sc.sc_edx;
676 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
677 	regs->tf_eax    = frame.sf_sc.sc_eax;
678 	regs->tf_eip    = frame.sf_sc.sc_eip;
679 	regs->tf_cs     = frame.sf_sc.sc_cs;
680 	regs->tf_eflags = eflags;
681 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
682 	regs->tf_ss     = frame.sf_sc.sc_ss;
683 
684 	return (EJUSTRETURN);
685 }
686 
687 /*
688  * System call to cleanup state after a signal
689  * has been taken.  Reset signal mask and
690  * stack state from context left by rt_sendsig (above).
691  * Return to previous pc and psl as specified by
692  * context left by sendsig. Check carefully to
693  * make sure that the user has not modified the
694  * psl to gain improper privileges or to cause
695  * a machine fault.
696  */
697 int
698 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
699 {
700 	struct l_ucontext uc;
701 	struct l_sigcontext *context;
702 	sigset_t bmask;
703 	l_stack_t *lss;
704 	stack_t ss;
705 	struct trapframe *regs;
706 	int eflags;
707 	ksiginfo_t ksi;
708 
709 	regs = td->td_frame;
710 
711 #ifdef DEBUG
712 	if (ldebug(rt_sigreturn))
713 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
714 #endif
715 	/*
716 	 * The trampoline code hands us the ucontext.
717 	 * It is unsafe to keep track of it ourselves, in the event that a
718 	 * program jumps out of a signal handler.
719 	 */
720 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
721 		return (EFAULT);
722 
723 	context = &uc.uc_mcontext;
724 
725 	/* Check for security violations. */
726 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
727 	eflags = context->sc_eflags;
728 	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
729 		return (EINVAL);
730 
731 	/*
732 	 * Don't allow users to load a valid privileged %cs.  Let the
733 	 * hardware check for invalid selectors, excess privilege in
734 	 * other selectors, invalid %eip's and invalid %esp's.
735 	 */
736 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
737 	if (!CS_SECURE(context->sc_cs)) {
738 		ksiginfo_init_trap(&ksi);
739 		ksi.ksi_signo = SIGBUS;
740 		ksi.ksi_code = BUS_OBJERR;
741 		ksi.ksi_trapno = T_PROTFLT;
742 		ksi.ksi_addr = (void *)regs->tf_eip;
743 		trapsignal(td, &ksi);
744 		return (EINVAL);
745 	}
746 
747 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
748 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
749 
750 	/* Restore signal context. */
751 	/* %gs was restored by the trampoline. */
752 	regs->tf_fs     = context->sc_fs;
753 	regs->tf_es     = context->sc_es;
754 	regs->tf_ds     = context->sc_ds;
755 	regs->tf_edi    = context->sc_edi;
756 	regs->tf_esi    = context->sc_esi;
757 	regs->tf_ebp    = context->sc_ebp;
758 	regs->tf_ebx    = context->sc_ebx;
759 	regs->tf_edx    = context->sc_edx;
760 	regs->tf_ecx    = context->sc_ecx;
761 	regs->tf_eax    = context->sc_eax;
762 	regs->tf_eip    = context->sc_eip;
763 	regs->tf_cs     = context->sc_cs;
764 	regs->tf_eflags = eflags;
765 	regs->tf_esp    = context->sc_esp_at_signal;
766 	regs->tf_ss     = context->sc_ss;
767 
768 	/* Call sigaltstack & ignore results. */
769 	lss = &uc.uc_stack;
770 	ss.ss_sp = lss->ss_sp;
771 	ss.ss_size = lss->ss_size;
772 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
773 
774 #ifdef DEBUG
775 	if (ldebug(rt_sigreturn))
776 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
777 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
778 #endif
779 	(void)kern_sigaltstack(td, &ss, NULL);
780 
781 	return (EJUSTRETURN);
782 }
783 
784 static int
785 linux_fetch_syscall_args(struct thread *td)
786 {
787 	struct proc *p;
788 	struct trapframe *frame;
789 	struct syscall_args *sa;
790 
791 	p = td->td_proc;
792 	frame = td->td_frame;
793 	sa = &td->td_sa;
794 
795 	sa->code = frame->tf_eax;
796 	sa->args[0] = frame->tf_ebx;
797 	sa->args[1] = frame->tf_ecx;
798 	sa->args[2] = frame->tf_edx;
799 	sa->args[3] = frame->tf_esi;
800 	sa->args[4] = frame->tf_edi;
801 	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */
802 
803 	if (sa->code >= p->p_sysent->sv_size)
804 		/* nosys */
805 		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
806 	else
807 		sa->callp = &p->p_sysent->sv_table[sa->code];
808 	sa->narg = sa->callp->sy_narg;
809 
810 	td->td_retval[0] = 0;
811 	td->td_retval[1] = frame->tf_edx;
812 
813 	return (0);
814 }
815 
816 /*
817  * If a Linux binary is exec'ing something, try this image activator
818  * first.  We override standard shell script execution in order to
819  * be able to modify the interpreter path.  We only do this if a Linux
820  * binary is doing the exec, so we do not create an EXEC module for it.
821  */
822 static int	exec_linux_imgact_try(struct image_params *iparams);
823 
824 static int
825 exec_linux_imgact_try(struct image_params *imgp)
826 {
827     const char *head = (const char *)imgp->image_header;
828     char *rpath;
829     int error = -1;
830 
831     /*
832      * The interpreter for shell scripts run from a Linux binary needs
833      * to be located in /compat/linux if possible in order to recursively
834      * maintain Linux path emulation.
835      */
836     if (((const short *)head)[0] == SHELLMAGIC) {
837 	    /*
838 	     * Run our normal shell image activator.  If it succeeds attempt
839 	     * to use the alternate path for the interpreter.  If an alternate
840 	     * path is found, use our stringspace to store it.
841 	     */
842 	    if ((error = exec_shell_imgact(imgp)) == 0) {
843 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
844 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
845 		    if (rpath != NULL)
846 			    imgp->args->fname_buf =
847 				imgp->interpreter_name = rpath;
848 	    }
849     }
850     return (error);
851 }
852 
853 /*
854  * exec_setregs may initialize some registers differently than Linux
855  * does, thus potentially confusing Linux binaries. If necessary, we
856  * override the exec_setregs default(s) here.
857  */
858 static void
859 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
860 {
861 	struct pcb *pcb = td->td_pcb;
862 
863 	exec_setregs(td, imgp, stack);
864 
865 	/* Linux sets %gs to 0, we default to _udatasel. */
866 	pcb->pcb_gs = 0;
867 	load_gs(0);
868 
869 	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
870 }
871 
872 static void
873 linux_get_machine(const char **dst)
874 {
875 
876 	switch (cpu_class) {
877 	case CPUCLASS_686:
878 		*dst = "i686";
879 		break;
880 	case CPUCLASS_586:
881 		*dst = "i586";
882 		break;
883 	case CPUCLASS_486:
884 		*dst = "i486";
885 		break;
886 	default:
887 		*dst = "i386";
888 	}
889 }
890 
/*
 * System entry vector for Linux a.out binaries.
 *
 * It shares the system call table, errno translation, trap translation,
 * signal delivery and register setup hooks with elf_linux_sysvec, but
 * uses linux_fixup() for the stack fixup, the generic
 * exec_copyout_strings() and PS_STRINGS, and has no core dump routine.
 */
struct sysentvec linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= bsd_to_linux_errno_generic,
	.sv_transtrap	= translate_traps,
	.sv_fixup	= linux_fixup,
	.sv_sendsig	= linux_sendsig,
	/* The trampoline code is the embedded linux_locore object. */
	.sv_sigcode	= &_binary_linux_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_name	= "Linux a.out",
	.sv_coredump	= NULL,
	.sv_imgact_try	= exec_linux_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_pagesize	= PAGE_SIZE,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= VM_MAXUSER_ADDRESS,
	.sv_usrstack	= LINUX_USRSTACK,
	.sv_psstrings	= PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = exec_copyout_strings,
	.sv_setregs	= exec_linux_setregs,
	.sv_fixlimit	= NULL,
	.sv_maxssiz	= NULL,
	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = linux_fetch_syscall_args,
	.sv_syscallnames = NULL,
	.sv_shared_page_base = LINUX_SHAREDPAGE,
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
	.sv_trap	= NULL,
};
INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
927 
/*
 * System entry vector for Linux ELF binaries.
 *
 * Compared with the a.out vector above it uses elf_linux_fixup() (which
 * writes the auxiliary vector), linux_copyout_strings() together with
 * LINUX_PS_STRINGS, supports core dumps through elf32_coredump, and sets
 * SV_SHP in its flags.  Its sv_shared_page_obj is filled in at boot by
 * linux_vdso_install().
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= bsd_to_linux_errno_generic,
	.sv_transtrap	= translate_traps,
	.sv_fixup	= elf_linux_fixup,
	.sv_sendsig	= linux_sendsig,
	/* The trampoline code is the embedded linux_locore object. */
	.sv_sigcode	= &_binary_linux_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_name	= "Linux ELF",
	.sv_coredump	= elf32_coredump,
	.sv_imgact_try	= exec_linux_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_pagesize	= PAGE_SIZE,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= VM_MAXUSER_ADDRESS,
	.sv_usrstack	= LINUX_USRSTACK,
	.sv_psstrings	= LINUX_PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= exec_linux_setregs,
	.sv_fixlimit	= NULL,
	.sv_maxssiz	= NULL,
	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = linux_fetch_syscall_args,
	.sv_syscallnames = NULL,
	.sv_shared_page_base = LINUX_SHAREDPAGE,
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
	.sv_trap	= NULL,
};
963 
964 static void
965 linux_vdso_install(void *param)
966 {
967 
968 	linux_szsigcode = (&_binary_linux_locore_o_end -
969 	    &_binary_linux_locore_o_start);
970 
971 	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
972 		panic("Linux invalid vdso size\n");
973 
974 	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);
975 
976 	linux_shared_page_obj = __elfN(linux_shared_page_init)
977 	    (&linux_shared_page_mapping);
978 
979 	__elfN(linux_vdso_reloc)(&elf_linux_sysvec);
980 
981 	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
982 	    linux_szsigcode);
983 	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
984 }
985 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
986     (sysinit_cfunc_t)linux_vdso_install, NULL);
987 
988 static void
989 linux_vdso_deinstall(void *param)
990 {
991 
992 	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
993 }
994 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
995     (sysinit_cfunc_t)linux_vdso_deinstall, NULL);
996 
997 static char GNU_ABI_VENDOR[] = "GNU";
998 static int GNULINUX_ABI_DESC = 0;
999 
1000 static bool
1001 linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
1002 {
1003 	const Elf32_Word *desc;
1004 	uintptr_t p;
1005 
1006 	p = (uintptr_t)(note + 1);
1007 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1008 
1009 	desc = (const Elf32_Word *)p;
1010 	if (desc[0] != GNULINUX_ABI_DESC)
1011 		return (false);
1012 
1013 	/*
1014 	 * For Linux we encode osrel as follows (see linux_mib.c):
1015 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1016 	 */
1017 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1018 
1019 	return (true);
1020 }
1021 
1022 static Elf_Brandnote linux_brandnote = {
1023 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1024 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1025 	.hdr.n_type	= 1,
1026 	.vendor		= GNU_ABI_VENDOR,
1027 	.flags		= BN_TRANSLATE_OSREL,
1028 	.trans_osrel	= linux_trans_osrel
1029 };
1030 
1031 static Elf32_Brandinfo linux_brand = {
1032 	.brand		= ELFOSABI_LINUX,
1033 	.machine	= EM_386,
1034 	.compat_3_brand	= "Linux",
1035 	.emul_path	= "/compat/linux",
1036 	.interp_path	= "/lib/ld-linux.so.1",
1037 	.sysvec		= &elf_linux_sysvec,
1038 	.interp_newpath	= NULL,
1039 	.brand_note	= &linux_brandnote,
1040 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1041 };
1042 
1043 static Elf32_Brandinfo linux_glibc2brand = {
1044 	.brand		= ELFOSABI_LINUX,
1045 	.machine	= EM_386,
1046 	.compat_3_brand	= "Linux",
1047 	.emul_path	= "/compat/linux",
1048 	.interp_path	= "/lib/ld-linux.so.2",
1049 	.sysvec		= &elf_linux_sysvec,
1050 	.interp_newpath	= NULL,
1051 	.brand_note	= &linux_brandnote,
1052 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1053 };
1054 
1055 static Elf32_Brandinfo linux_muslbrand = {
1056 	.brand		= ELFOSABI_LINUX,
1057 	.machine	= EM_386,
1058 	.compat_3_brand	= "Linux",
1059 	.emul_path	= "/compat/linux",
1060 	.interp_path	= "/lib/ld-musl-i386.so.1",
1061 	.sysvec		= &elf_linux_sysvec,
1062 	.interp_newpath	= NULL,
1063 	.brand_note	= &linux_brandnote,
1064 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1065 };
1066 
1067 Elf32_Brandinfo *linux_brandlist[] = {
1068 	&linux_brand,
1069 	&linux_glibc2brand,
1070 	&linux_muslbrand,
1071 	NULL
1072 };
1073 
1074 static int
1075 linux_elf_modevent(module_t mod, int type, void *data)
1076 {
1077 	Elf32_Brandinfo **brandinfo;
1078 	int error;
1079 	struct linux_ioctl_handler **lihp;
1080 
1081 	error = 0;
1082 
1083 	switch(type) {
1084 	case MOD_LOAD:
1085 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1086 		     ++brandinfo)
1087 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1088 				error = EINVAL;
1089 		if (error == 0) {
1090 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1091 				linux_ioctl_register_handler(*lihp);
1092 			LIST_INIT(&futex_list);
1093 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1094 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1095 			      NULL, 1000);
1096 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1097 			      NULL, 1000);
1098 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1099 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1100 			linux_get_machine(&linux_kplatform);
1101 			linux_szplatform = roundup(strlen(linux_kplatform) + 1,
1102 			    sizeof(char *));
1103 			linux_osd_jail_register();
1104 			stclohz = (stathz ? stathz : hz);
1105 			if (bootverbose)
1106 				printf("Linux ELF exec handler installed\n");
1107 		} else
1108 			printf("cannot insert Linux ELF brand handler\n");
1109 		break;
1110 	case MOD_UNLOAD:
1111 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1112 		     ++brandinfo)
1113 			if (elf32_brand_inuse(*brandinfo))
1114 				error = EBUSY;
1115 		if (error == 0) {
1116 			for (brandinfo = &linux_brandlist[0];
1117 			     *brandinfo != NULL; ++brandinfo)
1118 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1119 					error = EINVAL;
1120 		}
1121 		if (error == 0) {
1122 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1123 				linux_ioctl_unregister_handler(*lihp);
1124 			mtx_destroy(&futex_mtx);
1125 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1126 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1127 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1128 			linux_osd_jail_deregister();
1129 			if (bootverbose)
1130 				printf("Linux ELF exec handler removed\n");
1131 		} else
1132 			printf("Could not deinstall ELF interpreter entry\n");
1133 		break;
1134 	default:
1135 		return (EOPNOTSUPP);
1136 	}
1137 	return (error);
1138 }
1139 
1140 static moduledata_t linux_elf_mod = {
1141 	"linuxelf",
1142 	linux_elf_modevent,
1143 	0
1144 };
1145 
1146 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1147 FEATURE(linux, "Linux 32bit support");
1148