xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision af682d487b6c8ebba4858b2a6578b795c885e15b)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_FREEBSD32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_futex.h>
81 #include <compat/linux/linux_ioctl.h>
82 #include <compat/linux/linux_mib.h>
83 #include <compat/linux/linux_misc.h>
84 #include <compat/linux/linux_signal.h>
85 #include <compat/linux/linux_util.h>
86 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/*
 * Define the M_LINUX malloc type (short name "linux", description
 * "Linux mode structures").
 */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
90 
/*
 * Store one 32-bit auxiliary-vector entry -- an 'id' word followed by a
 * 'val' word -- through suword32() at the location 'pos' designates, and
 * advance 'pos' past both words.
 *
 * 'pos' must be a modifiable lvalue; it is evaluated (and incremented)
 * twice.  'id' and 'val' are evaluated once each.  All parameters are
 * parenthesized so that expression arguments (e.g. '*cursor') bind as the
 * caller intended.  The return value of suword32() is not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
96 
/*
 * The two-byte "#!" script signature as it appears when the first two bytes
 * of an image header are read as one native-endian 16-bit value.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123	/* #! */
#endif
102 
103 /*
104  * Allow the sendsig functions to use the ldebug() facility
105  * even though they are not syscalls themselves. Map them
106  * to syscall 0. This is slightly less bogus than using
107  * ldebug(sigreturn).
108  */
109 #define	LINUX_SYS_linux_rt_sendsig	0
110 #define	LINUX_SYS_linux_sendsig		0
111 
112 const char *linux_platform = "i686";
113 static int linux_szplatform;
114 extern char linux_sigcode[];
115 extern int linux_szsigcode;
116 
117 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
118 
119 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
120 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
121 
122 static int	elf_linux_fixup(register_t **stack_base,
123 		    struct image_params *iparams);
124 static register_t *linux_copyout_strings(struct image_params *imgp);
125 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126 static void	exec_linux_setregs(struct thread *td,
127 				   struct image_params *imgp, u_long stack);
128 static void	linux32_fixlimit(struct rlimit *rl, int which);
129 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);
130 
131 static eventhandler_tag linux_exit_tag;
132 static eventhandler_tag linux_exec_tag;
133 static eventhandler_tag linux_thread_dtor_tag;
134 
/*
 * Linux syscalls return negative errno's, we do positive and map them
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is indexed by the FreeBSD errno value (0 .. ELAST) and each
 * entry holds the corresponding Linux errno, already negated.  It is
 * installed as sv_errtbl in elf_linux_sysvec.  The trailing comment on each
 * row gives the range of FreeBSD errno indices that row covers.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
	 -72, -67, -71						/* 90 - 92 */
};
154 
/*
 * FreeBSD -> Linux signal number translation, installed as sv_sigtbl in
 * elf_linux_sysvec and indexed with _SIG_IDX(sig) by the sendsig routines.
 * The entries appear in FreeBSD signal-number order; the trailing comment
 * on each row gives the table indices the row covers.  A 0 entry (indices 6
 * and 28) means no Linux signal is listed for that slot -- presumably the
 * FreeBSD SIGEMT and SIGINFO positions; confirm against <sys/signal.h>.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,	/*  0 -  3 */
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,			/*  4 -  7 */
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,	/*  8 - 11 */
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,	/* 12 - 15 */
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,	/* 16 - 19 */
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,	/* 20 - 23 */
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,	/* 24 - 27 */
	0, LINUX_SIGUSR1, LINUX_SIGUSR2					/* 28 - 30 */
};
165 
/*
 * Linux -> FreeBSD signal number translation.  The table has the same size
 * as bsd_to_linux_signal and is presumably indexed the same way (signal
 * number minus one); its consumers are outside the part of the file visible
 * here, so confirm against them.  The trailing comment on each row gives the
 * table indices the row covers.  Indices 15 and 29 reuse SIGBUS and SIGURG,
 * presumably standing in for Linux signals with no FreeBSD counterpart.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,		/*  0 -  3 */
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,		/*  4 -  7 */
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,		/*  8 - 11 */
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,		/* 12 - 15 */
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,		/* 16 - 19 */
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,		/* 20 - 23 */
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,		/* 24 - 27 */
	SIGIO, SIGURG, SIGSYS				/* 28 - 30 */
};
176 
/* Value reported when a FreeBSD trap type has no entry in the table below. */
#define	LINUX_T_UNKNOWN	255

/*
 * FreeBSD trap type -> Linux trap number.  The array index is the FreeBSD
 * T_* value named in the comment preceding each entry.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0 */		LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT */	6,
	/*  2 */		LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT */	3,
	/*  4 */		LINUX_T_UNKNOWN,
	/*  5 */		LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP */	16,
	/*  7 T_ASTFLT */	254,
	/*  8 */		LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT */	13,
	/* 10 T_TRCTRAP */	1,
	/* 11 */		LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT */	14,
	/* 13 */		LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT */	17,
	/* 15 */		LINUX_T_UNKNOWN,
	/* 16 */		LINUX_T_UNKNOWN,
	/* 17 */		LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE */	0,
	/* 19 T_NMI */		2,
	/* 20 T_OFLOW */	4,
	/* 21 T_BOUND */	5,
	/* 22 T_DNA */		7,
	/* 23 T_DOUBLEFLT */	8,
	/* 24 T_FPOPFLT */	9,
	/* 25 T_TSSFLT */	10,
	/* 26 T_SEGNPFLT */	11,
	/* 27 T_STKFLT */	12,
	/* 28 T_MCHK */		18,
	/* 29 T_XMMFLT */	19,
	/* 30 T_RESERVED */	15
};

/* Number of entries in _bsd_to_linux_trapcode[]. */
#define	LINUX_TRAPCODE_NENTRIES						\
	(sizeof(_bsd_to_linux_trapcode) / sizeof(_bsd_to_linux_trapcode[0]))

/*
 * Look up 'code' in the table above; anything outside the table yields
 * LINUX_T_UNKNOWN.  The bound check is an unsigned comparison against a
 * sizeof-derived count, so a negative int 'code' is also treated as out of
 * range.  'code' is evaluated more than once.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) >= LINUX_TRAPCODE_NENTRIES ? LINUX_T_UNKNOWN :		\
	    _bsd_to_linux_trapcode[(code)])
215 
/*
 * 32-bit image of the argument/environment descriptor that
 * linux_copyout_strings() fills in at user address LINUX32_PS_STRINGS.
 * The two address members are user-space addresses stored as 32-bit values:
 * linux_copyout_strings() writes the address of the argv pointer vector
 * into ps_argvstr and the address of the envp pointer vector into
 * ps_envstr.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
222 
223 /*
224  * If FreeBSD & Linux have a difference of opinion about what a trap
225  * means, deal with it here.
226  *
227  * MPSAFE
228  */
229 static int
230 translate_traps(int signal, int trap_code)
231 {
232 	if (signal != SIGBUS)
233 		return signal;
234 	switch (trap_code) {
235 	case T_PROTFLT:
236 	case T_TSSFLT:
237 	case T_DOUBLEFLT:
238 	case T_PAGEFLT:
239 		return SIGSEGV;
240 	default:
241 		return signal;
242 	}
243 }
244 
245 static int
246 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
247 {
248 	Elf32_Auxargs *args;
249 	Elf32_Addr *base;
250 	Elf32_Addr *pos, *uplatform;
251 	struct linux32_ps_strings *arginfo;
252 
253 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform);
255 
256 	KASSERT(curthread->td_proc == imgp->proc,
257 	    ("unsafe elf_linux_fixup(), should be curproc"));
258 	base = (Elf32_Addr *)*stack_base;
259 	args = (Elf32_Auxargs *)imgp->auxargs;
260 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
261 
262 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
263 
264 	/*
265 	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
266 	 * as it has appeared in the 2.4.0-rc7 first time.
267 	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
268 	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
269 	 * is not present.
270 	 * Also see linux_times() implementation.
271 	 */
272 	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
273 		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
274 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
275 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
276 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
277 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
278 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
279 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
280 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
281 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
282 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
283 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
284 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
285 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
286 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
287 	if (args->execfd != -1)
288 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
289 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
290 
291 	free(imgp->auxargs, M_TEMP);
292 	imgp->auxargs = NULL;
293 
294 	base--;
295 	suword32(base, (uint32_t)imgp->args->argc);
296 	*stack_base = (register_t *)base;
297 	return (0);
298 }
299 
300 extern unsigned long linux_sznonrtsigcode;
301 
302 static void
303 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
304 {
305 	struct thread *td = curthread;
306 	struct proc *p = td->td_proc;
307 	struct sigacts *psp;
308 	struct trapframe *regs;
309 	struct l_rt_sigframe *fp, frame;
310 	int oonstack;
311 	int sig;
312 	int code;
313 
314 	sig = ksi->ksi_signo;
315 	code = ksi->ksi_code;
316 	PROC_LOCK_ASSERT(p, MA_OWNED);
317 	psp = p->p_sigacts;
318 	mtx_assert(&psp->ps_mtx, MA_OWNED);
319 	regs = td->td_frame;
320 	oonstack = sigonstack(regs->tf_rsp);
321 
322 #ifdef DEBUG
323 	if (ldebug(rt_sendsig))
324 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
325 		    catcher, sig, (void*)mask, code);
326 #endif
327 	/*
328 	 * Allocate space for the signal handler context.
329 	 */
330 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
331 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
332 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
333 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
334 	} else
335 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
336 	mtx_unlock(&psp->ps_mtx);
337 
338 	/*
339 	 * Build the argument list for the signal handler.
340 	 */
341 	if (p->p_sysent->sv_sigtbl)
342 		if (sig <= p->p_sysent->sv_sigsize)
343 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
344 
345 	bzero(&frame, sizeof(frame));
346 
347 	frame.sf_handler = PTROUT(catcher);
348 	frame.sf_sig = sig;
349 	frame.sf_siginfo = PTROUT(&fp->sf_si);
350 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
351 
352 	/* Fill in POSIX parts */
353 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
354 
355 	/*
356 	 * Build the signal context to be used by sigreturn.
357 	 */
358 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
359 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
360 
361 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
362 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
363 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
364 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
365 	PROC_UNLOCK(p);
366 
367 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
368 
369 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
370 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
371 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
372 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
373 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
374 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
375 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
376 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
377 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
378 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
379 	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
380 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
381 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
382 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
383 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
384 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
385 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
386 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
387 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
388 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
389 
390 #ifdef DEBUG
391 	if (ldebug(rt_sendsig))
392 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
393 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
394 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
395 #endif
396 
397 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
398 		/*
399 		 * Process has trashed its stack; give it an illegal
400 		 * instruction to halt it in its tracks.
401 		 */
402 #ifdef DEBUG
403 		if (ldebug(rt_sendsig))
404 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
405 			    fp, oonstack);
406 #endif
407 		PROC_LOCK(p);
408 		sigexit(td, SIGILL);
409 	}
410 
411 	/*
412 	 * Build context to run handler in.
413 	 */
414 	regs->tf_rsp = PTROUT(fp);
415 	regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode;
416 	regs->tf_rflags &= ~(PSL_T | PSL_D);
417 	regs->tf_cs = _ucode32sel;
418 	regs->tf_ss = _udatasel;
419 	regs->tf_ds = _udatasel;
420 	regs->tf_es = _udatasel;
421 	regs->tf_fs = _ufssel;
422 	regs->tf_gs = _ugssel;
423 	regs->tf_flags = TF_HASSEGS;
424 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
425 	PROC_LOCK(p);
426 	mtx_lock(&psp->ps_mtx);
427 }
428 
429 
430 /*
431  * Send an interrupt to process.
432  *
433  * Stack is set up to allow sigcode stored
434  * in u. to call routine, followed by kcall
435  * to sigreturn routine below.  After sigreturn
436  * resets the signal mask, the stack, and the
437  * frame pointer, it returns to the user
438  * specified pc, psl.
439  */
440 static void
441 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
442 {
443 	struct thread *td = curthread;
444 	struct proc *p = td->td_proc;
445 	struct sigacts *psp;
446 	struct trapframe *regs;
447 	struct l_sigframe *fp, frame;
448 	l_sigset_t lmask;
449 	int oonstack, i;
450 	int sig, code;
451 
452 	sig = ksi->ksi_signo;
453 	code = ksi->ksi_code;
454 	PROC_LOCK_ASSERT(p, MA_OWNED);
455 	psp = p->p_sigacts;
456 	mtx_assert(&psp->ps_mtx, MA_OWNED);
457 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
458 		/* Signal handler installed with SA_SIGINFO. */
459 		linux_rt_sendsig(catcher, ksi, mask);
460 		return;
461 	}
462 
463 	regs = td->td_frame;
464 	oonstack = sigonstack(regs->tf_rsp);
465 
466 #ifdef DEBUG
467 	if (ldebug(sendsig))
468 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
469 		    catcher, sig, (void*)mask, code);
470 #endif
471 
472 	/*
473 	 * Allocate space for the signal handler context.
474 	 */
475 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
476 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
477 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
478 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
479 	} else
480 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
481 	mtx_unlock(&psp->ps_mtx);
482 	PROC_UNLOCK(p);
483 
484 	/*
485 	 * Build the argument list for the signal handler.
486 	 */
487 	if (p->p_sysent->sv_sigtbl)
488 		if (sig <= p->p_sysent->sv_sigsize)
489 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
490 
491 	bzero(&frame, sizeof(frame));
492 
493 	frame.sf_handler = PTROUT(catcher);
494 	frame.sf_sig = sig;
495 
496 	bsd_to_linux_sigset(mask, &lmask);
497 
498 	/*
499 	 * Build the signal context to be used by sigreturn.
500 	 */
501 	frame.sf_sc.sc_mask   = lmask.__bits[0];
502 	frame.sf_sc.sc_gs     = regs->tf_gs;
503 	frame.sf_sc.sc_fs     = regs->tf_fs;
504 	frame.sf_sc.sc_es     = regs->tf_es;
505 	frame.sf_sc.sc_ds     = regs->tf_ds;
506 	frame.sf_sc.sc_edi    = regs->tf_rdi;
507 	frame.sf_sc.sc_esi    = regs->tf_rsi;
508 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
509 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
510 	frame.sf_sc.sc_edx    = regs->tf_rdx;
511 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
512 	frame.sf_sc.sc_eax    = regs->tf_rax;
513 	frame.sf_sc.sc_eip    = regs->tf_rip;
514 	frame.sf_sc.sc_cs     = regs->tf_cs;
515 	frame.sf_sc.sc_eflags = regs->tf_rflags;
516 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
517 	frame.sf_sc.sc_ss     = regs->tf_ss;
518 	frame.sf_sc.sc_err    = regs->tf_err;
519 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
520 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
521 
522 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
523 		frame.sf_extramask[i] = lmask.__bits[i+1];
524 
525 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
526 		/*
527 		 * Process has trashed its stack; give it an illegal
528 		 * instruction to halt it in its tracks.
529 		 */
530 		PROC_LOCK(p);
531 		sigexit(td, SIGILL);
532 	}
533 
534 	/*
535 	 * Build context to run handler in.
536 	 */
537 	regs->tf_rsp = PTROUT(fp);
538 	regs->tf_rip = p->p_sysent->sv_sigcode_base;
539 	regs->tf_rflags &= ~(PSL_T | PSL_D);
540 	regs->tf_cs = _ucode32sel;
541 	regs->tf_ss = _udatasel;
542 	regs->tf_ds = _udatasel;
543 	regs->tf_es = _udatasel;
544 	regs->tf_fs = _ufssel;
545 	regs->tf_gs = _ugssel;
546 	regs->tf_flags = TF_HASSEGS;
547 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
548 	PROC_LOCK(p);
549 	mtx_lock(&psp->ps_mtx);
550 }
551 
552 /*
553  * System call to cleanup state after a signal
554  * has been taken.  Reset signal mask and
555  * stack state from context left by sendsig (above).
556  * Return to previous pc and psl as specified by
557  * context left by sendsig. Check carefully to
558  * make sure that the user has not modified the
559  * psl to gain improper privileges or to cause
560  * a machine fault.
561  */
562 int
563 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
564 {
565 	struct l_sigframe frame;
566 	struct trapframe *regs;
567 	sigset_t bmask;
568 	l_sigset_t lmask;
569 	int eflags, i;
570 	ksiginfo_t ksi;
571 
572 	regs = td->td_frame;
573 
574 #ifdef DEBUG
575 	if (ldebug(sigreturn))
576 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
577 #endif
578 	/*
579 	 * The trampoline code hands us the sigframe.
580 	 * It is unsafe to keep track of it ourselves, in the event that a
581 	 * program jumps out of a signal handler.
582 	 */
583 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
584 		return (EFAULT);
585 
586 	/*
587 	 * Check for security violations.
588 	 */
589 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
590 	eflags = frame.sf_sc.sc_eflags;
591 	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
592 		return(EINVAL);
593 
594 	/*
595 	 * Don't allow users to load a valid privileged %cs.  Let the
596 	 * hardware check for invalid selectors, excess privilege in
597 	 * other selectors, invalid %eip's and invalid %esp's.
598 	 */
599 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
600 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
601 		ksiginfo_init_trap(&ksi);
602 		ksi.ksi_signo = SIGBUS;
603 		ksi.ksi_code = BUS_OBJERR;
604 		ksi.ksi_trapno = T_PROTFLT;
605 		ksi.ksi_addr = (void *)regs->tf_rip;
606 		trapsignal(td, &ksi);
607 		return(EINVAL);
608 	}
609 
610 	lmask.__bits[0] = frame.sf_sc.sc_mask;
611 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
612 		lmask.__bits[i+1] = frame.sf_extramask[i];
613 	linux_to_bsd_sigset(&lmask, &bmask);
614 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
615 
616 	/*
617 	 * Restore signal context.
618 	 */
619 	regs->tf_rdi    = frame.sf_sc.sc_edi;
620 	regs->tf_rsi    = frame.sf_sc.sc_esi;
621 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
622 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
623 	regs->tf_rdx    = frame.sf_sc.sc_edx;
624 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
625 	regs->tf_rax    = frame.sf_sc.sc_eax;
626 	regs->tf_rip    = frame.sf_sc.sc_eip;
627 	regs->tf_cs     = frame.sf_sc.sc_cs;
628 	regs->tf_ds     = frame.sf_sc.sc_ds;
629 	regs->tf_es     = frame.sf_sc.sc_es;
630 	regs->tf_fs     = frame.sf_sc.sc_fs;
631 	regs->tf_gs     = frame.sf_sc.sc_gs;
632 	regs->tf_rflags = eflags;
633 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
634 	regs->tf_ss     = frame.sf_sc.sc_ss;
635 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
636 
637 	return (EJUSTRETURN);
638 }
639 
640 /*
641  * System call to cleanup state after a signal
642  * has been taken.  Reset signal mask and
643  * stack state from context left by rt_sendsig (above).
644  * Return to previous pc and psl as specified by
645  * context left by sendsig. Check carefully to
646  * make sure that the user has not modified the
647  * psl to gain improper privileges or to cause
648  * a machine fault.
649  */
650 int
651 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
652 {
653 	struct l_ucontext uc;
654 	struct l_sigcontext *context;
655 	sigset_t bmask;
656 	l_stack_t *lss;
657 	stack_t ss;
658 	struct trapframe *regs;
659 	int eflags;
660 	ksiginfo_t ksi;
661 
662 	regs = td->td_frame;
663 
664 #ifdef DEBUG
665 	if (ldebug(rt_sigreturn))
666 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
667 #endif
668 	/*
669 	 * The trampoline code hands us the ucontext.
670 	 * It is unsafe to keep track of it ourselves, in the event that a
671 	 * program jumps out of a signal handler.
672 	 */
673 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
674 		return (EFAULT);
675 
676 	context = &uc.uc_mcontext;
677 
678 	/*
679 	 * Check for security violations.
680 	 */
681 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
682 	eflags = context->sc_eflags;
683 	if (!EFLAGS_SECURE(eflags, regs->tf_rflags))
684 		return(EINVAL);
685 
686 	/*
687 	 * Don't allow users to load a valid privileged %cs.  Let the
688 	 * hardware check for invalid selectors, excess privilege in
689 	 * other selectors, invalid %eip's and invalid %esp's.
690 	 */
691 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
692 	if (!CS_SECURE(context->sc_cs)) {
693 		ksiginfo_init_trap(&ksi);
694 		ksi.ksi_signo = SIGBUS;
695 		ksi.ksi_code = BUS_OBJERR;
696 		ksi.ksi_trapno = T_PROTFLT;
697 		ksi.ksi_addr = (void *)regs->tf_rip;
698 		trapsignal(td, &ksi);
699 		return(EINVAL);
700 	}
701 
702 	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
703 	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
704 
705 	/*
706 	 * Restore signal context
707 	 */
708 	regs->tf_gs	= context->sc_gs;
709 	regs->tf_fs	= context->sc_fs;
710 	regs->tf_es	= context->sc_es;
711 	regs->tf_ds	= context->sc_ds;
712 	regs->tf_rdi    = context->sc_edi;
713 	regs->tf_rsi    = context->sc_esi;
714 	regs->tf_rbp    = context->sc_ebp;
715 	regs->tf_rbx    = context->sc_ebx;
716 	regs->tf_rdx    = context->sc_edx;
717 	regs->tf_rcx    = context->sc_ecx;
718 	regs->tf_rax    = context->sc_eax;
719 	regs->tf_rip    = context->sc_eip;
720 	regs->tf_cs     = context->sc_cs;
721 	regs->tf_rflags = eflags;
722 	regs->tf_rsp    = context->sc_esp_at_signal;
723 	regs->tf_ss     = context->sc_ss;
724 	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
725 
726 	/*
727 	 * call sigaltstack & ignore results..
728 	 */
729 	lss = &uc.uc_stack;
730 	ss.ss_sp = PTRIN(lss->ss_sp);
731 	ss.ss_size = lss->ss_size;
732 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
733 
734 #ifdef DEBUG
735 	if (ldebug(rt_sigreturn))
736 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
737 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
738 #endif
739 	(void)kern_sigaltstack(td, &ss, NULL);
740 
741 	return (EJUSTRETURN);
742 }
743 
744 static int
745 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
746 {
747 	struct proc *p;
748 	struct trapframe *frame;
749 
750 	p = td->td_proc;
751 	frame = td->td_frame;
752 
753 	sa->args[0] = frame->tf_rbx;
754 	sa->args[1] = frame->tf_rcx;
755 	sa->args[2] = frame->tf_rdx;
756 	sa->args[3] = frame->tf_rsi;
757 	sa->args[4] = frame->tf_rdi;
758 	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
759 	sa->code = frame->tf_rax;
760 
761 	if (sa->code >= p->p_sysent->sv_size)
762 		sa->callp = &p->p_sysent->sv_table[0];
763 	else
764 		sa->callp = &p->p_sysent->sv_table[sa->code];
765 	sa->narg = sa->callp->sy_narg;
766 
767 	td->td_retval[0] = 0;
768 	td->td_retval[1] = frame->tf_rdx;
769 
770 	return (0);
771 }
772 
773 /*
774  * If a linux binary is exec'ing something, try this image activator
775  * first.  We override standard shell script execution in order to
776  * be able to modify the interpreter path.  We only do this if a linux
777  * binary is doing the exec, so we do not create an EXEC module for it.
778  */
779 static int	exec_linux_imgact_try(struct image_params *iparams);
780 
781 static int
782 exec_linux_imgact_try(struct image_params *imgp)
783 {
784 	const char *head = (const char *)imgp->image_header;
785 	char *rpath;
786 	int error = -1;
787 
788 	/*
789 	* The interpreter for shell scripts run from a linux binary needs
790 	* to be located in /compat/linux if possible in order to recursively
791 	* maintain linux path emulation.
792 	*/
793 	if (((const short *)head)[0] == SHELLMAGIC) {
794 		/*
795 		* Run our normal shell image activator.  If it succeeds attempt
796 		* to use the alternate path for the interpreter.  If an
797 		* alternate * path is found, use our stringspace to store it.
798 		*/
799 		if ((error = exec_shell_imgact(imgp)) == 0) {
800 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
801 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
802 			    AT_FDCWD);
803 			if (rpath != NULL)
804 				imgp->args->fname_buf =
805 				    imgp->interpreter_name = rpath;
806 		}
807 	}
808 	return (error);
809 }
810 
811 /*
812  * Clear registers on exec
813  * XXX copied from ia32_signal.c.
814  */
815 static void
816 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
817 {
818 	struct trapframe *regs = td->td_frame;
819 	struct pcb *pcb = td->td_pcb;
820 
821 	mtx_lock(&dt_lock);
822 	if (td->td_proc->p_md.md_ldt != NULL)
823 		user_ldt_free(td);
824 	else
825 		mtx_unlock(&dt_lock);
826 
827 	critical_enter();
828 	wrmsr(MSR_FSBASE, 0);
829 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
830 	pcb->pcb_fsbase = 0;
831 	pcb->pcb_gsbase = 0;
832 	critical_exit();
833 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
834 
835 	bzero((char *)regs, sizeof(struct trapframe));
836 	regs->tf_rip = imgp->entry_addr;
837 	regs->tf_rsp = stack;
838 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
839 	regs->tf_gs = _ugssel;
840 	regs->tf_fs = _ufssel;
841 	regs->tf_es = _udatasel;
842 	regs->tf_ds = _udatasel;
843 	regs->tf_ss = _udatasel;
844 	regs->tf_flags = TF_HASSEGS;
845 	regs->tf_cs = _ucode32sel;
846 	regs->tf_rbx = imgp->ps_strings;
847 
848 	fpstate_drop(td);
849 
850 	/* Do full restore on return so that we can change to a different %cs */
851 	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
852 	td->td_retval[1] = 0;
853 }
854 
855 /*
856  * XXX copied from ia32_sysvec.c.
857  */
858 static register_t *
859 linux_copyout_strings(struct image_params *imgp)
860 {
861 	int argc, envc;
862 	u_int32_t *vectp;
863 	char *stringp, *destp;
864 	u_int32_t *stack_base;
865 	struct linux32_ps_strings *arginfo;
866 
867 	/*
868 	 * Calculate string base and vector table pointers.
869 	 * Also deal with signal trampoline code for this exec type.
870 	 */
871 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
872 	destp =	(caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform -
873 	    roundup((ARG_MAX - imgp->args->stringspace),
874 	    sizeof(char *));
875 
876 	/*
877 	 * Install LINUX_PLATFORM
878 	 */
879 	copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform),
880 	    linux_szplatform);
881 
882 	/*
883 	 * If we have a valid auxargs ptr, prepare some room
884 	 * on the stack.
885 	 */
886 	if (imgp->auxargs) {
887 		/*
888 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
889 		 * lower compatibility.
890 		 */
891 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
892 		    (LINUX_AT_COUNT * 2);
893 		/*
894 		 * The '+ 2' is for the null pointers at the end of each of
895 		 * the arg and env vector sets,and imgp->auxarg_size is room
896 		 * for argument of Runtime loader.
897 		 */
898 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
899 		    imgp->args->envc + 2 + imgp->auxarg_size) *
900 		    sizeof(u_int32_t));
901 
902 	} else
903 		/*
904 		 * The '+ 2' is for the null pointers at the end of each of
905 		 * the arg and env vector sets
906 		 */
907 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
908 		    imgp->args->envc + 2) * sizeof(u_int32_t));
909 
910 	/*
911 	 * vectp also becomes our initial stack base
912 	 */
913 	stack_base = vectp;
914 
915 	stringp = imgp->args->begin_argv;
916 	argc = imgp->args->argc;
917 	envc = imgp->args->envc;
918 	/*
919 	 * Copy out strings - arguments and environment.
920 	 */
921 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
922 
923 	/*
924 	 * Fill in "ps_strings" struct for ps, w, etc.
925 	 */
926 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
927 	suword32(&arginfo->ps_nargvstr, argc);
928 
929 	/*
930 	 * Fill in argument portion of vector table.
931 	 */
932 	for (; argc > 0; --argc) {
933 		suword32(vectp++, (uint32_t)(intptr_t)destp);
934 		while (*stringp++ != 0)
935 			destp++;
936 		destp++;
937 	}
938 
939 	/* a null vector table pointer separates the argp's from the envp's */
940 	suword32(vectp++, 0);
941 
942 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
943 	suword32(&arginfo->ps_nenvstr, envc);
944 
945 	/*
946 	 * Fill in environment portion of vector table.
947 	 */
948 	for (; envc > 0; --envc) {
949 		suword32(vectp++, (uint32_t)(intptr_t)destp);
950 		while (*stringp++ != 0)
951 			destp++;
952 		destp++;
953 	}
954 
955 	/* end of vector table is a null pointer */
956 	suword32(vectp, 0);
957 
958 	return ((register_t *)stack_base);
959 }
960 
961 static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
962     "32-bit Linux emulation");
963 
964 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
965 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
966     &linux32_maxdsiz, 0, "");
967 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
968 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
969     &linux32_maxssiz, 0, "");
970 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
971 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
972     &linux32_maxvmem, 0, "");
973 
974 static void
975 linux32_fixlimit(struct rlimit *rl, int which)
976 {
977 
978 	switch (which) {
979 	case RLIMIT_DATA:
980 		if (linux32_maxdsiz != 0) {
981 			if (rl->rlim_cur > linux32_maxdsiz)
982 				rl->rlim_cur = linux32_maxdsiz;
983 			if (rl->rlim_max > linux32_maxdsiz)
984 				rl->rlim_max = linux32_maxdsiz;
985 		}
986 		break;
987 	case RLIMIT_STACK:
988 		if (linux32_maxssiz != 0) {
989 			if (rl->rlim_cur > linux32_maxssiz)
990 				rl->rlim_cur = linux32_maxssiz;
991 			if (rl->rlim_max > linux32_maxssiz)
992 				rl->rlim_max = linux32_maxssiz;
993 		}
994 		break;
995 	case RLIMIT_VMEM:
996 		if (linux32_maxvmem != 0) {
997 			if (rl->rlim_cur > linux32_maxvmem)
998 				rl->rlim_cur = linux32_maxvmem;
999 			if (rl->rlim_max > linux32_maxvmem)
1000 				rl->rlim_max = linux32_maxvmem;
1001 		}
1002 		break;
1003 	}
1004 }
1005 
1006 struct sysentvec elf_linux_sysvec = {
1007 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1008 	.sv_table	= linux_sysent,
1009 	.sv_mask	= 0,
1010 	.sv_sigsize	= LINUX_SIGTBLSZ,
1011 	.sv_sigtbl	= bsd_to_linux_signal,
1012 	.sv_errsize	= ELAST + 1,
1013 	.sv_errtbl	= bsd_to_linux_errno,
1014 	.sv_transtrap	= translate_traps,
1015 	.sv_fixup	= elf_linux_fixup,
1016 	.sv_sendsig	= linux_sendsig,
1017 	.sv_sigcode	= linux_sigcode,
1018 	.sv_szsigcode	= &linux_szsigcode,
1019 	.sv_prepsyscall	= NULL,
1020 	.sv_name	= "Linux ELF32",
1021 	.sv_coredump	= elf32_coredump,
1022 	.sv_imgact_try	= exec_linux_imgact_try,
1023 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1024 	.sv_pagesize	= PAGE_SIZE,
1025 	.sv_minuser	= VM_MIN_ADDRESS,
1026 	.sv_maxuser	= LINUX32_MAXUSER,
1027 	.sv_usrstack	= LINUX32_USRSTACK,
1028 	.sv_psstrings	= LINUX32_PS_STRINGS,
1029 	.sv_stackprot	= VM_PROT_ALL,
1030 	.sv_copyout_strings = linux_copyout_strings,
1031 	.sv_setregs	= exec_linux_setregs,
1032 	.sv_fixlimit	= linux32_fixlimit,
1033 	.sv_maxssiz	= &linux32_maxssiz,
1034 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
1035 	.sv_set_syscall_retval = cpu_set_syscall_retval,
1036 	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
1037 	.sv_syscallnames = NULL,
1038 	.sv_shared_page_base = LINUX32_SHAREDPAGE,
1039 	.sv_shared_page_len = PAGE_SIZE,
1040 	.sv_schedtail	= linux_schedtail,
1041 	.sv_thread_detach = linux_thread_detach,
1042 };
1043 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec);
1044 
1045 static char GNU_ABI_VENDOR[] = "GNU";
1046 static int GNULINUX_ABI_DESC = 0;
1047 
1048 static boolean_t
1049 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
1050 {
1051 	const Elf32_Word *desc;
1052 	uintptr_t p;
1053 
1054 	p = (uintptr_t)(note + 1);
1055 	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));
1056 
1057 	desc = (const Elf32_Word *)p;
1058 	if (desc[0] != GNULINUX_ABI_DESC)
1059 		return (FALSE);
1060 
1061 	/*
1062 	 * For linux we encode osrel as follows (see linux_mib.c):
1063 	 * VVVMMMIII (version, major, minor), see linux_mib.c.
1064 	 */
1065 	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];
1066 
1067 	return (TRUE);
1068 }
1069 
1070 static Elf_Brandnote linux32_brandnote = {
1071 	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
1072 	.hdr.n_descsz	= 16,	/* XXX at least 16 */
1073 	.hdr.n_type	= 1,
1074 	.vendor		= GNU_ABI_VENDOR,
1075 	.flags		= BN_TRANSLATE_OSREL,
1076 	.trans_osrel	= linux32_trans_osrel
1077 };
1078 
1079 static Elf32_Brandinfo linux_brand = {
1080 	.brand		= ELFOSABI_LINUX,
1081 	.machine	= EM_386,
1082 	.compat_3_brand	= "Linux",
1083 	.emul_path	= "/compat/linux",
1084 	.interp_path	= "/lib/ld-linux.so.1",
1085 	.sysvec		= &elf_linux_sysvec,
1086 	.interp_newpath	= NULL,
1087 	.brand_note	= &linux32_brandnote,
1088 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1089 };
1090 
1091 static Elf32_Brandinfo linux_glibc2brand = {
1092 	.brand		= ELFOSABI_LINUX,
1093 	.machine	= EM_386,
1094 	.compat_3_brand	= "Linux",
1095 	.emul_path	= "/compat/linux",
1096 	.interp_path	= "/lib/ld-linux.so.2",
1097 	.sysvec		= &elf_linux_sysvec,
1098 	.interp_newpath	= NULL,
1099 	.brand_note	= &linux32_brandnote,
1100 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1101 };
1102 
1103 Elf32_Brandinfo *linux_brandlist[] = {
1104 	&linux_brand,
1105 	&linux_glibc2brand,
1106 	NULL
1107 };
1108 
1109 static int
1110 linux_elf_modevent(module_t mod, int type, void *data)
1111 {
1112 	Elf32_Brandinfo **brandinfo;
1113 	int error;
1114 	struct linux_ioctl_handler **lihp;
1115 	struct linux_device_handler **ldhp;
1116 
1117 	error = 0;
1118 
1119 	switch(type) {
1120 	case MOD_LOAD:
1121 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1122 		     ++brandinfo)
1123 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1124 				error = EINVAL;
1125 		if (error == 0) {
1126 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1127 				linux_ioctl_register_handler(*lihp);
1128 			SET_FOREACH(ldhp, linux_device_handler_set)
1129 				linux_device_register_handler(*ldhp);
1130 			LIST_INIT(&futex_list);
1131 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1132 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1133 			    linux_proc_exit, NULL, 1000);
1134 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1135 			    linux_proc_exec, NULL, 1000);
1136 			linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor,
1137 			    linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY);
1138 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1139 			    sizeof(char *));
1140 			linux_osd_jail_register();
1141 			stclohz = (stathz ? stathz : hz);
1142 			if (bootverbose)
1143 				printf("Linux ELF exec handler installed\n");
1144 		} else
1145 			printf("cannot insert Linux ELF brand handler\n");
1146 		break;
1147 	case MOD_UNLOAD:
1148 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1149 		     ++brandinfo)
1150 			if (elf32_brand_inuse(*brandinfo))
1151 				error = EBUSY;
1152 		if (error == 0) {
1153 			for (brandinfo = &linux_brandlist[0];
1154 			     *brandinfo != NULL; ++brandinfo)
1155 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1156 					error = EINVAL;
1157 		}
1158 		if (error == 0) {
1159 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1160 				linux_ioctl_unregister_handler(*lihp);
1161 			SET_FOREACH(ldhp, linux_device_handler_set)
1162 				linux_device_unregister_handler(*ldhp);
1163 			mtx_destroy(&futex_mtx);
1164 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1165 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1166 			EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag);
1167 			linux_osd_jail_deregister();
1168 			if (bootverbose)
1169 				printf("Linux ELF exec handler removed\n");
1170 		} else
1171 			printf("Could not deinstall ELF interpreter entry\n");
1172 		break;
1173 	default:
1174 		return (EOPNOTSUPP);
1175 	}
1176 	return (error);
1177 }
1178 
1179 static moduledata_t linux_elf_mod = {
1180 	"linuxelf",
1181 	linux_elf_modevent,
1182 	0
1183 };
1184 
1185 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1186