xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision f4f8f02054f3abb6ceb84aefcdecc78d5c8b462f)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_futex.h>
80 #include <compat/linux/linux_emul.h>
81 #include <compat/linux/linux_mib.h>
82 #include <compat/linux/linux_misc.h>
83 #include <compat/linux/linux_signal.h>
84 #include <compat/linux/linux_util.h>
85 
/* Declare version 1 of the module named "linux". */
MODULE_VERSION(linux, 1);

/*
 * Define the malloc type M_LINUX (short name "linux") with the
 * description string given below.
 */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
89 
/*
 * Store one auxiliary-vector entry as two consecutive 32-bit words:
 * first `id', then `val'.  Each word is written with suword32() and
 * `pos' is post-incremented after each store, so the caller's cursor
 * ends up two slots further on.  `pos' must therefore be a modifiable
 * lvalue.  The results of suword32() are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)
95 
/*
 * The two characters "#!" read as a single 16-bit value in host byte
 * order.  exec_linux_imgact_try() compares the first short of the image
 * header against this constant.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
110 
/*
 * Platform name string.  linux_copyout_strings() copies it to user space
 * just below the signal trampoline, and elf_linux_fixup() passes that
 * address in the LINUX_AT_PLATFORM auxiliary-vector entry.
 */
const char *linux_platform = "i686";
/*
 * Number of bytes reserved for, and copied from, linux_platform.
 * NOTE(review): the assignment is not in the part of the file visible
 * here; presumably it is set when the module is loaded -- confirm.
 */
static int linux_szplatform;
/* Signal trampoline code and its size, defined outside this file. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* System call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets of ioctl and device handlers. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of the file-local functions defined below. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
static void	linux32_fixlimit(struct rlimit *rl, int which);

/*
 * Event handler registration tags.
 * NOTE(review): they are not used in the part of the file visible here;
 * presumably they are filled in at module load and used to deregister
 * at unload -- confirm.
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table is indexed by the FreeBSD errno value; each entry is the
 * Linux errno to report instead, already negated.  The trailing comment
 * on each row gives the range of FreeBSD errno values the row covers.
 *
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
	 -72, -67, -71						/* 90 - 92 */
};
154 
/*
 * FreeBSD -> Linux signal number map.  Entry N holds the Linux signal
 * for FreeBSD signal N + 1; the sendsig functions in this file index
 * the per-ABI signal table with _SIG_IDX(sig).
 * NOTE(review): presumably this is the table installed as sv_sigtbl --
 * the sysentvec initializer is outside the part of the file seen here.
 * The two 0 entries (FreeBSD signals 7 and 29) appear to mark signals
 * with no Linux counterpart -- confirm.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,	/*  1 -  4 */
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,			/*  5 -  8 */
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,	/*  9 - 12 */
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,	/* 13 - 16 */
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,	/* 17 - 20 */
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,	/* 21 - 24 */
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,	/* 25 - 28 */
	0, LINUX_SIGUSR1, LINUX_SIGUSR2					/* 29 - 31 */
};
165 
/*
 * Linux -> FreeBSD signal number map, the counterpart of
 * bsd_to_linux_signal.
 * NOTE(review): presumably entry N holds the FreeBSD signal for Linux
 * signal N + 1, mirroring the other table -- no lookup is visible in
 * this part of the file.  Slots 16 and 30 do not follow the name-for-name
 * pattern of the rest: they map to SIGBUS and SIGURG respectively.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,	/*  1 -  4 */
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,	/*  5 -  8 */
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,	/*  9 - 12 */
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,	/* 13 - 16 */
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,	/* 17 - 20 */
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,	/* 21 - 24 */
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,	/* 25 - 28 */
	SIGIO, SIGURG, SIGSYS			/* 29 - 31 */
};
176 
/* Value reported to Linux for trap codes that have no mapping. */
#define LINUX_T_UNKNOWN  255
/*
 * FreeBSD trap code -> Linux trap number.  The array index is the
 * FreeBSD code (named in the trailing comment of each row); the value
 * is what the sendsig functions store in the sc_trapno field of the
 * signal context.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above; yields LINUX_T_UNKNOWN for
 * any code past the end of the table.  The comparison is made against
 * an unsigned size, so a negative int code is also treated as out of
 * range.  `code' is evaluated twice -- do not pass an expression with
 * side effects.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
215 
/*
 * 32-bit layout of the "ps_strings" record that linux_copyout_strings()
 * fills in at user address LINUX32_PS_STRINGS.  The two pointer members
 * hold user-space addresses truncated to 32 bits.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};
222 
223 /*
224  * If FreeBSD & Linux have a difference of opinion about what a trap
225  * means, deal with it here.
226  *
227  * MPSAFE
228  */
229 static int
230 translate_traps(int signal, int trap_code)
231 {
232 	if (signal != SIGBUS)
233 		return signal;
234 	switch (trap_code) {
235 	case T_PROTFLT:
236 	case T_TSSFLT:
237 	case T_DOUBLEFLT:
238 	case T_PAGEFLT:
239 		return SIGSEGV;
240 	default:
241 		return signal;
242 	}
243 }
244 
245 static int
246 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
247 {
248 	Elf32_Auxargs *args;
249 	Elf32_Addr *base;
250 	Elf32_Addr *pos, *uplatform;
251 	struct linux32_ps_strings *arginfo;
252 
253 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
254 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
255 	    linux_szplatform);
256 
257 	KASSERT(curthread->td_proc == imgp->proc,
258 	    ("unsafe elf_linux_fixup(), should be curproc"));
259 	base = (Elf32_Addr *)*stack_base;
260 	args = (Elf32_Auxargs *)imgp->auxargs;
261 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
262 
263 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
264 	AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, hz);
265 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
266 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
267 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
268 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
269 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
270 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
271 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
272 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
273 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
274 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
275 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
276 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
277 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
278 	if (args->execfd != -1)
279 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
280 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
281 
282 	free(imgp->auxargs, M_TEMP);
283 	imgp->auxargs = NULL;
284 
285 	base--;
286 	suword32(base, (uint32_t)imgp->args->argc);
287 	*stack_base = (register_t *)base;
288 	return 0;
289 }
290 
291 extern unsigned long linux_sznonrtsigcode;
292 
293 static void
294 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
295 {
296 	struct thread *td = curthread;
297 	struct proc *p = td->td_proc;
298 	struct sigacts *psp;
299 	struct trapframe *regs;
300 	struct l_rt_sigframe *fp, frame;
301 	int oonstack;
302 	int sig;
303 	int code;
304 
305 	sig = ksi->ksi_signo;
306 	code = ksi->ksi_code;
307 	PROC_LOCK_ASSERT(p, MA_OWNED);
308 	psp = p->p_sigacts;
309 	mtx_assert(&psp->ps_mtx, MA_OWNED);
310 	regs = td->td_frame;
311 	oonstack = sigonstack(regs->tf_rsp);
312 
313 #ifdef DEBUG
314 	if (ldebug(rt_sendsig))
315 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
316 		    catcher, sig, (void*)mask, code);
317 #endif
318 	/*
319 	 * Allocate space for the signal handler context.
320 	 */
321 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
322 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
323 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
324 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
325 	} else
326 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
327 	mtx_unlock(&psp->ps_mtx);
328 
329 	/*
330 	 * Build the argument list for the signal handler.
331 	 */
332 	if (p->p_sysent->sv_sigtbl)
333 		if (sig <= p->p_sysent->sv_sigsize)
334 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
335 
336 	bzero(&frame, sizeof(frame));
337 
338 	frame.sf_handler = PTROUT(catcher);
339 	frame.sf_sig = sig;
340 	frame.sf_siginfo = PTROUT(&fp->sf_si);
341 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
342 
343 	/* Fill in POSIX parts */
344 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
345 
346 	/*
347 	 * Build the signal context to be used by sigreturn.
348 	 */
349 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
350 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
351 
352 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
353 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
354 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
355 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
356 	PROC_UNLOCK(p);
357 
358 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
359 
360 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
361 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
362 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
363 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
364 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
365 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
366 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
367 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
368 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
369 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
370 	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
371 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
372 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
373 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
374 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
375 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
376 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
377 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
378 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
379 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
380 
381 #ifdef DEBUG
382 	if (ldebug(rt_sendsig))
383 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
384 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
385 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
386 #endif
387 
388 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
389 		/*
390 		 * Process has trashed its stack; give it an illegal
391 		 * instruction to halt it in its tracks.
392 		 */
393 #ifdef DEBUG
394 		if (ldebug(rt_sendsig))
395 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
396 			    fp, oonstack);
397 #endif
398 		PROC_LOCK(p);
399 		sigexit(td, SIGILL);
400 	}
401 
402 	/*
403 	 * Build context to run handler in.
404 	 */
405 	regs->tf_rsp = PTROUT(fp);
406 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
407 	    linux_sznonrtsigcode;
408 	regs->tf_rflags &= ~(PSL_T | PSL_D);
409 	regs->tf_cs = _ucode32sel;
410 	regs->tf_ss = _udatasel;
411 	regs->tf_ds = _udatasel;
412 	regs->tf_es = _udatasel;
413 	regs->tf_fs = _ufssel;
414 	regs->tf_gs = _ugssel;
415 	regs->tf_flags = TF_HASSEGS;
416 	PROC_LOCK(p);
417 	mtx_lock(&psp->ps_mtx);
418 }
419 
420 
421 /*
422  * Send an interrupt to process.
423  *
424  * Stack is set up to allow sigcode stored
425  * in u. to call routine, followed by kcall
426  * to sigreturn routine below.  After sigreturn
427  * resets the signal mask, the stack, and the
428  * frame pointer, it returns to the user
429  * specified pc, psl.
430  */
431 static void
432 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
433 {
434 	struct thread *td = curthread;
435 	struct proc *p = td->td_proc;
436 	struct sigacts *psp;
437 	struct trapframe *regs;
438 	struct l_sigframe *fp, frame;
439 	l_sigset_t lmask;
440 	int oonstack, i;
441 	int sig, code;
442 
443 	sig = ksi->ksi_signo;
444 	code = ksi->ksi_code;
445 	PROC_LOCK_ASSERT(p, MA_OWNED);
446 	psp = p->p_sigacts;
447 	mtx_assert(&psp->ps_mtx, MA_OWNED);
448 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
449 		/* Signal handler installed with SA_SIGINFO. */
450 		linux_rt_sendsig(catcher, ksi, mask);
451 		return;
452 	}
453 
454 	regs = td->td_frame;
455 	oonstack = sigonstack(regs->tf_rsp);
456 
457 #ifdef DEBUG
458 	if (ldebug(sendsig))
459 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
460 		    catcher, sig, (void*)mask, code);
461 #endif
462 
463 	/*
464 	 * Allocate space for the signal handler context.
465 	 */
466 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
467 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
468 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
469 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
470 	} else
471 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
472 	mtx_unlock(&psp->ps_mtx);
473 	PROC_UNLOCK(p);
474 
475 	/*
476 	 * Build the argument list for the signal handler.
477 	 */
478 	if (p->p_sysent->sv_sigtbl)
479 		if (sig <= p->p_sysent->sv_sigsize)
480 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
481 
482 	bzero(&frame, sizeof(frame));
483 
484 	frame.sf_handler = PTROUT(catcher);
485 	frame.sf_sig = sig;
486 
487 	bsd_to_linux_sigset(mask, &lmask);
488 
489 	/*
490 	 * Build the signal context to be used by sigreturn.
491 	 */
492 	frame.sf_sc.sc_mask   = lmask.__bits[0];
493 	frame.sf_sc.sc_gs     = regs->tf_gs;
494 	frame.sf_sc.sc_fs     = regs->tf_fs;
495 	frame.sf_sc.sc_es     = regs->tf_es;
496 	frame.sf_sc.sc_ds     = regs->tf_ds;
497 	frame.sf_sc.sc_edi    = regs->tf_rdi;
498 	frame.sf_sc.sc_esi    = regs->tf_rsi;
499 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
500 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
501 	frame.sf_sc.sc_edx    = regs->tf_rdx;
502 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
503 	frame.sf_sc.sc_eax    = regs->tf_rax;
504 	frame.sf_sc.sc_eip    = regs->tf_rip;
505 	frame.sf_sc.sc_cs     = regs->tf_cs;
506 	frame.sf_sc.sc_eflags = regs->tf_rflags;
507 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
508 	frame.sf_sc.sc_ss     = regs->tf_ss;
509 	frame.sf_sc.sc_err    = regs->tf_err;
510 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
511 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
512 
513 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
514 		frame.sf_extramask[i] = lmask.__bits[i+1];
515 
516 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
517 		/*
518 		 * Process has trashed its stack; give it an illegal
519 		 * instruction to halt it in its tracks.
520 		 */
521 		PROC_LOCK(p);
522 		sigexit(td, SIGILL);
523 	}
524 
525 	/*
526 	 * Build context to run handler in.
527 	 */
528 	regs->tf_rsp = PTROUT(fp);
529 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
530 	regs->tf_rflags &= ~(PSL_T | PSL_D);
531 	regs->tf_cs = _ucode32sel;
532 	regs->tf_ss = _udatasel;
533 	regs->tf_ds = _udatasel;
534 	regs->tf_es = _udatasel;
535 	regs->tf_fs = _ufssel;
536 	regs->tf_gs = _ugssel;
537 	regs->tf_flags = TF_HASSEGS;
538 	PROC_LOCK(p);
539 	mtx_lock(&psp->ps_mtx);
540 }
541 
542 /*
543  * System call to cleanup state after a signal
544  * has been taken.  Reset signal mask and
545  * stack state from context left by sendsig (above).
546  * Return to previous pc and psl as specified by
547  * context left by sendsig. Check carefully to
548  * make sure that the user has not modified the
549  * psl to gain improper privileges or to cause
550  * a machine fault.
551  */
552 int
553 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
554 {
555 	struct proc *p = td->td_proc;
556 	struct l_sigframe frame;
557 	struct trapframe *regs;
558 	l_sigset_t lmask;
559 	int eflags, i;
560 	ksiginfo_t ksi;
561 
562 	regs = td->td_frame;
563 
564 #ifdef DEBUG
565 	if (ldebug(sigreturn))
566 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
567 #endif
568 	/*
569 	 * The trampoline code hands us the sigframe.
570 	 * It is unsafe to keep track of it ourselves, in the event that a
571 	 * program jumps out of a signal handler.
572 	 */
573 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
574 		return (EFAULT);
575 
576 	/*
577 	 * Check for security violations.
578 	 */
579 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
580 	eflags = frame.sf_sc.sc_eflags;
581 	/*
582 	 * XXX do allow users to change the privileged flag PSL_RF.  The
583 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
584 	 * sometimes set it there too.  tf_eflags is kept in the signal
585 	 * context during signal handling and there is no other place
586 	 * to remember it, so the PSL_RF bit may be corrupted by the
587 	 * signal handler without us knowing.  Corruption of the PSL_RF
588 	 * bit at worst causes one more or one less debugger trap, so
589 	 * allowing it is fairly harmless.
590 	 */
591 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
592 		return(EINVAL);
593 
594 	/*
595 	 * Don't allow users to load a valid privileged %cs.  Let the
596 	 * hardware check for invalid selectors, excess privilege in
597 	 * other selectors, invalid %eip's and invalid %esp's.
598 	 */
599 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
600 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
601 		ksiginfo_init_trap(&ksi);
602 		ksi.ksi_signo = SIGBUS;
603 		ksi.ksi_code = BUS_OBJERR;
604 		ksi.ksi_trapno = T_PROTFLT;
605 		ksi.ksi_addr = (void *)regs->tf_rip;
606 		trapsignal(td, &ksi);
607 		return(EINVAL);
608 	}
609 
610 	lmask.__bits[0] = frame.sf_sc.sc_mask;
611 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
612 		lmask.__bits[i+1] = frame.sf_extramask[i];
613 	PROC_LOCK(p);
614 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
615 	SIG_CANTMASK(td->td_sigmask);
616 	signotify(td);
617 	PROC_UNLOCK(p);
618 
619 	/*
620 	 * Restore signal context.
621 	 */
622 	regs->tf_rdi    = frame.sf_sc.sc_edi;
623 	regs->tf_rsi    = frame.sf_sc.sc_esi;
624 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
625 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
626 	regs->tf_rdx    = frame.sf_sc.sc_edx;
627 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
628 	regs->tf_rax    = frame.sf_sc.sc_eax;
629 	regs->tf_rip    = frame.sf_sc.sc_eip;
630 	regs->tf_cs     = frame.sf_sc.sc_cs;
631 	regs->tf_ds     = frame.sf_sc.sc_ds;
632 	regs->tf_es     = frame.sf_sc.sc_es;
633 	regs->tf_fs     = frame.sf_sc.sc_fs;
634 	regs->tf_gs     = frame.sf_sc.sc_gs;
635 	regs->tf_rflags = eflags;
636 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
637 	regs->tf_ss     = frame.sf_sc.sc_ss;
638 
639 	return (EJUSTRETURN);
640 }
641 
642 /*
643  * System call to cleanup state after a signal
644  * has been taken.  Reset signal mask and
645  * stack state from context left by rt_sendsig (above).
646  * Return to previous pc and psl as specified by
647  * context left by sendsig. Check carefully to
648  * make sure that the user has not modified the
649  * psl to gain improper privileges or to cause
650  * a machine fault.
651  */
652 int
653 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
654 {
655 	struct proc *p = td->td_proc;
656 	struct l_ucontext uc;
657 	struct l_sigcontext *context;
658 	l_stack_t *lss;
659 	stack_t ss;
660 	struct trapframe *regs;
661 	int eflags;
662 	ksiginfo_t ksi;
663 
664 	regs = td->td_frame;
665 
666 #ifdef DEBUG
667 	if (ldebug(rt_sigreturn))
668 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
669 #endif
670 	/*
671 	 * The trampoline code hands us the ucontext.
672 	 * It is unsafe to keep track of it ourselves, in the event that a
673 	 * program jumps out of a signal handler.
674 	 */
675 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
676 		return (EFAULT);
677 
678 	context = &uc.uc_mcontext;
679 
680 	/*
681 	 * Check for security violations.
682 	 */
683 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
684 	eflags = context->sc_eflags;
685 	/*
686 	 * XXX do allow users to change the privileged flag PSL_RF.  The
687 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
688 	 * sometimes set it there too.  tf_eflags is kept in the signal
689 	 * context during signal handling and there is no other place
690 	 * to remember it, so the PSL_RF bit may be corrupted by the
691 	 * signal handler without us knowing.  Corruption of the PSL_RF
692 	 * bit at worst causes one more or one less debugger trap, so
693 	 * allowing it is fairly harmless.
694 	 */
695 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
696 		return(EINVAL);
697 
698 	/*
699 	 * Don't allow users to load a valid privileged %cs.  Let the
700 	 * hardware check for invalid selectors, excess privilege in
701 	 * other selectors, invalid %eip's and invalid %esp's.
702 	 */
703 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
704 	if (!CS_SECURE(context->sc_cs)) {
705 		ksiginfo_init_trap(&ksi);
706 		ksi.ksi_signo = SIGBUS;
707 		ksi.ksi_code = BUS_OBJERR;
708 		ksi.ksi_trapno = T_PROTFLT;
709 		ksi.ksi_addr = (void *)regs->tf_rip;
710 		trapsignal(td, &ksi);
711 		return(EINVAL);
712 	}
713 
714 	PROC_LOCK(p);
715 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
716 	SIG_CANTMASK(td->td_sigmask);
717 	signotify(td);
718 	PROC_UNLOCK(p);
719 
720 	/*
721 	 * Restore signal context
722 	 */
723 	regs->tf_gs	= context->sc_gs;
724 	regs->tf_fs	= context->sc_fs;
725 	regs->tf_es	= context->sc_es;
726 	regs->tf_ds	= context->sc_ds;
727 	regs->tf_rdi    = context->sc_edi;
728 	regs->tf_rsi    = context->sc_esi;
729 	regs->tf_rbp    = context->sc_ebp;
730 	regs->tf_rbx    = context->sc_ebx;
731 	regs->tf_rdx    = context->sc_edx;
732 	regs->tf_rcx    = context->sc_ecx;
733 	regs->tf_rax    = context->sc_eax;
734 	regs->tf_rip    = context->sc_eip;
735 	regs->tf_cs     = context->sc_cs;
736 	regs->tf_rflags = eflags;
737 	regs->tf_rsp    = context->sc_esp_at_signal;
738 	regs->tf_ss     = context->sc_ss;
739 
740 	/*
741 	 * call sigaltstack & ignore results..
742 	 */
743 	lss = &uc.uc_stack;
744 	ss.ss_sp = PTRIN(lss->ss_sp);
745 	ss.ss_size = lss->ss_size;
746 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
747 
748 #ifdef DEBUG
749 	if (ldebug(rt_sigreturn))
750 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
751 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
752 #endif
753 	(void)kern_sigaltstack(td, &ss, NULL);
754 
755 	return (EJUSTRETURN);
756 }
757 
758 /*
759  * MPSAFE
760  */
761 static void
762 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
763 {
764 	args[0] = tf->tf_rbx;
765 	args[1] = tf->tf_rcx;
766 	args[2] = tf->tf_rdx;
767 	args[3] = tf->tf_rsi;
768 	args[4] = tf->tf_rdi;
769 	args[5] = tf->tf_rbp;	/* Unconfirmed */
770 	*params = NULL;		/* no copyin */
771 }
772 
773 /*
774  * If a linux binary is exec'ing something, try this image activator
775  * first.  We override standard shell script execution in order to
776  * be able to modify the interpreter path.  We only do this if a linux
777  * binary is doing the exec, so we do not create an EXEC module for it.
778  */
779 static int	exec_linux_imgact_try(struct image_params *iparams);
780 
781 static int
782 exec_linux_imgact_try(struct image_params *imgp)
783 {
784 	const char *head = (const char *)imgp->image_header;
785 	char *rpath;
786 	int error = -1, len;
787 
788 	/*
789 	* The interpreter for shell scripts run from a linux binary needs
790 	* to be located in /compat/linux if possible in order to recursively
791 	* maintain linux path emulation.
792 	*/
793 	if (((const short *)head)[0] == SHELLMAGIC) {
794 		/*
795 		* Run our normal shell image activator.  If it succeeds attempt
796 		* to use the alternate path for the interpreter.  If an
797 		* alternate * path is found, use our stringspace to store it.
798 		*/
799 		if ((error = exec_shell_imgact(imgp)) == 0) {
800 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
801 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
802 			    AT_FDCWD);
803 			if (rpath != NULL) {
804 				len = strlen(rpath) + 1;
805 
806 				if (len <= MAXSHELLCMDLEN) {
807 					memcpy(imgp->interpreter_name, rpath,
808 					    len);
809 				}
810 				free(rpath, M_TEMP);
811 			}
812 		}
813 	}
814 	return(error);
815 }
816 
817 /*
818  * Clear registers on exec
819  * XXX copied from ia32_signal.c.
820  */
821 static void
822 exec_linux_setregs(td, entry, stack, ps_strings)
823 	struct thread *td;
824 	u_long entry;
825 	u_long stack;
826 	u_long ps_strings;
827 {
828 	struct trapframe *regs = td->td_frame;
829 	struct pcb *pcb = td->td_pcb;
830 
831 	mtx_lock(&dt_lock);
832 	if (td->td_proc->p_md.md_ldt != NULL)
833 		user_ldt_free(td);
834 	else
835 		mtx_unlock(&dt_lock);
836 
837 	critical_enter();
838 	wrmsr(MSR_FSBASE, 0);
839 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
840 	pcb->pcb_fsbase = 0;
841 	pcb->pcb_gsbase = 0;
842 	critical_exit();
843 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
844 
845 	bzero((char *)regs, sizeof(struct trapframe));
846 	regs->tf_rip = entry;
847 	regs->tf_rsp = stack;
848 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
849 	regs->tf_gs = _ugssel;
850 	regs->tf_fs = _ufssel;
851 	regs->tf_es = _udatasel;
852 	regs->tf_ds = _udatasel;
853 	regs->tf_ss = _udatasel;
854 	regs->tf_flags = TF_HASSEGS;
855 	regs->tf_cs = _ucode32sel;
856 	regs->tf_rbx = ps_strings;
857 	load_cr0(rcr0() | CR0_MP | CR0_TS);
858 	fpstate_drop(td);
859 
860 	/* Return via doreti so that we can change to a different %cs */
861 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
862 	pcb->pcb_flags &= ~PCB_GS32BIT;
863 	td->td_retval[1] = 0;
864 }
865 
866 /*
867  * XXX copied from ia32_sysvec.c.
868  */
869 static register_t *
870 linux_copyout_strings(struct image_params *imgp)
871 {
872 	int argc, envc;
873 	u_int32_t *vectp;
874 	char *stringp, *destp;
875 	u_int32_t *stack_base;
876 	struct linux32_ps_strings *arginfo;
877 
878 	/*
879 	 * Calculate string base and vector table pointers.
880 	 * Also deal with signal trampoline code for this exec type.
881 	 */
882 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
883 	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
884 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
885 	    sizeof(char *));
886 
887 	/*
888 	 * install sigcode
889 	 */
890 	copyout(imgp->proc->p_sysent->sv_sigcode,
891 	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
892 
893 	/*
894 	 * Install LINUX_PLATFORM
895 	 */
896 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
897 	    linux_szplatform), linux_szplatform);
898 
899 	/*
900 	 * If we have a valid auxargs ptr, prepare some room
901 	 * on the stack.
902 	 */
903 	if (imgp->auxargs) {
904 		/*
905 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
906 		 * lower compatibility.
907 		 */
908 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
909 		    (LINUX_AT_COUNT * 2);
910 		/*
911 		 * The '+ 2' is for the null pointers at the end of each of
912 		 * the arg and env vector sets,and imgp->auxarg_size is room
913 		 * for argument of Runtime loader.
914 		 */
915 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
916 		    imgp->args->envc + 2 + imgp->auxarg_size) *
917 		    sizeof(u_int32_t));
918 
919 	} else
920 		/*
921 		 * The '+ 2' is for the null pointers at the end of each of
922 		 * the arg and env vector sets
923 		 */
924 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
925 		    imgp->args->envc + 2) * sizeof(u_int32_t));
926 
927 	/*
928 	 * vectp also becomes our initial stack base
929 	 */
930 	stack_base = vectp;
931 
932 	stringp = imgp->args->begin_argv;
933 	argc = imgp->args->argc;
934 	envc = imgp->args->envc;
935 	/*
936 	 * Copy out strings - arguments and environment.
937 	 */
938 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
939 
940 	/*
941 	 * Fill in "ps_strings" struct for ps, w, etc.
942 	 */
943 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
944 	suword32(&arginfo->ps_nargvstr, argc);
945 
946 	/*
947 	 * Fill in argument portion of vector table.
948 	 */
949 	for (; argc > 0; --argc) {
950 		suword32(vectp++, (uint32_t)(intptr_t)destp);
951 		while (*stringp++ != 0)
952 			destp++;
953 		destp++;
954 	}
955 
956 	/* a null vector table pointer separates the argp's from the envp's */
957 	suword32(vectp++, 0);
958 
959 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
960 	suword32(&arginfo->ps_nenvstr, envc);
961 
962 	/*
963 	 * Fill in environment portion of vector table.
964 	 */
965 	for (; envc > 0; --envc) {
966 		suword32(vectp++, (uint32_t)(intptr_t)destp);
967 		while (*stringp++ != 0)
968 			destp++;
969 		destp++;
970 	}
971 
972 	/* end of vector table is a null pointer */
973 	suword32(vectp, 0);
974 
975 	return ((register_t *)stack_base);
976 }
977 
978 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
979     "32-bit Linux emulation");
980 
981 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
982 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
983     &linux32_maxdsiz, 0, "");
984 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
985 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
986     &linux32_maxssiz, 0, "");
987 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
988 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
989     &linux32_maxvmem, 0, "");
990 
991 static void
992 linux32_fixlimit(struct rlimit *rl, int which)
993 {
994 
995 	switch (which) {
996 	case RLIMIT_DATA:
997 		if (linux32_maxdsiz != 0) {
998 			if (rl->rlim_cur > linux32_maxdsiz)
999 				rl->rlim_cur = linux32_maxdsiz;
1000 			if (rl->rlim_max > linux32_maxdsiz)
1001 				rl->rlim_max = linux32_maxdsiz;
1002 		}
1003 		break;
1004 	case RLIMIT_STACK:
1005 		if (linux32_maxssiz != 0) {
1006 			if (rl->rlim_cur > linux32_maxssiz)
1007 				rl->rlim_cur = linux32_maxssiz;
1008 			if (rl->rlim_max > linux32_maxssiz)
1009 				rl->rlim_max = linux32_maxssiz;
1010 		}
1011 		break;
1012 	case RLIMIT_VMEM:
1013 		if (linux32_maxvmem != 0) {
1014 			if (rl->rlim_cur > linux32_maxvmem)
1015 				rl->rlim_cur = linux32_maxvmem;
1016 			if (rl->rlim_max > linux32_maxvmem)
1017 				rl->rlim_max = linux32_maxvmem;
1018 		}
1019 		break;
1020 	}
1021 }
1022 
1023 struct sysentvec elf_linux_sysvec = {
1024 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1025 	.sv_table	= linux_sysent,
1026 	.sv_mask	= 0,
1027 	.sv_sigsize	= LINUX_SIGTBLSZ,
1028 	.sv_sigtbl	= bsd_to_linux_signal,
1029 	.sv_errsize	= ELAST + 1,
1030 	.sv_errtbl	= bsd_to_linux_errno,
1031 	.sv_transtrap	= translate_traps,
1032 	.sv_fixup	= elf_linux_fixup,
1033 	.sv_sendsig	= linux_sendsig,
1034 	.sv_sigcode	= linux_sigcode,
1035 	.sv_szsigcode	= &linux_szsigcode,
1036 	.sv_prepsyscall	= linux_prepsyscall,
1037 	.sv_name	= "Linux ELF32",
1038 	.sv_coredump	= elf32_coredump,
1039 	.sv_imgact_try	= exec_linux_imgact_try,
1040 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1041 	.sv_pagesize	= PAGE_SIZE,
1042 	.sv_minuser	= VM_MIN_ADDRESS,
1043 	.sv_maxuser	= LINUX32_USRSTACK,
1044 	.sv_usrstack	= LINUX32_USRSTACK,
1045 	.sv_psstrings	= LINUX32_PS_STRINGS,
1046 	.sv_stackprot	= VM_PROT_ALL,
1047 	.sv_copyout_strings = linux_copyout_strings,
1048 	.sv_setregs	= exec_linux_setregs,
1049 	.sv_fixlimit	= linux32_fixlimit,
1050 	.sv_maxssiz	= &linux32_maxssiz,
1051 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1052 };
1053 
1054 static char GNULINUX_ABI_VENDOR[] = "GNU";
1055 
1056 static Elf_Brandnote linux32_brandnote = {
1057 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1058 	.hdr.n_descsz	= 16,
1059 	.hdr.n_type	= 1,
1060 	.vendor		= GNULINUX_ABI_VENDOR,
1061 	.flags		= 0
1062 };
1063 
1064 static Elf32_Brandinfo linux_brand = {
1065 	.brand		= ELFOSABI_LINUX,
1066 	.machine	= EM_386,
1067 	.compat_3_brand	= "Linux",
1068 	.emul_path	= "/compat/linux",
1069 	.interp_path	= "/lib/ld-linux.so.1",
1070 	.sysvec		= &elf_linux_sysvec,
1071 	.interp_newpath	= NULL,
1072 	.brand_note	= &linux32_brandnote,
1073 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1074 };
1075 
1076 static Elf32_Brandinfo linux_glibc2brand = {
1077 	.brand		= ELFOSABI_LINUX,
1078 	.machine	= EM_386,
1079 	.compat_3_brand	= "Linux",
1080 	.emul_path	= "/compat/linux",
1081 	.interp_path	= "/lib/ld-linux.so.2",
1082 	.sysvec		= &elf_linux_sysvec,
1083 	.interp_newpath	= NULL,
1084 	.brand_note	= &linux32_brandnote,
1085 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1086 };
1087 
1088 Elf32_Brandinfo *linux_brandlist[] = {
1089 	&linux_brand,
1090 	&linux_glibc2brand,
1091 	NULL
1092 };
1093 
1094 static int
1095 linux_elf_modevent(module_t mod, int type, void *data)
1096 {
1097 	Elf32_Brandinfo **brandinfo;
1098 	int error;
1099 	struct linux_ioctl_handler **lihp;
1100 	struct linux_device_handler **ldhp;
1101 
1102 	error = 0;
1103 
1104 	switch(type) {
1105 	case MOD_LOAD:
1106 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1107 		     ++brandinfo)
1108 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1109 				error = EINVAL;
1110 		if (error == 0) {
1111 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1112 				linux_ioctl_register_handler(*lihp);
1113 			SET_FOREACH(ldhp, linux_device_handler_set)
1114 				linux_device_register_handler(*ldhp);
1115 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1116 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1117 			LIST_INIT(&futex_list);
1118 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1119 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1120 			    linux_proc_exit, NULL, 1000);
1121 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1122 			    linux_schedtail, NULL, 1000);
1123 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1124 			    linux_proc_exec, NULL, 1000);
1125 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1126 			    sizeof(char *));
1127 			if (bootverbose)
1128 				printf("Linux ELF exec handler installed\n");
1129 		} else
1130 			printf("cannot insert Linux ELF brand handler\n");
1131 		break;
1132 	case MOD_UNLOAD:
1133 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1134 		     ++brandinfo)
1135 			if (elf32_brand_inuse(*brandinfo))
1136 				error = EBUSY;
1137 		if (error == 0) {
1138 			for (brandinfo = &linux_brandlist[0];
1139 			     *brandinfo != NULL; ++brandinfo)
1140 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1141 					error = EINVAL;
1142 		}
1143 		if (error == 0) {
1144 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1145 				linux_ioctl_unregister_handler(*lihp);
1146 			SET_FOREACH(ldhp, linux_device_handler_set)
1147 				linux_device_unregister_handler(*ldhp);
1148 			mtx_destroy(&emul_lock);
1149 			sx_destroy(&emul_shared_lock);
1150 			mtx_destroy(&futex_mtx);
1151 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1152 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1153 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1154 			if (bootverbose)
1155 				printf("Linux ELF exec handler removed\n");
1156 		} else
1157 			printf("Could not deinstall ELF interpreter entry\n");
1158 		break;
1159 	default:
1160 		return EOPNOTSUPP;
1161 	}
1162 	return error;
1163 }
1164 
1165 static moduledata_t linux_elf_mod = {
1166 	"linuxelf",
1167 	linux_elf_modevent,
1168 	0
1169 };
1170 
1171 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1172