xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 79262bf1f010ea03a26c9f39ff80a5fe7bcbc264)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
/*
 * Refuse to build unless the kernel configuration defines COMPAT_IA32;
 * compilation stops with an explicit error otherwise.
 */
#ifndef COMPAT_IA32
#error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
#endif

/*
 * Select a 32-bit ELF word size.  This is defined ahead of the remaining
 * header inclusions (NOTE(review): presumably consumed by
 * <sys/imgact_elf.h> -- confirm).
 */
#define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_misc.h>
82 #include <compat/linux/linux_signal.h>
83 #include <compat/linux/linux_util.h>
84 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/* Malloc type M_LINUX, described as "Linux mode structures". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
88 
/*
 * Store one auxiliary-vector entry, the pair (id, val), through
 * suword32() at the location designated by pos, and advance pos by two
 * elements.
 *
 * pos must be a modifiable lvalue.  It is expanded twice, so it must not
 * carry side effects of its own.  The arguments are parenthesized so
 * that an lvalue expression such as *cursor is incremented as a whole
 * instead of being re-associated with the post-increment operator.
 * The results of suword32() are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
94 
/*
 * The two characters "#!" as they appear when the start of an image
 * header is read as a single 16-bit value; the constant therefore
 * depends on the byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
109 
/* Platform name; linux_copyout_strings() copies it to the user stack. */
const char *linux_platform = "i686";
/*
 * Number of bytes of linux_platform placed on the user stack.  It is not
 * assigned in the declarations here (NOTE(review): presumably set up at
 * module load time -- confirm).
 */
static int linux_szplatform;
/* Signal code installed below LINUX32_PS_STRINGS, and its size in bytes. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined elsewhere. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker sets holding the registered ioctl and device handlers. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

/* Forward declarations of file-local routines. */
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
static void	linux32_fixlimit(struct rlimit *rl, int which);

/* Futex list head and the mutex declared alongside it, defined elsewhere. */
extern LIST_HEAD(futex_list, futex) futex_list;
extern struct mtx futex_mtx;

/*
 * Tags for the exit, schedtail and exec event handlers
 * (NOTE(review): presumably filled in when the handlers are registered
 * at module load -- confirm).
 */
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
136 
/*
 * Linux syscalls return negative errno's, we do positive and map them
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table has ELAST + 1 slots and is indexed by the FreeBSD errno
 * value; each entry is the corresponding Linux errno, already negated.
 * Ten entries are listed per row, so row r starts at FreeBSD errno 10 * r.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
	 -72, -67, -71
};
156 
/*
 * FreeBSD-to-Linux signal number translation.  Entry i holds the Linux
 * signal for FreeBSD signal i + 1 (the first entry is for SIGHUP).
 * A 0 entry marks a FreeBSD signal for which no Linux signal is listed
 * (NOTE(review): presumably SIGEMT and SIGINFO -- confirm).
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
167 
/*
 * Linux-to-FreeBSD signal number translation.  Entry i holds the FreeBSD
 * signal for Linux signal i + 1.  Several Linux signals share a FreeBSD
 * signal here (SIGBUS and SIGURG each appear twice).
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
178 
/* Value reported for a trap code that has no Linux equivalent. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap code -> Linux trap number.  Each slot is spelled out by
 * index; the trailing comment names the FreeBSD trap the index stands for.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Look up the Linux trap number for a FreeBSD trap code.  Codes outside
 * the table yield LINUX_T_UNKNOWN; a negative code is also out of range
 * because the comparison is carried out on unsigned values.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) < sizeof(_bsd_to_linux_trapcode) / \
      sizeof(_bsd_to_linux_trapcode[0]) ? \
     _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
217 
/*
 * 32-bit layout of the ps_strings block kept at LINUX32_PS_STRINGS.
 * User addresses are held as 32-bit integers; linux_copyout_strings()
 * stores the argument and environment vector addresses and counts here.
 */
struct linux32_ps_strings {
	uint32_t	ps_argvstr;	/* first of 0 or more argument strings */
	unsigned int	ps_nargvstr;	/* the number of argument strings */
	uint32_t	ps_envstr;	/* first of 0 or more environment strings */
	unsigned int	ps_nenvstr;	/* the number of environment strings */
};
224 
225 /*
226  * If FreeBSD & Linux have a difference of opinion about what a trap
227  * means, deal with it here.
228  *
229  * MPSAFE
230  */
231 static int
232 translate_traps(int signal, int trap_code)
233 {
234 	if (signal != SIGBUS)
235 		return signal;
236 	switch (trap_code) {
237 	case T_PROTFLT:
238 	case T_TSSFLT:
239 	case T_DOUBLEFLT:
240 	case T_PAGEFLT:
241 		return SIGSEGV;
242 	default:
243 		return signal;
244 	}
245 }
246 
247 static int
248 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
249 {
250 	Elf32_Auxargs *args;
251 	Elf32_Addr *base;
252 	Elf32_Addr *pos, *uplatform;
253 	struct linux32_ps_strings *arginfo;
254 
255 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
256 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
257 	    linux_szplatform);
258 
259 	KASSERT(curthread->td_proc == imgp->proc,
260 	    ("unsafe elf_linux_fixup(), should be curproc"));
261 	base = (Elf32_Addr *)*stack_base;
262 	args = (Elf32_Auxargs *)imgp->auxargs;
263 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
264 
265 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
266 	AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, hz);
267 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
268 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
269 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
270 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
271 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
272 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
273 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
274 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
275 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
279 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
280 	if (args->execfd != -1)
281 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
282 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
283 
284 	free(imgp->auxargs, M_TEMP);
285 	imgp->auxargs = NULL;
286 
287 	base--;
288 	suword32(base, (uint32_t)imgp->args->argc);
289 	*stack_base = (register_t *)base;
290 	return 0;
291 }
292 
293 extern unsigned long linux_sznonrtsigcode;
294 
295 static void
296 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
297 {
298 	struct thread *td = curthread;
299 	struct proc *p = td->td_proc;
300 	struct sigacts *psp;
301 	struct trapframe *regs;
302 	struct l_rt_sigframe *fp, frame;
303 	int oonstack;
304 	int sig;
305 	int code;
306 
307 	sig = ksi->ksi_signo;
308 	code = ksi->ksi_code;
309 	PROC_LOCK_ASSERT(p, MA_OWNED);
310 	psp = p->p_sigacts;
311 	mtx_assert(&psp->ps_mtx, MA_OWNED);
312 	regs = td->td_frame;
313 	oonstack = sigonstack(regs->tf_rsp);
314 
315 #ifdef DEBUG
316 	if (ldebug(rt_sendsig))
317 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
318 		    catcher, sig, (void*)mask, code);
319 #endif
320 	/*
321 	 * Allocate space for the signal handler context.
322 	 */
323 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
324 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
325 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
326 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
327 	} else
328 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
329 	mtx_unlock(&psp->ps_mtx);
330 
331 	/*
332 	 * Build the argument list for the signal handler.
333 	 */
334 	if (p->p_sysent->sv_sigtbl)
335 		if (sig <= p->p_sysent->sv_sigsize)
336 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
337 
338 	bzero(&frame, sizeof(frame));
339 
340 	frame.sf_handler = PTROUT(catcher);
341 	frame.sf_sig = sig;
342 	frame.sf_siginfo = PTROUT(&fp->sf_si);
343 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
344 
345 	/* Fill in POSIX parts */
346 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
347 
348 	/*
349 	 * Build the signal context to be used by sigreturn.
350 	 */
351 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
352 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
353 
354 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
355 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
356 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
357 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
358 	PROC_UNLOCK(p);
359 
360 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
361 
362 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
363 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
364 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
365 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
366 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
367 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
368 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
369 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
370 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
371 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
372 	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
373 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
374 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
375 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
376 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
377 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
378 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
379 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
380 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
381 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
382 
383 #ifdef DEBUG
384 	if (ldebug(rt_sendsig))
385 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
386 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
387 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
388 #endif
389 
390 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
391 		/*
392 		 * Process has trashed its stack; give it an illegal
393 		 * instruction to halt it in its tracks.
394 		 */
395 #ifdef DEBUG
396 		if (ldebug(rt_sendsig))
397 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
398 			    fp, oonstack);
399 #endif
400 		PROC_LOCK(p);
401 		sigexit(td, SIGILL);
402 	}
403 
404 	/*
405 	 * Build context to run handler in.
406 	 */
407 	regs->tf_rsp = PTROUT(fp);
408 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
409 	    linux_sznonrtsigcode;
410 	regs->tf_rflags &= ~(PSL_T | PSL_D);
411 	regs->tf_cs = _ucode32sel;
412 	regs->tf_ss = _udatasel;
413 	regs->tf_ds = _udatasel;
414 	regs->tf_es = _udatasel;
415 	regs->tf_fs = _ufssel;
416 	regs->tf_gs = _ugssel;
417 	regs->tf_flags = TF_HASSEGS;
418 	PROC_LOCK(p);
419 	mtx_lock(&psp->ps_mtx);
420 }
421 
422 
423 /*
424  * Send an interrupt to process.
425  *
426  * Stack is set up to allow sigcode stored
427  * in u. to call routine, followed by kcall
428  * to sigreturn routine below.  After sigreturn
429  * resets the signal mask, the stack, and the
430  * frame pointer, it returns to the user
431  * specified pc, psl.
432  */
433 static void
434 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
435 {
436 	struct thread *td = curthread;
437 	struct proc *p = td->td_proc;
438 	struct sigacts *psp;
439 	struct trapframe *regs;
440 	struct l_sigframe *fp, frame;
441 	l_sigset_t lmask;
442 	int oonstack, i;
443 	int sig, code;
444 
445 	sig = ksi->ksi_signo;
446 	code = ksi->ksi_code;
447 	PROC_LOCK_ASSERT(p, MA_OWNED);
448 	psp = p->p_sigacts;
449 	mtx_assert(&psp->ps_mtx, MA_OWNED);
450 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
451 		/* Signal handler installed with SA_SIGINFO. */
452 		linux_rt_sendsig(catcher, ksi, mask);
453 		return;
454 	}
455 
456 	regs = td->td_frame;
457 	oonstack = sigonstack(regs->tf_rsp);
458 
459 #ifdef DEBUG
460 	if (ldebug(sendsig))
461 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
462 		    catcher, sig, (void*)mask, code);
463 #endif
464 
465 	/*
466 	 * Allocate space for the signal handler context.
467 	 */
468 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
469 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
470 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
471 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
472 	} else
473 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
474 	mtx_unlock(&psp->ps_mtx);
475 	PROC_UNLOCK(p);
476 
477 	/*
478 	 * Build the argument list for the signal handler.
479 	 */
480 	if (p->p_sysent->sv_sigtbl)
481 		if (sig <= p->p_sysent->sv_sigsize)
482 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
483 
484 	bzero(&frame, sizeof(frame));
485 
486 	frame.sf_handler = PTROUT(catcher);
487 	frame.sf_sig = sig;
488 
489 	bsd_to_linux_sigset(mask, &lmask);
490 
491 	/*
492 	 * Build the signal context to be used by sigreturn.
493 	 */
494 	frame.sf_sc.sc_mask   = lmask.__bits[0];
495 	frame.sf_sc.sc_gs     = regs->tf_gs;
496 	frame.sf_sc.sc_fs     = regs->tf_fs;
497 	frame.sf_sc.sc_es     = regs->tf_es;
498 	frame.sf_sc.sc_ds     = regs->tf_ds;
499 	frame.sf_sc.sc_edi    = regs->tf_rdi;
500 	frame.sf_sc.sc_esi    = regs->tf_rsi;
501 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
502 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
503 	frame.sf_sc.sc_edx    = regs->tf_rdx;
504 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
505 	frame.sf_sc.sc_eax    = regs->tf_rax;
506 	frame.sf_sc.sc_eip    = regs->tf_rip;
507 	frame.sf_sc.sc_cs     = regs->tf_cs;
508 	frame.sf_sc.sc_eflags = regs->tf_rflags;
509 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
510 	frame.sf_sc.sc_ss     = regs->tf_ss;
511 	frame.sf_sc.sc_err    = regs->tf_err;
512 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
513 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
514 
515 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
516 		frame.sf_extramask[i] = lmask.__bits[i+1];
517 
518 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
519 		/*
520 		 * Process has trashed its stack; give it an illegal
521 		 * instruction to halt it in its tracks.
522 		 */
523 		PROC_LOCK(p);
524 		sigexit(td, SIGILL);
525 	}
526 
527 	/*
528 	 * Build context to run handler in.
529 	 */
530 	regs->tf_rsp = PTROUT(fp);
531 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
532 	regs->tf_rflags &= ~(PSL_T | PSL_D);
533 	regs->tf_cs = _ucode32sel;
534 	regs->tf_ss = _udatasel;
535 	regs->tf_ds = _udatasel;
536 	regs->tf_es = _udatasel;
537 	regs->tf_fs = _ufssel;
538 	regs->tf_gs = _ugssel;
539 	regs->tf_flags = TF_HASSEGS;
540 	PROC_LOCK(p);
541 	mtx_lock(&psp->ps_mtx);
542 }
543 
544 /*
545  * System call to cleanup state after a signal
546  * has been taken.  Reset signal mask and
547  * stack state from context left by sendsig (above).
548  * Return to previous pc and psl as specified by
549  * context left by sendsig. Check carefully to
550  * make sure that the user has not modified the
551  * psl to gain improper privileges or to cause
552  * a machine fault.
553  */
554 int
555 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
556 {
557 	struct proc *p = td->td_proc;
558 	struct l_sigframe frame;
559 	struct trapframe *regs;
560 	l_sigset_t lmask;
561 	int eflags, i;
562 	ksiginfo_t ksi;
563 
564 	regs = td->td_frame;
565 
566 #ifdef DEBUG
567 	if (ldebug(sigreturn))
568 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
569 #endif
570 	/*
571 	 * The trampoline code hands us the sigframe.
572 	 * It is unsafe to keep track of it ourselves, in the event that a
573 	 * program jumps out of a signal handler.
574 	 */
575 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
576 		return (EFAULT);
577 
578 	/*
579 	 * Check for security violations.
580 	 */
581 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
582 	eflags = frame.sf_sc.sc_eflags;
583 	/*
584 	 * XXX do allow users to change the privileged flag PSL_RF.  The
585 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
586 	 * sometimes set it there too.  tf_eflags is kept in the signal
587 	 * context during signal handling and there is no other place
588 	 * to remember it, so the PSL_RF bit may be corrupted by the
589 	 * signal handler without us knowing.  Corruption of the PSL_RF
590 	 * bit at worst causes one more or one less debugger trap, so
591 	 * allowing it is fairly harmless.
592 	 */
593 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
594 		return(EINVAL);
595 
596 	/*
597 	 * Don't allow users to load a valid privileged %cs.  Let the
598 	 * hardware check for invalid selectors, excess privilege in
599 	 * other selectors, invalid %eip's and invalid %esp's.
600 	 */
601 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
602 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
603 		ksiginfo_init_trap(&ksi);
604 		ksi.ksi_signo = SIGBUS;
605 		ksi.ksi_code = BUS_OBJERR;
606 		ksi.ksi_trapno = T_PROTFLT;
607 		ksi.ksi_addr = (void *)regs->tf_rip;
608 		trapsignal(td, &ksi);
609 		return(EINVAL);
610 	}
611 
612 	lmask.__bits[0] = frame.sf_sc.sc_mask;
613 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
614 		lmask.__bits[i+1] = frame.sf_extramask[i];
615 	PROC_LOCK(p);
616 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
617 	SIG_CANTMASK(td->td_sigmask);
618 	signotify(td);
619 	PROC_UNLOCK(p);
620 
621 	/*
622 	 * Restore signal context.
623 	 */
624 	regs->tf_rdi    = frame.sf_sc.sc_edi;
625 	regs->tf_rsi    = frame.sf_sc.sc_esi;
626 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
627 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
628 	regs->tf_rdx    = frame.sf_sc.sc_edx;
629 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
630 	regs->tf_rax    = frame.sf_sc.sc_eax;
631 	regs->tf_rip    = frame.sf_sc.sc_eip;
632 	regs->tf_cs     = frame.sf_sc.sc_cs;
633 	regs->tf_ds     = frame.sf_sc.sc_ds;
634 	regs->tf_es     = frame.sf_sc.sc_es;
635 	regs->tf_fs     = frame.sf_sc.sc_fs;
636 	regs->tf_gs     = frame.sf_sc.sc_gs;
637 	regs->tf_rflags = eflags;
638 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
639 	regs->tf_ss     = frame.sf_sc.sc_ss;
640 
641 	return (EJUSTRETURN);
642 }
643 
644 /*
645  * System call to cleanup state after a signal
646  * has been taken.  Reset signal mask and
647  * stack state from context left by rt_sendsig (above).
648  * Return to previous pc and psl as specified by
649  * context left by sendsig. Check carefully to
650  * make sure that the user has not modified the
651  * psl to gain improper privileges or to cause
652  * a machine fault.
653  */
654 int
655 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
656 {
657 	struct proc *p = td->td_proc;
658 	struct l_ucontext uc;
659 	struct l_sigcontext *context;
660 	l_stack_t *lss;
661 	stack_t ss;
662 	struct trapframe *regs;
663 	int eflags;
664 	ksiginfo_t ksi;
665 
666 	regs = td->td_frame;
667 
668 #ifdef DEBUG
669 	if (ldebug(rt_sigreturn))
670 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
671 #endif
672 	/*
673 	 * The trampoline code hands us the ucontext.
674 	 * It is unsafe to keep track of it ourselves, in the event that a
675 	 * program jumps out of a signal handler.
676 	 */
677 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
678 		return (EFAULT);
679 
680 	context = &uc.uc_mcontext;
681 
682 	/*
683 	 * Check for security violations.
684 	 */
685 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 	eflags = context->sc_eflags;
687 	/*
688 	 * XXX do allow users to change the privileged flag PSL_RF.  The
689 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
690 	 * sometimes set it there too.  tf_eflags is kept in the signal
691 	 * context during signal handling and there is no other place
692 	 * to remember it, so the PSL_RF bit may be corrupted by the
693 	 * signal handler without us knowing.  Corruption of the PSL_RF
694 	 * bit at worst causes one more or one less debugger trap, so
695 	 * allowing it is fairly harmless.
696 	 */
697 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
698 		return(EINVAL);
699 
700 	/*
701 	 * Don't allow users to load a valid privileged %cs.  Let the
702 	 * hardware check for invalid selectors, excess privilege in
703 	 * other selectors, invalid %eip's and invalid %esp's.
704 	 */
705 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
706 	if (!CS_SECURE(context->sc_cs)) {
707 		ksiginfo_init_trap(&ksi);
708 		ksi.ksi_signo = SIGBUS;
709 		ksi.ksi_code = BUS_OBJERR;
710 		ksi.ksi_trapno = T_PROTFLT;
711 		ksi.ksi_addr = (void *)regs->tf_rip;
712 		trapsignal(td, &ksi);
713 		return(EINVAL);
714 	}
715 
716 	PROC_LOCK(p);
717 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
718 	SIG_CANTMASK(td->td_sigmask);
719 	signotify(td);
720 	PROC_UNLOCK(p);
721 
722 	/*
723 	 * Restore signal context
724 	 */
725 	regs->tf_gs	= context->sc_gs;
726 	regs->tf_fs	= context->sc_fs;
727 	regs->tf_es	= context->sc_es;
728 	regs->tf_ds	= context->sc_ds;
729 	regs->tf_rdi    = context->sc_edi;
730 	regs->tf_rsi    = context->sc_esi;
731 	regs->tf_rbp    = context->sc_ebp;
732 	regs->tf_rbx    = context->sc_ebx;
733 	regs->tf_rdx    = context->sc_edx;
734 	regs->tf_rcx    = context->sc_ecx;
735 	regs->tf_rax    = context->sc_eax;
736 	regs->tf_rip    = context->sc_eip;
737 	regs->tf_cs     = context->sc_cs;
738 	regs->tf_rflags = eflags;
739 	regs->tf_rsp    = context->sc_esp_at_signal;
740 	regs->tf_ss     = context->sc_ss;
741 
742 	/*
743 	 * call sigaltstack & ignore results..
744 	 */
745 	lss = &uc.uc_stack;
746 	ss.ss_sp = PTRIN(lss->ss_sp);
747 	ss.ss_size = lss->ss_size;
748 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
749 
750 #ifdef DEBUG
751 	if (ldebug(rt_sigreturn))
752 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
753 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
754 #endif
755 	(void)kern_sigaltstack(td, &ss, NULL);
756 
757 	return (EJUSTRETURN);
758 }
759 
760 /*
761  * MPSAFE
762  */
763 static void
764 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
765 {
766 	args[0] = tf->tf_rbx;
767 	args[1] = tf->tf_rcx;
768 	args[2] = tf->tf_rdx;
769 	args[3] = tf->tf_rsi;
770 	args[4] = tf->tf_rdi;
771 	args[5] = tf->tf_rbp;	/* Unconfirmed */
772 	*params = NULL;		/* no copyin */
773 }
774 
775 /*
776  * If a linux binary is exec'ing something, try this image activator
777  * first.  We override standard shell script execution in order to
778  * be able to modify the interpreter path.  We only do this if a linux
779  * binary is doing the exec, so we do not create an EXEC module for it.
780  */
781 static int	exec_linux_imgact_try(struct image_params *iparams);
782 
783 static int
784 exec_linux_imgact_try(struct image_params *imgp)
785 {
786 	const char *head = (const char *)imgp->image_header;
787 	char *rpath;
788 	int error = -1, len;
789 
790 	/*
791 	* The interpreter for shell scripts run from a linux binary needs
792 	* to be located in /compat/linux if possible in order to recursively
793 	* maintain linux path emulation.
794 	*/
795 	if (((const short *)head)[0] == SHELLMAGIC) {
796 		/*
797 		* Run our normal shell image activator.  If it succeeds attempt
798 		* to use the alternate path for the interpreter.  If an
799 		* alternate * path is found, use our stringspace to store it.
800 		*/
801 		if ((error = exec_shell_imgact(imgp)) == 0) {
802 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
803 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
804 			    AT_FDCWD);
805 			if (rpath != NULL) {
806 				len = strlen(rpath) + 1;
807 
808 				if (len <= MAXSHELLCMDLEN) {
809 					memcpy(imgp->interpreter_name, rpath,
810 					    len);
811 				}
812 				free(rpath, M_TEMP);
813 			}
814 		}
815 	}
816 	return(error);
817 }
818 
819 /*
820  * Clear registers on exec
821  * XXX copied from ia32_signal.c.
822  */
823 static void
824 exec_linux_setregs(td, entry, stack, ps_strings)
825 	struct thread *td;
826 	u_long entry;
827 	u_long stack;
828 	u_long ps_strings;
829 {
830 	struct trapframe *regs = td->td_frame;
831 	struct pcb *pcb = td->td_pcb;
832 
833 	mtx_lock(&dt_lock);
834 	if (td->td_proc->p_md.md_ldt != NULL)
835 		user_ldt_free(td);
836 	else
837 		mtx_unlock(&dt_lock);
838 
839 	critical_enter();
840 	wrmsr(MSR_FSBASE, 0);
841 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
842 	pcb->pcb_fsbase = 0;
843 	pcb->pcb_gsbase = 0;
844 	critical_exit();
845 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
846 
847 	bzero((char *)regs, sizeof(struct trapframe));
848 	regs->tf_rip = entry;
849 	regs->tf_rsp = stack;
850 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
851 	regs->tf_gs = _ugssel;
852 	regs->tf_fs = _ufssel;
853 	regs->tf_es = _udatasel;
854 	regs->tf_ds = _udatasel;
855 	regs->tf_ss = _udatasel;
856 	regs->tf_flags = TF_HASSEGS;
857 	regs->tf_cs = _ucode32sel;
858 	regs->tf_rbx = ps_strings;
859 	load_cr0(rcr0() | CR0_MP | CR0_TS);
860 	fpstate_drop(td);
861 
862 	/* Return via doreti so that we can change to a different %cs */
863 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
864 	pcb->pcb_flags &= ~PCB_GS32BIT;
865 	td->td_retval[1] = 0;
866 }
867 
868 /*
869  * XXX copied from ia32_sysvec.c.
870  */
871 static register_t *
872 linux_copyout_strings(struct image_params *imgp)
873 {
874 	int argc, envc;
875 	u_int32_t *vectp;
876 	char *stringp, *destp;
877 	u_int32_t *stack_base;
878 	struct linux32_ps_strings *arginfo;
879 
880 	/*
881 	 * Calculate string base and vector table pointers.
882 	 * Also deal with signal trampoline code for this exec type.
883 	 */
884 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
885 	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
886 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
887 	    sizeof(char *));
888 
889 	/*
890 	 * install sigcode
891 	 */
892 	copyout(imgp->proc->p_sysent->sv_sigcode,
893 	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
894 
895 	/*
896 	 * Install LINUX_PLATFORM
897 	 */
898 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
899 	    linux_szplatform), linux_szplatform);
900 
901 	/*
902 	 * If we have a valid auxargs ptr, prepare some room
903 	 * on the stack.
904 	 */
905 	if (imgp->auxargs) {
906 		/*
907 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
908 		 * lower compatibility.
909 		 */
910 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
911 		    (LINUX_AT_COUNT * 2);
912 		/*
913 		 * The '+ 2' is for the null pointers at the end of each of
914 		 * the arg and env vector sets,and imgp->auxarg_size is room
915 		 * for argument of Runtime loader.
916 		 */
917 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
918 		    imgp->args->envc + 2 + imgp->auxarg_size) *
919 		    sizeof(u_int32_t));
920 
921 	} else
922 		/*
923 		 * The '+ 2' is for the null pointers at the end of each of
924 		 * the arg and env vector sets
925 		 */
926 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
927 		    imgp->args->envc + 2) * sizeof(u_int32_t));
928 
929 	/*
930 	 * vectp also becomes our initial stack base
931 	 */
932 	stack_base = vectp;
933 
934 	stringp = imgp->args->begin_argv;
935 	argc = imgp->args->argc;
936 	envc = imgp->args->envc;
937 	/*
938 	 * Copy out strings - arguments and environment.
939 	 */
940 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
941 
942 	/*
943 	 * Fill in "ps_strings" struct for ps, w, etc.
944 	 */
945 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
946 	suword32(&arginfo->ps_nargvstr, argc);
947 
948 	/*
949 	 * Fill in argument portion of vector table.
950 	 */
951 	for (; argc > 0; --argc) {
952 		suword32(vectp++, (uint32_t)(intptr_t)destp);
953 		while (*stringp++ != 0)
954 			destp++;
955 		destp++;
956 	}
957 
958 	/* a null vector table pointer separates the argp's from the envp's */
959 	suword32(vectp++, 0);
960 
961 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
962 	suword32(&arginfo->ps_nenvstr, envc);
963 
964 	/*
965 	 * Fill in environment portion of vector table.
966 	 */
967 	for (; envc > 0; --envc) {
968 		suword32(vectp++, (uint32_t)(intptr_t)destp);
969 		while (*stringp++ != 0)
970 			destp++;
971 		destp++;
972 	}
973 
974 	/* end of vector table is a null pointer */
975 	suword32(vectp, 0);
976 
977 	return ((register_t *)stack_base);
978 }
979 
980 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
981     "32-bit Linux emulation");
982 
983 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
984 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
985     &linux32_maxdsiz, 0, "");
986 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
987 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
988     &linux32_maxssiz, 0, "");
989 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
990 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
991     &linux32_maxvmem, 0, "");
992 
993 static void
994 linux32_fixlimit(struct rlimit *rl, int which)
995 {
996 
997 	switch (which) {
998 	case RLIMIT_DATA:
999 		if (linux32_maxdsiz != 0) {
1000 			if (rl->rlim_cur > linux32_maxdsiz)
1001 				rl->rlim_cur = linux32_maxdsiz;
1002 			if (rl->rlim_max > linux32_maxdsiz)
1003 				rl->rlim_max = linux32_maxdsiz;
1004 		}
1005 		break;
1006 	case RLIMIT_STACK:
1007 		if (linux32_maxssiz != 0) {
1008 			if (rl->rlim_cur > linux32_maxssiz)
1009 				rl->rlim_cur = linux32_maxssiz;
1010 			if (rl->rlim_max > linux32_maxssiz)
1011 				rl->rlim_max = linux32_maxssiz;
1012 		}
1013 		break;
1014 	case RLIMIT_VMEM:
1015 		if (linux32_maxvmem != 0) {
1016 			if (rl->rlim_cur > linux32_maxvmem)
1017 				rl->rlim_cur = linux32_maxvmem;
1018 			if (rl->rlim_max > linux32_maxvmem)
1019 				rl->rlim_max = linux32_maxvmem;
1020 		}
1021 		break;
1022 	}
1023 }
1024 
1025 struct sysentvec elf_linux_sysvec = {
1026 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1027 	.sv_table	= linux_sysent,
1028 	.sv_mask	= 0,
1029 	.sv_sigsize	= LINUX_SIGTBLSZ,
1030 	.sv_sigtbl	= bsd_to_linux_signal,
1031 	.sv_errsize	= ELAST + 1,
1032 	.sv_errtbl	= bsd_to_linux_errno,
1033 	.sv_transtrap	= translate_traps,
1034 	.sv_fixup	= elf_linux_fixup,
1035 	.sv_sendsig	= linux_sendsig,
1036 	.sv_sigcode	= linux_sigcode,
1037 	.sv_szsigcode	= &linux_szsigcode,
1038 	.sv_prepsyscall	= linux_prepsyscall,
1039 	.sv_name	= "Linux ELF32",
1040 	.sv_coredump	= elf32_coredump,
1041 	.sv_imgact_try	= exec_linux_imgact_try,
1042 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1043 	.sv_pagesize	= PAGE_SIZE,
1044 	.sv_minuser	= VM_MIN_ADDRESS,
1045 	.sv_maxuser	= LINUX32_USRSTACK,
1046 	.sv_usrstack	= LINUX32_USRSTACK,
1047 	.sv_psstrings	= LINUX32_PS_STRINGS,
1048 	.sv_stackprot	= VM_PROT_ALL,
1049 	.sv_copyout_strings = linux_copyout_strings,
1050 	.sv_setregs	= exec_linux_setregs,
1051 	.sv_fixlimit	= linux32_fixlimit,
1052 	.sv_maxssiz	= &linux32_maxssiz,
1053 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1054 };
1055 
1056 static char GNULINUX_ABI_VENDOR[] = "GNU";
1057 
1058 static Elf_Brandnote linux32_brandnote = {
1059 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1060 	.hdr.n_descsz	= 16,
1061 	.hdr.n_type	= 1,
1062 	.vendor		= GNULINUX_ABI_VENDOR,
1063 	.flags		= 0
1064 };
1065 
1066 static Elf32_Brandinfo linux_brand = {
1067 	.brand		= ELFOSABI_LINUX,
1068 	.machine	= EM_386,
1069 	.compat_3_brand	= "Linux",
1070 	.emul_path	= "/compat/linux",
1071 	.interp_path	= "/lib/ld-linux.so.1",
1072 	.sysvec		= &elf_linux_sysvec,
1073 	.interp_newpath	= NULL,
1074 	.brand_note	= &linux32_brandnote,
1075 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1076 };
1077 
1078 static Elf32_Brandinfo linux_glibc2brand = {
1079 	.brand		= ELFOSABI_LINUX,
1080 	.machine	= EM_386,
1081 	.compat_3_brand	= "Linux",
1082 	.emul_path	= "/compat/linux",
1083 	.interp_path	= "/lib/ld-linux.so.2",
1084 	.sysvec		= &elf_linux_sysvec,
1085 	.interp_newpath	= NULL,
1086 	.brand_note	= &linux32_brandnote,
1087 	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
1088 };
1089 
1090 Elf32_Brandinfo *linux_brandlist[] = {
1091 	&linux_brand,
1092 	&linux_glibc2brand,
1093 	NULL
1094 };
1095 
1096 static int
1097 linux_elf_modevent(module_t mod, int type, void *data)
1098 {
1099 	Elf32_Brandinfo **brandinfo;
1100 	int error;
1101 	struct linux_ioctl_handler **lihp;
1102 	struct linux_device_handler **ldhp;
1103 
1104 	error = 0;
1105 
1106 	switch(type) {
1107 	case MOD_LOAD:
1108 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1109 		     ++brandinfo)
1110 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1111 				error = EINVAL;
1112 		if (error == 0) {
1113 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1114 				linux_ioctl_register_handler(*lihp);
1115 			SET_FOREACH(ldhp, linux_device_handler_set)
1116 				linux_device_register_handler(*ldhp);
1117 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1118 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1119 			LIST_INIT(&futex_list);
1120 			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
1121 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1122 			    linux_proc_exit, NULL, 1000);
1123 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1124 			    linux_schedtail, NULL, 1000);
1125 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1126 			    linux_proc_exec, NULL, 1000);
1127 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1128 			    sizeof(char *));
1129 			if (bootverbose)
1130 				printf("Linux ELF exec handler installed\n");
1131 		} else
1132 			printf("cannot insert Linux ELF brand handler\n");
1133 		break;
1134 	case MOD_UNLOAD:
1135 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1136 		     ++brandinfo)
1137 			if (elf32_brand_inuse(*brandinfo))
1138 				error = EBUSY;
1139 		if (error == 0) {
1140 			for (brandinfo = &linux_brandlist[0];
1141 			     *brandinfo != NULL; ++brandinfo)
1142 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1143 					error = EINVAL;
1144 		}
1145 		if (error == 0) {
1146 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1147 				linux_ioctl_unregister_handler(*lihp);
1148 			SET_FOREACH(ldhp, linux_device_handler_set)
1149 				linux_device_unregister_handler(*ldhp);
1150 			mtx_destroy(&emul_lock);
1151 			sx_destroy(&emul_shared_lock);
1152 			mtx_destroy(&futex_mtx);
1153 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1154 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1155 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1156 			if (bootverbose)
1157 				printf("Linux ELF exec handler removed\n");
1158 		} else
1159 			printf("Could not deinstall ELF interpreter entry\n");
1160 		break;
1161 	default:
1162 		return EOPNOTSUPP;
1163 	}
1164 	return error;
1165 }
1166 
1167 static moduledata_t linux_elf_mod = {
1168 	"linuxelf",
1169 	linux_elf_modevent,
1170 	0
1171 };
1172 
1173 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1174