xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision 4b79449e2fb67cb37c4c9f46d31791893a39ddd8)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_signal.h>
82 #include <compat/linux/linux_util.h>
83 
84 MODULE_VERSION(linux, 1);
85 
86 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
87 
/*
 * Store one 32-bit ELF auxiliary-vector entry -- an id word followed by
 * a value word -- at the user address `pos' with suword32(), advancing
 * `pos' past both words.
 *
 * `pos' must be a modifiable lvalue and is evaluated twice.  Every
 * argument is parenthesized so that an lvalue expression such as `*pp'
 * is incremented itself rather than having `++' bind to `pp'.
 * The suword32() results are not checked.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
93 
/*
 * The "#!" that opens an interpreter script, expressed as the value seen
 * when the first two bytes of the image are read as one short in host
 * byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123	/* #! */
#endif
99 
/*
 * The signal delivery routines are not system calls, yet they want to
 * use the ldebug() facility.  Give them the pseudo syscall number 0,
 * which is slightly less bogus than borrowing ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0
108 
109 extern char linux_sigcode[];
110 extern int linux_szsigcode;
111 
112 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
113 
114 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
115 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
116 
117 static int	elf_linux_fixup(register_t **stack_base,
118 		    struct image_params *iparams);
119 static register_t *linux_copyout_strings(struct image_params *imgp);
120 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
121 		    caddr_t *params);
122 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
123 static void	exec_linux_setregs(struct thread *td, u_long entry,
124 				   u_long stack, u_long ps_strings);
125 static void	linux32_fixlimit(struct rlimit *rl, int which);
126 
127 extern LIST_HEAD(futex_list, futex) futex_list;
128 extern struct sx futex_sx;
129 
130 static eventhandler_tag linux_exit_tag;
131 static eventhandler_tag linux_schedtail_tag;
132 static eventhandler_tag linux_exec_tag;
133 
134 /*
135  * Linux syscalls return negative errno's, we do positive and map them
136  * Reference:
137  *   FreeBSD: src/sys/sys/errno.h
138  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
139  *            linux-2.6.17.8/include/asm-generic/errno.h
140  */
141 static int bsd_to_linux_errno[ELAST + 1] = {
142 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
143 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
150 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
151 	 -72, -67, -71
152 };
153 
154 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
155 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
156 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
157 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
158 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
159 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
160 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
161 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
162 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
163 };
164 
165 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
166 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
167 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
168 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
169 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
170 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
171 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
172 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
173 	SIGIO, SIGURG, SIGSYS
174 };
175 
/*
 * FreeBSD trap code -> Linux trap number, indexed by the FreeBSD code.
 * Codes without an entry of their own, and any code beyond the end of
 * the table, translate to LINUX_T_UNKNOWN.
 */
#define	LINUX_T_UNKNOWN	255
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/* Bounds-checked lookup in the table above; `code' is evaluated twice. */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
214 
215 struct linux32_ps_strings {
216 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
217 	u_int ps_nargvstr;	/* the number of argument strings */
218 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
219 	u_int ps_nenvstr;	/* the number of environment strings */
220 };
221 
222 /*
223  * If FreeBSD & Linux have a difference of opinion about what a trap
224  * means, deal with it here.
225  *
226  * MPSAFE
227  */
228 static int
229 translate_traps(int signal, int trap_code)
230 {
231 	if (signal != SIGBUS)
232 		return signal;
233 	switch (trap_code) {
234 	case T_PROTFLT:
235 	case T_TSSFLT:
236 	case T_DOUBLEFLT:
237 	case T_PAGEFLT:
238 		return SIGSEGV;
239 	default:
240 		return signal;
241 	}
242 }
243 
244 static int
245 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
246 {
247 	Elf32_Auxargs *args;
248 	Elf32_Addr *base;
249 	Elf32_Addr *pos;
250 
251 	KASSERT(curthread->td_proc == imgp->proc,
252 	    ("unsafe elf_linux_fixup(), should be curproc"));
253 	base = (Elf32_Addr *)*stack_base;
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	if (args->trace)
258 		AUXARGS_ENTRY_32(pos, AT_DEBUG, 1);
259 	if (args->execfd != -1)
260 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
261 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
262 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
263 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
264 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
265 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
266 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
267 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
268 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
269 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
270 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
271 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
272 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
273 
274 	free(imgp->auxargs, M_TEMP);
275 	imgp->auxargs = NULL;
276 
277 	base--;
278 	suword32(base, (uint32_t)imgp->args->argc);
279 	*stack_base = (register_t *)base;
280 	return 0;
281 }
282 
283 extern int _ucodesel, _ucode32sel, _udatasel;
284 extern unsigned long linux_sznonrtsigcode;
285 
286 static void
287 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
288 {
289 	struct thread *td = curthread;
290 	struct proc *p = td->td_proc;
291 	struct sigacts *psp;
292 	struct trapframe *regs;
293 	struct l_rt_sigframe *fp, frame;
294 	int oonstack;
295 	int sig;
296 	int code;
297 
298 	sig = ksi->ksi_signo;
299 	code = ksi->ksi_code;
300 	PROC_LOCK_ASSERT(p, MA_OWNED);
301 	psp = p->p_sigacts;
302 	mtx_assert(&psp->ps_mtx, MA_OWNED);
303 	regs = td->td_frame;
304 	oonstack = sigonstack(regs->tf_rsp);
305 
306 #ifdef DEBUG
307 	if (ldebug(rt_sendsig))
308 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
309 		    catcher, sig, (void*)mask, code);
310 #endif
311 	/*
312 	 * Allocate space for the signal handler context.
313 	 */
314 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
315 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
316 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
317 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
318 	} else
319 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
320 	mtx_unlock(&psp->ps_mtx);
321 
322 	/*
323 	 * Build the argument list for the signal handler.
324 	 */
325 	if (p->p_sysent->sv_sigtbl)
326 		if (sig <= p->p_sysent->sv_sigsize)
327 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
328 
329 	bzero(&frame, sizeof(frame));
330 
331 	frame.sf_handler = PTROUT(catcher);
332 	frame.sf_sig = sig;
333 	frame.sf_siginfo = PTROUT(&fp->sf_si);
334 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
335 
336 	/* Fill in POSIX parts */
337 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
338 
339 	/*
340 	 * Build the signal context to be used by sigreturn.
341 	 */
342 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
343 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
344 
345 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
346 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
347 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
348 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
349 	PROC_UNLOCK(p);
350 
351 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
352 
353 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
354         frame.sf_sc.uc_mcontext.sc_gs     = rgs();
355         frame.sf_sc.uc_mcontext.sc_fs     = rfs();
356         __asm __volatile("movl %%es,%0" :
357 	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
358         __asm __volatile("movl %%ds,%0" :
359 	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
360 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
361 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
362 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
363 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
364 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
365 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
366 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
367 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
368 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
369 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
370 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
371 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
372 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
373 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
374 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
375 
376 #ifdef DEBUG
377 	if (ldebug(rt_sendsig))
378 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
379 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
380 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
381 #endif
382 
383 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
384 		/*
385 		 * Process has trashed its stack; give it an illegal
386 		 * instruction to halt it in its tracks.
387 		 */
388 #ifdef DEBUG
389 		if (ldebug(rt_sendsig))
390 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
391 			    fp, oonstack);
392 #endif
393 		PROC_LOCK(p);
394 		sigexit(td, SIGILL);
395 	}
396 
397 	/*
398 	 * Build context to run handler in.
399 	 */
400 	regs->tf_rsp = PTROUT(fp);
401 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
402 	    linux_sznonrtsigcode;
403 	regs->tf_rflags &= ~(PSL_T | PSL_D);
404 	regs->tf_cs = _ucode32sel;
405 	regs->tf_ss = _udatasel;
406 	load_ds(_udatasel);
407 	td->td_pcb->pcb_ds = _udatasel;
408 	load_es(_udatasel);
409 	td->td_pcb->pcb_es = _udatasel;
410 	/* leave user %fs and %gs untouched */
411 	PROC_LOCK(p);
412 	mtx_lock(&psp->ps_mtx);
413 }
414 
415 
416 /*
417  * Send an interrupt to process.
418  *
419  * Stack is set up to allow sigcode stored
420  * in u. to call routine, followed by kcall
421  * to sigreturn routine below.  After sigreturn
422  * resets the signal mask, the stack, and the
423  * frame pointer, it returns to the user
424  * specified pc, psl.
425  */
426 static void
427 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
428 {
429 	struct thread *td = curthread;
430 	struct proc *p = td->td_proc;
431 	struct sigacts *psp;
432 	struct trapframe *regs;
433 	struct l_sigframe *fp, frame;
434 	l_sigset_t lmask;
435 	int oonstack, i;
436 	int sig, code;
437 
438 	sig = ksi->ksi_signo;
439 	code = ksi->ksi_code;
440 	PROC_LOCK_ASSERT(p, MA_OWNED);
441 	psp = p->p_sigacts;
442 	mtx_assert(&psp->ps_mtx, MA_OWNED);
443 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
444 		/* Signal handler installed with SA_SIGINFO. */
445 		linux_rt_sendsig(catcher, ksi, mask);
446 		return;
447 	}
448 
449 	regs = td->td_frame;
450 	oonstack = sigonstack(regs->tf_rsp);
451 
452 #ifdef DEBUG
453 	if (ldebug(sendsig))
454 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
455 		    catcher, sig, (void*)mask, code);
456 #endif
457 
458 	/*
459 	 * Allocate space for the signal handler context.
460 	 */
461 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
462 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
463 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
464 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
465 	} else
466 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
467 	mtx_unlock(&psp->ps_mtx);
468 	PROC_UNLOCK(p);
469 
470 	/*
471 	 * Build the argument list for the signal handler.
472 	 */
473 	if (p->p_sysent->sv_sigtbl)
474 		if (sig <= p->p_sysent->sv_sigsize)
475 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
476 
477 	bzero(&frame, sizeof(frame));
478 
479 	frame.sf_handler = PTROUT(catcher);
480 	frame.sf_sig = sig;
481 
482 	bsd_to_linux_sigset(mask, &lmask);
483 
484 	/*
485 	 * Build the signal context to be used by sigreturn.
486 	 */
487 	frame.sf_sc.sc_mask   = lmask.__bits[0];
488         frame.sf_sc.sc_gs     = rgs();
489         frame.sf_sc.sc_fs     = rfs();
490         __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es));
491         __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
492 	frame.sf_sc.sc_edi    = regs->tf_rdi;
493 	frame.sf_sc.sc_esi    = regs->tf_rsi;
494 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
495 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
496 	frame.sf_sc.sc_edx    = regs->tf_rdx;
497 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
498 	frame.sf_sc.sc_eax    = regs->tf_rax;
499 	frame.sf_sc.sc_eip    = regs->tf_rip;
500 	frame.sf_sc.sc_cs     = regs->tf_cs;
501 	frame.sf_sc.sc_eflags = regs->tf_rflags;
502 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
503 	frame.sf_sc.sc_ss     = regs->tf_ss;
504 	frame.sf_sc.sc_err    = regs->tf_err;
505 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
506 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
507 
508 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
509 		frame.sf_extramask[i] = lmask.__bits[i+1];
510 
511 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
512 		/*
513 		 * Process has trashed its stack; give it an illegal
514 		 * instruction to halt it in its tracks.
515 		 */
516 		PROC_LOCK(p);
517 		sigexit(td, SIGILL);
518 	}
519 
520 	/*
521 	 * Build context to run handler in.
522 	 */
523 	regs->tf_rsp = PTROUT(fp);
524 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
525 	regs->tf_rflags &= ~(PSL_T | PSL_D);
526 	regs->tf_cs = _ucode32sel;
527 	regs->tf_ss = _udatasel;
528 	load_ds(_udatasel);
529 	td->td_pcb->pcb_ds = _udatasel;
530 	load_es(_udatasel);
531 	td->td_pcb->pcb_es = _udatasel;
532 	/* leave user %fs and %gs untouched */
533 	PROC_LOCK(p);
534 	mtx_lock(&psp->ps_mtx);
535 }
536 
537 /*
538  * System call to cleanup state after a signal
539  * has been taken.  Reset signal mask and
540  * stack state from context left by sendsig (above).
541  * Return to previous pc and psl as specified by
542  * context left by sendsig. Check carefully to
543  * make sure that the user has not modified the
544  * psl to gain improper privileges or to cause
545  * a machine fault.
546  */
547 int
548 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
549 {
550 	struct proc *p = td->td_proc;
551 	struct l_sigframe frame;
552 	struct trapframe *regs;
553 	l_sigset_t lmask;
554 	int eflags, i;
555 	ksiginfo_t ksi;
556 
557 	regs = td->td_frame;
558 
559 #ifdef DEBUG
560 	if (ldebug(sigreturn))
561 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
562 #endif
563 	/*
564 	 * The trampoline code hands us the sigframe.
565 	 * It is unsafe to keep track of it ourselves, in the event that a
566 	 * program jumps out of a signal handler.
567 	 */
568 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
569 		return (EFAULT);
570 
571 	/*
572 	 * Check for security violations.
573 	 */
574 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
575 	eflags = frame.sf_sc.sc_eflags;
576 	/*
577 	 * XXX do allow users to change the privileged flag PSL_RF.  The
578 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
579 	 * sometimes set it there too.  tf_eflags is kept in the signal
580 	 * context during signal handling and there is no other place
581 	 * to remember it, so the PSL_RF bit may be corrupted by the
582 	 * signal handler without us knowing.  Corruption of the PSL_RF
583 	 * bit at worst causes one more or one less debugger trap, so
584 	 * allowing it is fairly harmless.
585 	 */
586 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
587 		return(EINVAL);
588 
589 	/*
590 	 * Don't allow users to load a valid privileged %cs.  Let the
591 	 * hardware check for invalid selectors, excess privilege in
592 	 * other selectors, invalid %eip's and invalid %esp's.
593 	 */
594 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
595 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
596 		ksiginfo_init_trap(&ksi);
597 		ksi.ksi_signo = SIGBUS;
598 		ksi.ksi_code = BUS_OBJERR;
599 		ksi.ksi_trapno = T_PROTFLT;
600 		ksi.ksi_addr = (void *)regs->tf_rip;
601 		trapsignal(td, &ksi);
602 		return(EINVAL);
603 	}
604 
605 	lmask.__bits[0] = frame.sf_sc.sc_mask;
606 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
607 		lmask.__bits[i+1] = frame.sf_extramask[i];
608 	PROC_LOCK(p);
609 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
610 	SIG_CANTMASK(td->td_sigmask);
611 	signotify(td);
612 	PROC_UNLOCK(p);
613 
614 	/*
615 	 * Restore signal context.
616 	 */
617 	/* Selectors were restored by the trampoline. */
618 	regs->tf_rdi    = frame.sf_sc.sc_edi;
619 	regs->tf_rsi    = frame.sf_sc.sc_esi;
620 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
621 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
622 	regs->tf_rdx    = frame.sf_sc.sc_edx;
623 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
624 	regs->tf_rax    = frame.sf_sc.sc_eax;
625 	regs->tf_rip    = frame.sf_sc.sc_eip;
626 	regs->tf_cs     = frame.sf_sc.sc_cs;
627 	regs->tf_rflags = eflags;
628 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
629 	regs->tf_ss     = frame.sf_sc.sc_ss;
630 
631 	return (EJUSTRETURN);
632 }
633 
634 /*
635  * System call to cleanup state after a signal
636  * has been taken.  Reset signal mask and
637  * stack state from context left by rt_sendsig (above).
638  * Return to previous pc and psl as specified by
639  * context left by sendsig. Check carefully to
640  * make sure that the user has not modified the
641  * psl to gain improper privileges or to cause
642  * a machine fault.
643  */
644 int
645 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
646 {
647 	struct proc *p = td->td_proc;
648 	struct l_ucontext uc;
649 	struct l_sigcontext *context;
650 	l_stack_t *lss;
651 	stack_t ss;
652 	struct trapframe *regs;
653 	int eflags;
654 	ksiginfo_t ksi;
655 
656 	regs = td->td_frame;
657 
658 #ifdef DEBUG
659 	if (ldebug(rt_sigreturn))
660 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
661 #endif
662 	/*
663 	 * The trampoline code hands us the ucontext.
664 	 * It is unsafe to keep track of it ourselves, in the event that a
665 	 * program jumps out of a signal handler.
666 	 */
667 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
668 		return (EFAULT);
669 
670 	context = &uc.uc_mcontext;
671 
672 	/*
673 	 * Check for security violations.
674 	 */
675 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
676 	eflags = context->sc_eflags;
677 	/*
678 	 * XXX do allow users to change the privileged flag PSL_RF.  The
679 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
680 	 * sometimes set it there too.  tf_eflags is kept in the signal
681 	 * context during signal handling and there is no other place
682 	 * to remember it, so the PSL_RF bit may be corrupted by the
683 	 * signal handler without us knowing.  Corruption of the PSL_RF
684 	 * bit at worst causes one more or one less debugger trap, so
685 	 * allowing it is fairly harmless.
686 	 */
687 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
688 		return(EINVAL);
689 
690 	/*
691 	 * Don't allow users to load a valid privileged %cs.  Let the
692 	 * hardware check for invalid selectors, excess privilege in
693 	 * other selectors, invalid %eip's and invalid %esp's.
694 	 */
695 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
696 	if (!CS_SECURE(context->sc_cs)) {
697 		ksiginfo_init_trap(&ksi);
698 		ksi.ksi_signo = SIGBUS;
699 		ksi.ksi_code = BUS_OBJERR;
700 		ksi.ksi_trapno = T_PROTFLT;
701 		ksi.ksi_addr = (void *)regs->tf_rip;
702 		trapsignal(td, &ksi);
703 		return(EINVAL);
704 	}
705 
706 	PROC_LOCK(p);
707 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
708 	SIG_CANTMASK(td->td_sigmask);
709 	signotify(td);
710 	PROC_UNLOCK(p);
711 
712 	/*
713 	 * Restore signal context
714 	 */
715 	/* Selectors were restored by the trampoline. */
716 	regs->tf_rdi    = context->sc_edi;
717 	regs->tf_rsi    = context->sc_esi;
718 	regs->tf_rbp    = context->sc_ebp;
719 	regs->tf_rbx    = context->sc_ebx;
720 	regs->tf_rdx    = context->sc_edx;
721 	regs->tf_rcx    = context->sc_ecx;
722 	regs->tf_rax    = context->sc_eax;
723 	regs->tf_rip    = context->sc_eip;
724 	regs->tf_cs     = context->sc_cs;
725 	regs->tf_rflags = eflags;
726 	regs->tf_rsp    = context->sc_esp_at_signal;
727 	regs->tf_ss     = context->sc_ss;
728 
729 	/*
730 	 * call sigaltstack & ignore results..
731 	 */
732 	lss = &uc.uc_stack;
733 	ss.ss_sp = PTRIN(lss->ss_sp);
734 	ss.ss_size = lss->ss_size;
735 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
736 
737 #ifdef DEBUG
738 	if (ldebug(rt_sigreturn))
739 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
740 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
741 #endif
742 	(void)kern_sigaltstack(td, &ss, NULL);
743 
744 	return (EJUSTRETURN);
745 }
746 
747 /*
748  * MPSAFE
749  */
750 static void
751 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
752 {
753 	args[0] = tf->tf_rbx;
754 	args[1] = tf->tf_rcx;
755 	args[2] = tf->tf_rdx;
756 	args[3] = tf->tf_rsi;
757 	args[4] = tf->tf_rdi;
758 	args[5] = tf->tf_rbp;	/* Unconfirmed */
759 	*params = NULL;		/* no copyin */
760 }
761 
762 /*
763  * If a linux binary is exec'ing something, try this image activator
764  * first.  We override standard shell script execution in order to
765  * be able to modify the interpreter path.  We only do this if a linux
766  * binary is doing the exec, so we do not create an EXEC module for it.
767  */
768 static int	exec_linux_imgact_try(struct image_params *iparams);
769 
770 static int
771 exec_linux_imgact_try(struct image_params *imgp)
772 {
773     const char *head = (const char *)imgp->image_header;
774     char *rpath;
775     int error = -1, len;
776 
777     /*
778      * The interpreter for shell scripts run from a linux binary needs
779      * to be located in /compat/linux if possible in order to recursively
780      * maintain linux path emulation.
781      */
782     if (((const short *)head)[0] == SHELLMAGIC) {
783 	    /*
784 	     * Run our normal shell image activator.  If it succeeds attempt
785 	     * to use the alternate path for the interpreter.  If an alternate
786 	     * path is found, use our stringspace to store it.
787 	     */
788 	    if ((error = exec_shell_imgact(imgp)) == 0) {
789 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
790 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
791 		    if (rpath != NULL) {
792 			    len = strlen(rpath) + 1;
793 
794 			    if (len <= MAXSHELLCMDLEN) {
795 				    memcpy(imgp->interpreter_name, rpath, len);
796 			    }
797 			    free(rpath, M_TEMP);
798 		    }
799 	    }
800     }
801     return(error);
802 }
803 
804 /*
805  * Clear registers on exec
806  * XXX copied from ia32_signal.c.
807  */
808 static void
809 exec_linux_setregs(td, entry, stack, ps_strings)
810 	struct thread *td;
811 	u_long entry;
812 	u_long stack;
813 	u_long ps_strings;
814 {
815 	struct trapframe *regs = td->td_frame;
816 	struct pcb *pcb = td->td_pcb;
817 
818 	critical_enter();
819 	wrmsr(MSR_FSBASE, 0);
820 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
821 	pcb->pcb_fsbase = 0;
822 	pcb->pcb_gsbase = 0;
823 	critical_exit();
824 	load_ds(_udatasel);
825 	load_es(_udatasel);
826 	load_fs(_udatasel);
827 	load_gs(_udatasel);
828 	pcb->pcb_ds = _udatasel;
829 	pcb->pcb_es = _udatasel;
830 	pcb->pcb_fs = _udatasel;
831 	pcb->pcb_gs = _udatasel;
832 
833 	bzero((char *)regs, sizeof(struct trapframe));
834 	regs->tf_rip = entry;
835 	regs->tf_rsp = stack;
836 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
837 	regs->tf_ss = _udatasel;
838 	regs->tf_cs = _ucode32sel;
839 	regs->tf_rbx = ps_strings;
840 	load_cr0(rcr0() | CR0_MP | CR0_TS);
841 	fpstate_drop(td);
842 
843 	/* Return via doreti so that we can change to a different %cs */
844 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
845 	pcb->pcb_flags &= ~PCB_GS32BIT;
846 	td->td_retval[1] = 0;
847 }
848 
849 /*
850  * XXX copied from ia32_sysvec.c.
851  */
852 static register_t *
853 linux_copyout_strings(struct image_params *imgp)
854 {
855 	int argc, envc;
856 	u_int32_t *vectp;
857 	char *stringp, *destp;
858 	u_int32_t *stack_base;
859 	struct linux32_ps_strings *arginfo;
860 	int sigcodesz;
861 
862 	/*
863 	 * Calculate string base and vector table pointers.
864 	 * Also deal with signal trampoline code for this exec type.
865 	 */
866 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
867 	sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
868 	destp =	(caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
869 		roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
870 
871 	/*
872 	 * install sigcode
873 	 */
874 	if (sigcodesz)
875 		copyout(imgp->proc->p_sysent->sv_sigcode,
876 			((caddr_t)arginfo - sigcodesz), sigcodesz);
877 
878 	/*
879 	 * If we have a valid auxargs ptr, prepare some room
880 	 * on the stack.
881 	 */
882 	if (imgp->auxargs) {
883 		/*
884 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
885 		 * lower compatibility.
886 		 */
887 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size
888 			: (AT_COUNT * 2);
889 		/*
890 		 * The '+ 2' is for the null pointers at the end of each of
891 		 * the arg and env vector sets,and imgp->auxarg_size is room
892 		 * for argument of Runtime loader.
893 		 */
894 		vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 +
895 				       imgp->auxarg_size) * sizeof(u_int32_t));
896 
897 	} else
898 		/*
899 		 * The '+ 2' is for the null pointers at the end of each of
900 		 * the arg and env vector sets
901 		 */
902 		vectp = (u_int32_t *)
903 			(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t));
904 
905 	/*
906 	 * vectp also becomes our initial stack base
907 	 */
908 	stack_base = vectp;
909 
910 	stringp = imgp->args->begin_argv;
911 	argc = imgp->args->argc;
912 	envc = imgp->args->envc;
913 	/*
914 	 * Copy out strings - arguments and environment.
915 	 */
916 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
917 
918 	/*
919 	 * Fill in "ps_strings" struct for ps, w, etc.
920 	 */
921 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
922 	suword32(&arginfo->ps_nargvstr, argc);
923 
924 	/*
925 	 * Fill in argument portion of vector table.
926 	 */
927 	for (; argc > 0; --argc) {
928 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
929 		while (*stringp++ != 0)
930 			destp++;
931 		destp++;
932 	}
933 
934 	/* a null vector table pointer separates the argp's from the envp's */
935 	suword32(vectp++, 0);
936 
937 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
938 	suword32(&arginfo->ps_nenvstr, envc);
939 
940 	/*
941 	 * Fill in environment portion of vector table.
942 	 */
943 	for (; envc > 0; --envc) {
944 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
945 		while (*stringp++ != 0)
946 			destp++;
947 		destp++;
948 	}
949 
950 	/* end of vector table is a null pointer */
951 	suword32(vectp, 0);
952 
953 	return ((register_t *)stack_base);
954 }
955 
956 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
957     "32-bit Linux emulation");
958 
959 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
960 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
961     &linux32_maxdsiz, 0, "");
962 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
963 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
964     &linux32_maxssiz, 0, "");
965 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
966 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
967     &linux32_maxvmem, 0, "");
968 
969 static void
970 linux32_fixlimit(struct rlimit *rl, int which)
971 {
972 
973 	switch (which) {
974 	case RLIMIT_DATA:
975 		if (linux32_maxdsiz != 0) {
976 			if (rl->rlim_cur > linux32_maxdsiz)
977 				rl->rlim_cur = linux32_maxdsiz;
978 			if (rl->rlim_max > linux32_maxdsiz)
979 				rl->rlim_max = linux32_maxdsiz;
980 		}
981 		break;
982 	case RLIMIT_STACK:
983 		if (linux32_maxssiz != 0) {
984 			if (rl->rlim_cur > linux32_maxssiz)
985 				rl->rlim_cur = linux32_maxssiz;
986 			if (rl->rlim_max > linux32_maxssiz)
987 				rl->rlim_max = linux32_maxssiz;
988 		}
989 		break;
990 	case RLIMIT_VMEM:
991 		if (linux32_maxvmem != 0) {
992 			if (rl->rlim_cur > linux32_maxvmem)
993 				rl->rlim_cur = linux32_maxvmem;
994 			if (rl->rlim_max > linux32_maxvmem)
995 				rl->rlim_max = linux32_maxvmem;
996 		}
997 		break;
998 	}
999 }
1000 
1001 struct sysentvec elf_linux_sysvec = {
1002 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1003 	.sv_table	= linux_sysent,
1004 	.sv_mask	= 0,
1005 	.sv_sigsize	= LINUX_SIGTBLSZ,
1006 	.sv_sigtbl	= bsd_to_linux_signal,
1007 	.sv_errsize	= ELAST + 1,
1008 	.sv_errtbl	= bsd_to_linux_errno,
1009 	.sv_transtrap	= translate_traps,
1010 	.sv_fixup	= elf_linux_fixup,
1011 	.sv_sendsig	= linux_sendsig,
1012 	.sv_sigcode	= linux_sigcode,
1013 	.sv_szsigcode	= &linux_szsigcode,
1014 	.sv_prepsyscall	= linux_prepsyscall,
1015 	.sv_name	= "Linux ELF32",
1016 	.sv_coredump	= elf32_coredump,
1017 	.sv_imgact_try	= exec_linux_imgact_try,
1018 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1019 	.sv_pagesize	= PAGE_SIZE,
1020 	.sv_minuser	= VM_MIN_ADDRESS,
1021 	.sv_maxuser	= LINUX32_USRSTACK,
1022 	.sv_usrstack	= LINUX32_USRSTACK,
1023 	.sv_psstrings	= LINUX32_PS_STRINGS,
1024 	.sv_stackprot	= VM_PROT_ALL,
1025 	.sv_copyout_strings = linux_copyout_strings,
1026 	.sv_setregs	= exec_linux_setregs,
1027 	.sv_fixlimit	= linux32_fixlimit,
1028 	.sv_maxssiz	= &linux32_maxssiz,
1029 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1030 };
1031 
1032 static Elf32_Brandinfo linux_brand = {
1033 	.brand		= ELFOSABI_LINUX,
1034 	.machine	= EM_386,
1035 	.compat_3_brand	= "Linux",
1036 	.emul_path	= "/compat/linux",
1037 	.interp_path	= "/lib/ld-linux.so.1",
1038 	.sysvec		= &elf_linux_sysvec,
1039 	.interp_newpath	= NULL,
1040 	.flags		= BI_CAN_EXEC_DYN,
1041 };
1042 
1043 static Elf32_Brandinfo linux_glibc2brand = {
1044 	.brand		= ELFOSABI_LINUX,
1045 	.machine	= EM_386,
1046 	.compat_3_brand	= "Linux",
1047 	.emul_path	= "/compat/linux",
1048 	.interp_path	= "/lib/ld-linux.so.2",
1049 	.sysvec		= &elf_linux_sysvec,
1050 	.interp_newpath	= NULL,
1051 	.flags		= BI_CAN_EXEC_DYN,
1052 };
1053 
1054 Elf32_Brandinfo *linux_brandlist[] = {
1055 	&linux_brand,
1056 	&linux_glibc2brand,
1057 	NULL
1058 };
1059 
1060 static int
1061 linux_elf_modevent(module_t mod, int type, void *data)
1062 {
1063 	Elf32_Brandinfo **brandinfo;
1064 	int error;
1065 	struct linux_ioctl_handler **lihp;
1066 	struct linux_device_handler **ldhp;
1067 
1068 	error = 0;
1069 
1070 	switch(type) {
1071 	case MOD_LOAD:
1072 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1073 		     ++brandinfo)
1074 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1075 				error = EINVAL;
1076 		if (error == 0) {
1077 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1078 				linux_ioctl_register_handler(*lihp);
1079 			SET_FOREACH(ldhp, linux_device_handler_set)
1080 				linux_device_register_handler(*ldhp);
1081 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1082 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1083 			LIST_INIT(&futex_list);
1084 			sx_init(&futex_sx, "futex protection lock");
1085 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
1086 			      NULL, 1000);
1087 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
1088 			      NULL, 1000);
1089 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
1090 			      NULL, 1000);
1091 			if (bootverbose)
1092 				printf("Linux ELF exec handler installed\n");
1093 		} else
1094 			printf("cannot insert Linux ELF brand handler\n");
1095 		break;
1096 	case MOD_UNLOAD:
1097 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1098 		     ++brandinfo)
1099 			if (elf32_brand_inuse(*brandinfo))
1100 				error = EBUSY;
1101 		if (error == 0) {
1102 			for (brandinfo = &linux_brandlist[0];
1103 			     *brandinfo != NULL; ++brandinfo)
1104 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1105 					error = EINVAL;
1106 		}
1107 		if (error == 0) {
1108 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1109 				linux_ioctl_unregister_handler(*lihp);
1110 			SET_FOREACH(ldhp, linux_device_handler_set)
1111 				linux_device_unregister_handler(*ldhp);
1112 			mtx_destroy(&emul_lock);
1113 			sx_destroy(&emul_shared_lock);
1114 			sx_destroy(&futex_sx);
1115 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1116 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1117 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1118 			if (bootverbose)
1119 				printf("Linux ELF exec handler removed\n");
1120 		} else
1121 			printf("Could not deinstall ELF interpreter entry\n");
1122 		break;
1123 	default:
1124 		return EOPNOTSUPP;
1125 	}
1126 	return error;
1127 }
1128 
1129 static moduledata_t linux_elf_mod = {
1130 	"linuxelf",
1131 	linux_elf_modevent,
1132 	0
1133 };
1134 
1135 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1136