xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision b3aaa0cc21c63d388230c7ef2a80abd631ff20d5)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_signal.h>
82 #include <compat/linux/linux_util.h>
83 
84 MODULE_VERSION(linux, 1);
85 
86 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
87 
/*
 * Store one 32-bit auxiliary-vector entry with suword32(): first the word
 * id, then the word val.  pos must be a modifiable pointer lvalue; it is
 * post-incremented once for each stored word.  All arguments are
 * parenthesized so that an expression such as *pp can be passed as pos.
 * The value returned by suword32() is not examined.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)		\
	do {					\
		suword32((pos)++, (id));	\
		suword32((pos)++, (val));	\
	} while (0)
93 
94 #if BYTE_ORDER == LITTLE_ENDIAN
95 #define SHELLMAGIC      0x2123 /* #! */
96 #else
97 #define SHELLMAGIC      0x2321
98 #endif
99 
100 /*
101  * Allow the sendsig functions to use the ldebug() facility
102  * even though they are not syscalls themselves. Map them
103  * to syscall 0. This is slightly less bogus than using
104  * ldebug(sigreturn).
105  */
106 #define	LINUX_SYS_linux_rt_sendsig	0
107 #define	LINUX_SYS_linux_sendsig		0
108 
109 extern char linux_sigcode[];
110 extern int linux_szsigcode;
111 
112 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
113 
114 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
115 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
116 
117 static int	elf_linux_fixup(register_t **stack_base,
118 		    struct image_params *iparams);
119 static register_t *linux_copyout_strings(struct image_params *imgp);
120 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
121 		    caddr_t *params);
122 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
123 static void	exec_linux_setregs(struct thread *td, u_long entry,
124 				   u_long stack, u_long ps_strings);
125 static void	linux32_fixlimit(struct rlimit *rl, int which);
126 
127 extern LIST_HEAD(futex_list, futex) futex_list;
128 extern struct sx futex_sx;
129 
130 static eventhandler_tag linux_exit_tag;
131 static eventhandler_tag linux_schedtail_tag;
132 static eventhandler_tag linux_exec_tag;
133 
134 /*
135  * Linux syscalls return negative errno's, we do positive and map them
136  * Reference:
137  *   FreeBSD: src/sys/sys/errno.h
138  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
139  *            linux-2.6.17.8/include/asm-generic/errno.h
140  */
141 static int bsd_to_linux_errno[ELAST + 1] = {
142 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
143 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
144 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
145 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
146 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
147 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
148 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
149 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
150 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
151 	 -72, -67, -71
152 };
153 
154 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
155 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
156 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
157 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
158 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
159 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
160 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
161 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
162 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
163 };
164 
165 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
166 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
167 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
168 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
169 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
170 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
171 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
172 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
173 	SIGIO, SIGURG, SIGSYS
174 };
175 
/* Value reported when a trap code has no entry in the table below. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap number translation table.  The array index is the FreeBSD trap code
 * (the T_* name is noted where one applies); the stored value is the number
 * placed in the Linux sigcontext sc_trapno field.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0              */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT  */ 6,
	/*  2              */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT     */ 3,
	/*  4              */ LINUX_T_UNKNOWN,
	/*  5              */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP  */ 16,
	/*  7 T_ASTFLT     */ 254,
	/*  8              */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT    */ 13,
	/* 10 T_TRCTRAP    */ 1,
	/* 11              */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT    */ 14,
	/* 13              */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT   */ 17,
	/* 15              */ LINUX_T_UNKNOWN,
	/* 16              */ LINUX_T_UNKNOWN,
	/* 17              */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE     */ 0,
	/* 19 T_NMI        */ 2,
	/* 20 T_OFLOW      */ 4,
	/* 21 T_BOUND      */ 5,
	/* 22 T_DNA        */ 7,
	/* 23 T_DOUBLEFLT  */ 8,
	/* 24 T_FPOPFLT    */ 9,
	/* 25 T_TSSFLT     */ 10,
	/* 26 T_SEGNPFLT   */ 11,
	/* 27 T_STKFLT     */ 12,
	/* 28 T_MCHK       */ 18,
	/* 29 T_XMMFLT     */ 19,
	/* 30 T_RESERVED   */ 15
};

/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[].  The index is compared
 * against the (unsigned) element count, so any index past the end of the
 * table -- including a negative int, which converts to a large unsigned
 * value -- yields LINUX_T_UNKNOWN.
 */
#define bsd_to_linux_trapcode(code)					\
    ((code) < sizeof(_bsd_to_linux_trapcode) /				\
	sizeof(_bsd_to_linux_trapcode[0]) ?				\
	_bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
214 
215 struct linux32_ps_strings {
216 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
217 	u_int ps_nargvstr;	/* the number of argument strings */
218 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
219 	u_int ps_nenvstr;	/* the number of environment strings */
220 };
221 
222 /*
223  * If FreeBSD & Linux have a difference of opinion about what a trap
224  * means, deal with it here.
225  *
226  * MPSAFE
227  */
228 static int
229 translate_traps(int signal, int trap_code)
230 {
231 	if (signal != SIGBUS)
232 		return signal;
233 	switch (trap_code) {
234 	case T_PROTFLT:
235 	case T_TSSFLT:
236 	case T_DOUBLEFLT:
237 	case T_PAGEFLT:
238 		return SIGSEGV;
239 	default:
240 		return signal;
241 	}
242 }
243 
244 static int
245 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
246 {
247 	Elf32_Auxargs *args;
248 	Elf32_Addr *base;
249 	Elf32_Addr *pos;
250 
251 	KASSERT(curthread->td_proc == imgp->proc,
252 	    ("unsafe elf_linux_fixup(), should be curproc"));
253 	base = (Elf32_Addr *)*stack_base;
254 	args = (Elf32_Auxargs *)imgp->auxargs;
255 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
256 
257 	if (args->execfd != -1)
258 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
259 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
260 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
261 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
262 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
263 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
264 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
265 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
266 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
267 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
268 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
269 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
270 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
271 
272 	free(imgp->auxargs, M_TEMP);
273 	imgp->auxargs = NULL;
274 
275 	base--;
276 	suword32(base, (uint32_t)imgp->args->argc);
277 	*stack_base = (register_t *)base;
278 	return 0;
279 }
280 
281 extern int _ucodesel, _ucode32sel, _udatasel;
282 extern unsigned long linux_sznonrtsigcode;
283 
284 static void
285 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
286 {
287 	struct thread *td = curthread;
288 	struct proc *p = td->td_proc;
289 	struct sigacts *psp;
290 	struct trapframe *regs;
291 	struct l_rt_sigframe *fp, frame;
292 	int oonstack;
293 	int sig;
294 	int code;
295 
296 	sig = ksi->ksi_signo;
297 	code = ksi->ksi_code;
298 	PROC_LOCK_ASSERT(p, MA_OWNED);
299 	psp = p->p_sigacts;
300 	mtx_assert(&psp->ps_mtx, MA_OWNED);
301 	regs = td->td_frame;
302 	oonstack = sigonstack(regs->tf_rsp);
303 
304 #ifdef DEBUG
305 	if (ldebug(rt_sendsig))
306 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
307 		    catcher, sig, (void*)mask, code);
308 #endif
309 	/*
310 	 * Allocate space for the signal handler context.
311 	 */
312 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
313 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
314 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
315 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
316 	} else
317 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
318 	mtx_unlock(&psp->ps_mtx);
319 
320 	/*
321 	 * Build the argument list for the signal handler.
322 	 */
323 	if (p->p_sysent->sv_sigtbl)
324 		if (sig <= p->p_sysent->sv_sigsize)
325 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
326 
327 	bzero(&frame, sizeof(frame));
328 
329 	frame.sf_handler = PTROUT(catcher);
330 	frame.sf_sig = sig;
331 	frame.sf_siginfo = PTROUT(&fp->sf_si);
332 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
333 
334 	/* Fill in POSIX parts */
335 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
336 
337 	/*
338 	 * Build the signal context to be used by sigreturn.
339 	 */
340 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
341 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
342 
343 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
344 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
345 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
346 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
347 	PROC_UNLOCK(p);
348 
349 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
350 
351 	frame.sf_sc.uc_mcontext.sc_mask	= frame.sf_sc.uc_sigmask.__bits[0];
352 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
353 	frame.sf_sc.uc_mcontext.sc_fs     = rfs();
354 	__asm __volatile("mov %%es,%0" :
355 	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
356 	__asm __volatile("mov %%ds,%0" :
357 	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
358 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
359 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
360 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
361 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
362 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
363 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
364 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
365 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
366 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
367 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
368 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
369 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
370 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
371 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
372 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
373 
374 #ifdef DEBUG
375 	if (ldebug(rt_sendsig))
376 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
377 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
378 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
379 #endif
380 
381 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
382 		/*
383 		 * Process has trashed its stack; give it an illegal
384 		 * instruction to halt it in its tracks.
385 		 */
386 #ifdef DEBUG
387 		if (ldebug(rt_sendsig))
388 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
389 			    fp, oonstack);
390 #endif
391 		PROC_LOCK(p);
392 		sigexit(td, SIGILL);
393 	}
394 
395 	/*
396 	 * Build context to run handler in.
397 	 */
398 	regs->tf_rsp = PTROUT(fp);
399 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
400 	    linux_sznonrtsigcode;
401 	regs->tf_rflags &= ~(PSL_T | PSL_D);
402 	regs->tf_cs = _ucode32sel;
403 	regs->tf_ss = _udatasel;
404 	load_ds(_udatasel);
405 	td->td_pcb->pcb_ds = _udatasel;
406 	load_es(_udatasel);
407 	td->td_pcb->pcb_es = _udatasel;
408 	/* leave user %fs and %gs untouched */
409 	PROC_LOCK(p);
410 	mtx_lock(&psp->ps_mtx);
411 }
412 
413 
414 /*
415  * Send an interrupt to process.
416  *
417  * Stack is set up to allow sigcode stored
418  * in u. to call routine, followed by kcall
419  * to sigreturn routine below.  After sigreturn
420  * resets the signal mask, the stack, and the
421  * frame pointer, it returns to the user
422  * specified pc, psl.
423  */
424 static void
425 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
426 {
427 	struct thread *td = curthread;
428 	struct proc *p = td->td_proc;
429 	struct sigacts *psp;
430 	struct trapframe *regs;
431 	struct l_sigframe *fp, frame;
432 	l_sigset_t lmask;
433 	int oonstack, i;
434 	int sig, code;
435 
436 	sig = ksi->ksi_signo;
437 	code = ksi->ksi_code;
438 	PROC_LOCK_ASSERT(p, MA_OWNED);
439 	psp = p->p_sigacts;
440 	mtx_assert(&psp->ps_mtx, MA_OWNED);
441 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
442 		/* Signal handler installed with SA_SIGINFO. */
443 		linux_rt_sendsig(catcher, ksi, mask);
444 		return;
445 	}
446 
447 	regs = td->td_frame;
448 	oonstack = sigonstack(regs->tf_rsp);
449 
450 #ifdef DEBUG
451 	if (ldebug(sendsig))
452 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
453 		    catcher, sig, (void*)mask, code);
454 #endif
455 
456 	/*
457 	 * Allocate space for the signal handler context.
458 	 */
459 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
460 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
461 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
462 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
463 	} else
464 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
465 	mtx_unlock(&psp->ps_mtx);
466 	PROC_UNLOCK(p);
467 
468 	/*
469 	 * Build the argument list for the signal handler.
470 	 */
471 	if (p->p_sysent->sv_sigtbl)
472 		if (sig <= p->p_sysent->sv_sigsize)
473 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
474 
475 	bzero(&frame, sizeof(frame));
476 
477 	frame.sf_handler = PTROUT(catcher);
478 	frame.sf_sig = sig;
479 
480 	bsd_to_linux_sigset(mask, &lmask);
481 
482 	/*
483 	 * Build the signal context to be used by sigreturn.
484 	 */
485 	frame.sf_sc.sc_mask   = lmask.__bits[0];
486 	frame.sf_sc.sc_gs     = rgs();
487 	frame.sf_sc.sc_fs     = rfs();
488 	__asm __volatile("mov %%es,%0" : "=rm" (frame.sf_sc.sc_es));
489 	__asm __volatile("mov %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
490 	frame.sf_sc.sc_edi    = regs->tf_rdi;
491 	frame.sf_sc.sc_esi    = regs->tf_rsi;
492 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
493 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
494 	frame.sf_sc.sc_edx    = regs->tf_rdx;
495 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
496 	frame.sf_sc.sc_eax    = regs->tf_rax;
497 	frame.sf_sc.sc_eip    = regs->tf_rip;
498 	frame.sf_sc.sc_cs     = regs->tf_cs;
499 	frame.sf_sc.sc_eflags = regs->tf_rflags;
500 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
501 	frame.sf_sc.sc_ss     = regs->tf_ss;
502 	frame.sf_sc.sc_err    = regs->tf_err;
503 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
504 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
505 
506 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
507 		frame.sf_extramask[i] = lmask.__bits[i+1];
508 
509 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
510 		/*
511 		 * Process has trashed its stack; give it an illegal
512 		 * instruction to halt it in its tracks.
513 		 */
514 		PROC_LOCK(p);
515 		sigexit(td, SIGILL);
516 	}
517 
518 	/*
519 	 * Build context to run handler in.
520 	 */
521 	regs->tf_rsp = PTROUT(fp);
522 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
523 	regs->tf_rflags &= ~(PSL_T | PSL_D);
524 	regs->tf_cs = _ucode32sel;
525 	regs->tf_ss = _udatasel;
526 	load_ds(_udatasel);
527 	td->td_pcb->pcb_ds = _udatasel;
528 	load_es(_udatasel);
529 	td->td_pcb->pcb_es = _udatasel;
530 	/* leave user %fs and %gs untouched */
531 	PROC_LOCK(p);
532 	mtx_lock(&psp->ps_mtx);
533 }
534 
535 /*
536  * System call to cleanup state after a signal
537  * has been taken.  Reset signal mask and
538  * stack state from context left by sendsig (above).
539  * Return to previous pc and psl as specified by
540  * context left by sendsig. Check carefully to
541  * make sure that the user has not modified the
542  * psl to gain improper privileges or to cause
543  * a machine fault.
544  */
545 int
546 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
547 {
548 	struct proc *p = td->td_proc;
549 	struct l_sigframe frame;
550 	struct trapframe *regs;
551 	l_sigset_t lmask;
552 	int eflags, i;
553 	ksiginfo_t ksi;
554 
555 	regs = td->td_frame;
556 
557 #ifdef DEBUG
558 	if (ldebug(sigreturn))
559 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
560 #endif
561 	/*
562 	 * The trampoline code hands us the sigframe.
563 	 * It is unsafe to keep track of it ourselves, in the event that a
564 	 * program jumps out of a signal handler.
565 	 */
566 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
567 		return (EFAULT);
568 
569 	/*
570 	 * Check for security violations.
571 	 */
572 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
573 	eflags = frame.sf_sc.sc_eflags;
574 	/*
575 	 * XXX do allow users to change the privileged flag PSL_RF.  The
576 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
577 	 * sometimes set it there too.  tf_eflags is kept in the signal
578 	 * context during signal handling and there is no other place
579 	 * to remember it, so the PSL_RF bit may be corrupted by the
580 	 * signal handler without us knowing.  Corruption of the PSL_RF
581 	 * bit at worst causes one more or one less debugger trap, so
582 	 * allowing it is fairly harmless.
583 	 */
584 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
585 		return(EINVAL);
586 
587 	/*
588 	 * Don't allow users to load a valid privileged %cs.  Let the
589 	 * hardware check for invalid selectors, excess privilege in
590 	 * other selectors, invalid %eip's and invalid %esp's.
591 	 */
592 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
593 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
594 		ksiginfo_init_trap(&ksi);
595 		ksi.ksi_signo = SIGBUS;
596 		ksi.ksi_code = BUS_OBJERR;
597 		ksi.ksi_trapno = T_PROTFLT;
598 		ksi.ksi_addr = (void *)regs->tf_rip;
599 		trapsignal(td, &ksi);
600 		return(EINVAL);
601 	}
602 
603 	lmask.__bits[0] = frame.sf_sc.sc_mask;
604 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
605 		lmask.__bits[i+1] = frame.sf_extramask[i];
606 	PROC_LOCK(p);
607 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
608 	SIG_CANTMASK(td->td_sigmask);
609 	signotify(td);
610 	PROC_UNLOCK(p);
611 
612 	/*
613 	 * Restore signal context.
614 	 */
615 	/* Selectors were restored by the trampoline. */
616 	regs->tf_rdi    = frame.sf_sc.sc_edi;
617 	regs->tf_rsi    = frame.sf_sc.sc_esi;
618 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
619 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
620 	regs->tf_rdx    = frame.sf_sc.sc_edx;
621 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
622 	regs->tf_rax    = frame.sf_sc.sc_eax;
623 	regs->tf_rip    = frame.sf_sc.sc_eip;
624 	regs->tf_cs     = frame.sf_sc.sc_cs;
625 	regs->tf_rflags = eflags;
626 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
627 	regs->tf_ss     = frame.sf_sc.sc_ss;
628 
629 	return (EJUSTRETURN);
630 }
631 
632 /*
633  * System call to cleanup state after a signal
634  * has been taken.  Reset signal mask and
635  * stack state from context left by rt_sendsig (above).
636  * Return to previous pc and psl as specified by
637  * context left by sendsig. Check carefully to
638  * make sure that the user has not modified the
639  * psl to gain improper privileges or to cause
640  * a machine fault.
641  */
642 int
643 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
644 {
645 	struct proc *p = td->td_proc;
646 	struct l_ucontext uc;
647 	struct l_sigcontext *context;
648 	l_stack_t *lss;
649 	stack_t ss;
650 	struct trapframe *regs;
651 	int eflags;
652 	ksiginfo_t ksi;
653 
654 	regs = td->td_frame;
655 
656 #ifdef DEBUG
657 	if (ldebug(rt_sigreturn))
658 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
659 #endif
660 	/*
661 	 * The trampoline code hands us the ucontext.
662 	 * It is unsafe to keep track of it ourselves, in the event that a
663 	 * program jumps out of a signal handler.
664 	 */
665 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
666 		return (EFAULT);
667 
668 	context = &uc.uc_mcontext;
669 
670 	/*
671 	 * Check for security violations.
672 	 */
673 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
674 	eflags = context->sc_eflags;
675 	/*
676 	 * XXX do allow users to change the privileged flag PSL_RF.  The
677 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
678 	 * sometimes set it there too.  tf_eflags is kept in the signal
679 	 * context during signal handling and there is no other place
680 	 * to remember it, so the PSL_RF bit may be corrupted by the
681 	 * signal handler without us knowing.  Corruption of the PSL_RF
682 	 * bit at worst causes one more or one less debugger trap, so
683 	 * allowing it is fairly harmless.
684 	 */
685 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
686 		return(EINVAL);
687 
688 	/*
689 	 * Don't allow users to load a valid privileged %cs.  Let the
690 	 * hardware check for invalid selectors, excess privilege in
691 	 * other selectors, invalid %eip's and invalid %esp's.
692 	 */
693 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
694 	if (!CS_SECURE(context->sc_cs)) {
695 		ksiginfo_init_trap(&ksi);
696 		ksi.ksi_signo = SIGBUS;
697 		ksi.ksi_code = BUS_OBJERR;
698 		ksi.ksi_trapno = T_PROTFLT;
699 		ksi.ksi_addr = (void *)regs->tf_rip;
700 		trapsignal(td, &ksi);
701 		return(EINVAL);
702 	}
703 
704 	PROC_LOCK(p);
705 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
706 	SIG_CANTMASK(td->td_sigmask);
707 	signotify(td);
708 	PROC_UNLOCK(p);
709 
710 	/*
711 	 * Restore signal context
712 	 */
713 	/* Selectors were restored by the trampoline. */
714 	regs->tf_rdi    = context->sc_edi;
715 	regs->tf_rsi    = context->sc_esi;
716 	regs->tf_rbp    = context->sc_ebp;
717 	regs->tf_rbx    = context->sc_ebx;
718 	regs->tf_rdx    = context->sc_edx;
719 	regs->tf_rcx    = context->sc_ecx;
720 	regs->tf_rax    = context->sc_eax;
721 	regs->tf_rip    = context->sc_eip;
722 	regs->tf_cs     = context->sc_cs;
723 	regs->tf_rflags = eflags;
724 	regs->tf_rsp    = context->sc_esp_at_signal;
725 	regs->tf_ss     = context->sc_ss;
726 
727 	/*
728 	 * call sigaltstack & ignore results..
729 	 */
730 	lss = &uc.uc_stack;
731 	ss.ss_sp = PTRIN(lss->ss_sp);
732 	ss.ss_size = lss->ss_size;
733 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
734 
735 #ifdef DEBUG
736 	if (ldebug(rt_sigreturn))
737 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
738 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
739 #endif
740 	(void)kern_sigaltstack(td, &ss, NULL);
741 
742 	return (EJUSTRETURN);
743 }
744 
745 /*
746  * MPSAFE
747  */
748 static void
749 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
750 {
751 	args[0] = tf->tf_rbx;
752 	args[1] = tf->tf_rcx;
753 	args[2] = tf->tf_rdx;
754 	args[3] = tf->tf_rsi;
755 	args[4] = tf->tf_rdi;
756 	args[5] = tf->tf_rbp;	/* Unconfirmed */
757 	*params = NULL;		/* no copyin */
758 }
759 
760 /*
761  * If a linux binary is exec'ing something, try this image activator
762  * first.  We override standard shell script execution in order to
763  * be able to modify the interpreter path.  We only do this if a linux
764  * binary is doing the exec, so we do not create an EXEC module for it.
765  */
766 static int	exec_linux_imgact_try(struct image_params *iparams);
767 
768 static int
769 exec_linux_imgact_try(struct image_params *imgp)
770 {
771 	const char *head = (const char *)imgp->image_header;
772 	char *rpath;
773 	int error = -1, len;
774 
775 	/*
776 	* The interpreter for shell scripts run from a linux binary needs
777 	* to be located in /compat/linux if possible in order to recursively
778 	* maintain linux path emulation.
779 	*/
780 	if (((const short *)head)[0] == SHELLMAGIC) {
781 		/*
782 		* Run our normal shell image activator.  If it succeeds attempt
783 		* to use the alternate path for the interpreter.  If an
784 		* alternate * path is found, use our stringspace to store it.
785 		*/
786 		if ((error = exec_shell_imgact(imgp)) == 0) {
787 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
788 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
789 			    AT_FDCWD);
790 			if (rpath != NULL) {
791 				len = strlen(rpath) + 1;
792 
793 				if (len <= MAXSHELLCMDLEN) {
794 					memcpy(imgp->interpreter_name, rpath,
795 					    len);
796 				}
797 				free(rpath, M_TEMP);
798 			}
799 		}
800 	}
801 	return(error);
802 }
803 
804 /*
805  * Clear registers on exec
806  * XXX copied from ia32_signal.c.
807  */
808 static void
809 exec_linux_setregs(td, entry, stack, ps_strings)
810 	struct thread *td;
811 	u_long entry;
812 	u_long stack;
813 	u_long ps_strings;
814 {
815 	struct trapframe *regs = td->td_frame;
816 	struct pcb *pcb = td->td_pcb;
817 
818 	critical_enter();
819 	wrmsr(MSR_FSBASE, 0);
820 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
821 	pcb->pcb_fsbase = 0;
822 	pcb->pcb_gsbase = 0;
823 	critical_exit();
824 	load_ds(_udatasel);
825 	load_es(_udatasel);
826 	load_fs(_udatasel);
827 	load_gs(_udatasel);
828 	pcb->pcb_ds = _udatasel;
829 	pcb->pcb_es = _udatasel;
830 	pcb->pcb_fs = _udatasel;
831 	pcb->pcb_gs = _udatasel;
832 
833 	bzero((char *)regs, sizeof(struct trapframe));
834 	regs->tf_rip = entry;
835 	regs->tf_rsp = stack;
836 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
837 	regs->tf_ss = _udatasel;
838 	regs->tf_cs = _ucode32sel;
839 	regs->tf_rbx = ps_strings;
840 	load_cr0(rcr0() | CR0_MP | CR0_TS);
841 	fpstate_drop(td);
842 
843 	/* Return via doreti so that we can change to a different %cs */
844 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
845 	pcb->pcb_flags &= ~PCB_GS32BIT;
846 	td->td_retval[1] = 0;
847 }
848 
849 /*
850  * XXX copied from ia32_sysvec.c.
851  */
852 static register_t *
853 linux_copyout_strings(struct image_params *imgp)
854 {
855 	int argc, envc;
856 	u_int32_t *vectp;
857 	char *stringp, *destp;
858 	u_int32_t *stack_base;
859 	struct linux32_ps_strings *arginfo;
860 	int sigcodesz;
861 
862 	/*
863 	 * Calculate string base and vector table pointers.
864 	 * Also deal with signal trampoline code for this exec type.
865 	 */
866 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
867 	sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode);
868 	destp =	(caddr_t)arginfo - sigcodesz - SPARE_USRSPACE -
869 	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));
870 
871 	/*
872 	 * install sigcode
873 	 */
874 	if (sigcodesz)
875 		copyout(imgp->proc->p_sysent->sv_sigcode,
876 			((caddr_t)arginfo - sigcodesz), sigcodesz);
877 
878 	/*
879 	 * If we have a valid auxargs ptr, prepare some room
880 	 * on the stack.
881 	 */
882 	if (imgp->auxargs) {
883 		/*
884 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
885 		 * lower compatibility.
886 		 */
887 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
888 		    (AT_COUNT * 2);
889 		/*
890 		 * The '+ 2' is for the null pointers at the end of each of
891 		 * the arg and env vector sets,and imgp->auxarg_size is room
892 		 * for argument of Runtime loader.
893 		 */
894 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
895 		    imgp->args->envc + 2 + imgp->auxarg_size) *
896 		    sizeof(u_int32_t));
897 
898 	} else
899 		/*
900 		 * The '+ 2' is for the null pointers at the end of each of
901 		 * the arg and env vector sets
902 		 */
903 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
904 		    imgp->args->envc + 2) * sizeof(u_int32_t));
905 
906 	/*
907 	 * vectp also becomes our initial stack base
908 	 */
909 	stack_base = vectp;
910 
911 	stringp = imgp->args->begin_argv;
912 	argc = imgp->args->argc;
913 	envc = imgp->args->envc;
914 	/*
915 	 * Copy out strings - arguments and environment.
916 	 */
917 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
918 
919 	/*
920 	 * Fill in "ps_strings" struct for ps, w, etc.
921 	 */
922 	suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp);
923 	suword32(&arginfo->ps_nargvstr, argc);
924 
925 	/*
926 	 * Fill in argument portion of vector table.
927 	 */
928 	for (; argc > 0; --argc) {
929 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
930 		while (*stringp++ != 0)
931 			destp++;
932 		destp++;
933 	}
934 
935 	/* a null vector table pointer separates the argp's from the envp's */
936 	suword32(vectp++, 0);
937 
938 	suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp);
939 	suword32(&arginfo->ps_nenvstr, envc);
940 
941 	/*
942 	 * Fill in environment portion of vector table.
943 	 */
944 	for (; envc > 0; --envc) {
945 		suword32(vectp++, (u_int32_t)(intptr_t)destp);
946 		while (*stringp++ != 0)
947 			destp++;
948 		destp++;
949 	}
950 
951 	/* end of vector table is a null pointer */
952 	suword32(vectp, 0);
953 
954 	return ((register_t *)stack_base);
955 }
956 
957 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
958     "32-bit Linux emulation");
959 
960 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
961 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
962     &linux32_maxdsiz, 0, "");
963 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
964 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
965     &linux32_maxssiz, 0, "");
966 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
967 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
968     &linux32_maxvmem, 0, "");
969 
970 static void
971 linux32_fixlimit(struct rlimit *rl, int which)
972 {
973 
974 	switch (which) {
975 	case RLIMIT_DATA:
976 		if (linux32_maxdsiz != 0) {
977 			if (rl->rlim_cur > linux32_maxdsiz)
978 				rl->rlim_cur = linux32_maxdsiz;
979 			if (rl->rlim_max > linux32_maxdsiz)
980 				rl->rlim_max = linux32_maxdsiz;
981 		}
982 		break;
983 	case RLIMIT_STACK:
984 		if (linux32_maxssiz != 0) {
985 			if (rl->rlim_cur > linux32_maxssiz)
986 				rl->rlim_cur = linux32_maxssiz;
987 			if (rl->rlim_max > linux32_maxssiz)
988 				rl->rlim_max = linux32_maxssiz;
989 		}
990 		break;
991 	case RLIMIT_VMEM:
992 		if (linux32_maxvmem != 0) {
993 			if (rl->rlim_cur > linux32_maxvmem)
994 				rl->rlim_cur = linux32_maxvmem;
995 			if (rl->rlim_max > linux32_maxvmem)
996 				rl->rlim_max = linux32_maxvmem;
997 		}
998 		break;
999 	}
1000 }
1001 
1002 struct sysentvec elf_linux_sysvec = {
1003 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1004 	.sv_table	= linux_sysent,
1005 	.sv_mask	= 0,
1006 	.sv_sigsize	= LINUX_SIGTBLSZ,
1007 	.sv_sigtbl	= bsd_to_linux_signal,
1008 	.sv_errsize	= ELAST + 1,
1009 	.sv_errtbl	= bsd_to_linux_errno,
1010 	.sv_transtrap	= translate_traps,
1011 	.sv_fixup	= elf_linux_fixup,
1012 	.sv_sendsig	= linux_sendsig,
1013 	.sv_sigcode	= linux_sigcode,
1014 	.sv_szsigcode	= &linux_szsigcode,
1015 	.sv_prepsyscall	= linux_prepsyscall,
1016 	.sv_name	= "Linux ELF32",
1017 	.sv_coredump	= elf32_coredump,
1018 	.sv_imgact_try	= exec_linux_imgact_try,
1019 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1020 	.sv_pagesize	= PAGE_SIZE,
1021 	.sv_minuser	= VM_MIN_ADDRESS,
1022 	.sv_maxuser	= LINUX32_USRSTACK,
1023 	.sv_usrstack	= LINUX32_USRSTACK,
1024 	.sv_psstrings	= LINUX32_PS_STRINGS,
1025 	.sv_stackprot	= VM_PROT_ALL,
1026 	.sv_copyout_strings = linux_copyout_strings,
1027 	.sv_setregs	= exec_linux_setregs,
1028 	.sv_fixlimit	= linux32_fixlimit,
1029 	.sv_maxssiz	= &linux32_maxssiz,
1030 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1031 };
1032 
1033 static Elf32_Brandinfo linux_brand = {
1034 	.brand		= ELFOSABI_LINUX,
1035 	.machine	= EM_386,
1036 	.compat_3_brand	= "Linux",
1037 	.emul_path	= "/compat/linux",
1038 	.interp_path	= "/lib/ld-linux.so.1",
1039 	.sysvec		= &elf_linux_sysvec,
1040 	.interp_newpath	= NULL,
1041 	.flags		= BI_CAN_EXEC_DYN,
1042 };
1043 
1044 static Elf32_Brandinfo linux_glibc2brand = {
1045 	.brand		= ELFOSABI_LINUX,
1046 	.machine	= EM_386,
1047 	.compat_3_brand	= "Linux",
1048 	.emul_path	= "/compat/linux",
1049 	.interp_path	= "/lib/ld-linux.so.2",
1050 	.sysvec		= &elf_linux_sysvec,
1051 	.interp_newpath	= NULL,
1052 	.flags		= BI_CAN_EXEC_DYN,
1053 };
1054 
1055 Elf32_Brandinfo *linux_brandlist[] = {
1056 	&linux_brand,
1057 	&linux_glibc2brand,
1058 	NULL
1059 };
1060 
1061 static int
1062 linux_elf_modevent(module_t mod, int type, void *data)
1063 {
1064 	Elf32_Brandinfo **brandinfo;
1065 	int error;
1066 	struct linux_ioctl_handler **lihp;
1067 	struct linux_device_handler **ldhp;
1068 
1069 	error = 0;
1070 
1071 	switch(type) {
1072 	case MOD_LOAD:
1073 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1074 		     ++brandinfo)
1075 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1076 				error = EINVAL;
1077 		if (error == 0) {
1078 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1079 				linux_ioctl_register_handler(*lihp);
1080 			SET_FOREACH(ldhp, linux_device_handler_set)
1081 				linux_device_register_handler(*ldhp);
1082 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1083 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1084 			LIST_INIT(&futex_list);
1085 			sx_init(&futex_sx, "futex protection lock");
1086 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1087 			    linux_proc_exit, NULL, 1000);
1088 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1089 			    linux_schedtail, NULL, 1000);
1090 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1091 			    linux_proc_exec, NULL, 1000);
1092 			if (bootverbose)
1093 				printf("Linux ELF exec handler installed\n");
1094 		} else
1095 			printf("cannot insert Linux ELF brand handler\n");
1096 		break;
1097 	case MOD_UNLOAD:
1098 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1099 		     ++brandinfo)
1100 			if (elf32_brand_inuse(*brandinfo))
1101 				error = EBUSY;
1102 		if (error == 0) {
1103 			for (brandinfo = &linux_brandlist[0];
1104 			     *brandinfo != NULL; ++brandinfo)
1105 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1106 					error = EINVAL;
1107 		}
1108 		if (error == 0) {
1109 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1110 				linux_ioctl_unregister_handler(*lihp);
1111 			SET_FOREACH(ldhp, linux_device_handler_set)
1112 				linux_device_unregister_handler(*ldhp);
1113 			mtx_destroy(&emul_lock);
1114 			sx_destroy(&emul_shared_lock);
1115 			sx_destroy(&futex_sx);
1116 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1117 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1118 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1119 			if (bootverbose)
1120 				printf("Linux ELF exec handler removed\n");
1121 		} else
1122 			printf("Could not deinstall ELF interpreter entry\n");
1123 		break;
1124 	default:
1125 		return EOPNOTSUPP;
1126 	}
1127 	return error;
1128 }
1129 
1130 static moduledata_t linux_elf_mod = {
1131 	"linuxelf",
1132 	linux_elf_modevent,
1133 	0
1134 };
1135 
1136 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1137