xref: /freebsd/sys/amd64/linux32/linux32_sysvec.c (revision d2b2128a286a00ee53d79cb88b4e59bf42525cf9)
1 /*-
2  * Copyright (c) 2004 Tim J. Robbins
3  * Copyright (c) 2003 Peter Wemm
4  * Copyright (c) 2002 Doug Rabson
5  * Copyright (c) 1998-1999 Andrew Gallatin
6  * Copyright (c) 1994-1996 Søren Schmidt
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in this position and unchanged.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. The name of the author may not be used to endorse or promote products
19  *    derived from this software without specific prior written permission
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 #include "opt_compat.h"
36 
37 #ifndef COMPAT_IA32
38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!"
39 #endif
40 
41 #define	__ELF_WORD_SIZE	32
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/exec.h>
46 #include <sys/fcntl.h>
47 #include <sys/imgact.h>
48 #include <sys/imgact_elf.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/malloc.h>
52 #include <sys/module.h>
53 #include <sys/mutex.h>
54 #include <sys/proc.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/sysctl.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysent.h>
60 #include <sys/sysproto.h>
61 #include <sys/vnode.h>
62 #include <sys/eventhandler.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_map.h>
68 #include <vm/vm_object.h>
69 #include <vm/vm_page.h>
70 #include <vm/vm_param.h>
71 
72 #include <machine/cpu.h>
73 #include <machine/md_var.h>
74 #include <machine/pcb.h>
75 #include <machine/specialreg.h>
76 
77 #include <amd64/linux32/linux.h>
78 #include <amd64/linux32/linux32_proto.h>
79 #include <compat/linux/linux_emul.h>
80 #include <compat/linux/linux_mib.h>
81 #include <compat/linux/linux_misc.h>
82 #include <compat/linux/linux_signal.h>
83 #include <compat/linux/linux_util.h>
84 
/* Declare version 1 of the module named "linux". */
85 MODULE_VERSION(linux, 1);
86 
/* malloc(9) type M_LINUX, used for allocations tagged "Linux mode structures". */
87 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
88 
/*
 * Store one (id, val) auxiliary-vector pair as two consecutive 32-bit
 * words in user memory with suword32(), advancing 'pos' past both.
 * 'pos' is evaluated twice and modified, so it must be a plain pointer
 * lvalue.  The suword32() results are not checked.
 */
89 #define	AUXARGS_ENTRY_32(pos, id, val)	\
90 	do {				\
91 		suword32(pos++, id);	\
92 		suword32(pos++, val);	\
93 	} while (0)
94 
/*
 * The two bytes "#!" read as a single 16-bit value, in host byte order.
 * exec_linux_imgact_try() compares the first short of an image header
 * against this to recognise interpreter scripts.
 */
95 #if BYTE_ORDER == LITTLE_ENDIAN
96 #define SHELLMAGIC      0x2123 /* #! */
97 #else
98 #define SHELLMAGIC      0x2321
99 #endif
100 
101 /*
102  * Allow the sendsig functions to use the ldebug() facility
103  * even though they are not syscalls themselves. Map them
104  * to syscall 0. This is slightly less bogus than using
105  * ldebug(sigreturn).
 *
 * NOTE(review): ldebug(name) presumably looks up LINUX_SYS_linux_<name>,
 * which is why these two pseudo syscall numbers are needed -- confirm
 * against the ldebug() definition.
106  */
107 #define	LINUX_SYS_linux_rt_sendsig	0
108 #define	LINUX_SYS_linux_sendsig		0
109 
/*
 * Platform name copied onto the initial user stack by
 * linux_copyout_strings() and advertised through LINUX_AT_PLATFORM.
 */
110 const char *linux_platform = "i686";
/*
 * Number of bytes of linux_platform placed on the user stack.  It is
 * not assigned anywhere in the code visible here -- presumably set
 * during module initialisation; confirm.
 */
111 static int linux_szplatform;
/* Signal trampoline image and its size, defined outside this file. */
112 extern char linux_sigcode[];
113 extern int linux_szsigcode;
114 
/* Linux system call table, defined outside this file. */
115 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
116 
/* Linker sets of ioctl and device handlers. */
117 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
118 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
119 
/* Forward declarations for the static functions defined below. */
120 static int	elf_linux_fixup(register_t **stack_base,
121 		    struct image_params *iparams);
122 static register_t *linux_copyout_strings(struct image_params *imgp);
123 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
124 		    caddr_t *params);
125 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
126 static void	exec_linux_setregs(struct thread *td, u_long entry,
127 				   u_long stack, u_long ps_strings);
128 static void	linux32_fixlimit(struct rlimit *rl, int which);
129 
/* Futex list and the sx lock declared with it, both defined elsewhere. */
130 extern LIST_HEAD(futex_list, futex) futex_list;
131 extern struct sx futex_sx;
132 
/*
 * Event handler registration tags.  They are not used in the code
 * visible here -- presumably filled in when the module registers its
 * exit/schedtail/exec hooks; confirm.
 */
133 static eventhandler_tag linux_exit_tag;
134 static eventhandler_tag linux_schedtail_tag;
135 static eventhandler_tag linux_exec_tag;
136 
137 /*
138  * Linux syscalls return negative errno's, we do positive and map them
139  * Reference:
140  *   FreeBSD: src/sys/sys/errno.h
141  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
142  *            linux-2.6.17.8/include/asm-generic/errno.h
143  */
/*
 * Indexed by FreeBSD errno value (0 .. ELAST); each entry is the Linux
 * errno to report instead, already negated.  The FreeBSD index range
 * covered by each row is noted on the right.
 */
144 static int bsd_to_linux_errno[ELAST + 1] = {
145 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
146 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
147 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
148 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
149 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
150 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
151 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
152 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
153 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,	/* 80 - 89 */
154 	 -72, -67, -71						/* 90 - 92 */
155 };
156 
/*
 * FreeBSD-to-Linux signal number translation.  Entries are listed in
 * FreeBSD signal order; the sendsig functions below index such a table
 * with _SIG_IDX(sig) via sv_sigtbl, so slot 0 presumably corresponds to
 * signal 1 -- confirm against the sysentvec initialiser.  A 0 entry
 * means no Linux signal is substituted for that slot.
 */
157 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
158 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
159 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
160 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
161 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
162 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
163 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
164 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
165 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
166 };
167 
/*
 * Linux-to-FreeBSD signal number translation, the inverse direction of
 * bsd_to_linux_signal[].  Presumably indexed by (Linux signal - 1) --
 * confirm against the callers.  SIGBUS and SIGURG each appear twice, so
 * two Linux signals share each of those FreeBSD signals.
 */
168 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
169 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
170 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
171 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
172 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
173 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
174 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
175 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
176 	SIGIO, SIGURG, SIGSYS
177 };
178 
/* Value stored in sc_trapno when a FreeBSD trap code has no table entry. */
179 #define LINUX_T_UNKNOWN  255
/*
 * Trap number reported to Linux signal handlers, indexed by the FreeBSD
 * trap code named in each row's comment.
 */
180 static int _bsd_to_linux_trapcode[] = {
181 	LINUX_T_UNKNOWN,	/* 0 */
182 	6,			/* 1  T_PRIVINFLT */
183 	LINUX_T_UNKNOWN,	/* 2 */
184 	3,			/* 3  T_BPTFLT */
185 	LINUX_T_UNKNOWN,	/* 4 */
186 	LINUX_T_UNKNOWN,	/* 5 */
187 	16,			/* 6  T_ARITHTRAP */
188 	254,			/* 7  T_ASTFLT */
189 	LINUX_T_UNKNOWN,	/* 8 */
190 	13,			/* 9  T_PROTFLT */
191 	1,			/* 10 T_TRCTRAP */
192 	LINUX_T_UNKNOWN,	/* 11 */
193 	14,			/* 12 T_PAGEFLT */
194 	LINUX_T_UNKNOWN,	/* 13 */
195 	17,			/* 14 T_ALIGNFLT */
196 	LINUX_T_UNKNOWN,	/* 15 */
197 	LINUX_T_UNKNOWN,	/* 16 */
198 	LINUX_T_UNKNOWN,	/* 17 */
199 	0,			/* 18 T_DIVIDE */
200 	2,			/* 19 T_NMI */
201 	4,			/* 20 T_OFLOW */
202 	5,			/* 21 T_BOUND */
203 	7,			/* 22 T_DNA */
204 	8,			/* 23 T_DOUBLEFLT */
205 	9,			/* 24 T_FPOPFLT */
206 	10,			/* 25 T_TSSFLT */
207 	11,			/* 26 T_SEGNPFLT */
208 	12,			/* 27 T_STKFLT */
209 	18,			/* 28 T_MCHK */
210 	19,			/* 29 T_XMMFLT */
211 	15			/* 30 T_RESERVED */
212 };
/*
 * Bounds-checked lookup in the table above.  The comparison is against
 * a size_t, so a negative 'code' converts to a large unsigned value and
 * yields LINUX_T_UNKNOWN.  'code' is evaluated twice.
 */
213 #define bsd_to_linux_trapcode(code) \
214     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
215      _bsd_to_linux_trapcode[(code)]: \
216      LINUX_T_UNKNOWN)
217 
/*
 * Layout of the ps_strings record as seen by a 32-bit process.  The
 * record lives at user address LINUX32_PS_STRINGS and is filled in with
 * suword32() by linux_copyout_strings(); the two pointer members are
 * 32-bit user addresses.
 */
218 struct linux32_ps_strings {
219 	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
220 	u_int ps_nargvstr;	/* the number of argument strings */
221 	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
222 	u_int ps_nenvstr;	/* the number of environment strings */
223 };
224 
225 /*
226  * If FreeBSD & Linux have a difference of opinion about what a trap
227  * means, deal with it here.
228  *
229  * MPSAFE
230  */
231 static int
232 translate_traps(int signal, int trap_code)
233 {
234 	if (signal != SIGBUS)
235 		return signal;
236 	switch (trap_code) {
237 	case T_PROTFLT:
238 	case T_TSSFLT:
239 	case T_DOUBLEFLT:
240 	case T_PAGEFLT:
241 		return SIGSEGV;
242 	default:
243 		return signal;
244 	}
245 }
246 
247 static int
248 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
249 {
250 	Elf32_Auxargs *args;
251 	Elf32_Addr *base;
252 	Elf32_Addr *pos, *uplatform;
253 	struct linux32_ps_strings *arginfo;
254 
255 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
256 	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
257 	    linux_szplatform);
258 
259 	KASSERT(curthread->td_proc == imgp->proc,
260 	    ("unsafe elf_linux_fixup(), should be curproc"));
261 	base = (Elf32_Addr *)*stack_base;
262 	args = (Elf32_Auxargs *)imgp->auxargs;
263 	pos = base + (imgp->args->argc + imgp->args->envc + 2);
264 
265 	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);
266 	AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, hz);
267 	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
268 	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
269 	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
270 	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
271 	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
272 	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
273 	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
274 	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
275 	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
276 	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
277 	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
278 	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
279 	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
280 	if (args->execfd != -1)
281 		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
282 	AUXARGS_ENTRY_32(pos, AT_NULL, 0);
283 
284 	free(imgp->auxargs, M_TEMP);
285 	imgp->auxargs = NULL;
286 
287 	base--;
288 	suword32(base, (uint32_t)imgp->args->argc);
289 	*stack_base = (register_t *)base;
290 	return 0;
291 }
292 
/*
 * User segment selectors, defined outside this file.  The code below
 * loads _ucode32sel into %cs and _udatasel into %ss/%ds/%es (and
 * %fs/%gs on exec); _ucodesel is not referenced in the code visible here.
 */
293 extern int _ucodesel, _ucode32sel, _udatasel;
/*
 * Offset added to the start of the signal trampoline to obtain the
 * entry point used by linux_rt_sendsig().
 */
294 extern unsigned long linux_sznonrtsigcode;
295 
/*
 * Deliver a signal to a handler installed with SA_SIGINFO: build a
 * struct l_rt_sigframe, copy it onto the user stack and point the
 * thread's trapframe at the rt entry of the Linux signal trampoline.
 * linux_sendsig() forwards to this function for such handlers.
 *
 * Entered with the process lock and the sigacts mutex held (asserted
 * below).  Both are released while the frame is assembled and copied
 * out, and both are re-acquired before returning.
 *
 * catcher: user handler address, stored in the frame.
 * ksi:     signal information (number, code, fault address).
 * mask:    signal mask to save in the frame's ucontext.
 */
296 static void
297 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
298 {
299 	struct thread *td = curthread;
300 	struct proc *p = td->td_proc;
301 	struct sigacts *psp;
302 	struct trapframe *regs;
303 	struct l_rt_sigframe *fp, frame;
304 	int oonstack;
305 	int sig;
306 	int code;
307 
308 	sig = ksi->ksi_signo;
309 	code = ksi->ksi_code;
310 	PROC_LOCK_ASSERT(p, MA_OWNED);
311 	psp = p->p_sigacts;
312 	mtx_assert(&psp->ps_mtx, MA_OWNED);
313 	regs = td->td_frame;
	/* Remember whether the thread is already on its alternate stack. */
314 	oonstack = sigonstack(regs->tf_rsp);
315 
316 #ifdef DEBUG
317 	if (ldebug(rt_sendsig))
318 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
319 		    catcher, sig, (void*)mask, code);
320 #endif
321 	/*
322 	 * Allocate space for the signal handler context.
	 * The frame goes at the top of the alternate stack when one is
	 * configured, is not already in use and was requested for this
	 * signal; otherwise directly below the current user stack pointer.
323 	 */
324 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
325 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
326 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
327 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
328 	} else
329 		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	/* psp is not consulted again until the mutex is re-taken on exit. */
330 	mtx_unlock(&psp->ps_mtx);
331 
332 	/*
333 	 * Build the argument list for the signal handler.
	 * The signal number is first translated through the ABI's
	 * signal table, when the ABI provides one.
334 	 */
335 	if (p->p_sysent->sv_sigtbl)
336 		if (sig <= p->p_sysent->sv_sigsize)
337 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
338 
	/* Start from an all-zero frame so unset fields are not copied out as garbage. */
339 	bzero(&frame, sizeof(frame));
340 
341 	frame.sf_handler = PTROUT(catcher);
342 	frame.sf_sig = sig;
	/* These are the user-space addresses of the copies inside *fp. */
343 	frame.sf_siginfo = PTROUT(&fp->sf_si);
344 	frame.sf_ucontext = PTROUT(&fp->sf_sc);
345 
346 	/* Fill in POSIX parts */
347 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
348 
349 	/*
350 	 * Build the signal context to be used by sigreturn.
351 	 */
352 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
353 	frame.sf_sc.uc_link = 0;		/* XXX ??? */
354 
355 	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
356 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
357 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
358 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* The process lock is dropped here and re-taken before returning. */
359 	PROC_UNLOCK(p);
360 
361 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
362 
	/* Save the interrupted register state; each value is narrowed to its 32-bit slot. */
363 	frame.sf_sc.uc_mcontext.sc_mask	= frame.sf_sc.uc_sigmask.__bits[0];
364 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
365 	frame.sf_sc.uc_mcontext.sc_fs     = rfs();
366 	__asm __volatile("mov %%es,%0" :
367 	    "=rm" (frame.sf_sc.uc_mcontext.sc_es));
368 	__asm __volatile("mov %%ds,%0" :
369 	    "=rm" (frame.sf_sc.uc_mcontext.sc_ds));
370 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
371 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
372 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
373 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
374 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
375 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
376 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
377 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
378 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
379 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
380 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
381 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
382 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
383 	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
384 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
385 
386 #ifdef DEBUG
387 	if (ldebug(rt_sendsig))
388 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
389 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
390 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
391 #endif
392 
393 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
394 		/*
395 		 * Process has trashed its stack; give it an illegal
396 		 * instruction to halt it in its tracks.
397 		 */
398 #ifdef DEBUG
399 		if (ldebug(rt_sendsig))
400 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
401 			    fp, oonstack);
402 #endif
		/* NOTE(review): sigexit() is presumably entered with the process lock held and does not return -- confirm. */
403 		PROC_LOCK(p);
404 		sigexit(td, SIGILL);
405 	}
406 
407 	/*
408 	 * Build context to run handler in.
	 * Execution resumes in the trampoline that sits just below
	 * LINUX32_PS_STRINGS, at offset linux_sznonrtsigcode from its
	 * start, with the new frame as the stack pointer.
409 	 */
410 	regs->tf_rsp = PTROUT(fp);
411 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
412 	    linux_sznonrtsigcode;
	/* The handler starts with the trace and direction flags cleared. */
413 	regs->tf_rflags &= ~(PSL_T | PSL_D);
414 	regs->tf_cs = _ucode32sel;
415 	regs->tf_ss = _udatasel;
416 	load_ds(_udatasel);
417 	td->td_pcb->pcb_ds = _udatasel;
418 	load_es(_udatasel);
419 	td->td_pcb->pcb_es = _udatasel;
420 	/* leave user %fs and %gs untouched */
	/* Restore the lock state that was asserted on entry. */
421 	PROC_LOCK(p);
422 	mtx_lock(&psp->ps_mtx);
423 }
424 
425 
426 /*
427  * Send an interrupt to process.
428  *
429  * Stack is set up to allow sigcode stored
430  * in u. to call routine, followed by kcall
431  * to sigreturn routine below.  After sigreturn
432  * resets the signal mask, the stack, and the
433  * frame pointer, it returns to the user
434  * specified pc, psl.
 *
 * Handlers installed with SA_SIGINFO are passed on to
 * linux_rt_sendsig(); all others get a struct l_sigframe built here.
 *
 * Entered with the process lock and the sigacts mutex held (asserted
 * below).  Both are released while the frame is assembled and copied
 * out, and both are re-acquired before returning.
 *
 * catcher: user handler address, stored in the frame.
 * ksi:     signal information (number, code, fault address).
 * mask:    signal mask to save in the frame.
435  */
436 static void
437 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
438 {
439 	struct thread *td = curthread;
440 	struct proc *p = td->td_proc;
441 	struct sigacts *psp;
442 	struct trapframe *regs;
443 	struct l_sigframe *fp, frame;
444 	l_sigset_t lmask;
445 	int oonstack, i;
446 	int sig, code;
447 
448 	sig = ksi->ksi_signo;
449 	code = ksi->ksi_code;
450 	PROC_LOCK_ASSERT(p, MA_OWNED);
451 	psp = p->p_sigacts;
452 	mtx_assert(&psp->ps_mtx, MA_OWNED);
453 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
454 		/* Signal handler installed with SA_SIGINFO. */
455 		linux_rt_sendsig(catcher, ksi, mask);
456 		return;
457 	}
458 
459 	regs = td->td_frame;
	/* Remember whether the thread is already on its alternate stack. */
460 	oonstack = sigonstack(regs->tf_rsp);
461 
462 #ifdef DEBUG
463 	if (ldebug(sendsig))
464 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
465 		    catcher, sig, (void*)mask, code);
466 #endif
467 
468 	/*
469 	 * Allocate space for the signal handler context.
	 * The frame goes at the top of the alternate stack when one is
	 * configured, is not already in use and was requested for this
	 * signal; otherwise directly below the current user stack pointer.
470 	 */
471 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
472 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
473 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
474 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
475 	} else
476 		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	/* Both locks are dropped here and re-taken before returning. */
477 	mtx_unlock(&psp->ps_mtx);
478 	PROC_UNLOCK(p);
479 
480 	/*
481 	 * Build the argument list for the signal handler.
	 * The signal number is first translated through the ABI's
	 * signal table, when the ABI provides one.
482 	 */
483 	if (p->p_sysent->sv_sigtbl)
484 		if (sig <= p->p_sysent->sv_sigsize)
485 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
486 
	/* Start from an all-zero frame so unset fields are not copied out as garbage. */
487 	bzero(&frame, sizeof(frame));
488 
489 	frame.sf_handler = PTROUT(catcher);
490 	frame.sf_sig = sig;
491 
492 	bsd_to_linux_sigset(mask, &lmask);
493 
494 	/*
495 	 * Build the signal context to be used by sigreturn.
	 * Word 0 of the mask lives in the sigcontext; the remaining
	 * words are stored in sf_extramask[] further down.
496 	 */
497 	frame.sf_sc.sc_mask   = lmask.__bits[0];
498 	frame.sf_sc.sc_gs     = rgs();
499 	frame.sf_sc.sc_fs     = rfs();
500 	__asm __volatile("mov %%es,%0" : "=rm" (frame.sf_sc.sc_es));
501 	__asm __volatile("mov %%ds,%0" : "=rm" (frame.sf_sc.sc_ds));
502 	frame.sf_sc.sc_edi    = regs->tf_rdi;
503 	frame.sf_sc.sc_esi    = regs->tf_rsi;
504 	frame.sf_sc.sc_ebp    = regs->tf_rbp;
505 	frame.sf_sc.sc_ebx    = regs->tf_rbx;
506 	frame.sf_sc.sc_edx    = regs->tf_rdx;
507 	frame.sf_sc.sc_ecx    = regs->tf_rcx;
508 	frame.sf_sc.sc_eax    = regs->tf_rax;
509 	frame.sf_sc.sc_eip    = regs->tf_rip;
510 	frame.sf_sc.sc_cs     = regs->tf_cs;
511 	frame.sf_sc.sc_eflags = regs->tf_rflags;
512 	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
513 	frame.sf_sc.sc_ss     = regs->tf_ss;
514 	frame.sf_sc.sc_err    = regs->tf_err;
515 	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
516 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
517 
518 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
519 		frame.sf_extramask[i] = lmask.__bits[i+1];
520 
521 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
522 		/*
523 		 * Process has trashed its stack; give it an illegal
524 		 * instruction to halt it in its tracks.
		 * NOTE(review): sigexit() is presumably entered with the
		 * process lock held and does not return -- confirm.
525 		 */
526 		PROC_LOCK(p);
527 		sigexit(td, SIGILL);
528 	}
529 
530 	/*
531 	 * Build context to run handler in.
	 * Execution resumes at the start of the trampoline that sits
	 * just below LINUX32_PS_STRINGS, with the new frame as the
	 * stack pointer.
532 	 */
533 	regs->tf_rsp = PTROUT(fp);
534 	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	/* The handler starts with the trace and direction flags cleared. */
535 	regs->tf_rflags &= ~(PSL_T | PSL_D);
536 	regs->tf_cs = _ucode32sel;
537 	regs->tf_ss = _udatasel;
538 	load_ds(_udatasel);
539 	td->td_pcb->pcb_ds = _udatasel;
540 	load_es(_udatasel);
541 	td->td_pcb->pcb_es = _udatasel;
542 	/* leave user %fs and %gs untouched */
	/* Restore the lock state that was asserted on entry. */
543 	PROC_LOCK(p);
544 	mtx_lock(&psp->ps_mtx);
545 }
546 
547 /*
548  * System call to cleanup state after a signal
549  * has been taken.  Reset signal mask and
550  * stack state from context left by sendsig (above).
551  * Return to previous pc and psl as specified by
552  * context left by sendsig. Check carefully to
553  * make sure that the user has not modified the
554  * psl to gain improper privileges or to cause
555  * a machine fault.
556  */
557 int
558 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
559 {
560 	struct proc *p = td->td_proc;
561 	struct l_sigframe frame;
562 	struct trapframe *regs;
563 	l_sigset_t lmask;
564 	int eflags, i;
565 	ksiginfo_t ksi;
566 
567 	regs = td->td_frame;
568 
569 #ifdef DEBUG
570 	if (ldebug(sigreturn))
571 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
572 #endif
573 	/*
574 	 * The trampoline code hands us the sigframe.
575 	 * It is unsafe to keep track of it ourselves, in the event that a
576 	 * program jumps out of a signal handler.
577 	 */
578 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
579 		return (EFAULT);
580 
581 	/*
582 	 * Check for security violations.
583 	 */
584 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
585 	eflags = frame.sf_sc.sc_eflags;
586 	/*
587 	 * XXX do allow users to change the privileged flag PSL_RF.  The
588 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
589 	 * sometimes set it there too.  tf_eflags is kept in the signal
590 	 * context during signal handling and there is no other place
591 	 * to remember it, so the PSL_RF bit may be corrupted by the
592 	 * signal handler without us knowing.  Corruption of the PSL_RF
593 	 * bit at worst causes one more or one less debugger trap, so
594 	 * allowing it is fairly harmless.
595 	 */
596 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
597 		return(EINVAL);
598 
599 	/*
600 	 * Don't allow users to load a valid privileged %cs.  Let the
601 	 * hardware check for invalid selectors, excess privilege in
602 	 * other selectors, invalid %eip's and invalid %esp's.
603 	 */
604 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
605 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
606 		ksiginfo_init_trap(&ksi);
607 		ksi.ksi_signo = SIGBUS;
608 		ksi.ksi_code = BUS_OBJERR;
609 		ksi.ksi_trapno = T_PROTFLT;
610 		ksi.ksi_addr = (void *)regs->tf_rip;
611 		trapsignal(td, &ksi);
612 		return(EINVAL);
613 	}
614 
615 	lmask.__bits[0] = frame.sf_sc.sc_mask;
616 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
617 		lmask.__bits[i+1] = frame.sf_extramask[i];
618 	PROC_LOCK(p);
619 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
620 	SIG_CANTMASK(td->td_sigmask);
621 	signotify(td);
622 	PROC_UNLOCK(p);
623 
624 	/*
625 	 * Restore signal context.
626 	 */
627 	/* Selectors were restored by the trampoline. */
628 	regs->tf_rdi    = frame.sf_sc.sc_edi;
629 	regs->tf_rsi    = frame.sf_sc.sc_esi;
630 	regs->tf_rbp    = frame.sf_sc.sc_ebp;
631 	regs->tf_rbx    = frame.sf_sc.sc_ebx;
632 	regs->tf_rdx    = frame.sf_sc.sc_edx;
633 	regs->tf_rcx    = frame.sf_sc.sc_ecx;
634 	regs->tf_rax    = frame.sf_sc.sc_eax;
635 	regs->tf_rip    = frame.sf_sc.sc_eip;
636 	regs->tf_cs     = frame.sf_sc.sc_cs;
637 	regs->tf_rflags = eflags;
638 	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
639 	regs->tf_ss     = frame.sf_sc.sc_ss;
640 
641 	return (EJUSTRETURN);
642 }
643 
644 /*
645  * System call to cleanup state after a signal
646  * has been taken.  Reset signal mask and
647  * stack state from context left by rt_sendsig (above).
648  * Return to previous pc and psl as specified by
649  * context left by sendsig. Check carefully to
650  * make sure that the user has not modified the
651  * psl to gain improper privileges or to cause
652  * a machine fault.
653  */
654 int
655 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
656 {
657 	struct proc *p = td->td_proc;
658 	struct l_ucontext uc;
659 	struct l_sigcontext *context;
660 	l_stack_t *lss;
661 	stack_t ss;
662 	struct trapframe *regs;
663 	int eflags;
664 	ksiginfo_t ksi;
665 
666 	regs = td->td_frame;
667 
668 #ifdef DEBUG
669 	if (ldebug(rt_sigreturn))
670 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
671 #endif
672 	/*
673 	 * The trampoline code hands us the ucontext.
674 	 * It is unsafe to keep track of it ourselves, in the event that a
675 	 * program jumps out of a signal handler.
676 	 */
677 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
678 		return (EFAULT);
679 
680 	context = &uc.uc_mcontext;
681 
682 	/*
683 	 * Check for security violations.
684 	 */
685 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
686 	eflags = context->sc_eflags;
687 	/*
688 	 * XXX do allow users to change the privileged flag PSL_RF.  The
689 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
690 	 * sometimes set it there too.  tf_eflags is kept in the signal
691 	 * context during signal handling and there is no other place
692 	 * to remember it, so the PSL_RF bit may be corrupted by the
693 	 * signal handler without us knowing.  Corruption of the PSL_RF
694 	 * bit at worst causes one more or one less debugger trap, so
695 	 * allowing it is fairly harmless.
696 	 */
697 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
698 		return(EINVAL);
699 
700 	/*
701 	 * Don't allow users to load a valid privileged %cs.  Let the
702 	 * hardware check for invalid selectors, excess privilege in
703 	 * other selectors, invalid %eip's and invalid %esp's.
704 	 */
705 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
706 	if (!CS_SECURE(context->sc_cs)) {
707 		ksiginfo_init_trap(&ksi);
708 		ksi.ksi_signo = SIGBUS;
709 		ksi.ksi_code = BUS_OBJERR;
710 		ksi.ksi_trapno = T_PROTFLT;
711 		ksi.ksi_addr = (void *)regs->tf_rip;
712 		trapsignal(td, &ksi);
713 		return(EINVAL);
714 	}
715 
716 	PROC_LOCK(p);
717 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
718 	SIG_CANTMASK(td->td_sigmask);
719 	signotify(td);
720 	PROC_UNLOCK(p);
721 
722 	/*
723 	 * Restore signal context
724 	 */
725 	/* Selectors were restored by the trampoline. */
726 	regs->tf_rdi    = context->sc_edi;
727 	regs->tf_rsi    = context->sc_esi;
728 	regs->tf_rbp    = context->sc_ebp;
729 	regs->tf_rbx    = context->sc_ebx;
730 	regs->tf_rdx    = context->sc_edx;
731 	regs->tf_rcx    = context->sc_ecx;
732 	regs->tf_rax    = context->sc_eax;
733 	regs->tf_rip    = context->sc_eip;
734 	regs->tf_cs     = context->sc_cs;
735 	regs->tf_rflags = eflags;
736 	regs->tf_rsp    = context->sc_esp_at_signal;
737 	regs->tf_ss     = context->sc_ss;
738 
739 	/*
740 	 * call sigaltstack & ignore results..
741 	 */
742 	lss = &uc.uc_stack;
743 	ss.ss_sp = PTRIN(lss->ss_sp);
744 	ss.ss_size = lss->ss_size;
745 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
746 
747 #ifdef DEBUG
748 	if (ldebug(rt_sigreturn))
749 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
750 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
751 #endif
752 	(void)kern_sigaltstack(td, &ss, NULL);
753 
754 	return (EJUSTRETURN);
755 }
756 
757 /*
758  * MPSAFE
759  */
760 static void
761 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
762 {
763 	args[0] = tf->tf_rbx;
764 	args[1] = tf->tf_rcx;
765 	args[2] = tf->tf_rdx;
766 	args[3] = tf->tf_rsi;
767 	args[4] = tf->tf_rdi;
768 	args[5] = tf->tf_rbp;	/* Unconfirmed */
769 	*params = NULL;		/* no copyin */
770 }
771 
772 /*
773  * If a linux binary is exec'ing something, try this image activator
774  * first.  We override standard shell script execution in order to
775  * be able to modify the interpreter path.  We only do this if a linux
776  * binary is doing the exec, so we do not create an EXEC module for it.
777  */
778 static int	exec_linux_imgact_try(struct image_params *iparams);
779 
780 static int
781 exec_linux_imgact_try(struct image_params *imgp)
782 {
783 	const char *head = (const char *)imgp->image_header;
784 	char *rpath;
785 	int error = -1, len;
786 
787 	/*
788 	* The interpreter for shell scripts run from a linux binary needs
789 	* to be located in /compat/linux if possible in order to recursively
790 	* maintain linux path emulation.
791 	*/
792 	if (((const short *)head)[0] == SHELLMAGIC) {
793 		/*
794 		* Run our normal shell image activator.  If it succeeds attempt
795 		* to use the alternate path for the interpreter.  If an
796 		* alternate * path is found, use our stringspace to store it.
797 		*/
798 		if ((error = exec_shell_imgact(imgp)) == 0) {
799 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
800 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
801 			    AT_FDCWD);
802 			if (rpath != NULL) {
803 				len = strlen(rpath) + 1;
804 
805 				if (len <= MAXSHELLCMDLEN) {
806 					memcpy(imgp->interpreter_name, rpath,
807 					    len);
808 				}
809 				free(rpath, M_TEMP);
810 			}
811 		}
812 	}
813 	return(error);
814 }
815 
816 /*
817  * Clear registers on exec
818  * XXX copied from ia32_signal.c.
819  */
820 static void
821 exec_linux_setregs(td, entry, stack, ps_strings)
822 	struct thread *td;
823 	u_long entry;
824 	u_long stack;
825 	u_long ps_strings;
826 {
827 	struct trapframe *regs = td->td_frame;
828 	struct pcb *pcb = td->td_pcb;
829 
830 	critical_enter();
831 	wrmsr(MSR_FSBASE, 0);
832 	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
833 	pcb->pcb_fsbase = 0;
834 	pcb->pcb_gsbase = 0;
835 	critical_exit();
836 	load_ds(_udatasel);
837 	load_es(_udatasel);
838 	load_fs(_udatasel);
839 	load_gs(_udatasel);
840 	pcb->pcb_ds = _udatasel;
841 	pcb->pcb_es = _udatasel;
842 	pcb->pcb_fs = _udatasel;
843 	pcb->pcb_gs = _udatasel;
844 	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;
845 
846 	bzero((char *)regs, sizeof(struct trapframe));
847 	regs->tf_rip = entry;
848 	regs->tf_rsp = stack;
849 	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
850 	regs->tf_ss = _udatasel;
851 	regs->tf_cs = _ucode32sel;
852 	regs->tf_rbx = ps_strings;
853 	load_cr0(rcr0() | CR0_MP | CR0_TS);
854 	fpstate_drop(td);
855 
856 	/* Return via doreti so that we can change to a different %cs */
857 	pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT;
858 	pcb->pcb_flags &= ~PCB_GS32BIT;
859 	td->td_retval[1] = 0;
860 }
861 
862 /*
863  * XXX copied from ia32_sysvec.c.
864  */
865 static register_t *
866 linux_copyout_strings(struct image_params *imgp)
867 {
868 	int argc, envc;
869 	u_int32_t *vectp;
870 	char *stringp, *destp;
871 	u_int32_t *stack_base;
872 	struct linux32_ps_strings *arginfo;
873 
874 	/*
875 	 * Calculate string base and vector table pointers.
876 	 * Also deal with signal trampoline code for this exec type.
877 	 */
878 	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
879 	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
880 	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
881 	    sizeof(char *));
882 
883 	/*
884 	 * install sigcode
885 	 */
886 	copyout(imgp->proc->p_sysent->sv_sigcode,
887 	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);
888 
889 	/*
890 	 * Install LINUX_PLATFORM
891 	 */
892 	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
893 	    linux_szplatform), linux_szplatform);
894 
895 	/*
896 	 * If we have a valid auxargs ptr, prepare some room
897 	 * on the stack.
898 	 */
899 	if (imgp->auxargs) {
900 		/*
901 		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
902 		 * lower compatibility.
903 		 */
904 		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
905 		    (LINUX_AT_COUNT * 2);
906 		/*
907 		 * The '+ 2' is for the null pointers at the end of each of
908 		 * the arg and env vector sets,and imgp->auxarg_size is room
909 		 * for argument of Runtime loader.
910 		 */
911 		vectp = (u_int32_t *) (destp - (imgp->args->argc +
912 		    imgp->args->envc + 2 + imgp->auxarg_size) *
913 		    sizeof(u_int32_t));
914 
915 	} else
916 		/*
917 		 * The '+ 2' is for the null pointers at the end of each of
918 		 * the arg and env vector sets
919 		 */
920 		vectp = (u_int32_t *)(destp - (imgp->args->argc +
921 		    imgp->args->envc + 2) * sizeof(u_int32_t));
922 
923 	/*
924 	 * vectp also becomes our initial stack base
925 	 */
926 	stack_base = vectp;
927 
928 	stringp = imgp->args->begin_argv;
929 	argc = imgp->args->argc;
930 	envc = imgp->args->envc;
931 	/*
932 	 * Copy out strings - arguments and environment.
933 	 */
934 	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);
935 
936 	/*
937 	 * Fill in "ps_strings" struct for ps, w, etc.
938 	 */
939 	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
940 	suword32(&arginfo->ps_nargvstr, argc);
941 
942 	/*
943 	 * Fill in argument portion of vector table.
944 	 */
945 	for (; argc > 0; --argc) {
946 		suword32(vectp++, (uint32_t)(intptr_t)destp);
947 		while (*stringp++ != 0)
948 			destp++;
949 		destp++;
950 	}
951 
952 	/* a null vector table pointer separates the argp's from the envp's */
953 	suword32(vectp++, 0);
954 
955 	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
956 	suword32(&arginfo->ps_nenvstr, envc);
957 
958 	/*
959 	 * Fill in environment portion of vector table.
960 	 */
961 	for (; envc > 0; --envc) {
962 		suword32(vectp++, (uint32_t)(intptr_t)destp);
963 		while (*stringp++ != 0)
964 			destp++;
965 		destp++;
966 	}
967 
968 	/* end of vector table is a null pointer */
969 	suword32(vectp, 0);
970 
971 	return ((register_t *)stack_base);
972 }
973 
974 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
975     "32-bit Linux emulation");
976 
977 static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
978 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
979     &linux32_maxdsiz, 0, "");
980 static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
981 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
982     &linux32_maxssiz, 0, "");
983 static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
984 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
985     &linux32_maxvmem, 0, "");
986 
987 static void
988 linux32_fixlimit(struct rlimit *rl, int which)
989 {
990 
991 	switch (which) {
992 	case RLIMIT_DATA:
993 		if (linux32_maxdsiz != 0) {
994 			if (rl->rlim_cur > linux32_maxdsiz)
995 				rl->rlim_cur = linux32_maxdsiz;
996 			if (rl->rlim_max > linux32_maxdsiz)
997 				rl->rlim_max = linux32_maxdsiz;
998 		}
999 		break;
1000 	case RLIMIT_STACK:
1001 		if (linux32_maxssiz != 0) {
1002 			if (rl->rlim_cur > linux32_maxssiz)
1003 				rl->rlim_cur = linux32_maxssiz;
1004 			if (rl->rlim_max > linux32_maxssiz)
1005 				rl->rlim_max = linux32_maxssiz;
1006 		}
1007 		break;
1008 	case RLIMIT_VMEM:
1009 		if (linux32_maxvmem != 0) {
1010 			if (rl->rlim_cur > linux32_maxvmem)
1011 				rl->rlim_cur = linux32_maxvmem;
1012 			if (rl->rlim_max > linux32_maxvmem)
1013 				rl->rlim_max = linux32_maxvmem;
1014 		}
1015 		break;
1016 	}
1017 }
1018 
1019 struct sysentvec elf_linux_sysvec = {
1020 	.sv_size	= LINUX_SYS_MAXSYSCALL,
1021 	.sv_table	= linux_sysent,
1022 	.sv_mask	= 0,
1023 	.sv_sigsize	= LINUX_SIGTBLSZ,
1024 	.sv_sigtbl	= bsd_to_linux_signal,
1025 	.sv_errsize	= ELAST + 1,
1026 	.sv_errtbl	= bsd_to_linux_errno,
1027 	.sv_transtrap	= translate_traps,
1028 	.sv_fixup	= elf_linux_fixup,
1029 	.sv_sendsig	= linux_sendsig,
1030 	.sv_sigcode	= linux_sigcode,
1031 	.sv_szsigcode	= &linux_szsigcode,
1032 	.sv_prepsyscall	= linux_prepsyscall,
1033 	.sv_name	= "Linux ELF32",
1034 	.sv_coredump	= elf32_coredump,
1035 	.sv_imgact_try	= exec_linux_imgact_try,
1036 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
1037 	.sv_pagesize	= PAGE_SIZE,
1038 	.sv_minuser	= VM_MIN_ADDRESS,
1039 	.sv_maxuser	= LINUX32_USRSTACK,
1040 	.sv_usrstack	= LINUX32_USRSTACK,
1041 	.sv_psstrings	= LINUX32_PS_STRINGS,
1042 	.sv_stackprot	= VM_PROT_ALL,
1043 	.sv_copyout_strings = linux_copyout_strings,
1044 	.sv_setregs	= exec_linux_setregs,
1045 	.sv_fixlimit	= linux32_fixlimit,
1046 	.sv_maxssiz	= &linux32_maxssiz,
1047 	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32
1048 };
1049 
1050 static char GNULINUX_ABI_VENDOR[] = "GNU";
1051 
1052 static Elf_Brandnote linux32_brandnote = {
1053 	.hdr.n_namesz	= sizeof(GNULINUX_ABI_VENDOR),
1054 	.hdr.n_descsz	= 16,
1055 	.hdr.n_type	= 1,
1056 	.vendor		= GNULINUX_ABI_VENDOR,
1057 	.flags		= 0
1058 };
1059 
1060 static Elf32_Brandinfo linux_brand = {
1061 	.brand		= ELFOSABI_LINUX,
1062 	.machine	= EM_386,
1063 	.compat_3_brand	= "Linux",
1064 	.emul_path	= "/compat/linux",
1065 	.interp_path	= "/lib/ld-linux.so.1",
1066 	.sysvec		= &elf_linux_sysvec,
1067 	.interp_newpath	= NULL,
1068 	.brand_note	= &linux32_brandnote,
1069 	.flags		= BI_CAN_EXEC_DYN
1070 };
1071 
1072 static Elf32_Brandinfo linux_glibc2brand = {
1073 	.brand		= ELFOSABI_LINUX,
1074 	.machine	= EM_386,
1075 	.compat_3_brand	= "Linux",
1076 	.emul_path	= "/compat/linux",
1077 	.interp_path	= "/lib/ld-linux.so.2",
1078 	.sysvec		= &elf_linux_sysvec,
1079 	.interp_newpath	= NULL,
1080 	.brand_note	= &linux32_brandnote,
1081 	.flags		= BI_CAN_EXEC_DYN
1082 };
1083 
1084 Elf32_Brandinfo *linux_brandlist[] = {
1085 	&linux_brand,
1086 	&linux_glibc2brand,
1087 	NULL
1088 };
1089 
1090 static int
1091 linux_elf_modevent(module_t mod, int type, void *data)
1092 {
1093 	Elf32_Brandinfo **brandinfo;
1094 	int error;
1095 	struct linux_ioctl_handler **lihp;
1096 	struct linux_device_handler **ldhp;
1097 
1098 	error = 0;
1099 
1100 	switch(type) {
1101 	case MOD_LOAD:
1102 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1103 		     ++brandinfo)
1104 			if (elf32_insert_brand_entry(*brandinfo) < 0)
1105 				error = EINVAL;
1106 		if (error == 0) {
1107 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1108 				linux_ioctl_register_handler(*lihp);
1109 			SET_FOREACH(ldhp, linux_device_handler_set)
1110 				linux_device_register_handler(*ldhp);
1111 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
1112 			sx_init(&emul_shared_lock, "emuldata->shared lock");
1113 			LIST_INIT(&futex_list);
1114 			sx_init(&futex_sx, "futex protection lock");
1115 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit,
1116 			    linux_proc_exit, NULL, 1000);
1117 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail,
1118 			    linux_schedtail, NULL, 1000);
1119 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec,
1120 			    linux_proc_exec, NULL, 1000);
1121 			linux_szplatform = roundup(strlen(linux_platform) + 1,
1122 			    sizeof(char *));
1123 			if (bootverbose)
1124 				printf("Linux ELF exec handler installed\n");
1125 		} else
1126 			printf("cannot insert Linux ELF brand handler\n");
1127 		break;
1128 	case MOD_UNLOAD:
1129 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
1130 		     ++brandinfo)
1131 			if (elf32_brand_inuse(*brandinfo))
1132 				error = EBUSY;
1133 		if (error == 0) {
1134 			for (brandinfo = &linux_brandlist[0];
1135 			     *brandinfo != NULL; ++brandinfo)
1136 				if (elf32_remove_brand_entry(*brandinfo) < 0)
1137 					error = EINVAL;
1138 		}
1139 		if (error == 0) {
1140 			SET_FOREACH(lihp, linux_ioctl_handler_set)
1141 				linux_ioctl_unregister_handler(*lihp);
1142 			SET_FOREACH(ldhp, linux_device_handler_set)
1143 				linux_device_unregister_handler(*ldhp);
1144 			mtx_destroy(&emul_lock);
1145 			sx_destroy(&emul_shared_lock);
1146 			sx_destroy(&futex_sx);
1147 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
1148 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
1149 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
1150 			if (bootverbose)
1151 				printf("Linux ELF exec handler removed\n");
1152 		} else
1153 			printf("Could not deinstall ELF interpreter entry\n");
1154 		break;
1155 	default:
1156 		return EOPNOTSUPP;
1157 	}
1158 	return error;
1159 }
1160 
1161 static moduledata_t linux_elf_mod = {
1162 	"linuxelf",
1163 	linux_elf_modevent,
1164 	0
1165 };
1166 
1167 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
1168