xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 52267f7411adcc76ede961420e08c0e42f42d415)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/imgact_aout.h>
38 #include <sys/imgact_elf.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 #include <sys/vnode.h>
50 #include <sys/eventhandler.h>
51 
52 #include <vm/vm.h>
53 #include <vm/pmap.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_map.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_param.h>
59 
60 #include <machine/cpu.h>
61 #include <machine/md_var.h>
62 #include <machine/pcb.h>
63 
64 #include <i386/linux/linux.h>
65 #include <i386/linux/linux_proto.h>
66 #include <compat/linux/linux_emul.h>
67 #include <compat/linux/linux_mib.h>
68 #include <compat/linux/linux_signal.h>
69 #include <compat/linux/linux_util.h>
70 
71 MODULE_VERSION(linux, 1);
72 
73 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
74 
75 #if BYTE_ORDER == LITTLE_ENDIAN
76 #define SHELLMAGIC      0x2123 /* #! */
77 #else
78 #define SHELLMAGIC      0x2321
79 #endif
80 
/*
 * The sendsig routines are not system calls, yet they want to use the
 * ldebug() facility.  Alias both of them to syscall number 0, which is
 * slightly less bogus than borrowing ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_sendsig		0
#define	LINUX_SYS_linux_rt_sendsig	0

/*
 * x87 control word that exec_linux_setregs() loads for a new image, and
 * the inline-assembly helper that loads a control word from memory.
 */
#define	__LINUX_NPXCW__		0x37f
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
92 
93 extern char linux_sigcode[];
94 extern int linux_szsigcode;
95 
96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
97 
98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
100 
101 static int	linux_fixup(register_t **stack_base,
102 		    struct image_params *iparams);
103 static int	elf_linux_fixup(register_t **stack_base,
104 		    struct image_params *iparams);
105 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
106 		    caddr_t *params);
107 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
108 static void	exec_linux_setregs(struct thread *td, u_long entry,
109 				   u_long stack, u_long ps_strings);
110 
111 extern LIST_HEAD(futex_list, futex) futex_list;
112 extern struct sx futex_sx;
113 
114 static eventhandler_tag linux_exit_tag;
115 static eventhandler_tag linux_schedtail_tag;
116 static eventhandler_tag linux_exec_tag;
117 
118 /*
119  * Linux syscalls return negative errno's, we do positive and map them
120  * Reference:
121  *   FreeBSD: src/sys/sys/errno.h
122  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
123  *            linux-2.6.17.8/include/asm-generic/errno.h
124  */
125 static int bsd_to_linux_errno[ELAST + 1] = {
126 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
127 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
128 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
129 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
130 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
131 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
132 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
133 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
134 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
135 	 -72, -67, -71
136 };
137 
138 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
139 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
140 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
141 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
142 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
143 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
144 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
145 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
146 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
147 };
148 
149 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
150 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
151 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
152 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
153 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
154 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
155 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
156 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
157 	SIGIO, SIGURG, SIGSYS
158 };
159 
/* Value reported to Linux when a FreeBSD trap type has no translation. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap type -> Linux trap number, indexed by the FreeBSD trap
 * type.  The trailing comments give the slot index and, where one is
 * assigned, the FreeBSD trap name.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1: T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3: T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6: T_ARITHTRAP */
	254,			/*  7: T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9: T_PROTFLT */
	1,			/* 10: T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12: T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14: T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18: T_DIVIDE */
	2,			/* 19: T_NMI */
	4,			/* 20: T_OFLOW */
	5,			/* 21: T_BOUND */
	7,			/* 22: T_DNA */
	8,			/* 23: T_DOUBLEFLT */
	9,			/* 24: T_FPOPFLT */
	10,			/* 25: T_TSSFLT */
	11,			/* 26: T_SEGNPFLT */
	12,			/* 27: T_STKFLT */
	18,			/* 28: T_MCHK */
	19,			/* 29: T_XMMFLT */
	15			/* 30: T_RESERVED */
};

/*
 * Bounds-checked lookup in _bsd_to_linux_trapcode[]: anything at or past
 * the end of the table yields LINUX_T_UNKNOWN.  Note that the argument is
 * evaluated twice.
 */
#define bsd_to_linux_trapcode(code)					\
    ((code) < sizeof(_bsd_to_linux_trapcode) /				\
	sizeof(_bsd_to_linux_trapcode[0]) ?				\
	_bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
198 
199 /*
200  * If FreeBSD & Linux have a difference of opinion about what a trap
201  * means, deal with it here.
202  *
203  * MPSAFE
204  */
205 static int
206 translate_traps(int signal, int trap_code)
207 {
208 	if (signal != SIGBUS)
209 		return signal;
210 	switch (trap_code) {
211 	case T_PROTFLT:
212 	case T_TSSFLT:
213 	case T_DOUBLEFLT:
214 	case T_PAGEFLT:
215 		return SIGSEGV;
216 	default:
217 		return signal;
218 	}
219 }
220 
221 static int
222 linux_fixup(register_t **stack_base, struct image_params *imgp)
223 {
224 	register_t *argv, *envp;
225 
226 	argv = *stack_base;
227 	envp = *stack_base + (imgp->args->argc + 1);
228 	(*stack_base)--;
229 	**stack_base = (intptr_t)(void *)envp;
230 	(*stack_base)--;
231 	**stack_base = (intptr_t)(void *)argv;
232 	(*stack_base)--;
233 	**stack_base = imgp->args->argc;
234 	return 0;
235 }
236 
237 static int
238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
239 {
240 	Elf32_Auxargs *args;
241 	register_t *pos;
242 
243 	KASSERT(curthread->td_proc == imgp->proc,
244 	    ("unsafe elf_linux_fixup(), should be curproc"));
245 	args = (Elf32_Auxargs *)imgp->auxargs;
246 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
247 
248 	if (args->trace)
249 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
250 	if (args->execfd != -1)
251 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
252 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
253 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
254 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
255 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
256 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
257 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
258 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
259 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
260 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
261 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
262 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
263 	AUXARGS_ENTRY(pos, AT_NULL, 0);
264 
265 	free(imgp->auxargs, M_TEMP);
266 	imgp->auxargs = NULL;
267 
268 	(*stack_base)--;
269 	**stack_base = (register_t)imgp->args->argc;
270 	return 0;
271 }
272 
273 extern int _ucodesel, _udatasel;
274 extern unsigned long linux_sznonrtsigcode;
275 
276 static void
277 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
278 {
279 	struct thread *td = curthread;
280 	struct proc *p = td->td_proc;
281 	struct sigacts *psp;
282 	struct trapframe *regs;
283 	struct l_rt_sigframe *fp, frame;
284 	int sig, code;
285 	int oonstack;
286 
287 	sig = ksi->ksi_signo;
288 	code = ksi->ksi_code;
289 	PROC_LOCK_ASSERT(p, MA_OWNED);
290 	psp = p->p_sigacts;
291 	mtx_assert(&psp->ps_mtx, MA_OWNED);
292 	regs = td->td_frame;
293 	oonstack = sigonstack(regs->tf_esp);
294 
295 #ifdef DEBUG
296 	if (ldebug(rt_sendsig))
297 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
298 		    catcher, sig, (void*)mask, code);
299 #endif
300 	/*
301 	 * Allocate space for the signal handler context.
302 	 */
303 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
304 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
305 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
306 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
307 	} else
308 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
309 	mtx_unlock(&psp->ps_mtx);
310 
311 	/*
312 	 * Build the argument list for the signal handler.
313 	 */
314 	if (p->p_sysent->sv_sigtbl)
315 		if (sig <= p->p_sysent->sv_sigsize)
316 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
317 
318 	bzero(&frame, sizeof(frame));
319 
320 	frame.sf_handler = catcher;
321 	frame.sf_sig = sig;
322 	frame.sf_siginfo = &fp->sf_si;
323 	frame.sf_ucontext = &fp->sf_sc;
324 
325 	/* Fill in POSIX parts */
326 	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);
327 
328 	/*
329 	 * Build the signal context to be used by sigreturn.
330 	 */
331 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
332 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
333 
334 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
335 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
336 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
337 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
338 	PROC_UNLOCK(p);
339 
340 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
341 
342 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
343 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
344 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
345 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
346 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
347 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
348 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
349 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
350 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
351 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
352 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
353 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
354 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
355 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
356 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
357 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
358 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
359 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
360 	frame.sf_sc.uc_mcontext.sc_cr2    = (register_t)ksi->ksi_addr;
361 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
362 
363 #ifdef DEBUG
364 	if (ldebug(rt_sendsig))
365 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
366 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
367 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
368 #endif
369 
370 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
371 		/*
372 		 * Process has trashed its stack; give it an illegal
373 		 * instruction to halt it in its tracks.
374 		 */
375 #ifdef DEBUG
376 		if (ldebug(rt_sendsig))
377 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
378 			    fp, oonstack);
379 #endif
380 		PROC_LOCK(p);
381 		sigexit(td, SIGILL);
382 	}
383 
384 	/*
385 	 * Build context to run handler in.
386 	 */
387 	regs->tf_esp = (int)fp;
388 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
389 	    linux_sznonrtsigcode;
390 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
391 	regs->tf_cs = _ucodesel;
392 	regs->tf_ds = _udatasel;
393 	regs->tf_es = _udatasel;
394 	regs->tf_fs = _udatasel;
395 	regs->tf_ss = _udatasel;
396 	PROC_LOCK(p);
397 	mtx_lock(&psp->ps_mtx);
398 }
399 
400 
401 /*
402  * Send an interrupt to process.
403  *
404  * Stack is set up to allow sigcode stored
405  * in u. to call routine, followed by kcall
406  * to sigreturn routine below.  After sigreturn
407  * resets the signal mask, the stack, and the
408  * frame pointer, it returns to the user
409  * specified pc, psl.
410  */
411 static void
412 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
413 {
414 	struct thread *td = curthread;
415 	struct proc *p = td->td_proc;
416 	struct sigacts *psp;
417 	struct trapframe *regs;
418 	struct l_sigframe *fp, frame;
419 	l_sigset_t lmask;
420 	int sig, code;
421 	int oonstack, i;
422 
423 	PROC_LOCK_ASSERT(p, MA_OWNED);
424 	psp = p->p_sigacts;
425 	sig = ksi->ksi_signo;
426 	code = ksi->ksi_code;
427 	mtx_assert(&psp->ps_mtx, MA_OWNED);
428 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
429 		/* Signal handler installed with SA_SIGINFO. */
430 		linux_rt_sendsig(catcher, ksi, mask);
431 		return;
432 	}
433 	regs = td->td_frame;
434 	oonstack = sigonstack(regs->tf_esp);
435 
436 #ifdef DEBUG
437 	if (ldebug(sendsig))
438 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
439 		    catcher, sig, (void*)mask, code);
440 #endif
441 
442 	/*
443 	 * Allocate space for the signal handler context.
444 	 */
445 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
446 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
447 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
448 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
449 	} else
450 		fp = (struct l_sigframe *)regs->tf_esp - 1;
451 	mtx_unlock(&psp->ps_mtx);
452 	PROC_UNLOCK(p);
453 
454 	/*
455 	 * Build the argument list for the signal handler.
456 	 */
457 	if (p->p_sysent->sv_sigtbl)
458 		if (sig <= p->p_sysent->sv_sigsize)
459 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
460 
461 	bzero(&frame, sizeof(frame));
462 
463 	frame.sf_handler = catcher;
464 	frame.sf_sig = sig;
465 
466 	bsd_to_linux_sigset(mask, &lmask);
467 
468 	/*
469 	 * Build the signal context to be used by sigreturn.
470 	 */
471 	frame.sf_sc.sc_mask   = lmask.__bits[0];
472 	frame.sf_sc.sc_gs     = rgs();
473 	frame.sf_sc.sc_fs     = regs->tf_fs;
474 	frame.sf_sc.sc_es     = regs->tf_es;
475 	frame.sf_sc.sc_ds     = regs->tf_ds;
476 	frame.sf_sc.sc_edi    = regs->tf_edi;
477 	frame.sf_sc.sc_esi    = regs->tf_esi;
478 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
479 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
480 	frame.sf_sc.sc_edx    = regs->tf_edx;
481 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
482 	frame.sf_sc.sc_eax    = regs->tf_eax;
483 	frame.sf_sc.sc_eip    = regs->tf_eip;
484 	frame.sf_sc.sc_cs     = regs->tf_cs;
485 	frame.sf_sc.sc_eflags = regs->tf_eflags;
486 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
487 	frame.sf_sc.sc_ss     = regs->tf_ss;
488 	frame.sf_sc.sc_err    = regs->tf_err;
489 	frame.sf_sc.sc_cr2    = (register_t)ksi->ksi_addr;
490 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
491 
492 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
493 		frame.sf_extramask[i] = lmask.__bits[i+1];
494 
495 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
496 		/*
497 		 * Process has trashed its stack; give it an illegal
498 		 * instruction to halt it in its tracks.
499 		 */
500 		PROC_LOCK(p);
501 		sigexit(td, SIGILL);
502 	}
503 
504 	/*
505 	 * Build context to run handler in.
506 	 */
507 	regs->tf_esp = (int)fp;
508 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
509 	regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
510 	regs->tf_cs = _ucodesel;
511 	regs->tf_ds = _udatasel;
512 	regs->tf_es = _udatasel;
513 	regs->tf_fs = _udatasel;
514 	regs->tf_ss = _udatasel;
515 	PROC_LOCK(p);
516 	mtx_lock(&psp->ps_mtx);
517 }
518 
519 /*
520  * System call to cleanup state after a signal
521  * has been taken.  Reset signal mask and
522  * stack state from context left by sendsig (above).
523  * Return to previous pc and psl as specified by
524  * context left by sendsig. Check carefully to
525  * make sure that the user has not modified the
526  * psl to gain improper privileges or to cause
527  * a machine fault.
528  */
529 int
530 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
531 {
532 	struct proc *p = td->td_proc;
533 	struct l_sigframe frame;
534 	struct trapframe *regs;
535 	l_sigset_t lmask;
536 	int eflags, i;
537 	ksiginfo_t ksi;
538 
539 	regs = td->td_frame;
540 
541 #ifdef DEBUG
542 	if (ldebug(sigreturn))
543 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
544 #endif
545 	/*
546 	 * The trampoline code hands us the sigframe.
547 	 * It is unsafe to keep track of it ourselves, in the event that a
548 	 * program jumps out of a signal handler.
549 	 */
550 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
551 		return (EFAULT);
552 
553 	/*
554 	 * Check for security violations.
555 	 */
556 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
557 	eflags = frame.sf_sc.sc_eflags;
558 	/*
559 	 * XXX do allow users to change the privileged flag PSL_RF.  The
560 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
561 	 * sometimes set it there too.  tf_eflags is kept in the signal
562 	 * context during signal handling and there is no other place
563 	 * to remember it, so the PSL_RF bit may be corrupted by the
564 	 * signal handler without us knowing.  Corruption of the PSL_RF
565 	 * bit at worst causes one more or one less debugger trap, so
566 	 * allowing it is fairly harmless.
567 	 */
568 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
569 		return(EINVAL);
570 
571 	/*
572 	 * Don't allow users to load a valid privileged %cs.  Let the
573 	 * hardware check for invalid selectors, excess privilege in
574 	 * other selectors, invalid %eip's and invalid %esp's.
575 	 */
576 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
577 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
578 		ksiginfo_init_trap(&ksi);
579 		ksi.ksi_signo = SIGBUS;
580 		ksi.ksi_code = BUS_OBJERR;
581 		ksi.ksi_trapno = T_PROTFLT;
582 		ksi.ksi_addr = (void *)regs->tf_eip;
583 		trapsignal(td, &ksi);
584 		return(EINVAL);
585 	}
586 
587 	lmask.__bits[0] = frame.sf_sc.sc_mask;
588 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
589 		lmask.__bits[i+1] = frame.sf_extramask[i];
590 	PROC_LOCK(p);
591 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
592 	SIG_CANTMASK(td->td_sigmask);
593 	signotify(td);
594 	PROC_UNLOCK(p);
595 
596 	/*
597 	 * Restore signal context.
598 	 */
599 	/* %gs was restored by the trampoline. */
600 	regs->tf_fs     = frame.sf_sc.sc_fs;
601 	regs->tf_es     = frame.sf_sc.sc_es;
602 	regs->tf_ds     = frame.sf_sc.sc_ds;
603 	regs->tf_edi    = frame.sf_sc.sc_edi;
604 	regs->tf_esi    = frame.sf_sc.sc_esi;
605 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
606 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
607 	regs->tf_edx    = frame.sf_sc.sc_edx;
608 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
609 	regs->tf_eax    = frame.sf_sc.sc_eax;
610 	regs->tf_eip    = frame.sf_sc.sc_eip;
611 	regs->tf_cs     = frame.sf_sc.sc_cs;
612 	regs->tf_eflags = eflags;
613 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
614 	regs->tf_ss     = frame.sf_sc.sc_ss;
615 
616 	return (EJUSTRETURN);
617 }
618 
619 /*
620  * System call to cleanup state after a signal
621  * has been taken.  Reset signal mask and
622  * stack state from context left by rt_sendsig (above).
623  * Return to previous pc and psl as specified by
624  * context left by sendsig. Check carefully to
625  * make sure that the user has not modified the
626  * psl to gain improper privileges or to cause
627  * a machine fault.
628  */
629 int
630 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
631 {
632 	struct proc *p = td->td_proc;
633 	struct l_ucontext uc;
634 	struct l_sigcontext *context;
635 	l_stack_t *lss;
636 	stack_t ss;
637 	struct trapframe *regs;
638 	int eflags;
639 	ksiginfo_t ksi;
640 
641 	regs = td->td_frame;
642 
643 #ifdef DEBUG
644 	if (ldebug(rt_sigreturn))
645 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
646 #endif
647 	/*
648 	 * The trampoline code hands us the ucontext.
649 	 * It is unsafe to keep track of it ourselves, in the event that a
650 	 * program jumps out of a signal handler.
651 	 */
652 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
653 		return (EFAULT);
654 
655 	context = &uc.uc_mcontext;
656 
657 	/*
658 	 * Check for security violations.
659 	 */
660 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
661 	eflags = context->sc_eflags;
662 	/*
663 	 * XXX do allow users to change the privileged flag PSL_RF.  The
664 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
665 	 * sometimes set it there too.  tf_eflags is kept in the signal
666 	 * context during signal handling and there is no other place
667 	 * to remember it, so the PSL_RF bit may be corrupted by the
668 	 * signal handler without us knowing.  Corruption of the PSL_RF
669 	 * bit at worst causes one more or one less debugger trap, so
670 	 * allowing it is fairly harmless.
671 	 */
672 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
673 		return(EINVAL);
674 
675 	/*
676 	 * Don't allow users to load a valid privileged %cs.  Let the
677 	 * hardware check for invalid selectors, excess privilege in
678 	 * other selectors, invalid %eip's and invalid %esp's.
679 	 */
680 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
681 	if (!CS_SECURE(context->sc_cs)) {
682 		ksiginfo_init_trap(&ksi);
683 		ksi.ksi_signo = SIGBUS;
684 		ksi.ksi_code = BUS_OBJERR;
685 		ksi.ksi_trapno = T_PROTFLT;
686 		ksi.ksi_addr = (void *)regs->tf_eip;
687 		trapsignal(td, &ksi);
688 		return(EINVAL);
689 	}
690 
691 	PROC_LOCK(p);
692 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
693 	SIG_CANTMASK(td->td_sigmask);
694 	signotify(td);
695 	PROC_UNLOCK(p);
696 
697 	/*
698 	 * Restore signal context
699 	 */
700 	/* %gs was restored by the trampoline. */
701 	regs->tf_fs     = context->sc_fs;
702 	regs->tf_es     = context->sc_es;
703 	regs->tf_ds     = context->sc_ds;
704 	regs->tf_edi    = context->sc_edi;
705 	regs->tf_esi    = context->sc_esi;
706 	regs->tf_ebp    = context->sc_ebp;
707 	regs->tf_ebx    = context->sc_ebx;
708 	regs->tf_edx    = context->sc_edx;
709 	regs->tf_ecx    = context->sc_ecx;
710 	regs->tf_eax    = context->sc_eax;
711 	regs->tf_eip    = context->sc_eip;
712 	regs->tf_cs     = context->sc_cs;
713 	regs->tf_eflags = eflags;
714 	regs->tf_esp    = context->sc_esp_at_signal;
715 	regs->tf_ss     = context->sc_ss;
716 
717 	/*
718 	 * call sigaltstack & ignore results..
719 	 */
720 	lss = &uc.uc_stack;
721 	ss.ss_sp = lss->ss_sp;
722 	ss.ss_size = lss->ss_size;
723 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
724 
725 #ifdef DEBUG
726 	if (ldebug(rt_sigreturn))
727 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
728 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
729 #endif
730 	(void)kern_sigaltstack(td, &ss, NULL);
731 
732 	return (EJUSTRETURN);
733 }
734 
735 /*
736  * MPSAFE
737  */
738 static void
739 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
740 {
741 	args[0] = tf->tf_ebx;
742 	args[1] = tf->tf_ecx;
743 	args[2] = tf->tf_edx;
744 	args[3] = tf->tf_esi;
745 	args[4] = tf->tf_edi;
746 	args[5] = tf->tf_ebp;	/* Unconfirmed */
747 	*params = NULL;		/* no copyin */
748 }
749 
750 /*
751  * If a linux binary is exec'ing something, try this image activator
752  * first.  We override standard shell script execution in order to
753  * be able to modify the interpreter path.  We only do this if a linux
754  * binary is doing the exec, so we do not create an EXEC module for it.
755  */
756 static int	exec_linux_imgact_try(struct image_params *iparams);
757 
758 static int
759 exec_linux_imgact_try(struct image_params *imgp)
760 {
761     const char *head = (const char *)imgp->image_header;
762     char *rpath;
763     int error = -1, len;
764 
765     /*
766      * The interpreter for shell scripts run from a linux binary needs
767      * to be located in /compat/linux if possible in order to recursively
768      * maintain linux path emulation.
769      */
770     if (((const short *)head)[0] == SHELLMAGIC) {
771 	    /*
772 	     * Run our normal shell image activator.  If it succeeds attempt
773 	     * to use the alternate path for the interpreter.  If an alternate
774 	     * path is found, use our stringspace to store it.
775 	     */
776 	    if ((error = exec_shell_imgact(imgp)) == 0) {
777 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
778 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD);
779 		    if (rpath != NULL) {
780 			    len = strlen(rpath) + 1;
781 
782 			    if (len <= MAXSHELLCMDLEN) {
783 				    memcpy(imgp->interpreter_name, rpath, len);
784 			    }
785 			    free(rpath, M_TEMP);
786 		    }
787 	    }
788     }
789     return(error);
790 }
791 
792 /*
793  * exec_setregs may initialize some registers differently than Linux
794  * does, thus potentially confusing Linux binaries. If necessary, we
795  * override the exec_setregs default(s) here.
796  */
797 static void
798 exec_linux_setregs(struct thread *td, u_long entry,
799 		   u_long stack, u_long ps_strings)
800 {
801 	static const u_short control = __LINUX_NPXCW__;
802 	struct pcb *pcb = td->td_pcb;
803 
804 	exec_setregs(td, entry, stack, ps_strings);
805 
806 	/* Linux sets %gs to 0, we default to _udatasel */
807 	pcb->pcb_gs = 0; load_gs(0);
808 
809 	/* Linux sets the i387 to extended precision. */
810 	fldcw(&control);
811 }
812 
813 struct sysentvec linux_sysvec = {
814 	.sv_size	= LINUX_SYS_MAXSYSCALL,
815 	.sv_table	= linux_sysent,
816 	.sv_mask	= 0,
817 	.sv_sigsize	= LINUX_SIGTBLSZ,
818 	.sv_sigtbl	= bsd_to_linux_signal,
819 	.sv_errsize	= ELAST + 1,
820 	.sv_errtbl	= bsd_to_linux_errno,
821 	.sv_transtrap	= translate_traps,
822 	.sv_fixup	= linux_fixup,
823 	.sv_sendsig	= linux_sendsig,
824 	.sv_sigcode	= linux_sigcode,
825 	.sv_szsigcode	= &linux_szsigcode,
826 	.sv_prepsyscall	= linux_prepsyscall,
827 	.sv_name	= "Linux a.out",
828 	.sv_coredump	= NULL,
829 	.sv_imgact_try	= exec_linux_imgact_try,
830 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
831 	.sv_pagesize	= PAGE_SIZE,
832 	.sv_minuser	= VM_MIN_ADDRESS,
833 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
834 	.sv_usrstack	= USRSTACK,
835 	.sv_psstrings	= PS_STRINGS,
836 	.sv_stackprot	= VM_PROT_ALL,
837 	.sv_copyout_strings = exec_copyout_strings,
838 	.sv_setregs	= exec_linux_setregs,
839 	.sv_fixlimit	= NULL,
840 	.sv_maxssiz	= NULL,
841 	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32
842 };
843 
844 struct sysentvec elf_linux_sysvec = {
845 	.sv_size	= LINUX_SYS_MAXSYSCALL,
846 	.sv_table	= linux_sysent,
847 	.sv_mask	= 0,
848 	.sv_sigsize	= LINUX_SIGTBLSZ,
849 	.sv_sigtbl	= bsd_to_linux_signal,
850 	.sv_errsize	= ELAST + 1,
851 	.sv_errtbl	= bsd_to_linux_errno,
852 	.sv_transtrap	= translate_traps,
853 	.sv_fixup	= elf_linux_fixup,
854 	.sv_sendsig	= linux_sendsig,
855 	.sv_sigcode	= linux_sigcode,
856 	.sv_szsigcode	= &linux_szsigcode,
857 	.sv_prepsyscall	= linux_prepsyscall,
858 	.sv_name	= "Linux ELF",
859 	.sv_coredump	= elf32_coredump,
860 	.sv_imgact_try	= exec_linux_imgact_try,
861 	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
862 	.sv_pagesize	= PAGE_SIZE,
863 	.sv_minuser	= VM_MIN_ADDRESS,
864 	.sv_maxuser	= VM_MAXUSER_ADDRESS,
865 	.sv_usrstack	= USRSTACK,
866 	.sv_psstrings	= PS_STRINGS,
867 	.sv_stackprot	= VM_PROT_ALL,
868 	.sv_copyout_strings = exec_copyout_strings,
869 	.sv_setregs	= exec_linux_setregs,
870 	.sv_fixlimit	= NULL,
871 	.sv_maxssiz	= NULL,
872 	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32
873 };
874 
875 static Elf32_Brandinfo linux_brand = {
876 	.brand		= ELFOSABI_LINUX,
877 	.machine	= EM_386,
878 	.compat_3_brand	= "Linux",
879 	.emul_path	= "/compat/linux",
880 	.interp_path	= "/lib/ld-linux.so.1",
881 	.sysvec		= &elf_linux_sysvec,
882 	.interp_newpath	= NULL,
883 	.flags		= BI_CAN_EXEC_DYN,
884 };
885 
886 static Elf32_Brandinfo linux_glibc2brand = {
887 	.brand		= ELFOSABI_LINUX,
888 	.machine	= EM_386,
889 	.compat_3_brand	= "Linux",
890 	.emul_path	= "/compat/linux",
891 	.interp_path	= "/lib/ld-linux.so.2",
892 	.sysvec		= &elf_linux_sysvec,
893 	.interp_newpath	= NULL,
894 	.flags		= BI_CAN_EXEC_DYN,
895 };
896 
897 Elf32_Brandinfo *linux_brandlist[] = {
898 	&linux_brand,
899 	&linux_glibc2brand,
900 	NULL
901 };
902 
903 static int
904 linux_elf_modevent(module_t mod, int type, void *data)
905 {
906 	Elf32_Brandinfo **brandinfo;
907 	int error;
908 	struct linux_ioctl_handler **lihp;
909 	struct linux_device_handler **ldhp;
910 
911 	error = 0;
912 
913 	switch(type) {
914 	case MOD_LOAD:
915 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
916 		     ++brandinfo)
917 			if (elf32_insert_brand_entry(*brandinfo) < 0)
918 				error = EINVAL;
919 		if (error == 0) {
920 			SET_FOREACH(lihp, linux_ioctl_handler_set)
921 				linux_ioctl_register_handler(*lihp);
922 			SET_FOREACH(ldhp, linux_device_handler_set)
923 				linux_device_register_handler(*ldhp);
924 			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
925 			sx_init(&emul_shared_lock, "emuldata->shared lock");
926 			LIST_INIT(&futex_list);
927 			sx_init(&futex_sx, "futex protection lock");
928 			linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
929 			      NULL, 1000);
930 			linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
931 			      NULL, 1000);
932 			linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
933 			      NULL, 1000);
934 			if (bootverbose)
935 				printf("Linux ELF exec handler installed\n");
936 		} else
937 			printf("cannot insert Linux ELF brand handler\n");
938 		break;
939 	case MOD_UNLOAD:
940 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
941 		     ++brandinfo)
942 			if (elf32_brand_inuse(*brandinfo))
943 				error = EBUSY;
944 		if (error == 0) {
945 			for (brandinfo = &linux_brandlist[0];
946 			     *brandinfo != NULL; ++brandinfo)
947 				if (elf32_remove_brand_entry(*brandinfo) < 0)
948 					error = EINVAL;
949 		}
950 		if (error == 0) {
951 			SET_FOREACH(lihp, linux_ioctl_handler_set)
952 				linux_ioctl_unregister_handler(*lihp);
953 			SET_FOREACH(ldhp, linux_device_handler_set)
954 				linux_device_unregister_handler(*ldhp);
955 			mtx_destroy(&emul_lock);
956 			sx_destroy(&emul_shared_lock);
957 			sx_destroy(&futex_sx);
958 			EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
959 			EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
960 			EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
961 			if (bootverbose)
962 				printf("Linux ELF exec handler removed\n");
963 		} else
964 			printf("Could not deinstall ELF interpreter entry\n");
965 		break;
966 	default:
967 		return EOPNOTSUPP;
968 	}
969 	return error;
970 }
971 
972 static moduledata_t linux_elf_mod = {
973 	"linuxelf",
974 	linux_elf_modevent,
975 	0
976 };
977 
978 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
979