xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 41466b50c1d5bfd1cf6adaae547a579a75d7c04e)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 
51 #include <vm/vm.h>
52 #include <vm/vm_param.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_extern.h>
55 #include <sys/exec.h>
56 #include <sys/kernel.h>
57 #include <sys/module.h>
58 #include <machine/cpu.h>
59 #include <sys/mutex.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <compat/linux/linux_signal.h>
64 #include <compat/linux/linux_util.h>
65 
66 MODULE_VERSION(linux, 1);
67 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
68 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
69 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
70 
71 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72 
73 #if BYTE_ORDER == LITTLE_ENDIAN
74 #define SHELLMAGIC      0x2123 /* #! */
75 #else
76 #define SHELLMAGIC      0x2321
77 #endif
78 
79 /*
80  * Allow the sendsig functions to use the ldebug() facility
81  * even though they are not syscalls themselves. Map them
82  * to syscall 0. This is slightly less bogus than using
83  * ldebug(sigreturn).
84  */
85 #define	LINUX_SYS_linux_rt_sendsig	0
86 #define	LINUX_SYS_linux_sendsig		0
87 
88 extern char linux_sigcode[];
89 extern int linux_szsigcode;
90 
91 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
92 
93 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
94 
95 static int	linux_fixup __P((register_t **stack_base,
96 				 struct image_params *iparams));
97 static int	elf_linux_fixup __P((register_t **stack_base,
98 				     struct image_params *iparams));
99 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
100 				       u_int *code, caddr_t *params));
101 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
102 				   u_long code));
103 
104 /*
105  * Linux syscalls return negative errno's, we do positive and map them
106  */
107 static int bsd_to_linux_errno[ELAST + 1] = {
108   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
109  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
110  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
111  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
112  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
113 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
114 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
115 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
116   	-6, -6, -43, -42, -75, -6, -84
117 };
118 
119 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
120 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
121 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
122 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
123 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
124 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
125 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
126 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
127 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
128 };
129 
130 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
131 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
132 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
133 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
134 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
135 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
136 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
137 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
138 	SIGIO, SIGURG, 0
139 };
140 
141 /*
142  * If FreeBSD & Linux have a difference of opinion about what a trap
143  * means, deal with it here.
144  *
145  * MPSAFE
146  */
147 static int
148 translate_traps(int signal, int trap_code)
149 {
150 	if (signal != SIGBUS)
151 		return signal;
152 	switch (trap_code) {
153 	case T_PROTFLT:
154 	case T_TSSFLT:
155 	case T_DOUBLEFLT:
156 	case T_PAGEFLT:
157 		return SIGSEGV;
158 	default:
159 		return signal;
160 	}
161 }
162 
163 static int
164 linux_fixup(register_t **stack_base, struct image_params *imgp)
165 {
166 	register_t *argv, *envp;
167 
168 	argv = *stack_base;
169 	envp = *stack_base + (imgp->argc + 1);
170 	(*stack_base)--;
171 	**stack_base = (intptr_t)(void *)envp;
172 	(*stack_base)--;
173 	**stack_base = (intptr_t)(void *)argv;
174 	(*stack_base)--;
175 	**stack_base = imgp->argc;
176 	return 0;
177 }
178 
179 static int
180 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
181 {
182 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
183 	register_t *pos;
184 
185 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
186 
187 	if (args->trace) {
188 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
189 	}
190 	if (args->execfd != -1) {
191 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
192 	}
193 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
194 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
195 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
196 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
197 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
198 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
199 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
200 	PROC_LOCK(imgp->proc);
201 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
202 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
203 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
204 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
205 	PROC_UNLOCK(imgp->proc);
206 	AUXARGS_ENTRY(pos, AT_NULL, 0);
207 
208 	free(imgp->auxargs, M_TEMP);
209 	imgp->auxargs = NULL;
210 
211 	(*stack_base)--;
212 	**stack_base = (long)imgp->argc;
213 	return 0;
214 }
215 
216 extern int _ucodesel, _udatasel;
217 extern unsigned long linux_sznonrtsigcode;
218 
219 static void
220 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
221 {
222 	register struct thread *td = curthread;
223 	register struct proc *p = td->td_proc;
224 	register struct trapframe *regs;
225 	struct l_rt_sigframe *fp, frame;
226 	int oonstack;
227 
228 	PROC_LOCK_ASSERT(p, MA_OWNED);
229 	regs = td->td_frame;
230 	oonstack = sigonstack(regs->tf_esp);
231 
232 #ifdef DEBUG
233 	if (ldebug(rt_sendsig))
234 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
235 		    catcher, sig, (void*)mask, code);
236 #endif
237 	/*
238 	 * Allocate space for the signal handler context.
239 	 */
240 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
241 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
242 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
243 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
244 	} else
245 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
246 	PROC_UNLOCK(p);
247 
248 	/*
249 	 * grow() will return FALSE if the fp will not fit inside the stack
250 	 *	and the stack can not be grown. useracc will return FALSE
251 	 *	if access is denied.
252 	 */
253 	if ((grow_stack (p, (int)fp) == FALSE) ||
254 	    !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
255 	    VM_PROT_WRITE)) {
256 		/*
257 		 * Process has trashed its stack; give it an illegal
258 		 * instruction to halt it in its tracks.
259 		 */
260 		PROC_LOCK(p);
261 		SIGACTION(p, SIGILL) = SIG_DFL;
262 		SIGDELSET(p->p_sigignore, SIGILL);
263 		SIGDELSET(p->p_sigcatch, SIGILL);
264 		SIGDELSET(p->p_sigmask, SIGILL);
265 #ifdef DEBUG
266 		if (ldebug(rt_sendsig))
267 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
268 			    fp, oonstack);
269 #endif
270 		psignal(p, SIGILL);
271 		return;
272 	}
273 
274 	/*
275 	 * Build the argument list for the signal handler.
276 	 */
277 	if (p->p_sysent->sv_sigtbl)
278 		if (sig <= p->p_sysent->sv_sigsize)
279 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
280 
281 	frame.sf_handler = catcher;
282 	frame.sf_sig = sig;
283 	frame.sf_siginfo = &fp->sf_si;
284 	frame.sf_ucontext = &fp->sf_sc;
285 
286 	/* Fill siginfo structure. */
287 	frame.sf_si.lsi_signo = sig;
288 	frame.sf_si.lsi_code = code;
289 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
290 
291 	/*
292 	 * Build the signal context to be used by sigreturn.
293 	 */
294 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
295 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
296 
297 	PROC_LOCK(p);
298 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
299 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
300 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
301 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
302 	PROC_UNLOCK(p);
303 
304 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
305 
306 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
307 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
308 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
309 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
310 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
311 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
312 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
313 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
314 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
315 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
316 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
317 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
318 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
319 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
320 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
321 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
322 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
323 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
324 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
325 
326 #ifdef DEBUG
327 	if (ldebug(rt_sendsig))
328 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
329 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
330 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
331 #endif
332 
333 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
334 		/*
335 		 * Process has trashed its stack; give it an illegal
336 		 * instruction to halt it in its tracks.
337 		 */
338 		PROC_LOCK(p);
339 		sigexit(td, SIGILL);
340 		/* NOTREACHED */
341 	}
342 
343 	/*
344 	 * Build context to run handler in.
345 	 */
346 	regs->tf_esp = (int)fp;
347 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
348 	    linux_sznonrtsigcode;
349 	regs->tf_eflags &= ~PSL_VM;
350 	regs->tf_cs = _ucodesel;
351 	regs->tf_ds = _udatasel;
352 	regs->tf_es = _udatasel;
353 	regs->tf_fs = _udatasel;
354 	regs->tf_ss = _udatasel;
355 	PROC_LOCK(p);
356 }
357 
358 
359 /*
360  * Send an interrupt to process.
361  *
362  * Stack is set up to allow sigcode stored
363  * in u. to call routine, followed by kcall
364  * to sigreturn routine below.  After sigreturn
365  * resets the signal mask, the stack, and the
366  * frame pointer, it returns to the user
367  * specified pc, psl.
368  */
369 
370 static void
371 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
372 {
373 	register struct thread *td = curthread;
374 	register struct proc *p = td->td_proc;
375 	register struct trapframe *regs;
376 	struct l_sigframe *fp, frame;
377 	l_sigset_t lmask;
378 	int oonstack, i;
379 
380 	PROC_LOCK_ASSERT(p, MA_OWNED);
381 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
382 		/* Signal handler installed with SA_SIGINFO. */
383 		linux_rt_sendsig(catcher, sig, mask, code);
384 		return;
385 	}
386 
387 	regs = td->td_frame;
388 	oonstack = sigonstack(regs->tf_esp);
389 
390 #ifdef DEBUG
391 	if (ldebug(sendsig))
392 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
393 		    catcher, sig, (void*)mask, code);
394 #endif
395 
396 	/*
397 	 * Allocate space for the signal handler context.
398 	 */
399 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
400 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
401 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
402 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
403 	} else
404 		fp = (struct l_sigframe *)regs->tf_esp - 1;
405 	PROC_UNLOCK(p);
406 
407 	/*
408 	 * grow() will return FALSE if the fp will not fit inside the stack
409 	 *	and the stack can not be grown. useracc will return FALSE
410 	 *	if access is denied.
411 	 */
412 	if ((grow_stack (p, (int)fp) == FALSE) ||
413 	    !useracc((caddr_t)fp, sizeof (struct l_sigframe),
414 	    VM_PROT_WRITE)) {
415 		/*
416 		 * Process has trashed its stack; give it an illegal
417 		 * instruction to halt it in its tracks.
418 		 */
419 		PROC_LOCK(p);
420 		SIGACTION(p, SIGILL) = SIG_DFL;
421 		SIGDELSET(p->p_sigignore, SIGILL);
422 		SIGDELSET(p->p_sigcatch, SIGILL);
423 		SIGDELSET(p->p_sigmask, SIGILL);
424 		psignal(p, SIGILL);
425 		return;
426 	}
427 
428 	/*
429 	 * Build the argument list for the signal handler.
430 	 */
431 	if (p->p_sysent->sv_sigtbl)
432 		if (sig <= p->p_sysent->sv_sigsize)
433 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
434 
435 	frame.sf_handler = catcher;
436 	frame.sf_sig = sig;
437 
438 	bsd_to_linux_sigset(mask, &lmask);
439 
440 	/*
441 	 * Build the signal context to be used by sigreturn.
442 	 */
443 	frame.sf_sc.sc_mask   = lmask.__bits[0];
444 	frame.sf_sc.sc_gs     = rgs();
445 	frame.sf_sc.sc_fs     = regs->tf_fs;
446 	frame.sf_sc.sc_es     = regs->tf_es;
447 	frame.sf_sc.sc_ds     = regs->tf_ds;
448 	frame.sf_sc.sc_edi    = regs->tf_edi;
449 	frame.sf_sc.sc_esi    = regs->tf_esi;
450 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
451 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
452 	frame.sf_sc.sc_edx    = regs->tf_edx;
453 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
454 	frame.sf_sc.sc_eax    = regs->tf_eax;
455 	frame.sf_sc.sc_eip    = regs->tf_eip;
456 	frame.sf_sc.sc_cs     = regs->tf_cs;
457 	frame.sf_sc.sc_eflags = regs->tf_eflags;
458 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
459 	frame.sf_sc.sc_ss     = regs->tf_ss;
460 	frame.sf_sc.sc_err    = regs->tf_err;
461 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
462 
463 	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
464 
465 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
466 		frame.sf_extramask[i] = lmask.__bits[i+1];
467 
468 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
469 		/*
470 		 * Process has trashed its stack; give it an illegal
471 		 * instruction to halt it in its tracks.
472 		 */
473 		PROC_LOCK(p);
474 		sigexit(td, SIGILL);
475 		/* NOTREACHED */
476 	}
477 
478 	/*
479 	 * Build context to run handler in.
480 	 */
481 	regs->tf_esp = (int)fp;
482 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
483 	regs->tf_eflags &= ~PSL_VM;
484 	regs->tf_cs = _ucodesel;
485 	regs->tf_ds = _udatasel;
486 	regs->tf_es = _udatasel;
487 	regs->tf_fs = _udatasel;
488 	regs->tf_ss = _udatasel;
489 	PROC_LOCK(p);
490 }
491 
492 /*
493  * System call to cleanup state after a signal
494  * has been taken.  Reset signal mask and
495  * stack state from context left by sendsig (above).
496  * Return to previous pc and psl as specified by
497  * context left by sendsig. Check carefully to
498  * make sure that the user has not modified the
499  * psl to gain improper privileges or to cause
500  * a machine fault.
501  */
502 int
503 linux_sigreturn(td, args)
504 	struct thread *td;
505 	struct linux_sigreturn_args *args;
506 {
507 	struct proc *p = td->td_proc;
508 	struct l_sigframe frame;
509 	register struct trapframe *regs;
510 	l_sigset_t lmask;
511 	int eflags, i;
512 
513 	regs = td->td_frame;
514 
515 #ifdef DEBUG
516 	if (ldebug(sigreturn))
517 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
518 #endif
519 	/*
520 	 * The trampoline code hands us the sigframe.
521 	 * It is unsafe to keep track of it ourselves, in the event that a
522 	 * program jumps out of a signal handler.
523 	 */
524 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
525 		return (EFAULT);
526 
527 	/*
528 	 * Check for security violations.
529 	 */
530 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
531 	eflags = frame.sf_sc.sc_eflags;
532 	/*
533 	 * XXX do allow users to change the privileged flag PSL_RF.  The
534 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
535 	 * sometimes set it there too.  tf_eflags is kept in the signal
536 	 * context during signal handling and there is no other place
537 	 * to remember it, so the PSL_RF bit may be corrupted by the
538 	 * signal handler without us knowing.  Corruption of the PSL_RF
539 	 * bit at worst causes one more or one less debugger trap, so
540 	 * allowing it is fairly harmless.
541 	 */
542 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
543     		return(EINVAL);
544 	}
545 
546 	/*
547 	 * Don't allow users to load a valid privileged %cs.  Let the
548 	 * hardware check for invalid selectors, excess privilege in
549 	 * other selectors, invalid %eip's and invalid %esp's.
550 	 */
551 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
552 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
553 		trapsignal(p, SIGBUS, T_PROTFLT);
554 		return(EINVAL);
555 	}
556 
557 	lmask.__bits[0] = frame.sf_sc.sc_mask;
558 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
559 		lmask.__bits[i+1] = frame.sf_extramask[i];
560 	PROC_LOCK(p);
561 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
562 	SIG_CANTMASK(p->p_sigmask);
563 	PROC_UNLOCK(p);
564 
565 	/*
566 	 * Restore signal context.
567 	 */
568 	/* %gs was restored by the trampoline. */
569 	regs->tf_fs     = frame.sf_sc.sc_fs;
570 	regs->tf_es     = frame.sf_sc.sc_es;
571 	regs->tf_ds     = frame.sf_sc.sc_ds;
572 	regs->tf_edi    = frame.sf_sc.sc_edi;
573 	regs->tf_esi    = frame.sf_sc.sc_esi;
574 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
575 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
576 	regs->tf_edx    = frame.sf_sc.sc_edx;
577 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
578 	regs->tf_eax    = frame.sf_sc.sc_eax;
579 	regs->tf_eip    = frame.sf_sc.sc_eip;
580 	regs->tf_cs     = frame.sf_sc.sc_cs;
581 	regs->tf_eflags = eflags;
582 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
583 	regs->tf_ss     = frame.sf_sc.sc_ss;
584 
585 	return (EJUSTRETURN);
586 }
587 
588 /*
589  * System call to cleanup state after a signal
590  * has been taken.  Reset signal mask and
591  * stack state from context left by rt_sendsig (above).
592  * Return to previous pc and psl as specified by
593  * context left by sendsig. Check carefully to
594  * make sure that the user has not modified the
595  * psl to gain improper privileges or to cause
596  * a machine fault.
597  */
598 int
599 linux_rt_sigreturn(td, args)
600 	struct thread *td;
601 	struct linux_rt_sigreturn_args *args;
602 {
603 	struct proc *p = td->td_proc;
604 	struct sigaltstack_args sasargs;
605 	struct l_ucontext uc;
606 	struct l_sigcontext *context;
607 	l_stack_t *lss;
608 	stack_t *ss;
609 	register struct trapframe *regs;
610 	int eflags;
611 	caddr_t sg = stackgap_init();
612 
613 	regs = td->td_frame;
614 
615 #ifdef DEBUG
616 	if (ldebug(rt_sigreturn))
617 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
618 #endif
619 	/*
620 	 * The trampoline code hands us the ucontext.
621 	 * It is unsafe to keep track of it ourselves, in the event that a
622 	 * program jumps out of a signal handler.
623 	 */
624 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
625 		return (EFAULT);
626 
627 	context = &uc.uc_mcontext;
628 
629 	/*
630 	 * Check for security violations.
631 	 */
632 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
633 	eflags = context->sc_eflags;
634 	/*
635 	 * XXX do allow users to change the privileged flag PSL_RF.  The
636 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
637 	 * sometimes set it there too.  tf_eflags is kept in the signal
638 	 * context during signal handling and there is no other place
639 	 * to remember it, so the PSL_RF bit may be corrupted by the
640 	 * signal handler without us knowing.  Corruption of the PSL_RF
641 	 * bit at worst causes one more or one less debugger trap, so
642 	 * allowing it is fairly harmless.
643 	 */
644 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
645     		return(EINVAL);
646 	}
647 
648 	/*
649 	 * Don't allow users to load a valid privileged %cs.  Let the
650 	 * hardware check for invalid selectors, excess privilege in
651 	 * other selectors, invalid %eip's and invalid %esp's.
652 	 */
653 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
654 	if (!CS_SECURE(context->sc_cs)) {
655 		trapsignal(p, SIGBUS, T_PROTFLT);
656 		return(EINVAL);
657 	}
658 
659 	PROC_LOCK(p);
660 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
661 	SIG_CANTMASK(p->p_sigmask);
662 	PROC_UNLOCK(p);
663 
664 	/*
665 	 * Restore signal context
666 	 */
667 	/* %gs was restored by the trampoline. */
668 	regs->tf_fs     = context->sc_fs;
669 	regs->tf_es     = context->sc_es;
670 	regs->tf_ds     = context->sc_ds;
671 	regs->tf_edi    = context->sc_edi;
672 	regs->tf_esi    = context->sc_esi;
673 	regs->tf_ebp    = context->sc_ebp;
674 	regs->tf_ebx    = context->sc_ebx;
675 	regs->tf_edx    = context->sc_edx;
676 	regs->tf_ecx    = context->sc_ecx;
677 	regs->tf_eax    = context->sc_eax;
678 	regs->tf_eip    = context->sc_eip;
679 	regs->tf_cs     = context->sc_cs;
680 	regs->tf_eflags = eflags;
681 	regs->tf_esp    = context->sc_esp_at_signal;
682 	regs->tf_ss     = context->sc_ss;
683 
684 	/*
685 	 * call sigaltstack & ignore results..
686 	 */
687 	ss = stackgap_alloc(&sg, sizeof(stack_t));
688 	lss = &uc.uc_stack;
689 	ss->ss_sp = lss->ss_sp;
690 	ss->ss_size = lss->ss_size;
691 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
692 
693 #ifdef DEBUG
694 	if (ldebug(rt_sigreturn))
695 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
696 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
697 #endif
698 	sasargs.ss = ss;
699 	sasargs.oss = NULL;
700 	(void) sigaltstack(td, &sasargs);
701 
702 	return (EJUSTRETURN);
703 }
704 
705 /*
706  * MPSAFE
707  */
708 static void
709 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
710 {
711 	args[0] = tf->tf_ebx;
712 	args[1] = tf->tf_ecx;
713 	args[2] = tf->tf_edx;
714 	args[3] = tf->tf_esi;
715 	args[4] = tf->tf_edi;
716 	*params = NULL;		/* no copyin */
717 }
718 
719 /*
720  * If a linux binary is exec'ing something, try this image activator
721  * first.  We override standard shell script execution in order to
722  * be able to modify the interpreter path.  We only do this if a linux
723  * binary is doing the exec, so we do not create an EXEC module for it.
724  */
725 static int	exec_linux_imgact_try __P((struct image_params *iparams));
726 
727 static int
728 exec_linux_imgact_try(imgp)
729     struct image_params *imgp;
730 {
731     const char *head = (const char *)imgp->image_header;
732     int error = -1;
733 
734     /*
735      * The interpreter for shell scripts run from a linux binary needs
736      * to be located in /compat/linux if possible in order to recursively
737      * maintain linux path emulation.
738      */
739     if (((const short *)head)[0] == SHELLMAGIC) {
740 	    /*
741 	     * Run our normal shell image activator.  If it succeeds attempt
742 	     * to use the alternate path for the interpreter.  If an alternate
743 	     * path is found, use our stringspace to store it.
744 	     */
745 	    if ((error = exec_shell_imgact(imgp)) == 0) {
746 		    char *rpath = NULL;
747 
748 		    linux_emul_find(&imgp->proc->p_thread, NULL,
749 			imgp->interpreter_name, &rpath, 0);
750 		    if (rpath != imgp->interpreter_name) {
751 			    int len = strlen(rpath) + 1;
752 
753 			    if (len <= MAXSHELLCMDLEN) {
754 				    memcpy(imgp->interpreter_name, rpath, len);
755 			    }
756 			    free(rpath, M_TEMP);
757 		    }
758 	    }
759     }
760     return(error);
761 }
762 
763 struct sysentvec linux_sysvec = {
764 	LINUX_SYS_MAXSYSCALL,
765 	linux_sysent,
766 	0xff,
767 	LINUX_SIGTBLSZ,
768 	bsd_to_linux_signal,
769 	ELAST + 1,
770 	bsd_to_linux_errno,
771 	translate_traps,
772 	linux_fixup,
773 	linux_sendsig,
774 	linux_sigcode,
775 	&linux_szsigcode,
776 	linux_prepsyscall,
777 	"Linux a.out",
778 	aout_coredump,
779 	exec_linux_imgact_try,
780 	LINUX_MINSIGSTKSZ
781 };
782 
783 struct sysentvec elf_linux_sysvec = {
784 	LINUX_SYS_MAXSYSCALL,
785 	linux_sysent,
786 	0xff,
787 	LINUX_SIGTBLSZ,
788 	bsd_to_linux_signal,
789 	ELAST + 1,
790 	bsd_to_linux_errno,
791 	translate_traps,
792 	elf_linux_fixup,
793 	linux_sendsig,
794 	linux_sigcode,
795 	&linux_szsigcode,
796 	linux_prepsyscall,
797 	"Linux ELF",
798 	elf_coredump,
799 	exec_linux_imgact_try,
800 	LINUX_MINSIGSTKSZ
801 };
802 
803 static Elf32_Brandinfo linux_brand = {
804 					ELFOSABI_LINUX,
805 					"Linux",
806 					"/compat/linux",
807 					"/lib/ld-linux.so.1",
808 					&elf_linux_sysvec
809 				 };
810 
811 static Elf32_Brandinfo linux_glibc2brand = {
812 					ELFOSABI_LINUX,
813 					"Linux",
814 					"/compat/linux",
815 					"/lib/ld-linux.so.2",
816 					&elf_linux_sysvec
817 				 };
818 
819 Elf32_Brandinfo *linux_brandlist[] = {
820 					&linux_brand,
821 					&linux_glibc2brand,
822 					NULL
823 				};
824 
825 static int
826 linux_elf_modevent(module_t mod, int type, void *data)
827 {
828 	Elf32_Brandinfo **brandinfo;
829 	int error;
830 	struct linux_ioctl_handler **lihp;
831 
832 	error = 0;
833 
834 	switch(type) {
835 	case MOD_LOAD:
836 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
837 		     ++brandinfo)
838 			if (elf_insert_brand_entry(*brandinfo) < 0)
839 				error = EINVAL;
840 		if (error == 0) {
841 			SET_FOREACH(lihp, linux_ioctl_handler_set)
842 				linux_ioctl_register_handler(*lihp);
843 			if (bootverbose)
844 				printf("Linux ELF exec handler installed\n");
845 		} else
846 			printf("cannot insert Linux ELF brand handler\n");
847 		break;
848 	case MOD_UNLOAD:
849 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
850 		     ++brandinfo)
851 			if (elf_brand_inuse(*brandinfo))
852 				error = EBUSY;
853 		if (error == 0) {
854 			for (brandinfo = &linux_brandlist[0];
855 			     *brandinfo != NULL; ++brandinfo)
856 				if (elf_remove_brand_entry(*brandinfo) < 0)
857 					error = EINVAL;
858 		}
859 		if (error == 0) {
860 			SET_FOREACH(lihp, linux_ioctl_handler_set)
861 				linux_ioctl_unregister_handler(*lihp);
862 			if (bootverbose)
863 				printf("Linux ELF exec handler removed\n");
864 		} else
865 			printf("Could not deinstall ELF interpreter entry\n");
866 		break;
867 	default:
868 		break;
869 	}
870 	return error;
871 }
872 
873 static moduledata_t linux_elf_mod = {
874 	"linuxelf",
875 	linux_elf_modevent,
876 	0
877 };
878 
879 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
880