xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision d1ba25f456132eabc6f1244e4bbbf3d19e8f3a31)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysproto.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56 
57 #include <i386/linux/linux.h>
58 #include <i386/linux/linux_proto.h>
59 #include <compat/linux/linux_util.h>
60 
61 MODULE_VERSION(linux, 1);
62 
63 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
64 
65 #if BYTE_ORDER == LITTLE_ENDIAN
66 #define SHELLMAGIC      0x2123 /* #! */
67 #else
68 #define SHELLMAGIC      0x2321
69 #endif
70 
71 extern char linux_sigcode[];
72 extern int linux_szsigcode;
73 
74 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
75 
76 extern struct linker_set linux_ioctl_handler_set;
77 
78 static int	linux_fixup __P((register_t **stack_base,
79 				 struct image_params *iparams));
80 static int	elf_linux_fixup __P((register_t **stack_base,
81 				     struct image_params *iparams));
82 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
83 				       u_int *code, caddr_t *params));
84 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
85 				   u_long code));
86 
87 /*
88  * Linux syscalls return negative errno's, we do positive and map them
89  */
90 static int bsd_to_linux_errno[ELAST + 1] = {
91   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
92  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
93  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
94  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
95  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
96 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
97 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
98 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
99   	-6, -6, -43, -42, -75, -6, -84
100 };
101 
102 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
103 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
104 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
105 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
106 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
107 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
108 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
109 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
110 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
111 };
112 
113 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
114 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
115 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
116 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
117 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
118 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
119 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
120 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
121 	SIGIO, SIGURG, 0
122 };
123 
124 /*
125  * If FreeBSD & Linux have a difference of opinion about what a trap
126  * means, deal with it here.
127  */
128 static int
129 translate_traps(int signal, int trap_code)
130 {
131 	if (signal != SIGBUS)
132 		return signal;
133 	switch (trap_code) {
134 	case T_PROTFLT:
135 	case T_TSSFLT:
136 	case T_DOUBLEFLT:
137 	case T_PAGEFLT:
138 		return SIGSEGV;
139 	default:
140 		return signal;
141 	}
142 }
143 
144 static int
145 linux_fixup(register_t **stack_base, struct image_params *imgp)
146 {
147 	register_t *argv, *envp;
148 
149 	argv = *stack_base;
150 	envp = *stack_base + (imgp->argc + 1);
151 	(*stack_base)--;
152 	**stack_base = (intptr_t)(void *)envp;
153 	(*stack_base)--;
154 	**stack_base = (intptr_t)(void *)argv;
155 	(*stack_base)--;
156 	**stack_base = imgp->argc;
157 	return 0;
158 }
159 
160 static int
161 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
162 {
163 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
164 	register_t *pos;
165 
166 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
167 
168 	if (args->trace) {
169 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
170 	}
171 	if (args->execfd != -1) {
172 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
173 	}
174 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
175 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
176 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
177 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
178 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
179 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
180 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
181 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
182 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
183 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
184 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
185 	AUXARGS_ENTRY(pos, AT_NULL, 0);
186 
187 	free(imgp->auxargs, M_TEMP);
188 	imgp->auxargs = NULL;
189 
190 	(*stack_base)--;
191 	**stack_base = (long)imgp->argc;
192 	return 0;
193 }
194 
195 extern int _ucodesel, _udatasel;
196 extern unsigned long _linux_sznonrtsigcode;
197 
198 static void
199 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
200 {
201 	register struct proc *p = curproc;
202 	register struct trapframe *regs;
203 	struct linux_rt_sigframe *fp, frame;
204 	struct sigacts *psp = p->p_sigacts;
205 	int oonstack;
206 
207 	regs = p->p_md.md_regs;
208 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
209 
210 #ifdef DEBUG
211 	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
212 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
213 #endif
214 	/*
215 	 * Allocate space for the signal handler context.
216 	 */
217 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
218 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
219 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
220 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
221 		p->p_sigstk.ss_flags |= SS_ONSTACK;
222 	} else {
223 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
224 	}
225 
226 	/*
227 	 * grow() will return FALSE if the fp will not fit inside the stack
228 	 *	and the stack can not be grown. useracc will return FALSE
229 	 *	if access is denied.
230 	 */
231 	if ((grow_stack (p, (int)fp) == FALSE) ||
232 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
233 	    VM_PROT_WRITE)) {
234 		/*
235 		 * Process has trashed its stack; give it an illegal
236 		 * instruction to halt it in its tracks.
237 		 */
238 		SIGACTION(p, SIGILL) = SIG_DFL;
239 		SIGDELSET(p->p_sigignore, SIGILL);
240 		SIGDELSET(p->p_sigcatch, SIGILL);
241 		SIGDELSET(p->p_sigmask, SIGILL);
242 #ifdef DEBUG
243 		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, SS_ONSTACK: 0x%x ",
244 	    (long)p->p_pid, fp, p->p_sigstk.ss_flags & SS_ONSTACK);
245 #endif
246 		psignal(p, SIGILL);
247 		return;
248 	}
249 
250 	/*
251 	 * Build the argument list for the signal handler.
252 	 */
253 	if (p->p_sysent->sv_sigtbl)
254 		if (sig <= p->p_sysent->sv_sigsize)
255 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
256 
257 	frame.sf_handler = catcher;
258 	frame.sf_sig = sig;
259 
260 	frame.sf_siginfo = &fp->sf_si;
261 	frame.sf_ucontext = &fp->sf_sc;
262 	/* Fill siginfo structure. */
263 	frame.sf_si.lsi_signo = sig;
264 	frame.sf_si.lsi_code = code;
265 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
266 	/*
267 	 * Build the signal context to be used by sigreturn.
268 	 */
269 	frame.sf_sc.uc_mcontext.sc_mask   = mask->__bits[0];
270 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
271 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
272 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
273 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
274 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
275 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
276 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
277 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
278 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
279 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
280 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
281 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
282 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
283 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
284 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
285 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
286 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
287 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
288 
289 	/*
290 	 * Build the remainder of the ucontext struct to be used by sigreturn.
291 	 */
292 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
293 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
294 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
295 	frame.sf_sc.uc_stack.ss_flags =
296 	    bsd_to_linux_sigaltstack(p->p_sigstk.ss_flags);
297 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
298 #ifdef DEBUG
299 	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
300 	    (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,  p->p_sigstk.ss_sp,
301 	    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
302 #endif
303 	bsd_to_linux_sigset(&p->p_sigmask, &frame.sf_sc.uc_sigmask);
304 
305 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
306 		/*
307 		 * Process has trashed its stack; give it an illegal
308 		 * instruction to halt it in its tracks.
309 		 */
310 		sigexit(p, SIGILL);
311 		/* NOTREACHED */
312 	}
313 
314 	/*
315 	 * Build context to run handler in.
316 	 */
317 	regs->tf_esp = (int)fp;
318 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
319 	    _linux_sznonrtsigcode;
320 	regs->tf_eflags &= ~PSL_VM;
321 	regs->tf_cs = _ucodesel;
322 	regs->tf_ds = _udatasel;
323 	regs->tf_es = _udatasel;
324 	regs->tf_fs = _udatasel;
325 	load_gs(_udatasel);
326 	regs->tf_ss = _udatasel;
327 }
328 
329 
330 /*
331  * Send an interrupt to process.
332  *
333  * Stack is set up to allow sigcode stored
334  * in u. to call routine, followed by kcall
335  * to sigreturn routine below.  After sigreturn
336  * resets the signal mask, the stack, and the
337  * frame pointer, it returns to the user
338  * specified pc, psl.
339  */
340 
341 static void
342 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
343 {
344 	register struct proc *p = curproc;
345 	register struct trapframe *regs;
346 	struct linux_sigframe *fp, frame;
347 	struct sigacts *psp = p->p_sigacts;
348 	int oonstack;
349 
350 	regs = p->p_md.md_regs;
351 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
352 
353 #ifdef DEBUG
354 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
355 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
356 #endif
357 
358 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
359 		/* Signal handler installed with SA_SIGINFO. */
360 		linux_rt_sendsig(catcher, sig, mask, code);
361 		return;
362 	}
363 
364 	/*
365 	 * Allocate space for the signal handler context.
366 	 */
367 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
368 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
369 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
370 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
371 		p->p_sigstk.ss_flags |= SS_ONSTACK;
372 	} else {
373 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
374 	}
375 
376 	/*
377 	 * grow() will return FALSE if the fp will not fit inside the stack
378 	 *	and the stack can not be grown. useracc will return FALSE
379 	 *	if access is denied.
380 	 */
381 	if ((grow_stack (p, (int)fp) == FALSE) ||
382 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
383 	    VM_PROT_WRITE)) {
384 		/*
385 		 * Process has trashed its stack; give it an illegal
386 		 * instruction to halt it in its tracks.
387 		 */
388 		SIGACTION(p, SIGILL) = SIG_DFL;
389 		SIGDELSET(p->p_sigignore, SIGILL);
390 		SIGDELSET(p->p_sigcatch, SIGILL);
391 		SIGDELSET(p->p_sigmask, SIGILL);
392 		psignal(p, SIGILL);
393 		return;
394 	}
395 
396 	/*
397 	 * Build the argument list for the signal handler.
398 	 */
399 	if (p->p_sysent->sv_sigtbl)
400 		if (sig <= p->p_sysent->sv_sigsize)
401 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
402 
403 	frame.sf_handler = catcher;
404 	frame.sf_sig = sig;
405 
406 	/*
407 	 * Build the signal context to be used by sigreturn.
408 	 */
409 	frame.sf_sc.sc_mask   = mask->__bits[0];
410 	frame.sf_sc.sc_gs     = rgs();
411 	frame.sf_sc.sc_fs     = regs->tf_fs;
412 	frame.sf_sc.sc_es     = regs->tf_es;
413 	frame.sf_sc.sc_ds     = regs->tf_ds;
414 	frame.sf_sc.sc_edi    = regs->tf_edi;
415 	frame.sf_sc.sc_esi    = regs->tf_esi;
416 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
417 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
418 	frame.sf_sc.sc_edx    = regs->tf_edx;
419 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
420 	frame.sf_sc.sc_eax    = regs->tf_eax;
421 	frame.sf_sc.sc_eip    = regs->tf_eip;
422 	frame.sf_sc.sc_cs     = regs->tf_cs;
423 	frame.sf_sc.sc_eflags = regs->tf_eflags;
424 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
425 	frame.sf_sc.sc_ss     = regs->tf_ss;
426 	frame.sf_sc.sc_err    = regs->tf_err;
427 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
428 
429 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
430 		/*
431 		 * Process has trashed its stack; give it an illegal
432 		 * instruction to halt it in its tracks.
433 		 */
434 		sigexit(p, SIGILL);
435 		/* NOTREACHED */
436 	}
437 
438 	/*
439 	 * Build context to run handler in.
440 	 */
441 	regs->tf_esp = (int)fp;
442 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
443 	regs->tf_eflags &= ~PSL_VM;
444 	regs->tf_cs = _ucodesel;
445 	regs->tf_ds = _udatasel;
446 	regs->tf_es = _udatasel;
447 	regs->tf_fs = _udatasel;
448 	load_gs(_udatasel);
449 	regs->tf_ss = _udatasel;
450 }
451 
452 /*
453  * System call to cleanup state after a signal
454  * has been taken.  Reset signal mask and
455  * stack state from context left by sendsig (above).
456  * Return to previous pc and psl as specified by
457  * context left by sendsig. Check carefully to
458  * make sure that the user has not modified the
459  * psl to gain improper privileges or to cause
460  * a machine fault.
461  */
462 int
463 linux_sigreturn(p, args)
464 	struct proc *p;
465 	struct linux_sigreturn_args *args;
466 {
467 	struct linux_sigcontext context;
468 	register struct trapframe *regs;
469 	int eflags;
470 
471 	regs = p->p_md.md_regs;
472 
473 #ifdef DEBUG
474 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
475 	    (long)p->p_pid, (void *)args->scp);
476 #endif
477 	/*
478 	 * The trampoline code hands us the context.
479 	 * It is unsafe to keep track of it ourselves, in the event that a
480 	 * program jumps out of a signal handler.
481 	 */
482 	if (copyin((caddr_t)args->scp, &context, sizeof(context)) != 0)
483 		return (EFAULT);
484 
485 	/*
486 	 * Check for security violations.
487 	 */
488 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
489 	eflags = context.sc_eflags;
490 	/*
491 	 * XXX do allow users to change the privileged flag PSL_RF.  The
492 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
493 	 * sometimes set it there too.  tf_eflags is kept in the signal
494 	 * context during signal handling and there is no other place
495 	 * to remember it, so the PSL_RF bit may be corrupted by the
496 	 * signal handler without us knowing.  Corruption of the PSL_RF
497 	 * bit at worst causes one more or one less debugger trap, so
498 	 * allowing it is fairly harmless.
499 	 */
500 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
501     		return(EINVAL);
502 	}
503 
504 	/*
505 	 * Don't allow users to load a valid privileged %cs.  Let the
506 	 * hardware check for invalid selectors, excess privilege in
507 	 * other selectors, invalid %eip's and invalid %esp's.
508 	 */
509 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
510 	if (!CS_SECURE(context.sc_cs)) {
511 		trapsignal(p, SIGBUS, T_PROTFLT);
512 		return(EINVAL);
513 	}
514 
515 	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
516 	SIGSETOLD(p->p_sigmask, context.sc_mask);
517 	SIG_CANTMASK(p->p_sigmask);
518 
519 	/*
520 	 * Restore signal context.
521 	 */
522 	/* %gs was restored by the trampoline. */
523 	regs->tf_fs     = context.sc_fs;
524 	regs->tf_es     = context.sc_es;
525 	regs->tf_ds     = context.sc_ds;
526 	regs->tf_edi    = context.sc_edi;
527 	regs->tf_esi    = context.sc_esi;
528 	regs->tf_ebp    = context.sc_ebp;
529 	regs->tf_ebx    = context.sc_ebx;
530 	regs->tf_edx    = context.sc_edx;
531 	regs->tf_ecx    = context.sc_ecx;
532 	regs->tf_eax    = context.sc_eax;
533 	regs->tf_eip    = context.sc_eip;
534 	regs->tf_cs     = context.sc_cs;
535 	regs->tf_eflags = eflags;
536 	regs->tf_esp    = context.sc_esp_at_signal;
537 	regs->tf_ss     = context.sc_ss;
538 
539 	return (EJUSTRETURN);
540 }
541 
542 /*
543  * System call to cleanup state after a signal
544  * has been taken.  Reset signal mask and
545  * stack state from context left by rt_sendsig (above).
546  * Return to previous pc and psl as specified by
547  * context left by sendsig. Check carefully to
548  * make sure that the user has not modified the
549  * psl to gain improper privileges or to cause
550  * a machine fault.
551  */
552 int
553 linux_rt_sigreturn(p, args)
554 	struct proc *p;
555 	struct linux_rt_sigreturn_args *args;
556 {
557 	struct sigaltstack_args sasargs;
558 	struct linux_ucontext 	 uc;
559 	struct linux_sigcontext *context;
560 	linux_stack_t *lss;
561 	stack_t *ss;
562 	register struct trapframe *regs;
563 	int eflags;
564 	caddr_t sg = stackgap_init();
565 
566 	regs = p->p_md.md_regs;
567 
568 #ifdef DEBUG
569 	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
570 	    (long)p->p_pid, (void *)args->ucp);
571 #endif
572 	/*
573 	 * The trampoline code hands us the u_context.
574 	 * It is unsafe to keep track of it ourselves, in the event that a
575 	 * program jumps out of a signal handler.
576 	 */
577 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
578 		return (EFAULT);
579 
580 	context = &uc.uc_mcontext;
581 
582 	/*
583 	 * Check for security violations.
584 	 */
585 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
586 	eflags = context->sc_eflags;
587 	/*
588 	 * XXX do allow users to change the privileged flag PSL_RF.  The
589 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
590 	 * sometimes set it there too.  tf_eflags is kept in the signal
591 	 * context during signal handling and there is no other place
592 	 * to remember it, so the PSL_RF bit may be corrupted by the
593 	 * signal handler without us knowing.  Corruption of the PSL_RF
594 	 * bit at worst causes one more or one less debugger trap, so
595 	 * allowing it is fairly harmless.
596 	 */
597 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
598     		return(EINVAL);
599 	}
600 
601 	/*
602 	 * Don't allow users to load a valid privileged %cs.  Let the
603 	 * hardware check for invalid selectors, excess privilege in
604 	 * other selectors, invalid %eip's and invalid %esp's.
605 	 */
606 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
607 	if (!CS_SECURE(context->sc_cs)) {
608 		trapsignal(p, SIGBUS, T_PROTFLT);
609 		return(EINVAL);
610 	}
611 
612 	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
613 	SIGSETOLD(p->p_sigmask, context->sc_mask);
614 	SIG_CANTMASK(p->p_sigmask);
615 
616 	/*
617 	 * Restore signal context->
618 	 */
619 	/* %gs was restored by the trampoline. */
620 	regs->tf_fs     = context->sc_fs;
621 	regs->tf_es     = context->sc_es;
622 	regs->tf_ds     = context->sc_ds;
623 	regs->tf_edi    = context->sc_edi;
624 	regs->tf_esi    = context->sc_esi;
625 	regs->tf_ebp    = context->sc_ebp;
626 	regs->tf_ebx    = context->sc_ebx;
627 	regs->tf_edx    = context->sc_edx;
628 	regs->tf_ecx    = context->sc_ecx;
629 	regs->tf_eax    = context->sc_eax;
630 	regs->tf_eip    = context->sc_eip;
631 	regs->tf_cs     = context->sc_cs;
632 	regs->tf_eflags = eflags;
633 	regs->tf_esp    = context->sc_esp_at_signal;
634 	regs->tf_ss     = context->sc_ss;
635 
636 
637 	/*
638 	 * call sigaltstack & ignore results..
639 	 */
640 	ss = stackgap_alloc(&sg, sizeof(stack_t));
641 	lss = &uc.uc_stack;
642 	ss->ss_sp = lss->ss_sp;
643 	ss->ss_size = (lss->ss_size >= LINUX_MINSIGSTKSZ &&
644 	    lss->ss_size < MINSIGSTKSZ) ? MINSIGSTKSZ : lss->ss_size;
645 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
646 
647 #ifdef DEBUG
648 	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
649 	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
650 #endif
651 	sasargs.ss = ss;
652 	sasargs.oss = NULL;
653 	(void) sigaltstack(p, &sasargs);
654 
655 	return (EJUSTRETURN);
656 }
657 
658 static void
659 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
660 {
661 	args[0] = tf->tf_ebx;
662 	args[1] = tf->tf_ecx;
663 	args[2] = tf->tf_edx;
664 	args[3] = tf->tf_esi;
665 	args[4] = tf->tf_edi;
666 	*params = NULL;		/* no copyin */
667 }
668 
669 /*
670  * If a linux binary is exec'ing something, try this image activator
671  * first.  We override standard shell script execution in order to
672  * be able to modify the interpreter path.  We only do this if a linux
673  * binary is doing the exec, so we do not create an EXEC module for it.
674  */
675 static int	exec_linux_imgact_try __P((struct image_params *iparams));
676 
677 static int
678 exec_linux_imgact_try(imgp)
679     struct image_params *imgp;
680 {
681     const char *head = (const char *)imgp->image_header;
682     int error = -1;
683 
684     /*
685      * The interpreter for shell scripts run from a linux binary needs
686      * to be located in /compat/linux if possible in order to recursively
687      * maintain linux path emulation.
688      */
689     if (((const short *)head)[0] == SHELLMAGIC) {
690 	    /*
691 	     * Run our normal shell image activator.  If it succeeds attempt
692 	     * to use the alternate path for the interpreter.  If an alternate
693 	     * path is found, use our stringspace to store it.
694 	     */
695 	    if ((error = exec_shell_imgact(imgp)) == 0) {
696 		    char *rpath = NULL;
697 
698 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
699 			imgp->interpreter_name, &rpath, 0);
700 		    if (rpath != imgp->interpreter_name) {
701 			    int len = strlen(rpath) + 1;
702 
703 			    if (len <= MAXSHELLCMDLEN) {
704 				memcpy(imgp->interpreter_name, rpath, len);
705 			    }
706 			    free(rpath, M_TEMP);
707 		    }
708 	    }
709     }
710     return(error);
711 }
712 
713 struct sysentvec linux_sysvec = {
714 	LINUX_SYS_MAXSYSCALL,
715 	linux_sysent,
716 	0xff,
717 	LINUX_SIGTBLSZ,
718 	bsd_to_linux_signal,
719 	ELAST + 1,
720 	bsd_to_linux_errno,
721 	translate_traps,
722 	linux_fixup,
723 	linux_sendsig,
724 	linux_sigcode,
725 	&linux_szsigcode,
726 	linux_prepsyscall,
727 	"Linux a.out",
728 	aout_coredump,
729 	exec_linux_imgact_try
730 };
731 
732 struct sysentvec elf_linux_sysvec = {
733 	LINUX_SYS_MAXSYSCALL,
734 	linux_sysent,
735 	0xff,
736 	LINUX_SIGTBLSZ,
737 	bsd_to_linux_signal,
738 	ELAST + 1,
739 	bsd_to_linux_errno,
740 	translate_traps,
741 	elf_linux_fixup,
742 	linux_sendsig,
743 	linux_sigcode,
744 	&linux_szsigcode,
745 	linux_prepsyscall,
746 	"Linux ELF",
747 	elf_coredump,
748 	exec_linux_imgact_try
749 };
750 
751 static Elf32_Brandinfo linux_brand = {
752 					ELFOSABI_LINUX,
753 					"/compat/linux",
754 					"/lib/ld-linux.so.1",
755 					&elf_linux_sysvec
756 				 };
757 
758 static Elf32_Brandinfo linux_glibc2brand = {
759 					ELFOSABI_LINUX,
760 					"/compat/linux",
761 					"/lib/ld-linux.so.2",
762 					&elf_linux_sysvec
763 				 };
764 
765 Elf32_Brandinfo *linux_brandlist[] = {
766 					&linux_brand,
767 					&linux_glibc2brand,
768 					NULL
769 				};
770 
771 static int
772 linux_elf_modevent(module_t mod, int type, void *data)
773 {
774 	Elf32_Brandinfo **brandinfo;
775 	int error;
776 
777 	error = 0;
778 
779 	switch(type) {
780 	case MOD_LOAD:
781 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
782 		     ++brandinfo)
783 			if (elf_insert_brand_entry(*brandinfo) < 0)
784 				error = EINVAL;
785 		if (error)
786 			printf("cannot insert Linux elf brand handler\n");
787 		else {
788 			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
789 			if (bootverbose)
790 				printf("Linux-ELF exec handler installed\n");
791 		}
792 		break;
793 	case MOD_UNLOAD:
794 		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
795 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
796 		     ++brandinfo)
797 			if (elf_brand_inuse(*brandinfo))
798 				error = EBUSY;
799 
800 		if (error == 0) {
801 			for (brandinfo = &linux_brandlist[0];
802 			     *brandinfo != NULL; ++brandinfo)
803 				if (elf_remove_brand_entry(*brandinfo) < 0)
804 					error = EINVAL;
805 		}
806 		if (error)
807 			printf("Could not deinstall ELF interpreter entry\n");
808 		else if (bootverbose)
809 			printf("Linux-elf exec handler removed\n");
810 		break;
811 	default:
812 		break;
813 	}
814 	return error;
815 }
816 static moduledata_t linux_elf_mod = {
817 	"linuxelf",
818 	linux_elf_modevent,
819 	0
820 };
821 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
822