xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 35ccfc58d8065d58df2469c13fce9d3a60d53845)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysproto.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56 
57 #include <i386/linux/linux.h>
58 #include <i386/linux/linux_proto.h>
59 #include <compat/linux/linux_signal.h>
60 #include <compat/linux/linux_util.h>
61 
62 MODULE_VERSION(linux, 1);
63 
64 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
65 
66 #if BYTE_ORDER == LITTLE_ENDIAN
67 #define SHELLMAGIC      0x2123 /* #! */
68 #else
69 #define SHELLMAGIC      0x2321
70 #endif
71 
72 extern char linux_sigcode[];
73 extern int linux_szsigcode;
74 
75 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
76 
77 extern struct linker_set linux_ioctl_handler_set;
78 
79 static int	linux_fixup __P((register_t **stack_base,
80 				 struct image_params *iparams));
81 static int	elf_linux_fixup __P((register_t **stack_base,
82 				     struct image_params *iparams));
83 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
84 				       u_int *code, caddr_t *params));
85 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
86 				   u_long code));
87 
88 /*
89  * Linux syscalls return negative errno's, we do positive and map them
90  */
91 static int bsd_to_linux_errno[ELAST + 1] = {
92   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
93  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
94  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
95  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
96  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
97 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
98 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
99 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
100   	-6, -6, -43, -42, -75, -6, -84
101 };
102 
103 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
104 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
105 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
106 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
107 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
108 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
109 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
110 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
111 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
112 };
113 
114 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
115 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
116 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
117 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
118 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
119 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
120 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
121 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
122 	SIGIO, SIGURG, 0
123 };
124 
125 /*
126  * If FreeBSD & Linux have a difference of opinion about what a trap
127  * means, deal with it here.
128  */
129 static int
130 translate_traps(int signal, int trap_code)
131 {
132 	if (signal != SIGBUS)
133 		return signal;
134 	switch (trap_code) {
135 	case T_PROTFLT:
136 	case T_TSSFLT:
137 	case T_DOUBLEFLT:
138 	case T_PAGEFLT:
139 		return SIGSEGV;
140 	default:
141 		return signal;
142 	}
143 }
144 
145 static int
146 linux_fixup(register_t **stack_base, struct image_params *imgp)
147 {
148 	register_t *argv, *envp;
149 
150 	argv = *stack_base;
151 	envp = *stack_base + (imgp->argc + 1);
152 	(*stack_base)--;
153 	**stack_base = (intptr_t)(void *)envp;
154 	(*stack_base)--;
155 	**stack_base = (intptr_t)(void *)argv;
156 	(*stack_base)--;
157 	**stack_base = imgp->argc;
158 	return 0;
159 }
160 
161 static int
162 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
163 {
164 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
165 	register_t *pos;
166 
167 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
168 
169 	if (args->trace) {
170 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
171 	}
172 	if (args->execfd != -1) {
173 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
174 	}
175 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
176 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
177 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
178 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
179 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
180 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
181 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
182 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
183 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
184 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
185 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
186 	AUXARGS_ENTRY(pos, AT_NULL, 0);
187 
188 	free(imgp->auxargs, M_TEMP);
189 	imgp->auxargs = NULL;
190 
191 	(*stack_base)--;
192 	**stack_base = (long)imgp->argc;
193 	return 0;
194 }
195 
196 extern int _ucodesel, _udatasel;
197 extern unsigned long _linux_sznonrtsigcode;
198 
199 static void
200 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
201 {
202 	register struct proc *p = curproc;
203 	register struct trapframe *regs;
204 	struct linux_rt_sigframe *fp, frame;
205 	int oonstack;
206 
207 	regs = p->p_md.md_regs;
208 	oonstack = sigonstack(regs->tf_esp);
209 
210 #ifdef DEBUG
211 	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
212 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
213 #endif
214 	/*
215 	 * Allocate space for the signal handler context.
216 	 */
217 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
218 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
219 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
220 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
221 	} else
222 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
223 
224 	/*
225 	 * grow() will return FALSE if the fp will not fit inside the stack
226 	 *	and the stack can not be grown. useracc will return FALSE
227 	 *	if access is denied.
228 	 */
229 	if ((grow_stack (p, (int)fp) == FALSE) ||
230 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
231 	    VM_PROT_WRITE)) {
232 		/*
233 		 * Process has trashed its stack; give it an illegal
234 		 * instruction to halt it in its tracks.
235 		 */
236 		SIGACTION(p, SIGILL) = SIG_DFL;
237 		SIGDELSET(p->p_sigignore, SIGILL);
238 		SIGDELSET(p->p_sigcatch, SIGILL);
239 		SIGDELSET(p->p_sigmask, SIGILL);
240 #ifdef DEBUG
241 		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, "
242 		    "oonstack=%x\n", (long)p->p_pid, fp, oonstack);
243 #endif
244 		psignal(p, SIGILL);
245 		return;
246 	}
247 
248 	/*
249 	 * Build the argument list for the signal handler.
250 	 */
251 	if (p->p_sysent->sv_sigtbl)
252 		if (sig <= p->p_sysent->sv_sigsize)
253 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
254 
255 	frame.sf_handler = catcher;
256 	frame.sf_sig = sig;
257 	frame.sf_siginfo = &fp->sf_si;
258 	frame.sf_ucontext = &fp->sf_sc;
259 
260 	/* Fill siginfo structure. */
261 	frame.sf_si.lsi_signo = sig;
262 	frame.sf_si.lsi_code = code;
263 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
264 
265 	/*
266 	 * Build the signal context to be used by sigreturn.
267 	 */
268 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
269 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
270 
271 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
272 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
273 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
274 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
275 
276 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
277 
278 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
279 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
280 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
281 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
282 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
283 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
284 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
285 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
286 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
287 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
288 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
289 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
290 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
291 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
292 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
293 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
294 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
295 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
296 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
297 
298 #ifdef DEBUG
299 	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, "
300 	    "mask: 0x%x\n", (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,
301 	    p->p_sigstk.ss_sp, p->p_sigstk.ss_size,
302 	    frame.sf_sc.uc_mcontext.sc_mask);
303 #endif
304 
305 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
306 		/*
307 		 * Process has trashed its stack; give it an illegal
308 		 * instruction to halt it in its tracks.
309 		 */
310 		sigexit(p, SIGILL);
311 		/* NOTREACHED */
312 	}
313 
314 	/*
315 	 * Build context to run handler in.
316 	 */
317 	regs->tf_esp = (int)fp;
318 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
319 	    _linux_sznonrtsigcode;
320 	regs->tf_eflags &= ~PSL_VM;
321 	regs->tf_cs = _ucodesel;
322 	regs->tf_ds = _udatasel;
323 	regs->tf_es = _udatasel;
324 	regs->tf_fs = _udatasel;
325 	load_gs(_udatasel);
326 	regs->tf_ss = _udatasel;
327 }
328 
329 
330 /*
331  * Send an interrupt to process.
332  *
333  * Stack is set up to allow sigcode stored
334  * in u. to call routine, followed by kcall
335  * to sigreturn routine below.  After sigreturn
336  * resets the signal mask, the stack, and the
337  * frame pointer, it returns to the user
338  * specified pc, psl.
339  */
340 
341 static void
342 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
343 {
344 	register struct proc *p = curproc;
345 	register struct trapframe *regs;
346 	struct linux_sigframe *fp, frame;
347 	linux_sigset_t lmask;
348 	int oonstack, i;
349 
350 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
351 		/* Signal handler installed with SA_SIGINFO. */
352 		linux_rt_sendsig(catcher, sig, mask, code);
353 		return;
354 	}
355 
356 	regs = p->p_md.md_regs;
357 	oonstack = sigonstack(regs->tf_esp);
358 
359 #ifdef DEBUG
360 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
361 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
362 #endif
363 
364 	/*
365 	 * Allocate space for the signal handler context.
366 	 */
367 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
368 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
369 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
370 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
371 	} else
372 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
373 
374 	/*
375 	 * grow() will return FALSE if the fp will not fit inside the stack
376 	 *	and the stack can not be grown. useracc will return FALSE
377 	 *	if access is denied.
378 	 */
379 	if ((grow_stack (p, (int)fp) == FALSE) ||
380 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
381 	    VM_PROT_WRITE)) {
382 		/*
383 		 * Process has trashed its stack; give it an illegal
384 		 * instruction to halt it in its tracks.
385 		 */
386 		SIGACTION(p, SIGILL) = SIG_DFL;
387 		SIGDELSET(p->p_sigignore, SIGILL);
388 		SIGDELSET(p->p_sigcatch, SIGILL);
389 		SIGDELSET(p->p_sigmask, SIGILL);
390 		psignal(p, SIGILL);
391 		return;
392 	}
393 
394 	/*
395 	 * Build the argument list for the signal handler.
396 	 */
397 	if (p->p_sysent->sv_sigtbl)
398 		if (sig <= p->p_sysent->sv_sigsize)
399 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
400 
401 	frame.sf_handler = catcher;
402 	frame.sf_sig = sig;
403 
404 	bsd_to_linux_sigset(mask, &lmask);
405 
406 	/*
407 	 * Build the signal context to be used by sigreturn.
408 	 */
409 	frame.sf_sc.sc_mask   = lmask.__bits[0];
410 	frame.sf_sc.sc_gs     = rgs();
411 	frame.sf_sc.sc_fs     = regs->tf_fs;
412 	frame.sf_sc.sc_es     = regs->tf_es;
413 	frame.sf_sc.sc_ds     = regs->tf_ds;
414 	frame.sf_sc.sc_edi    = regs->tf_edi;
415 	frame.sf_sc.sc_esi    = regs->tf_esi;
416 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
417 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
418 	frame.sf_sc.sc_edx    = regs->tf_edx;
419 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
420 	frame.sf_sc.sc_eax    = regs->tf_eax;
421 	frame.sf_sc.sc_eip    = regs->tf_eip;
422 	frame.sf_sc.sc_cs     = regs->tf_cs;
423 	frame.sf_sc.sc_eflags = regs->tf_eflags;
424 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
425 	frame.sf_sc.sc_ss     = regs->tf_ss;
426 	frame.sf_sc.sc_err    = regs->tf_err;
427 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
428 
429 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
430 
431 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
432 		frame.sf_extramask[i] = lmask.__bits[i+1];
433 
434 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
435 		/*
436 		 * Process has trashed its stack; give it an illegal
437 		 * instruction to halt it in its tracks.
438 		 */
439 		sigexit(p, SIGILL);
440 		/* NOTREACHED */
441 	}
442 
443 	/*
444 	 * Build context to run handler in.
445 	 */
446 	regs->tf_esp = (int)fp;
447 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
448 	regs->tf_eflags &= ~PSL_VM;
449 	regs->tf_cs = _ucodesel;
450 	regs->tf_ds = _udatasel;
451 	regs->tf_es = _udatasel;
452 	regs->tf_fs = _udatasel;
453 	load_gs(_udatasel);
454 	regs->tf_ss = _udatasel;
455 }
456 
457 /*
458  * System call to cleanup state after a signal
459  * has been taken.  Reset signal mask and
460  * stack state from context left by sendsig (above).
461  * Return to previous pc and psl as specified by
462  * context left by sendsig. Check carefully to
463  * make sure that the user has not modified the
464  * psl to gain improper privileges or to cause
465  * a machine fault.
466  */
467 int
468 linux_sigreturn(p, args)
469 	struct proc *p;
470 	struct linux_sigreturn_args *args;
471 {
472 	struct linux_sigframe frame;
473 	register struct trapframe *regs;
474 	linux_sigset_t lmask;
475 	int eflags, i;
476 
477 	regs = p->p_md.md_regs;
478 
479 #ifdef DEBUG
480 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
481 	    (long)p->p_pid, (void *)args->sfp);
482 #endif
483 	/*
484 	 * The trampoline code hands us the sigframe.
485 	 * It is unsafe to keep track of it ourselves, in the event that a
486 	 * program jumps out of a signal handler.
487 	 */
488 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
489 		return (EFAULT);
490 
491 	/*
492 	 * Check for security violations.
493 	 */
494 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
495 	eflags = frame.sf_sc.sc_eflags;
496 	/*
497 	 * XXX do allow users to change the privileged flag PSL_RF.  The
498 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
499 	 * sometimes set it there too.  tf_eflags is kept in the signal
500 	 * context during signal handling and there is no other place
501 	 * to remember it, so the PSL_RF bit may be corrupted by the
502 	 * signal handler without us knowing.  Corruption of the PSL_RF
503 	 * bit at worst causes one more or one less debugger trap, so
504 	 * allowing it is fairly harmless.
505 	 */
506 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
507     		return(EINVAL);
508 	}
509 
510 	/*
511 	 * Don't allow users to load a valid privileged %cs.  Let the
512 	 * hardware check for invalid selectors, excess privilege in
513 	 * other selectors, invalid %eip's and invalid %esp's.
514 	 */
515 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
516 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
517 		trapsignal(p, SIGBUS, T_PROTFLT);
518 		return(EINVAL);
519 	}
520 
521 	lmask.__bits[0] = frame.sf_sc.sc_mask;
522 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
523 		lmask.__bits[i+1] = frame.sf_extramask[i];
524 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
525 	SIG_CANTMASK(p->p_sigmask);
526 
527 	/*
528 	 * Restore signal context.
529 	 */
530 	/* %gs was restored by the trampoline. */
531 	regs->tf_fs     = frame.sf_sc.sc_fs;
532 	regs->tf_es     = frame.sf_sc.sc_es;
533 	regs->tf_ds     = frame.sf_sc.sc_ds;
534 	regs->tf_edi    = frame.sf_sc.sc_edi;
535 	regs->tf_esi    = frame.sf_sc.sc_esi;
536 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
537 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
538 	regs->tf_edx    = frame.sf_sc.sc_edx;
539 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
540 	regs->tf_eax    = frame.sf_sc.sc_eax;
541 	regs->tf_eip    = frame.sf_sc.sc_eip;
542 	regs->tf_cs     = frame.sf_sc.sc_cs;
543 	regs->tf_eflags = eflags;
544 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
545 	regs->tf_ss     = frame.sf_sc.sc_ss;
546 
547 	return (EJUSTRETURN);
548 }
549 
550 /*
551  * System call to cleanup state after a signal
552  * has been taken.  Reset signal mask and
553  * stack state from context left by rt_sendsig (above).
554  * Return to previous pc and psl as specified by
555  * context left by sendsig. Check carefully to
556  * make sure that the user has not modified the
557  * psl to gain improper privileges or to cause
558  * a machine fault.
559  */
560 int
561 linux_rt_sigreturn(p, args)
562 	struct proc *p;
563 	struct linux_rt_sigreturn_args *args;
564 {
565 	struct sigaltstack_args sasargs;
566 	struct linux_ucontext 	 uc;
567 	struct linux_sigcontext *context;
568 	linux_stack_t *lss;
569 	stack_t *ss;
570 	register struct trapframe *regs;
571 	int eflags;
572 	caddr_t sg = stackgap_init();
573 
574 	regs = p->p_md.md_regs;
575 
576 #ifdef DEBUG
577 	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
578 	    (long)p->p_pid, (void *)args->ucp);
579 #endif
580 	/*
581 	 * The trampoline code hands us the ucontext.
582 	 * It is unsafe to keep track of it ourselves, in the event that a
583 	 * program jumps out of a signal handler.
584 	 */
585 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
586 		return (EFAULT);
587 
588 	context = &uc.uc_mcontext;
589 
590 	/*
591 	 * Check for security violations.
592 	 */
593 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
594 	eflags = context->sc_eflags;
595 	/*
596 	 * XXX do allow users to change the privileged flag PSL_RF.  The
597 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
598 	 * sometimes set it there too.  tf_eflags is kept in the signal
599 	 * context during signal handling and there is no other place
600 	 * to remember it, so the PSL_RF bit may be corrupted by the
601 	 * signal handler without us knowing.  Corruption of the PSL_RF
602 	 * bit at worst causes one more or one less debugger trap, so
603 	 * allowing it is fairly harmless.
604 	 */
605 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
606     		return(EINVAL);
607 	}
608 
609 	/*
610 	 * Don't allow users to load a valid privileged %cs.  Let the
611 	 * hardware check for invalid selectors, excess privilege in
612 	 * other selectors, invalid %eip's and invalid %esp's.
613 	 */
614 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
615 	if (!CS_SECURE(context->sc_cs)) {
616 		trapsignal(p, SIGBUS, T_PROTFLT);
617 		return(EINVAL);
618 	}
619 
620 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
621 	SIG_CANTMASK(p->p_sigmask);
622 
623 	/*
624 	 * Restore signal context
625 	 */
626 	/* %gs was restored by the trampoline. */
627 	regs->tf_fs     = context->sc_fs;
628 	regs->tf_es     = context->sc_es;
629 	regs->tf_ds     = context->sc_ds;
630 	regs->tf_edi    = context->sc_edi;
631 	regs->tf_esi    = context->sc_esi;
632 	regs->tf_ebp    = context->sc_ebp;
633 	regs->tf_ebx    = context->sc_ebx;
634 	regs->tf_edx    = context->sc_edx;
635 	regs->tf_ecx    = context->sc_ecx;
636 	regs->tf_eax    = context->sc_eax;
637 	regs->tf_eip    = context->sc_eip;
638 	regs->tf_cs     = context->sc_cs;
639 	regs->tf_eflags = eflags;
640 	regs->tf_esp    = context->sc_esp_at_signal;
641 	regs->tf_ss     = context->sc_ss;
642 
643 	/*
644 	 * call sigaltstack & ignore results..
645 	 */
646 	ss = stackgap_alloc(&sg, sizeof(stack_t));
647 	lss = &uc.uc_stack;
648 	ss->ss_sp = lss->ss_sp;
649 	ss->ss_size = lss->ss_size;
650 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
651 
652 #ifdef DEBUG
653 	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
654 	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
655 #endif
656 	sasargs.ss = ss;
657 	sasargs.oss = NULL;
658 	(void) sigaltstack(p, &sasargs);
659 
660 	return (EJUSTRETURN);
661 }
662 
663 static void
664 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
665 {
666 	args[0] = tf->tf_ebx;
667 	args[1] = tf->tf_ecx;
668 	args[2] = tf->tf_edx;
669 	args[3] = tf->tf_esi;
670 	args[4] = tf->tf_edi;
671 	*params = NULL;		/* no copyin */
672 }
673 
674 /*
675  * If a linux binary is exec'ing something, try this image activator
676  * first.  We override standard shell script execution in order to
677  * be able to modify the interpreter path.  We only do this if a linux
678  * binary is doing the exec, so we do not create an EXEC module for it.
679  */
680 static int	exec_linux_imgact_try __P((struct image_params *iparams));
681 
682 static int
683 exec_linux_imgact_try(imgp)
684     struct image_params *imgp;
685 {
686     const char *head = (const char *)imgp->image_header;
687     int error = -1;
688 
689     /*
690      * The interpreter for shell scripts run from a linux binary needs
691      * to be located in /compat/linux if possible in order to recursively
692      * maintain linux path emulation.
693      */
694     if (((const short *)head)[0] == SHELLMAGIC) {
695 	    /*
696 	     * Run our normal shell image activator.  If it succeeds attempt
697 	     * to use the alternate path for the interpreter.  If an alternate
698 	     * path is found, use our stringspace to store it.
699 	     */
700 	    if ((error = exec_shell_imgact(imgp)) == 0) {
701 		    char *rpath = NULL;
702 
703 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
704 			imgp->interpreter_name, &rpath, 0);
705 		    if (rpath != imgp->interpreter_name) {
706 			    int len = strlen(rpath) + 1;
707 
708 			    if (len <= MAXSHELLCMDLEN) {
709 				memcpy(imgp->interpreter_name, rpath, len);
710 			    }
711 			    free(rpath, M_TEMP);
712 		    }
713 	    }
714     }
715     return(error);
716 }
717 
718 struct sysentvec linux_sysvec = {
719 	LINUX_SYS_MAXSYSCALL,
720 	linux_sysent,
721 	0xff,
722 	LINUX_SIGTBLSZ,
723 	bsd_to_linux_signal,
724 	ELAST + 1,
725 	bsd_to_linux_errno,
726 	translate_traps,
727 	linux_fixup,
728 	linux_sendsig,
729 	linux_sigcode,
730 	&linux_szsigcode,
731 	linux_prepsyscall,
732 	"Linux a.out",
733 	aout_coredump,
734 	exec_linux_imgact_try,
735 	LINUX_MINSIGSTKSZ
736 };
737 
738 struct sysentvec elf_linux_sysvec = {
739 	LINUX_SYS_MAXSYSCALL,
740 	linux_sysent,
741 	0xff,
742 	LINUX_SIGTBLSZ,
743 	bsd_to_linux_signal,
744 	ELAST + 1,
745 	bsd_to_linux_errno,
746 	translate_traps,
747 	elf_linux_fixup,
748 	linux_sendsig,
749 	linux_sigcode,
750 	&linux_szsigcode,
751 	linux_prepsyscall,
752 	"Linux ELF",
753 	elf_coredump,
754 	exec_linux_imgact_try,
755 	LINUX_MINSIGSTKSZ
756 };
757 
758 static Elf32_Brandinfo linux_brand = {
759 					ELFOSABI_LINUX,
760 					"/compat/linux",
761 					"/lib/ld-linux.so.1",
762 					&elf_linux_sysvec
763 				 };
764 
765 static Elf32_Brandinfo linux_glibc2brand = {
766 					ELFOSABI_LINUX,
767 					"/compat/linux",
768 					"/lib/ld-linux.so.2",
769 					&elf_linux_sysvec
770 				 };
771 
772 Elf32_Brandinfo *linux_brandlist[] = {
773 					&linux_brand,
774 					&linux_glibc2brand,
775 					NULL
776 				};
777 
778 static int
779 linux_elf_modevent(module_t mod, int type, void *data)
780 {
781 	Elf32_Brandinfo **brandinfo;
782 	int error;
783 
784 	error = 0;
785 
786 	switch(type) {
787 	case MOD_LOAD:
788 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
789 		     ++brandinfo)
790 			if (elf_insert_brand_entry(*brandinfo) < 0)
791 				error = EINVAL;
792 		if (error == 0) {
793 			linux_ioctl_register_handlers(
794 				&linux_ioctl_handler_set);
795 			if (bootverbose)
796 				printf("Linux ELF exec handler installed\n");
797 		} else
798 			printf("cannot insert Linux ELF brand handler\n");
799 		break;
800 	case MOD_UNLOAD:
801 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
802 		     ++brandinfo)
803 			if (elf_brand_inuse(*brandinfo))
804 				error = EBUSY;
805 		if (error == 0) {
806 			for (brandinfo = &linux_brandlist[0];
807 			     *brandinfo != NULL; ++brandinfo)
808 				if (elf_remove_brand_entry(*brandinfo) < 0)
809 					error = EINVAL;
810 		}
811 		if (error == 0) {
812 			linux_ioctl_unregister_handlers(
813 				&linux_ioctl_handler_set);
814 			if (bootverbose)
815 				printf("Linux ELF exec handler removed\n");
816 		} else
817 			printf("Could not deinstall ELF interpreter entry\n");
818 		break;
819 	default:
820 		break;
821 	}
822 	return error;
823 }
824 
825 static moduledata_t linux_elf_mod = {
826 	"linuxelf",
827 	linux_elf_modevent,
828 	0
829 };
830 
831 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
832