xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision ba4c5e6950b14ce5eb69bc5d953574f203e01dc4)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysproto.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56 
57 #include <i386/linux/linux.h>
58 #include <i386/linux/linux_proto.h>
59 #include <compat/linux/linux_signal.h>
60 #include <compat/linux/linux_util.h>
61 
62 MODULE_VERSION(linux, 1);
63 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
64 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
65 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
66 
67 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
68 
/*
 * The two leading bytes of a "#!" script as they appear when the start
 * of the image header is read as one native-endian 16-bit value.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
74 
75 extern char linux_sigcode[];
76 extern int linux_szsigcode;
77 
78 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
79 
80 extern struct linker_set linux_ioctl_handler_set;
81 
82 static int	linux_fixup __P((register_t **stack_base,
83 				 struct image_params *iparams));
84 static int	elf_linux_fixup __P((register_t **stack_base,
85 				     struct image_params *iparams));
86 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
87 				       u_int *code, caddr_t *params));
88 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
89 				   u_long code));
90 
91 /*
92  * Linux syscalls return negative errno's, we do positive and map them
93  */
94 static int bsd_to_linux_errno[ELAST + 1] = {
95   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
96  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
97  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
98  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
99  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
100 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
101 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
102 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
103   	-6, -6, -43, -42, -75, -6, -84
104 };
105 
106 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
107 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
108 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
109 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
110 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
111 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
112 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
113 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
114 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
115 };
116 
117 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
118 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
119 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
120 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
121 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
122 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
123 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
124 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
125 	SIGIO, SIGURG, 0
126 };
127 
128 /*
129  * If FreeBSD & Linux have a difference of opinion about what a trap
130  * means, deal with it here.
131  */
132 static int
133 translate_traps(int signal, int trap_code)
134 {
135 	if (signal != SIGBUS)
136 		return signal;
137 	switch (trap_code) {
138 	case T_PROTFLT:
139 	case T_TSSFLT:
140 	case T_DOUBLEFLT:
141 	case T_PAGEFLT:
142 		return SIGSEGV;
143 	default:
144 		return signal;
145 	}
146 }
147 
148 static int
149 linux_fixup(register_t **stack_base, struct image_params *imgp)
150 {
151 	register_t *argv, *envp;
152 
153 	argv = *stack_base;
154 	envp = *stack_base + (imgp->argc + 1);
155 	(*stack_base)--;
156 	**stack_base = (intptr_t)(void *)envp;
157 	(*stack_base)--;
158 	**stack_base = (intptr_t)(void *)argv;
159 	(*stack_base)--;
160 	**stack_base = imgp->argc;
161 	return 0;
162 }
163 
164 static int
165 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
166 {
167 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
168 	register_t *pos;
169 
170 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
171 
172 	if (args->trace) {
173 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
174 	}
175 	if (args->execfd != -1) {
176 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
177 	}
178 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
179 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
180 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
181 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
182 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
183 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
184 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
185 	PROC_LOCK(imgp->proc);
186 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
187 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
188 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
189 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
190 	PROC_UNLOCK(imgp->proc);
191 	AUXARGS_ENTRY(pos, AT_NULL, 0);
192 
193 	free(imgp->auxargs, M_TEMP);
194 	imgp->auxargs = NULL;
195 
196 	(*stack_base)--;
197 	**stack_base = (long)imgp->argc;
198 	return 0;
199 }
200 
201 extern int _ucodesel, _udatasel;
202 extern unsigned long _linux_sznonrtsigcode;
203 
204 static void
205 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
206 {
207 	register struct proc *p = curproc;
208 	register struct trapframe *regs;
209 	struct linux_rt_sigframe *fp, frame;
210 	int oonstack;
211 
212 	regs = p->p_md.md_regs;
213 	oonstack = sigonstack(regs->tf_esp);
214 
215 #ifdef DEBUG
216 	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
217 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
218 #endif
219 	/*
220 	 * Allocate space for the signal handler context.
221 	 */
222 	PROC_LOCK(p);
223 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
224 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
225 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
226 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
227 	} else
228 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
229 	PROC_UNLOCK(p);
230 
231 	/*
232 	 * grow() will return FALSE if the fp will not fit inside the stack
233 	 *	and the stack can not be grown. useracc will return FALSE
234 	 *	if access is denied.
235 	 */
236 	if ((grow_stack (p, (int)fp) == FALSE) ||
237 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
238 	    VM_PROT_WRITE)) {
239 		/*
240 		 * Process has trashed its stack; give it an illegal
241 		 * instruction to halt it in its tracks.
242 		 */
243 		PROC_LOCK(p);
244 		SIGACTION(p, SIGILL) = SIG_DFL;
245 		SIGDELSET(p->p_sigignore, SIGILL);
246 		SIGDELSET(p->p_sigcatch, SIGILL);
247 		SIGDELSET(p->p_sigmask, SIGILL);
248 		PROC_UNLOCK(p);
249 #ifdef DEBUG
250 		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, "
251 		    "oonstack=%x\n", (long)p->p_pid, fp, oonstack);
252 #endif
253 		psignal(p, SIGILL);
254 		return;
255 	}
256 
257 	/*
258 	 * Build the argument list for the signal handler.
259 	 */
260 	if (p->p_sysent->sv_sigtbl)
261 		if (sig <= p->p_sysent->sv_sigsize)
262 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
263 
264 	frame.sf_handler = catcher;
265 	frame.sf_sig = sig;
266 	frame.sf_siginfo = &fp->sf_si;
267 	frame.sf_ucontext = &fp->sf_sc;
268 
269 	/* Fill siginfo structure. */
270 	frame.sf_si.lsi_signo = sig;
271 	frame.sf_si.lsi_code = code;
272 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
273 
274 	/*
275 	 * Build the signal context to be used by sigreturn.
276 	 */
277 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
278 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
279 
280 	PROC_LOCK(p);
281 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
282 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
283 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
284 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
285 	PROC_UNLOCK(p);
286 
287 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
288 
289 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
290 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
291 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
292 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
293 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
294 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
295 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
296 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
297 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
298 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
299 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
300 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
301 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
302 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
303 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
304 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
305 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
306 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
307 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
308 
309 #ifdef DEBUG
310 	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, "
311 	    "mask: 0x%x\n", (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,
312 	    p->p_sigstk.ss_sp, p->p_sigstk.ss_size,
313 	    frame.sf_sc.uc_mcontext.sc_mask);
314 #endif
315 
316 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
317 		/*
318 		 * Process has trashed its stack; give it an illegal
319 		 * instruction to halt it in its tracks.
320 		 */
321 		sigexit(p, SIGILL);
322 		/* NOTREACHED */
323 	}
324 
325 	/*
326 	 * Build context to run handler in.
327 	 */
328 	regs->tf_esp = (int)fp;
329 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
330 	    _linux_sznonrtsigcode;
331 	regs->tf_eflags &= ~PSL_VM;
332 	regs->tf_cs = _ucodesel;
333 	regs->tf_ds = _udatasel;
334 	regs->tf_es = _udatasel;
335 	regs->tf_fs = _udatasel;
336 	load_gs(_udatasel);
337 	regs->tf_ss = _udatasel;
338 }
339 
340 
341 /*
342  * Send an interrupt to process.
343  *
344  * Stack is set up to allow sigcode stored
345  * in u. to call routine, followed by kcall
346  * to sigreturn routine below.  After sigreturn
347  * resets the signal mask, the stack, and the
348  * frame pointer, it returns to the user
349  * specified pc, psl.
350  */
351 
352 static void
353 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
354 {
355 	register struct proc *p = curproc;
356 	register struct trapframe *regs;
357 	struct linux_sigframe *fp, frame;
358 	linux_sigset_t lmask;
359 	int oonstack, i;
360 
361 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
362 		/* Signal handler installed with SA_SIGINFO. */
363 		linux_rt_sendsig(catcher, sig, mask, code);
364 		return;
365 	}
366 
367 	regs = p->p_md.md_regs;
368 	oonstack = sigonstack(regs->tf_esp);
369 
370 #ifdef DEBUG
371 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
372 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
373 #endif
374 
375 	/*
376 	 * Allocate space for the signal handler context.
377 	 */
378 	PROC_LOCK(p);
379 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
380 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
381 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
382 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
383 	} else
384 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
385 	PROC_UNLOCK(p);
386 
387 	/*
388 	 * grow() will return FALSE if the fp will not fit inside the stack
389 	 *	and the stack can not be grown. useracc will return FALSE
390 	 *	if access is denied.
391 	 */
392 	if ((grow_stack (p, (int)fp) == FALSE) ||
393 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
394 	    VM_PROT_WRITE)) {
395 		/*
396 		 * Process has trashed its stack; give it an illegal
397 		 * instruction to halt it in its tracks.
398 		 */
399 		PROC_LOCK(p);
400 		SIGACTION(p, SIGILL) = SIG_DFL;
401 		SIGDELSET(p->p_sigignore, SIGILL);
402 		SIGDELSET(p->p_sigcatch, SIGILL);
403 		SIGDELSET(p->p_sigmask, SIGILL);
404 		PROC_UNLOCK(p);
405 		psignal(p, SIGILL);
406 		return;
407 	}
408 
409 	/*
410 	 * Build the argument list for the signal handler.
411 	 */
412 	if (p->p_sysent->sv_sigtbl)
413 		if (sig <= p->p_sysent->sv_sigsize)
414 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
415 
416 	frame.sf_handler = catcher;
417 	frame.sf_sig = sig;
418 
419 	bsd_to_linux_sigset(mask, &lmask);
420 
421 	/*
422 	 * Build the signal context to be used by sigreturn.
423 	 */
424 	frame.sf_sc.sc_mask   = lmask.__bits[0];
425 	frame.sf_sc.sc_gs     = rgs();
426 	frame.sf_sc.sc_fs     = regs->tf_fs;
427 	frame.sf_sc.sc_es     = regs->tf_es;
428 	frame.sf_sc.sc_ds     = regs->tf_ds;
429 	frame.sf_sc.sc_edi    = regs->tf_edi;
430 	frame.sf_sc.sc_esi    = regs->tf_esi;
431 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
432 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
433 	frame.sf_sc.sc_edx    = regs->tf_edx;
434 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
435 	frame.sf_sc.sc_eax    = regs->tf_eax;
436 	frame.sf_sc.sc_eip    = regs->tf_eip;
437 	frame.sf_sc.sc_cs     = regs->tf_cs;
438 	frame.sf_sc.sc_eflags = regs->tf_eflags;
439 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
440 	frame.sf_sc.sc_ss     = regs->tf_ss;
441 	frame.sf_sc.sc_err    = regs->tf_err;
442 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
443 
444 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
445 
446 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
447 		frame.sf_extramask[i] = lmask.__bits[i+1];
448 
449 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
450 		/*
451 		 * Process has trashed its stack; give it an illegal
452 		 * instruction to halt it in its tracks.
453 		 */
454 		sigexit(p, SIGILL);
455 		/* NOTREACHED */
456 	}
457 
458 	/*
459 	 * Build context to run handler in.
460 	 */
461 	regs->tf_esp = (int)fp;
462 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
463 	regs->tf_eflags &= ~PSL_VM;
464 	regs->tf_cs = _ucodesel;
465 	regs->tf_ds = _udatasel;
466 	regs->tf_es = _udatasel;
467 	regs->tf_fs = _udatasel;
468 	load_gs(_udatasel);
469 	regs->tf_ss = _udatasel;
470 }
471 
472 /*
473  * System call to cleanup state after a signal
474  * has been taken.  Reset signal mask and
475  * stack state from context left by sendsig (above).
476  * Return to previous pc and psl as specified by
477  * context left by sendsig. Check carefully to
478  * make sure that the user has not modified the
479  * psl to gain improper privileges or to cause
480  * a machine fault.
481  */
482 int
483 linux_sigreturn(p, args)
484 	struct proc *p;
485 	struct linux_sigreturn_args *args;
486 {
487 	struct linux_sigframe frame;
488 	register struct trapframe *regs;
489 	linux_sigset_t lmask;
490 	int eflags, i;
491 
492 	regs = p->p_md.md_regs;
493 
494 #ifdef DEBUG
495 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
496 	    (long)p->p_pid, (void *)args->sfp);
497 #endif
498 	/*
499 	 * The trampoline code hands us the sigframe.
500 	 * It is unsafe to keep track of it ourselves, in the event that a
501 	 * program jumps out of a signal handler.
502 	 */
503 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
504 		return (EFAULT);
505 
506 	/*
507 	 * Check for security violations.
508 	 */
509 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
510 	eflags = frame.sf_sc.sc_eflags;
511 	/*
512 	 * XXX do allow users to change the privileged flag PSL_RF.  The
513 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
514 	 * sometimes set it there too.  tf_eflags is kept in the signal
515 	 * context during signal handling and there is no other place
516 	 * to remember it, so the PSL_RF bit may be corrupted by the
517 	 * signal handler without us knowing.  Corruption of the PSL_RF
518 	 * bit at worst causes one more or one less debugger trap, so
519 	 * allowing it is fairly harmless.
520 	 */
521 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
522     		return(EINVAL);
523 	}
524 
525 	/*
526 	 * Don't allow users to load a valid privileged %cs.  Let the
527 	 * hardware check for invalid selectors, excess privilege in
528 	 * other selectors, invalid %eip's and invalid %esp's.
529 	 */
530 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
531 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
532 		trapsignal(p, SIGBUS, T_PROTFLT);
533 		return(EINVAL);
534 	}
535 
536 	lmask.__bits[0] = frame.sf_sc.sc_mask;
537 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
538 		lmask.__bits[i+1] = frame.sf_extramask[i];
539 	PROC_LOCK(p);
540 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
541 	SIG_CANTMASK(p->p_sigmask);
542 	PROC_UNLOCK(p);
543 
544 	/*
545 	 * Restore signal context.
546 	 */
547 	/* %gs was restored by the trampoline. */
548 	regs->tf_fs     = frame.sf_sc.sc_fs;
549 	regs->tf_es     = frame.sf_sc.sc_es;
550 	regs->tf_ds     = frame.sf_sc.sc_ds;
551 	regs->tf_edi    = frame.sf_sc.sc_edi;
552 	regs->tf_esi    = frame.sf_sc.sc_esi;
553 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
554 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
555 	regs->tf_edx    = frame.sf_sc.sc_edx;
556 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
557 	regs->tf_eax    = frame.sf_sc.sc_eax;
558 	regs->tf_eip    = frame.sf_sc.sc_eip;
559 	regs->tf_cs     = frame.sf_sc.sc_cs;
560 	regs->tf_eflags = eflags;
561 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
562 	regs->tf_ss     = frame.sf_sc.sc_ss;
563 
564 	return (EJUSTRETURN);
565 }
566 
567 /*
568  * System call to cleanup state after a signal
569  * has been taken.  Reset signal mask and
570  * stack state from context left by rt_sendsig (above).
571  * Return to previous pc and psl as specified by
572  * context left by sendsig. Check carefully to
573  * make sure that the user has not modified the
574  * psl to gain improper privileges or to cause
575  * a machine fault.
576  */
577 int
578 linux_rt_sigreturn(p, args)
579 	struct proc *p;
580 	struct linux_rt_sigreturn_args *args;
581 {
582 	struct sigaltstack_args sasargs;
583 	struct linux_ucontext 	 uc;
584 	struct linux_sigcontext *context;
585 	linux_stack_t *lss;
586 	stack_t *ss;
587 	register struct trapframe *regs;
588 	int eflags;
589 	caddr_t sg = stackgap_init();
590 
591 	regs = p->p_md.md_regs;
592 
593 #ifdef DEBUG
594 	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
595 	    (long)p->p_pid, (void *)args->ucp);
596 #endif
597 	/*
598 	 * The trampoline code hands us the ucontext.
599 	 * It is unsafe to keep track of it ourselves, in the event that a
600 	 * program jumps out of a signal handler.
601 	 */
602 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
603 		return (EFAULT);
604 
605 	context = &uc.uc_mcontext;
606 
607 	/*
608 	 * Check for security violations.
609 	 */
610 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
611 	eflags = context->sc_eflags;
612 	/*
613 	 * XXX do allow users to change the privileged flag PSL_RF.  The
614 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
615 	 * sometimes set it there too.  tf_eflags is kept in the signal
616 	 * context during signal handling and there is no other place
617 	 * to remember it, so the PSL_RF bit may be corrupted by the
618 	 * signal handler without us knowing.  Corruption of the PSL_RF
619 	 * bit at worst causes one more or one less debugger trap, so
620 	 * allowing it is fairly harmless.
621 	 */
622 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
623     		return(EINVAL);
624 	}
625 
626 	/*
627 	 * Don't allow users to load a valid privileged %cs.  Let the
628 	 * hardware check for invalid selectors, excess privilege in
629 	 * other selectors, invalid %eip's and invalid %esp's.
630 	 */
631 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
632 	if (!CS_SECURE(context->sc_cs)) {
633 		trapsignal(p, SIGBUS, T_PROTFLT);
634 		return(EINVAL);
635 	}
636 
637 	PROC_LOCK(p);
638 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
639 	SIG_CANTMASK(p->p_sigmask);
640 	PROC_UNLOCK(p);
641 
642 	/*
643 	 * Restore signal context
644 	 */
645 	/* %gs was restored by the trampoline. */
646 	regs->tf_fs     = context->sc_fs;
647 	regs->tf_es     = context->sc_es;
648 	regs->tf_ds     = context->sc_ds;
649 	regs->tf_edi    = context->sc_edi;
650 	regs->tf_esi    = context->sc_esi;
651 	regs->tf_ebp    = context->sc_ebp;
652 	regs->tf_ebx    = context->sc_ebx;
653 	regs->tf_edx    = context->sc_edx;
654 	regs->tf_ecx    = context->sc_ecx;
655 	regs->tf_eax    = context->sc_eax;
656 	regs->tf_eip    = context->sc_eip;
657 	regs->tf_cs     = context->sc_cs;
658 	regs->tf_eflags = eflags;
659 	regs->tf_esp    = context->sc_esp_at_signal;
660 	regs->tf_ss     = context->sc_ss;
661 
662 	/*
663 	 * call sigaltstack & ignore results..
664 	 */
665 	ss = stackgap_alloc(&sg, sizeof(stack_t));
666 	lss = &uc.uc_stack;
667 	ss->ss_sp = lss->ss_sp;
668 	ss->ss_size = lss->ss_size;
669 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
670 
671 #ifdef DEBUG
672 	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
673 	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
674 #endif
675 	sasargs.ss = ss;
676 	sasargs.oss = NULL;
677 	(void) sigaltstack(p, &sasargs);
678 
679 	return (EJUSTRETURN);
680 }
681 
682 static void
683 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
684 {
685 	args[0] = tf->tf_ebx;
686 	args[1] = tf->tf_ecx;
687 	args[2] = tf->tf_edx;
688 	args[3] = tf->tf_esi;
689 	args[4] = tf->tf_edi;
690 	*params = NULL;		/* no copyin */
691 }
692 
693 /*
694  * If a linux binary is exec'ing something, try this image activator
695  * first.  We override standard shell script execution in order to
696  * be able to modify the interpreter path.  We only do this if a linux
697  * binary is doing the exec, so we do not create an EXEC module for it.
698  */
699 static int	exec_linux_imgact_try __P((struct image_params *iparams));
700 
701 static int
702 exec_linux_imgact_try(imgp)
703     struct image_params *imgp;
704 {
705     const char *head = (const char *)imgp->image_header;
706     int error = -1;
707 
708     /*
709      * The interpreter for shell scripts run from a linux binary needs
710      * to be located in /compat/linux if possible in order to recursively
711      * maintain linux path emulation.
712      */
713     if (((const short *)head)[0] == SHELLMAGIC) {
714 	    /*
715 	     * Run our normal shell image activator.  If it succeeds attempt
716 	     * to use the alternate path for the interpreter.  If an alternate
717 	     * path is found, use our stringspace to store it.
718 	     */
719 	    if ((error = exec_shell_imgact(imgp)) == 0) {
720 		    char *rpath = NULL;
721 
722 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
723 			imgp->interpreter_name, &rpath, 0);
724 		    if (rpath != imgp->interpreter_name) {
725 			    int len = strlen(rpath) + 1;
726 
727 			    if (len <= MAXSHELLCMDLEN) {
728 				memcpy(imgp->interpreter_name, rpath, len);
729 			    }
730 			    free(rpath, M_TEMP);
731 		    }
732 	    }
733     }
734     return(error);
735 }
736 
737 struct sysentvec linux_sysvec = {
738 	LINUX_SYS_MAXSYSCALL,
739 	linux_sysent,
740 	0xff,
741 	LINUX_SIGTBLSZ,
742 	bsd_to_linux_signal,
743 	ELAST + 1,
744 	bsd_to_linux_errno,
745 	translate_traps,
746 	linux_fixup,
747 	linux_sendsig,
748 	linux_sigcode,
749 	&linux_szsigcode,
750 	linux_prepsyscall,
751 	"Linux a.out",
752 	aout_coredump,
753 	exec_linux_imgact_try,
754 	LINUX_MINSIGSTKSZ
755 };
756 
757 struct sysentvec elf_linux_sysvec = {
758 	LINUX_SYS_MAXSYSCALL,
759 	linux_sysent,
760 	0xff,
761 	LINUX_SIGTBLSZ,
762 	bsd_to_linux_signal,
763 	ELAST + 1,
764 	bsd_to_linux_errno,
765 	translate_traps,
766 	elf_linux_fixup,
767 	linux_sendsig,
768 	linux_sigcode,
769 	&linux_szsigcode,
770 	linux_prepsyscall,
771 	"Linux ELF",
772 	elf_coredump,
773 	exec_linux_imgact_try,
774 	LINUX_MINSIGSTKSZ
775 };
776 
777 static Elf32_Brandinfo linux_brand = {
778 					ELFOSABI_LINUX,
779 					"/compat/linux",
780 					"/lib/ld-linux.so.1",
781 					&elf_linux_sysvec
782 				 };
783 
784 static Elf32_Brandinfo linux_glibc2brand = {
785 					ELFOSABI_LINUX,
786 					"/compat/linux",
787 					"/lib/ld-linux.so.2",
788 					&elf_linux_sysvec
789 				 };
790 
791 Elf32_Brandinfo *linux_brandlist[] = {
792 					&linux_brand,
793 					&linux_glibc2brand,
794 					NULL
795 				};
796 
797 static int
798 linux_elf_modevent(module_t mod, int type, void *data)
799 {
800 	Elf32_Brandinfo **brandinfo;
801 	int error;
802 
803 	error = 0;
804 
805 	switch(type) {
806 	case MOD_LOAD:
807 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
808 		     ++brandinfo)
809 			if (elf_insert_brand_entry(*brandinfo) < 0)
810 				error = EINVAL;
811 		if (error == 0) {
812 			linux_ioctl_register_handlers(
813 				&linux_ioctl_handler_set);
814 			if (bootverbose)
815 				printf("Linux ELF exec handler installed\n");
816 		} else
817 			printf("cannot insert Linux ELF brand handler\n");
818 		break;
819 	case MOD_UNLOAD:
820 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
821 		     ++brandinfo)
822 			if (elf_brand_inuse(*brandinfo))
823 				error = EBUSY;
824 		if (error == 0) {
825 			for (brandinfo = &linux_brandlist[0];
826 			     *brandinfo != NULL; ++brandinfo)
827 				if (elf_remove_brand_entry(*brandinfo) < 0)
828 					error = EINVAL;
829 		}
830 		if (error == 0) {
831 			linux_ioctl_unregister_handlers(
832 				&linux_ioctl_handler_set);
833 			if (bootverbose)
834 				printf("Linux ELF exec handler removed\n");
835 		} else
836 			printf("Could not deinstall ELF interpreter entry\n");
837 		break;
838 	default:
839 		break;
840 	}
841 	return error;
842 }
843 
844 static moduledata_t linux_elf_mod = {
845 	"linuxelf",
846 	linux_elf_modevent,
847 	0
848 };
849 
850 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
851