xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 6de306ecee3831f48debaad1d0b22418faa48e10)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysproto.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56 
57 #include <i386/linux/linux.h>
58 #include <i386/linux/linux_proto.h>
59 #include <compat/linux/linux_signal.h>
60 #include <compat/linux/linux_util.h>
61 
62 MODULE_VERSION(linux, 1);
63 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
64 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
65 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
66 
67 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
68 
69 #if BYTE_ORDER == LITTLE_ENDIAN
70 #define SHELLMAGIC      0x2123 /* #! */
71 #else
72 #define SHELLMAGIC      0x2321
73 #endif
74 
75 extern char linux_sigcode[];
76 extern int linux_szsigcode;
77 
78 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
79 
80 extern struct linker_set linux_ioctl_handler_set;
81 
82 static int	linux_fixup __P((register_t **stack_base,
83 				 struct image_params *iparams));
84 static int	elf_linux_fixup __P((register_t **stack_base,
85 				     struct image_params *iparams));
86 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
87 				       u_int *code, caddr_t *params));
88 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
89 				   u_long code));
90 
91 /*
92  * Linux syscalls return negative errno's, we do positive and map them
93  */
94 static int bsd_to_linux_errno[ELAST + 1] = {
95   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
96  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
97  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
98  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
99  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
100 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
101 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
102 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
103   	-6, -6, -43, -42, -75, -6, -84
104 };
105 
106 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
107 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
108 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
109 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
110 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
111 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
112 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
113 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
114 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
115 };
116 
117 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
118 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
119 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
120 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
121 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
122 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
123 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
124 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
125 	SIGIO, SIGURG, 0
126 };
127 
128 /*
129  * If FreeBSD & Linux have a difference of opinion about what a trap
130  * means, deal with it here.
131  */
132 static int
133 translate_traps(int signal, int trap_code)
134 {
135 	if (signal != SIGBUS)
136 		return signal;
137 	switch (trap_code) {
138 	case T_PROTFLT:
139 	case T_TSSFLT:
140 	case T_DOUBLEFLT:
141 	case T_PAGEFLT:
142 		return SIGSEGV;
143 	default:
144 		return signal;
145 	}
146 }
147 
148 static int
149 linux_fixup(register_t **stack_base, struct image_params *imgp)
150 {
151 	register_t *argv, *envp;
152 
153 	argv = *stack_base;
154 	envp = *stack_base + (imgp->argc + 1);
155 	(*stack_base)--;
156 	**stack_base = (intptr_t)(void *)envp;
157 	(*stack_base)--;
158 	**stack_base = (intptr_t)(void *)argv;
159 	(*stack_base)--;
160 	**stack_base = imgp->argc;
161 	return 0;
162 }
163 
164 static int
165 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
166 {
167 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
168 	register_t *pos;
169 
170 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
171 
172 	if (args->trace) {
173 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
174 	}
175 	if (args->execfd != -1) {
176 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
177 	}
178 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
179 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
180 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
181 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
182 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
183 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
184 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
185 	PROC_LOCK(imgp->proc);
186 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
187 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
188 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
189 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
190 	PROC_UNLOCK(imgp->proc);
191 	AUXARGS_ENTRY(pos, AT_NULL, 0);
192 
193 	free(imgp->auxargs, M_TEMP);
194 	imgp->auxargs = NULL;
195 
196 	(*stack_base)--;
197 	**stack_base = (long)imgp->argc;
198 	return 0;
199 }
200 
201 extern int _ucodesel, _udatasel;
202 extern unsigned long _linux_sznonrtsigcode;
203 
204 static void
205 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
206 {
207 	register struct proc *p = curproc;
208 	register struct trapframe *regs;
209 	struct linux_rt_sigframe *fp, frame;
210 	int oonstack;
211 
212 	regs = p->p_md.md_regs;
213 	oonstack = sigonstack(regs->tf_esp);
214 
215 #ifdef DEBUG
216 	if (ldebug(sigreturn))
217 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
218 		    catcher, sig, (void*)mask, code);
219 #endif
220 	/*
221 	 * Allocate space for the signal handler context.
222 	 */
223 	PROC_LOCK(p);
224 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
225 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
226 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
227 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
228 	} else
229 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
230 	PROC_UNLOCK(p);
231 
232 	/*
233 	 * grow() will return FALSE if the fp will not fit inside the stack
234 	 *	and the stack can not be grown. useracc will return FALSE
235 	 *	if access is denied.
236 	 */
237 	if ((grow_stack (p, (int)fp) == FALSE) ||
238 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
239 	    VM_PROT_WRITE)) {
240 		/*
241 		 * Process has trashed its stack; give it an illegal
242 		 * instruction to halt it in its tracks.
243 		 */
244 		PROC_LOCK(p);
245 		SIGACTION(p, SIGILL) = SIG_DFL;
246 		SIGDELSET(p->p_sigignore, SIGILL);
247 		SIGDELSET(p->p_sigcatch, SIGILL);
248 		SIGDELSET(p->p_sigmask, SIGILL);
249 		PROC_UNLOCK(p);
250 #ifdef DEBUG
251 		if (ldebug(sigreturn))
252 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
253 			    fp, oonstack);
254 #endif
255 		psignal(p, SIGILL);
256 		return;
257 	}
258 
259 	/*
260 	 * Build the argument list for the signal handler.
261 	 */
262 	if (p->p_sysent->sv_sigtbl)
263 		if (sig <= p->p_sysent->sv_sigsize)
264 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
265 
266 	frame.sf_handler = catcher;
267 	frame.sf_sig = sig;
268 	frame.sf_siginfo = &fp->sf_si;
269 	frame.sf_ucontext = &fp->sf_sc;
270 
271 	/* Fill siginfo structure. */
272 	frame.sf_si.lsi_signo = sig;
273 	frame.sf_si.lsi_code = code;
274 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
275 
276 	/*
277 	 * Build the signal context to be used by sigreturn.
278 	 */
279 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
280 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
281 
282 	PROC_LOCK(p);
283 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
284 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
285 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
286 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
287 	PROC_UNLOCK(p);
288 
289 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
290 
291 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
292 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
293 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
294 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
295 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
296 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
297 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
298 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
299 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
300 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
301 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
302 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
303 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
304 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
305 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
306 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
307 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
308 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
309 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
310 
311 #ifdef DEBUG
312 	if (ldebug(sigreturn))
313 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
314 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
315 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
316 #endif
317 
318 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
319 		/*
320 		 * Process has trashed its stack; give it an illegal
321 		 * instruction to halt it in its tracks.
322 		 */
323 		sigexit(p, SIGILL);
324 		/* NOTREACHED */
325 	}
326 
327 	/*
328 	 * Build context to run handler in.
329 	 */
330 	regs->tf_esp = (int)fp;
331 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
332 	    _linux_sznonrtsigcode;
333 	regs->tf_eflags &= ~PSL_VM;
334 	regs->tf_cs = _ucodesel;
335 	regs->tf_ds = _udatasel;
336 	regs->tf_es = _udatasel;
337 	regs->tf_fs = _udatasel;
338 	load_gs(_udatasel);
339 	regs->tf_ss = _udatasel;
340 }
341 
342 
343 /*
344  * Send an interrupt to process.
345  *
346  * Stack is set up to allow sigcode stored
347  * in u. to call routine, followed by kcall
348  * to sigreturn routine below.  After sigreturn
349  * resets the signal mask, the stack, and the
350  * frame pointer, it returns to the user
351  * specified pc, psl.
352  */
353 
354 static void
355 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
356 {
357 	register struct proc *p = curproc;
358 	register struct trapframe *regs;
359 	struct linux_sigframe *fp, frame;
360 	linux_sigset_t lmask;
361 	int oonstack, i;
362 
363 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
364 		/* Signal handler installed with SA_SIGINFO. */
365 		linux_rt_sendsig(catcher, sig, mask, code);
366 		return;
367 	}
368 
369 	regs = p->p_md.md_regs;
370 	oonstack = sigonstack(regs->tf_esp);
371 
372 #ifdef DEBUG
373 	if (ldebug(sigreturn))
374 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
375 		    catcher, sig, (void*)mask, code);
376 #endif
377 
378 	/*
379 	 * Allocate space for the signal handler context.
380 	 */
381 	PROC_LOCK(p);
382 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
383 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
384 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
385 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
386 	} else
387 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
388 	PROC_UNLOCK(p);
389 
390 	/*
391 	 * grow() will return FALSE if the fp will not fit inside the stack
392 	 *	and the stack can not be grown. useracc will return FALSE
393 	 *	if access is denied.
394 	 */
395 	if ((grow_stack (p, (int)fp) == FALSE) ||
396 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
397 	    VM_PROT_WRITE)) {
398 		/*
399 		 * Process has trashed its stack; give it an illegal
400 		 * instruction to halt it in its tracks.
401 		 */
402 		PROC_LOCK(p);
403 		SIGACTION(p, SIGILL) = SIG_DFL;
404 		SIGDELSET(p->p_sigignore, SIGILL);
405 		SIGDELSET(p->p_sigcatch, SIGILL);
406 		SIGDELSET(p->p_sigmask, SIGILL);
407 		PROC_UNLOCK(p);
408 		psignal(p, SIGILL);
409 		return;
410 	}
411 
412 	/*
413 	 * Build the argument list for the signal handler.
414 	 */
415 	if (p->p_sysent->sv_sigtbl)
416 		if (sig <= p->p_sysent->sv_sigsize)
417 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
418 
419 	frame.sf_handler = catcher;
420 	frame.sf_sig = sig;
421 
422 	bsd_to_linux_sigset(mask, &lmask);
423 
424 	/*
425 	 * Build the signal context to be used by sigreturn.
426 	 */
427 	frame.sf_sc.sc_mask   = lmask.__bits[0];
428 	frame.sf_sc.sc_gs     = rgs();
429 	frame.sf_sc.sc_fs     = regs->tf_fs;
430 	frame.sf_sc.sc_es     = regs->tf_es;
431 	frame.sf_sc.sc_ds     = regs->tf_ds;
432 	frame.sf_sc.sc_edi    = regs->tf_edi;
433 	frame.sf_sc.sc_esi    = regs->tf_esi;
434 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
435 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
436 	frame.sf_sc.sc_edx    = regs->tf_edx;
437 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
438 	frame.sf_sc.sc_eax    = regs->tf_eax;
439 	frame.sf_sc.sc_eip    = regs->tf_eip;
440 	frame.sf_sc.sc_cs     = regs->tf_cs;
441 	frame.sf_sc.sc_eflags = regs->tf_eflags;
442 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
443 	frame.sf_sc.sc_ss     = regs->tf_ss;
444 	frame.sf_sc.sc_err    = regs->tf_err;
445 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
446 
447 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
448 
449 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
450 		frame.sf_extramask[i] = lmask.__bits[i+1];
451 
452 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
453 		/*
454 		 * Process has trashed its stack; give it an illegal
455 		 * instruction to halt it in its tracks.
456 		 */
457 		sigexit(p, SIGILL);
458 		/* NOTREACHED */
459 	}
460 
461 	/*
462 	 * Build context to run handler in.
463 	 */
464 	regs->tf_esp = (int)fp;
465 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
466 	regs->tf_eflags &= ~PSL_VM;
467 	regs->tf_cs = _ucodesel;
468 	regs->tf_ds = _udatasel;
469 	regs->tf_es = _udatasel;
470 	regs->tf_fs = _udatasel;
471 	load_gs(_udatasel);
472 	regs->tf_ss = _udatasel;
473 }
474 
475 /*
476  * System call to cleanup state after a signal
477  * has been taken.  Reset signal mask and
478  * stack state from context left by sendsig (above).
479  * Return to previous pc and psl as specified by
480  * context left by sendsig. Check carefully to
481  * make sure that the user has not modified the
482  * psl to gain improper privileges or to cause
483  * a machine fault.
484  */
485 int
486 linux_sigreturn(p, args)
487 	struct proc *p;
488 	struct linux_sigreturn_args *args;
489 {
490 	struct linux_sigframe frame;
491 	register struct trapframe *regs;
492 	linux_sigset_t lmask;
493 	int eflags, i;
494 
495 	regs = p->p_md.md_regs;
496 
497 #ifdef DEBUG
498 	if (ldebug(sigreturn))
499 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
500 #endif
501 	/*
502 	 * The trampoline code hands us the sigframe.
503 	 * It is unsafe to keep track of it ourselves, in the event that a
504 	 * program jumps out of a signal handler.
505 	 */
506 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
507 		return (EFAULT);
508 
509 	/*
510 	 * Check for security violations.
511 	 */
512 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
513 	eflags = frame.sf_sc.sc_eflags;
514 	/*
515 	 * XXX do allow users to change the privileged flag PSL_RF.  The
516 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
517 	 * sometimes set it there too.  tf_eflags is kept in the signal
518 	 * context during signal handling and there is no other place
519 	 * to remember it, so the PSL_RF bit may be corrupted by the
520 	 * signal handler without us knowing.  Corruption of the PSL_RF
521 	 * bit at worst causes one more or one less debugger trap, so
522 	 * allowing it is fairly harmless.
523 	 */
524 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
525     		return(EINVAL);
526 	}
527 
528 	/*
529 	 * Don't allow users to load a valid privileged %cs.  Let the
530 	 * hardware check for invalid selectors, excess privilege in
531 	 * other selectors, invalid %eip's and invalid %esp's.
532 	 */
533 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
534 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
535 		trapsignal(p, SIGBUS, T_PROTFLT);
536 		return(EINVAL);
537 	}
538 
539 	lmask.__bits[0] = frame.sf_sc.sc_mask;
540 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
541 		lmask.__bits[i+1] = frame.sf_extramask[i];
542 	PROC_LOCK(p);
543 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
544 	SIG_CANTMASK(p->p_sigmask);
545 	PROC_UNLOCK(p);
546 
547 	/*
548 	 * Restore signal context.
549 	 */
550 	/* %gs was restored by the trampoline. */
551 	regs->tf_fs     = frame.sf_sc.sc_fs;
552 	regs->tf_es     = frame.sf_sc.sc_es;
553 	regs->tf_ds     = frame.sf_sc.sc_ds;
554 	regs->tf_edi    = frame.sf_sc.sc_edi;
555 	regs->tf_esi    = frame.sf_sc.sc_esi;
556 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
557 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
558 	regs->tf_edx    = frame.sf_sc.sc_edx;
559 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
560 	regs->tf_eax    = frame.sf_sc.sc_eax;
561 	regs->tf_eip    = frame.sf_sc.sc_eip;
562 	regs->tf_cs     = frame.sf_sc.sc_cs;
563 	regs->tf_eflags = eflags;
564 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
565 	regs->tf_ss     = frame.sf_sc.sc_ss;
566 
567 	return (EJUSTRETURN);
568 }
569 
570 /*
571  * System call to cleanup state after a signal
572  * has been taken.  Reset signal mask and
573  * stack state from context left by rt_sendsig (above).
574  * Return to previous pc and psl as specified by
575  * context left by sendsig. Check carefully to
576  * make sure that the user has not modified the
577  * psl to gain improper privileges or to cause
578  * a machine fault.
579  */
580 int
581 linux_rt_sigreturn(p, args)
582 	struct proc *p;
583 	struct linux_rt_sigreturn_args *args;
584 {
585 	struct sigaltstack_args sasargs;
586 	struct linux_ucontext 	 uc;
587 	struct linux_sigcontext *context;
588 	linux_stack_t *lss;
589 	stack_t *ss;
590 	register struct trapframe *regs;
591 	int eflags;
592 	caddr_t sg = stackgap_init();
593 
594 	regs = p->p_md.md_regs;
595 
596 #ifdef DEBUG
597 	if (ldebug(rt_sigreturn))
598 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
599 #endif
600 	/*
601 	 * The trampoline code hands us the ucontext.
602 	 * It is unsafe to keep track of it ourselves, in the event that a
603 	 * program jumps out of a signal handler.
604 	 */
605 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
606 		return (EFAULT);
607 
608 	context = &uc.uc_mcontext;
609 
610 	/*
611 	 * Check for security violations.
612 	 */
613 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
614 	eflags = context->sc_eflags;
615 	/*
616 	 * XXX do allow users to change the privileged flag PSL_RF.  The
617 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
618 	 * sometimes set it there too.  tf_eflags is kept in the signal
619 	 * context during signal handling and there is no other place
620 	 * to remember it, so the PSL_RF bit may be corrupted by the
621 	 * signal handler without us knowing.  Corruption of the PSL_RF
622 	 * bit at worst causes one more or one less debugger trap, so
623 	 * allowing it is fairly harmless.
624 	 */
625 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
626     		return(EINVAL);
627 	}
628 
629 	/*
630 	 * Don't allow users to load a valid privileged %cs.  Let the
631 	 * hardware check for invalid selectors, excess privilege in
632 	 * other selectors, invalid %eip's and invalid %esp's.
633 	 */
634 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
635 	if (!CS_SECURE(context->sc_cs)) {
636 		trapsignal(p, SIGBUS, T_PROTFLT);
637 		return(EINVAL);
638 	}
639 
640 	PROC_LOCK(p);
641 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
642 	SIG_CANTMASK(p->p_sigmask);
643 	PROC_UNLOCK(p);
644 
645 	/*
646 	 * Restore signal context
647 	 */
648 	/* %gs was restored by the trampoline. */
649 	regs->tf_fs     = context->sc_fs;
650 	regs->tf_es     = context->sc_es;
651 	regs->tf_ds     = context->sc_ds;
652 	regs->tf_edi    = context->sc_edi;
653 	regs->tf_esi    = context->sc_esi;
654 	regs->tf_ebp    = context->sc_ebp;
655 	regs->tf_ebx    = context->sc_ebx;
656 	regs->tf_edx    = context->sc_edx;
657 	regs->tf_ecx    = context->sc_ecx;
658 	regs->tf_eax    = context->sc_eax;
659 	regs->tf_eip    = context->sc_eip;
660 	regs->tf_cs     = context->sc_cs;
661 	regs->tf_eflags = eflags;
662 	regs->tf_esp    = context->sc_esp_at_signal;
663 	regs->tf_ss     = context->sc_ss;
664 
665 	/*
666 	 * call sigaltstack & ignore results..
667 	 */
668 	ss = stackgap_alloc(&sg, sizeof(stack_t));
669 	lss = &uc.uc_stack;
670 	ss->ss_sp = lss->ss_sp;
671 	ss->ss_size = lss->ss_size;
672 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
673 
674 #ifdef DEBUG
675 	if (ldebug(rt_sigreturn))
676 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
677 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
678 #endif
679 	sasargs.ss = ss;
680 	sasargs.oss = NULL;
681 	(void) sigaltstack(p, &sasargs);
682 
683 	return (EJUSTRETURN);
684 }
685 
686 static void
687 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
688 {
689 	args[0] = tf->tf_ebx;
690 	args[1] = tf->tf_ecx;
691 	args[2] = tf->tf_edx;
692 	args[3] = tf->tf_esi;
693 	args[4] = tf->tf_edi;
694 	*params = NULL;		/* no copyin */
695 }
696 
697 /*
698  * If a linux binary is exec'ing something, try this image activator
699  * first.  We override standard shell script execution in order to
700  * be able to modify the interpreter path.  We only do this if a linux
701  * binary is doing the exec, so we do not create an EXEC module for it.
702  */
703 static int	exec_linux_imgact_try __P((struct image_params *iparams));
704 
705 static int
706 exec_linux_imgact_try(imgp)
707     struct image_params *imgp;
708 {
709     const char *head = (const char *)imgp->image_header;
710     int error = -1;
711 
712     /*
713      * The interpreter for shell scripts run from a linux binary needs
714      * to be located in /compat/linux if possible in order to recursively
715      * maintain linux path emulation.
716      */
717     if (((const short *)head)[0] == SHELLMAGIC) {
718 	    /*
719 	     * Run our normal shell image activator.  If it succeeds attempt
720 	     * to use the alternate path for the interpreter.  If an alternate
721 	     * path is found, use our stringspace to store it.
722 	     */
723 	    if ((error = exec_shell_imgact(imgp)) == 0) {
724 		    char *rpath = NULL;
725 
726 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
727 			imgp->interpreter_name, &rpath, 0);
728 		    if (rpath != imgp->interpreter_name) {
729 			    int len = strlen(rpath) + 1;
730 
731 			    if (len <= MAXSHELLCMDLEN) {
732 				memcpy(imgp->interpreter_name, rpath, len);
733 			    }
734 			    free(rpath, M_TEMP);
735 		    }
736 	    }
737     }
738     return(error);
739 }
740 
741 struct sysentvec linux_sysvec = {
742 	LINUX_SYS_MAXSYSCALL,
743 	linux_sysent,
744 	0xff,
745 	LINUX_SIGTBLSZ,
746 	bsd_to_linux_signal,
747 	ELAST + 1,
748 	bsd_to_linux_errno,
749 	translate_traps,
750 	linux_fixup,
751 	linux_sendsig,
752 	linux_sigcode,
753 	&linux_szsigcode,
754 	linux_prepsyscall,
755 	"Linux a.out",
756 	aout_coredump,
757 	exec_linux_imgact_try,
758 	LINUX_MINSIGSTKSZ
759 };
760 
761 struct sysentvec elf_linux_sysvec = {
762 	LINUX_SYS_MAXSYSCALL,
763 	linux_sysent,
764 	0xff,
765 	LINUX_SIGTBLSZ,
766 	bsd_to_linux_signal,
767 	ELAST + 1,
768 	bsd_to_linux_errno,
769 	translate_traps,
770 	elf_linux_fixup,
771 	linux_sendsig,
772 	linux_sigcode,
773 	&linux_szsigcode,
774 	linux_prepsyscall,
775 	"Linux ELF",
776 	elf_coredump,
777 	exec_linux_imgact_try,
778 	LINUX_MINSIGSTKSZ
779 };
780 
781 static Elf32_Brandinfo linux_brand = {
782 					ELFOSABI_LINUX,
783 					"Linux",
784 					"/compat/linux",
785 					"/lib/ld-linux.so.1",
786 					&elf_linux_sysvec
787 				 };
788 
789 static Elf32_Brandinfo linux_glibc2brand = {
790 					ELFOSABI_LINUX,
791 					"Linux",
792 					"/compat/linux",
793 					"/lib/ld-linux.so.2",
794 					&elf_linux_sysvec
795 				 };
796 
797 Elf32_Brandinfo *linux_brandlist[] = {
798 					&linux_brand,
799 					&linux_glibc2brand,
800 					NULL
801 				};
802 
803 static int
804 linux_elf_modevent(module_t mod, int type, void *data)
805 {
806 	Elf32_Brandinfo **brandinfo;
807 	int error;
808 
809 	error = 0;
810 
811 	switch(type) {
812 	case MOD_LOAD:
813 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
814 		     ++brandinfo)
815 			if (elf_insert_brand_entry(*brandinfo) < 0)
816 				error = EINVAL;
817 		if (error == 0) {
818 			linux_ioctl_register_handlers(
819 				&linux_ioctl_handler_set);
820 			if (bootverbose)
821 				printf("Linux ELF exec handler installed\n");
822 		} else
823 			printf("cannot insert Linux ELF brand handler\n");
824 		break;
825 	case MOD_UNLOAD:
826 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
827 		     ++brandinfo)
828 			if (elf_brand_inuse(*brandinfo))
829 				error = EBUSY;
830 		if (error == 0) {
831 			for (brandinfo = &linux_brandlist[0];
832 			     *brandinfo != NULL; ++brandinfo)
833 				if (elf_remove_brand_entry(*brandinfo) < 0)
834 					error = EINVAL;
835 		}
836 		if (error == 0) {
837 			linux_ioctl_unregister_handlers(
838 				&linux_ioctl_handler_set);
839 			if (bootverbose)
840 				printf("Linux ELF exec handler removed\n");
841 		} else
842 			printf("Could not deinstall ELF interpreter entry\n");
843 		break;
844 	default:
845 		break;
846 	}
847 	return error;
848 }
849 
850 static moduledata_t linux_elf_mod = {
851 	"linuxelf",
852 	linux_elf_modevent,
853 	0
854 };
855 
856 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
857