xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 4b8db4a299bae48efdd22b8c27709dd317f8647b)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/malloc.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_extern.h>
53 #include <sys/exec.h>
54 #include <sys/kernel.h>
55 #include <sys/module.h>
56 #include <machine/cpu.h>
57 #include <sys/lock.h>
58 #include <sys/mutex.h>
59 
60 #include <i386/linux/linux.h>
61 #include <i386/linux/linux_proto.h>
62 #include <compat/linux/linux_signal.h>
63 #include <compat/linux/linux_util.h>
64 
65 MODULE_VERSION(linux, 1);
66 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
67 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
68 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
69 
70 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
71 
/*
 * The two characters "#!" that start a shell script, expressed as the
 * 16-bit value obtained when they are read as a single short.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123	/* #! */
#else
#define	SHELLMAGIC	0x2321
#endif
77 
78 extern char linux_sigcode[];
79 extern int linux_szsigcode;
80 
81 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
82 
83 extern struct linker_set linux_ioctl_handler_set;
84 
85 static int	linux_fixup __P((register_t **stack_base,
86 				 struct image_params *iparams));
87 static int	elf_linux_fixup __P((register_t **stack_base,
88 				     struct image_params *iparams));
89 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
90 				       u_int *code, caddr_t *params));
91 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
92 				   u_long code));
93 
94 /*
95  * Linux syscalls return negative errno's, we do positive and map them
96  */
97 static int bsd_to_linux_errno[ELAST + 1] = {
98   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
99  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
100  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
101  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
102  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
103 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
104 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
105 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
106   	-6, -6, -43, -42, -75, -6, -84
107 };
108 
109 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
110 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
111 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
112 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
113 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
114 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
115 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
116 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
117 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
118 };
119 
120 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
121 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
122 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
123 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
124 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
125 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
126 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
127 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
128 	SIGIO, SIGURG, 0
129 };
130 
131 /*
132  * If FreeBSD & Linux have a difference of opinion about what a trap
133  * means, deal with it here.
134  */
135 static int
136 translate_traps(int signal, int trap_code)
137 {
138 	if (signal != SIGBUS)
139 		return signal;
140 	switch (trap_code) {
141 	case T_PROTFLT:
142 	case T_TSSFLT:
143 	case T_DOUBLEFLT:
144 	case T_PAGEFLT:
145 		return SIGSEGV;
146 	default:
147 		return signal;
148 	}
149 }
150 
151 static int
152 linux_fixup(register_t **stack_base, struct image_params *imgp)
153 {
154 	register_t *argv, *envp;
155 
156 	argv = *stack_base;
157 	envp = *stack_base + (imgp->argc + 1);
158 	(*stack_base)--;
159 	**stack_base = (intptr_t)(void *)envp;
160 	(*stack_base)--;
161 	**stack_base = (intptr_t)(void *)argv;
162 	(*stack_base)--;
163 	**stack_base = imgp->argc;
164 	return 0;
165 }
166 
167 static int
168 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
169 {
170 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
171 	register_t *pos;
172 
173 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
174 
175 	if (args->trace) {
176 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
177 	}
178 	if (args->execfd != -1) {
179 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
180 	}
181 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
182 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
183 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
184 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
185 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
186 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
187 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
188 	PROC_LOCK(imgp->proc);
189 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
190 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
191 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
192 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
193 	PROC_UNLOCK(imgp->proc);
194 	AUXARGS_ENTRY(pos, AT_NULL, 0);
195 
196 	free(imgp->auxargs, M_TEMP);
197 	imgp->auxargs = NULL;
198 
199 	(*stack_base)--;
200 	**stack_base = (long)imgp->argc;
201 	return 0;
202 }
203 
204 extern int _ucodesel, _udatasel;
205 extern unsigned long linux_sznonrtsigcode;
206 
207 static void
208 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
209 {
210 	register struct proc *p = curproc;
211 	register struct trapframe *regs;
212 	struct linux_rt_sigframe *fp, frame;
213 	int oonstack;
214 
215 	regs = p->p_md.md_regs;
216 	oonstack = sigonstack(regs->tf_esp);
217 
218 #ifdef DEBUG
219 	if (ldebug(sigreturn))
220 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
221 		    catcher, sig, (void*)mask, code);
222 #endif
223 	/*
224 	 * Allocate space for the signal handler context.
225 	 */
226 	PROC_LOCK(p);
227 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
228 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
229 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
230 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
231 	} else
232 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
233 	PROC_UNLOCK(p);
234 
235 	/*
236 	 * grow() will return FALSE if the fp will not fit inside the stack
237 	 *	and the stack can not be grown. useracc will return FALSE
238 	 *	if access is denied.
239 	 */
240 	if ((grow_stack (p, (int)fp) == FALSE) ||
241 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
242 	    VM_PROT_WRITE)) {
243 		/*
244 		 * Process has trashed its stack; give it an illegal
245 		 * instruction to halt it in its tracks.
246 		 */
247 		PROC_LOCK(p);
248 		SIGACTION(p, SIGILL) = SIG_DFL;
249 		SIGDELSET(p->p_sigignore, SIGILL);
250 		SIGDELSET(p->p_sigcatch, SIGILL);
251 		SIGDELSET(p->p_sigmask, SIGILL);
252 #ifdef DEBUG
253 		if (ldebug(sigreturn))
254 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
255 			    fp, oonstack);
256 #endif
257 		psignal(p, SIGILL);
258 		PROC_UNLOCK(p);
259 		return;
260 	}
261 
262 	/*
263 	 * Build the argument list for the signal handler.
264 	 */
265 	if (p->p_sysent->sv_sigtbl)
266 		if (sig <= p->p_sysent->sv_sigsize)
267 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
268 
269 	frame.sf_handler = catcher;
270 	frame.sf_sig = sig;
271 	frame.sf_siginfo = &fp->sf_si;
272 	frame.sf_ucontext = &fp->sf_sc;
273 
274 	/* Fill siginfo structure. */
275 	frame.sf_si.lsi_signo = sig;
276 	frame.sf_si.lsi_code = code;
277 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
278 
279 	/*
280 	 * Build the signal context to be used by sigreturn.
281 	 */
282 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
283 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
284 
285 	PROC_LOCK(p);
286 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
287 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
288 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
289 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
290 	PROC_UNLOCK(p);
291 
292 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
293 
294 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
295 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
296 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
297 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
298 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
299 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
300 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
301 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
302 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
303 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
304 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
305 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
306 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
307 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
308 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
309 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
310 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
311 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
312 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
313 
314 #ifdef DEBUG
315 	if (ldebug(sigreturn))
316 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
317 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
318 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
319 #endif
320 
321 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
322 		/*
323 		 * Process has trashed its stack; give it an illegal
324 		 * instruction to halt it in its tracks.
325 		 */
326 		PROC_LOCK(p);
327 		sigexit(p, SIGILL);
328 		/* NOTREACHED */
329 	}
330 
331 	/*
332 	 * Build context to run handler in.
333 	 */
334 	regs->tf_esp = (int)fp;
335 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
336 	    linux_sznonrtsigcode;
337 	regs->tf_eflags &= ~PSL_VM;
338 	regs->tf_cs = _ucodesel;
339 	regs->tf_ds = _udatasel;
340 	regs->tf_es = _udatasel;
341 	regs->tf_fs = _udatasel;
342 	regs->tf_ss = _udatasel;
343 }
344 
345 
346 /*
347  * Send an interrupt to process.
348  *
349  * Stack is set up to allow sigcode stored
350  * in u. to call routine, followed by kcall
351  * to sigreturn routine below.  After sigreturn
352  * resets the signal mask, the stack, and the
353  * frame pointer, it returns to the user
354  * specified pc, psl.
355  */
356 
357 static void
358 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
359 {
360 	register struct proc *p = curproc;
361 	register struct trapframe *regs;
362 	struct linux_sigframe *fp, frame;
363 	linux_sigset_t lmask;
364 	int oonstack, i;
365 
366 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
367 		/* Signal handler installed with SA_SIGINFO. */
368 		linux_rt_sendsig(catcher, sig, mask, code);
369 		return;
370 	}
371 
372 	regs = p->p_md.md_regs;
373 	oonstack = sigonstack(regs->tf_esp);
374 
375 #ifdef DEBUG
376 	if (ldebug(sigreturn))
377 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
378 		    catcher, sig, (void*)mask, code);
379 #endif
380 
381 	/*
382 	 * Allocate space for the signal handler context.
383 	 */
384 	PROC_LOCK(p);
385 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
386 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
387 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
388 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
389 	} else
390 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
391 	PROC_UNLOCK(p);
392 
393 	/*
394 	 * grow() will return FALSE if the fp will not fit inside the stack
395 	 *	and the stack can not be grown. useracc will return FALSE
396 	 *	if access is denied.
397 	 */
398 	if ((grow_stack (p, (int)fp) == FALSE) ||
399 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
400 	    VM_PROT_WRITE)) {
401 		/*
402 		 * Process has trashed its stack; give it an illegal
403 		 * instruction to halt it in its tracks.
404 		 */
405 		PROC_LOCK(p);
406 		SIGACTION(p, SIGILL) = SIG_DFL;
407 		SIGDELSET(p->p_sigignore, SIGILL);
408 		SIGDELSET(p->p_sigcatch, SIGILL);
409 		SIGDELSET(p->p_sigmask, SIGILL);
410 		psignal(p, SIGILL);
411 		PROC_UNLOCK(p);
412 		return;
413 	}
414 
415 	/*
416 	 * Build the argument list for the signal handler.
417 	 */
418 	if (p->p_sysent->sv_sigtbl)
419 		if (sig <= p->p_sysent->sv_sigsize)
420 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
421 
422 	frame.sf_handler = catcher;
423 	frame.sf_sig = sig;
424 
425 	bsd_to_linux_sigset(mask, &lmask);
426 
427 	/*
428 	 * Build the signal context to be used by sigreturn.
429 	 */
430 	frame.sf_sc.sc_mask   = lmask.__bits[0];
431 	frame.sf_sc.sc_gs     = rgs();
432 	frame.sf_sc.sc_fs     = regs->tf_fs;
433 	frame.sf_sc.sc_es     = regs->tf_es;
434 	frame.sf_sc.sc_ds     = regs->tf_ds;
435 	frame.sf_sc.sc_edi    = regs->tf_edi;
436 	frame.sf_sc.sc_esi    = regs->tf_esi;
437 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
438 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
439 	frame.sf_sc.sc_edx    = regs->tf_edx;
440 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
441 	frame.sf_sc.sc_eax    = regs->tf_eax;
442 	frame.sf_sc.sc_eip    = regs->tf_eip;
443 	frame.sf_sc.sc_cs     = regs->tf_cs;
444 	frame.sf_sc.sc_eflags = regs->tf_eflags;
445 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
446 	frame.sf_sc.sc_ss     = regs->tf_ss;
447 	frame.sf_sc.sc_err    = regs->tf_err;
448 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
449 
450 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
451 
452 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
453 		frame.sf_extramask[i] = lmask.__bits[i+1];
454 
455 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
456 		/*
457 		 * Process has trashed its stack; give it an illegal
458 		 * instruction to halt it in its tracks.
459 		 */
460 		PROC_LOCK(p);
461 		sigexit(p, SIGILL);
462 		/* NOTREACHED */
463 	}
464 
465 	/*
466 	 * Build context to run handler in.
467 	 */
468 	regs->tf_esp = (int)fp;
469 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
470 	regs->tf_eflags &= ~PSL_VM;
471 	regs->tf_cs = _ucodesel;
472 	regs->tf_ds = _udatasel;
473 	regs->tf_es = _udatasel;
474 	regs->tf_fs = _udatasel;
475 	regs->tf_ss = _udatasel;
476 }
477 
478 /*
479  * System call to cleanup state after a signal
480  * has been taken.  Reset signal mask and
481  * stack state from context left by sendsig (above).
482  * Return to previous pc and psl as specified by
483  * context left by sendsig. Check carefully to
484  * make sure that the user has not modified the
485  * psl to gain improper privileges or to cause
486  * a machine fault.
487  */
488 int
489 linux_sigreturn(p, args)
490 	struct proc *p;
491 	struct linux_sigreturn_args *args;
492 {
493 	struct linux_sigframe frame;
494 	register struct trapframe *regs;
495 	linux_sigset_t lmask;
496 	int eflags, i;
497 
498 	regs = p->p_md.md_regs;
499 
500 #ifdef DEBUG
501 	if (ldebug(sigreturn))
502 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
503 #endif
504 	/*
505 	 * The trampoline code hands us the sigframe.
506 	 * It is unsafe to keep track of it ourselves, in the event that a
507 	 * program jumps out of a signal handler.
508 	 */
509 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
510 		return (EFAULT);
511 
512 	/*
513 	 * Check for security violations.
514 	 */
515 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
516 	eflags = frame.sf_sc.sc_eflags;
517 	/*
518 	 * XXX do allow users to change the privileged flag PSL_RF.  The
519 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
520 	 * sometimes set it there too.  tf_eflags is kept in the signal
521 	 * context during signal handling and there is no other place
522 	 * to remember it, so the PSL_RF bit may be corrupted by the
523 	 * signal handler without us knowing.  Corruption of the PSL_RF
524 	 * bit at worst causes one more or one less debugger trap, so
525 	 * allowing it is fairly harmless.
526 	 */
527 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
528     		return(EINVAL);
529 	}
530 
531 	/*
532 	 * Don't allow users to load a valid privileged %cs.  Let the
533 	 * hardware check for invalid selectors, excess privilege in
534 	 * other selectors, invalid %eip's and invalid %esp's.
535 	 */
536 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
537 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
538 		trapsignal(p, SIGBUS, T_PROTFLT);
539 		return(EINVAL);
540 	}
541 
542 	lmask.__bits[0] = frame.sf_sc.sc_mask;
543 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
544 		lmask.__bits[i+1] = frame.sf_extramask[i];
545 	PROC_LOCK(p);
546 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
547 	SIG_CANTMASK(p->p_sigmask);
548 	PROC_UNLOCK(p);
549 
550 	/*
551 	 * Restore signal context.
552 	 */
553 	/* %gs was restored by the trampoline. */
554 	regs->tf_fs     = frame.sf_sc.sc_fs;
555 	regs->tf_es     = frame.sf_sc.sc_es;
556 	regs->tf_ds     = frame.sf_sc.sc_ds;
557 	regs->tf_edi    = frame.sf_sc.sc_edi;
558 	regs->tf_esi    = frame.sf_sc.sc_esi;
559 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
560 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
561 	regs->tf_edx    = frame.sf_sc.sc_edx;
562 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
563 	regs->tf_eax    = frame.sf_sc.sc_eax;
564 	regs->tf_eip    = frame.sf_sc.sc_eip;
565 	regs->tf_cs     = frame.sf_sc.sc_cs;
566 	regs->tf_eflags = eflags;
567 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
568 	regs->tf_ss     = frame.sf_sc.sc_ss;
569 
570 	return (EJUSTRETURN);
571 }
572 
573 /*
574  * System call to cleanup state after a signal
575  * has been taken.  Reset signal mask and
576  * stack state from context left by rt_sendsig (above).
577  * Return to previous pc and psl as specified by
578  * context left by sendsig. Check carefully to
579  * make sure that the user has not modified the
580  * psl to gain improper privileges or to cause
581  * a machine fault.
582  */
583 int
584 linux_rt_sigreturn(p, args)
585 	struct proc *p;
586 	struct linux_rt_sigreturn_args *args;
587 {
588 	struct sigaltstack_args sasargs;
589 	struct linux_ucontext 	 uc;
590 	struct linux_sigcontext *context;
591 	linux_stack_t *lss;
592 	stack_t *ss;
593 	register struct trapframe *regs;
594 	int eflags;
595 	caddr_t sg = stackgap_init();
596 
597 	regs = p->p_md.md_regs;
598 
599 #ifdef DEBUG
600 	if (ldebug(rt_sigreturn))
601 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
602 #endif
603 	/*
604 	 * The trampoline code hands us the ucontext.
605 	 * It is unsafe to keep track of it ourselves, in the event that a
606 	 * program jumps out of a signal handler.
607 	 */
608 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
609 		return (EFAULT);
610 
611 	context = &uc.uc_mcontext;
612 
613 	/*
614 	 * Check for security violations.
615 	 */
616 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
617 	eflags = context->sc_eflags;
618 	/*
619 	 * XXX do allow users to change the privileged flag PSL_RF.  The
620 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
621 	 * sometimes set it there too.  tf_eflags is kept in the signal
622 	 * context during signal handling and there is no other place
623 	 * to remember it, so the PSL_RF bit may be corrupted by the
624 	 * signal handler without us knowing.  Corruption of the PSL_RF
625 	 * bit at worst causes one more or one less debugger trap, so
626 	 * allowing it is fairly harmless.
627 	 */
628 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
629     		return(EINVAL);
630 	}
631 
632 	/*
633 	 * Don't allow users to load a valid privileged %cs.  Let the
634 	 * hardware check for invalid selectors, excess privilege in
635 	 * other selectors, invalid %eip's and invalid %esp's.
636 	 */
637 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
638 	if (!CS_SECURE(context->sc_cs)) {
639 		trapsignal(p, SIGBUS, T_PROTFLT);
640 		return(EINVAL);
641 	}
642 
643 	PROC_LOCK(p);
644 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
645 	SIG_CANTMASK(p->p_sigmask);
646 	PROC_UNLOCK(p);
647 
648 	/*
649 	 * Restore signal context
650 	 */
651 	/* %gs was restored by the trampoline. */
652 	regs->tf_fs     = context->sc_fs;
653 	regs->tf_es     = context->sc_es;
654 	regs->tf_ds     = context->sc_ds;
655 	regs->tf_edi    = context->sc_edi;
656 	regs->tf_esi    = context->sc_esi;
657 	regs->tf_ebp    = context->sc_ebp;
658 	regs->tf_ebx    = context->sc_ebx;
659 	regs->tf_edx    = context->sc_edx;
660 	regs->tf_ecx    = context->sc_ecx;
661 	regs->tf_eax    = context->sc_eax;
662 	regs->tf_eip    = context->sc_eip;
663 	regs->tf_cs     = context->sc_cs;
664 	regs->tf_eflags = eflags;
665 	regs->tf_esp    = context->sc_esp_at_signal;
666 	regs->tf_ss     = context->sc_ss;
667 
668 	/*
669 	 * call sigaltstack & ignore results..
670 	 */
671 	ss = stackgap_alloc(&sg, sizeof(stack_t));
672 	lss = &uc.uc_stack;
673 	ss->ss_sp = lss->ss_sp;
674 	ss->ss_size = lss->ss_size;
675 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
676 
677 #ifdef DEBUG
678 	if (ldebug(rt_sigreturn))
679 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
680 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
681 #endif
682 	sasargs.ss = ss;
683 	sasargs.oss = NULL;
684 	(void) sigaltstack(p, &sasargs);
685 
686 	return (EJUSTRETURN);
687 }
688 
689 static void
690 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
691 {
692 	args[0] = tf->tf_ebx;
693 	args[1] = tf->tf_ecx;
694 	args[2] = tf->tf_edx;
695 	args[3] = tf->tf_esi;
696 	args[4] = tf->tf_edi;
697 	*params = NULL;		/* no copyin */
698 }
699 
700 /*
701  * If a linux binary is exec'ing something, try this image activator
702  * first.  We override standard shell script execution in order to
703  * be able to modify the interpreter path.  We only do this if a linux
704  * binary is doing the exec, so we do not create an EXEC module for it.
705  */
706 static int	exec_linux_imgact_try __P((struct image_params *iparams));
707 
708 static int
709 exec_linux_imgact_try(imgp)
710     struct image_params *imgp;
711 {
712     const char *head = (const char *)imgp->image_header;
713     int error = -1;
714 
715     /*
716      * The interpreter for shell scripts run from a linux binary needs
717      * to be located in /compat/linux if possible in order to recursively
718      * maintain linux path emulation.
719      */
720     if (((const short *)head)[0] == SHELLMAGIC) {
721 	    /*
722 	     * Run our normal shell image activator.  If it succeeds attempt
723 	     * to use the alternate path for the interpreter.  If an alternate
724 	     * path is found, use our stringspace to store it.
725 	     */
726 	    if ((error = exec_shell_imgact(imgp)) == 0) {
727 		    char *rpath = NULL;
728 
729 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
730 			imgp->interpreter_name, &rpath, 0);
731 		    if (rpath != imgp->interpreter_name) {
732 			    int len = strlen(rpath) + 1;
733 
734 			    if (len <= MAXSHELLCMDLEN) {
735 				memcpy(imgp->interpreter_name, rpath, len);
736 			    }
737 			    free(rpath, M_TEMP);
738 		    }
739 	    }
740     }
741     return(error);
742 }
743 
744 struct sysentvec linux_sysvec = {
745 	LINUX_SYS_MAXSYSCALL,
746 	linux_sysent,
747 	0xff,
748 	LINUX_SIGTBLSZ,
749 	bsd_to_linux_signal,
750 	ELAST + 1,
751 	bsd_to_linux_errno,
752 	translate_traps,
753 	linux_fixup,
754 	linux_sendsig,
755 	linux_sigcode,
756 	&linux_szsigcode,
757 	linux_prepsyscall,
758 	"Linux a.out",
759 	aout_coredump,
760 	exec_linux_imgact_try,
761 	LINUX_MINSIGSTKSZ
762 };
763 
764 struct sysentvec elf_linux_sysvec = {
765 	LINUX_SYS_MAXSYSCALL,
766 	linux_sysent,
767 	0xff,
768 	LINUX_SIGTBLSZ,
769 	bsd_to_linux_signal,
770 	ELAST + 1,
771 	bsd_to_linux_errno,
772 	translate_traps,
773 	elf_linux_fixup,
774 	linux_sendsig,
775 	linux_sigcode,
776 	&linux_szsigcode,
777 	linux_prepsyscall,
778 	"Linux ELF",
779 	elf_coredump,
780 	exec_linux_imgact_try,
781 	LINUX_MINSIGSTKSZ
782 };
783 
784 static Elf32_Brandinfo linux_brand = {
785 					ELFOSABI_LINUX,
786 					"Linux",
787 					"/compat/linux",
788 					"/lib/ld-linux.so.1",
789 					&elf_linux_sysvec
790 				 };
791 
792 static Elf32_Brandinfo linux_glibc2brand = {
793 					ELFOSABI_LINUX,
794 					"Linux",
795 					"/compat/linux",
796 					"/lib/ld-linux.so.2",
797 					&elf_linux_sysvec
798 				 };
799 
800 Elf32_Brandinfo *linux_brandlist[] = {
801 					&linux_brand,
802 					&linux_glibc2brand,
803 					NULL
804 				};
805 
806 static int
807 linux_elf_modevent(module_t mod, int type, void *data)
808 {
809 	Elf32_Brandinfo **brandinfo;
810 	int error;
811 
812 	error = 0;
813 
814 	switch(type) {
815 	case MOD_LOAD:
816 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
817 		     ++brandinfo)
818 			if (elf_insert_brand_entry(*brandinfo) < 0)
819 				error = EINVAL;
820 		if (error == 0) {
821 			linux_ioctl_register_handlers(
822 				&linux_ioctl_handler_set);
823 			if (bootverbose)
824 				printf("Linux ELF exec handler installed\n");
825 		} else
826 			printf("cannot insert Linux ELF brand handler\n");
827 		break;
828 	case MOD_UNLOAD:
829 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
830 		     ++brandinfo)
831 			if (elf_brand_inuse(*brandinfo))
832 				error = EBUSY;
833 		if (error == 0) {
834 			for (brandinfo = &linux_brandlist[0];
835 			     *brandinfo != NULL; ++brandinfo)
836 				if (elf_remove_brand_entry(*brandinfo) < 0)
837 					error = EINVAL;
838 		}
839 		if (error == 0) {
840 			linux_ioctl_unregister_handlers(
841 				&linux_ioctl_handler_set);
842 			if (bootverbose)
843 				printf("Linux ELF exec handler removed\n");
844 		} else
845 			printf("Could not deinstall ELF interpreter entry\n");
846 		break;
847 	default:
848 		break;
849 	}
850 	return error;
851 }
852 
853 static moduledata_t linux_elf_mod = {
854 	"linuxelf",
855 	linux_elf_modevent,
856 	0
857 };
858 
859 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
860