xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 5521ff5a4d1929056e7ffc982fac3341ca54df7c)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/malloc.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/signalvar.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_extern.h>
54 #include <sys/exec.h>
55 #include <sys/kernel.h>
56 #include <sys/module.h>
57 #include <machine/cpu.h>
58 #include <sys/lock.h>
59 #include <sys/mutex.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <compat/linux/linux_signal.h>
64 #include <compat/linux/linux_util.h>
65 
66 MODULE_VERSION(linux, 1);
67 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
68 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
69 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
70 
71 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72 
73 #if BYTE_ORDER == LITTLE_ENDIAN
74 #define SHELLMAGIC      0x2123 /* #! */
75 #else
76 #define SHELLMAGIC      0x2321
77 #endif
78 
79 extern char linux_sigcode[];
80 extern int linux_szsigcode;
81 
82 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
83 
84 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
85 
86 static int	linux_fixup __P((register_t **stack_base,
87 				 struct image_params *iparams));
88 static int	elf_linux_fixup __P((register_t **stack_base,
89 				     struct image_params *iparams));
90 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
91 				       u_int *code, caddr_t *params));
92 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
93 				   u_long code));
94 
95 /*
96  * Linux syscalls return negative errno's, we do positive and map them
97  */
98 static int bsd_to_linux_errno[ELAST + 1] = {
99   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
100  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
101  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
102  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
103  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
104 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
105 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
106 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
107   	-6, -6, -43, -42, -75, -6, -84
108 };
109 
110 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
111 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
112 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
113 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
114 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
115 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
116 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
117 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
118 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
119 };
120 
121 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
122 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
123 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
124 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
125 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
126 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
127 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
128 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
129 	SIGIO, SIGURG, 0
130 };
131 
132 /*
133  * If FreeBSD & Linux have a difference of opinion about what a trap
134  * means, deal with it here.
135  */
136 static int
137 translate_traps(int signal, int trap_code)
138 {
139 	if (signal != SIGBUS)
140 		return signal;
141 	switch (trap_code) {
142 	case T_PROTFLT:
143 	case T_TSSFLT:
144 	case T_DOUBLEFLT:
145 	case T_PAGEFLT:
146 		return SIGSEGV;
147 	default:
148 		return signal;
149 	}
150 }
151 
152 static int
153 linux_fixup(register_t **stack_base, struct image_params *imgp)
154 {
155 	register_t *argv, *envp;
156 
157 	argv = *stack_base;
158 	envp = *stack_base + (imgp->argc + 1);
159 	(*stack_base)--;
160 	**stack_base = (intptr_t)(void *)envp;
161 	(*stack_base)--;
162 	**stack_base = (intptr_t)(void *)argv;
163 	(*stack_base)--;
164 	**stack_base = imgp->argc;
165 	return 0;
166 }
167 
168 static int
169 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
170 {
171 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
172 	register_t *pos;
173 
174 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
175 
176 	if (args->trace) {
177 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
178 	}
179 	if (args->execfd != -1) {
180 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
181 	}
182 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
183 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
184 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
185 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
186 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
187 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
188 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
189 	PROC_LOCK(imgp->proc);
190 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
191 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
192 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
193 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
194 	PROC_UNLOCK(imgp->proc);
195 	AUXARGS_ENTRY(pos, AT_NULL, 0);
196 
197 	free(imgp->auxargs, M_TEMP);
198 	imgp->auxargs = NULL;
199 
200 	(*stack_base)--;
201 	**stack_base = (long)imgp->argc;
202 	return 0;
203 }
204 
205 extern int _ucodesel, _udatasel;
206 extern unsigned long linux_sznonrtsigcode;
207 
208 static void
209 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
210 {
211 	register struct proc *p = curproc;
212 	register struct trapframe *regs;
213 	struct linux_rt_sigframe *fp, frame;
214 	int oonstack;
215 
216 	regs = p->p_frame;
217 	oonstack = sigonstack(regs->tf_esp);
218 
219 #ifdef DEBUG
220 	if (ldebug(sigreturn))
221 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
222 		    catcher, sig, (void*)mask, code);
223 #endif
224 	/*
225 	 * Allocate space for the signal handler context.
226 	 */
227 	PROC_LOCK(p);
228 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
229 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
230 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
231 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
232 	} else
233 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
234 	PROC_UNLOCK(p);
235 
236 	/*
237 	 * grow() will return FALSE if the fp will not fit inside the stack
238 	 *	and the stack can not be grown. useracc will return FALSE
239 	 *	if access is denied.
240 	 */
241 	if ((grow_stack (p, (int)fp) == FALSE) ||
242 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
243 	    VM_PROT_WRITE)) {
244 		/*
245 		 * Process has trashed its stack; give it an illegal
246 		 * instruction to halt it in its tracks.
247 		 */
248 		PROC_LOCK(p);
249 		SIGACTION(p, SIGILL) = SIG_DFL;
250 		SIGDELSET(p->p_sigignore, SIGILL);
251 		SIGDELSET(p->p_sigcatch, SIGILL);
252 		SIGDELSET(p->p_sigmask, SIGILL);
253 #ifdef DEBUG
254 		if (ldebug(sigreturn))
255 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
256 			    fp, oonstack);
257 #endif
258 		psignal(p, SIGILL);
259 		PROC_UNLOCK(p);
260 		return;
261 	}
262 
263 	/*
264 	 * Build the argument list for the signal handler.
265 	 */
266 	if (p->p_sysent->sv_sigtbl)
267 		if (sig <= p->p_sysent->sv_sigsize)
268 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
269 
270 	frame.sf_handler = catcher;
271 	frame.sf_sig = sig;
272 	frame.sf_siginfo = &fp->sf_si;
273 	frame.sf_ucontext = &fp->sf_sc;
274 
275 	/* Fill siginfo structure. */
276 	frame.sf_si.lsi_signo = sig;
277 	frame.sf_si.lsi_code = code;
278 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
279 
280 	/*
281 	 * Build the signal context to be used by sigreturn.
282 	 */
283 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
284 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
285 
286 	PROC_LOCK(p);
287 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
288 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
289 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
290 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
291 	PROC_UNLOCK(p);
292 
293 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
294 
295 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
296 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
297 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
298 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
299 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
300 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
301 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
302 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
303 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
304 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
305 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
306 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
307 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
308 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
309 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
310 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
311 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
312 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
313 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
314 
315 #ifdef DEBUG
316 	if (ldebug(sigreturn))
317 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
318 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
319 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
320 #endif
321 
322 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
323 		/*
324 		 * Process has trashed its stack; give it an illegal
325 		 * instruction to halt it in its tracks.
326 		 */
327 		PROC_LOCK(p);
328 		sigexit(p, SIGILL);
329 		/* NOTREACHED */
330 	}
331 
332 	/*
333 	 * Build context to run handler in.
334 	 */
335 	regs->tf_esp = (int)fp;
336 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
337 	    linux_sznonrtsigcode;
338 	regs->tf_eflags &= ~PSL_VM;
339 	regs->tf_cs = _ucodesel;
340 	regs->tf_ds = _udatasel;
341 	regs->tf_es = _udatasel;
342 	regs->tf_fs = _udatasel;
343 	regs->tf_ss = _udatasel;
344 }
345 
346 
347 /*
348  * Send an interrupt to process.
349  *
350  * Stack is set up to allow sigcode stored
351  * in u. to call routine, followed by kcall
352  * to sigreturn routine below.  After sigreturn
353  * resets the signal mask, the stack, and the
354  * frame pointer, it returns to the user
355  * specified pc, psl.
356  */
357 
358 static void
359 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
360 {
361 	register struct proc *p = curproc;
362 	register struct trapframe *regs;
363 	struct linux_sigframe *fp, frame;
364 	linux_sigset_t lmask;
365 	int oonstack, i;
366 
367 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
368 		/* Signal handler installed with SA_SIGINFO. */
369 		linux_rt_sendsig(catcher, sig, mask, code);
370 		return;
371 	}
372 
373 	regs = p->p_frame;
374 	oonstack = sigonstack(regs->tf_esp);
375 
376 #ifdef DEBUG
377 	if (ldebug(sigreturn))
378 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
379 		    catcher, sig, (void*)mask, code);
380 #endif
381 
382 	/*
383 	 * Allocate space for the signal handler context.
384 	 */
385 	PROC_LOCK(p);
386 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
387 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
388 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
389 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
390 	} else
391 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
392 	PROC_UNLOCK(p);
393 
394 	/*
395 	 * grow() will return FALSE if the fp will not fit inside the stack
396 	 *	and the stack can not be grown. useracc will return FALSE
397 	 *	if access is denied.
398 	 */
399 	if ((grow_stack (p, (int)fp) == FALSE) ||
400 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
401 	    VM_PROT_WRITE)) {
402 		/*
403 		 * Process has trashed its stack; give it an illegal
404 		 * instruction to halt it in its tracks.
405 		 */
406 		PROC_LOCK(p);
407 		SIGACTION(p, SIGILL) = SIG_DFL;
408 		SIGDELSET(p->p_sigignore, SIGILL);
409 		SIGDELSET(p->p_sigcatch, SIGILL);
410 		SIGDELSET(p->p_sigmask, SIGILL);
411 		psignal(p, SIGILL);
412 		PROC_UNLOCK(p);
413 		return;
414 	}
415 
416 	/*
417 	 * Build the argument list for the signal handler.
418 	 */
419 	if (p->p_sysent->sv_sigtbl)
420 		if (sig <= p->p_sysent->sv_sigsize)
421 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
422 
423 	frame.sf_handler = catcher;
424 	frame.sf_sig = sig;
425 
426 	bsd_to_linux_sigset(mask, &lmask);
427 
428 	/*
429 	 * Build the signal context to be used by sigreturn.
430 	 */
431 	frame.sf_sc.sc_mask   = lmask.__bits[0];
432 	frame.sf_sc.sc_gs     = rgs();
433 	frame.sf_sc.sc_fs     = regs->tf_fs;
434 	frame.sf_sc.sc_es     = regs->tf_es;
435 	frame.sf_sc.sc_ds     = regs->tf_ds;
436 	frame.sf_sc.sc_edi    = regs->tf_edi;
437 	frame.sf_sc.sc_esi    = regs->tf_esi;
438 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
439 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
440 	frame.sf_sc.sc_edx    = regs->tf_edx;
441 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
442 	frame.sf_sc.sc_eax    = regs->tf_eax;
443 	frame.sf_sc.sc_eip    = regs->tf_eip;
444 	frame.sf_sc.sc_cs     = regs->tf_cs;
445 	frame.sf_sc.sc_eflags = regs->tf_eflags;
446 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
447 	frame.sf_sc.sc_ss     = regs->tf_ss;
448 	frame.sf_sc.sc_err    = regs->tf_err;
449 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
450 
451 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
452 
453 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
454 		frame.sf_extramask[i] = lmask.__bits[i+1];
455 
456 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
457 		/*
458 		 * Process has trashed its stack; give it an illegal
459 		 * instruction to halt it in its tracks.
460 		 */
461 		PROC_LOCK(p);
462 		sigexit(p, SIGILL);
463 		/* NOTREACHED */
464 	}
465 
466 	/*
467 	 * Build context to run handler in.
468 	 */
469 	regs->tf_esp = (int)fp;
470 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
471 	regs->tf_eflags &= ~PSL_VM;
472 	regs->tf_cs = _ucodesel;
473 	regs->tf_ds = _udatasel;
474 	regs->tf_es = _udatasel;
475 	regs->tf_fs = _udatasel;
476 	regs->tf_ss = _udatasel;
477 }
478 
479 /*
480  * System call to cleanup state after a signal
481  * has been taken.  Reset signal mask and
482  * stack state from context left by sendsig (above).
483  * Return to previous pc and psl as specified by
484  * context left by sendsig. Check carefully to
485  * make sure that the user has not modified the
486  * psl to gain improper privileges or to cause
487  * a machine fault.
488  */
489 int
490 linux_sigreturn(p, args)
491 	struct proc *p;
492 	struct linux_sigreturn_args *args;
493 {
494 	struct linux_sigframe frame;
495 	register struct trapframe *regs;
496 	linux_sigset_t lmask;
497 	int eflags, i;
498 
499 	regs = p->p_frame;
500 
501 #ifdef DEBUG
502 	if (ldebug(sigreturn))
503 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
504 #endif
505 	/*
506 	 * The trampoline code hands us the sigframe.
507 	 * It is unsafe to keep track of it ourselves, in the event that a
508 	 * program jumps out of a signal handler.
509 	 */
510 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
511 		return (EFAULT);
512 
513 	/*
514 	 * Check for security violations.
515 	 */
516 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
517 	eflags = frame.sf_sc.sc_eflags;
518 	/*
519 	 * XXX do allow users to change the privileged flag PSL_RF.  The
520 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
521 	 * sometimes set it there too.  tf_eflags is kept in the signal
522 	 * context during signal handling and there is no other place
523 	 * to remember it, so the PSL_RF bit may be corrupted by the
524 	 * signal handler without us knowing.  Corruption of the PSL_RF
525 	 * bit at worst causes one more or one less debugger trap, so
526 	 * allowing it is fairly harmless.
527 	 */
528 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
529     		return(EINVAL);
530 	}
531 
532 	/*
533 	 * Don't allow users to load a valid privileged %cs.  Let the
534 	 * hardware check for invalid selectors, excess privilege in
535 	 * other selectors, invalid %eip's and invalid %esp's.
536 	 */
537 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
538 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
539 		trapsignal(p, SIGBUS, T_PROTFLT);
540 		return(EINVAL);
541 	}
542 
543 	lmask.__bits[0] = frame.sf_sc.sc_mask;
544 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
545 		lmask.__bits[i+1] = frame.sf_extramask[i];
546 	PROC_LOCK(p);
547 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
548 	SIG_CANTMASK(p->p_sigmask);
549 	PROC_UNLOCK(p);
550 
551 	/*
552 	 * Restore signal context.
553 	 */
554 	/* %gs was restored by the trampoline. */
555 	regs->tf_fs     = frame.sf_sc.sc_fs;
556 	regs->tf_es     = frame.sf_sc.sc_es;
557 	regs->tf_ds     = frame.sf_sc.sc_ds;
558 	regs->tf_edi    = frame.sf_sc.sc_edi;
559 	regs->tf_esi    = frame.sf_sc.sc_esi;
560 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
561 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
562 	regs->tf_edx    = frame.sf_sc.sc_edx;
563 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
564 	regs->tf_eax    = frame.sf_sc.sc_eax;
565 	regs->tf_eip    = frame.sf_sc.sc_eip;
566 	regs->tf_cs     = frame.sf_sc.sc_cs;
567 	regs->tf_eflags = eflags;
568 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
569 	regs->tf_ss     = frame.sf_sc.sc_ss;
570 
571 	return (EJUSTRETURN);
572 }
573 
574 /*
575  * System call to cleanup state after a signal
576  * has been taken.  Reset signal mask and
577  * stack state from context left by rt_sendsig (above).
578  * Return to previous pc and psl as specified by
579  * context left by sendsig. Check carefully to
580  * make sure that the user has not modified the
581  * psl to gain improper privileges or to cause
582  * a machine fault.
583  */
584 int
585 linux_rt_sigreturn(p, args)
586 	struct proc *p;
587 	struct linux_rt_sigreturn_args *args;
588 {
589 	struct sigaltstack_args sasargs;
590 	struct linux_ucontext 	 uc;
591 	struct linux_sigcontext *context;
592 	linux_stack_t *lss;
593 	stack_t *ss;
594 	register struct trapframe *regs;
595 	int eflags;
596 	caddr_t sg = stackgap_init();
597 
598 	regs = p->p_frame;
599 
600 #ifdef DEBUG
601 	if (ldebug(rt_sigreturn))
602 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
603 #endif
604 	/*
605 	 * The trampoline code hands us the ucontext.
606 	 * It is unsafe to keep track of it ourselves, in the event that a
607 	 * program jumps out of a signal handler.
608 	 */
609 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
610 		return (EFAULT);
611 
612 	context = &uc.uc_mcontext;
613 
614 	/*
615 	 * Check for security violations.
616 	 */
617 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
618 	eflags = context->sc_eflags;
619 	/*
620 	 * XXX do allow users to change the privileged flag PSL_RF.  The
621 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
622 	 * sometimes set it there too.  tf_eflags is kept in the signal
623 	 * context during signal handling and there is no other place
624 	 * to remember it, so the PSL_RF bit may be corrupted by the
625 	 * signal handler without us knowing.  Corruption of the PSL_RF
626 	 * bit at worst causes one more or one less debugger trap, so
627 	 * allowing it is fairly harmless.
628 	 */
629 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
630     		return(EINVAL);
631 	}
632 
633 	/*
634 	 * Don't allow users to load a valid privileged %cs.  Let the
635 	 * hardware check for invalid selectors, excess privilege in
636 	 * other selectors, invalid %eip's and invalid %esp's.
637 	 */
638 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
639 	if (!CS_SECURE(context->sc_cs)) {
640 		trapsignal(p, SIGBUS, T_PROTFLT);
641 		return(EINVAL);
642 	}
643 
644 	PROC_LOCK(p);
645 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
646 	SIG_CANTMASK(p->p_sigmask);
647 	PROC_UNLOCK(p);
648 
649 	/*
650 	 * Restore signal context
651 	 */
652 	/* %gs was restored by the trampoline. */
653 	regs->tf_fs     = context->sc_fs;
654 	regs->tf_es     = context->sc_es;
655 	regs->tf_ds     = context->sc_ds;
656 	regs->tf_edi    = context->sc_edi;
657 	regs->tf_esi    = context->sc_esi;
658 	regs->tf_ebp    = context->sc_ebp;
659 	regs->tf_ebx    = context->sc_ebx;
660 	regs->tf_edx    = context->sc_edx;
661 	regs->tf_ecx    = context->sc_ecx;
662 	regs->tf_eax    = context->sc_eax;
663 	regs->tf_eip    = context->sc_eip;
664 	regs->tf_cs     = context->sc_cs;
665 	regs->tf_eflags = eflags;
666 	regs->tf_esp    = context->sc_esp_at_signal;
667 	regs->tf_ss     = context->sc_ss;
668 
669 	/*
670 	 * call sigaltstack & ignore results..
671 	 */
672 	ss = stackgap_alloc(&sg, sizeof(stack_t));
673 	lss = &uc.uc_stack;
674 	ss->ss_sp = lss->ss_sp;
675 	ss->ss_size = lss->ss_size;
676 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
677 
678 #ifdef DEBUG
679 	if (ldebug(rt_sigreturn))
680 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
681 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
682 #endif
683 	sasargs.ss = ss;
684 	sasargs.oss = NULL;
685 	(void) sigaltstack(p, &sasargs);
686 
687 	return (EJUSTRETURN);
688 }
689 
690 static void
691 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
692 {
693 	args[0] = tf->tf_ebx;
694 	args[1] = tf->tf_ecx;
695 	args[2] = tf->tf_edx;
696 	args[3] = tf->tf_esi;
697 	args[4] = tf->tf_edi;
698 	*params = NULL;		/* no copyin */
699 }
700 
701 /*
702  * If a linux binary is exec'ing something, try this image activator
703  * first.  We override standard shell script execution in order to
704  * be able to modify the interpreter path.  We only do this if a linux
705  * binary is doing the exec, so we do not create an EXEC module for it.
706  */
707 static int	exec_linux_imgact_try __P((struct image_params *iparams));
708 
709 static int
710 exec_linux_imgact_try(imgp)
711     struct image_params *imgp;
712 {
713     const char *head = (const char *)imgp->image_header;
714     int error = -1;
715 
716     /*
717      * The interpreter for shell scripts run from a linux binary needs
718      * to be located in /compat/linux if possible in order to recursively
719      * maintain linux path emulation.
720      */
721     if (((const short *)head)[0] == SHELLMAGIC) {
722 	    /*
723 	     * Run our normal shell image activator.  If it succeeds attempt
724 	     * to use the alternate path for the interpreter.  If an alternate
725 	     * path is found, use our stringspace to store it.
726 	     */
727 	    if ((error = exec_shell_imgact(imgp)) == 0) {
728 		    char *rpath = NULL;
729 
730 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
731 			imgp->interpreter_name, &rpath, 0);
732 		    if (rpath != imgp->interpreter_name) {
733 			    int len = strlen(rpath) + 1;
734 
735 			    if (len <= MAXSHELLCMDLEN) {
736 				memcpy(imgp->interpreter_name, rpath, len);
737 			    }
738 			    free(rpath, M_TEMP);
739 		    }
740 	    }
741     }
742     return(error);
743 }
744 
745 struct sysentvec linux_sysvec = {
746 	LINUX_SYS_MAXSYSCALL,
747 	linux_sysent,
748 	0xff,
749 	LINUX_SIGTBLSZ,
750 	bsd_to_linux_signal,
751 	ELAST + 1,
752 	bsd_to_linux_errno,
753 	translate_traps,
754 	linux_fixup,
755 	linux_sendsig,
756 	linux_sigcode,
757 	&linux_szsigcode,
758 	linux_prepsyscall,
759 	"Linux a.out",
760 	aout_coredump,
761 	exec_linux_imgact_try,
762 	LINUX_MINSIGSTKSZ
763 };
764 
765 struct sysentvec elf_linux_sysvec = {
766 	LINUX_SYS_MAXSYSCALL,
767 	linux_sysent,
768 	0xff,
769 	LINUX_SIGTBLSZ,
770 	bsd_to_linux_signal,
771 	ELAST + 1,
772 	bsd_to_linux_errno,
773 	translate_traps,
774 	elf_linux_fixup,
775 	linux_sendsig,
776 	linux_sigcode,
777 	&linux_szsigcode,
778 	linux_prepsyscall,
779 	"Linux ELF",
780 	elf_coredump,
781 	exec_linux_imgact_try,
782 	LINUX_MINSIGSTKSZ
783 };
784 
785 static Elf32_Brandinfo linux_brand = {
786 					ELFOSABI_LINUX,
787 					"Linux",
788 					"/compat/linux",
789 					"/lib/ld-linux.so.1",
790 					&elf_linux_sysvec
791 				 };
792 
793 static Elf32_Brandinfo linux_glibc2brand = {
794 					ELFOSABI_LINUX,
795 					"Linux",
796 					"/compat/linux",
797 					"/lib/ld-linux.so.2",
798 					&elf_linux_sysvec
799 				 };
800 
801 Elf32_Brandinfo *linux_brandlist[] = {
802 					&linux_brand,
803 					&linux_glibc2brand,
804 					NULL
805 				};
806 
807 static int
808 linux_elf_modevent(module_t mod, int type, void *data)
809 {
810 	Elf32_Brandinfo **brandinfo;
811 	int error;
812 	struct linux_ioctl_handler **lihp;
813 
814 	error = 0;
815 
816 	switch(type) {
817 	case MOD_LOAD:
818 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
819 		     ++brandinfo)
820 			if (elf_insert_brand_entry(*brandinfo) < 0)
821 				error = EINVAL;
822 		if (error == 0) {
823 			SET_FOREACH(lihp, linux_ioctl_handler_set)
824 				linux_ioctl_register_handler(*lihp);
825 			if (bootverbose)
826 				printf("Linux ELF exec handler installed\n");
827 		} else
828 			printf("cannot insert Linux ELF brand handler\n");
829 		break;
830 	case MOD_UNLOAD:
831 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
832 		     ++brandinfo)
833 			if (elf_brand_inuse(*brandinfo))
834 				error = EBUSY;
835 		if (error == 0) {
836 			for (brandinfo = &linux_brandlist[0];
837 			     *brandinfo != NULL; ++brandinfo)
838 				if (elf_remove_brand_entry(*brandinfo) < 0)
839 					error = EINVAL;
840 		}
841 		if (error == 0) {
842 			SET_FOREACH(lihp, linux_ioctl_handler_set)
843 				linux_ioctl_unregister_handler(*lihp);
844 			if (bootverbose)
845 				printf("Linux ELF exec handler removed\n");
846 		} else
847 			printf("Could not deinstall ELF interpreter entry\n");
848 		break;
849 	default:
850 		break;
851 	}
852 	return error;
853 }
854 
855 static moduledata_t linux_elf_mod = {
856 	"linuxelf",
857 	linux_elf_modevent,
858 	0
859 };
860 
861 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
862