xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 9849949cae0603df0485a0be8a3f80fb8f68f304)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/malloc.h>
44 #include <sys/mutex.h>
45 #include <sys/proc.h>
46 #include <sys/signalvar.h>
47 #include <sys/sysent.h>
48 #include <sys/sysproto.h>
49 
50 #include <vm/vm.h>
51 #include <vm/vm_param.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_extern.h>
54 #include <sys/exec.h>
55 #include <sys/kernel.h>
56 #include <sys/module.h>
57 #include <machine/cpu.h>
58 #include <sys/lock.h>
59 #include <sys/mutex.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <compat/linux/linux_signal.h>
64 #include <compat/linux/linux_util.h>
65 
/*
 * Module metadata: this file provides version 1 of the "linux" module and
 * declares dependencies on the sysvmsg, sysvsem and sysvshm modules.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Defines the M_LINUX malloc type ("linux", "Linux mode structures"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72 
/*
 * The two bytes "#!" that start an interpreter script, as they read when
 * the first 16 bits of an image header are loaded as a short in host byte
 * order ('#' is 0x23, '!' is 0x21).  Compared against the image header in
 * exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
78 
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 *
 * NOTE(review): ldebug(name) presumably resolves its argument through
 * LINUX_SYS_linux_<name>, which is why these two aliases are needed --
 * confirm against the ldebug() definition.
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
87 
/*
 * Signal trampoline code and the variable whose address is stored next to
 * it in both sysentvecs below (presumably the trampoline's size).
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, LINUX_SYS_MAXSYSCALL entries. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Set of ioctl handlers registered/unregistered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations of the sysentvec callbacks defined in this file. */
static int	linux_fixup __P((register_t **stack_base,
				 struct image_params *iparams));
static int	elf_linux_fixup __P((register_t **stack_base,
				     struct image_params *iparams));
static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
				       u_int *code, caddr_t *params));
static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
				   u_long code));
103 
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table is positional: the initializer at index N is the (already
 * negated) value used for native errno N.  It is installed, together with
 * its size ELAST + 1, in both sysentvecs below.  Several native errnos
 * share one Linux value (for example -6 and -11 each appear more than
 * once), so the mapping cannot be inverted.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
  	-6, -6, -43, -42, -75, -6, -84
};
118 
/*
 * Native-to-Linux signal number translation.  Installed as the signal
 * table of both sysentvecs below; the sendsig routines index it with
 * _SIG_IDX(sig).  The table is positional, four signals per row.  A 0
 * entry is a slot for which no Linux signal is listed.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
129 
/*
 * Linux-to-native signal number translation; positional, four signals per
 * row.  It is not referenced in this file.
 * NOTE(review): presumably indexed by _SIG_IDX() of the Linux signal
 * number by the signal conversion code elsewhere -- confirm against its
 * users.  It is not a strict inverse of bsd_to_linux_signal[]: SIGBUS and
 * SIGURG each appear twice and the last slot is 0.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, 0
};
140 
141 /*
142  * If FreeBSD & Linux have a difference of opinion about what a trap
143  * means, deal with it here.
144  *
145  * MPSAFE
146  */
147 static int
148 translate_traps(int signal, int trap_code)
149 {
150 	if (signal != SIGBUS)
151 		return signal;
152 	switch (trap_code) {
153 	case T_PROTFLT:
154 	case T_TSSFLT:
155 	case T_DOUBLEFLT:
156 	case T_PAGEFLT:
157 		return SIGSEGV;
158 	default:
159 		return signal;
160 	}
161 }
162 
163 static int
164 linux_fixup(register_t **stack_base, struct image_params *imgp)
165 {
166 	register_t *argv, *envp;
167 
168 	argv = *stack_base;
169 	envp = *stack_base + (imgp->argc + 1);
170 	(*stack_base)--;
171 	**stack_base = (intptr_t)(void *)envp;
172 	(*stack_base)--;
173 	**stack_base = (intptr_t)(void *)argv;
174 	(*stack_base)--;
175 	**stack_base = imgp->argc;
176 	return 0;
177 }
178 
179 static int
180 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
181 {
182 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
183 	register_t *pos;
184 
185 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
186 
187 	if (args->trace) {
188 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
189 	}
190 	if (args->execfd != -1) {
191 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
192 	}
193 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
194 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
195 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
196 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
197 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
198 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
199 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
200 	PROC_LOCK(imgp->proc);
201 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
202 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
203 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
204 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
205 	PROC_UNLOCK(imgp->proc);
206 	AUXARGS_ENTRY(pos, AT_NULL, 0);
207 
208 	free(imgp->auxargs, M_TEMP);
209 	imgp->auxargs = NULL;
210 
211 	(*stack_base)--;
212 	**stack_base = (long)imgp->argc;
213 	return 0;
214 }
215 
/* Code and data selectors loaded into the trap frame by the sendsig routines. */
extern int _ucodesel, _udatasel;
/* Offset added to the sigcode base to form the entry point used by linux_rt_sendsig(). */
extern unsigned long linux_sznonrtsigcode;
218 
219 static void
220 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
221 {
222 	register struct thread *td = curthread;
223 	register struct proc *p = td->td_proc;
224 	register struct trapframe *regs;
225 	struct l_rt_sigframe *fp, frame;
226 	int oonstack;
227 
228 	PROC_LOCK_ASSERT(p, MA_OWNED);
229 	regs = td->td_frame;
230 	oonstack = sigonstack(regs->tf_esp);
231 
232 #ifdef DEBUG
233 	if (ldebug(rt_sendsig))
234 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
235 		    catcher, sig, (void*)mask, code);
236 #endif
237 	/*
238 	 * Allocate space for the signal handler context.
239 	 */
240 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
241 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
242 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
243 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
244 	} else
245 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
246 	PROC_UNLOCK(p);
247 
248 	/*
249 	 * grow() will return FALSE if the fp will not fit inside the stack
250 	 *	and the stack can not be grown. useracc will return FALSE
251 	 *	if access is denied.
252 	 */
253 	if ((grow_stack (p, (int)fp) == FALSE) ||
254 	    !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
255 	    VM_PROT_WRITE)) {
256 		/*
257 		 * Process has trashed its stack; give it an illegal
258 		 * instruction to halt it in its tracks.
259 		 */
260 		PROC_LOCK(p);
261 		SIGACTION(p, SIGILL) = SIG_DFL;
262 		SIGDELSET(p->p_sigignore, SIGILL);
263 		SIGDELSET(p->p_sigcatch, SIGILL);
264 		SIGDELSET(p->p_sigmask, SIGILL);
265 #ifdef DEBUG
266 		if (ldebug(rt_sendsig))
267 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
268 			    fp, oonstack);
269 #endif
270 		psignal(p, SIGILL);
271 		return;
272 	}
273 
274 	/*
275 	 * Build the argument list for the signal handler.
276 	 */
277 	if (p->p_sysent->sv_sigtbl)
278 		if (sig <= p->p_sysent->sv_sigsize)
279 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
280 
281 	frame.sf_handler = catcher;
282 	frame.sf_sig = sig;
283 	frame.sf_siginfo = &fp->sf_si;
284 	frame.sf_ucontext = &fp->sf_sc;
285 
286 	/* Fill siginfo structure. */
287 	frame.sf_si.lsi_signo = sig;
288 	frame.sf_si.lsi_code = code;
289 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
290 
291 	/*
292 	 * Build the signal context to be used by sigreturn.
293 	 */
294 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
295 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
296 
297 	PROC_LOCK(p);
298 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
299 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
300 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
301 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
302 	PROC_UNLOCK(p);
303 
304 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
305 
306 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
307 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
308 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
309 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
310 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
311 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
312 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
313 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
314 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
315 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
316 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
317 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
318 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
319 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
320 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
321 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
322 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
323 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
324 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
325 
326 #ifdef DEBUG
327 	if (ldebug(rt_sendsig))
328 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
329 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
330 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
331 #endif
332 
333 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
334 		/*
335 		 * Process has trashed its stack; give it an illegal
336 		 * instruction to halt it in its tracks.
337 		 */
338 		PROC_LOCK(p);
339 		sigexit(td, SIGILL);
340 		/* NOTREACHED */
341 	}
342 
343 	/*
344 	 * Build context to run handler in.
345 	 */
346 	regs->tf_esp = (int)fp;
347 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
348 	    linux_sznonrtsigcode;
349 	regs->tf_eflags &= ~PSL_VM;
350 	regs->tf_cs = _ucodesel;
351 	regs->tf_ds = _udatasel;
352 	regs->tf_es = _udatasel;
353 	regs->tf_fs = _udatasel;
354 	regs->tf_ss = _udatasel;
355 	PROC_LOCK(p);
356 }
357 
358 
359 /*
360  * Send an interrupt to process.
361  *
362  * Stack is set up to allow sigcode stored
363  * in u. to call routine, followed by kcall
364  * to sigreturn routine below.  After sigreturn
365  * resets the signal mask, the stack, and the
366  * frame pointer, it returns to the user
367  * specified pc, psl.
368  */
369 
370 static void
371 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
372 {
373 	register struct thread *td = curthread;
374 	register struct proc *p = td->td_proc;
375 	register struct trapframe *regs;
376 	struct l_sigframe *fp, frame;
377 	l_sigset_t lmask;
378 	int oonstack, i;
379 
380 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
381 		/* Signal handler installed with SA_SIGINFO. */
382 		linux_rt_sendsig(catcher, sig, mask, code);
383 		return;
384 	}
385 
386 	regs = td->td_frame;
387 	oonstack = sigonstack(regs->tf_esp);
388 
389 #ifdef DEBUG
390 	if (ldebug(sendsig))
391 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
392 		    catcher, sig, (void*)mask, code);
393 #endif
394 
395 	/*
396 	 * Allocate space for the signal handler context.
397 	 */
398 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
399 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
400 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
401 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
402 	} else
403 		fp = (struct l_sigframe *)regs->tf_esp - 1;
404 	PROC_UNLOCK(p);
405 
406 	/*
407 	 * grow() will return FALSE if the fp will not fit inside the stack
408 	 *	and the stack can not be grown. useracc will return FALSE
409 	 *	if access is denied.
410 	 */
411 	if ((grow_stack (p, (int)fp) == FALSE) ||
412 	    !useracc((caddr_t)fp, sizeof (struct l_sigframe),
413 	    VM_PROT_WRITE)) {
414 		/*
415 		 * Process has trashed its stack; give it an illegal
416 		 * instruction to halt it in its tracks.
417 		 */
418 		PROC_LOCK(p);
419 		SIGACTION(p, SIGILL) = SIG_DFL;
420 		SIGDELSET(p->p_sigignore, SIGILL);
421 		SIGDELSET(p->p_sigcatch, SIGILL);
422 		SIGDELSET(p->p_sigmask, SIGILL);
423 		psignal(p, SIGILL);
424 		return;
425 	}
426 
427 	/*
428 	 * Build the argument list for the signal handler.
429 	 */
430 	if (p->p_sysent->sv_sigtbl)
431 		if (sig <= p->p_sysent->sv_sigsize)
432 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
433 
434 	frame.sf_handler = catcher;
435 	frame.sf_sig = sig;
436 
437 	bsd_to_linux_sigset(mask, &lmask);
438 
439 	/*
440 	 * Build the signal context to be used by sigreturn.
441 	 */
442 	frame.sf_sc.sc_mask   = lmask.__bits[0];
443 	frame.sf_sc.sc_gs     = rgs();
444 	frame.sf_sc.sc_fs     = regs->tf_fs;
445 	frame.sf_sc.sc_es     = regs->tf_es;
446 	frame.sf_sc.sc_ds     = regs->tf_ds;
447 	frame.sf_sc.sc_edi    = regs->tf_edi;
448 	frame.sf_sc.sc_esi    = regs->tf_esi;
449 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
450 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
451 	frame.sf_sc.sc_edx    = regs->tf_edx;
452 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
453 	frame.sf_sc.sc_eax    = regs->tf_eax;
454 	frame.sf_sc.sc_eip    = regs->tf_eip;
455 	frame.sf_sc.sc_cs     = regs->tf_cs;
456 	frame.sf_sc.sc_eflags = regs->tf_eflags;
457 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
458 	frame.sf_sc.sc_ss     = regs->tf_ss;
459 	frame.sf_sc.sc_err    = regs->tf_err;
460 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
461 
462 	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
463 
464 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
465 		frame.sf_extramask[i] = lmask.__bits[i+1];
466 
467 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
468 		/*
469 		 * Process has trashed its stack; give it an illegal
470 		 * instruction to halt it in its tracks.
471 		 */
472 		PROC_LOCK(p);
473 		sigexit(td, SIGILL);
474 		/* NOTREACHED */
475 	}
476 
477 	/*
478 	 * Build context to run handler in.
479 	 */
480 	regs->tf_esp = (int)fp;
481 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
482 	regs->tf_eflags &= ~PSL_VM;
483 	regs->tf_cs = _ucodesel;
484 	regs->tf_ds = _udatasel;
485 	regs->tf_es = _udatasel;
486 	regs->tf_fs = _udatasel;
487 	regs->tf_ss = _udatasel;
488 	PROC_LOCK(p);
489 }
490 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT if the frame at args->sfp cannot be copied in, EINVAL if
 * the saved eflags or %cs fail the checks below (a bad %cs also posts
 * SIGBUS), and EJUSTRETURN once the trap frame has been restored.
 */
int
linux_sigreturn(td, args)
	struct thread *td;
	struct linux_sigreturn_args *args;
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	register struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations.
	 */
	/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
    		return(EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
	/* True when the selector's privilege level is the user level. */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/*
	 * Reassemble the Linux mask from sc_mask plus the extra mask words,
	 * convert it and install it as the process signal mask.
	 * SIG_CANTMASK() is applied afterwards, presumably to strip the
	 * signals that must never be blocked.
	 */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
	SIG_CANTMASK(p->p_sigmask);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_edi    = frame.sf_sc.sc_edi;
	regs->tf_esi    = frame.sf_sc.sc_esi;
	regs->tf_ebp    = frame.sf_sc.sc_ebp;
	regs->tf_ebx    = frame.sf_sc.sc_ebx;
	regs->tf_edx    = frame.sf_sc.sc_edx;
	regs->tf_ecx    = frame.sf_sc.sc_ecx;
	regs->tf_eax    = frame.sf_sc.sc_eax;
	regs->tf_eip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;

	/* The trap frame now holds the full return state, %eax included. */
	return (EJUSTRETURN);
}
586 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT if the ucontext at args->ucp cannot be copied in, EINVAL
 * if the saved eflags or %cs fail the checks below (a bad %cs also posts
 * SIGBUS), and EJUSTRETURN once the trap frame has been restored.  The
 * saved alternate-stack settings are re-applied through sigaltstack(),
 * whose result is ignored.
 */
int
linux_rt_sigreturn(td, args)
	struct thread *td;
	struct linux_rt_sigreturn_args *args;
{
	struct proc *p = td->td_proc;
	struct sigaltstack_args sasargs;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t *ss;
	register struct trapframe *regs;
	int eflags;
	/* Stack gap cursor; the stack_t handed to sigaltstack() is carved from it. */
	caddr_t sg = stackgap_init();

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.
	 */
	/* True when ef and oef differ only in bits covered by PSL_USERCHANGE. */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
    		return(EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
	/* True when the selector's privilege level is the user level. */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/*
	 * Convert the saved Linux mask and install it as the process signal
	 * mask.  SIG_CANTMASK() is applied afterwards, presumably to strip
	 * the signals that must never be blocked.
	 */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
	SIG_CANTMASK(p->p_sigmask);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = context->sc_fs;
	regs->tf_es     = context->sc_es;
	regs->tf_ds     = context->sc_ds;
	regs->tf_edi    = context->sc_edi;
	regs->tf_esi    = context->sc_esi;
	regs->tf_ebp    = context->sc_ebp;
	regs->tf_ebx    = context->sc_ebx;
	regs->tf_edx    = context->sc_edx;
	regs->tf_ecx    = context->sc_ecx;
	regs->tf_eax    = context->sc_eax;
	regs->tf_eip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 */
	/* Translate the saved Linux stack description into a native stack_t. */
	ss = stackgap_alloc(&sg, sizeof(stack_t));
	lss = &uc.uc_stack;
	ss->ss_sp = lss->ss_sp;
	ss->ss_size = lss->ss_size;
	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
#endif
	/* Set the new stack only; the old one is not requested back. */
	sasargs.ss = ss;
	sasargs.oss = NULL;
	(void) sigaltstack(td, &sasargs);

	return (EJUSTRETURN);
}
703 
704 /*
705  * MPSAFE
706  */
707 static void
708 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
709 {
710 	args[0] = tf->tf_ebx;
711 	args[1] = tf->tf_ecx;
712 	args[2] = tf->tf_edx;
713 	args[3] = tf->tf_esi;
714 	args[4] = tf->tf_edi;
715 	*params = NULL;		/* no copyin */
716 }
717 
718 /*
719  * If a linux binary is exec'ing something, try this image activator
720  * first.  We override standard shell script execution in order to
721  * be able to modify the interpreter path.  We only do this if a linux
722  * binary is doing the exec, so we do not create an EXEC module for it.
723  */
724 static int	exec_linux_imgact_try __P((struct image_params *iparams));
725 
726 static int
727 exec_linux_imgact_try(imgp)
728     struct image_params *imgp;
729 {
730     const char *head = (const char *)imgp->image_header;
731     int error = -1;
732 
733     /*
734      * The interpreter for shell scripts run from a linux binary needs
735      * to be located in /compat/linux if possible in order to recursively
736      * maintain linux path emulation.
737      */
738     if (((const short *)head)[0] == SHELLMAGIC) {
739 	    /*
740 	     * Run our normal shell image activator.  If it succeeds attempt
741 	     * to use the alternate path for the interpreter.  If an alternate
742 	     * path is found, use our stringspace to store it.
743 	     */
744 	    if ((error = exec_shell_imgact(imgp)) == 0) {
745 		    char *rpath = NULL;
746 
747 		    linux_emul_find(&imgp->proc->p_thread, NULL, linux_emul_path,
748 			imgp->interpreter_name, &rpath, 0);
749 		    if (rpath != imgp->interpreter_name) {
750 			    int len = strlen(rpath) + 1;
751 
752 			    if (len <= MAXSHELLCMDLEN) {
753 				memcpy(imgp->interpreter_name, rpath, len);
754 			    }
755 			    free(rpath, M_TEMP);
756 		    }
757 	    }
758     }
759     return(error);
760 }
761 
/*
 * System call vector for Linux a.out binaries.  The initializers are
 * positional, so they must stay in the member order of struct sysentvec
 * (declared outside this file).  Differs from elf_linux_sysvec only in
 * the fixup routine, the name string and the core dump routine.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent[] */
	linux_sysent,		/* Linux system call table */
	0xff,			/* NOTE(review): meaning not visible here */
	LINUX_SIGTBLSZ,		/* number of entries in bsd_to_linux_signal[] */
	bsd_to_linux_signal,	/* signal translation table */
	ELAST + 1,		/* number of entries in bsd_to_linux_errno[] */
	bsd_to_linux_errno,	/* errno translation table */
	translate_traps,	/* trap-to-signal adjustment */
	linux_fixup,		/* a.out stack fixup */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* read through sv_szsigcode by the sendsig routines */
	linux_prepsyscall,	/* system call argument gathering */
	"Linux a.out",
	aout_coredump,
	exec_linux_imgact_try,	/* image activator tried first on exec */
	LINUX_MINSIGSTKSZ
};
781 
/*
 * System call vector for Linux ELF binaries; referenced by both ELF brand
 * entries below.  The initializers are positional, so they must stay in
 * the member order of struct sysentvec (declared outside this file).
 * Differs from linux_sysvec only in the fixup routine, the name string
 * and the core dump routine.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent[] */
	linux_sysent,		/* Linux system call table */
	0xff,			/* NOTE(review): meaning not visible here */
	LINUX_SIGTBLSZ,		/* number of entries in bsd_to_linux_signal[] */
	bsd_to_linux_signal,	/* signal translation table */
	ELAST + 1,		/* number of entries in bsd_to_linux_errno[] */
	bsd_to_linux_errno,	/* errno translation table */
	translate_traps,	/* trap-to-signal adjustment */
	elf_linux_fixup,	/* ELF stack fixup (writes the aux vector) */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* read through sv_szsigcode by the sendsig routines */
	linux_prepsyscall,	/* system call argument gathering */
	"Linux ELF",
	elf_coredump,
	exec_linux_imgact_try,	/* image activator tried first on exec */
	LINUX_MINSIGSTKSZ
};
801 
/*
 * ELF brand entry for Linux binaries that name /lib/ld-linux.so.1.
 * Positional initializer; NOTE(review): the strings are presumably the
 * brand name, the emulation root and the interpreter path -- confirm
 * against the Elf32_Brandinfo declaration.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec
				 };
809 
/*
 * ELF brand entry for Linux binaries that name /lib/ld-linux.so.2.
 * Identical to linux_brand apart from that path.
 */
static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec
				 };
817 
/*
 * NULL-terminated list of the brand entries that linux_elf_modevent()
 * inserts on load and removes on unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
823 
824 static int
825 linux_elf_modevent(module_t mod, int type, void *data)
826 {
827 	Elf32_Brandinfo **brandinfo;
828 	int error;
829 	struct linux_ioctl_handler **lihp;
830 
831 	error = 0;
832 
833 	switch(type) {
834 	case MOD_LOAD:
835 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
836 		     ++brandinfo)
837 			if (elf_insert_brand_entry(*brandinfo) < 0)
838 				error = EINVAL;
839 		if (error == 0) {
840 			SET_FOREACH(lihp, linux_ioctl_handler_set)
841 				linux_ioctl_register_handler(*lihp);
842 			if (bootverbose)
843 				printf("Linux ELF exec handler installed\n");
844 		} else
845 			printf("cannot insert Linux ELF brand handler\n");
846 		break;
847 	case MOD_UNLOAD:
848 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
849 		     ++brandinfo)
850 			if (elf_brand_inuse(*brandinfo))
851 				error = EBUSY;
852 		if (error == 0) {
853 			for (brandinfo = &linux_brandlist[0];
854 			     *brandinfo != NULL; ++brandinfo)
855 				if (elf_remove_brand_entry(*brandinfo) < 0)
856 					error = EINVAL;
857 		}
858 		if (error == 0) {
859 			SET_FOREACH(lihp, linux_ioctl_handler_set)
860 				linux_ioctl_unregister_handler(*lihp);
861 			if (bootverbose)
862 				printf("Linux ELF exec handler removed\n");
863 		} else
864 			printf("Could not deinstall ELF interpreter entry\n");
865 		break;
866 	default:
867 		break;
868 	}
869 	return error;
870 }
871 
/*
 * Module description: the name "linuxelf", its event handler and a
 * trailing 0 (NOTE(review): presumably the handler's private data
 * argument, unused here -- confirm against moduledata_t).
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Hook the module into the SI_SUB_EXEC startup stage, at SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
879