xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 77a0943ded95b9e6438f7db70c4a28e4d93946d4)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysproto.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56 
57 #include <i386/linux/linux.h>
58 #include <i386/linux/linux_proto.h>
59 #include <compat/linux/linux_signal.h>
60 #include <compat/linux/linux_util.h>
61 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/* Define the M_LINUX malloc type (short name "linux"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two characters "#!" that open a script, expressed as the value seen
 * when the first two bytes of an image are read as one 16-bit integer.
 * The value depends on byte order.  Used by exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Signal trampoline code and its size, defined outside this file.  Both
 * are installed in the sysentvec tables below; the size is also read
 * through sv_szsigcode when the trampoline address is computed.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, LINUX_SYS_MAXSYSCALL entries. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Set of ioctl handlers (un)registered by linux_elf_modevent(). */
extern struct linker_set linux_ioctl_handler_set;

/*
 * Forward declarations of the file-local functions that are referenced
 * by the sysentvec initializers before or after their definitions.
 */
static int	linux_fixup __P((register_t **stack_base,
				 struct image_params *iparams));
static int	elf_linux_fixup __P((register_t **stack_base,
				     struct image_params *iparams));
static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
				       u_int *code, caddr_t *params));
static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
				   u_long code));
87 
88 /*
89  * Linux syscalls return negative errno's, we do positive and map them
90  */
91 static int bsd_to_linux_errno[ELAST + 1] = {
92   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
93  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
94  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
95  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
96  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
97 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
98 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
99 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
100   	-6, -6, -43, -42, -75, -6, -84
101 };
102 
103 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
104 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
105 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
106 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
107 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
108 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
109 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
110 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
111 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
112 };
113 
114 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
115 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
116 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
117 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
118 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
119 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
120 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
121 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
122 	SIGIO, SIGURG, 0
123 };
124 
125 /*
126  * If FreeBSD & Linux have a difference of opinion about what a trap
127  * means, deal with it here.
128  */
129 static int
130 translate_traps(int signal, int trap_code)
131 {
132 	if (signal != SIGBUS)
133 		return signal;
134 	switch (trap_code) {
135 	case T_PROTFLT:
136 	case T_TSSFLT:
137 	case T_DOUBLEFLT:
138 	case T_PAGEFLT:
139 		return SIGSEGV;
140 	default:
141 		return signal;
142 	}
143 }
144 
145 static int
146 linux_fixup(register_t **stack_base, struct image_params *imgp)
147 {
148 	register_t *argv, *envp;
149 
150 	argv = *stack_base;
151 	envp = *stack_base + (imgp->argc + 1);
152 	(*stack_base)--;
153 	**stack_base = (intptr_t)(void *)envp;
154 	(*stack_base)--;
155 	**stack_base = (intptr_t)(void *)argv;
156 	(*stack_base)--;
157 	**stack_base = imgp->argc;
158 	return 0;
159 }
160 
161 static int
162 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
163 {
164 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
165 	register_t *pos;
166 
167 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
168 
169 	if (args->trace) {
170 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
171 	}
172 	if (args->execfd != -1) {
173 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
174 	}
175 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
176 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
177 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
178 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
179 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
180 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
181 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
182 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
183 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
184 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
185 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
186 	AUXARGS_ENTRY(pos, AT_NULL, 0);
187 
188 	free(imgp->auxargs, M_TEMP);
189 	imgp->auxargs = NULL;
190 
191 	(*stack_base)--;
192 	**stack_base = (long)imgp->argc;
193 	return 0;
194 }
195 
196 extern int _ucodesel, _udatasel;
197 extern unsigned long _linux_sznonrtsigcode;
198 
199 static void
200 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
201 {
202 	register struct proc *p = curproc;
203 	register struct trapframe *regs;
204 	struct linux_rt_sigframe *fp, frame;
205 	int oonstack;
206 
207 	regs = p->p_md.md_regs;
208 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
209 
210 #ifdef DEBUG
211 	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
212 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
213 #endif
214 	/*
215 	 * Allocate space for the signal handler context.
216 	 */
217 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
218 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
219 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
220 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
221 		p->p_sigstk.ss_flags |= SS_ONSTACK;
222 	} else {
223 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
224 	}
225 
226 	/*
227 	 * grow() will return FALSE if the fp will not fit inside the stack
228 	 *	and the stack can not be grown. useracc will return FALSE
229 	 *	if access is denied.
230 	 */
231 	if ((grow_stack (p, (int)fp) == FALSE) ||
232 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
233 	    VM_PROT_WRITE)) {
234 		/*
235 		 * Process has trashed its stack; give it an illegal
236 		 * instruction to halt it in its tracks.
237 		 */
238 		SIGACTION(p, SIGILL) = SIG_DFL;
239 		SIGDELSET(p->p_sigignore, SIGILL);
240 		SIGDELSET(p->p_sigcatch, SIGILL);
241 		SIGDELSET(p->p_sigmask, SIGILL);
242 #ifdef DEBUG
243 		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, SS_ONSTACK: 0x%x ",
244 	    (long)p->p_pid, fp, p->p_sigstk.ss_flags & SS_ONSTACK);
245 #endif
246 		psignal(p, SIGILL);
247 		return;
248 	}
249 
250 	/*
251 	 * Build the argument list for the signal handler.
252 	 */
253 	if (p->p_sysent->sv_sigtbl)
254 		if (sig <= p->p_sysent->sv_sigsize)
255 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
256 
257 	frame.sf_handler = catcher;
258 	frame.sf_sig = sig;
259 	frame.sf_siginfo = &fp->sf_si;
260 	frame.sf_ucontext = &fp->sf_sc;
261 
262 	/* Fill siginfo structure. */
263 	frame.sf_si.lsi_signo = sig;
264 	frame.sf_si.lsi_code = code;
265 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
266 
267 	/*
268 	 * Build the signal context to be used by sigreturn.
269 	 */
270 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
271 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
272 
273 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
274 	frame.sf_sc.uc_stack.ss_flags =
275 	    bsd_to_linux_sigaltstack(p->p_sigstk.ss_flags);
276 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
277 
278 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
279 
280 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
281 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
282 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
283 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
284 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
285 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
286 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
287 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
288 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
289 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
290 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
291 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
292 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
293 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
294 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
295 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
296 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
297 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
298 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
299 
300 #ifdef DEBUG
301 	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
302 	    (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,  p->p_sigstk.ss_sp,
303 	    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
304 #endif
305 
306 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
307 		/*
308 		 * Process has trashed its stack; give it an illegal
309 		 * instruction to halt it in its tracks.
310 		 */
311 		sigexit(p, SIGILL);
312 		/* NOTREACHED */
313 	}
314 
315 	/*
316 	 * Build context to run handler in.
317 	 */
318 	regs->tf_esp = (int)fp;
319 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
320 	    _linux_sznonrtsigcode;
321 	regs->tf_eflags &= ~PSL_VM;
322 	regs->tf_cs = _ucodesel;
323 	regs->tf_ds = _udatasel;
324 	regs->tf_es = _udatasel;
325 	regs->tf_fs = _udatasel;
326 	load_gs(_udatasel);
327 	regs->tf_ss = _udatasel;
328 }
329 
330 
331 /*
332  * Send an interrupt to process.
333  *
334  * Stack is set up to allow sigcode stored
335  * in u. to call routine, followed by kcall
336  * to sigreturn routine below.  After sigreturn
337  * resets the signal mask, the stack, and the
338  * frame pointer, it returns to the user
339  * specified pc, psl.
340  */
341 
342 static void
343 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
344 {
345 	register struct proc *p = curproc;
346 	register struct trapframe *regs;
347 	struct linux_sigframe *fp, frame;
348 	linux_sigset_t lmask;
349 	int oonstack, i;
350 
351 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
352 		/* Signal handler installed with SA_SIGINFO. */
353 		linux_rt_sendsig(catcher, sig, mask, code);
354 		return;
355 	}
356 
357 	regs = p->p_md.md_regs;
358 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
359 
360 #ifdef DEBUG
361 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
362 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
363 #endif
364 
365 	/*
366 	 * Allocate space for the signal handler context.
367 	 */
368 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
369 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
370 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
371 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
372 		p->p_sigstk.ss_flags |= SS_ONSTACK;
373 	} else {
374 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
375 	}
376 
377 	/*
378 	 * grow() will return FALSE if the fp will not fit inside the stack
379 	 *	and the stack can not be grown. useracc will return FALSE
380 	 *	if access is denied.
381 	 */
382 	if ((grow_stack (p, (int)fp) == FALSE) ||
383 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
384 	    VM_PROT_WRITE)) {
385 		/*
386 		 * Process has trashed its stack; give it an illegal
387 		 * instruction to halt it in its tracks.
388 		 */
389 		SIGACTION(p, SIGILL) = SIG_DFL;
390 		SIGDELSET(p->p_sigignore, SIGILL);
391 		SIGDELSET(p->p_sigcatch, SIGILL);
392 		SIGDELSET(p->p_sigmask, SIGILL);
393 		psignal(p, SIGILL);
394 		return;
395 	}
396 
397 	/*
398 	 * Build the argument list for the signal handler.
399 	 */
400 	if (p->p_sysent->sv_sigtbl)
401 		if (sig <= p->p_sysent->sv_sigsize)
402 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
403 
404 	frame.sf_handler = catcher;
405 	frame.sf_sig = sig;
406 
407 	bsd_to_linux_sigset(mask, &lmask);
408 
409 	/*
410 	 * Build the signal context to be used by sigreturn.
411 	 */
412 	frame.sf_sc.sc_mask   = lmask.__bits[0];
413 	frame.sf_sc.sc_gs     = rgs();
414 	frame.sf_sc.sc_fs     = regs->tf_fs;
415 	frame.sf_sc.sc_es     = regs->tf_es;
416 	frame.sf_sc.sc_ds     = regs->tf_ds;
417 	frame.sf_sc.sc_edi    = regs->tf_edi;
418 	frame.sf_sc.sc_esi    = regs->tf_esi;
419 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
420 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
421 	frame.sf_sc.sc_edx    = regs->tf_edx;
422 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
423 	frame.sf_sc.sc_eax    = regs->tf_eax;
424 	frame.sf_sc.sc_eip    = regs->tf_eip;
425 	frame.sf_sc.sc_cs     = regs->tf_cs;
426 	frame.sf_sc.sc_eflags = regs->tf_eflags;
427 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
428 	frame.sf_sc.sc_ss     = regs->tf_ss;
429 	frame.sf_sc.sc_err    = regs->tf_err;
430 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
431 
432 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
433 
434 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
435 		frame.sf_extramask[i] = lmask.__bits[i+1];
436 
437 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
438 		/*
439 		 * Process has trashed its stack; give it an illegal
440 		 * instruction to halt it in its tracks.
441 		 */
442 		sigexit(p, SIGILL);
443 		/* NOTREACHED */
444 	}
445 
446 	/*
447 	 * Build context to run handler in.
448 	 */
449 	regs->tf_esp = (int)fp;
450 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
451 	regs->tf_eflags &= ~PSL_VM;
452 	regs->tf_cs = _ucodesel;
453 	regs->tf_ds = _udatasel;
454 	regs->tf_es = _udatasel;
455 	regs->tf_fs = _udatasel;
456 	load_gs(_udatasel);
457 	regs->tf_ss = _udatasel;
458 }
459 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 *	p	process issuing the call
 *	args	system call arguments; args->sfp is the user address of
 *		the struct linux_sigframe to restore from
 *
 * Returns EFAULT if the frame cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the checks below, and EJUSTRETURN once the trapframe
 * has been reloaded from the frame.
 */
int
linux_sigreturn(p, args)
	struct proc *p;
	struct linux_sigreturn_args *args;
{
	struct linux_sigframe frame;
	register struct trapframe *regs;
	linux_sigset_t lmask;
	int eflags, i;

	regs = p->p_md.md_regs;

#ifdef DEBUG
	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
	    (long)p->p_pid, (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations: outside the PSL_USERCHANGE bits
	 * the saved eflags must equal the current trapframe's eflags.
	 * (linux_rt_sigreturn() defines the same macro with the same
	 * replacement text.)
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
    		return(EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/*
	 * Leave the signal stack and rebuild the signal mask: word 0 comes
	 * from sc_mask, the remaining words from sf_extramask.  The signals
	 * covered by SIG_CANTMASK are then removed from the mask.
	 */
	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
	SIG_CANTMASK(p->p_sigmask);

	/*
	 * Restore signal context.
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_edi    = frame.sf_sc.sc_edi;
	regs->tf_esi    = frame.sf_sc.sc_esi;
	regs->tf_ebp    = frame.sf_sc.sc_ebp;
	regs->tf_ebx    = frame.sf_sc.sc_ebx;
	regs->tf_edx    = frame.sf_sc.sc_edx;
	regs->tf_ecx    = frame.sf_sc.sc_ecx;
	regs->tf_eax    = frame.sf_sc.sc_eax;
	regs->tf_eip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;

	/*
	 * NOTE(review): EJUSTRETURN presumably tells the system call return
	 * path to leave the just-restored registers alone -- confirm.
	 */
	return (EJUSTRETURN);
}
553 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 *	p	process issuing the call
 *	args	system call arguments; args->ucp is the user address of
 *		the struct linux_ucontext to restore from
 *
 * Returns EFAULT if the context cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the checks below, and EJUSTRETURN once the trapframe
 * has been reloaded and the saved signal-stack settings have been passed
 * to sigaltstack().
 */
int
linux_rt_sigreturn(p, args)
	struct proc *p;
	struct linux_rt_sigreturn_args *args;
{
	struct sigaltstack_args sasargs;
	struct linux_ucontext 	 uc;
	struct linux_sigcontext *context;
	linux_stack_t *lss;
	stack_t *ss;
	register struct trapframe *regs;
	int eflags;
	/* Scratch area from which the stack_t passed to sigaltstack() comes. */
	caddr_t sg = stackgap_init();

	regs = p->p_md.md_regs;

#ifdef DEBUG
	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
	    (long)p->p_pid, (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	/* From here on the register context is read from the kernel copy. */
	context = &uc.uc_mcontext;

	/*
	 * Check for security violations: outside the PSL_USERCHANGE bits
	 * the saved eflags must equal the current trapframe's eflags.
	 * (linux_sigreturn() defines the same macro with the same
	 * replacement text.)
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
    		return(EINVAL);
	}

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		trapsignal(p, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/*
	 * Leave the signal stack and reinstate the saved signal mask; the
	 * signals covered by SIG_CANTMASK are then removed from the mask.
	 */
	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
	SIG_CANTMASK(p->p_sigmask);

	/*
	 * Restore signal context
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = context->sc_fs;
	regs->tf_es     = context->sc_es;
	regs->tf_ds     = context->sc_ds;
	regs->tf_edi    = context->sc_edi;
	regs->tf_esi    = context->sc_esi;
	regs->tf_ebp    = context->sc_ebp;
	regs->tf_ebx    = context->sc_ebx;
	regs->tf_edx    = context->sc_edx;
	regs->tf_ecx    = context->sc_ecx;
	regs->tf_eax    = context->sc_eax;
	regs->tf_eip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 *
	 * The Linux stack description saved in the ucontext is converted
	 * into a stack_t allocated from the stack gap and handed to
	 * sigaltstack() with no old-stack pointer.
	 */
	ss = stackgap_alloc(&sg, sizeof(stack_t));
	lss = &uc.uc_stack;
	ss->ss_sp = lss->ss_sp;
	ss->ss_size = lss->ss_size;
	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
#endif
	sasargs.ss = ss;
	sasargs.oss = NULL;
	(void) sigaltstack(p, &sasargs);

	/*
	 * NOTE(review): EJUSTRETURN presumably tells the system call return
	 * path to leave the just-restored registers alone -- confirm.
	 */
	return (EJUSTRETURN);
}
667 
668 static void
669 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
670 {
671 	args[0] = tf->tf_ebx;
672 	args[1] = tf->tf_ecx;
673 	args[2] = tf->tf_edx;
674 	args[3] = tf->tf_esi;
675 	args[4] = tf->tf_edi;
676 	*params = NULL;		/* no copyin */
677 }
678 
679 /*
680  * If a linux binary is exec'ing something, try this image activator
681  * first.  We override standard shell script execution in order to
682  * be able to modify the interpreter path.  We only do this if a linux
683  * binary is doing the exec, so we do not create an EXEC module for it.
684  */
685 static int	exec_linux_imgact_try __P((struct image_params *iparams));
686 
687 static int
688 exec_linux_imgact_try(imgp)
689     struct image_params *imgp;
690 {
691     const char *head = (const char *)imgp->image_header;
692     int error = -1;
693 
694     /*
695      * The interpreter for shell scripts run from a linux binary needs
696      * to be located in /compat/linux if possible in order to recursively
697      * maintain linux path emulation.
698      */
699     if (((const short *)head)[0] == SHELLMAGIC) {
700 	    /*
701 	     * Run our normal shell image activator.  If it succeeds attempt
702 	     * to use the alternate path for the interpreter.  If an alternate
703 	     * path is found, use our stringspace to store it.
704 	     */
705 	    if ((error = exec_shell_imgact(imgp)) == 0) {
706 		    char *rpath = NULL;
707 
708 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
709 			imgp->interpreter_name, &rpath, 0);
710 		    if (rpath != imgp->interpreter_name) {
711 			    int len = strlen(rpath) + 1;
712 
713 			    if (len <= MAXSHELLCMDLEN) {
714 				memcpy(imgp->interpreter_name, rpath, len);
715 			    }
716 			    free(rpath, M_TEMP);
717 		    }
718 	    }
719     }
720     return(error);
721 }
722 
/*
 * System entry vector for Linux a.out images.  The initializer is
 * positional; the per-line notes describe what each value is.  Field names
 * other than sv_sigsize, sv_sigtbl and sv_szsigcode are not visible in this
 * file -- see the struct sysentvec declaration.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent */
	linux_sysent,		/* system call table */
	0xff,			/* NOTE(review): presumably a syscall number mask -- confirm */
	LINUX_SIGTBLSZ,		/* sv_sigsize: entries in the signal table */
	bsd_to_linux_signal,	/* sv_sigtbl: BSD -> Linux signal numbers */
	ELAST + 1,		/* entries in the errno table */
	bsd_to_linux_errno,	/* BSD -> Linux errno values */
	translate_traps,	/* trap -> signal adjustment */
	linux_fixup,		/* stack fixup: pushes envp, argv, argc */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* sv_szsigcode: size of the trampoline */
	linux_prepsyscall,	/* fetches syscall arguments from registers */
	"Linux a.out",		/* name */
	aout_coredump,		/* core dump routine */
	exec_linux_imgact_try,	/* image activator tried first on exec */
	LINUX_MINSIGSTKSZ	/* NOTE(review): presumably minimum signal stack size -- confirm */
};
742 
/*
 * System entry vector for Linux ELF images.  It differs from linux_sysvec
 * only in the stack fixup routine, the name string and the core dump
 * routine.  The initializer is positional; field names other than
 * sv_sigsize, sv_sigtbl and sv_szsigcode are not visible in this file --
 * see the struct sysentvec declaration.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent */
	linux_sysent,		/* system call table */
	0xff,			/* NOTE(review): presumably a syscall number mask -- confirm */
	LINUX_SIGTBLSZ,		/* sv_sigsize: entries in the signal table */
	bsd_to_linux_signal,	/* sv_sigtbl: BSD -> Linux signal numbers */
	ELAST + 1,		/* entries in the errno table */
	bsd_to_linux_errno,	/* BSD -> Linux errno values */
	translate_traps,	/* trap -> signal adjustment */
	elf_linux_fixup,	/* stack fixup: writes auxv, pushes argc */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* sv_szsigcode: size of the trampoline */
	linux_prepsyscall,	/* fetches syscall arguments from registers */
	"Linux ELF",		/* name */
	elf_coredump,		/* core dump routine */
	exec_linux_imgact_try,	/* image activator tried first on exec */
	LINUX_MINSIGSTKSZ	/* NOTE(review): presumably minimum signal stack size -- confirm */
};
762 
/*
 * ELF brand descriptions for Linux binaries.  Both carry the
 * ELFOSABI_LINUX brand, the "/compat/linux" path and elf_linux_sysvec;
 * they differ only in the dynamic linker path string.
 * NOTE(review): the meaning of each positional member is taken from the
 * values themselves -- confirm against the Elf32_Brandinfo declaration.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec
				 };

/* Same as linux_brand, but for the ld-linux.so.2 dynamic linker. */
static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec
				 };

/*
 * NULL-terminated list of the brands above; linux_elf_modevent() walks it
 * to insert, check and remove the brand entries.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
782 
783 static int
784 linux_elf_modevent(module_t mod, int type, void *data)
785 {
786 	Elf32_Brandinfo **brandinfo;
787 	int error;
788 
789 	error = 0;
790 
791 	switch(type) {
792 	case MOD_LOAD:
793 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
794 		     ++brandinfo)
795 			if (elf_insert_brand_entry(*brandinfo) < 0)
796 				error = EINVAL;
797 		if (error == 0) {
798 			linux_ioctl_register_handlers(
799 				&linux_ioctl_handler_set);
800 			if (bootverbose)
801 				printf("Linux ELF exec handler installed\n");
802 		} else
803 			printf("cannot insert Linux ELF brand handler\n");
804 		break;
805 	case MOD_UNLOAD:
806 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
807 		     ++brandinfo)
808 			if (elf_brand_inuse(*brandinfo))
809 				error = EBUSY;
810 		if (error == 0) {
811 			for (brandinfo = &linux_brandlist[0];
812 			     *brandinfo != NULL; ++brandinfo)
813 				if (elf_remove_brand_entry(*brandinfo) < 0)
814 					error = EINVAL;
815 		}
816 		if (error == 0) {
817 			linux_ioctl_unregister_handlers(
818 				&linux_ioctl_handler_set);
819 			if (bootverbose)
820 				printf("Linux ELF exec handler removed\n");
821 		} else
822 			printf("Could not deinstall ELF interpreter entry\n");
823 		break;
824 	default:
825 		break;
826 	}
827 	return error;
828 }
829 
/*
 * Module description: the name "linuxelf", the event handler
 * linux_elf_modevent(), and 0 as the third member.
 * NOTE(review): the third member is presumably the private data pointer
 * handed to the handler -- confirm against the moduledata_t declaration.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module at the SI_SUB_EXEC stage with SI_ORDER_ANY ordering. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
837