xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 5069714534cba67f1985e6dfe23b145178372b5f)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysproto.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56 
57 #include <i386/linux/linux.h>
58 #include <i386/linux/linux_proto.h>
59 #include <compat/linux/linux_signal.h>
60 #include <compat/linux/linux_util.h>
61 
/*
 * Module metadata: this is version 1 of the "linux" module, and it declares
 * dependencies on the sysvmsg, sysvsem and sysvshm modules.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Malloc type M_LINUX, named "linux". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two characters "#!" read as a single 16-bit value, so the constant
 * depends on the host byte order.  exec_linux_imgact_try() compares the
 * first short of an image header against it.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Signal code installed in both sysentvecs below, and its size.  Both are
 * defined outside this file.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set passed to linux_ioctl_{register,unregister}_handlers(). */
extern struct linker_set linux_ioctl_handler_set;

/* Forward declarations of the callbacks placed in the sysentvecs below. */
static int	linux_fixup __P((register_t **stack_base,
				 struct image_params *iparams));
static int	elf_linux_fixup __P((register_t **stack_base,
				     struct image_params *iparams));
static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
				       u_int *code, caddr_t *params));
static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
				   u_long code));
90 
/*
 * Linux syscalls return negative errno values; FreeBSD uses positive ones.
 *
 * This table is indexed by the FreeBSD errno (0 .. ELAST) and yields the
 * already-negated Linux value.  It is purely positional, ten entries per
 * row, so an entry must never be inserted or removed without keeping every
 * later entry at its index.  Some Linux values (e.g. -6, -11) appear for
 * more than one FreeBSD errno.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/* BSD  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* BSD 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* BSD 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* BSD 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* BSD 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* BSD 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* BSD 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* BSD 70 - 79 */
	-6, -6, -43, -42, -75, -6, -84				/* BSD 80 - 86 */
};
105 
/*
 * FreeBSD -> Linux signal number translation.
 *
 * Installed in both sysentvecs below; the sendsig routines consult it via
 * sv_sigtbl[_SIG_IDX(sig)].  The first entry is LINUX_SIGHUP, so index 0
 * presumably corresponds to signal 1 -- TODO confirm against _SIG_IDX().
 * NOTE(review): the 0 entries look like "no Linux counterpart"; confirm.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
116 
/*
 * Linux -> FreeBSD signal number translation; the positional counterpart
 * of bsd_to_linux_signal[].  It is not referenced elsewhere in this file.
 *
 * The mapping is not one-to-one: SIGBUS and SIGURG each appear twice.
 * NOTE(review): the trailing 0 looks like "no FreeBSD counterpart"; confirm.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, 0
};
127 
128 /*
129  * If FreeBSD & Linux have a difference of opinion about what a trap
130  * means, deal with it here.
131  */
132 static int
133 translate_traps(int signal, int trap_code)
134 {
135 	if (signal != SIGBUS)
136 		return signal;
137 	switch (trap_code) {
138 	case T_PROTFLT:
139 	case T_TSSFLT:
140 	case T_DOUBLEFLT:
141 	case T_PAGEFLT:
142 		return SIGSEGV;
143 	default:
144 		return signal;
145 	}
146 }
147 
148 static int
149 linux_fixup(register_t **stack_base, struct image_params *imgp)
150 {
151 	register_t *argv, *envp;
152 
153 	argv = *stack_base;
154 	envp = *stack_base + (imgp->argc + 1);
155 	(*stack_base)--;
156 	**stack_base = (intptr_t)(void *)envp;
157 	(*stack_base)--;
158 	**stack_base = (intptr_t)(void *)argv;
159 	(*stack_base)--;
160 	**stack_base = imgp->argc;
161 	return 0;
162 }
163 
/*
 * Stack fixup for Linux ELF images: emit the ELF auxiliary vector and
 * push argc.
 *
 * The auxiliary entries are written starting argc + envc + 2 words above
 * *stack_base (presumably just past the argv and envp arrays and their
 * terminators -- TODO confirm against the exec stack layout).  The
 * auxargs buffer hanging off imgp is then freed and the pointer cleared.
 * Finally argc is pushed below *stack_base.  Always returns 0.
 */
static int
elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
{
	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
	register_t *pos;

	pos = *stack_base + (imgp->argc + imgp->envc + 2);

	/* Optional entries, emitted only when the loader recorded them. */
	if (args->trace) {
		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
	}
	if (args->execfd != -1) {
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	}
	/* Entries that are always emitted; the order is part of the output. */
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	/*
	 * NOTE(review): AT_EUID/AT_EGID are filled from the p_svuid/p_svgid
	 * fields rather than from an effective id -- confirm this is intended.
	 */
	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	/* The auxargs are no longer needed once written out. */
	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	(*stack_base)--;
	**stack_base = (long)imgp->argc;
	return 0;
}
198 
/*
 * Selectors loaded into the trapframe when a signal handler is entered:
 * _ucodesel goes to %cs, _udatasel to %ds, %es, %fs, %gs and %ss.
 */
extern int _ucodesel, _udatasel;
/*
 * Offset added to the start of the signal code to reach the entry point
 * used by linux_rt_sendsig().
 */
extern unsigned long _linux_sznonrtsigcode;
201 
202 static void
203 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
204 {
205 	register struct proc *p = curproc;
206 	register struct trapframe *regs;
207 	struct linux_rt_sigframe *fp, frame;
208 	int oonstack;
209 
210 	regs = p->p_md.md_regs;
211 	oonstack = sigonstack(regs->tf_esp);
212 
213 #ifdef DEBUG
214 	printf("Linux-emul(%ld): linux_rt_sendsig(%p, %d, %p, %lu)\n",
215 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
216 #endif
217 	/*
218 	 * Allocate space for the signal handler context.
219 	 */
220 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
221 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
222 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
223 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
224 	} else
225 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
226 
227 	/*
228 	 * grow() will return FALSE if the fp will not fit inside the stack
229 	 *	and the stack can not be grown. useracc will return FALSE
230 	 *	if access is denied.
231 	 */
232 	if ((grow_stack (p, (int)fp) == FALSE) ||
233 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
234 	    VM_PROT_WRITE)) {
235 		/*
236 		 * Process has trashed its stack; give it an illegal
237 		 * instruction to halt it in its tracks.
238 		 */
239 		SIGACTION(p, SIGILL) = SIG_DFL;
240 		SIGDELSET(p->p_sigignore, SIGILL);
241 		SIGDELSET(p->p_sigcatch, SIGILL);
242 		SIGDELSET(p->p_sigmask, SIGILL);
243 #ifdef DEBUG
244 		printf("Linux-emul(%ld): linux_rt_sendsig -- bad stack %p, "
245 		    "oonstack=%x\n", (long)p->p_pid, fp, oonstack);
246 #endif
247 		psignal(p, SIGILL);
248 		return;
249 	}
250 
251 	/*
252 	 * Build the argument list for the signal handler.
253 	 */
254 	if (p->p_sysent->sv_sigtbl)
255 		if (sig <= p->p_sysent->sv_sigsize)
256 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
257 
258 	frame.sf_handler = catcher;
259 	frame.sf_sig = sig;
260 	frame.sf_siginfo = &fp->sf_si;
261 	frame.sf_ucontext = &fp->sf_sc;
262 
263 	/* Fill siginfo structure. */
264 	frame.sf_si.lsi_signo = sig;
265 	frame.sf_si.lsi_code = code;
266 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
267 
268 	/*
269 	 * Build the signal context to be used by sigreturn.
270 	 */
271 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
272 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
273 
274 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
275 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
276 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
277 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
278 
279 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
280 
281 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
282 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
283 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
284 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
285 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
286 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
287 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
288 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
289 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
290 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
291 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
292 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
293 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
294 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
295 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
296 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
297 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
298 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
299 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
300 
301 #ifdef DEBUG
302 	printf("Linux-emul(%ld): rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, "
303 	    "mask: 0x%x\n", (long)p->p_pid, frame.sf_sc.uc_stack.ss_flags,
304 	    p->p_sigstk.ss_sp, p->p_sigstk.ss_size,
305 	    frame.sf_sc.uc_mcontext.sc_mask);
306 #endif
307 
308 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
309 		/*
310 		 * Process has trashed its stack; give it an illegal
311 		 * instruction to halt it in its tracks.
312 		 */
313 		sigexit(p, SIGILL);
314 		/* NOTREACHED */
315 	}
316 
317 	/*
318 	 * Build context to run handler in.
319 	 */
320 	regs->tf_esp = (int)fp;
321 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
322 	    _linux_sznonrtsigcode;
323 	regs->tf_eflags &= ~PSL_VM;
324 	regs->tf_cs = _ucodesel;
325 	regs->tf_ds = _udatasel;
326 	regs->tf_es = _udatasel;
327 	regs->tf_fs = _udatasel;
328 	load_gs(_udatasel);
329 	regs->tf_ss = _udatasel;
330 }
331 
332 
333 /*
334  * Send an interrupt to process.
335  *
336  * Stack is set up to allow sigcode stored
337  * in u. to call routine, followed by kcall
338  * to sigreturn routine below.  After sigreturn
339  * resets the signal mask, the stack, and the
340  * frame pointer, it returns to the user
341  * specified pc, psl.
342  */
343 
344 static void
345 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
346 {
347 	register struct proc *p = curproc;
348 	register struct trapframe *regs;
349 	struct linux_sigframe *fp, frame;
350 	linux_sigset_t lmask;
351 	int oonstack, i;
352 
353 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
354 		/* Signal handler installed with SA_SIGINFO. */
355 		linux_rt_sendsig(catcher, sig, mask, code);
356 		return;
357 	}
358 
359 	regs = p->p_md.md_regs;
360 	oonstack = sigonstack(regs->tf_esp);
361 
362 #ifdef DEBUG
363 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
364 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
365 #endif
366 
367 	/*
368 	 * Allocate space for the signal handler context.
369 	 */
370 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
371 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
372 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
373 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
374 	} else
375 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
376 
377 	/*
378 	 * grow() will return FALSE if the fp will not fit inside the stack
379 	 *	and the stack can not be grown. useracc will return FALSE
380 	 *	if access is denied.
381 	 */
382 	if ((grow_stack (p, (int)fp) == FALSE) ||
383 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
384 	    VM_PROT_WRITE)) {
385 		/*
386 		 * Process has trashed its stack; give it an illegal
387 		 * instruction to halt it in its tracks.
388 		 */
389 		SIGACTION(p, SIGILL) = SIG_DFL;
390 		SIGDELSET(p->p_sigignore, SIGILL);
391 		SIGDELSET(p->p_sigcatch, SIGILL);
392 		SIGDELSET(p->p_sigmask, SIGILL);
393 		psignal(p, SIGILL);
394 		return;
395 	}
396 
397 	/*
398 	 * Build the argument list for the signal handler.
399 	 */
400 	if (p->p_sysent->sv_sigtbl)
401 		if (sig <= p->p_sysent->sv_sigsize)
402 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
403 
404 	frame.sf_handler = catcher;
405 	frame.sf_sig = sig;
406 
407 	bsd_to_linux_sigset(mask, &lmask);
408 
409 	/*
410 	 * Build the signal context to be used by sigreturn.
411 	 */
412 	frame.sf_sc.sc_mask   = lmask.__bits[0];
413 	frame.sf_sc.sc_gs     = rgs();
414 	frame.sf_sc.sc_fs     = regs->tf_fs;
415 	frame.sf_sc.sc_es     = regs->tf_es;
416 	frame.sf_sc.sc_ds     = regs->tf_ds;
417 	frame.sf_sc.sc_edi    = regs->tf_edi;
418 	frame.sf_sc.sc_esi    = regs->tf_esi;
419 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
420 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
421 	frame.sf_sc.sc_edx    = regs->tf_edx;
422 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
423 	frame.sf_sc.sc_eax    = regs->tf_eax;
424 	frame.sf_sc.sc_eip    = regs->tf_eip;
425 	frame.sf_sc.sc_cs     = regs->tf_cs;
426 	frame.sf_sc.sc_eflags = regs->tf_eflags;
427 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
428 	frame.sf_sc.sc_ss     = regs->tf_ss;
429 	frame.sf_sc.sc_err    = regs->tf_err;
430 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
431 
432 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
433 
434 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
435 		frame.sf_extramask[i] = lmask.__bits[i+1];
436 
437 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
438 		/*
439 		 * Process has trashed its stack; give it an illegal
440 		 * instruction to halt it in its tracks.
441 		 */
442 		sigexit(p, SIGILL);
443 		/* NOTREACHED */
444 	}
445 
446 	/*
447 	 * Build context to run handler in.
448 	 */
449 	regs->tf_esp = (int)fp;
450 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
451 	regs->tf_eflags &= ~PSL_VM;
452 	regs->tf_cs = _ucodesel;
453 	regs->tf_ds = _udatasel;
454 	regs->tf_es = _udatasel;
455 	regs->tf_fs = _udatasel;
456 	load_gs(_udatasel);
457 	regs->tf_ss = _udatasel;
458 }
459 
460 /*
461  * System call to cleanup state after a signal
462  * has been taken.  Reset signal mask and
463  * stack state from context left by sendsig (above).
464  * Return to previous pc and psl as specified by
465  * context left by sendsig. Check carefully to
466  * make sure that the user has not modified the
467  * psl to gain improper privileges or to cause
468  * a machine fault.
469  */
470 int
471 linux_sigreturn(p, args)
472 	struct proc *p;
473 	struct linux_sigreturn_args *args;
474 {
475 	struct linux_sigframe frame;
476 	register struct trapframe *regs;
477 	linux_sigset_t lmask;
478 	int eflags, i;
479 
480 	regs = p->p_md.md_regs;
481 
482 #ifdef DEBUG
483 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
484 	    (long)p->p_pid, (void *)args->sfp);
485 #endif
486 	/*
487 	 * The trampoline code hands us the sigframe.
488 	 * It is unsafe to keep track of it ourselves, in the event that a
489 	 * program jumps out of a signal handler.
490 	 */
491 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
492 		return (EFAULT);
493 
494 	/*
495 	 * Check for security violations.
496 	 */
497 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
498 	eflags = frame.sf_sc.sc_eflags;
499 	/*
500 	 * XXX do allow users to change the privileged flag PSL_RF.  The
501 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
502 	 * sometimes set it there too.  tf_eflags is kept in the signal
503 	 * context during signal handling and there is no other place
504 	 * to remember it, so the PSL_RF bit may be corrupted by the
505 	 * signal handler without us knowing.  Corruption of the PSL_RF
506 	 * bit at worst causes one more or one less debugger trap, so
507 	 * allowing it is fairly harmless.
508 	 */
509 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
510     		return(EINVAL);
511 	}
512 
513 	/*
514 	 * Don't allow users to load a valid privileged %cs.  Let the
515 	 * hardware check for invalid selectors, excess privilege in
516 	 * other selectors, invalid %eip's and invalid %esp's.
517 	 */
518 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
519 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
520 		trapsignal(p, SIGBUS, T_PROTFLT);
521 		return(EINVAL);
522 	}
523 
524 	lmask.__bits[0] = frame.sf_sc.sc_mask;
525 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
526 		lmask.__bits[i+1] = frame.sf_extramask[i];
527 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
528 	SIG_CANTMASK(p->p_sigmask);
529 
530 	/*
531 	 * Restore signal context.
532 	 */
533 	/* %gs was restored by the trampoline. */
534 	regs->tf_fs     = frame.sf_sc.sc_fs;
535 	regs->tf_es     = frame.sf_sc.sc_es;
536 	regs->tf_ds     = frame.sf_sc.sc_ds;
537 	regs->tf_edi    = frame.sf_sc.sc_edi;
538 	regs->tf_esi    = frame.sf_sc.sc_esi;
539 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
540 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
541 	regs->tf_edx    = frame.sf_sc.sc_edx;
542 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
543 	regs->tf_eax    = frame.sf_sc.sc_eax;
544 	regs->tf_eip    = frame.sf_sc.sc_eip;
545 	regs->tf_cs     = frame.sf_sc.sc_cs;
546 	regs->tf_eflags = eflags;
547 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
548 	regs->tf_ss     = frame.sf_sc.sc_ss;
549 
550 	return (EJUSTRETURN);
551 }
552 
553 /*
554  * System call to cleanup state after a signal
555  * has been taken.  Reset signal mask and
556  * stack state from context left by rt_sendsig (above).
557  * Return to previous pc and psl as specified by
558  * context left by sendsig. Check carefully to
559  * make sure that the user has not modified the
560  * psl to gain improper privileges or to cause
561  * a machine fault.
562  */
563 int
564 linux_rt_sigreturn(p, args)
565 	struct proc *p;
566 	struct linux_rt_sigreturn_args *args;
567 {
568 	struct sigaltstack_args sasargs;
569 	struct linux_ucontext 	 uc;
570 	struct linux_sigcontext *context;
571 	linux_stack_t *lss;
572 	stack_t *ss;
573 	register struct trapframe *regs;
574 	int eflags;
575 	caddr_t sg = stackgap_init();
576 
577 	regs = p->p_md.md_regs;
578 
579 #ifdef DEBUG
580 	printf("Linux-emul(%ld): linux_rt_sigreturn(%p)\n",
581 	    (long)p->p_pid, (void *)args->ucp);
582 #endif
583 	/*
584 	 * The trampoline code hands us the ucontext.
585 	 * It is unsafe to keep track of it ourselves, in the event that a
586 	 * program jumps out of a signal handler.
587 	 */
588 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
589 		return (EFAULT);
590 
591 	context = &uc.uc_mcontext;
592 
593 	/*
594 	 * Check for security violations.
595 	 */
596 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
597 	eflags = context->sc_eflags;
598 	/*
599 	 * XXX do allow users to change the privileged flag PSL_RF.  The
600 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
601 	 * sometimes set it there too.  tf_eflags is kept in the signal
602 	 * context during signal handling and there is no other place
603 	 * to remember it, so the PSL_RF bit may be corrupted by the
604 	 * signal handler without us knowing.  Corruption of the PSL_RF
605 	 * bit at worst causes one more or one less debugger trap, so
606 	 * allowing it is fairly harmless.
607 	 */
608 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
609     		return(EINVAL);
610 	}
611 
612 	/*
613 	 * Don't allow users to load a valid privileged %cs.  Let the
614 	 * hardware check for invalid selectors, excess privilege in
615 	 * other selectors, invalid %eip's and invalid %esp's.
616 	 */
617 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
618 	if (!CS_SECURE(context->sc_cs)) {
619 		trapsignal(p, SIGBUS, T_PROTFLT);
620 		return(EINVAL);
621 	}
622 
623 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
624 	SIG_CANTMASK(p->p_sigmask);
625 
626 	/*
627 	 * Restore signal context
628 	 */
629 	/* %gs was restored by the trampoline. */
630 	regs->tf_fs     = context->sc_fs;
631 	regs->tf_es     = context->sc_es;
632 	regs->tf_ds     = context->sc_ds;
633 	regs->tf_edi    = context->sc_edi;
634 	regs->tf_esi    = context->sc_esi;
635 	regs->tf_ebp    = context->sc_ebp;
636 	regs->tf_ebx    = context->sc_ebx;
637 	regs->tf_edx    = context->sc_edx;
638 	regs->tf_ecx    = context->sc_ecx;
639 	regs->tf_eax    = context->sc_eax;
640 	regs->tf_eip    = context->sc_eip;
641 	regs->tf_cs     = context->sc_cs;
642 	regs->tf_eflags = eflags;
643 	regs->tf_esp    = context->sc_esp_at_signal;
644 	regs->tf_ss     = context->sc_ss;
645 
646 	/*
647 	 * call sigaltstack & ignore results..
648 	 */
649 	ss = stackgap_alloc(&sg, sizeof(stack_t));
650 	lss = &uc.uc_stack;
651 	ss->ss_sp = lss->ss_sp;
652 	ss->ss_size = lss->ss_size;
653 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
654 
655 #ifdef DEBUG
656 	printf("Linux-emul(%ld): rt_sigret  flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x\n",
657 	    (long)p->p_pid, ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
658 #endif
659 	sasargs.ss = ss;
660 	sasargs.oss = NULL;
661 	(void) sigaltstack(p, &sasargs);
662 
663 	return (EJUSTRETURN);
664 }
665 
666 static void
667 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
668 {
669 	args[0] = tf->tf_ebx;
670 	args[1] = tf->tf_ecx;
671 	args[2] = tf->tf_edx;
672 	args[3] = tf->tf_esi;
673 	args[4] = tf->tf_edi;
674 	*params = NULL;		/* no copyin */
675 }
676 
677 /*
678  * If a linux binary is exec'ing something, try this image activator
679  * first.  We override standard shell script execution in order to
680  * be able to modify the interpreter path.  We only do this if a linux
681  * binary is doing the exec, so we do not create an EXEC module for it.
682  */
683 static int	exec_linux_imgact_try __P((struct image_params *iparams));
684 
685 static int
686 exec_linux_imgact_try(imgp)
687     struct image_params *imgp;
688 {
689     const char *head = (const char *)imgp->image_header;
690     int error = -1;
691 
692     /*
693      * The interpreter for shell scripts run from a linux binary needs
694      * to be located in /compat/linux if possible in order to recursively
695      * maintain linux path emulation.
696      */
697     if (((const short *)head)[0] == SHELLMAGIC) {
698 	    /*
699 	     * Run our normal shell image activator.  If it succeeds attempt
700 	     * to use the alternate path for the interpreter.  If an alternate
701 	     * path is found, use our stringspace to store it.
702 	     */
703 	    if ((error = exec_shell_imgact(imgp)) == 0) {
704 		    char *rpath = NULL;
705 
706 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
707 			imgp->interpreter_name, &rpath, 0);
708 		    if (rpath != imgp->interpreter_name) {
709 			    int len = strlen(rpath) + 1;
710 
711 			    if (len <= MAXSHELLCMDLEN) {
712 				memcpy(imgp->interpreter_name, rpath, len);
713 			    }
714 			    free(rpath, M_TEMP);
715 		    }
716 	    }
717     }
718     return(error);
719 }
720 
/*
 * System vector for Linux a.out binaries.
 *
 * This is a positional initializer: the order of the entries must match
 * the declaration of struct sysentvec, which is not part of this file.
 * It differs from elf_linux_sysvec only in the fixup routine, the name
 * string and the core dump routine.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* size of linux_sysent[] */
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,		/* size of bsd_to_linux_signal[] */
	bsd_to_linux_signal,
	ELAST + 1,		/* size of bsd_to_linux_errno[] */
	bsd_to_linux_errno,
	translate_traps,
	linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux a.out",
	aout_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ
};
740 
/*
 * System vector for Linux ELF binaries; referenced by both brand entries
 * below.
 *
 * This is a positional initializer: the order of the entries must match
 * the declaration of struct sysentvec, which is not part of this file.
 * It differs from linux_sysvec only in the fixup routine, the name string
 * and the core dump routine.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* size of linux_sysent[] */
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,		/* size of bsd_to_linux_signal[] */
	bsd_to_linux_signal,
	ELAST + 1,		/* size of bsd_to_linux_errno[] */
	bsd_to_linux_errno,
	translate_traps,
	elf_linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux ELF",
	elf_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ
};
760 
/*
 * ELF brand descriptors for Linux binaries.  Both carry ELFOSABI_LINUX,
 * the path "/compat/linux" and elf_linux_sysvec; they differ only in the
 * second path string (ld-linux.so.1 vs. ld-linux.so.2).
 * NOTE(review): the two strings are presumably the emulation root and the
 * runtime linker path -- confirm against the Elf32_Brandinfo declaration.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec
				 };

static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec
				 };

/*
 * NULL-terminated list of all brands; linux_elf_modevent() walks it to
 * insert the brands on load and to remove them on unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
780 
781 static int
782 linux_elf_modevent(module_t mod, int type, void *data)
783 {
784 	Elf32_Brandinfo **brandinfo;
785 	int error;
786 
787 	error = 0;
788 
789 	switch(type) {
790 	case MOD_LOAD:
791 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
792 		     ++brandinfo)
793 			if (elf_insert_brand_entry(*brandinfo) < 0)
794 				error = EINVAL;
795 		if (error == 0) {
796 			linux_ioctl_register_handlers(
797 				&linux_ioctl_handler_set);
798 			if (bootverbose)
799 				printf("Linux ELF exec handler installed\n");
800 		} else
801 			printf("cannot insert Linux ELF brand handler\n");
802 		break;
803 	case MOD_UNLOAD:
804 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
805 		     ++brandinfo)
806 			if (elf_brand_inuse(*brandinfo))
807 				error = EBUSY;
808 		if (error == 0) {
809 			for (brandinfo = &linux_brandlist[0];
810 			     *brandinfo != NULL; ++brandinfo)
811 				if (elf_remove_brand_entry(*brandinfo) < 0)
812 					error = EINVAL;
813 		}
814 		if (error == 0) {
815 			linux_ioctl_unregister_handlers(
816 				&linux_ioctl_handler_set);
817 			if (bootverbose)
818 				printf("Linux ELF exec handler removed\n");
819 		} else
820 			printf("Could not deinstall ELF interpreter entry\n");
821 		break;
822 	default:
823 		break;
824 	}
825 	return error;
826 }
827 
/*
 * Module descriptor: name "linuxelf", event handler linux_elf_modevent(),
 * and 0 as the third member.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
835