xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 77b7cdf1999ee965ad494fddd184b18f532ac91a)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/user.h>
52 #include <sys/vnode.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_extern.h>
58 #include <sys/exec.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <sys/mutex.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70 
71 #include <i386/linux/linux.h>
72 #include <i386/linux/linux_proto.h>
73 #include <compat/linux/linux_mib.h>
74 #include <compat/linux/linux_signal.h>
75 #include <compat/linux/linux_util.h>
76 
77 MODULE_VERSION(linux, 1);
78 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
79 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
80 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
81 
82 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
83 
84 #if BYTE_ORDER == LITTLE_ENDIAN
85 #define SHELLMAGIC      0x2123 /* #! */
86 #else
87 #define SHELLMAGIC      0x2321
88 #endif
89 
90 /*
91  * Allow the sendsig functions to use the ldebug() facility
92  * even though they are not syscalls themselves. Map them
93  * to syscall 0. This is slightly less bogus than using
94  * ldebug(sigreturn).
95  */
96 #define	LINUX_SYS_linux_rt_sendsig	0
97 #define	LINUX_SYS_linux_sendsig		0
98 
99 extern char linux_sigcode[];
100 extern int linux_szsigcode;
101 
102 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
103 
104 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
105 
106 static int	linux_fixup(register_t **stack_base,
107 		    struct image_params *iparams);
108 static int	elf_linux_fixup(register_t **stack_base,
109 		    struct image_params *iparams);
110 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
111 		    caddr_t *params);
112 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
113 		    u_long code);
114 
115 /*
116  * Linux syscalls return negative errno's, we do positive and map them
117  */
118 static int bsd_to_linux_errno[ELAST + 1] = {
119 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
120 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
121 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
122 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
123 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
124 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
125 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
126 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
127 	-6, -6, -43, -42, -75, -6, -84
128 };
129 
130 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
131 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
132 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
133 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
134 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
135 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
136 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
137 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
138 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
139 };
140 
141 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
142 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
143 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
144 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
145 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
146 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
147 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
148 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
149 	SIGIO, SIGURG, SIGSYS
150 };
151 
/* Value reported for any native trap code without a table entry. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap number translation, indexed by the native trap code.  The value
 * in each slot is what gets stored in the Linux sigcontext's sc_trapno.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0 */
	6,			/*  1: T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2 */
	3,			/*  3: T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4 */
	LINUX_T_UNKNOWN,	/*  5 */
	16,			/*  6: T_ARITHTRAP */
	254,			/*  7: T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8 */
	13,			/*  9: T_PROTFLT */
	1,			/* 10: T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12: T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14: T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18: T_DIVIDE */
	2,			/* 19: T_NMI */
	4,			/* 20: T_OFLOW */
	5,			/* 21: T_BOUND */
	7,			/* 22: T_DNA */
	8,			/* 23: T_DOUBLEFLT */
	9,			/* 24: T_FPOPFLT */
	10,			/* 25: T_TSSFLT */
	11,			/* 26: T_SEGNPFLT */
	12,			/* 27: T_STKFLT */
	18,			/* 28: T_MCHK */
	19,			/* 29: T_XMMFLT */
	15			/* 30: T_RESERVED */
};

/*
 * Bounds-checked lookup: codes past the end of the table yield
 * LINUX_T_UNKNOWN instead of reading out of range.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) >= sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     LINUX_T_UNKNOWN : \
     _bsd_to_linux_trapcode[(code)])
190 
191 /*
192  * If FreeBSD & Linux have a difference of opinion about what a trap
193  * means, deal with it here.
194  *
195  * MPSAFE
196  */
197 static int
198 translate_traps(int signal, int trap_code)
199 {
200 	if (signal != SIGBUS)
201 		return signal;
202 	switch (trap_code) {
203 	case T_PROTFLT:
204 	case T_TSSFLT:
205 	case T_DOUBLEFLT:
206 	case T_PAGEFLT:
207 		return SIGSEGV;
208 	default:
209 		return signal;
210 	}
211 }
212 
213 static int
214 linux_fixup(register_t **stack_base, struct image_params *imgp)
215 {
216 	register_t *argv, *envp;
217 
218 	argv = *stack_base;
219 	envp = *stack_base + (imgp->argc + 1);
220 	(*stack_base)--;
221 	**stack_base = (intptr_t)(void *)envp;
222 	(*stack_base)--;
223 	**stack_base = (intptr_t)(void *)argv;
224 	(*stack_base)--;
225 	**stack_base = imgp->argc;
226 	return 0;
227 }
228 
229 static int
230 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
231 {
232 	Elf32_Auxargs *args;
233 	register_t *pos;
234 
235 	KASSERT(curthread->td_proc == imgp->proc &&
236 	    (curthread->td_proc->p_flag & P_THREADED) == 0,
237 	    ("unsafe elf_linux_fixup(), should be curproc"));
238 	args = (Elf32_Auxargs *)imgp->auxargs;
239 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
240 
241 	if (args->trace)
242 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
243 	if (args->execfd != -1)
244 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
245 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
246 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
247 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
248 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
249 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
250 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
251 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
252 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
253 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
254 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
255 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
256 	AUXARGS_ENTRY(pos, AT_NULL, 0);
257 
258 	free(imgp->auxargs, M_TEMP);
259 	imgp->auxargs = NULL;
260 
261 	(*stack_base)--;
262 	**stack_base = (register_t)imgp->argc;
263 	return 0;
264 }
265 
266 extern int _ucodesel, _udatasel;
267 extern unsigned long linux_sznonrtsigcode;
268 
269 static void
270 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
271 {
272 	struct thread *td = curthread;
273 	struct proc *p = td->td_proc;
274 	struct trapframe *regs;
275 	struct l_rt_sigframe *fp, frame;
276 	int oonstack;
277 
278 	PROC_LOCK_ASSERT(p, MA_OWNED);
279 	regs = td->td_frame;
280 	oonstack = sigonstack(regs->tf_esp);
281 
282 #ifdef DEBUG
283 	if (ldebug(rt_sendsig))
284 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
285 		    catcher, sig, (void*)mask, code);
286 #endif
287 	/*
288 	 * Allocate space for the signal handler context.
289 	 */
290 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
291 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
292 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
293 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
294 	} else
295 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
296 
297 	/*
298 	 * Build the argument list for the signal handler.
299 	 */
300 	if (p->p_sysent->sv_sigtbl)
301 		if (sig <= p->p_sysent->sv_sigsize)
302 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
303 
304 	bzero(&frame, sizeof(frame));
305 
306 	frame.sf_handler = catcher;
307 	frame.sf_sig = sig;
308 	frame.sf_siginfo = &fp->sf_si;
309 	frame.sf_ucontext = &fp->sf_sc;
310 
311 	/* Fill in POSIX parts */
312 	frame.sf_si.lsi_signo = sig;
313 	frame.sf_si.lsi_code = code;
314 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
315 
316 	/*
317 	 * Build the signal context to be used by sigreturn.
318 	 */
319 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
320 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
321 
322 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
323 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
324 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
325 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
326 	PROC_UNLOCK(p);
327 
328 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
329 
330 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
331 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
332 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
333 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
334 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
335 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
336 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
337 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
338 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
339 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
340 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
341 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
342 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
343 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
344 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
345 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
346 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
347 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
348 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
349 
350 #ifdef DEBUG
351 	if (ldebug(rt_sendsig))
352 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
353 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
354 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
355 #endif
356 
357 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
358 		/*
359 		 * Process has trashed its stack; give it an illegal
360 		 * instruction to halt it in its tracks.
361 		 */
362 #ifdef DEBUG
363 		if (ldebug(rt_sendsig))
364 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
365 			    fp, oonstack);
366 #endif
367 		PROC_LOCK(p);
368 		sigexit(td, SIGILL);
369 	}
370 
371 	/*
372 	 * Build context to run handler in.
373 	 */
374 	regs->tf_esp = (int)fp;
375 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
376 	    linux_sznonrtsigcode;
377 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
378 	regs->tf_cs = _ucodesel;
379 	regs->tf_ds = _udatasel;
380 	regs->tf_es = _udatasel;
381 	regs->tf_fs = _udatasel;
382 	regs->tf_ss = _udatasel;
383 	PROC_LOCK(p);
384 }
385 
386 
387 /*
388  * Send an interrupt to process.
389  *
390  * Stack is set up to allow sigcode stored
391  * in u. to call routine, followed by kcall
392  * to sigreturn routine below.  After sigreturn
393  * resets the signal mask, the stack, and the
394  * frame pointer, it returns to the user
395  * specified pc, psl.
396  */
397 static void
398 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
399 {
400 	struct thread *td = curthread;
401 	struct proc *p = td->td_proc;
402 	struct trapframe *regs;
403 	struct l_sigframe *fp, frame;
404 	l_sigset_t lmask;
405 	int oonstack, i;
406 
407 	PROC_LOCK_ASSERT(p, MA_OWNED);
408 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
409 		/* Signal handler installed with SA_SIGINFO. */
410 		linux_rt_sendsig(catcher, sig, mask, code);
411 		return;
412 	}
413 
414 	regs = td->td_frame;
415 	oonstack = sigonstack(regs->tf_esp);
416 
417 #ifdef DEBUG
418 	if (ldebug(sendsig))
419 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
420 		    catcher, sig, (void*)mask, code);
421 #endif
422 
423 	/*
424 	 * Allocate space for the signal handler context.
425 	 */
426 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
427 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
428 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
429 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
430 	} else
431 		fp = (struct l_sigframe *)regs->tf_esp - 1;
432 	PROC_UNLOCK(p);
433 
434 	/*
435 	 * Build the argument list for the signal handler.
436 	 */
437 	if (p->p_sysent->sv_sigtbl)
438 		if (sig <= p->p_sysent->sv_sigsize)
439 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
440 
441 	bzero(&frame, sizeof(frame));
442 
443 	frame.sf_handler = catcher;
444 	frame.sf_sig = sig;
445 
446 	bsd_to_linux_sigset(mask, &lmask);
447 
448 	/*
449 	 * Build the signal context to be used by sigreturn.
450 	 */
451 	frame.sf_sc.sc_mask   = lmask.__bits[0];
452 	frame.sf_sc.sc_gs     = rgs();
453 	frame.sf_sc.sc_fs     = regs->tf_fs;
454 	frame.sf_sc.sc_es     = regs->tf_es;
455 	frame.sf_sc.sc_ds     = regs->tf_ds;
456 	frame.sf_sc.sc_edi    = regs->tf_edi;
457 	frame.sf_sc.sc_esi    = regs->tf_esi;
458 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
459 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
460 	frame.sf_sc.sc_edx    = regs->tf_edx;
461 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
462 	frame.sf_sc.sc_eax    = regs->tf_eax;
463 	frame.sf_sc.sc_eip    = regs->tf_eip;
464 	frame.sf_sc.sc_cs     = regs->tf_cs;
465 	frame.sf_sc.sc_eflags = regs->tf_eflags;
466 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
467 	frame.sf_sc.sc_ss     = regs->tf_ss;
468 	frame.sf_sc.sc_err    = regs->tf_err;
469 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
470 
471 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
472 		frame.sf_extramask[i] = lmask.__bits[i+1];
473 
474 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
475 		/*
476 		 * Process has trashed its stack; give it an illegal
477 		 * instruction to halt it in its tracks.
478 		 */
479 		PROC_LOCK(p);
480 		sigexit(td, SIGILL);
481 	}
482 
483 	/*
484 	 * Build context to run handler in.
485 	 */
486 	regs->tf_esp = (int)fp;
487 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
488 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
489 	regs->tf_cs = _ucodesel;
490 	regs->tf_ds = _udatasel;
491 	regs->tf_es = _udatasel;
492 	regs->tf_fs = _udatasel;
493 	regs->tf_ss = _udatasel;
494 	PROC_LOCK(p);
495 }
496 
497 /*
498  * System call to cleanup state after a signal
499  * has been taken.  Reset signal mask and
500  * stack state from context left by sendsig (above).
501  * Return to previous pc and psl as specified by
502  * context left by sendsig. Check carefully to
503  * make sure that the user has not modified the
504  * psl to gain improper privileges or to cause
505  * a machine fault.
506  */
507 int
508 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
509 {
510 	struct proc *p = td->td_proc;
511 	struct l_sigframe frame;
512 	struct trapframe *regs;
513 	l_sigset_t lmask;
514 	int eflags, i;
515 
516 	regs = td->td_frame;
517 
518 #ifdef DEBUG
519 	if (ldebug(sigreturn))
520 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
521 #endif
522 	/*
523 	 * The trampoline code hands us the sigframe.
524 	 * It is unsafe to keep track of it ourselves, in the event that a
525 	 * program jumps out of a signal handler.
526 	 */
527 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
528 		return (EFAULT);
529 
530 	/*
531 	 * Check for security violations.
532 	 */
533 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
534 	eflags = frame.sf_sc.sc_eflags;
535 	/*
536 	 * XXX do allow users to change the privileged flag PSL_RF.  The
537 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
538 	 * sometimes set it there too.  tf_eflags is kept in the signal
539 	 * context during signal handling and there is no other place
540 	 * to remember it, so the PSL_RF bit may be corrupted by the
541 	 * signal handler without us knowing.  Corruption of the PSL_RF
542 	 * bit at worst causes one more or one less debugger trap, so
543 	 * allowing it is fairly harmless.
544 	 */
545 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
546 		return(EINVAL);
547 
548 	/*
549 	 * Don't allow users to load a valid privileged %cs.  Let the
550 	 * hardware check for invalid selectors, excess privilege in
551 	 * other selectors, invalid %eip's and invalid %esp's.
552 	 */
553 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
554 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
555 		trapsignal(td, SIGBUS, T_PROTFLT);
556 		return(EINVAL);
557 	}
558 
559 	lmask.__bits[0] = frame.sf_sc.sc_mask;
560 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
561 		lmask.__bits[i+1] = frame.sf_extramask[i];
562 	PROC_LOCK(p);
563 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
564 	SIG_CANTMASK(td->td_sigmask);
565 	signotify(td);
566 	PROC_UNLOCK(p);
567 
568 	/*
569 	 * Restore signal context.
570 	 */
571 	/* %gs was restored by the trampoline. */
572 	regs->tf_fs     = frame.sf_sc.sc_fs;
573 	regs->tf_es     = frame.sf_sc.sc_es;
574 	regs->tf_ds     = frame.sf_sc.sc_ds;
575 	regs->tf_edi    = frame.sf_sc.sc_edi;
576 	regs->tf_esi    = frame.sf_sc.sc_esi;
577 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
578 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
579 	regs->tf_edx    = frame.sf_sc.sc_edx;
580 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
581 	regs->tf_eax    = frame.sf_sc.sc_eax;
582 	regs->tf_eip    = frame.sf_sc.sc_eip;
583 	regs->tf_cs     = frame.sf_sc.sc_cs;
584 	regs->tf_eflags = eflags;
585 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
586 	regs->tf_ss     = frame.sf_sc.sc_ss;
587 
588 	return (EJUSTRETURN);
589 }
590 
591 /*
592  * System call to cleanup state after a signal
593  * has been taken.  Reset signal mask and
594  * stack state from context left by rt_sendsig (above).
595  * Return to previous pc and psl as specified by
596  * context left by sendsig. Check carefully to
597  * make sure that the user has not modified the
598  * psl to gain improper privileges or to cause
599  * a machine fault.
600  */
601 int
602 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
603 {
604 	struct proc *p = td->td_proc;
605 	struct l_ucontext uc;
606 	struct l_sigcontext *context;
607 	l_stack_t *lss;
608 	stack_t ss;
609 	struct trapframe *regs;
610 	int eflags;
611 
612 	regs = td->td_frame;
613 
614 #ifdef DEBUG
615 	if (ldebug(rt_sigreturn))
616 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
617 #endif
618 	/*
619 	 * The trampoline code hands us the ucontext.
620 	 * It is unsafe to keep track of it ourselves, in the event that a
621 	 * program jumps out of a signal handler.
622 	 */
623 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
624 		return (EFAULT);
625 
626 	context = &uc.uc_mcontext;
627 
628 	/*
629 	 * Check for security violations.
630 	 */
631 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
632 	eflags = context->sc_eflags;
633 	/*
634 	 * XXX do allow users to change the privileged flag PSL_RF.  The
635 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
636 	 * sometimes set it there too.  tf_eflags is kept in the signal
637 	 * context during signal handling and there is no other place
638 	 * to remember it, so the PSL_RF bit may be corrupted by the
639 	 * signal handler without us knowing.  Corruption of the PSL_RF
640 	 * bit at worst causes one more or one less debugger trap, so
641 	 * allowing it is fairly harmless.
642 	 */
643 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
644 		return(EINVAL);
645 
646 	/*
647 	 * Don't allow users to load a valid privileged %cs.  Let the
648 	 * hardware check for invalid selectors, excess privilege in
649 	 * other selectors, invalid %eip's and invalid %esp's.
650 	 */
651 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
652 	if (!CS_SECURE(context->sc_cs)) {
653 		trapsignal(td, SIGBUS, T_PROTFLT);
654 		return(EINVAL);
655 	}
656 
657 	PROC_LOCK(p);
658 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
659 	SIG_CANTMASK(td->td_sigmask);
660 	signotify(td);
661 	PROC_UNLOCK(p);
662 
663 	/*
664 	 * Restore signal context
665 	 */
666 	/* %gs was restored by the trampoline. */
667 	regs->tf_fs     = context->sc_fs;
668 	regs->tf_es     = context->sc_es;
669 	regs->tf_ds     = context->sc_ds;
670 	regs->tf_edi    = context->sc_edi;
671 	regs->tf_esi    = context->sc_esi;
672 	regs->tf_ebp    = context->sc_ebp;
673 	regs->tf_ebx    = context->sc_ebx;
674 	regs->tf_edx    = context->sc_edx;
675 	regs->tf_ecx    = context->sc_ecx;
676 	regs->tf_eax    = context->sc_eax;
677 	regs->tf_eip    = context->sc_eip;
678 	regs->tf_cs     = context->sc_cs;
679 	regs->tf_eflags = eflags;
680 	regs->tf_esp    = context->sc_esp_at_signal;
681 	regs->tf_ss     = context->sc_ss;
682 
683 	/*
684 	 * call sigaltstack & ignore results..
685 	 */
686 	lss = &uc.uc_stack;
687 	ss.ss_sp = lss->ss_sp;
688 	ss.ss_size = lss->ss_size;
689 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
690 
691 #ifdef DEBUG
692 	if (ldebug(rt_sigreturn))
693 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
694 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
695 #endif
696 	(void)kern_sigaltstack(td, &ss, NULL);
697 
698 	return (EJUSTRETURN);
699 }
700 
701 /*
702  * MPSAFE
703  */
704 static void
705 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
706 {
707 	args[0] = tf->tf_ebx;
708 	args[1] = tf->tf_ecx;
709 	args[2] = tf->tf_edx;
710 	args[3] = tf->tf_esi;
711 	args[4] = tf->tf_edi;
712 	args[5] = tf->tf_ebp;	/* Unconfirmed */
713 	*params = NULL;		/* no copyin */
714 }
715 
716 
717 
718 /*
719  * Dump core, into a file named as described in the comments for
720  * expand_name(), unless the process was setuid/setgid.
721  */
722 static int
723 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
724 {
725 	struct proc *p = td->td_proc;
726 	struct ucred *cred = td->td_ucred;
727 	struct vmspace *vm = p->p_vmspace;
728 	char *tempuser;
729 	int error;
730 
731 	if (ctob((uarea_pages + kstack_pages) +
732 	    vm->vm_dsize + vm->vm_ssize) >= limit)
733 		return (EFAULT);
734 	tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
735 	    M_WAITOK | M_ZERO);
736 	if (tempuser == NULL)
737 		return (ENOMEM);
738 	PROC_LOCK(p);
739 	fill_kinfo_proc(p, &p->p_uarea->u_kproc);
740 	PROC_UNLOCK(p);
741 	bcopy(p->p_uarea, tempuser, sizeof(struct user));
742 	bcopy(td->td_frame,
743 	    tempuser + ctob(uarea_pages) +
744 	    ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
745 	    sizeof(struct trapframe));
746 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
747 	    ctob(uarea_pages + kstack_pages),
748 	    (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
749 	    (int *)NULL, td);
750 	free(tempuser, M_TEMP);
751 	if (error == 0)
752 		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
753 		    (int)ctob(vm->vm_dsize),
754 		    (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
755 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
756 	if (error == 0)
757 		error = vn_rdwr_inchunks(UIO_WRITE, vp,
758 		    (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
759 		    round_page(ctob(vm->vm_ssize)),
760 		    (off_t)ctob(uarea_pages + kstack_pages) +
761 			ctob(vm->vm_dsize), UIO_USERSPACE,
762 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
763 	return (error);
764 }
765 /*
766  * If a linux binary is exec'ing something, try this image activator
767  * first.  We override standard shell script execution in order to
768  * be able to modify the interpreter path.  We only do this if a linux
769  * binary is doing the exec, so we do not create an EXEC module for it.
770  */
771 static int	exec_linux_imgact_try(struct image_params *iparams);
772 
773 static int
774 exec_linux_imgact_try(struct image_params *imgp)
775 {
776     const char *head = (const char *)imgp->image_header;
777     int error = -1;
778 
779     /*
780      * The interpreter for shell scripts run from a linux binary needs
781      * to be located in /compat/linux if possible in order to recursively
782      * maintain linux path emulation.
783      */
784     if (((const short *)head)[0] == SHELLMAGIC) {
785 	    /*
786 	     * Run our normal shell image activator.  If it succeeds attempt
787 	     * to use the alternate path for the interpreter.  If an alternate
788 	     * path is found, use our stringspace to store it.
789 	     */
790 	    if ((error = exec_shell_imgact(imgp)) == 0) {
791 		    char *rpath = NULL;
792 
793 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
794 			imgp->interpreter_name, &rpath, 0);
795 		    if (rpath != imgp->interpreter_name) {
796 			    int len = strlen(rpath) + 1;
797 
798 			    if (len <= MAXSHELLCMDLEN) {
799 				    memcpy(imgp->interpreter_name, rpath, len);
800 			    }
801 			    free(rpath, M_TEMP);
802 		    }
803 	    }
804     }
805     return(error);
806 }
807 
808 struct sysentvec linux_sysvec = {
809 	LINUX_SYS_MAXSYSCALL,
810 	linux_sysent,
811 	0xff,
812 	LINUX_SIGTBLSZ,
813 	bsd_to_linux_signal,
814 	ELAST + 1,
815 	bsd_to_linux_errno,
816 	translate_traps,
817 	linux_fixup,
818 	linux_sendsig,
819 	linux_sigcode,
820 	&linux_szsigcode,
821 	linux_prepsyscall,
822 	"Linux a.out",
823 	linux_aout_coredump,
824 	exec_linux_imgact_try,
825 	LINUX_MINSIGSTKSZ,
826 	PAGE_SIZE,
827 	VM_MIN_ADDRESS,
828 	VM_MAXUSER_ADDRESS,
829 	USRSTACK,
830 	PS_STRINGS,
831 	VM_PROT_ALL,
832 	exec_copyout_strings,
833 	exec_setregs
834 };
835 
836 struct sysentvec elf_linux_sysvec = {
837 	LINUX_SYS_MAXSYSCALL,
838 	linux_sysent,
839 	0xff,
840 	LINUX_SIGTBLSZ,
841 	bsd_to_linux_signal,
842 	ELAST + 1,
843 	bsd_to_linux_errno,
844 	translate_traps,
845 	elf_linux_fixup,
846 	linux_sendsig,
847 	linux_sigcode,
848 	&linux_szsigcode,
849 	linux_prepsyscall,
850 	"Linux ELF",
851 	elf32_coredump,
852 	exec_linux_imgact_try,
853 	LINUX_MINSIGSTKSZ,
854 	PAGE_SIZE,
855 	VM_MIN_ADDRESS,
856 	VM_MAXUSER_ADDRESS,
857 	USRSTACK,
858 	PS_STRINGS,
859 	VM_PROT_ALL,
860 	exec_copyout_strings,
861 	exec_setregs
862 };
863 
864 static Elf32_Brandinfo linux_brand = {
865 					ELFOSABI_LINUX,
866 					EM_386,
867 					"Linux",
868 					"/compat/linux",
869 					"/lib/ld-linux.so.1",
870 					&elf_linux_sysvec
871 				 };
872 
873 static Elf32_Brandinfo linux_glibc2brand = {
874 					ELFOSABI_LINUX,
875 					EM_386,
876 					"Linux",
877 					"/compat/linux",
878 					"/lib/ld-linux.so.2",
879 					&elf_linux_sysvec
880 				 };
881 
882 Elf32_Brandinfo *linux_brandlist[] = {
883 					&linux_brand,
884 					&linux_glibc2brand,
885 					NULL
886 				};
887 
888 static int
889 linux_elf_modevent(module_t mod, int type, void *data)
890 {
891 	Elf32_Brandinfo **brandinfo;
892 	int error;
893 	struct linux_ioctl_handler **lihp;
894 
895 	error = 0;
896 
897 	switch(type) {
898 	case MOD_LOAD:
899 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
900 		     ++brandinfo)
901 			if (elf32_insert_brand_entry(*brandinfo) < 0)
902 				error = EINVAL;
903 		if (error == 0) {
904 			SET_FOREACH(lihp, linux_ioctl_handler_set)
905 				linux_ioctl_register_handler(*lihp);
906 			if (bootverbose)
907 				printf("Linux ELF exec handler installed\n");
908 		} else
909 			printf("cannot insert Linux ELF brand handler\n");
910 		break;
911 	case MOD_UNLOAD:
912 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
913 		     ++brandinfo)
914 			if (elf32_brand_inuse(*brandinfo))
915 				error = EBUSY;
916 		if (error == 0) {
917 			for (brandinfo = &linux_brandlist[0];
918 			     *brandinfo != NULL; ++brandinfo)
919 				if (elf32_remove_brand_entry(*brandinfo) < 0)
920 					error = EINVAL;
921 		}
922 		if (error == 0) {
923 			SET_FOREACH(lihp, linux_ioctl_handler_set)
924 				linux_ioctl_unregister_handler(*lihp);
925 			if (bootverbose)
926 				printf("Linux ELF exec handler removed\n");
927 			linux_mib_destroy();
928 		} else
929 			printf("Could not deinstall ELF interpreter entry\n");
930 		break;
931 	default:
932 		break;
933 	}
934 	return error;
935 }
936 
937 static moduledata_t linux_elf_mod = {
938 	"linuxelf",
939 	linux_elf_modevent,
940 	0
941 };
942 
943 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
944