xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision f9218d3d4fd34f082473b3a021c6d4d109fb47cf)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/user.h>
52 #include <sys/vnode.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_extern.h>
58 #include <sys/exec.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <sys/mutex.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70 
71 #include <i386/linux/linux.h>
72 #include <i386/linux/linux_proto.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 
76 MODULE_VERSION(linux, 1);
77 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
78 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
79 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
80 
81 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
82 
83 #if BYTE_ORDER == LITTLE_ENDIAN
84 #define SHELLMAGIC      0x2123 /* #! */
85 #else
86 #define SHELLMAGIC      0x2321
87 #endif
88 
89 /*
90  * Allow the sendsig functions to use the ldebug() facility
91  * even though they are not syscalls themselves. Map them
92  * to syscall 0. This is slightly less bogus than using
93  * ldebug(sigreturn).
94  */
95 #define	LINUX_SYS_linux_rt_sendsig	0
96 #define	LINUX_SYS_linux_sendsig		0
97 
98 extern char linux_sigcode[];
99 extern int linux_szsigcode;
100 
101 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
102 
103 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
104 
105 static int	linux_fixup(register_t **stack_base,
106 		    struct image_params *iparams);
107 static int	elf_linux_fixup(register_t **stack_base,
108 		    struct image_params *iparams);
109 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
110 		    caddr_t *params);
111 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
112 		    u_long code);
113 
114 /*
115  * Linux syscalls return negative errno's, we do positive and map them
116  */
117 static int bsd_to_linux_errno[ELAST + 1] = {
118 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
119 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
120 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
121 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
122 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
123 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
124 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
125 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
126 	-6, -6, -43, -42, -75, -6, -84
127 };
128 
129 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
130 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
131 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
132 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
133 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
134 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
135 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
136 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
137 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
138 };
139 
140 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
141 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
142 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
143 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
144 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
145 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
146 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
147 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
148 	SIGIO, SIGURG, SIGSYS
149 };
150 
/* Value reported to Linux when a FreeBSD trap code has no mapping. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap code -> Linux trap number, indexed by the FreeBSD T_*
 * value named in each comment.  The table is only ever read (through
 * bsd_to_linux_trapcode() below), so it is declared const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Bounds-checked lookup: codes at or beyond the end of the table yield
 * LINUX_T_UNKNOWN.  Note that (code) is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
189 
190 /*
191  * If FreeBSD & Linux have a difference of opinion about what a trap
192  * means, deal with it here.
193  *
194  * MPSAFE
195  */
196 static int
197 translate_traps(int signal, int trap_code)
198 {
199 	if (signal != SIGBUS)
200 		return signal;
201 	switch (trap_code) {
202 	case T_PROTFLT:
203 	case T_TSSFLT:
204 	case T_DOUBLEFLT:
205 	case T_PAGEFLT:
206 		return SIGSEGV;
207 	default:
208 		return signal;
209 	}
210 }
211 
212 static int
213 linux_fixup(register_t **stack_base, struct image_params *imgp)
214 {
215 	register_t *argv, *envp;
216 
217 	argv = *stack_base;
218 	envp = *stack_base + (imgp->argc + 1);
219 	(*stack_base)--;
220 	**stack_base = (intptr_t)(void *)envp;
221 	(*stack_base)--;
222 	**stack_base = (intptr_t)(void *)argv;
223 	(*stack_base)--;
224 	**stack_base = imgp->argc;
225 	return 0;
226 }
227 
228 static int
229 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
230 {
231 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
232 	register_t *pos;
233 
234 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
235 
236 	if (args->trace)
237 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
238 	if (args->execfd != -1)
239 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
240 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
241 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
242 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
243 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
244 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
245 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
246 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
247 	PROC_LOCK(imgp->proc);
248 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
249 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
250 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
251 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
252 	PROC_UNLOCK(imgp->proc);
253 	AUXARGS_ENTRY(pos, AT_NULL, 0);
254 
255 	free(imgp->auxargs, M_TEMP);
256 	imgp->auxargs = NULL;
257 
258 	(*stack_base)--;
259 	**stack_base = (long)imgp->argc;
260 	return 0;
261 }
262 
263 extern int _ucodesel, _udatasel;
264 extern unsigned long linux_sznonrtsigcode;
265 
266 static void
267 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
268 {
269 	struct thread *td = curthread;
270 	struct proc *p = td->td_proc;
271 	struct trapframe *regs;
272 	struct l_rt_sigframe *fp, frame;
273 	int oonstack;
274 
275 	PROC_LOCK_ASSERT(p, MA_OWNED);
276 	regs = td->td_frame;
277 	oonstack = sigonstack(regs->tf_esp);
278 
279 #ifdef DEBUG
280 	if (ldebug(rt_sendsig))
281 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
282 		    catcher, sig, (void*)mask, code);
283 #endif
284 	/*
285 	 * Allocate space for the signal handler context.
286 	 */
287 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
288 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
289 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
290 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
291 	} else
292 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
293 	PROC_UNLOCK(p);
294 
295 	/*
296 	 * Build the argument list for the signal handler.
297 	 */
298 	if (p->p_sysent->sv_sigtbl)
299 		if (sig <= p->p_sysent->sv_sigsize)
300 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
301 
302 	bzero(&frame, sizeof(frame));
303 
304 	frame.sf_handler = catcher;
305 	frame.sf_sig = sig;
306 	frame.sf_siginfo = &fp->sf_si;
307 	frame.sf_ucontext = &fp->sf_sc;
308 
309 	/* Fill in POSIX parts */
310 	frame.sf_si.lsi_signo = sig;
311 	frame.sf_si.lsi_code = code;
312 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
313 
314 	/*
315 	 * Build the signal context to be used by sigreturn.
316 	 */
317 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
318 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
319 
320 	PROC_LOCK(p);
321 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
322 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
323 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
324 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
325 	PROC_UNLOCK(p);
326 
327 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
328 
329 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
330 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
331 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
332 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
333 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
334 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
335 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
336 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
337 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
338 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
339 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
340 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
341 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
342 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
343 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
344 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
345 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
346 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
347 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
348 
349 #ifdef DEBUG
350 	if (ldebug(rt_sendsig))
351 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
352 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
353 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
354 #endif
355 
356 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
357 		/*
358 		 * Process has trashed its stack; give it an illegal
359 		 * instruction to halt it in its tracks.
360 		 */
361 #ifdef DEBUG
362 		if (ldebug(rt_sendsig))
363 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
364 			    fp, oonstack);
365 #endif
366 		PROC_LOCK(p);
367 		sigexit(td, SIGILL);
368 	}
369 
370 	/*
371 	 * Build context to run handler in.
372 	 */
373 	regs->tf_esp = (int)fp;
374 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
375 	    linux_sznonrtsigcode;
376 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
377 	regs->tf_cs = _ucodesel;
378 	regs->tf_ds = _udatasel;
379 	regs->tf_es = _udatasel;
380 	regs->tf_fs = _udatasel;
381 	regs->tf_ss = _udatasel;
382 	PROC_LOCK(p);
383 }
384 
385 
386 /*
387  * Send an interrupt to process.
388  *
389  * Stack is set up to allow sigcode stored
390  * in u. to call routine, followed by kcall
391  * to sigreturn routine below.  After sigreturn
392  * resets the signal mask, the stack, and the
393  * frame pointer, it returns to the user
394  * specified pc, psl.
395  */
396 static void
397 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
398 {
399 	struct thread *td = curthread;
400 	struct proc *p = td->td_proc;
401 	struct trapframe *regs;
402 	struct l_sigframe *fp, frame;
403 	l_sigset_t lmask;
404 	int oonstack, i;
405 
406 	PROC_LOCK_ASSERT(p, MA_OWNED);
407 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
408 		/* Signal handler installed with SA_SIGINFO. */
409 		linux_rt_sendsig(catcher, sig, mask, code);
410 		return;
411 	}
412 
413 	regs = td->td_frame;
414 	oonstack = sigonstack(regs->tf_esp);
415 
416 #ifdef DEBUG
417 	if (ldebug(sendsig))
418 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
419 		    catcher, sig, (void*)mask, code);
420 #endif
421 
422 	/*
423 	 * Allocate space for the signal handler context.
424 	 */
425 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
426 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
427 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
428 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
429 	} else
430 		fp = (struct l_sigframe *)regs->tf_esp - 1;
431 	PROC_UNLOCK(p);
432 
433 	/*
434 	 * Build the argument list for the signal handler.
435 	 */
436 	if (p->p_sysent->sv_sigtbl)
437 		if (sig <= p->p_sysent->sv_sigsize)
438 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
439 
440 	bzero(&frame, sizeof(frame));
441 
442 	frame.sf_handler = catcher;
443 	frame.sf_sig = sig;
444 
445 	bsd_to_linux_sigset(mask, &lmask);
446 
447 	/*
448 	 * Build the signal context to be used by sigreturn.
449 	 */
450 	frame.sf_sc.sc_mask   = lmask.__bits[0];
451 	frame.sf_sc.sc_gs     = rgs();
452 	frame.sf_sc.sc_fs     = regs->tf_fs;
453 	frame.sf_sc.sc_es     = regs->tf_es;
454 	frame.sf_sc.sc_ds     = regs->tf_ds;
455 	frame.sf_sc.sc_edi    = regs->tf_edi;
456 	frame.sf_sc.sc_esi    = regs->tf_esi;
457 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
458 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
459 	frame.sf_sc.sc_edx    = regs->tf_edx;
460 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
461 	frame.sf_sc.sc_eax    = regs->tf_eax;
462 	frame.sf_sc.sc_eip    = regs->tf_eip;
463 	frame.sf_sc.sc_cs     = regs->tf_cs;
464 	frame.sf_sc.sc_eflags = regs->tf_eflags;
465 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
466 	frame.sf_sc.sc_ss     = regs->tf_ss;
467 	frame.sf_sc.sc_err    = regs->tf_err;
468 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
469 
470 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
471 		frame.sf_extramask[i] = lmask.__bits[i+1];
472 
473 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
474 		/*
475 		 * Process has trashed its stack; give it an illegal
476 		 * instruction to halt it in its tracks.
477 		 */
478 		PROC_LOCK(p);
479 		sigexit(td, SIGILL);
480 	}
481 
482 	/*
483 	 * Build context to run handler in.
484 	 */
485 	regs->tf_esp = (int)fp;
486 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
487 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
488 	regs->tf_cs = _ucodesel;
489 	regs->tf_ds = _udatasel;
490 	regs->tf_es = _udatasel;
491 	regs->tf_fs = _udatasel;
492 	regs->tf_ss = _udatasel;
493 	PROC_LOCK(p);
494 }
495 
496 /*
497  * System call to cleanup state after a signal
498  * has been taken.  Reset signal mask and
499  * stack state from context left by sendsig (above).
500  * Return to previous pc and psl as specified by
501  * context left by sendsig. Check carefully to
502  * make sure that the user has not modified the
503  * psl to gain improper privileges or to cause
504  * a machine fault.
505  */
506 int
507 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
508 {
509 	struct proc *p = td->td_proc;
510 	struct l_sigframe frame;
511 	struct trapframe *regs;
512 	l_sigset_t lmask;
513 	int eflags, i;
514 
515 	regs = td->td_frame;
516 
517 #ifdef DEBUG
518 	if (ldebug(sigreturn))
519 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
520 #endif
521 	/*
522 	 * The trampoline code hands us the sigframe.
523 	 * It is unsafe to keep track of it ourselves, in the event that a
524 	 * program jumps out of a signal handler.
525 	 */
526 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
527 		return (EFAULT);
528 
529 	/*
530 	 * Check for security violations.
531 	 */
532 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
533 	eflags = frame.sf_sc.sc_eflags;
534 	/*
535 	 * XXX do allow users to change the privileged flag PSL_RF.  The
536 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
537 	 * sometimes set it there too.  tf_eflags is kept in the signal
538 	 * context during signal handling and there is no other place
539 	 * to remember it, so the PSL_RF bit may be corrupted by the
540 	 * signal handler without us knowing.  Corruption of the PSL_RF
541 	 * bit at worst causes one more or one less debugger trap, so
542 	 * allowing it is fairly harmless.
543 	 */
544 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
545 		return(EINVAL);
546 
547 	/*
548 	 * Don't allow users to load a valid privileged %cs.  Let the
549 	 * hardware check for invalid selectors, excess privilege in
550 	 * other selectors, invalid %eip's and invalid %esp's.
551 	 */
552 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
553 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
554 		trapsignal(p, SIGBUS, T_PROTFLT);
555 		return(EINVAL);
556 	}
557 
558 	lmask.__bits[0] = frame.sf_sc.sc_mask;
559 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
560 		lmask.__bits[i+1] = frame.sf_extramask[i];
561 	PROC_LOCK(p);
562 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
563 	SIG_CANTMASK(p->p_sigmask);
564 	signotify(p);
565 	PROC_UNLOCK(p);
566 
567 	/*
568 	 * Restore signal context.
569 	 */
570 	/* %gs was restored by the trampoline. */
571 	regs->tf_fs     = frame.sf_sc.sc_fs;
572 	regs->tf_es     = frame.sf_sc.sc_es;
573 	regs->tf_ds     = frame.sf_sc.sc_ds;
574 	regs->tf_edi    = frame.sf_sc.sc_edi;
575 	regs->tf_esi    = frame.sf_sc.sc_esi;
576 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
577 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
578 	regs->tf_edx    = frame.sf_sc.sc_edx;
579 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
580 	regs->tf_eax    = frame.sf_sc.sc_eax;
581 	regs->tf_eip    = frame.sf_sc.sc_eip;
582 	regs->tf_cs     = frame.sf_sc.sc_cs;
583 	regs->tf_eflags = eflags;
584 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
585 	regs->tf_ss     = frame.sf_sc.sc_ss;
586 
587 	return (EJUSTRETURN);
588 }
589 
590 /*
591  * System call to cleanup state after a signal
592  * has been taken.  Reset signal mask and
593  * stack state from context left by rt_sendsig (above).
594  * Return to previous pc and psl as specified by
595  * context left by sendsig. Check carefully to
596  * make sure that the user has not modified the
597  * psl to gain improper privileges or to cause
598  * a machine fault.
599  */
600 int
601 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
602 {
603 	struct proc *p = td->td_proc;
604 	struct l_ucontext uc;
605 	struct l_sigcontext *context;
606 	l_stack_t *lss;
607 	stack_t ss;
608 	struct trapframe *regs;
609 	int eflags;
610 
611 	regs = td->td_frame;
612 
613 #ifdef DEBUG
614 	if (ldebug(rt_sigreturn))
615 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
616 #endif
617 	/*
618 	 * The trampoline code hands us the ucontext.
619 	 * It is unsafe to keep track of it ourselves, in the event that a
620 	 * program jumps out of a signal handler.
621 	 */
622 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
623 		return (EFAULT);
624 
625 	context = &uc.uc_mcontext;
626 
627 	/*
628 	 * Check for security violations.
629 	 */
630 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
631 	eflags = context->sc_eflags;
632 	/*
633 	 * XXX do allow users to change the privileged flag PSL_RF.  The
634 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
635 	 * sometimes set it there too.  tf_eflags is kept in the signal
636 	 * context during signal handling and there is no other place
637 	 * to remember it, so the PSL_RF bit may be corrupted by the
638 	 * signal handler without us knowing.  Corruption of the PSL_RF
639 	 * bit at worst causes one more or one less debugger trap, so
640 	 * allowing it is fairly harmless.
641 	 */
642 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
643 		return(EINVAL);
644 
645 	/*
646 	 * Don't allow users to load a valid privileged %cs.  Let the
647 	 * hardware check for invalid selectors, excess privilege in
648 	 * other selectors, invalid %eip's and invalid %esp's.
649 	 */
650 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
651 	if (!CS_SECURE(context->sc_cs)) {
652 		trapsignal(p, SIGBUS, T_PROTFLT);
653 		return(EINVAL);
654 	}
655 
656 	PROC_LOCK(p);
657 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
658 	SIG_CANTMASK(p->p_sigmask);
659 	signotify(p);
660 	PROC_UNLOCK(p);
661 
662 	/*
663 	 * Restore signal context
664 	 */
665 	/* %gs was restored by the trampoline. */
666 	regs->tf_fs     = context->sc_fs;
667 	regs->tf_es     = context->sc_es;
668 	regs->tf_ds     = context->sc_ds;
669 	regs->tf_edi    = context->sc_edi;
670 	regs->tf_esi    = context->sc_esi;
671 	regs->tf_ebp    = context->sc_ebp;
672 	regs->tf_ebx    = context->sc_ebx;
673 	regs->tf_edx    = context->sc_edx;
674 	regs->tf_ecx    = context->sc_ecx;
675 	regs->tf_eax    = context->sc_eax;
676 	regs->tf_eip    = context->sc_eip;
677 	regs->tf_cs     = context->sc_cs;
678 	regs->tf_eflags = eflags;
679 	regs->tf_esp    = context->sc_esp_at_signal;
680 	regs->tf_ss     = context->sc_ss;
681 
682 	/*
683 	 * call sigaltstack & ignore results..
684 	 */
685 	lss = &uc.uc_stack;
686 	ss.ss_sp = lss->ss_sp;
687 	ss.ss_size = lss->ss_size;
688 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
689 
690 #ifdef DEBUG
691 	if (ldebug(rt_sigreturn))
692 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
693 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
694 #endif
695 	(void)kern_sigaltstack(td, &ss, NULL);
696 
697 	return (EJUSTRETURN);
698 }
699 
700 /*
701  * MPSAFE
702  */
703 static void
704 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
705 {
706 	args[0] = tf->tf_ebx;
707 	args[1] = tf->tf_ecx;
708 	args[2] = tf->tf_edx;
709 	args[3] = tf->tf_esi;
710 	args[4] = tf->tf_edi;
711 	args[5] = tf->tf_ebp;	/* Unconfirmed */
712 	*params = NULL;		/* no copyin */
713 }
714 
715 
716 
717 /*
718  * Dump core, into a file named as described in the comments for
719  * expand_name(), unless the process was setuid/setgid.
720  */
721 static int
722 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
723 {
724 	struct proc *p = td->td_proc;
725 	struct ucred *cred = td->td_ucred;
726 	struct vmspace *vm = p->p_vmspace;
727 	char *tempuser;
728 	int error;
729 
730 	if (ctob((uarea_pages + kstack_pages) +
731 	    vm->vm_dsize + vm->vm_ssize) >= limit)
732 		return (EFAULT);
733 	tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
734 	    M_WAITOK | M_ZERO);
735 	if (tempuser == NULL)
736 		return (ENOMEM);
737 	PROC_LOCK(p);
738 	fill_kinfo_proc(p, &p->p_uarea->u_kproc);
739 	PROC_UNLOCK(p);
740 	bcopy(p->p_uarea, tempuser, sizeof(struct user));
741 	bcopy(td->td_frame,
742 	    tempuser + ctob(uarea_pages) +
743 	    ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
744 	    sizeof(struct trapframe));
745 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
746 	    ctob(uarea_pages + kstack_pages),
747 	    (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
748 	    (int *)NULL, td);
749 	free(tempuser, M_TEMP);
750 	if (error == 0)
751 		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
752 		    (int)ctob(vm->vm_dsize),
753 		    (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
754 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
755 	if (error == 0)
756 		error = vn_rdwr_inchunks(UIO_WRITE, vp,
757 		    (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
758 		    round_page(ctob(vm->vm_ssize)),
759 		    (off_t)ctob(uarea_pages + kstack_pages) +
760 			ctob(vm->vm_dsize), UIO_USERSPACE,
761 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
762 	return (error);
763 }
764 /*
765  * If a linux binary is exec'ing something, try this image activator
766  * first.  We override standard shell script execution in order to
767  * be able to modify the interpreter path.  We only do this if a linux
768  * binary is doing the exec, so we do not create an EXEC module for it.
769  */
770 static int	exec_linux_imgact_try(struct image_params *iparams);
771 
772 static int
773 exec_linux_imgact_try(struct image_params *imgp)
774 {
775     const char *head = (const char *)imgp->image_header;
776     int error = -1;
777 
778     /*
779      * The interpreter for shell scripts run from a linux binary needs
780      * to be located in /compat/linux if possible in order to recursively
781      * maintain linux path emulation.
782      */
783     if (((const short *)head)[0] == SHELLMAGIC) {
784 	    /*
785 	     * Run our normal shell image activator.  If it succeeds attempt
786 	     * to use the alternate path for the interpreter.  If an alternate
787 	     * path is found, use our stringspace to store it.
788 	     */
789 	    if ((error = exec_shell_imgact(imgp)) == 0) {
790 		    char *rpath = NULL;
791 
792 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
793 			imgp->interpreter_name, &rpath, 0);
794 		    if (rpath != imgp->interpreter_name) {
795 			    int len = strlen(rpath) + 1;
796 
797 			    if (len <= MAXSHELLCMDLEN) {
798 				    memcpy(imgp->interpreter_name, rpath, len);
799 			    }
800 			    free(rpath, M_TEMP);
801 		    }
802 	    }
803     }
804     return(error);
805 }
806 
807 struct sysentvec linux_sysvec = {
808 	LINUX_SYS_MAXSYSCALL,
809 	linux_sysent,
810 	0xff,
811 	LINUX_SIGTBLSZ,
812 	bsd_to_linux_signal,
813 	ELAST + 1,
814 	bsd_to_linux_errno,
815 	translate_traps,
816 	linux_fixup,
817 	linux_sendsig,
818 	linux_sigcode,
819 	&linux_szsigcode,
820 	linux_prepsyscall,
821 	"Linux a.out",
822 	linux_aout_coredump,
823 	exec_linux_imgact_try,
824 	LINUX_MINSIGSTKSZ,
825 	PAGE_SIZE,
826 	VM_MIN_ADDRESS,
827 	VM_MAXUSER_ADDRESS,
828 	USRSTACK,
829 	PS_STRINGS,
830 	VM_PROT_ALL,
831 	exec_copyout_strings,
832 	exec_setregs
833 };
834 
835 struct sysentvec elf_linux_sysvec = {
836 	LINUX_SYS_MAXSYSCALL,
837 	linux_sysent,
838 	0xff,
839 	LINUX_SIGTBLSZ,
840 	bsd_to_linux_signal,
841 	ELAST + 1,
842 	bsd_to_linux_errno,
843 	translate_traps,
844 	elf_linux_fixup,
845 	linux_sendsig,
846 	linux_sigcode,
847 	&linux_szsigcode,
848 	linux_prepsyscall,
849 	"Linux ELF",
850 	elf32_coredump,
851 	exec_linux_imgact_try,
852 	LINUX_MINSIGSTKSZ,
853 	PAGE_SIZE,
854 	VM_MIN_ADDRESS,
855 	VM_MAXUSER_ADDRESS,
856 	USRSTACK,
857 	PS_STRINGS,
858 	VM_PROT_ALL,
859 	exec_copyout_strings,
860 	exec_setregs
861 };
862 
863 static Elf32_Brandinfo linux_brand = {
864 					ELFOSABI_LINUX,
865 					EM_386,
866 					"Linux",
867 					"/compat/linux",
868 					"/lib/ld-linux.so.1",
869 					&elf_linux_sysvec
870 				 };
871 
872 static Elf32_Brandinfo linux_glibc2brand = {
873 					ELFOSABI_LINUX,
874 					EM_386,
875 					"Linux",
876 					"/compat/linux",
877 					"/lib/ld-linux.so.2",
878 					&elf_linux_sysvec
879 				 };
880 
881 Elf32_Brandinfo *linux_brandlist[] = {
882 					&linux_brand,
883 					&linux_glibc2brand,
884 					NULL
885 				};
886 
887 static int
888 linux_elf_modevent(module_t mod, int type, void *data)
889 {
890 	Elf32_Brandinfo **brandinfo;
891 	int error;
892 	struct linux_ioctl_handler **lihp;
893 
894 	error = 0;
895 
896 	switch(type) {
897 	case MOD_LOAD:
898 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
899 		     ++brandinfo)
900 			if (elf32_insert_brand_entry(*brandinfo) < 0)
901 				error = EINVAL;
902 		if (error == 0) {
903 			SET_FOREACH(lihp, linux_ioctl_handler_set)
904 				linux_ioctl_register_handler(*lihp);
905 			if (bootverbose)
906 				printf("Linux ELF exec handler installed\n");
907 		} else
908 			printf("cannot insert Linux ELF brand handler\n");
909 		break;
910 	case MOD_UNLOAD:
911 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
912 		     ++brandinfo)
913 			if (elf32_brand_inuse(*brandinfo))
914 				error = EBUSY;
915 		if (error == 0) {
916 			for (brandinfo = &linux_brandlist[0];
917 			     *brandinfo != NULL; ++brandinfo)
918 				if (elf32_remove_brand_entry(*brandinfo) < 0)
919 					error = EINVAL;
920 		}
921 		if (error == 0) {
922 			SET_FOREACH(lihp, linux_ioctl_handler_set)
923 				linux_ioctl_unregister_handler(*lihp);
924 			if (bootverbose)
925 				printf("Linux ELF exec handler removed\n");
926 		} else
927 			printf("Could not deinstall ELF interpreter entry\n");
928 		break;
929 	default:
930 		break;
931 	}
932 	return error;
933 }
934 
935 static moduledata_t linux_elf_mod = {
936 	"linuxelf",
937 	linux_elf_modevent,
938 	0
939 };
940 
941 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
942