xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision aed23bc4ceaab17b4717b9f4dab300cd372f801b)
1 /*-
 * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/user.h>
52 #include <sys/vnode.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_extern.h>
58 #include <sys/exec.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <sys/mutex.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70 
71 #include <i386/linux/linux.h>
72 #include <i386/linux/linux_proto.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 
/*
 * Module metadata: declare version 1 of the "linux" module and its
 * dependencies on the sysvmsg, sysvsem and sysvshm modules, with all
 * three version arguments of each dependency set to 1.
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Defines the M_LINUX malloc type ("Linux mode structures"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
82 
/*
 * The two characters "#!" read as one 16-bit value in host byte order.
 * exec_linux_imgact_try() compares the first short of the image header
 * against this to recognise shell scripts.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
88 
/*
 * Allow the sendsig functions to use the ldebug() facility even though
 * they are not syscalls themselves.  Both names are mapped to syscall
 * number 0, which is slightly less bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
97 
/*
 * Signal code blob and its size, both defined outside this file.  The
 * sendsig routines below point %eip at PS_STRINGS minus the size found
 * through sv_szsigcode.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Set of ioctl handlers (un)registered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations of static routines referenced by the sysentvecs. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
		    u_long code);
113 
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table is indexed by the (positive) FreeBSD errno value and holds
 * the negated Linux errno to hand back; it has ELAST + 1 entries.  The
 * trailing comment on each row gives the index range that row covers.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 -  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 - 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 - 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 - 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 - 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 - 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 - 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 - 79 */
	-6, -6, -43, -42, -75, -6, -84				/* 80 - 86 */
};
128 
/*
 * FreeBSD-to-Linux signal number map with LINUX_SIGTBLSZ entries.  The
 * sendsig routines index the process's sv_sigtbl with _SIG_IDX(sig);
 * this table is the one listed in both sysentvecs below.
 * NOTE(review): the 0 entries presumably mark FreeBSD signals with no
 * Linux counterpart -- confirm against the signal headers.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
139 
/*
 * Linux-to-FreeBSD signal number map with LINUX_SIGTBLSZ entries; the
 * inverse direction of bsd_to_linux_signal above.  It is not referenced
 * elsewhere in this file.
 * NOTE(review): presumably indexed by Linux signal number minus one,
 * with 0 meaning "no FreeBSD equivalent" -- confirm against the users
 * of this table.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, 0
};
150 
/*
 * FreeBSD trap code to Linux trap number translation.  The table is
 * indexed by the FreeBSD trap code (see the per-entry comments); codes
 * with no listed counterpart, and codes beyond the end of the table,
 * translate to LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above.  The argument is evaluated
 * twice, so it must not have side effects.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
189 
190 /*
191  * If FreeBSD & Linux have a difference of opinion about what a trap
192  * means, deal with it here.
193  *
194  * MPSAFE
195  */
196 static int
197 translate_traps(int signal, int trap_code)
198 {
199 	if (signal != SIGBUS)
200 		return signal;
201 	switch (trap_code) {
202 	case T_PROTFLT:
203 	case T_TSSFLT:
204 	case T_DOUBLEFLT:
205 	case T_PAGEFLT:
206 		return SIGSEGV;
207 	default:
208 		return signal;
209 	}
210 }
211 
212 static int
213 linux_fixup(register_t **stack_base, struct image_params *imgp)
214 {
215 	register_t *argv, *envp;
216 
217 	argv = *stack_base;
218 	envp = *stack_base + (imgp->argc + 1);
219 	(*stack_base)--;
220 	**stack_base = (intptr_t)(void *)envp;
221 	(*stack_base)--;
222 	**stack_base = (intptr_t)(void *)argv;
223 	(*stack_base)--;
224 	**stack_base = imgp->argc;
225 	return 0;
226 }
227 
228 static int
229 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
230 {
231 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
232 	register_t *pos;
233 
234 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
235 
236 	if (args->trace) {
237 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
238 	}
239 	if (args->execfd != -1) {
240 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
241 	}
242 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
243 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
244 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
245 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
246 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
247 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
248 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
249 	PROC_LOCK(imgp->proc);
250 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
251 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
252 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
253 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
254 	PROC_UNLOCK(imgp->proc);
255 	AUXARGS_ENTRY(pos, AT_NULL, 0);
256 
257 	free(imgp->auxargs, M_TEMP);
258 	imgp->auxargs = NULL;
259 
260 	(*stack_base)--;
261 	**stack_base = (long)imgp->argc;
262 	return 0;
263 }
264 
/* User code and data segment selectors loaded when entering a handler. */
extern int _ucodesel, _udatasel;
/*
 * Offset added to the start of the signal code to obtain the entry
 * point used by linux_rt_sendsig(); defined outside this file.
 */
extern unsigned long linux_sznonrtsigcode;
267 
268 static void
269 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
270 {
271 	register struct thread *td = curthread;
272 	register struct proc *p = td->td_proc;
273 	register struct trapframe *regs;
274 	struct l_rt_sigframe *fp, frame;
275 	int oonstack;
276 
277 	PROC_LOCK_ASSERT(p, MA_OWNED);
278 	regs = td->td_frame;
279 	oonstack = sigonstack(regs->tf_esp);
280 
281 #ifdef DEBUG
282 	if (ldebug(rt_sendsig))
283 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
284 		    catcher, sig, (void*)mask, code);
285 #endif
286 	/*
287 	 * Allocate space for the signal handler context.
288 	 */
289 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
290 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
291 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
292 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
293 	} else
294 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
295 	PROC_UNLOCK(p);
296 
297 	/*
298 	 * Build the argument list for the signal handler.
299 	 */
300 	if (p->p_sysent->sv_sigtbl)
301 		if (sig <= p->p_sysent->sv_sigsize)
302 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
303 
304 	frame.sf_handler = catcher;
305 	frame.sf_sig = sig;
306 	frame.sf_siginfo = &fp->sf_si;
307 	frame.sf_ucontext = &fp->sf_sc;
308 
309 	/* Fill in POSIX parts */
310 	frame.sf_si.lsi_signo = sig;
311 	frame.sf_si.lsi_code = code;
312 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
313 
314 	/*
315 	 * Build the signal context to be used by sigreturn.
316 	 */
317 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
318 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
319 
320 	PROC_LOCK(p);
321 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
322 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
323 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
324 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
325 	PROC_UNLOCK(p);
326 
327 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
328 
329 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
330 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
331 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
332 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
333 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
334 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
335 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
336 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
337 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
338 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
339 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
340 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
341 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
342 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
343 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
344 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
345 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
346 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
347 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
348 
349 #ifdef DEBUG
350 	if (ldebug(rt_sendsig))
351 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
352 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
353 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
354 #endif
355 
356 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
357 		/*
358 		 * Process has trashed its stack; give it an illegal
359 		 * instruction to halt it in its tracks.
360 		 */
361 #ifdef DEBUG
362 		if (ldebug(rt_sendsig))
363 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
364 			    fp, oonstack);
365 #endif
366 		PROC_LOCK(p);
367 		sigexit(td, SIGILL);
368 	}
369 
370 	/*
371 	 * Build context to run handler in.
372 	 */
373 	regs->tf_esp = (int)fp;
374 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
375 	    linux_sznonrtsigcode;
376 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
377 	regs->tf_cs = _ucodesel;
378 	regs->tf_ds = _udatasel;
379 	regs->tf_es = _udatasel;
380 	regs->tf_fs = _udatasel;
381 	regs->tf_ss = _udatasel;
382 	PROC_LOCK(p);
383 }
384 
385 
386 /*
387  * Send an interrupt to process.
388  *
389  * Stack is set up to allow sigcode stored
390  * in u. to call routine, followed by kcall
391  * to sigreturn routine below.  After sigreturn
392  * resets the signal mask, the stack, and the
393  * frame pointer, it returns to the user
394  * specified pc, psl.
395  */
396 
397 static void
398 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
399 {
400 	register struct thread *td = curthread;
401 	register struct proc *p = td->td_proc;
402 	register struct trapframe *regs;
403 	struct l_sigframe *fp, frame;
404 	l_sigset_t lmask;
405 	int oonstack, i;
406 
407 	PROC_LOCK_ASSERT(p, MA_OWNED);
408 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
409 		/* Signal handler installed with SA_SIGINFO. */
410 		linux_rt_sendsig(catcher, sig, mask, code);
411 		return;
412 	}
413 
414 	regs = td->td_frame;
415 	oonstack = sigonstack(regs->tf_esp);
416 
417 #ifdef DEBUG
418 	if (ldebug(sendsig))
419 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
420 		    catcher, sig, (void*)mask, code);
421 #endif
422 
423 	/*
424 	 * Allocate space for the signal handler context.
425 	 */
426 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
427 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
428 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
429 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
430 	} else
431 		fp = (struct l_sigframe *)regs->tf_esp - 1;
432 	PROC_UNLOCK(p);
433 
434 	/*
435 	 * Build the argument list for the signal handler.
436 	 */
437 	if (p->p_sysent->sv_sigtbl)
438 		if (sig <= p->p_sysent->sv_sigsize)
439 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
440 
441 	frame.sf_handler = catcher;
442 	frame.sf_sig = sig;
443 
444 	bsd_to_linux_sigset(mask, &lmask);
445 
446 	/*
447 	 * Build the signal context to be used by sigreturn.
448 	 */
449 	frame.sf_sc.sc_mask   = lmask.__bits[0];
450 	frame.sf_sc.sc_gs     = rgs();
451 	frame.sf_sc.sc_fs     = regs->tf_fs;
452 	frame.sf_sc.sc_es     = regs->tf_es;
453 	frame.sf_sc.sc_ds     = regs->tf_ds;
454 	frame.sf_sc.sc_edi    = regs->tf_edi;
455 	frame.sf_sc.sc_esi    = regs->tf_esi;
456 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
457 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
458 	frame.sf_sc.sc_edx    = regs->tf_edx;
459 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
460 	frame.sf_sc.sc_eax    = regs->tf_eax;
461 	frame.sf_sc.sc_eip    = regs->tf_eip;
462 	frame.sf_sc.sc_cs     = regs->tf_cs;
463 	frame.sf_sc.sc_eflags = regs->tf_eflags;
464 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
465 	frame.sf_sc.sc_ss     = regs->tf_ss;
466 	frame.sf_sc.sc_err    = regs->tf_err;
467 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
468 
469 	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
470 
471 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
472 		frame.sf_extramask[i] = lmask.__bits[i+1];
473 
474 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
475 		/*
476 		 * Process has trashed its stack; give it an illegal
477 		 * instruction to halt it in its tracks.
478 		 */
479 		PROC_LOCK(p);
480 		sigexit(td, SIGILL);
481 	}
482 
483 	/*
484 	 * Build context to run handler in.
485 	 */
486 	regs->tf_esp = (int)fp;
487 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
488 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
489 	regs->tf_cs = _ucodesel;
490 	regs->tf_ds = _udatasel;
491 	regs->tf_es = _udatasel;
492 	regs->tf_fs = _udatasel;
493 	regs->tf_ss = _udatasel;
494 	PROC_LOCK(p);
495 }
496 
497 /*
498  * System call to cleanup state after a signal
499  * has been taken.  Reset signal mask and
500  * stack state from context left by sendsig (above).
501  * Return to previous pc and psl as specified by
502  * context left by sendsig. Check carefully to
503  * make sure that the user has not modified the
504  * psl to gain improper privileges or to cause
505  * a machine fault.
506  */
507 int
508 linux_sigreturn(td, args)
509 	struct thread *td;
510 	struct linux_sigreturn_args *args;
511 {
512 	struct proc *p = td->td_proc;
513 	struct l_sigframe frame;
514 	register struct trapframe *regs;
515 	l_sigset_t lmask;
516 	int eflags, i;
517 
518 	regs = td->td_frame;
519 
520 #ifdef DEBUG
521 	if (ldebug(sigreturn))
522 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
523 #endif
524 	/*
525 	 * The trampoline code hands us the sigframe.
526 	 * It is unsafe to keep track of it ourselves, in the event that a
527 	 * program jumps out of a signal handler.
528 	 */
529 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
530 		return (EFAULT);
531 
532 	/*
533 	 * Check for security violations.
534 	 */
535 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
536 	eflags = frame.sf_sc.sc_eflags;
537 	/*
538 	 * XXX do allow users to change the privileged flag PSL_RF.  The
539 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
540 	 * sometimes set it there too.  tf_eflags is kept in the signal
541 	 * context during signal handling and there is no other place
542 	 * to remember it, so the PSL_RF bit may be corrupted by the
543 	 * signal handler without us knowing.  Corruption of the PSL_RF
544 	 * bit at worst causes one more or one less debugger trap, so
545 	 * allowing it is fairly harmless.
546 	 */
547 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
548     		return(EINVAL);
549 	}
550 
551 	/*
552 	 * Don't allow users to load a valid privileged %cs.  Let the
553 	 * hardware check for invalid selectors, excess privilege in
554 	 * other selectors, invalid %eip's and invalid %esp's.
555 	 */
556 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
557 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
558 		trapsignal(p, SIGBUS, T_PROTFLT);
559 		return(EINVAL);
560 	}
561 
562 	lmask.__bits[0] = frame.sf_sc.sc_mask;
563 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
564 		lmask.__bits[i+1] = frame.sf_extramask[i];
565 	PROC_LOCK(p);
566 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
567 	SIG_CANTMASK(p->p_sigmask);
568 	signotify(p);
569 	PROC_UNLOCK(p);
570 
571 	/*
572 	 * Restore signal context.
573 	 */
574 	/* %gs was restored by the trampoline. */
575 	regs->tf_fs     = frame.sf_sc.sc_fs;
576 	regs->tf_es     = frame.sf_sc.sc_es;
577 	regs->tf_ds     = frame.sf_sc.sc_ds;
578 	regs->tf_edi    = frame.sf_sc.sc_edi;
579 	regs->tf_esi    = frame.sf_sc.sc_esi;
580 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
581 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
582 	regs->tf_edx    = frame.sf_sc.sc_edx;
583 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
584 	regs->tf_eax    = frame.sf_sc.sc_eax;
585 	regs->tf_eip    = frame.sf_sc.sc_eip;
586 	regs->tf_cs     = frame.sf_sc.sc_cs;
587 	regs->tf_eflags = eflags;
588 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
589 	regs->tf_ss     = frame.sf_sc.sc_ss;
590 
591 	return (EJUSTRETURN);
592 }
593 
594 /*
595  * System call to cleanup state after a signal
596  * has been taken.  Reset signal mask and
597  * stack state from context left by rt_sendsig (above).
598  * Return to previous pc and psl as specified by
599  * context left by sendsig. Check carefully to
600  * make sure that the user has not modified the
601  * psl to gain improper privileges or to cause
602  * a machine fault.
603  */
604 int
605 linux_rt_sigreturn(td, args)
606 	struct thread *td;
607 	struct linux_rt_sigreturn_args *args;
608 {
609 	struct proc *p = td->td_proc;
610 	struct l_ucontext uc;
611 	struct l_sigcontext *context;
612 	l_stack_t *lss;
613 	stack_t ss;
614 	register struct trapframe *regs;
615 	int eflags;
616 
617 	regs = td->td_frame;
618 
619 #ifdef DEBUG
620 	if (ldebug(rt_sigreturn))
621 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
622 #endif
623 	/*
624 	 * The trampoline code hands us the ucontext.
625 	 * It is unsafe to keep track of it ourselves, in the event that a
626 	 * program jumps out of a signal handler.
627 	 */
628 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
629 		return (EFAULT);
630 
631 	context = &uc.uc_mcontext;
632 
633 	/*
634 	 * Check for security violations.
635 	 */
636 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
637 	eflags = context->sc_eflags;
638 	/*
639 	 * XXX do allow users to change the privileged flag PSL_RF.  The
640 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
641 	 * sometimes set it there too.  tf_eflags is kept in the signal
642 	 * context during signal handling and there is no other place
643 	 * to remember it, so the PSL_RF bit may be corrupted by the
644 	 * signal handler without us knowing.  Corruption of the PSL_RF
645 	 * bit at worst causes one more or one less debugger trap, so
646 	 * allowing it is fairly harmless.
647 	 */
648 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
649     		return(EINVAL);
650 	}
651 
652 	/*
653 	 * Don't allow users to load a valid privileged %cs.  Let the
654 	 * hardware check for invalid selectors, excess privilege in
655 	 * other selectors, invalid %eip's and invalid %esp's.
656 	 */
657 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
658 	if (!CS_SECURE(context->sc_cs)) {
659 		trapsignal(p, SIGBUS, T_PROTFLT);
660 		return(EINVAL);
661 	}
662 
663 	PROC_LOCK(p);
664 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
665 	SIG_CANTMASK(p->p_sigmask);
666 	signotify(p);
667 	PROC_UNLOCK(p);
668 
669 	/*
670 	 * Restore signal context
671 	 */
672 	/* %gs was restored by the trampoline. */
673 	regs->tf_fs     = context->sc_fs;
674 	regs->tf_es     = context->sc_es;
675 	regs->tf_ds     = context->sc_ds;
676 	regs->tf_edi    = context->sc_edi;
677 	regs->tf_esi    = context->sc_esi;
678 	regs->tf_ebp    = context->sc_ebp;
679 	regs->tf_ebx    = context->sc_ebx;
680 	regs->tf_edx    = context->sc_edx;
681 	regs->tf_ecx    = context->sc_ecx;
682 	regs->tf_eax    = context->sc_eax;
683 	regs->tf_eip    = context->sc_eip;
684 	regs->tf_cs     = context->sc_cs;
685 	regs->tf_eflags = eflags;
686 	regs->tf_esp    = context->sc_esp_at_signal;
687 	regs->tf_ss     = context->sc_ss;
688 
689 	/*
690 	 * call sigaltstack & ignore results..
691 	 */
692 	lss = &uc.uc_stack;
693 	ss.ss_sp = lss->ss_sp;
694 	ss.ss_size = lss->ss_size;
695 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
696 
697 #ifdef DEBUG
698 	if (ldebug(rt_sigreturn))
699 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
700 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
701 #endif
702 	(void)kern_sigaltstack(td, &ss, NULL);
703 
704 	return (EJUSTRETURN);
705 }
706 
707 /*
708  * MPSAFE
709  */
710 static void
711 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
712 {
713 	args[0] = tf->tf_ebx;
714 	args[1] = tf->tf_ecx;
715 	args[2] = tf->tf_edx;
716 	args[3] = tf->tf_esi;
717 	args[4] = tf->tf_edi;
718 	args[5] = tf->tf_ebp;	/* Unconfirmed */
719 	*params = NULL;		/* no copyin */
720 }
721 
722 
723 
724 /*
725  * Dump core, into a file named as described in the comments for
726  * expand_name(), unless the process was setuid/setgid.
727  */
728 static int
729 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
730 {
731 	struct proc *p = td->td_proc;
732 	struct ucred *cred = td->td_ucred;
733 	struct vmspace *vm = p->p_vmspace;
734 	char *tempuser;
735 	int error;
736 
737 	if (ctob((uarea_pages + kstack_pages) +
738 	    vm->vm_dsize + vm->vm_ssize) >= limit)
739 		return (EFAULT);
740 	tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
741 	    M_WAITOK | M_ZERO);
742 	if (tempuser == NULL)
743 		return (ENOMEM);
744 	PROC_LOCK(p);
745 	fill_kinfo_proc(p, &p->p_uarea->u_kproc);
746 	PROC_UNLOCK(p);
747 	bcopy(p->p_uarea, tempuser, sizeof(struct user));
748 	bcopy(td->td_frame,
749 	    tempuser + ctob(uarea_pages) +
750 	    ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
751 	    sizeof(struct trapframe));
752 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
753 	    ctob(uarea_pages + kstack_pages),
754 	    (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
755 	    (int *)NULL, td);
756 	free(tempuser, M_TEMP);
757 	if (error == 0)
758 		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
759 		    (int)ctob(vm->vm_dsize),
760 		    (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
761 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
762 	if (error == 0)
763 		error = vn_rdwr_inchunks(UIO_WRITE, vp,
764 		    (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
765 		    round_page(ctob(vm->vm_ssize)),
766 		    (off_t)ctob(uarea_pages + kstack_pages) +
767 		        ctob(vm->vm_dsize), UIO_USERSPACE,
768 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
769 	return (error);
770 }
771 /*
772  * If a linux binary is exec'ing something, try this image activator
773  * first.  We override standard shell script execution in order to
774  * be able to modify the interpreter path.  We only do this if a linux
775  * binary is doing the exec, so we do not create an EXEC module for it.
776  */
777 static int	exec_linux_imgact_try(struct image_params *iparams);
778 
779 static int
780 exec_linux_imgact_try(imgp)
781     struct image_params *imgp;
782 {
783     const char *head = (const char *)imgp->image_header;
784     int error = -1;
785 
786     /*
787      * The interpreter for shell scripts run from a linux binary needs
788      * to be located in /compat/linux if possible in order to recursively
789      * maintain linux path emulation.
790      */
791     if (((const short *)head)[0] == SHELLMAGIC) {
792 	    /*
793 	     * Run our normal shell image activator.  If it succeeds attempt
794 	     * to use the alternate path for the interpreter.  If an alternate
795 	     * path is found, use our stringspace to store it.
796 	     */
797 	    if ((error = exec_shell_imgact(imgp)) == 0) {
798 		    char *rpath = NULL;
799 
800 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
801 			imgp->interpreter_name, &rpath, 0);
802 		    if (rpath != imgp->interpreter_name) {
803 			    int len = strlen(rpath) + 1;
804 
805 			    if (len <= MAXSHELLCMDLEN) {
806 				    memcpy(imgp->interpreter_name, rpath, len);
807 			    }
808 			    free(rpath, M_TEMP);
809 		    }
810 	    }
811     }
812     return(error);
813 }
814 
/*
 * System entry vector for Linux a.out binaries.
 *
 * This is a positional initializer: the entries must stay in the member
 * order of struct sysentvec, which is declared outside this file.  It
 * differs from elf_linux_sysvec only in the fixup routine, the name
 * string and the coredump routine.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux a.out",
	linux_aout_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_setregs
};
842 
/*
 * System entry vector for Linux ELF binaries; referenced by both brand
 * descriptions below.
 *
 * This is a positional initializer: the entries must stay in the member
 * order of struct sysentvec, which is declared outside this file.  It
 * differs from linux_sysvec only in the fixup routine, the name string
 * and the coredump routine.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	elf_linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux ELF",
	elf32_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_setregs
};
870 
/*
 * ELF brand description naming /lib/ld-linux.so.1 and bound to
 * elf_linux_sysvec.
 * NOTE(review): "/compat/linux" is presumably the emulation root and
 * the .so path the interpreter to match -- confirm against the
 * Elf32_Brandinfo declaration.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec
				 };
879 
/*
 * ELF brand description naming /lib/ld-linux.so.2 and bound to
 * elf_linux_sysvec; otherwise identical to linux_brand.
 */
static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec
				 };
888 
/*
 * NULL-terminated list of the brands that linux_elf_modevent() inserts
 * on module load and removes on module unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
894 
895 static int
896 linux_elf_modevent(module_t mod, int type, void *data)
897 {
898 	Elf32_Brandinfo **brandinfo;
899 	int error;
900 	struct linux_ioctl_handler **lihp;
901 
902 	error = 0;
903 
904 	switch(type) {
905 	case MOD_LOAD:
906 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
907 		     ++brandinfo)
908 			if (elf32_insert_brand_entry(*brandinfo) < 0)
909 				error = EINVAL;
910 		if (error == 0) {
911 			SET_FOREACH(lihp, linux_ioctl_handler_set)
912 				linux_ioctl_register_handler(*lihp);
913 			if (bootverbose)
914 				printf("Linux ELF exec handler installed\n");
915 		} else
916 			printf("cannot insert Linux ELF brand handler\n");
917 		break;
918 	case MOD_UNLOAD:
919 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
920 		     ++brandinfo)
921 			if (elf32_brand_inuse(*brandinfo))
922 				error = EBUSY;
923 		if (error == 0) {
924 			for (brandinfo = &linux_brandlist[0];
925 			     *brandinfo != NULL; ++brandinfo)
926 				if (elf32_remove_brand_entry(*brandinfo) < 0)
927 					error = EINVAL;
928 		}
929 		if (error == 0) {
930 			SET_FOREACH(lihp, linux_ioctl_handler_set)
931 				linux_ioctl_unregister_handler(*lihp);
932 			if (bootverbose)
933 				printf("Linux ELF exec handler removed\n");
934 		} else
935 			printf("Could not deinstall ELF interpreter entry\n");
936 		break;
937 	default:
938 		break;
939 	}
940 	return error;
941 }
942 
/*
 * Module description: name "linuxelf", event handler
 * linux_elf_modevent(), and no extra argument for the handler.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module in the SI_SUB_EXEC subsystem, at any order. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
950