xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision a3e8fd0b7f663db7eafff527d5c3ca3bcfa8a537)
1 /*-
 * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/user.h>
52 #include <sys/vnode.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_extern.h>
58 #include <sys/exec.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <sys/mutex.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70 
71 #include <i386/linux/linux.h>
72 #include <i386/linux/linux_proto.h>
73 #include <compat/linux/linux_signal.h>
74 #include <compat/linux/linux_util.h>
75 
/*
 * Module version, plus the modules this one declares a dependency on
 * (sysvmsg, sysvsem and sysvshm, each at version 1).
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Defines the M_LINUX malloc type ("linux"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
82 
/*
 * The two characters "#!" that open a shell script, as they appear when
 * the first two bytes of the image are loaded as one 16-bit value.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
88 
89 /*
90  * Allow the sendsig functions to use the ldebug() facility
91  * even though they are not syscalls themselves. Map them
92  * to syscall 0. This is slightly less bogus than using
93  * ldebug(sigreturn).
94  */
95 #define	LINUX_SYS_linux_rt_sendsig	0
96 #define	LINUX_SYS_linux_sendsig		0
97 
98 extern char linux_sigcode[];
99 extern int linux_szsigcode;
100 
101 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
102 
103 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
104 
105 static int	linux_fixup(register_t **stack_base,
106 		    struct image_params *iparams);
107 static int	elf_linux_fixup(register_t **stack_base,
108 		    struct image_params *iparams);
109 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
110 		    caddr_t *params);
111 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
112 		    u_long code);
113 
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table has ELAST + 1 entries and is indexed by the FreeBSD errno
 * value; every entry is stored already negated.  Each row below covers
 * ten consecutive FreeBSD errno values.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	/* FreeBSD errno 0 - 9 */
  	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	/* 10 - 19 */
 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	/* 20 - 29 */
 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	/* 30 - 39 */
 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	/* 40 - 49 */
 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	/* 50 - 59 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	/* 60 - 69 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	/* 70 - 79 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	/* 80 - 86 */
  	-6, -6, -43, -42, -75, -6, -84
};
128 
/*
 * FreeBSD -> Linux signal numbers.  The sendsig routines in this file
 * reach the table through sv_sigtbl and index it with _SIG_IDX(sig).
 * NOTE(review): the 0 entries presumably mark FreeBSD signals that have
 * no Linux counterpart -- confirm against the signal headers.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
139 
/*
 * Linux -> FreeBSD signal numbers; the reverse direction of
 * bsd_to_linux_signal[] and of the same size (LINUX_SIGTBLSZ).
 * NOTE(review): presumably indexed by (Linux signal number - 1), like
 * the forward table -- the users of this table are outside this file.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, 0
};
150 
/* Value reported for any trap type the table below has no mapping for. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap number translation table, indexed by the FreeBSD trap type.
 * The FreeBSD name of each mapped slot is given alongside its index.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0 */		LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT */	6,
	/*  2 */		LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT */	3,
	/*  4 */		LINUX_T_UNKNOWN,
	/*  5 */		LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP */	16,
	/*  7 T_ASTFLT */	254,
	/*  8 */		LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT */	13,
	/* 10 T_TRCTRAP */	1,
	/* 11 */		LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT */	14,
	/* 13 */		LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT */	17,
	/* 15 */		LINUX_T_UNKNOWN,
	/* 16 */		LINUX_T_UNKNOWN,
	/* 17 */		LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE */	0,
	/* 19 T_NMI */		2,
	/* 20 T_OFLOW */	4,
	/* 21 T_BOUND */	5,
	/* 22 T_DNA */		7,
	/* 23 T_DOUBLEFLT */	8,
	/* 24 T_FPOPFLT */	9,
	/* 25 T_TSSFLT */	10,
	/* 26 T_SEGNPFLT */	11,
	/* 27 T_STKFLT */	12,
	/* 28 T_MCHK */		18,
	/* 29 T_XMMFLT */	19,
	/* 30 T_RESERVED */	15
};

/* Number of entries in _bsd_to_linux_trapcode[]. */
#define	LINUX_TRAPCODE_COUNT \
    (sizeof(_bsd_to_linux_trapcode) / sizeof(*_bsd_to_linux_trapcode))

/*
 * Look up a trap type in the table; a code at or beyond the end of the
 * table yields LINUX_T_UNKNOWN instead of reading out of bounds.
 */
#define bsd_to_linux_trapcode(code) \
    ((code) < LINUX_TRAPCODE_COUNT ? \
     _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
189 
190 /*
191  * If FreeBSD & Linux have a difference of opinion about what a trap
192  * means, deal with it here.
193  *
194  * MPSAFE
195  */
196 static int
197 translate_traps(int signal, int trap_code)
198 {
199 	if (signal != SIGBUS)
200 		return signal;
201 	switch (trap_code) {
202 	case T_PROTFLT:
203 	case T_TSSFLT:
204 	case T_DOUBLEFLT:
205 	case T_PAGEFLT:
206 		return SIGSEGV;
207 	default:
208 		return signal;
209 	}
210 }
211 
212 static int
213 linux_fixup(register_t **stack_base, struct image_params *imgp)
214 {
215 	register_t *argv, *envp;
216 
217 	argv = *stack_base;
218 	envp = *stack_base + (imgp->argc + 1);
219 	(*stack_base)--;
220 	**stack_base = (intptr_t)(void *)envp;
221 	(*stack_base)--;
222 	**stack_base = (intptr_t)(void *)argv;
223 	(*stack_base)--;
224 	**stack_base = imgp->argc;
225 	return 0;
226 }
227 
228 static int
229 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
230 {
231 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
232 	register_t *pos;
233 
234 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
235 
236 	if (args->trace)
237 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
238 	if (args->execfd != -1)
239 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
240 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
241 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
242 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
243 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
244 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
245 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
246 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
247 	PROC_LOCK(imgp->proc);
248 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
249 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
250 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
251 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
252 	PROC_UNLOCK(imgp->proc);
253 	AUXARGS_ENTRY(pos, AT_NULL, 0);
254 
255 	free(imgp->auxargs, M_TEMP);
256 	imgp->auxargs = NULL;
257 
258 	(*stack_base)--;
259 	**stack_base = (long)imgp->argc;
260 	return 0;
261 }
262 
263 extern int _ucodesel, _udatasel;
264 extern unsigned long linux_sznonrtsigcode;
265 
266 static void
267 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
268 {
269 	register struct thread *td = curthread;
270 	register struct proc *p = td->td_proc;
271 	register struct trapframe *regs;
272 	struct l_rt_sigframe *fp, frame;
273 	int oonstack;
274 
275 	PROC_LOCK_ASSERT(p, MA_OWNED);
276 	regs = td->td_frame;
277 	oonstack = sigonstack(regs->tf_esp);
278 
279 #ifdef DEBUG
280 	if (ldebug(rt_sendsig))
281 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
282 		    catcher, sig, (void*)mask, code);
283 #endif
284 	/*
285 	 * Allocate space for the signal handler context.
286 	 */
287 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
288 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
289 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
290 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
291 	} else
292 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
293 	PROC_UNLOCK(p);
294 
295 	/*
296 	 * Build the argument list for the signal handler.
297 	 */
298 	if (p->p_sysent->sv_sigtbl)
299 		if (sig <= p->p_sysent->sv_sigsize)
300 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
301 
302 	frame.sf_handler = catcher;
303 	frame.sf_sig = sig;
304 	frame.sf_siginfo = &fp->sf_si;
305 	frame.sf_ucontext = &fp->sf_sc;
306 
307 	/* Fill in POSIX parts */
308 	frame.sf_si.lsi_signo = sig;
309 	frame.sf_si.lsi_code = code;
310 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
311 
312 	/*
313 	 * Build the signal context to be used by sigreturn.
314 	 */
315 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
316 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
317 
318 	PROC_LOCK(p);
319 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
320 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
321 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
322 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
323 	PROC_UNLOCK(p);
324 
325 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
326 
327 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
328 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
329 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
330 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
331 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
332 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
333 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
334 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
335 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
336 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
337 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
338 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
339 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
340 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
341 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
342 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
343 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
344 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
345 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
346 
347 #ifdef DEBUG
348 	if (ldebug(rt_sendsig))
349 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
350 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
351 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
352 #endif
353 
354 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
355 		/*
356 		 * Process has trashed its stack; give it an illegal
357 		 * instruction to halt it in its tracks.
358 		 */
359 #ifdef DEBUG
360 		if (ldebug(rt_sendsig))
361 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
362 			    fp, oonstack);
363 #endif
364 		PROC_LOCK(p);
365 		sigexit(td, SIGILL);
366 	}
367 
368 	/*
369 	 * Build context to run handler in.
370 	 */
371 	regs->tf_esp = (int)fp;
372 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
373 	    linux_sznonrtsigcode;
374 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
375 	regs->tf_cs = _ucodesel;
376 	regs->tf_ds = _udatasel;
377 	regs->tf_es = _udatasel;
378 	regs->tf_fs = _udatasel;
379 	regs->tf_ss = _udatasel;
380 	PROC_LOCK(p);
381 }
382 
383 
384 /*
385  * Send an interrupt to process.
386  *
387  * Stack is set up to allow sigcode stored
388  * in u. to call routine, followed by kcall
389  * to sigreturn routine below.  After sigreturn
390  * resets the signal mask, the stack, and the
391  * frame pointer, it returns to the user
392  * specified pc, psl.
393  */
394 
395 static void
396 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
397 {
398 	register struct thread *td = curthread;
399 	register struct proc *p = td->td_proc;
400 	register struct trapframe *regs;
401 	struct l_sigframe *fp, frame;
402 	l_sigset_t lmask;
403 	int oonstack, i;
404 
405 	PROC_LOCK_ASSERT(p, MA_OWNED);
406 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
407 		/* Signal handler installed with SA_SIGINFO. */
408 		linux_rt_sendsig(catcher, sig, mask, code);
409 		return;
410 	}
411 
412 	regs = td->td_frame;
413 	oonstack = sigonstack(regs->tf_esp);
414 
415 #ifdef DEBUG
416 	if (ldebug(sendsig))
417 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
418 		    catcher, sig, (void*)mask, code);
419 #endif
420 
421 	/*
422 	 * Allocate space for the signal handler context.
423 	 */
424 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
425 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
426 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
427 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
428 	} else
429 		fp = (struct l_sigframe *)regs->tf_esp - 1;
430 	PROC_UNLOCK(p);
431 
432 	/*
433 	 * Build the argument list for the signal handler.
434 	 */
435 	if (p->p_sysent->sv_sigtbl)
436 		if (sig <= p->p_sysent->sv_sigsize)
437 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
438 
439 	frame.sf_handler = catcher;
440 	frame.sf_sig = sig;
441 
442 	bsd_to_linux_sigset(mask, &lmask);
443 
444 	/*
445 	 * Build the signal context to be used by sigreturn.
446 	 */
447 	frame.sf_sc.sc_mask   = lmask.__bits[0];
448 	frame.sf_sc.sc_gs     = rgs();
449 	frame.sf_sc.sc_fs     = regs->tf_fs;
450 	frame.sf_sc.sc_es     = regs->tf_es;
451 	frame.sf_sc.sc_ds     = regs->tf_ds;
452 	frame.sf_sc.sc_edi    = regs->tf_edi;
453 	frame.sf_sc.sc_esi    = regs->tf_esi;
454 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
455 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
456 	frame.sf_sc.sc_edx    = regs->tf_edx;
457 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
458 	frame.sf_sc.sc_eax    = regs->tf_eax;
459 	frame.sf_sc.sc_eip    = regs->tf_eip;
460 	frame.sf_sc.sc_cs     = regs->tf_cs;
461 	frame.sf_sc.sc_eflags = regs->tf_eflags;
462 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
463 	frame.sf_sc.sc_ss     = regs->tf_ss;
464 	frame.sf_sc.sc_err    = regs->tf_err;
465 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
466 
467 	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
468 
469 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
470 		frame.sf_extramask[i] = lmask.__bits[i+1];
471 
472 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
473 		/*
474 		 * Process has trashed its stack; give it an illegal
475 		 * instruction to halt it in its tracks.
476 		 */
477 		PROC_LOCK(p);
478 		sigexit(td, SIGILL);
479 	}
480 
481 	/*
482 	 * Build context to run handler in.
483 	 */
484 	regs->tf_esp = (int)fp;
485 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
486 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
487 	regs->tf_cs = _ucodesel;
488 	regs->tf_ds = _udatasel;
489 	regs->tf_es = _udatasel;
490 	regs->tf_fs = _udatasel;
491 	regs->tf_ss = _udatasel;
492 	PROC_LOCK(p);
493 }
494 
495 /*
496  * System call to cleanup state after a signal
497  * has been taken.  Reset signal mask and
498  * stack state from context left by sendsig (above).
499  * Return to previous pc and psl as specified by
500  * context left by sendsig. Check carefully to
501  * make sure that the user has not modified the
502  * psl to gain improper privileges or to cause
503  * a machine fault.
504  */
505 int
506 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
507 {
508 	struct proc *p = td->td_proc;
509 	struct l_sigframe frame;
510 	register struct trapframe *regs;
511 	l_sigset_t lmask;
512 	int eflags, i;
513 
514 	regs = td->td_frame;
515 
516 #ifdef DEBUG
517 	if (ldebug(sigreturn))
518 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
519 #endif
520 	/*
521 	 * The trampoline code hands us the sigframe.
522 	 * It is unsafe to keep track of it ourselves, in the event that a
523 	 * program jumps out of a signal handler.
524 	 */
525 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
526 		return (EFAULT);
527 
528 	/*
529 	 * Check for security violations.
530 	 */
531 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
532 	eflags = frame.sf_sc.sc_eflags;
533 	/*
534 	 * XXX do allow users to change the privileged flag PSL_RF.  The
535 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
536 	 * sometimes set it there too.  tf_eflags is kept in the signal
537 	 * context during signal handling and there is no other place
538 	 * to remember it, so the PSL_RF bit may be corrupted by the
539 	 * signal handler without us knowing.  Corruption of the PSL_RF
540 	 * bit at worst causes one more or one less debugger trap, so
541 	 * allowing it is fairly harmless.
542 	 */
543 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
544     		return(EINVAL);
545 
546 	/*
547 	 * Don't allow users to load a valid privileged %cs.  Let the
548 	 * hardware check for invalid selectors, excess privilege in
549 	 * other selectors, invalid %eip's and invalid %esp's.
550 	 */
551 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
552 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
553 		trapsignal(p, SIGBUS, T_PROTFLT);
554 		return(EINVAL);
555 	}
556 
557 	lmask.__bits[0] = frame.sf_sc.sc_mask;
558 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
559 		lmask.__bits[i+1] = frame.sf_extramask[i];
560 	PROC_LOCK(p);
561 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
562 	SIG_CANTMASK(p->p_sigmask);
563 	signotify(p);
564 	PROC_UNLOCK(p);
565 
566 	/*
567 	 * Restore signal context.
568 	 */
569 	/* %gs was restored by the trampoline. */
570 	regs->tf_fs     = frame.sf_sc.sc_fs;
571 	regs->tf_es     = frame.sf_sc.sc_es;
572 	regs->tf_ds     = frame.sf_sc.sc_ds;
573 	regs->tf_edi    = frame.sf_sc.sc_edi;
574 	regs->tf_esi    = frame.sf_sc.sc_esi;
575 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
576 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
577 	regs->tf_edx    = frame.sf_sc.sc_edx;
578 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
579 	regs->tf_eax    = frame.sf_sc.sc_eax;
580 	regs->tf_eip    = frame.sf_sc.sc_eip;
581 	regs->tf_cs     = frame.sf_sc.sc_cs;
582 	regs->tf_eflags = eflags;
583 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
584 	regs->tf_ss     = frame.sf_sc.sc_ss;
585 
586 	return (EJUSTRETURN);
587 }
588 
589 /*
590  * System call to cleanup state after a signal
591  * has been taken.  Reset signal mask and
592  * stack state from context left by rt_sendsig (above).
593  * Return to previous pc and psl as specified by
594  * context left by sendsig. Check carefully to
595  * make sure that the user has not modified the
596  * psl to gain improper privileges or to cause
597  * a machine fault.
598  */
599 int
600 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
601 {
602 	struct proc *p = td->td_proc;
603 	struct l_ucontext uc;
604 	struct l_sigcontext *context;
605 	l_stack_t *lss;
606 	stack_t ss;
607 	register struct trapframe *regs;
608 	int eflags;
609 
610 	regs = td->td_frame;
611 
612 #ifdef DEBUG
613 	if (ldebug(rt_sigreturn))
614 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
615 #endif
616 	/*
617 	 * The trampoline code hands us the ucontext.
618 	 * It is unsafe to keep track of it ourselves, in the event that a
619 	 * program jumps out of a signal handler.
620 	 */
621 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
622 		return (EFAULT);
623 
624 	context = &uc.uc_mcontext;
625 
626 	/*
627 	 * Check for security violations.
628 	 */
629 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
630 	eflags = context->sc_eflags;
631 	/*
632 	 * XXX do allow users to change the privileged flag PSL_RF.  The
633 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
634 	 * sometimes set it there too.  tf_eflags is kept in the signal
635 	 * context during signal handling and there is no other place
636 	 * to remember it, so the PSL_RF bit may be corrupted by the
637 	 * signal handler without us knowing.  Corruption of the PSL_RF
638 	 * bit at worst causes one more or one less debugger trap, so
639 	 * allowing it is fairly harmless.
640 	 */
641 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
642     		return(EINVAL);
643 
644 	/*
645 	 * Don't allow users to load a valid privileged %cs.  Let the
646 	 * hardware check for invalid selectors, excess privilege in
647 	 * other selectors, invalid %eip's and invalid %esp's.
648 	 */
649 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
650 	if (!CS_SECURE(context->sc_cs)) {
651 		trapsignal(p, SIGBUS, T_PROTFLT);
652 		return(EINVAL);
653 	}
654 
655 	PROC_LOCK(p);
656 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
657 	SIG_CANTMASK(p->p_sigmask);
658 	signotify(p);
659 	PROC_UNLOCK(p);
660 
661 	/*
662 	 * Restore signal context
663 	 */
664 	/* %gs was restored by the trampoline. */
665 	regs->tf_fs     = context->sc_fs;
666 	regs->tf_es     = context->sc_es;
667 	regs->tf_ds     = context->sc_ds;
668 	regs->tf_edi    = context->sc_edi;
669 	regs->tf_esi    = context->sc_esi;
670 	regs->tf_ebp    = context->sc_ebp;
671 	regs->tf_ebx    = context->sc_ebx;
672 	regs->tf_edx    = context->sc_edx;
673 	regs->tf_ecx    = context->sc_ecx;
674 	regs->tf_eax    = context->sc_eax;
675 	regs->tf_eip    = context->sc_eip;
676 	regs->tf_cs     = context->sc_cs;
677 	regs->tf_eflags = eflags;
678 	regs->tf_esp    = context->sc_esp_at_signal;
679 	regs->tf_ss     = context->sc_ss;
680 
681 	/*
682 	 * call sigaltstack & ignore results..
683 	 */
684 	lss = &uc.uc_stack;
685 	ss.ss_sp = lss->ss_sp;
686 	ss.ss_size = lss->ss_size;
687 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
688 
689 #ifdef DEBUG
690 	if (ldebug(rt_sigreturn))
691 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
692 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
693 #endif
694 	(void)kern_sigaltstack(td, &ss, NULL);
695 
696 	return (EJUSTRETURN);
697 }
698 
699 /*
700  * MPSAFE
701  */
702 static void
703 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
704 {
705 	args[0] = tf->tf_ebx;
706 	args[1] = tf->tf_ecx;
707 	args[2] = tf->tf_edx;
708 	args[3] = tf->tf_esi;
709 	args[4] = tf->tf_edi;
710 	args[5] = tf->tf_ebp;	/* Unconfirmed */
711 	*params = NULL;		/* no copyin */
712 }
713 
714 
715 
716 /*
717  * Dump core, into a file named as described in the comments for
718  * expand_name(), unless the process was setuid/setgid.
719  */
720 static int
721 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
722 {
723 	struct proc *p = td->td_proc;
724 	struct ucred *cred = td->td_ucred;
725 	struct vmspace *vm = p->p_vmspace;
726 	char *tempuser;
727 	int error;
728 
729 	if (ctob((uarea_pages + kstack_pages) +
730 	    vm->vm_dsize + vm->vm_ssize) >= limit)
731 		return (EFAULT);
732 	tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
733 	    M_WAITOK | M_ZERO);
734 	if (tempuser == NULL)
735 		return (ENOMEM);
736 	PROC_LOCK(p);
737 	fill_kinfo_proc(p, &p->p_uarea->u_kproc);
738 	PROC_UNLOCK(p);
739 	bcopy(p->p_uarea, tempuser, sizeof(struct user));
740 	bcopy(td->td_frame,
741 	    tempuser + ctob(uarea_pages) +
742 	    ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
743 	    sizeof(struct trapframe));
744 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
745 	    ctob(uarea_pages + kstack_pages),
746 	    (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
747 	    (int *)NULL, td);
748 	free(tempuser, M_TEMP);
749 	if (error == 0)
750 		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
751 		    (int)ctob(vm->vm_dsize),
752 		    (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
753 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
754 	if (error == 0)
755 		error = vn_rdwr_inchunks(UIO_WRITE, vp,
756 		    (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
757 		    round_page(ctob(vm->vm_ssize)),
758 		    (off_t)ctob(uarea_pages + kstack_pages) +
759 		        ctob(vm->vm_dsize), UIO_USERSPACE,
760 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
761 	return (error);
762 }
763 /*
764  * If a linux binary is exec'ing something, try this image activator
765  * first.  We override standard shell script execution in order to
766  * be able to modify the interpreter path.  We only do this if a linux
767  * binary is doing the exec, so we do not create an EXEC module for it.
768  */
769 static int	exec_linux_imgact_try(struct image_params *iparams);
770 
771 static int
772 exec_linux_imgact_try(struct image_params *imgp)
773 {
774     const char *head = (const char *)imgp->image_header;
775     int error = -1;
776 
777     /*
778      * The interpreter for shell scripts run from a linux binary needs
779      * to be located in /compat/linux if possible in order to recursively
780      * maintain linux path emulation.
781      */
782     if (((const short *)head)[0] == SHELLMAGIC) {
783 	    /*
784 	     * Run our normal shell image activator.  If it succeeds attempt
785 	     * to use the alternate path for the interpreter.  If an alternate
786 	     * path is found, use our stringspace to store it.
787 	     */
788 	    if ((error = exec_shell_imgact(imgp)) == 0) {
789 		    char *rpath = NULL;
790 
791 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
792 			imgp->interpreter_name, &rpath, 0);
793 		    if (rpath != imgp->interpreter_name) {
794 			    int len = strlen(rpath) + 1;
795 
796 			    if (len <= MAXSHELLCMDLEN) {
797 				    memcpy(imgp->interpreter_name, rpath, len);
798 			    }
799 			    free(rpath, M_TEMP);
800 		    }
801 	    }
802     }
803     return(error);
804 }
805 
/*
 * System call vector for Linux a.out binaries.
 *
 * The initializer is positional, so its order must follow the layout of
 * struct sysentvec, which is declared outside this file.  The notes below
 * describe the values supplied; they do not name the structure's fields.
 * It differs from elf_linux_sysvec only in the fixup routine, the name
 * string and the core dump routine.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* declared size of linux_sysent[] */
	linux_sysent,		/* the Linux system call table */
	0xff,			/* NOTE(review): presumably a mask; confirm */
	LINUX_SIGTBLSZ,		/* size of the signal table below */
	bsd_to_linux_signal,	/* FreeBSD -> Linux signal numbers */
	ELAST + 1,		/* size of the errno table below */
	bsd_to_linux_errno,	/* FreeBSD -> Linux errno values */
	translate_traps,	/* trap signal translation */
	linux_fixup,		/* a.out stack fixup */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal code */
	&linux_szsigcode,	/* size of the signal code */
	linux_prepsyscall,	/* system call argument fetch */
	"Linux a.out",		/* name */
	linux_aout_coredump,	/* core dump routine */
	exec_linux_imgact_try,	/* shell script image activator override */
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_setregs
};
833 
/*
 * System call vector for Linux ELF binaries; both Linux brand entries in
 * this file point at it.
 *
 * The initializer is positional, so its order must follow the layout of
 * struct sysentvec, which is declared outside this file.  The notes below
 * describe the values supplied; they do not name the structure's fields.
 * It differs from linux_sysvec only in the fixup routine, the name
 * string and the core dump routine.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* declared size of linux_sysent[] */
	linux_sysent,		/* the Linux system call table */
	0xff,			/* NOTE(review): presumably a mask; confirm */
	LINUX_SIGTBLSZ,		/* size of the signal table below */
	bsd_to_linux_signal,	/* FreeBSD -> Linux signal numbers */
	ELAST + 1,		/* size of the errno table below */
	bsd_to_linux_errno,	/* FreeBSD -> Linux errno values */
	translate_traps,	/* trap signal translation */
	elf_linux_fixup,	/* ELF stack fixup (writes the aux vector) */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal code */
	&linux_szsigcode,	/* size of the signal code */
	linux_prepsyscall,	/* system call argument fetch */
	"Linux ELF",		/* name */
	elf32_coredump,		/* core dump routine */
	exec_linux_imgact_try,	/* shell script image activator override */
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_setregs
};
861 
862 static Elf32_Brandinfo linux_brand = {
863 					ELFOSABI_LINUX,
864 					EM_386,
865 					"Linux",
866 					"/compat/linux",
867 					"/lib/ld-linux.so.1",
868 					&elf_linux_sysvec
869 				 };
870 
871 static Elf32_Brandinfo linux_glibc2brand = {
872 					ELFOSABI_LINUX,
873 					EM_386,
874 					"Linux",
875 					"/compat/linux",
876 					"/lib/ld-linux.so.2",
877 					&elf_linux_sysvec
878 				 };
879 
880 Elf32_Brandinfo *linux_brandlist[] = {
881 					&linux_brand,
882 					&linux_glibc2brand,
883 					NULL
884 				};
885 
886 static int
887 linux_elf_modevent(module_t mod, int type, void *data)
888 {
889 	Elf32_Brandinfo **brandinfo;
890 	int error;
891 	struct linux_ioctl_handler **lihp;
892 
893 	error = 0;
894 
895 	switch(type) {
896 	case MOD_LOAD:
897 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
898 		     ++brandinfo)
899 			if (elf32_insert_brand_entry(*brandinfo) < 0)
900 				error = EINVAL;
901 		if (error == 0) {
902 			SET_FOREACH(lihp, linux_ioctl_handler_set)
903 				linux_ioctl_register_handler(*lihp);
904 			if (bootverbose)
905 				printf("Linux ELF exec handler installed\n");
906 		} else
907 			printf("cannot insert Linux ELF brand handler\n");
908 		break;
909 	case MOD_UNLOAD:
910 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
911 		     ++brandinfo)
912 			if (elf32_brand_inuse(*brandinfo))
913 				error = EBUSY;
914 		if (error == 0) {
915 			for (brandinfo = &linux_brandlist[0];
916 			     *brandinfo != NULL; ++brandinfo)
917 				if (elf32_remove_brand_entry(*brandinfo) < 0)
918 					error = EINVAL;
919 		}
920 		if (error == 0) {
921 			SET_FOREACH(lihp, linux_ioctl_handler_set)
922 				linux_ioctl_unregister_handler(*lihp);
923 			if (bootverbose)
924 				printf("Linux ELF exec handler removed\n");
925 		} else
926 			printf("Could not deinstall ELF interpreter entry\n");
927 		break;
928 	default:
929 		break;
930 	}
931 	return error;
932 }
933 
/*
 * Module description: the module name, its event handler, and a third
 * member left as 0.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Declare the "linuxelf" module at SI_SUB_EXEC with SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
941