xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision eacee0ff7ec955b32e09515246bd97b6edcd2b0f)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 
51 #include <vm/vm.h>
52 #include <vm/vm_param.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_extern.h>
55 #include <sys/exec.h>
56 #include <sys/kernel.h>
57 #include <sys/module.h>
58 #include <machine/cpu.h>
59 #include <sys/mutex.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <compat/linux/linux_signal.h>
64 #include <compat/linux/linux_util.h>
65 
66 MODULE_VERSION(linux, 1);
67 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
68 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
69 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
70 
71 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72 
73 #if BYTE_ORDER == LITTLE_ENDIAN
74 #define SHELLMAGIC      0x2123 /* #! */
75 #else
76 #define SHELLMAGIC      0x2321
77 #endif
78 
79 /*
80  * Allow the sendsig functions to use the ldebug() facility
81  * even though they are not syscalls themselves. Map them
82  * to syscall 0. This is slightly less bogus than using
83  * ldebug(sigreturn).
84  */
85 #define	LINUX_SYS_linux_rt_sendsig	0
86 #define	LINUX_SYS_linux_sendsig		0
87 
88 extern char linux_sigcode[];
89 extern int linux_szsigcode;
90 
91 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
92 
93 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
94 
95 static int	linux_fixup __P((register_t **stack_base,
96 				 struct image_params *iparams));
97 static int	elf_linux_fixup __P((register_t **stack_base,
98 				     struct image_params *iparams));
99 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
100 				       u_int *code, caddr_t *params));
101 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
102 				   u_long code));
103 
104 /*
105  * Linux syscalls return negative errno's, we do positive and map them
106  */
107 static int bsd_to_linux_errno[ELAST + 1] = {
108   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
109  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
110  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
111  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
112  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
113 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
114 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
115 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
116   	-6, -6, -43, -42, -75, -6, -84
117 };
118 
119 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
120 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
121 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
122 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
123 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
124 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
125 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
126 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
127 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
128 };
129 
130 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
131 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
132 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
133 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
134 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
135 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
136 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
137 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
138 	SIGIO, SIGURG, 0
139 };
140 
/* Linux trap number reported when a native trap code has no mapping. */
#define	LINUX_T_UNKNOWN	255

/*
 * Native trap code -> Linux trap number, indexed by the native code.
 * Codes without a named trap map to LINUX_T_UNKNOWN.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/*  0: (none) */
	6,			/*  1: T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/*  2: (none) */
	3,			/*  3: T_BPTFLT */
	LINUX_T_UNKNOWN,	/*  4: (none) */
	LINUX_T_UNKNOWN,	/*  5: (none) */
	16,			/*  6: T_ARITHTRAP */
	254,			/*  7: T_ASTFLT */
	LINUX_T_UNKNOWN,	/*  8: (none) */
	13,			/*  9: T_PROTFLT */
	1,			/* 10: T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11: (none) */
	14,			/* 12: T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13: (none) */
	17,			/* 14: T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15: (none) */
	LINUX_T_UNKNOWN,	/* 16: (none) */
	LINUX_T_UNKNOWN,	/* 17: (none) */
	0,			/* 18: T_DIVIDE */
	2,			/* 19: T_NMI */
	4,			/* 20: T_OFLOW */
	5,			/* 21: T_BOUND */
	7,			/* 22: T_DNA */
	8,			/* 23: T_DOUBLEFLT */
	9,			/* 24: T_FPOPFLT */
	10,			/* 25: T_TSSFLT */
	11,			/* 26: T_SEGNPFLT */
	12,			/* 27: T_STKFLT */
	18,			/* 28: T_MCHK */
	19,			/* 29: T_XMMFLT */
	15			/* 30: T_RESERVED */
};

/*
 * Bounds-checked lookup in the table above; anything past the end of the
 * table yields LINUX_T_UNKNOWN.  Note that `code' is evaluated twice.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(_bsd_to_linux_trapcode[0]) ?				\
	    _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
179 
180 /*
181  * If FreeBSD & Linux have a difference of opinion about what a trap
182  * means, deal with it here.
183  *
184  * MPSAFE
185  */
186 static int
187 translate_traps(int signal, int trap_code)
188 {
189 	if (signal != SIGBUS)
190 		return signal;
191 	switch (trap_code) {
192 	case T_PROTFLT:
193 	case T_TSSFLT:
194 	case T_DOUBLEFLT:
195 	case T_PAGEFLT:
196 		return SIGSEGV;
197 	default:
198 		return signal;
199 	}
200 }
201 
202 static int
203 linux_fixup(register_t **stack_base, struct image_params *imgp)
204 {
205 	register_t *argv, *envp;
206 
207 	argv = *stack_base;
208 	envp = *stack_base + (imgp->argc + 1);
209 	(*stack_base)--;
210 	**stack_base = (intptr_t)(void *)envp;
211 	(*stack_base)--;
212 	**stack_base = (intptr_t)(void *)argv;
213 	(*stack_base)--;
214 	**stack_base = imgp->argc;
215 	return 0;
216 }
217 
218 static int
219 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
220 {
221 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
222 	register_t *pos;
223 
224 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
225 
226 	if (args->trace) {
227 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
228 	}
229 	if (args->execfd != -1) {
230 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
231 	}
232 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
233 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
234 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
235 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
236 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
237 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
238 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
239 	PROC_LOCK(imgp->proc);
240 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
241 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
242 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
243 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
244 	PROC_UNLOCK(imgp->proc);
245 	AUXARGS_ENTRY(pos, AT_NULL, 0);
246 
247 	free(imgp->auxargs, M_TEMP);
248 	imgp->auxargs = NULL;
249 
250 	(*stack_base)--;
251 	**stack_base = (long)imgp->argc;
252 	return 0;
253 }
254 
255 extern int _ucodesel, _udatasel;
256 extern unsigned long linux_sznonrtsigcode;
257 
258 static void
259 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
260 {
261 	register struct thread *td = curthread;
262 	register struct proc *p = td->td_proc;
263 	register struct trapframe *regs;
264 	struct l_rt_sigframe *fp, frame;
265 	int oonstack;
266 
267 	PROC_LOCK_ASSERT(p, MA_OWNED);
268 	regs = td->td_frame;
269 	oonstack = sigonstack(regs->tf_esp);
270 
271 #ifdef DEBUG
272 	if (ldebug(rt_sendsig))
273 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
274 		    catcher, sig, (void*)mask, code);
275 #endif
276 	/*
277 	 * Allocate space for the signal handler context.
278 	 */
279 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
280 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
281 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
282 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
283 	} else
284 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
285 	PROC_UNLOCK(p);
286 
287 	/*
288 	 * grow() will return FALSE if the fp will not fit inside the stack
289 	 *	and the stack can not be grown. useracc will return FALSE
290 	 *	if access is denied.
291 	 */
292 	if ((grow_stack (p, (int)fp) == FALSE) ||
293 	    !useracc((caddr_t)fp, sizeof (struct l_rt_sigframe),
294 	    VM_PROT_WRITE)) {
295 		/*
296 		 * Process has trashed its stack; give it an illegal
297 		 * instruction to halt it in its tracks.
298 		 */
299 		PROC_LOCK(p);
300 		SIGACTION(p, SIGILL) = SIG_DFL;
301 		SIGDELSET(p->p_sigignore, SIGILL);
302 		SIGDELSET(p->p_sigcatch, SIGILL);
303 		SIGDELSET(p->p_sigmask, SIGILL);
304 #ifdef DEBUG
305 		if (ldebug(rt_sendsig))
306 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
307 			    fp, oonstack);
308 #endif
309 		psignal(p, SIGILL);
310 		return;
311 	}
312 
313 	/*
314 	 * Build the argument list for the signal handler.
315 	 */
316 	if (p->p_sysent->sv_sigtbl)
317 		if (sig <= p->p_sysent->sv_sigsize)
318 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
319 
320 	frame.sf_handler = catcher;
321 	frame.sf_sig = sig;
322 	frame.sf_siginfo = &fp->sf_si;
323 	frame.sf_ucontext = &fp->sf_sc;
324 
325 	/* Fill siginfo structure. */
326 	frame.sf_si.lsi_signo = sig;
327 	frame.sf_si.lsi_code = code;
328 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
329 
330 	/*
331 	 * Build the signal context to be used by sigreturn.
332 	 */
333 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
334 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
335 
336 	PROC_LOCK(p);
337 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
338 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
339 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
340 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
341 	PROC_UNLOCK(p);
342 
343 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
344 
345 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
346 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
347 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
348 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
349 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
350 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
351 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
352 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
353 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
354 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
355 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
356 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
357 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
358 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
359 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
360 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
361 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
362 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
363 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
364 
365 #ifdef DEBUG
366 	if (ldebug(rt_sendsig))
367 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
368 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
369 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
370 #endif
371 
372 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
373 		/*
374 		 * Process has trashed its stack; give it an illegal
375 		 * instruction to halt it in its tracks.
376 		 */
377 		PROC_LOCK(p);
378 		sigexit(td, SIGILL);
379 		/* NOTREACHED */
380 	}
381 
382 	/*
383 	 * Build context to run handler in.
384 	 */
385 	regs->tf_esp = (int)fp;
386 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
387 	    linux_sznonrtsigcode;
388 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
389 	regs->tf_cs = _ucodesel;
390 	regs->tf_ds = _udatasel;
391 	regs->tf_es = _udatasel;
392 	regs->tf_fs = _udatasel;
393 	regs->tf_ss = _udatasel;
394 	PROC_LOCK(p);
395 }
396 
397 
398 /*
399  * Send an interrupt to process.
400  *
401  * Stack is set up to allow sigcode stored
402  * in u. to call routine, followed by kcall
403  * to sigreturn routine below.  After sigreturn
404  * resets the signal mask, the stack, and the
405  * frame pointer, it returns to the user
406  * specified pc, psl.
407  */
408 
409 static void
410 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
411 {
412 	register struct thread *td = curthread;
413 	register struct proc *p = td->td_proc;
414 	register struct trapframe *regs;
415 	struct l_sigframe *fp, frame;
416 	l_sigset_t lmask;
417 	int oonstack, i;
418 
419 	PROC_LOCK_ASSERT(p, MA_OWNED);
420 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
421 		/* Signal handler installed with SA_SIGINFO. */
422 		linux_rt_sendsig(catcher, sig, mask, code);
423 		return;
424 	}
425 
426 	regs = td->td_frame;
427 	oonstack = sigonstack(regs->tf_esp);
428 
429 #ifdef DEBUG
430 	if (ldebug(sendsig))
431 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
432 		    catcher, sig, (void*)mask, code);
433 #endif
434 
435 	/*
436 	 * Allocate space for the signal handler context.
437 	 */
438 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
439 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
440 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
441 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
442 	} else
443 		fp = (struct l_sigframe *)regs->tf_esp - 1;
444 	PROC_UNLOCK(p);
445 
446 	/*
447 	 * grow() will return FALSE if the fp will not fit inside the stack
448 	 *	and the stack can not be grown. useracc will return FALSE
449 	 *	if access is denied.
450 	 */
451 	if ((grow_stack (p, (int)fp) == FALSE) ||
452 	    !useracc((caddr_t)fp, sizeof (struct l_sigframe),
453 	    VM_PROT_WRITE)) {
454 		/*
455 		 * Process has trashed its stack; give it an illegal
456 		 * instruction to halt it in its tracks.
457 		 */
458 		PROC_LOCK(p);
459 		SIGACTION(p, SIGILL) = SIG_DFL;
460 		SIGDELSET(p->p_sigignore, SIGILL);
461 		SIGDELSET(p->p_sigcatch, SIGILL);
462 		SIGDELSET(p->p_sigmask, SIGILL);
463 		psignal(p, SIGILL);
464 		return;
465 	}
466 
467 	/*
468 	 * Build the argument list for the signal handler.
469 	 */
470 	if (p->p_sysent->sv_sigtbl)
471 		if (sig <= p->p_sysent->sv_sigsize)
472 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
473 
474 	frame.sf_handler = catcher;
475 	frame.sf_sig = sig;
476 
477 	bsd_to_linux_sigset(mask, &lmask);
478 
479 	/*
480 	 * Build the signal context to be used by sigreturn.
481 	 */
482 	frame.sf_sc.sc_mask   = lmask.__bits[0];
483 	frame.sf_sc.sc_gs     = rgs();
484 	frame.sf_sc.sc_fs     = regs->tf_fs;
485 	frame.sf_sc.sc_es     = regs->tf_es;
486 	frame.sf_sc.sc_ds     = regs->tf_ds;
487 	frame.sf_sc.sc_edi    = regs->tf_edi;
488 	frame.sf_sc.sc_esi    = regs->tf_esi;
489 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
490 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
491 	frame.sf_sc.sc_edx    = regs->tf_edx;
492 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
493 	frame.sf_sc.sc_eax    = regs->tf_eax;
494 	frame.sf_sc.sc_eip    = regs->tf_eip;
495 	frame.sf_sc.sc_cs     = regs->tf_cs;
496 	frame.sf_sc.sc_eflags = regs->tf_eflags;
497 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
498 	frame.sf_sc.sc_ss     = regs->tf_ss;
499 	frame.sf_sc.sc_err    = regs->tf_err;
500 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
501 
502 	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
503 
504 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
505 		frame.sf_extramask[i] = lmask.__bits[i+1];
506 
507 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
508 		/*
509 		 * Process has trashed its stack; give it an illegal
510 		 * instruction to halt it in its tracks.
511 		 */
512 		PROC_LOCK(p);
513 		sigexit(td, SIGILL);
514 		/* NOTREACHED */
515 	}
516 
517 	/*
518 	 * Build context to run handler in.
519 	 */
520 	regs->tf_esp = (int)fp;
521 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
522 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
523 	regs->tf_cs = _ucodesel;
524 	regs->tf_ds = _udatasel;
525 	regs->tf_es = _udatasel;
526 	regs->tf_fs = _udatasel;
527 	regs->tf_ss = _udatasel;
528 	PROC_LOCK(p);
529 }
530 
531 /*
532  * System call to cleanup state after a signal
533  * has been taken.  Reset signal mask and
534  * stack state from context left by sendsig (above).
535  * Return to previous pc and psl as specified by
536  * context left by sendsig. Check carefully to
537  * make sure that the user has not modified the
538  * psl to gain improper privileges or to cause
539  * a machine fault.
540  */
541 int
542 linux_sigreturn(td, args)
543 	struct thread *td;
544 	struct linux_sigreturn_args *args;
545 {
546 	struct proc *p = td->td_proc;
547 	struct l_sigframe frame;
548 	register struct trapframe *regs;
549 	l_sigset_t lmask;
550 	int eflags, i;
551 
552 	regs = td->td_frame;
553 
554 #ifdef DEBUG
555 	if (ldebug(sigreturn))
556 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
557 #endif
558 	/*
559 	 * The trampoline code hands us the sigframe.
560 	 * It is unsafe to keep track of it ourselves, in the event that a
561 	 * program jumps out of a signal handler.
562 	 */
563 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
564 		return (EFAULT);
565 
566 	/*
567 	 * Check for security violations.
568 	 */
569 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
570 	eflags = frame.sf_sc.sc_eflags;
571 	/*
572 	 * XXX do allow users to change the privileged flag PSL_RF.  The
573 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
574 	 * sometimes set it there too.  tf_eflags is kept in the signal
575 	 * context during signal handling and there is no other place
576 	 * to remember it, so the PSL_RF bit may be corrupted by the
577 	 * signal handler without us knowing.  Corruption of the PSL_RF
578 	 * bit at worst causes one more or one less debugger trap, so
579 	 * allowing it is fairly harmless.
580 	 */
581 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
582     		return(EINVAL);
583 	}
584 
585 	/*
586 	 * Don't allow users to load a valid privileged %cs.  Let the
587 	 * hardware check for invalid selectors, excess privilege in
588 	 * other selectors, invalid %eip's and invalid %esp's.
589 	 */
590 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
591 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
592 		trapsignal(p, SIGBUS, T_PROTFLT);
593 		return(EINVAL);
594 	}
595 
596 	lmask.__bits[0] = frame.sf_sc.sc_mask;
597 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
598 		lmask.__bits[i+1] = frame.sf_extramask[i];
599 	PROC_LOCK(p);
600 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
601 	SIG_CANTMASK(p->p_sigmask);
602 	PROC_UNLOCK(p);
603 
604 	/*
605 	 * Restore signal context.
606 	 */
607 	/* %gs was restored by the trampoline. */
608 	regs->tf_fs     = frame.sf_sc.sc_fs;
609 	regs->tf_es     = frame.sf_sc.sc_es;
610 	regs->tf_ds     = frame.sf_sc.sc_ds;
611 	regs->tf_edi    = frame.sf_sc.sc_edi;
612 	regs->tf_esi    = frame.sf_sc.sc_esi;
613 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
614 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
615 	regs->tf_edx    = frame.sf_sc.sc_edx;
616 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
617 	regs->tf_eax    = frame.sf_sc.sc_eax;
618 	regs->tf_eip    = frame.sf_sc.sc_eip;
619 	regs->tf_cs     = frame.sf_sc.sc_cs;
620 	regs->tf_eflags = eflags;
621 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
622 	regs->tf_ss     = frame.sf_sc.sc_ss;
623 
624 	return (EJUSTRETURN);
625 }
626 
627 /*
628  * System call to cleanup state after a signal
629  * has been taken.  Reset signal mask and
630  * stack state from context left by rt_sendsig (above).
631  * Return to previous pc and psl as specified by
632  * context left by sendsig. Check carefully to
633  * make sure that the user has not modified the
634  * psl to gain improper privileges or to cause
635  * a machine fault.
636  */
637 int
638 linux_rt_sigreturn(td, args)
639 	struct thread *td;
640 	struct linux_rt_sigreturn_args *args;
641 {
642 	struct proc *p = td->td_proc;
643 	struct sigaltstack_args sasargs;
644 	struct l_ucontext uc;
645 	struct l_sigcontext *context;
646 	l_stack_t *lss;
647 	stack_t *ss;
648 	register struct trapframe *regs;
649 	int eflags;
650 	caddr_t sg = stackgap_init();
651 
652 	regs = td->td_frame;
653 
654 #ifdef DEBUG
655 	if (ldebug(rt_sigreturn))
656 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
657 #endif
658 	/*
659 	 * The trampoline code hands us the ucontext.
660 	 * It is unsafe to keep track of it ourselves, in the event that a
661 	 * program jumps out of a signal handler.
662 	 */
663 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
664 		return (EFAULT);
665 
666 	context = &uc.uc_mcontext;
667 
668 	/*
669 	 * Check for security violations.
670 	 */
671 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
672 	eflags = context->sc_eflags;
673 	/*
674 	 * XXX do allow users to change the privileged flag PSL_RF.  The
675 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
676 	 * sometimes set it there too.  tf_eflags is kept in the signal
677 	 * context during signal handling and there is no other place
678 	 * to remember it, so the PSL_RF bit may be corrupted by the
679 	 * signal handler without us knowing.  Corruption of the PSL_RF
680 	 * bit at worst causes one more or one less debugger trap, so
681 	 * allowing it is fairly harmless.
682 	 */
683 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
684     		return(EINVAL);
685 	}
686 
687 	/*
688 	 * Don't allow users to load a valid privileged %cs.  Let the
689 	 * hardware check for invalid selectors, excess privilege in
690 	 * other selectors, invalid %eip's and invalid %esp's.
691 	 */
692 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
693 	if (!CS_SECURE(context->sc_cs)) {
694 		trapsignal(p, SIGBUS, T_PROTFLT);
695 		return(EINVAL);
696 	}
697 
698 	PROC_LOCK(p);
699 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
700 	SIG_CANTMASK(p->p_sigmask);
701 	PROC_UNLOCK(p);
702 
703 	/*
704 	 * Restore signal context
705 	 */
706 	/* %gs was restored by the trampoline. */
707 	regs->tf_fs     = context->sc_fs;
708 	regs->tf_es     = context->sc_es;
709 	regs->tf_ds     = context->sc_ds;
710 	regs->tf_edi    = context->sc_edi;
711 	regs->tf_esi    = context->sc_esi;
712 	regs->tf_ebp    = context->sc_ebp;
713 	regs->tf_ebx    = context->sc_ebx;
714 	regs->tf_edx    = context->sc_edx;
715 	regs->tf_ecx    = context->sc_ecx;
716 	regs->tf_eax    = context->sc_eax;
717 	regs->tf_eip    = context->sc_eip;
718 	regs->tf_cs     = context->sc_cs;
719 	regs->tf_eflags = eflags;
720 	regs->tf_esp    = context->sc_esp_at_signal;
721 	regs->tf_ss     = context->sc_ss;
722 
723 	/*
724 	 * call sigaltstack & ignore results..
725 	 */
726 	ss = stackgap_alloc(&sg, sizeof(stack_t));
727 	lss = &uc.uc_stack;
728 	ss->ss_sp = lss->ss_sp;
729 	ss->ss_size = lss->ss_size;
730 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
731 
732 #ifdef DEBUG
733 	if (ldebug(rt_sigreturn))
734 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
735 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
736 #endif
737 	sasargs.ss = ss;
738 	sasargs.oss = NULL;
739 	(void) sigaltstack(td, &sasargs);
740 
741 	return (EJUSTRETURN);
742 }
743 
744 /*
745  * MPSAFE
746  */
747 static void
748 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
749 {
750 	args[0] = tf->tf_ebx;
751 	args[1] = tf->tf_ecx;
752 	args[2] = tf->tf_edx;
753 	args[3] = tf->tf_esi;
754 	args[4] = tf->tf_edi;
755 	*params = NULL;		/* no copyin */
756 }
757 
758 /*
759  * If a linux binary is exec'ing something, try this image activator
760  * first.  We override standard shell script execution in order to
761  * be able to modify the interpreter path.  We only do this if a linux
762  * binary is doing the exec, so we do not create an EXEC module for it.
763  */
764 static int	exec_linux_imgact_try __P((struct image_params *iparams));
765 
766 static int
767 exec_linux_imgact_try(imgp)
768     struct image_params *imgp;
769 {
770     const char *head = (const char *)imgp->image_header;
771     int error = -1;
772 
773     /*
774      * The interpreter for shell scripts run from a linux binary needs
775      * to be located in /compat/linux if possible in order to recursively
776      * maintain linux path emulation.
777      */
778     if (((const short *)head)[0] == SHELLMAGIC) {
779 	    /*
780 	     * Run our normal shell image activator.  If it succeeds attempt
781 	     * to use the alternate path for the interpreter.  If an alternate
782 	     * path is found, use our stringspace to store it.
783 	     */
784 	    if ((error = exec_shell_imgact(imgp)) == 0) {
785 		    char *rpath = NULL;
786 
787 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
788 			imgp->interpreter_name, &rpath, 0);
789 		    if (rpath != imgp->interpreter_name) {
790 			    int len = strlen(rpath) + 1;
791 
792 			    if (len <= MAXSHELLCMDLEN) {
793 				    memcpy(imgp->interpreter_name, rpath, len);
794 			    }
795 			    free(rpath, M_TEMP);
796 		    }
797 	    }
798     }
799     return(error);
800 }
801 
802 struct sysentvec linux_sysvec = {
803 	LINUX_SYS_MAXSYSCALL,
804 	linux_sysent,
805 	0xff,
806 	LINUX_SIGTBLSZ,
807 	bsd_to_linux_signal,
808 	ELAST + 1,
809 	bsd_to_linux_errno,
810 	translate_traps,
811 	linux_fixup,
812 	linux_sendsig,
813 	linux_sigcode,
814 	&linux_szsigcode,
815 	linux_prepsyscall,
816 	"Linux a.out",
817 	aout_coredump,
818 	exec_linux_imgact_try,
819 	LINUX_MINSIGSTKSZ
820 };
821 
822 struct sysentvec elf_linux_sysvec = {
823 	LINUX_SYS_MAXSYSCALL,
824 	linux_sysent,
825 	0xff,
826 	LINUX_SIGTBLSZ,
827 	bsd_to_linux_signal,
828 	ELAST + 1,
829 	bsd_to_linux_errno,
830 	translate_traps,
831 	elf_linux_fixup,
832 	linux_sendsig,
833 	linux_sigcode,
834 	&linux_szsigcode,
835 	linux_prepsyscall,
836 	"Linux ELF",
837 	elf_coredump,
838 	exec_linux_imgact_try,
839 	LINUX_MINSIGSTKSZ
840 };
841 
842 static Elf32_Brandinfo linux_brand = {
843 					ELFOSABI_LINUX,
844 					"Linux",
845 					"/compat/linux",
846 					"/lib/ld-linux.so.1",
847 					&elf_linux_sysvec
848 				 };
849 
850 static Elf32_Brandinfo linux_glibc2brand = {
851 					ELFOSABI_LINUX,
852 					"Linux",
853 					"/compat/linux",
854 					"/lib/ld-linux.so.2",
855 					&elf_linux_sysvec
856 				 };
857 
858 Elf32_Brandinfo *linux_brandlist[] = {
859 					&linux_brand,
860 					&linux_glibc2brand,
861 					NULL
862 				};
863 
864 static int
865 linux_elf_modevent(module_t mod, int type, void *data)
866 {
867 	Elf32_Brandinfo **brandinfo;
868 	int error;
869 	struct linux_ioctl_handler **lihp;
870 
871 	error = 0;
872 
873 	switch(type) {
874 	case MOD_LOAD:
875 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
876 		     ++brandinfo)
877 			if (elf_insert_brand_entry(*brandinfo) < 0)
878 				error = EINVAL;
879 		if (error == 0) {
880 			SET_FOREACH(lihp, linux_ioctl_handler_set)
881 				linux_ioctl_register_handler(*lihp);
882 			if (bootverbose)
883 				printf("Linux ELF exec handler installed\n");
884 		} else
885 			printf("cannot insert Linux ELF brand handler\n");
886 		break;
887 	case MOD_UNLOAD:
888 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
889 		     ++brandinfo)
890 			if (elf_brand_inuse(*brandinfo))
891 				error = EBUSY;
892 		if (error == 0) {
893 			for (brandinfo = &linux_brandlist[0];
894 			     *brandinfo != NULL; ++brandinfo)
895 				if (elf_remove_brand_entry(*brandinfo) < 0)
896 					error = EINVAL;
897 		}
898 		if (error == 0) {
899 			SET_FOREACH(lihp, linux_ioctl_handler_set)
900 				linux_ioctl_unregister_handler(*lihp);
901 			if (bootverbose)
902 				printf("Linux ELF exec handler removed\n");
903 		} else
904 			printf("Could not deinstall ELF interpreter entry\n");
905 		break;
906 	default:
907 		break;
908 	}
909 	return error;
910 }
911 
912 static moduledata_t linux_elf_mod = {
913 	"linuxelf",
914 	linux_elf_modevent,
915 	0
916 };
917 
918 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
919