xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 6af83ee0d2941d18880b6aaa2b4facd1d30c6106)
1 /*-
 * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
34 
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/exec.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/signalvar.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/vnode.h>
56 
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_param.h>
64 
65 #include <machine/cpu.h>
66 #include <machine/md_var.h>
67 #include <machine/pcb.h>
68 
69 #include <i386/linux/linux.h>
70 #include <i386/linux/linux_proto.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 
/*
 * Kernel module metadata: this module is "linux", version 1, and it
 * declares dependencies on the sysvmsg, sysvsem and sysvshm modules
 * (version arguments 1, 1, 1; see MODULE_DEPEND(9) for their meaning).
 */
MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

/* Defines the malloc type M_LINUX ("linux" / "Linux mode structures"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
81 
/*
 * SHELLMAGIC is the value of the two characters "#!" when the first two
 * bytes of an image header are read as a short; the constant therefore
 * depends on the byte order.  It is compared against the image header in
 * exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
87 
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Execute the x87 "fldcw" instruction with *addr as its memory operand. */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
/* FPU control word value loaded by exec_linux_setregs() via fldcw(). */
#define	__LINUX_NPXCW__		0x37f
99 
/*
 * Signal code blob and its size, defined outside this file.  Both are
 * installed in the sysentvecs below; presumably this is the user-space
 * signal trampoline -- confirm against its definition.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table with LINUX_SYS_MAXSYSCALL entries. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Set of ioctl handlers (un)registered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations of the static hooks placed in the sysentvecs. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
		    u_long code);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
117 
/*
 * Linux syscalls return negative errno values, whereas we use positive
 * ones and map them.  The table has ELAST + 1 entries, one per native
 * errno value, and each entry is the negated Linux errno to hand back.
 * It is installed in both sysentvecs below.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/* 0-9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10-19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20-29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30-39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40-49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50-59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60-69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70-79 */
	-6, -6, -43, -42, -75, -6, -84				/* 80-86 */
};
132 
/*
 * Native-to-Linux signal number table, installed in both sysentvecs
 * below.  The sendsig routines index the sysentvec signal table with
 * _SIG_IDX(sig), so entry 0 corresponds to the first signal number.
 * NOTE(review): the 0 entries presumably mark native signals without a
 * Linux counterpart -- confirm.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
143 
/*
 * Linux-to-native signal number table (the reverse direction of
 * bsd_to_linux_signal).  It is not referenced in this file; its users
 * live elsewhere.
 * NOTE(review): presumably indexed by Linux signal number minus one, like
 * the table above -- confirm against the callers.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
154 
/* Linux trap number reported when a native trap code has no mapping. */
#define LINUX_T_UNKNOWN  255

/*
 * Native trap code to Linux trap number, indexed by the native code.
 * The table is only ever read, hence const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a native trap code into the Linux trap number.
 *
 * This is a function rather than a macro so that the argument is
 * evaluated exactly once and is type-checked.  The comparison is done
 * on an unsigned value, so any code outside the table (including a
 * negative value converted by the caller) yields LINUX_T_UNKNOWN.
 */
static __inline int
bsd_to_linux_trapcode(unsigned long code)
{
	if (code < sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]))
		return (_bsd_to_linux_trapcode[code]);
	return (LINUX_T_UNKNOWN);
}
193 
194 /*
195  * If FreeBSD & Linux have a difference of opinion about what a trap
196  * means, deal with it here.
197  *
198  * MPSAFE
199  */
200 static int
201 translate_traps(int signal, int trap_code)
202 {
203 	if (signal != SIGBUS)
204 		return signal;
205 	switch (trap_code) {
206 	case T_PROTFLT:
207 	case T_TSSFLT:
208 	case T_DOUBLEFLT:
209 	case T_PAGEFLT:
210 		return SIGSEGV;
211 	default:
212 		return signal;
213 	}
214 }
215 
216 static int
217 linux_fixup(register_t **stack_base, struct image_params *imgp)
218 {
219 	register_t *argv, *envp;
220 
221 	argv = *stack_base;
222 	envp = *stack_base + (imgp->args->argc + 1);
223 	(*stack_base)--;
224 	**stack_base = (intptr_t)(void *)envp;
225 	(*stack_base)--;
226 	**stack_base = (intptr_t)(void *)argv;
227 	(*stack_base)--;
228 	**stack_base = imgp->args->argc;
229 	return 0;
230 }
231 
232 static int
233 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
234 {
235 	Elf32_Auxargs *args;
236 	register_t *pos;
237 
238 	KASSERT(curthread->td_proc == imgp->proc &&
239 	    (curthread->td_proc->p_flag & P_SA) == 0,
240 	    ("unsafe elf_linux_fixup(), should be curproc"));
241 	args = (Elf32_Auxargs *)imgp->auxargs;
242 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
243 
244 	if (args->trace)
245 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
246 	if (args->execfd != -1)
247 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
248 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
249 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
250 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
251 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
252 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
253 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
254 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
255 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
256 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
257 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
258 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
259 	AUXARGS_ENTRY(pos, AT_NULL, 0);
260 
261 	free(imgp->auxargs, M_TEMP);
262 	imgp->auxargs = NULL;
263 
264 	(*stack_base)--;
265 	**stack_base = (register_t)imgp->args->argc;
266 	return 0;
267 }
268 
/* Selector values the sendsig routines load into %cs and %ds/%es/%fs/%ss. */
extern int _ucodesel, _udatasel;
/* Offset added to the sigcode start to form the rt handler entry %eip. */
extern unsigned long linux_sznonrtsigcode;
271 
/*
 * Deliver a signal to the current thread using the rt (siginfo-style)
 * frame layout, struct l_rt_sigframe.
 *
 * Must be called with the process lock and the sigacts mutex held (both
 * are asserted).  The sigacts mutex is dropped once the frame address is
 * chosen, the process lock once the signal-stack fields have been read;
 * both are re-acquired before returning.  If the frame cannot be copied
 * out to the user stack the process is terminated with SIGILL.
 *
 * catcher: user handler address stored in the frame.
 * sig:     native signal number; translated through the sysentvec signal
 *          table before being stored.
 * mask:    signal mask to save in the frame, converted to Linux format.
 * code:    stored as lsi_code and translated into sc_trapno.
 */
static void
linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we were on the alternate stack at signal time. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context: at the top of the
	 * alternate signal stack when one is enabled, we are not already on
	 * it and this signal is marked to use it; otherwise directly below
	 * the current user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.  The signal
	 * number is translated through the sysentvec table, if any.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so no stale kernel stack data leaks. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;
	/* These point into the user-space copy of the frame, not ours. */
	frame.sf_siginfo = &fp->sf_si;
	frame.sf_ucontext = &fp->sf_sc;

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	frame.sf_si.lsi_addr = (void *)regs->tf_err;

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = NULL;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Snapshot the interrupted register state from the trapframe. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in: the stack pointer is set to the
	 * new frame and %eip to the rt entry inside the signal code, which
	 * is located linux_sznonrtsigcode bytes past
	 * PS_STRINGS - *sv_szsigcode.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Re-acquire the locks the caller expects to still be held. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
393 
394 
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * This is the sendsig hook of both sysentvecs.  Handlers installed with
 * SA_SIGINFO are passed on to linux_rt_sendsig(); all others get a
 * struct l_sigframe built here.  Must be called with the process lock
 * and the sigacts mutex held (both asserted); both are dropped while the
 * frame is built and copied out, and re-acquired before returning.  If
 * the copyout fails the process is terminated with SIGILL.
 */
static void
linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, sig, mask, code);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we were on the alternate stack at signal time. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: at the top of the
	 * alternate signal stack when one is enabled, we are not already on
	 * it and this signal is marked to use it; otherwise directly below
	 * the current user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_esp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.  The signal
	 * number is translated through the sysentvec table, if any.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	/* Start from a zeroed frame so no stale kernel stack data leaks. */
	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_edi;
	frame.sf_sc.sc_esi    = regs->tf_esi;
	frame.sf_sc.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.sc_edx    = regs->tf_edx;
	frame.sf_sc.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.sc_eax    = regs->tf_eax;
	frame.sf_sc.sc_eip    = regs->tf_eip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_eflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first go into the frame's extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in: the stack pointer is set to the
	 * new frame and %eip to PS_STRINGS - *sv_szsigcode.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Re-acquire the locks the caller expects to still be held. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
509 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT if the frame cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the security checks (a bad %cs additionally raises
 * SIGBUS/T_PROTFLT), and EJUSTRETURN once the trapframe has been
 * restored from the frame.
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations: only the eflags bits covered by
	 * PSL_USERCHANGE may differ from the current trapframe value.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		trapsignal(td, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* Reassemble the Linux mask from sc_mask plus the extramask words. */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	/* Install the restored mask under the process lock. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_edi    = frame.sf_sc.sc_edi;
	regs->tf_esi    = frame.sf_sc.sc_esi;
	regs->tf_ebp    = frame.sf_sc.sc_ebp;
	regs->tf_ebx    = frame.sf_sc.sc_ebx;
	regs->tf_edx    = frame.sf_sc.sc_edx;
	regs->tf_ecx    = frame.sf_sc.sc_ecx;
	regs->tf_eax    = frame.sf_sc.sc_eax;
	regs->tf_eip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;

	return (EJUSTRETURN);
}
603 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT if the ucontext cannot be copied in, EINVAL if the
 * saved eflags or %cs fail the security checks (a bad %cs additionally
 * raises SIGBUS/T_PROTFLT), and EJUSTRETURN once the trapframe and the
 * signal stack settings have been restored from the ucontext.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations: only the eflags bits covered by
	 * PSL_USERCHANGE may differ from the current trapframe value.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		trapsignal(td, SIGBUS, T_PROTFLT);
		return(EINVAL);
	}

	/* Install the saved signal mask under the process lock. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = context->sc_fs;
	regs->tf_es     = context->sc_es;
	regs->tf_ds     = context->sc_ds;
	regs->tf_edi    = context->sc_edi;
	regs->tf_esi    = context->sc_esi;
	regs->tf_ebp    = context->sc_ebp;
	regs->tf_ebx    = context->sc_ebx;
	regs->tf_edx    = context->sc_edx;
	regs->tf_ecx    = context->sc_ecx;
	regs->tf_eax    = context->sc_eax;
	regs->tf_eip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 * The Linux stack description from the ucontext is converted to a
	 * native stack_t first.
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = lss->ss_sp;
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
713 
714 /*
715  * MPSAFE
716  */
717 static void
718 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
719 {
720 	args[0] = tf->tf_ebx;
721 	args[1] = tf->tf_ecx;
722 	args[2] = tf->tf_edx;
723 	args[3] = tf->tf_esi;
724 	args[4] = tf->tf_edi;
725 	args[5] = tf->tf_ebp;	/* Unconfirmed */
726 	*params = NULL;		/* no copyin */
727 }
728 
729 /*
730  * If a linux binary is exec'ing something, try this image activator
731  * first.  We override standard shell script execution in order to
732  * be able to modify the interpreter path.  We only do this if a linux
733  * binary is doing the exec, so we do not create an EXEC module for it.
734  */
735 static int	exec_linux_imgact_try(struct image_params *iparams);
736 
737 static int
738 exec_linux_imgact_try(struct image_params *imgp)
739 {
740     const char *head = (const char *)imgp->image_header;
741     char *rpath;
742     int error = -1, len;
743 
744     /*
745      * The interpreter for shell scripts run from a linux binary needs
746      * to be located in /compat/linux if possible in order to recursively
747      * maintain linux path emulation.
748      */
749     if (((const short *)head)[0] == SHELLMAGIC) {
750 	    /*
751 	     * Run our normal shell image activator.  If it succeeds attempt
752 	     * to use the alternate path for the interpreter.  If an alternate
753 	     * path is found, use our stringspace to store it.
754 	     */
755 	    if ((error = exec_shell_imgact(imgp)) == 0) {
756 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
757 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
758 		    if (rpath != NULL) {
759 			    len = strlen(rpath) + 1;
760 
761 			    if (len <= MAXSHELLCMDLEN) {
762 				    memcpy(imgp->interpreter_name, rpath, len);
763 			    }
764 			    free(rpath, M_TEMP);
765 		    }
766 	    }
767     }
768     return(error);
769 }
770 
771 /*
772  * exec_setregs may initialize some registers differently than Linux
773  * does, thus potentially confusing Linux binaries. If necessary, we
774  * override the exec_setregs default(s) here.
775  */
776 static void
777 exec_linux_setregs(struct thread *td, u_long entry,
778 		   u_long stack, u_long ps_strings)
779 {
780 	static const u_short control = __LINUX_NPXCW__;
781 	struct pcb *pcb = td->td_pcb;
782 
783 	exec_setregs(td, entry, stack, ps_strings);
784 
785 	/* Linux sets %gs to 0, we default to _udatasel */
786 	pcb->pcb_gs = 0; load_gs(0);
787 
788 	/* Linux sets the i387 to extended precision. */
789 	fldcw(&control);
790 }
791 
/*
 * System entry vector for Linux a.out binaries.
 *
 * The initializer is positional, so the order of the values has to match
 * the member order of struct sysentvec, which is declared outside this
 * file.  It ties together the pieces defined above: the Linux syscall
 * table, the signal and errno translation tables, translate_traps(),
 * linux_fixup(), linux_sendsig(), the signal code and its size,
 * linux_prepsyscall(), exec_linux_imgact_try() and exec_linux_setregs().
 * The slot that holds elf32_coredump in elf_linux_sysvec is NULL here.
 * NOTE(review): the meaning of 0xff and of the trailing NULL is not
 * visible in this file -- confirm against struct sysentvec.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux a.out",
	NULL,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs,
	NULL
};
820 
/*
 * System entry vector for Linux ELF binaries; both ELF brands below
 * point at it.
 *
 * The initializer is positional, so the order of the values has to match
 * the member order of struct sysentvec, which is declared outside this
 * file.  It differs from linux_sysvec in three slots only: the fixup
 * routine is elf_linux_fixup(), the name is "Linux ELF", and
 * elf32_coredump is supplied where the a.out vector has NULL.
 * NOTE(review): the meaning of 0xff and of the trailing NULL is not
 * visible in this file -- confirm against struct sysentvec.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	elf_linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux ELF",
	elf32_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs,
	NULL
};
849 
/*
 * ELF brand descriptions for Linux binaries.  The two brands are
 * identical (ELFOSABI_LINUX, EM_386, "Linux", "/compat/linux",
 * elf_linux_sysvec) except for the interpreter path: ld-linux.so.1 for
 * the first and ld-linux.so.2 for the second.
 * NOTE(review): "/compat/linux" is presumably the emulation root and the
 * trailing NULL an unused optional member -- confirm against the
 * Elf32_Brandinfo declaration.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec,
					NULL,
				 };

static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec,
					NULL,
				 };

/* NULL-terminated list of the brands; walked by linux_elf_modevent(). */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
875 
876 static int
877 linux_elf_modevent(module_t mod, int type, void *data)
878 {
879 	Elf32_Brandinfo **brandinfo;
880 	int error;
881 	struct linux_ioctl_handler **lihp;
882 
883 	error = 0;
884 
885 	switch(type) {
886 	case MOD_LOAD:
887 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
888 		     ++brandinfo)
889 			if (elf32_insert_brand_entry(*brandinfo) < 0)
890 				error = EINVAL;
891 		if (error == 0) {
892 			SET_FOREACH(lihp, linux_ioctl_handler_set)
893 				linux_ioctl_register_handler(*lihp);
894 			if (bootverbose)
895 				printf("Linux ELF exec handler installed\n");
896 		} else
897 			printf("cannot insert Linux ELF brand handler\n");
898 		break;
899 	case MOD_UNLOAD:
900 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
901 		     ++brandinfo)
902 			if (elf32_brand_inuse(*brandinfo))
903 				error = EBUSY;
904 		if (error == 0) {
905 			for (brandinfo = &linux_brandlist[0];
906 			     *brandinfo != NULL; ++brandinfo)
907 				if (elf32_remove_brand_entry(*brandinfo) < 0)
908 					error = EINVAL;
909 		}
910 		if (error == 0) {
911 			SET_FOREACH(lihp, linux_ioctl_handler_set)
912 				linux_ioctl_unregister_handler(*lihp);
913 			if (bootverbose)
914 				printf("Linux ELF exec handler removed\n");
915 			linux_mib_destroy();
916 		} else
917 			printf("Could not deinstall ELF interpreter entry\n");
918 		break;
919 	default:
920 		return EOPNOTSUPP;
921 	}
922 	return error;
923 }
924 
/*
 * Module description: the module is named "linuxelf" and its events are
 * handled by linux_elf_modevent().
 * NOTE(review): the final 0 is presumably the module's private data
 * pointer -- confirm against the moduledata_t declaration.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
932