xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 262e143bd46171a6415a5b28af260a5efa2a3db8)
1 /*-
 * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
34 
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/exec.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/signalvar.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/vnode.h>
56 
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_param.h>
64 
65 #include <machine/cpu.h>
66 #include <machine/md_var.h>
67 #include <machine/pcb.h>
68 
69 #include <i386/linux/linux.h>
70 #include <i386/linux/linux_proto.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/*
 * Define the M_LINUX malloc type, tagged "linux" and described as
 * "Linux mode structures".
 */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
78 
/*
 * The two characters "#!" that open an interpreter script, expressed as a
 * 16-bit integer in host byte order.  exec_linux_imgact_try() compares the
 * first short of the image header against this value.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Execute the x87 "fldcw" instruction with the object at addr as operand. */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
/* FPU control word that exec_linux_setregs() loads for a new Linux image. */
#define	__LINUX_NPXCW__		0x37f
96 
/*
 * Signal delivery code ("sigcode") and its size.  Both are defined outside
 * this file and are published through the sysentvec structures below.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table; defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/*
 * Linker set of ioctl handlers; linux_elf_modevent() walks it to register
 * and unregister every member.
 */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations of the file-local sysentvec callbacks. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
113 
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table is indexed by the FreeBSD errno value (0 .. ELAST); each entry
 * is the matching Linux errno, already negated.  The trailing comment on
 * each row gives the index range that the row covers.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/*  0 ..  9 */
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10 .. 19 */
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20 .. 29 */
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30 .. 39 */
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40 .. 49 */
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50 .. 59 */
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60 .. 69 */
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70 .. 79 */
	-6, -6, -43, -42, -75, -6, -84				/* 80 .. 86 */
};
128 
/*
 * FreeBSD -> Linux signal number translation.
 *
 * Both sysentvec structures below publish this table, and the sendsig code
 * indexes it with _SIG_IDX(sig), so the first entry corresponds to the
 * lowest FreeBSD signal number (SIGHUP).  An entry of 0 marks a FreeBSD
 * signal for which this table names no Linux counterpart.
 * NOTE(review): the two 0 slots appear to be SIGEMT and SIGINFO in FreeBSD's
 * numbering -- confirm against <sys/signal.h>.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
139 
/*
 * Linux -> FreeBSD signal number translation; the inverse direction of
 * bsd_to_linux_signal.  The table is not referenced in this file.
 * NOTE(review): presumably indexed by Linux signal number minus one, like
 * the forward table -- confirm against the users of this table.  Some
 * entries repeat (SIGBUS and SIGURG each appear twice), so the mapping is
 * not one-to-one.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
150 
/* Value reported to Linux when a FreeBSD trap type has no translation. */
#define LINUX_T_UNKNOWN  255

/*
 * FreeBSD trap type -> Linux trap number.  The array is indexed by the
 * FreeBSD trap type; every slot is spelled out with an explicit index so
 * that the position of each value is checked by the compiler rather than
 * by a comment.
 */
static int _bsd_to_linux_trapcode[] = {
	[0]  = LINUX_T_UNKNOWN,
	[1]  = 6,			/* T_PRIVINFLT */
	[2]  = LINUX_T_UNKNOWN,
	[3]  = 3,			/* T_BPTFLT */
	[4]  = LINUX_T_UNKNOWN,
	[5]  = LINUX_T_UNKNOWN,
	[6]  = 16,			/* T_ARITHTRAP */
	[7]  = 254,			/* T_ASTFLT */
	[8]  = LINUX_T_UNKNOWN,
	[9]  = 13,			/* T_PROTFLT */
	[10] = 1,			/* T_TRCTRAP */
	[11] = LINUX_T_UNKNOWN,
	[12] = 14,			/* T_PAGEFLT */
	[13] = LINUX_T_UNKNOWN,
	[14] = 17,			/* T_ALIGNFLT */
	[15] = LINUX_T_UNKNOWN,
	[16] = LINUX_T_UNKNOWN,
	[17] = LINUX_T_UNKNOWN,
	[18] = 0,			/* T_DIVIDE */
	[19] = 2,			/* T_NMI */
	[20] = 4,			/* T_OFLOW */
	[21] = 5,			/* T_BOUND */
	[22] = 7,			/* T_DNA */
	[23] = 8,			/* T_DOUBLEFLT */
	[24] = 9,			/* T_FPOPFLT */
	[25] = 10,			/* T_TSSFLT */
	[26] = 11,			/* T_SEGNPFLT */
	[27] = 12,			/* T_STKFLT */
	[28] = 18,			/* T_MCHK */
	[29] = 19,			/* T_XMMFLT */
	[30] = 15			/* T_RESERVED */
};

/*
 * Translate a FreeBSD trap type.  Anything that does not index the table
 * yields LINUX_T_UNKNOWN; a negative int argument is converted to a large
 * unsigned value by the comparison and therefore also falls out of range.
 * The argument is evaluated more than once.
 */
#define bsd_to_linux_trapcode(code) \
    (((code) < sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0])) ? \
     _bsd_to_linux_trapcode[(code)] : LINUX_T_UNKNOWN)
189 
190 /*
191  * If FreeBSD & Linux have a difference of opinion about what a trap
192  * means, deal with it here.
193  *
194  * MPSAFE
195  */
196 static int
197 translate_traps(int signal, int trap_code)
198 {
199 	if (signal != SIGBUS)
200 		return signal;
201 	switch (trap_code) {
202 	case T_PROTFLT:
203 	case T_TSSFLT:
204 	case T_DOUBLEFLT:
205 	case T_PAGEFLT:
206 		return SIGSEGV;
207 	default:
208 		return signal;
209 	}
210 }
211 
212 static int
213 linux_fixup(register_t **stack_base, struct image_params *imgp)
214 {
215 	register_t *argv, *envp;
216 
217 	argv = *stack_base;
218 	envp = *stack_base + (imgp->args->argc + 1);
219 	(*stack_base)--;
220 	**stack_base = (intptr_t)(void *)envp;
221 	(*stack_base)--;
222 	**stack_base = (intptr_t)(void *)argv;
223 	(*stack_base)--;
224 	**stack_base = imgp->args->argc;
225 	return 0;
226 }
227 
228 static int
229 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
230 {
231 	Elf32_Auxargs *args;
232 	register_t *pos;
233 
234 	KASSERT(curthread->td_proc == imgp->proc &&
235 	    (curthread->td_proc->p_flag & P_SA) == 0,
236 	    ("unsafe elf_linux_fixup(), should be curproc"));
237 	args = (Elf32_Auxargs *)imgp->auxargs;
238 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
239 
240 	if (args->trace)
241 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
242 	if (args->execfd != -1)
243 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
244 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
245 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
246 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
247 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
248 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
249 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
250 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
251 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
252 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
253 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
254 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
255 	AUXARGS_ENTRY(pos, AT_NULL, 0);
256 
257 	free(imgp->auxargs, M_TEMP);
258 	imgp->auxargs = NULL;
259 
260 	(*stack_base)--;
261 	**stack_base = (register_t)imgp->args->argc;
262 	return 0;
263 }
264 
/* Selectors stored into the trapframe segment registers before a handler runs. */
extern int _ucodesel, _udatasel;
/*
 * Offset from the start of the sigcode to the entry point used for
 * SA_SIGINFO handlers; linux_rt_sendsig() adds it to the sigcode address.
 */
extern unsigned long linux_sznonrtsigcode;
267 
268 static void
269 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
270 {
271 	struct thread *td = curthread;
272 	struct proc *p = td->td_proc;
273 	struct sigacts *psp;
274 	struct trapframe *regs;
275 	struct l_rt_sigframe *fp, frame;
276 	int sig, code;
277 	int oonstack;
278 
279 	sig = ksi->ksi_signo;
280 	code = ksi->ksi_code;
281 	PROC_LOCK_ASSERT(p, MA_OWNED);
282 	psp = p->p_sigacts;
283 	mtx_assert(&psp->ps_mtx, MA_OWNED);
284 	regs = td->td_frame;
285 	oonstack = sigonstack(regs->tf_esp);
286 
287 #ifdef DEBUG
288 	if (ldebug(rt_sendsig))
289 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
290 		    catcher, sig, (void*)mask, code);
291 #endif
292 	/*
293 	 * Allocate space for the signal handler context.
294 	 */
295 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
296 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
297 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
298 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
299 	} else
300 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
301 	mtx_unlock(&psp->ps_mtx);
302 
303 	/*
304 	 * Build the argument list for the signal handler.
305 	 */
306 	if (p->p_sysent->sv_sigtbl)
307 		if (sig <= p->p_sysent->sv_sigsize)
308 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
309 
310 	bzero(&frame, sizeof(frame));
311 
312 	frame.sf_handler = catcher;
313 	frame.sf_sig = sig;
314 	frame.sf_siginfo = &fp->sf_si;
315 	frame.sf_ucontext = &fp->sf_sc;
316 
317 	/* Fill in POSIX parts */
318 	frame.sf_si.lsi_signo = sig;
319 	frame.sf_si.lsi_code = code;
320 	frame.sf_si.lsi_addr = ksi->ksi_addr;
321 
322 	/*
323 	 * Build the signal context to be used by sigreturn.
324 	 */
325 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
326 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
327 
328 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
329 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
330 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
331 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
332 	PROC_UNLOCK(p);
333 
334 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
335 
336 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
337 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
338 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
339 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
340 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
341 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
342 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
343 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
344 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
345 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
346 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
347 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
348 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
349 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
350 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
351 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
352 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
353 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
354 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
355 
356 #ifdef DEBUG
357 	if (ldebug(rt_sendsig))
358 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
359 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
360 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
361 #endif
362 
363 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
364 		/*
365 		 * Process has trashed its stack; give it an illegal
366 		 * instruction to halt it in its tracks.
367 		 */
368 #ifdef DEBUG
369 		if (ldebug(rt_sendsig))
370 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
371 			    fp, oonstack);
372 #endif
373 		PROC_LOCK(p);
374 		sigexit(td, SIGILL);
375 	}
376 
377 	/*
378 	 * Build context to run handler in.
379 	 */
380 	regs->tf_esp = (int)fp;
381 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
382 	    linux_sznonrtsigcode;
383 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
384 	regs->tf_cs = _ucodesel;
385 	regs->tf_ds = _udatasel;
386 	regs->tf_es = _udatasel;
387 	regs->tf_fs = _udatasel;
388 	regs->tf_ss = _udatasel;
389 	PROC_LOCK(p);
390 	mtx_lock(&psp->ps_mtx);
391 }
392 
393 
/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * Handlers installed with SA_SIGINFO are passed on to linux_rt_sendsig();
 * everything else gets a struct l_sigframe built here.
 *
 * Locking: entered with the proc lock and p->p_sigacts->ps_mtx held (both
 * are asserted).  Both are released while the frame is built and copied out
 * and are re-acquired before returning.  If the copyout fails the process
 * is terminated with SIGILL via sigexit().
 *
 *	catcher	user-space address of the handler
 *	ksi	description of the signal being delivered
 *	mask	signal mask saved in the frame, i.e. the one that
 *		linux_sigreturn() reinstates afterwards
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int sig, code;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}
	regs = td->td_frame;
	/* Remember whether we were already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context: the top of the
	 * alternate stack when it is enabled, not already in use and
	 * requested for this signal, otherwise just below the current
	 * user stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_esp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 * The handler sees the Linux signal number, so translate it here.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_edi;
	frame.sf_sc.sc_esi    = regs->tf_esi;
	frame.sf_sc.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.sc_edx    = regs->tf_edx;
	frame.sf_sc.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.sc_eax    = regs->tf_eax;
	frame.sf_sc.sc_eip    = regs->tf_eip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_eflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);

	/* sc_mask holds word 0 of the mask; the rest goes in sf_extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * The entry point is the very start of the sigcode.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Hand both locks back to the caller. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
510 
511 /*
512  * System call to cleanup state after a signal
513  * has been taken.  Reset signal mask and
514  * stack state from context left by sendsig (above).
515  * Return to previous pc and psl as specified by
516  * context left by sendsig. Check carefully to
517  * make sure that the user has not modified the
518  * psl to gain improper privileges or to cause
519  * a machine fault.
520  */
521 int
522 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
523 {
524 	struct proc *p = td->td_proc;
525 	struct l_sigframe frame;
526 	struct trapframe *regs;
527 	l_sigset_t lmask;
528 	int eflags, i;
529 	ksiginfo_t ksi;
530 
531 	regs = td->td_frame;
532 
533 #ifdef DEBUG
534 	if (ldebug(sigreturn))
535 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
536 #endif
537 	/*
538 	 * The trampoline code hands us the sigframe.
539 	 * It is unsafe to keep track of it ourselves, in the event that a
540 	 * program jumps out of a signal handler.
541 	 */
542 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
543 		return (EFAULT);
544 
545 	/*
546 	 * Check for security violations.
547 	 */
548 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
549 	eflags = frame.sf_sc.sc_eflags;
550 	/*
551 	 * XXX do allow users to change the privileged flag PSL_RF.  The
552 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
553 	 * sometimes set it there too.  tf_eflags is kept in the signal
554 	 * context during signal handling and there is no other place
555 	 * to remember it, so the PSL_RF bit may be corrupted by the
556 	 * signal handler without us knowing.  Corruption of the PSL_RF
557 	 * bit at worst causes one more or one less debugger trap, so
558 	 * allowing it is fairly harmless.
559 	 */
560 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
561 		return(EINVAL);
562 
563 	/*
564 	 * Don't allow users to load a valid privileged %cs.  Let the
565 	 * hardware check for invalid selectors, excess privilege in
566 	 * other selectors, invalid %eip's and invalid %esp's.
567 	 */
568 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
569 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
570 		ksiginfo_init_trap(&ksi);
571 		ksi.ksi_signo = SIGBUS;
572 		ksi.ksi_code = BUS_OBJERR;
573 		ksi.ksi_trapno = T_PROTFLT;
574 		ksi.ksi_addr = (void *)regs->tf_eip;
575 		trapsignal(td, &ksi);
576 		return(EINVAL);
577 	}
578 
579 	lmask.__bits[0] = frame.sf_sc.sc_mask;
580 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
581 		lmask.__bits[i+1] = frame.sf_extramask[i];
582 	PROC_LOCK(p);
583 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
584 	SIG_CANTMASK(td->td_sigmask);
585 	signotify(td);
586 	PROC_UNLOCK(p);
587 
588 	/*
589 	 * Restore signal context.
590 	 */
591 	/* %gs was restored by the trampoline. */
592 	regs->tf_fs     = frame.sf_sc.sc_fs;
593 	regs->tf_es     = frame.sf_sc.sc_es;
594 	regs->tf_ds     = frame.sf_sc.sc_ds;
595 	regs->tf_edi    = frame.sf_sc.sc_edi;
596 	regs->tf_esi    = frame.sf_sc.sc_esi;
597 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
598 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
599 	regs->tf_edx    = frame.sf_sc.sc_edx;
600 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
601 	regs->tf_eax    = frame.sf_sc.sc_eax;
602 	regs->tf_eip    = frame.sf_sc.sc_eip;
603 	regs->tf_cs     = frame.sf_sc.sc_cs;
604 	regs->tf_eflags = eflags;
605 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
606 	regs->tf_ss     = frame.sf_sc.sc_ss;
607 
608 	return (EJUSTRETURN);
609 }
610 
611 /*
612  * System call to cleanup state after a signal
613  * has been taken.  Reset signal mask and
614  * stack state from context left by rt_sendsig (above).
615  * Return to previous pc and psl as specified by
616  * context left by sendsig. Check carefully to
617  * make sure that the user has not modified the
618  * psl to gain improper privileges or to cause
619  * a machine fault.
620  */
621 int
622 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
623 {
624 	struct proc *p = td->td_proc;
625 	struct l_ucontext uc;
626 	struct l_sigcontext *context;
627 	l_stack_t *lss;
628 	stack_t ss;
629 	struct trapframe *regs;
630 	int eflags;
631 	ksiginfo_t ksi;
632 
633 	regs = td->td_frame;
634 
635 #ifdef DEBUG
636 	if (ldebug(rt_sigreturn))
637 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
638 #endif
639 	/*
640 	 * The trampoline code hands us the ucontext.
641 	 * It is unsafe to keep track of it ourselves, in the event that a
642 	 * program jumps out of a signal handler.
643 	 */
644 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
645 		return (EFAULT);
646 
647 	context = &uc.uc_mcontext;
648 
649 	/*
650 	 * Check for security violations.
651 	 */
652 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
653 	eflags = context->sc_eflags;
654 	/*
655 	 * XXX do allow users to change the privileged flag PSL_RF.  The
656 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
657 	 * sometimes set it there too.  tf_eflags is kept in the signal
658 	 * context during signal handling and there is no other place
659 	 * to remember it, so the PSL_RF bit may be corrupted by the
660 	 * signal handler without us knowing.  Corruption of the PSL_RF
661 	 * bit at worst causes one more or one less debugger trap, so
662 	 * allowing it is fairly harmless.
663 	 */
664 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
665 		return(EINVAL);
666 
667 	/*
668 	 * Don't allow users to load a valid privileged %cs.  Let the
669 	 * hardware check for invalid selectors, excess privilege in
670 	 * other selectors, invalid %eip's and invalid %esp's.
671 	 */
672 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
673 	if (!CS_SECURE(context->sc_cs)) {
674 		ksiginfo_init_trap(&ksi);
675 		ksi.ksi_signo = SIGBUS;
676 		ksi.ksi_code = BUS_OBJERR;
677 		ksi.ksi_trapno = T_PROTFLT;
678 		ksi.ksi_addr = (void *)regs->tf_eip;
679 		trapsignal(td, &ksi);
680 		return(EINVAL);
681 	}
682 
683 	PROC_LOCK(p);
684 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
685 	SIG_CANTMASK(td->td_sigmask);
686 	signotify(td);
687 	PROC_UNLOCK(p);
688 
689 	/*
690 	 * Restore signal context
691 	 */
692 	/* %gs was restored by the trampoline. */
693 	regs->tf_fs     = context->sc_fs;
694 	regs->tf_es     = context->sc_es;
695 	regs->tf_ds     = context->sc_ds;
696 	regs->tf_edi    = context->sc_edi;
697 	regs->tf_esi    = context->sc_esi;
698 	regs->tf_ebp    = context->sc_ebp;
699 	regs->tf_ebx    = context->sc_ebx;
700 	regs->tf_edx    = context->sc_edx;
701 	regs->tf_ecx    = context->sc_ecx;
702 	regs->tf_eax    = context->sc_eax;
703 	regs->tf_eip    = context->sc_eip;
704 	regs->tf_cs     = context->sc_cs;
705 	regs->tf_eflags = eflags;
706 	regs->tf_esp    = context->sc_esp_at_signal;
707 	regs->tf_ss     = context->sc_ss;
708 
709 	/*
710 	 * call sigaltstack & ignore results..
711 	 */
712 	lss = &uc.uc_stack;
713 	ss.ss_sp = lss->ss_sp;
714 	ss.ss_size = lss->ss_size;
715 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
716 
717 #ifdef DEBUG
718 	if (ldebug(rt_sigreturn))
719 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
720 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
721 #endif
722 	(void)kern_sigaltstack(td, &ss, NULL);
723 
724 	return (EJUSTRETURN);
725 }
726 
727 /*
728  * MPSAFE
729  */
730 static void
731 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
732 {
733 	args[0] = tf->tf_ebx;
734 	args[1] = tf->tf_ecx;
735 	args[2] = tf->tf_edx;
736 	args[3] = tf->tf_esi;
737 	args[4] = tf->tf_edi;
738 	args[5] = tf->tf_ebp;	/* Unconfirmed */
739 	*params = NULL;		/* no copyin */
740 }
741 
742 /*
743  * If a linux binary is exec'ing something, try this image activator
744  * first.  We override standard shell script execution in order to
745  * be able to modify the interpreter path.  We only do this if a linux
746  * binary is doing the exec, so we do not create an EXEC module for it.
747  */
748 static int	exec_linux_imgact_try(struct image_params *iparams);
749 
750 static int
751 exec_linux_imgact_try(struct image_params *imgp)
752 {
753     const char *head = (const char *)imgp->image_header;
754     char *rpath;
755     int error = -1, len;
756 
757     /*
758      * The interpreter for shell scripts run from a linux binary needs
759      * to be located in /compat/linux if possible in order to recursively
760      * maintain linux path emulation.
761      */
762     if (((const short *)head)[0] == SHELLMAGIC) {
763 	    /*
764 	     * Run our normal shell image activator.  If it succeeds attempt
765 	     * to use the alternate path for the interpreter.  If an alternate
766 	     * path is found, use our stringspace to store it.
767 	     */
768 	    if ((error = exec_shell_imgact(imgp)) == 0) {
769 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
770 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
771 		    if (rpath != NULL) {
772 			    len = strlen(rpath) + 1;
773 
774 			    if (len <= MAXSHELLCMDLEN) {
775 				    memcpy(imgp->interpreter_name, rpath, len);
776 			    }
777 			    free(rpath, M_TEMP);
778 		    }
779 	    }
780     }
781     return(error);
782 }
783 
784 /*
785  * exec_setregs may initialize some registers differently than Linux
786  * does, thus potentially confusing Linux binaries. If necessary, we
787  * override the exec_setregs default(s) here.
788  */
789 static void
790 exec_linux_setregs(struct thread *td, u_long entry,
791 		   u_long stack, u_long ps_strings)
792 {
793 	static const u_short control = __LINUX_NPXCW__;
794 	struct pcb *pcb = td->td_pcb;
795 
796 	exec_setregs(td, entry, stack, ps_strings);
797 
798 	/* Linux sets %gs to 0, we default to _udatasel */
799 	pcb->pcb_gs = 0; load_gs(0);
800 
801 	/* Linux sets the i387 to extended precision. */
802 	fldcw(&control);
803 }
804 
/*
 * System call vector for Linux a.out binaries.
 *
 * The initializer is positional.  The per-entry notes describe the role
 * suggested by each value.  NOTE(review): confirm the field order against
 * struct sysentvec in <sys/sysent.h> for this revision.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent */
	linux_sysent,		/* system call table */
	0xff,			/* presumably the syscall number mask -- confirm */
	LINUX_SIGTBLSZ,		/* sv_sigsize: bound checked by the sendsig code */
	bsd_to_linux_signal,	/* sv_sigtbl: FreeBSD -> Linux signal numbers */
	ELAST + 1,		/* number of entries in bsd_to_linux_errno */
	bsd_to_linux_errno,	/* FreeBSD -> Linux errno values */
	translate_traps,	/* trap-dependent signal rewrite */
	linux_fixup,		/* a.out stack fixup */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* sigcode */
	&linux_szsigcode,	/* sv_szsigcode: size of the sigcode */
	linux_prepsyscall,	/* syscall argument fetch */
	"Linux a.out",		/* name of this ABI */
	NULL,			/* presumably the core dump routine: none */
	exec_linux_imgact_try,	/* image activator tried first */
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs,	/* register setup after exec */
	NULL
};
833 
/*
 * System call vector for Linux ELF binaries.  It differs from linux_sysvec
 * in three entries only: the stack fixup (elf_linux_fixup), the ABI name
 * and the elf32_coredump entry.
 *
 * The initializer is positional.  The per-entry notes describe the role
 * suggested by each value.  NOTE(review): confirm the field order against
 * struct sysentvec in <sys/sysent.h> for this revision.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent */
	linux_sysent,		/* system call table */
	0xff,			/* presumably the syscall number mask -- confirm */
	LINUX_SIGTBLSZ,		/* sv_sigsize: bound checked by the sendsig code */
	bsd_to_linux_signal,	/* sv_sigtbl: FreeBSD -> Linux signal numbers */
	ELAST + 1,		/* number of entries in bsd_to_linux_errno */
	bsd_to_linux_errno,	/* FreeBSD -> Linux errno values */
	translate_traps,	/* trap-dependent signal rewrite */
	elf_linux_fixup,	/* ELF stack fixup (writes the aux vector) */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* sigcode */
	&linux_szsigcode,	/* sv_szsigcode: size of the sigcode */
	linux_prepsyscall,	/* syscall argument fetch */
	"Linux ELF",		/* name of this ABI */
	elf32_coredump,		/* presumably the core dump routine */
	exec_linux_imgact_try,	/* image activator tried first */
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs,	/* register setup after exec */
	NULL
};
862 
/*
 * ELF brand for Linux/i386 binaries whose interpreter is
 * /lib/ld-linux.so.1.  Binaries matching it run under elf_linux_sysvec.
 * NOTE(review): "/compat/linux" looks like the emulation root path --
 * confirm against Elf32_Brandinfo.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec,
					NULL,
				 };
872 
/*
 * ELF brand for Linux/i386 binaries whose interpreter is
 * /lib/ld-linux.so.2; apart from the interpreter path it is identical to
 * linux_brand.
 */
static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec,
					NULL,
				 };
882 
/*
 * NULL-terminated list of every brand this module provides;
 * linux_elf_modevent() walks it to insert and remove the brands.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
888 
889 static int
890 linux_elf_modevent(module_t mod, int type, void *data)
891 {
892 	Elf32_Brandinfo **brandinfo;
893 	int error;
894 	struct linux_ioctl_handler **lihp;
895 
896 	error = 0;
897 
898 	switch(type) {
899 	case MOD_LOAD:
900 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
901 		     ++brandinfo)
902 			if (elf32_insert_brand_entry(*brandinfo) < 0)
903 				error = EINVAL;
904 		if (error == 0) {
905 			SET_FOREACH(lihp, linux_ioctl_handler_set)
906 				linux_ioctl_register_handler(*lihp);
907 			if (bootverbose)
908 				printf("Linux ELF exec handler installed\n");
909 		} else
910 			printf("cannot insert Linux ELF brand handler\n");
911 		break;
912 	case MOD_UNLOAD:
913 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
914 		     ++brandinfo)
915 			if (elf32_brand_inuse(*brandinfo))
916 				error = EBUSY;
917 		if (error == 0) {
918 			for (brandinfo = &linux_brandlist[0];
919 			     *brandinfo != NULL; ++brandinfo)
920 				if (elf32_remove_brand_entry(*brandinfo) < 0)
921 					error = EINVAL;
922 		}
923 		if (error == 0) {
924 			SET_FOREACH(lihp, linux_ioctl_handler_set)
925 				linux_ioctl_unregister_handler(*lihp);
926 			if (bootverbose)
927 				printf("Linux ELF exec handler removed\n");
928 			linux_mib_destroy();
929 		} else
930 			printf("Could not deinstall ELF interpreter entry\n");
931 		break;
932 	default:
933 		return EOPNOTSUPP;
934 	}
935 	return error;
936 }
937 
/*
 * Module description: the name "linuxelf", the event handler above, and
 * no extra argument for the handler.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module with the SI_SUB_EXEC subsystem, in no particular order. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
945