xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
34 
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/imgact.h>
42 #include <sys/imgact_aout.h>
43 #include <sys/imgact_elf.h>
44 #include <sys/lock.h>
45 #include <sys/malloc.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/signalvar.h>
49 #include <sys/syscallsubr.h>
50 #include <sys/sysent.h>
51 #include <sys/sysproto.h>
52 #include <sys/user.h>
53 #include <sys/vnode.h>
54 
55 #include <vm/vm.h>
56 #include <vm/vm_param.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_extern.h>
59 #include <sys/exec.h>
60 #include <sys/kernel.h>
61 #include <sys/module.h>
62 #include <machine/cpu.h>
63 #include <machine/md_var.h>
64 #include <sys/mutex.h>
65 
66 #include <vm/vm.h>
67 #include <vm/vm_param.h>
68 #include <vm/pmap.h>
69 #include <vm/vm_map.h>
70 #include <vm/vm_object.h>
71 
72 #include <i386/linux/linux.h>
73 #include <i386/linux/linux_proto.h>
74 #include <compat/linux/linux_mib.h>
75 #include <compat/linux/linux_signal.h>
76 #include <compat/linux/linux_util.h>
77 
78 MODULE_VERSION(linux, 1);
79 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
80 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
81 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
82 
83 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
84 
85 #if BYTE_ORDER == LITTLE_ENDIAN
86 #define SHELLMAGIC      0x2123 /* #! */
87 #else
88 #define SHELLMAGIC      0x2321
89 #endif
90 
91 /*
92  * Allow the sendsig functions to use the ldebug() facility
93  * even though they are not syscalls themselves. Map them
94  * to syscall 0. This is slightly less bogus than using
95  * ldebug(sigreturn).
96  */
97 #define	LINUX_SYS_linux_rt_sendsig	0
98 #define	LINUX_SYS_linux_sendsig		0
99 
100 extern char linux_sigcode[];
101 extern int linux_szsigcode;
102 
103 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
104 
105 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
106 
107 static int	linux_fixup(register_t **stack_base,
108 		    struct image_params *iparams);
109 static int	elf_linux_fixup(register_t **stack_base,
110 		    struct image_params *iparams);
111 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
112 		    caddr_t *params);
113 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
114 		    u_long code);
115 static void	exec_linux_setregs(struct thread *td, u_long entry,
116 				   u_long stack, u_long ps_strings);
117 
118 /*
119  * Linux syscalls return negative errno's, we do positive and map them
120  */
121 static int bsd_to_linux_errno[ELAST + 1] = {
122 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
123 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
124 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
125 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
126 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
127 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
128 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
129 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
130 	-6, -6, -43, -42, -75, -6, -84
131 };
132 
133 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
134 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
135 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
136 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
137 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
138 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
139 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
140 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
141 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
142 };
143 
144 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
145 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
146 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
147 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
148 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
149 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
150 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
151 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
152 	SIGIO, SIGURG, SIGSYS
153 };
154 
/*
 * FreeBSD trap code -> Linux trap number.
 *
 * The table is indexed by the FreeBSD trap code; codes that have no
 * entry of their own map to LINUX_T_UNKNOWN.  Use the
 * bsd_to_linux_trapcode() macro rather than the table itself: it also
 * yields LINUX_T_UNKNOWN for codes past the end of the table.  Note
 * that the macro evaluates its argument twice.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	/*  0               */	LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT   */	6,
	/*  2               */	LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT      */	3,
	/*  4               */	LINUX_T_UNKNOWN,
	/*  5               */	LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP   */	16,
	/*  7 T_ASTFLT      */	254,
	/*  8               */	LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT     */	13,
	/* 10 T_TRCTRAP     */	1,
	/* 11               */	LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT     */	14,
	/* 13               */	LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT    */	17,
	/* 15               */	LINUX_T_UNKNOWN,
	/* 16               */	LINUX_T_UNKNOWN,
	/* 17               */	LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE      */	0,
	/* 19 T_NMI         */	2,
	/* 20 T_OFLOW       */	4,
	/* 21 T_BOUND       */	5,
	/* 22 T_DNA         */	7,
	/* 23 T_DOUBLEFLT   */	8,
	/* 24 T_FPOPFLT     */	9,
	/* 25 T_TSSFLT      */	10,
	/* 26 T_SEGNPFLT    */	11,
	/* 27 T_STKFLT      */	12,
	/* 28 T_MCHK        */	18,
	/* 29 T_XMMFLT      */	19,
	/* 30 T_RESERVED    */	15
};
#define bsd_to_linux_trapcode(code) \
    ((code) >= sizeof(_bsd_to_linux_trapcode) / \
	sizeof(_bsd_to_linux_trapcode[0]) ? \
     LINUX_T_UNKNOWN : \
     _bsd_to_linux_trapcode[(code)])
193 
194 /*
195  * If FreeBSD & Linux have a difference of opinion about what a trap
196  * means, deal with it here.
197  *
198  * MPSAFE
199  */
200 static int
201 translate_traps(int signal, int trap_code)
202 {
203 	if (signal != SIGBUS)
204 		return signal;
205 	switch (trap_code) {
206 	case T_PROTFLT:
207 	case T_TSSFLT:
208 	case T_DOUBLEFLT:
209 	case T_PAGEFLT:
210 		return SIGSEGV;
211 	default:
212 		return signal;
213 	}
214 }
215 
216 static int
217 linux_fixup(register_t **stack_base, struct image_params *imgp)
218 {
219 	register_t *argv, *envp;
220 
221 	argv = *stack_base;
222 	envp = *stack_base + (imgp->argc + 1);
223 	(*stack_base)--;
224 	**stack_base = (intptr_t)(void *)envp;
225 	(*stack_base)--;
226 	**stack_base = (intptr_t)(void *)argv;
227 	(*stack_base)--;
228 	**stack_base = imgp->argc;
229 	return 0;
230 }
231 
232 static int
233 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
234 {
235 	Elf32_Auxargs *args;
236 	register_t *pos;
237 
238 	KASSERT(curthread->td_proc == imgp->proc &&
239 	    (curthread->td_proc->p_flag & P_SA) == 0,
240 	    ("unsafe elf_linux_fixup(), should be curproc"));
241 	args = (Elf32_Auxargs *)imgp->auxargs;
242 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
243 
244 	if (args->trace)
245 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
246 	if (args->execfd != -1)
247 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
248 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
249 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
250 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
251 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
252 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
253 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
254 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
255 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
256 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
257 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
258 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
259 	AUXARGS_ENTRY(pos, AT_NULL, 0);
260 
261 	free(imgp->auxargs, M_TEMP);
262 	imgp->auxargs = NULL;
263 
264 	(*stack_base)--;
265 	**stack_base = (register_t)imgp->argc;
266 	return 0;
267 }
268 
/* User code/data segment selectors loaded into the handler's context. */
extern int _ucodesel, _udatasel;
/* Added to the start of the signal code to compute the RT entry point. */
extern unsigned long linux_sznonrtsigcode;

/*
 * Deliver a signal using the Linux "rt" signal frame (struct
 * l_rt_sigframe).  linux_sendsig() calls this for signals that are
 * members of ps_siginfo, i.e. handlers installed with SA_SIGINFO.
 *
 * catcher - handler address, stored in the frame's sf_handler
 * sig     - signal number; translated through sv_sigtbl if one is set
 * mask    - signal mask to save in the frame, converted to Linux form
 * code    - stored as lsi_code and mapped to the Linux trap number
 *
 * The frame is built in a local copy and copied out either to the top
 * of the alternate signal stack or just below the current user %esp.
 * The trapframe is then rewritten so the thread resumes in the signal
 * code with %esp pointing at the new frame.
 *
 * Locking: entered with the proc lock and ps_mtx held (asserted).  Both
 * are dropped while the frame is built and copied out, and both are
 * re-acquired before returning.
 */
static void
linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we are already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context.
	 * Use the top of the alternate stack when one is configured, we
	 * are not already on it, and this signal asked for it; otherwise
	 * place the frame directly below the current user stack pointer.
	 */
	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
	/* psp is not read again below; the proc lock is still held. */
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 * From here on sig is the translated (Linux) signal number.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;
	/* These are user addresses inside the frame about to be copied out. */
	frame.sf_siginfo = &fp->sf_si;
	frame.sf_ucontext = &fp->sf_sc;

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	/*
	 * NOTE(review): lsi_addr is filled from tf_err, the same value
	 * stored in sc_err below, rather than from a fault address --
	 * confirm this is intended.
	 */
	frame.sf_si.lsi_addr = (void *)regs->tf_err;

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = NULL;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	/* Done reading p_flag and p_sigstk; drop the proc lock for copyout. */
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Snapshot the interrupted register state from the trapframe. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * Execution resumes linux_sznonrtsigcode bytes into the signal
	 * code, which is located *sv_szsigcode bytes below PS_STRINGS.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Re-take both locks; the function was entered with them held. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
393 
394 
/*
 * Send an interrupt to process.
 *
 * Signals that are members of ps_siginfo (handlers installed with
 * SA_SIGINFO) are handed to linux_rt_sendsig().  For all others a
 * struct l_sigframe holding the handler address, the translated signal
 * number and the interrupted register state is copied out to the
 * alternate signal stack or to just below the current user %esp.  The
 * trapframe is then rewritten so the thread resumes at the start of
 * the signal code, *sv_szsigcode bytes below PS_STRINGS, with %esp
 * pointing at the new frame.  linux_sigreturn() later restores the
 * signal mask and registers from that frame.
 *
 * catcher - handler address, stored in the frame's sf_handler
 * sig     - signal number; translated through sv_sigtbl if one is set
 * mask    - signal mask to save in the frame, converted to Linux form
 * code    - mapped to the Linux trap number stored in sc_trapno
 *
 * Locking: entered with the proc lock and ps_mtx held (asserted).  Both
 * are dropped while the frame is built and copied out, and both are
 * re-acquired before returning.
 */
static void
linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, sig, mask, code);
		return;
	}

	regs = td->td_frame;
	/* Remember whether we are already running on the alternate stack. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 * Use the top of the alternate stack when one is configured, we
	 * are not already on it, and this signal asked for it; otherwise
	 * place the frame directly below the current user stack pointer.
	 */
	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_esp - 1;
	/* Both locks are dropped here and re-taken at the end. */
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 * From here on sig is the translated (Linux) signal number.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 * Only the first word of the mask fits in sc_mask; the remaining
	 * words go into sf_extramask below.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_edi;
	frame.sf_sc.sc_esi    = regs->tf_esi;
	frame.sf_sc.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.sc_edx    = regs->tf_edx;
	frame.sf_sc.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.sc_eax    = regs->tf_eax;
	frame.sf_sc.sc_eip    = regs->tf_eip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_eflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words 1..LINUX_NSIG_WORDS-1 travel in sf_extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * Execution resumes at the start of the signal code, which is
	 * located *sv_szsigcode bytes below PS_STRINGS.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Re-take both locks; the function was entered with them held. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}
509 
510 /*
511  * System call to cleanup state after a signal
512  * has been taken.  Reset signal mask and
513  * stack state from context left by sendsig (above).
514  * Return to previous pc and psl as specified by
515  * context left by sendsig. Check carefully to
516  * make sure that the user has not modified the
517  * psl to gain improper privileges or to cause
518  * a machine fault.
519  */
520 int
521 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
522 {
523 	struct proc *p = td->td_proc;
524 	struct l_sigframe frame;
525 	struct trapframe *regs;
526 	l_sigset_t lmask;
527 	int eflags, i;
528 
529 	regs = td->td_frame;
530 
531 #ifdef DEBUG
532 	if (ldebug(sigreturn))
533 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
534 #endif
535 	/*
536 	 * The trampoline code hands us the sigframe.
537 	 * It is unsafe to keep track of it ourselves, in the event that a
538 	 * program jumps out of a signal handler.
539 	 */
540 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
541 		return (EFAULT);
542 
543 	/*
544 	 * Check for security violations.
545 	 */
546 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
547 	eflags = frame.sf_sc.sc_eflags;
548 	/*
549 	 * XXX do allow users to change the privileged flag PSL_RF.  The
550 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
551 	 * sometimes set it there too.  tf_eflags is kept in the signal
552 	 * context during signal handling and there is no other place
553 	 * to remember it, so the PSL_RF bit may be corrupted by the
554 	 * signal handler without us knowing.  Corruption of the PSL_RF
555 	 * bit at worst causes one more or one less debugger trap, so
556 	 * allowing it is fairly harmless.
557 	 */
558 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
559 		return(EINVAL);
560 
561 	/*
562 	 * Don't allow users to load a valid privileged %cs.  Let the
563 	 * hardware check for invalid selectors, excess privilege in
564 	 * other selectors, invalid %eip's and invalid %esp's.
565 	 */
566 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
567 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
568 		trapsignal(td, SIGBUS, T_PROTFLT);
569 		return(EINVAL);
570 	}
571 
572 	lmask.__bits[0] = frame.sf_sc.sc_mask;
573 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
574 		lmask.__bits[i+1] = frame.sf_extramask[i];
575 	PROC_LOCK(p);
576 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
577 	SIG_CANTMASK(td->td_sigmask);
578 	signotify(td);
579 	PROC_UNLOCK(p);
580 
581 	/*
582 	 * Restore signal context.
583 	 */
584 	/* %gs was restored by the trampoline. */
585 	regs->tf_fs     = frame.sf_sc.sc_fs;
586 	regs->tf_es     = frame.sf_sc.sc_es;
587 	regs->tf_ds     = frame.sf_sc.sc_ds;
588 	regs->tf_edi    = frame.sf_sc.sc_edi;
589 	regs->tf_esi    = frame.sf_sc.sc_esi;
590 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
591 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
592 	regs->tf_edx    = frame.sf_sc.sc_edx;
593 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
594 	regs->tf_eax    = frame.sf_sc.sc_eax;
595 	regs->tf_eip    = frame.sf_sc.sc_eip;
596 	regs->tf_cs     = frame.sf_sc.sc_cs;
597 	regs->tf_eflags = eflags;
598 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
599 	regs->tf_ss     = frame.sf_sc.sc_ss;
600 
601 	return (EJUSTRETURN);
602 }
603 
604 /*
605  * System call to cleanup state after a signal
606  * has been taken.  Reset signal mask and
607  * stack state from context left by rt_sendsig (above).
608  * Return to previous pc and psl as specified by
609  * context left by sendsig. Check carefully to
610  * make sure that the user has not modified the
611  * psl to gain improper privileges or to cause
612  * a machine fault.
613  */
614 int
615 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
616 {
617 	struct proc *p = td->td_proc;
618 	struct l_ucontext uc;
619 	struct l_sigcontext *context;
620 	l_stack_t *lss;
621 	stack_t ss;
622 	struct trapframe *regs;
623 	int eflags;
624 
625 	regs = td->td_frame;
626 
627 #ifdef DEBUG
628 	if (ldebug(rt_sigreturn))
629 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
630 #endif
631 	/*
632 	 * The trampoline code hands us the ucontext.
633 	 * It is unsafe to keep track of it ourselves, in the event that a
634 	 * program jumps out of a signal handler.
635 	 */
636 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
637 		return (EFAULT);
638 
639 	context = &uc.uc_mcontext;
640 
641 	/*
642 	 * Check for security violations.
643 	 */
644 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
645 	eflags = context->sc_eflags;
646 	/*
647 	 * XXX do allow users to change the privileged flag PSL_RF.  The
648 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
649 	 * sometimes set it there too.  tf_eflags is kept in the signal
650 	 * context during signal handling and there is no other place
651 	 * to remember it, so the PSL_RF bit may be corrupted by the
652 	 * signal handler without us knowing.  Corruption of the PSL_RF
653 	 * bit at worst causes one more or one less debugger trap, so
654 	 * allowing it is fairly harmless.
655 	 */
656 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
657 		return(EINVAL);
658 
659 	/*
660 	 * Don't allow users to load a valid privileged %cs.  Let the
661 	 * hardware check for invalid selectors, excess privilege in
662 	 * other selectors, invalid %eip's and invalid %esp's.
663 	 */
664 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
665 	if (!CS_SECURE(context->sc_cs)) {
666 		trapsignal(td, SIGBUS, T_PROTFLT);
667 		return(EINVAL);
668 	}
669 
670 	PROC_LOCK(p);
671 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
672 	SIG_CANTMASK(td->td_sigmask);
673 	signotify(td);
674 	PROC_UNLOCK(p);
675 
676 	/*
677 	 * Restore signal context
678 	 */
679 	/* %gs was restored by the trampoline. */
680 	regs->tf_fs     = context->sc_fs;
681 	regs->tf_es     = context->sc_es;
682 	regs->tf_ds     = context->sc_ds;
683 	regs->tf_edi    = context->sc_edi;
684 	regs->tf_esi    = context->sc_esi;
685 	regs->tf_ebp    = context->sc_ebp;
686 	regs->tf_ebx    = context->sc_ebx;
687 	regs->tf_edx    = context->sc_edx;
688 	regs->tf_ecx    = context->sc_ecx;
689 	regs->tf_eax    = context->sc_eax;
690 	regs->tf_eip    = context->sc_eip;
691 	regs->tf_cs     = context->sc_cs;
692 	regs->tf_eflags = eflags;
693 	regs->tf_esp    = context->sc_esp_at_signal;
694 	regs->tf_ss     = context->sc_ss;
695 
696 	/*
697 	 * call sigaltstack & ignore results..
698 	 */
699 	lss = &uc.uc_stack;
700 	ss.ss_sp = lss->ss_sp;
701 	ss.ss_size = lss->ss_size;
702 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
703 
704 #ifdef DEBUG
705 	if (ldebug(rt_sigreturn))
706 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
707 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
708 #endif
709 	(void)kern_sigaltstack(td, &ss, NULL);
710 
711 	return (EJUSTRETURN);
712 }
713 
714 /*
715  * MPSAFE
716  */
717 static void
718 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
719 {
720 	args[0] = tf->tf_ebx;
721 	args[1] = tf->tf_ecx;
722 	args[2] = tf->tf_edx;
723 	args[3] = tf->tf_esi;
724 	args[4] = tf->tf_edi;
725 	args[5] = tf->tf_ebp;	/* Unconfirmed */
726 	*params = NULL;		/* no copyin */
727 }
728 
729 
730 
731 /*
732  * Dump core, into a file named as described in the comments for
733  * expand_name(), unless the process was setuid/setgid.
734  */
735 static int
736 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
737 {
738 	struct proc *p = td->td_proc;
739 	struct ucred *cred = td->td_ucred;
740 	struct vmspace *vm = p->p_vmspace;
741 	char *tempuser;
742 	int error;
743 
744 	if (ctob((uarea_pages + kstack_pages) +
745 	    vm->vm_dsize + vm->vm_ssize) >= limit)
746 		return (EFAULT);
747 	tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
748 	    M_WAITOK | M_ZERO);
749 	if (tempuser == NULL)
750 		return (ENOMEM);
751 	PROC_LOCK(p);
752 	fill_kinfo_proc(p, &p->p_uarea->u_kproc);
753 	PROC_UNLOCK(p);
754 	bcopy(p->p_uarea, tempuser, sizeof(struct user));
755 	bcopy(td->td_frame,
756 	    tempuser + ctob(uarea_pages) +
757 	    ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
758 	    sizeof(struct trapframe));
759 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
760 	    ctob(uarea_pages + kstack_pages),
761 	    (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
762 	    (int *)NULL, td);
763 	free(tempuser, M_TEMP);
764 	if (error == 0)
765 		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
766 		    (int)ctob(vm->vm_dsize),
767 		    (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
768 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
769 	if (error == 0)
770 		error = vn_rdwr_inchunks(UIO_WRITE, vp,
771 		    (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
772 		    round_page(ctob(vm->vm_ssize)),
773 		    (off_t)ctob(uarea_pages + kstack_pages) +
774 			ctob(vm->vm_dsize), UIO_USERSPACE,
775 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
776 	return (error);
777 }
778 /*
779  * If a linux binary is exec'ing something, try this image activator
780  * first.  We override standard shell script execution in order to
781  * be able to modify the interpreter path.  We only do this if a linux
782  * binary is doing the exec, so we do not create an EXEC module for it.
783  */
784 static int	exec_linux_imgact_try(struct image_params *iparams);
785 
786 static int
787 exec_linux_imgact_try(struct image_params *imgp)
788 {
789     const char *head = (const char *)imgp->image_header;
790     int error = -1;
791 
792     /*
793      * The interpreter for shell scripts run from a linux binary needs
794      * to be located in /compat/linux if possible in order to recursively
795      * maintain linux path emulation.
796      */
797     if (((const short *)head)[0] == SHELLMAGIC) {
798 	    /*
799 	     * Run our normal shell image activator.  If it succeeds attempt
800 	     * to use the alternate path for the interpreter.  If an alternate
801 	     * path is found, use our stringspace to store it.
802 	     */
803 	    if ((error = exec_shell_imgact(imgp)) == 0) {
804 		    char *rpath = NULL;
805 
806 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
807 			imgp->interpreter_name, &rpath, 0);
808 		    if (rpath != imgp->interpreter_name) {
809 			    int len = strlen(rpath) + 1;
810 
811 			    if (len <= MAXSHELLCMDLEN) {
812 				    memcpy(imgp->interpreter_name, rpath, len);
813 			    }
814 			    free(rpath, M_TEMP);
815 		    }
816 	    }
817     }
818     return(error);
819 }
820 
821 /*
822  * exec_setregs may initialize some registers differently than Linux
823  * does, thus potentially confusing Linux binaries. If necessary, we
824  * override the exec_setregs default(s) here.
825  */
826 static void
827 exec_linux_setregs(struct thread *td, u_long entry,
828 		   u_long stack, u_long ps_strings)
829 {
830 	struct pcb *pcb = td->td_pcb;
831 
832 	exec_setregs(td, entry, stack, ps_strings);
833 
834 	/* Linux sets %gs to 0, we default to _udatasel */
835 	pcb->pcb_gs = 0; load_gs(0);
836 }
837 
/*
 * System entry vector for the "Linux a.out" ABI.
 *
 * The initializer is positional.  It is identical to elf_linux_sysvec
 * except for the fixup routine, the name string and the coredump
 * routine.
 * NOTE(review): the per-entry remarks describe the values supplied;
 * the field each one lands in should be confirmed against the
 * declaration of struct sysentvec.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent */
	linux_sysent,		/* Linux system call table */
	0xff,			/* presumably a syscall-number mask; confirm */
	LINUX_SIGTBLSZ,		/* number of entries in the signal table */
	bsd_to_linux_signal,	/* FreeBSD -> Linux signal numbers */
	ELAST + 1,		/* number of entries in the errno table */
	bsd_to_linux_errno,	/* FreeBSD -> Linux errno values */
	translate_traps,	/* adjusts the signal raised by a trap */
	linux_fixup,		/* pushes envp, argv and argc on the stack */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal code */
	&linux_szsigcode,	/* size of the signal code */
	linux_prepsyscall,	/* loads syscall arguments from registers */
	"Linux a.out",
	linux_aout_coredump,	/* core dump writer */
	exec_linux_imgact_try,	/* shell-script handling for Linux execs */
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs	/* exec_setregs() plus %gs = 0 */
};
865 
/*
 * System entry vector for the "Linux ELF" ABI; both Linux ELF brands
 * defined in this file point at it.
 *
 * The initializer is positional.  It is identical to linux_sysvec
 * except for the fixup routine, the name string and the coredump
 * routine.
 * NOTE(review): the per-entry remarks describe the values supplied;
 * the field each one lands in should be confirmed against the
 * declaration of struct sysentvec.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* number of entries in linux_sysent */
	linux_sysent,		/* Linux system call table */
	0xff,			/* presumably a syscall-number mask; confirm */
	LINUX_SIGTBLSZ,		/* number of entries in the signal table */
	bsd_to_linux_signal,	/* FreeBSD -> Linux signal numbers */
	ELAST + 1,		/* number of entries in the errno table */
	bsd_to_linux_errno,	/* FreeBSD -> Linux errno values */
	translate_traps,	/* adjusts the signal raised by a trap */
	elf_linux_fixup,	/* writes the aux vector and pushes argc */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal code */
	&linux_szsigcode,	/* size of the signal code */
	linux_prepsyscall,	/* loads syscall arguments from registers */
	"Linux ELF",
	elf32_coredump,		/* core dump writer */
	exec_linux_imgact_try,	/* shell-script handling for Linux execs */
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs	/* exec_setregs() plus %gs = 0 */
};
893 
894 static Elf32_Brandinfo linux_brand = {
895 					ELFOSABI_LINUX,
896 					EM_386,
897 					"Linux",
898 					"/compat/linux",
899 					"/lib/ld-linux.so.1",
900 					&elf_linux_sysvec
901 				 };
902 
903 static Elf32_Brandinfo linux_glibc2brand = {
904 					ELFOSABI_LINUX,
905 					EM_386,
906 					"Linux",
907 					"/compat/linux",
908 					"/lib/ld-linux.so.2",
909 					&elf_linux_sysvec
910 				 };
911 
912 Elf32_Brandinfo *linux_brandlist[] = {
913 					&linux_brand,
914 					&linux_glibc2brand,
915 					NULL
916 				};
917 
918 static int
919 linux_elf_modevent(module_t mod, int type, void *data)
920 {
921 	Elf32_Brandinfo **brandinfo;
922 	int error;
923 	struct linux_ioctl_handler **lihp;
924 
925 	error = 0;
926 
927 	switch(type) {
928 	case MOD_LOAD:
929 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
930 		     ++brandinfo)
931 			if (elf32_insert_brand_entry(*brandinfo) < 0)
932 				error = EINVAL;
933 		if (error == 0) {
934 			SET_FOREACH(lihp, linux_ioctl_handler_set)
935 				linux_ioctl_register_handler(*lihp);
936 			if (bootverbose)
937 				printf("Linux ELF exec handler installed\n");
938 		} else
939 			printf("cannot insert Linux ELF brand handler\n");
940 		break;
941 	case MOD_UNLOAD:
942 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
943 		     ++brandinfo)
944 			if (elf32_brand_inuse(*brandinfo))
945 				error = EBUSY;
946 		if (error == 0) {
947 			for (brandinfo = &linux_brandlist[0];
948 			     *brandinfo != NULL; ++brandinfo)
949 				if (elf32_remove_brand_entry(*brandinfo) < 0)
950 					error = EINVAL;
951 		}
952 		if (error == 0) {
953 			SET_FOREACH(lihp, linux_ioctl_handler_set)
954 				linux_ioctl_unregister_handler(*lihp);
955 			if (bootverbose)
956 				printf("Linux ELF exec handler removed\n");
957 			linux_mib_destroy();
958 		} else
959 			printf("Could not deinstall ELF interpreter entry\n");
960 		break;
961 	default:
962 		break;
963 	}
964 	return error;
965 }
966 
/*
 * Module description for "linuxelf": the module name, its event
 * handler, and a third member initialized to 0.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Register the module at SI_SUB_EXEC with order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
973 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
974