xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 3507a1571359e36fafd825c6a5e7574e50a58702)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 /* XXX we use functions that might not exist. */
33 #include "opt_compat.h"
34 
35 #ifndef COMPAT_43
36 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
37 #endif
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/exec.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/kernel.h>
46 #include <sys/lock.h>
47 #include <sys/malloc.h>
48 #include <sys/module.h>
49 #include <sys/mutex.h>
50 #include <sys/proc.h>
51 #include <sys/signalvar.h>
52 #include <sys/syscallsubr.h>
53 #include <sys/sysent.h>
54 #include <sys/sysproto.h>
55 #include <sys/vnode.h>
56 
57 #include <vm/vm.h>
58 #include <vm/pmap.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_map.h>
61 #include <vm/vm_object.h>
62 #include <vm/vm_page.h>
63 #include <vm/vm_param.h>
64 
65 #include <machine/cpu.h>
66 #include <machine/md_var.h>
67 #include <machine/pcb.h>
68 
69 #include <i386/linux/linux.h>
70 #include <i386/linux/linux_proto.h>
71 #include <compat/linux/linux_mib.h>
72 #include <compat/linux/linux_signal.h>
73 #include <compat/linux/linux_util.h>
74 
75 MODULE_VERSION(linux, 1);
76 
77 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
78 
79 #if BYTE_ORDER == LITTLE_ENDIAN
80 #define SHELLMAGIC      0x2123 /* #! */
81 #else
82 #define SHELLMAGIC      0x2321
83 #endif
84 
85 /*
86  * Allow the sendsig functions to use the ldebug() facility
87  * even though they are not syscalls themselves. Map them
88  * to syscall 0. This is slightly less bogus than using
89  * ldebug(sigreturn).
90  */
91 #define	LINUX_SYS_linux_rt_sendsig	0
92 #define	LINUX_SYS_linux_sendsig		0
93 
94 #define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
95 #define	__LINUX_NPXCW__		0x37f
96 
97 extern char linux_sigcode[];
98 extern int linux_szsigcode;
99 
100 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
101 
102 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
103 
104 static int	linux_fixup(register_t **stack_base,
105 		    struct image_params *iparams);
106 static int	elf_linux_fixup(register_t **stack_base,
107 		    struct image_params *iparams);
108 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
109 		    caddr_t *params);
110 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
111 		    u_long code);
112 static void	exec_linux_setregs(struct thread *td, u_long entry,
113 				   u_long stack, u_long ps_strings);
114 
115 /*
116  * Linux syscalls return negative errno's, we do positive and map them
117  */
118 static int bsd_to_linux_errno[ELAST + 1] = {
119 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
120 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
121 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
122 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
123 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
124 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
125 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
126 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
127 	-6, -6, -43, -42, -75, -6, -84
128 };
129 
130 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
131 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
132 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
133 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
134 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
135 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
136 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
137 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
138 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
139 };
140 
141 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
142 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
143 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
144 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
145 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
146 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
147 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
148 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
149 	SIGIO, SIGURG, SIGSYS
150 };
151 
#define LINUX_T_UNKNOWN  255

/*
 * Linux trap number for each native trap code, indexed by the native
 * code.  Native codes without an entry of their own are reported as
 * LINUX_T_UNKNOWN.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a native trap code into the Linux trap number.
 *
 * Codes beyond the end of the table yield LINUX_T_UNKNOWN.  This is a
 * function rather than a macro so that the argument is evaluated
 * exactly once and is type-checked; callers keep using the same name
 * and call syntax.
 */
static __inline int
bsd_to_linux_trapcode(unsigned long code)
{
	if (code >= sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]))
		return (LINUX_T_UNKNOWN);
	return (_bsd_to_linux_trapcode[code]);
}
190 
191 /*
192  * If FreeBSD & Linux have a difference of opinion about what a trap
193  * means, deal with it here.
194  *
195  * MPSAFE
196  */
197 static int
198 translate_traps(int signal, int trap_code)
199 {
200 	if (signal != SIGBUS)
201 		return signal;
202 	switch (trap_code) {
203 	case T_PROTFLT:
204 	case T_TSSFLT:
205 	case T_DOUBLEFLT:
206 	case T_PAGEFLT:
207 		return SIGSEGV;
208 	default:
209 		return signal;
210 	}
211 }
212 
213 static int
214 linux_fixup(register_t **stack_base, struct image_params *imgp)
215 {
216 	register_t *argv, *envp;
217 
218 	argv = *stack_base;
219 	envp = *stack_base + (imgp->args->argc + 1);
220 	(*stack_base)--;
221 	**stack_base = (intptr_t)(void *)envp;
222 	(*stack_base)--;
223 	**stack_base = (intptr_t)(void *)argv;
224 	(*stack_base)--;
225 	**stack_base = imgp->args->argc;
226 	return 0;
227 }
228 
229 static int
230 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
231 {
232 	Elf32_Auxargs *args;
233 	register_t *pos;
234 
235 	KASSERT(curthread->td_proc == imgp->proc &&
236 	    (curthread->td_proc->p_flag & P_SA) == 0,
237 	    ("unsafe elf_linux_fixup(), should be curproc"));
238 	args = (Elf32_Auxargs *)imgp->auxargs;
239 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
240 
241 	if (args->trace)
242 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
243 	if (args->execfd != -1)
244 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
245 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
246 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
247 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
248 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
249 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
250 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
251 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
252 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
253 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
254 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
255 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
256 	AUXARGS_ENTRY(pos, AT_NULL, 0);
257 
258 	free(imgp->auxargs, M_TEMP);
259 	imgp->auxargs = NULL;
260 
261 	(*stack_base)--;
262 	**stack_base = (register_t)imgp->args->argc;
263 	return 0;
264 }
265 
266 extern int _ucodesel, _udatasel;
267 extern unsigned long linux_sznonrtsigcode;
268 
269 static void
270 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
271 {
272 	struct thread *td = curthread;
273 	struct proc *p = td->td_proc;
274 	struct sigacts *psp;
275 	struct trapframe *regs;
276 	struct l_rt_sigframe *fp, frame;
277 	int oonstack;
278 
279 	PROC_LOCK_ASSERT(p, MA_OWNED);
280 	psp = p->p_sigacts;
281 	mtx_assert(&psp->ps_mtx, MA_OWNED);
282 	regs = td->td_frame;
283 	oonstack = sigonstack(regs->tf_esp);
284 
285 #ifdef DEBUG
286 	if (ldebug(rt_sendsig))
287 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
288 		    catcher, sig, (void*)mask, code);
289 #endif
290 	/*
291 	 * Allocate space for the signal handler context.
292 	 */
293 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
294 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
295 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
296 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
297 	} else
298 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
299 	mtx_unlock(&psp->ps_mtx);
300 
301 	/*
302 	 * Build the argument list for the signal handler.
303 	 */
304 	if (p->p_sysent->sv_sigtbl)
305 		if (sig <= p->p_sysent->sv_sigsize)
306 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
307 
308 	bzero(&frame, sizeof(frame));
309 
310 	frame.sf_handler = catcher;
311 	frame.sf_sig = sig;
312 	frame.sf_siginfo = &fp->sf_si;
313 	frame.sf_ucontext = &fp->sf_sc;
314 
315 	/* Fill in POSIX parts */
316 	frame.sf_si.lsi_signo = sig;
317 	frame.sf_si.lsi_code = code;
318 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
319 
320 	/*
321 	 * Build the signal context to be used by sigreturn.
322 	 */
323 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
324 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
325 
326 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
327 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
328 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
329 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
330 	PROC_UNLOCK(p);
331 
332 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
333 
334 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
335 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
336 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
337 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
338 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
339 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
340 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
341 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
342 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
343 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
344 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
345 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
346 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
347 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
348 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
349 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
350 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
351 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
352 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
353 
354 #ifdef DEBUG
355 	if (ldebug(rt_sendsig))
356 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
357 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
358 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
359 #endif
360 
361 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
362 		/*
363 		 * Process has trashed its stack; give it an illegal
364 		 * instruction to halt it in its tracks.
365 		 */
366 #ifdef DEBUG
367 		if (ldebug(rt_sendsig))
368 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
369 			    fp, oonstack);
370 #endif
371 		PROC_LOCK(p);
372 		sigexit(td, SIGILL);
373 	}
374 
375 	/*
376 	 * Build context to run handler in.
377 	 */
378 	regs->tf_esp = (int)fp;
379 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
380 	    linux_sznonrtsigcode;
381 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
382 	regs->tf_cs = _ucodesel;
383 	regs->tf_ds = _udatasel;
384 	regs->tf_es = _udatasel;
385 	regs->tf_fs = _udatasel;
386 	regs->tf_ss = _udatasel;
387 	PROC_LOCK(p);
388 	mtx_lock(&psp->ps_mtx);
389 }
390 
391 
392 /*
393  * Send an interrupt to process.
394  *
395  * Stack is set up to allow sigcode stored
396  * in u. to call routine, followed by kcall
397  * to sigreturn routine below.  After sigreturn
398  * resets the signal mask, the stack, and the
399  * frame pointer, it returns to the user
400  * specified pc, psl.
401  */
402 static void
403 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
404 {
405 	struct thread *td = curthread;
406 	struct proc *p = td->td_proc;
407 	struct sigacts *psp;
408 	struct trapframe *regs;
409 	struct l_sigframe *fp, frame;
410 	l_sigset_t lmask;
411 	int oonstack, i;
412 
413 	PROC_LOCK_ASSERT(p, MA_OWNED);
414 	psp = p->p_sigacts;
415 	mtx_assert(&psp->ps_mtx, MA_OWNED);
416 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
417 		/* Signal handler installed with SA_SIGINFO. */
418 		linux_rt_sendsig(catcher, sig, mask, code);
419 		return;
420 	}
421 
422 	regs = td->td_frame;
423 	oonstack = sigonstack(regs->tf_esp);
424 
425 #ifdef DEBUG
426 	if (ldebug(sendsig))
427 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
428 		    catcher, sig, (void*)mask, code);
429 #endif
430 
431 	/*
432 	 * Allocate space for the signal handler context.
433 	 */
434 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
435 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
436 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
437 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
438 	} else
439 		fp = (struct l_sigframe *)regs->tf_esp - 1;
440 	mtx_unlock(&psp->ps_mtx);
441 	PROC_UNLOCK(p);
442 
443 	/*
444 	 * Build the argument list for the signal handler.
445 	 */
446 	if (p->p_sysent->sv_sigtbl)
447 		if (sig <= p->p_sysent->sv_sigsize)
448 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
449 
450 	bzero(&frame, sizeof(frame));
451 
452 	frame.sf_handler = catcher;
453 	frame.sf_sig = sig;
454 
455 	bsd_to_linux_sigset(mask, &lmask);
456 
457 	/*
458 	 * Build the signal context to be used by sigreturn.
459 	 */
460 	frame.sf_sc.sc_mask   = lmask.__bits[0];
461 	frame.sf_sc.sc_gs     = rgs();
462 	frame.sf_sc.sc_fs     = regs->tf_fs;
463 	frame.sf_sc.sc_es     = regs->tf_es;
464 	frame.sf_sc.sc_ds     = regs->tf_ds;
465 	frame.sf_sc.sc_edi    = regs->tf_edi;
466 	frame.sf_sc.sc_esi    = regs->tf_esi;
467 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
468 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
469 	frame.sf_sc.sc_edx    = regs->tf_edx;
470 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
471 	frame.sf_sc.sc_eax    = regs->tf_eax;
472 	frame.sf_sc.sc_eip    = regs->tf_eip;
473 	frame.sf_sc.sc_cs     = regs->tf_cs;
474 	frame.sf_sc.sc_eflags = regs->tf_eflags;
475 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
476 	frame.sf_sc.sc_ss     = regs->tf_ss;
477 	frame.sf_sc.sc_err    = regs->tf_err;
478 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
479 
480 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
481 		frame.sf_extramask[i] = lmask.__bits[i+1];
482 
483 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
484 		/*
485 		 * Process has trashed its stack; give it an illegal
486 		 * instruction to halt it in its tracks.
487 		 */
488 		PROC_LOCK(p);
489 		sigexit(td, SIGILL);
490 	}
491 
492 	/*
493 	 * Build context to run handler in.
494 	 */
495 	regs->tf_esp = (int)fp;
496 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
497 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
498 	regs->tf_cs = _ucodesel;
499 	regs->tf_ds = _udatasel;
500 	regs->tf_es = _udatasel;
501 	regs->tf_fs = _udatasel;
502 	regs->tf_ss = _udatasel;
503 	PROC_LOCK(p);
504 	mtx_lock(&psp->ps_mtx);
505 }
506 
507 /*
508  * System call to cleanup state after a signal
509  * has been taken.  Reset signal mask and
510  * stack state from context left by sendsig (above).
511  * Return to previous pc and psl as specified by
512  * context left by sendsig. Check carefully to
513  * make sure that the user has not modified the
514  * psl to gain improper privileges or to cause
515  * a machine fault.
516  */
517 int
518 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
519 {
520 	struct proc *p = td->td_proc;
521 	struct l_sigframe frame;
522 	struct trapframe *regs;
523 	l_sigset_t lmask;
524 	int eflags, i;
525 
526 	regs = td->td_frame;
527 
528 #ifdef DEBUG
529 	if (ldebug(sigreturn))
530 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
531 #endif
532 	/*
533 	 * The trampoline code hands us the sigframe.
534 	 * It is unsafe to keep track of it ourselves, in the event that a
535 	 * program jumps out of a signal handler.
536 	 */
537 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
538 		return (EFAULT);
539 
540 	/*
541 	 * Check for security violations.
542 	 */
543 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
544 	eflags = frame.sf_sc.sc_eflags;
545 	/*
546 	 * XXX do allow users to change the privileged flag PSL_RF.  The
547 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
548 	 * sometimes set it there too.  tf_eflags is kept in the signal
549 	 * context during signal handling and there is no other place
550 	 * to remember it, so the PSL_RF bit may be corrupted by the
551 	 * signal handler without us knowing.  Corruption of the PSL_RF
552 	 * bit at worst causes one more or one less debugger trap, so
553 	 * allowing it is fairly harmless.
554 	 */
555 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
556 		return(EINVAL);
557 
558 	/*
559 	 * Don't allow users to load a valid privileged %cs.  Let the
560 	 * hardware check for invalid selectors, excess privilege in
561 	 * other selectors, invalid %eip's and invalid %esp's.
562 	 */
563 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
564 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
565 		trapsignal(td, SIGBUS, T_PROTFLT);
566 		return(EINVAL);
567 	}
568 
569 	lmask.__bits[0] = frame.sf_sc.sc_mask;
570 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
571 		lmask.__bits[i+1] = frame.sf_extramask[i];
572 	PROC_LOCK(p);
573 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
574 	SIG_CANTMASK(td->td_sigmask);
575 	signotify(td);
576 	PROC_UNLOCK(p);
577 
578 	/*
579 	 * Restore signal context.
580 	 */
581 	/* %gs was restored by the trampoline. */
582 	regs->tf_fs     = frame.sf_sc.sc_fs;
583 	regs->tf_es     = frame.sf_sc.sc_es;
584 	regs->tf_ds     = frame.sf_sc.sc_ds;
585 	regs->tf_edi    = frame.sf_sc.sc_edi;
586 	regs->tf_esi    = frame.sf_sc.sc_esi;
587 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
588 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
589 	regs->tf_edx    = frame.sf_sc.sc_edx;
590 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
591 	regs->tf_eax    = frame.sf_sc.sc_eax;
592 	regs->tf_eip    = frame.sf_sc.sc_eip;
593 	regs->tf_cs     = frame.sf_sc.sc_cs;
594 	regs->tf_eflags = eflags;
595 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
596 	regs->tf_ss     = frame.sf_sc.sc_ss;
597 
598 	return (EJUSTRETURN);
599 }
600 
601 /*
602  * System call to cleanup state after a signal
603  * has been taken.  Reset signal mask and
604  * stack state from context left by rt_sendsig (above).
605  * Return to previous pc and psl as specified by
606  * context left by sendsig. Check carefully to
607  * make sure that the user has not modified the
608  * psl to gain improper privileges or to cause
609  * a machine fault.
610  */
611 int
612 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
613 {
614 	struct proc *p = td->td_proc;
615 	struct l_ucontext uc;
616 	struct l_sigcontext *context;
617 	l_stack_t *lss;
618 	stack_t ss;
619 	struct trapframe *regs;
620 	int eflags;
621 
622 	regs = td->td_frame;
623 
624 #ifdef DEBUG
625 	if (ldebug(rt_sigreturn))
626 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
627 #endif
628 	/*
629 	 * The trampoline code hands us the ucontext.
630 	 * It is unsafe to keep track of it ourselves, in the event that a
631 	 * program jumps out of a signal handler.
632 	 */
633 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
634 		return (EFAULT);
635 
636 	context = &uc.uc_mcontext;
637 
638 	/*
639 	 * Check for security violations.
640 	 */
641 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
642 	eflags = context->sc_eflags;
643 	/*
644 	 * XXX do allow users to change the privileged flag PSL_RF.  The
645 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
646 	 * sometimes set it there too.  tf_eflags is kept in the signal
647 	 * context during signal handling and there is no other place
648 	 * to remember it, so the PSL_RF bit may be corrupted by the
649 	 * signal handler without us knowing.  Corruption of the PSL_RF
650 	 * bit at worst causes one more or one less debugger trap, so
651 	 * allowing it is fairly harmless.
652 	 */
653 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
654 		return(EINVAL);
655 
656 	/*
657 	 * Don't allow users to load a valid privileged %cs.  Let the
658 	 * hardware check for invalid selectors, excess privilege in
659 	 * other selectors, invalid %eip's and invalid %esp's.
660 	 */
661 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
662 	if (!CS_SECURE(context->sc_cs)) {
663 		trapsignal(td, SIGBUS, T_PROTFLT);
664 		return(EINVAL);
665 	}
666 
667 	PROC_LOCK(p);
668 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
669 	SIG_CANTMASK(td->td_sigmask);
670 	signotify(td);
671 	PROC_UNLOCK(p);
672 
673 	/*
674 	 * Restore signal context
675 	 */
676 	/* %gs was restored by the trampoline. */
677 	regs->tf_fs     = context->sc_fs;
678 	regs->tf_es     = context->sc_es;
679 	regs->tf_ds     = context->sc_ds;
680 	regs->tf_edi    = context->sc_edi;
681 	regs->tf_esi    = context->sc_esi;
682 	regs->tf_ebp    = context->sc_ebp;
683 	regs->tf_ebx    = context->sc_ebx;
684 	regs->tf_edx    = context->sc_edx;
685 	regs->tf_ecx    = context->sc_ecx;
686 	regs->tf_eax    = context->sc_eax;
687 	regs->tf_eip    = context->sc_eip;
688 	regs->tf_cs     = context->sc_cs;
689 	regs->tf_eflags = eflags;
690 	regs->tf_esp    = context->sc_esp_at_signal;
691 	regs->tf_ss     = context->sc_ss;
692 
693 	/*
694 	 * call sigaltstack & ignore results..
695 	 */
696 	lss = &uc.uc_stack;
697 	ss.ss_sp = lss->ss_sp;
698 	ss.ss_size = lss->ss_size;
699 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
700 
701 #ifdef DEBUG
702 	if (ldebug(rt_sigreturn))
703 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
704 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
705 #endif
706 	(void)kern_sigaltstack(td, &ss, NULL);
707 
708 	return (EJUSTRETURN);
709 }
710 
711 /*
712  * MPSAFE
713  */
714 static void
715 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
716 {
717 	args[0] = tf->tf_ebx;
718 	args[1] = tf->tf_ecx;
719 	args[2] = tf->tf_edx;
720 	args[3] = tf->tf_esi;
721 	args[4] = tf->tf_edi;
722 	args[5] = tf->tf_ebp;	/* Unconfirmed */
723 	*params = NULL;		/* no copyin */
724 }
725 
726 /*
727  * If a linux binary is exec'ing something, try this image activator
728  * first.  We override standard shell script execution in order to
729  * be able to modify the interpreter path.  We only do this if a linux
730  * binary is doing the exec, so we do not create an EXEC module for it.
731  */
732 static int	exec_linux_imgact_try(struct image_params *iparams);
733 
734 static int
735 exec_linux_imgact_try(struct image_params *imgp)
736 {
737     const char *head = (const char *)imgp->image_header;
738     char *rpath;
739     int error = -1, len;
740 
741     /*
742      * The interpreter for shell scripts run from a linux binary needs
743      * to be located in /compat/linux if possible in order to recursively
744      * maintain linux path emulation.
745      */
746     if (((const short *)head)[0] == SHELLMAGIC) {
747 	    /*
748 	     * Run our normal shell image activator.  If it succeeds attempt
749 	     * to use the alternate path for the interpreter.  If an alternate
750 	     * path is found, use our stringspace to store it.
751 	     */
752 	    if ((error = exec_shell_imgact(imgp)) == 0) {
753 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
754 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
755 		    if (rpath != NULL) {
756 			    len = strlen(rpath) + 1;
757 
758 			    if (len <= MAXSHELLCMDLEN) {
759 				    memcpy(imgp->interpreter_name, rpath, len);
760 			    }
761 			    free(rpath, M_TEMP);
762 		    }
763 	    }
764     }
765     return(error);
766 }
767 
768 /*
769  * exec_setregs may initialize some registers differently than Linux
770  * does, thus potentially confusing Linux binaries. If necessary, we
771  * override the exec_setregs default(s) here.
772  */
773 static void
774 exec_linux_setregs(struct thread *td, u_long entry,
775 		   u_long stack, u_long ps_strings)
776 {
777 	static const u_short control = __LINUX_NPXCW__;
778 	struct pcb *pcb = td->td_pcb;
779 
780 	exec_setregs(td, entry, stack, ps_strings);
781 
782 	/* Linux sets %gs to 0, we default to _udatasel */
783 	pcb->pcb_gs = 0; load_gs(0);
784 
785 	/* Linux sets the i387 to extended precision. */
786 	fldcw(&control);
787 }
788 
789 struct sysentvec linux_sysvec = {
790 	LINUX_SYS_MAXSYSCALL,
791 	linux_sysent,
792 	0xff,
793 	LINUX_SIGTBLSZ,
794 	bsd_to_linux_signal,
795 	ELAST + 1,
796 	bsd_to_linux_errno,
797 	translate_traps,
798 	linux_fixup,
799 	linux_sendsig,
800 	linux_sigcode,
801 	&linux_szsigcode,
802 	linux_prepsyscall,
803 	"Linux a.out",
804 	NULL,
805 	exec_linux_imgact_try,
806 	LINUX_MINSIGSTKSZ,
807 	PAGE_SIZE,
808 	VM_MIN_ADDRESS,
809 	VM_MAXUSER_ADDRESS,
810 	USRSTACK,
811 	PS_STRINGS,
812 	VM_PROT_ALL,
813 	exec_copyout_strings,
814 	exec_linux_setregs,
815 	NULL
816 };
817 
818 struct sysentvec elf_linux_sysvec = {
819 	LINUX_SYS_MAXSYSCALL,
820 	linux_sysent,
821 	0xff,
822 	LINUX_SIGTBLSZ,
823 	bsd_to_linux_signal,
824 	ELAST + 1,
825 	bsd_to_linux_errno,
826 	translate_traps,
827 	elf_linux_fixup,
828 	linux_sendsig,
829 	linux_sigcode,
830 	&linux_szsigcode,
831 	linux_prepsyscall,
832 	"Linux ELF",
833 	elf32_coredump,
834 	exec_linux_imgact_try,
835 	LINUX_MINSIGSTKSZ,
836 	PAGE_SIZE,
837 	VM_MIN_ADDRESS,
838 	VM_MAXUSER_ADDRESS,
839 	USRSTACK,
840 	PS_STRINGS,
841 	VM_PROT_ALL,
842 	exec_copyout_strings,
843 	exec_linux_setregs,
844 	NULL
845 };
846 
847 static Elf32_Brandinfo linux_brand = {
848 					ELFOSABI_LINUX,
849 					EM_386,
850 					"Linux",
851 					"/compat/linux",
852 					"/lib/ld-linux.so.1",
853 					&elf_linux_sysvec,
854 					NULL,
855 				 };
856 
857 static Elf32_Brandinfo linux_glibc2brand = {
858 					ELFOSABI_LINUX,
859 					EM_386,
860 					"Linux",
861 					"/compat/linux",
862 					"/lib/ld-linux.so.2",
863 					&elf_linux_sysvec,
864 					NULL,
865 				 };
866 
867 Elf32_Brandinfo *linux_brandlist[] = {
868 					&linux_brand,
869 					&linux_glibc2brand,
870 					NULL
871 				};
872 
873 static int
874 linux_elf_modevent(module_t mod, int type, void *data)
875 {
876 	Elf32_Brandinfo **brandinfo;
877 	int error;
878 	struct linux_ioctl_handler **lihp;
879 
880 	error = 0;
881 
882 	switch(type) {
883 	case MOD_LOAD:
884 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
885 		     ++brandinfo)
886 			if (elf32_insert_brand_entry(*brandinfo) < 0)
887 				error = EINVAL;
888 		if (error == 0) {
889 			SET_FOREACH(lihp, linux_ioctl_handler_set)
890 				linux_ioctl_register_handler(*lihp);
891 			if (bootverbose)
892 				printf("Linux ELF exec handler installed\n");
893 		} else
894 			printf("cannot insert Linux ELF brand handler\n");
895 		break;
896 	case MOD_UNLOAD:
897 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
898 		     ++brandinfo)
899 			if (elf32_brand_inuse(*brandinfo))
900 				error = EBUSY;
901 		if (error == 0) {
902 			for (brandinfo = &linux_brandlist[0];
903 			     *brandinfo != NULL; ++brandinfo)
904 				if (elf32_remove_brand_entry(*brandinfo) < 0)
905 					error = EINVAL;
906 		}
907 		if (error == 0) {
908 			SET_FOREACH(lihp, linux_ioctl_handler_set)
909 				linux_ioctl_unregister_handler(*lihp);
910 			if (bootverbose)
911 				printf("Linux ELF exec handler removed\n");
912 			linux_mib_destroy();
913 		} else
914 			printf("Could not deinstall ELF interpreter entry\n");
915 		break;
916 	default:
917 		return EOPNOTSUPP;
918 	}
919 	return error;
920 }
921 
922 static moduledata_t linux_elf_mod = {
923 	"linuxelf",
924 	linux_elf_modevent,
925 	0
926 };
927 
928 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
929