xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 4f29da19bd44f0e99f021510460a81bf754c21d2)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/imgact.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/imgact_elf.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/signalvar.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/vnode.h>
49 
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_param.h>
57 
58 #include <machine/cpu.h>
59 #include <machine/md_var.h>
60 #include <machine/pcb.h>
61 
62 #include <i386/linux/linux.h>
63 #include <i386/linux/linux_proto.h>
64 #include <compat/linux/linux_mib.h>
65 #include <compat/linux/linux_signal.h>
66 #include <compat/linux/linux_util.h>
67 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/* Malloc type tag ("linux") for Linux emulation allocations. */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two characters "#!" read as a single 16-bit quantity; the value
 * depends on the host byte order.  exec_linux_imgact_try() compares the
 * first short of an image header against this to spot shell scripts.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif
77 
/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

/* Execute the x87 "fldcw" instruction with *addr as its memory operand. */
#define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
/*
 * Control word that exec_linux_setregs() loads with fldcw() so a new
 * Linux image starts with the i387 in extended precision.
 */
#define	__LINUX_NPXCW__		0x37f
89 
/*
 * Signal trampoline blob and its size; both are defined outside this
 * file and are installed in the sysentvec tables below.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set of ioctl handlers (un)registered by linux_elf_modevent(). */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

/* Forward declarations for the hooks referenced by the sysentvec tables. */
static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td, u_long entry,
				   u_long stack, u_long ps_strings);
106 
/*
 * Linux syscalls return negative errno's, we do positive and map them.
 *
 * The table is sized ELAST + 1 and is installed in both sysentvec
 * tables below; each entry holds the negated Linux errno value.
 * Presumably it is indexed by the native (positive) errno number --
 * confirm against the code that consumes the sysentvec error table.
 *
 * NOTE(review): only 87 initializers (indices 0..86) are listed.  If
 * ELAST + 1 is larger than 87, the remaining slots are implicitly
 * zero-initialized, which would make those errors look like success (0)
 * to a Linux binary.  Check the current value of ELAST and extend the
 * table if needed.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	-6, -6, -43, -42, -75, -6, -84
};
121 
/*
 * Native-to-Linux signal number table with LINUX_SIGTBLSZ entries.
 * It is installed in both sysentvec tables below; the sendsig routines
 * look up p_sysent->sv_sigtbl[_SIG_IDX(sig)] to translate the signal
 * number handed to the handler.  Two slots hold 0 instead of a
 * LINUX_SIG* constant.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};
132 
/*
 * Linux-to-native signal number table with LINUX_SIGTBLSZ entries; the
 * reverse direction of bsd_to_linux_signal[].  Not referenced in this
 * file; presumably indexed the same way (signal number minus one) by
 * the shared Linux signal code -- confirm against its users.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};
143 
/* Value reported to Linux when a native trap code has no mapping. */
#define LINUX_T_UNKNOWN  255
/*
 * Native trap code -> Linux trap number, indexed by the native T_*
 * value noted in each entry's comment.  Use bsd_to_linux_trapcode()
 * rather than indexing this array directly.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup: yields the table entry when `code' is a valid
 * index and LINUX_T_UNKNOWN otherwise.  The index is compared against a
 * sizeof-derived (unsigned) element count, so a negative int argument
 * converts to a large unsigned value and also yields LINUX_T_UNKNOWN.
 * Note that `code' is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)
182 
183 /*
184  * If FreeBSD & Linux have a difference of opinion about what a trap
185  * means, deal with it here.
186  *
187  * MPSAFE
188  */
189 static int
190 translate_traps(int signal, int trap_code)
191 {
192 	if (signal != SIGBUS)
193 		return signal;
194 	switch (trap_code) {
195 	case T_PROTFLT:
196 	case T_TSSFLT:
197 	case T_DOUBLEFLT:
198 	case T_PAGEFLT:
199 		return SIGSEGV;
200 	default:
201 		return signal;
202 	}
203 }
204 
205 static int
206 linux_fixup(register_t **stack_base, struct image_params *imgp)
207 {
208 	register_t *argv, *envp;
209 
210 	argv = *stack_base;
211 	envp = *stack_base + (imgp->args->argc + 1);
212 	(*stack_base)--;
213 	**stack_base = (intptr_t)(void *)envp;
214 	(*stack_base)--;
215 	**stack_base = (intptr_t)(void *)argv;
216 	(*stack_base)--;
217 	**stack_base = imgp->args->argc;
218 	return 0;
219 }
220 
221 static int
222 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
223 {
224 	Elf32_Auxargs *args;
225 	register_t *pos;
226 
227 	KASSERT(curthread->td_proc == imgp->proc &&
228 	    (curthread->td_proc->p_flag & P_SA) == 0,
229 	    ("unsafe elf_linux_fixup(), should be curproc"));
230 	args = (Elf32_Auxargs *)imgp->auxargs;
231 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
232 
233 	if (args->trace)
234 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
235 	if (args->execfd != -1)
236 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
237 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
238 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
239 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
240 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
241 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
242 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
243 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
244 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
245 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
246 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
247 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
248 	AUXARGS_ENTRY(pos, AT_NULL, 0);
249 
250 	free(imgp->auxargs, M_TEMP);
251 	imgp->auxargs = NULL;
252 
253 	(*stack_base)--;
254 	**stack_base = (register_t)imgp->args->argc;
255 	return 0;
256 }
257 
/* User code/data segment selectors loaded into the handler's trapframe. */
extern int _ucodesel, _udatasel;
/*
 * Offset added to the start of the signal trampoline to reach the entry
 * used for rt (SA_SIGINFO) handlers; see linux_rt_sendsig().
 */
extern unsigned long linux_sznonrtsigcode;
260 
261 static void
262 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
263 {
264 	struct thread *td = curthread;
265 	struct proc *p = td->td_proc;
266 	struct sigacts *psp;
267 	struct trapframe *regs;
268 	struct l_rt_sigframe *fp, frame;
269 	int sig, code;
270 	int oonstack;
271 
272 	sig = ksi->ksi_signo;
273 	code = ksi->ksi_code;
274 	PROC_LOCK_ASSERT(p, MA_OWNED);
275 	psp = p->p_sigacts;
276 	mtx_assert(&psp->ps_mtx, MA_OWNED);
277 	regs = td->td_frame;
278 	oonstack = sigonstack(regs->tf_esp);
279 
280 #ifdef DEBUG
281 	if (ldebug(rt_sendsig))
282 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
283 		    catcher, sig, (void*)mask, code);
284 #endif
285 	/*
286 	 * Allocate space for the signal handler context.
287 	 */
288 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
289 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
290 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
291 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
292 	} else
293 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
294 	mtx_unlock(&psp->ps_mtx);
295 
296 	/*
297 	 * Build the argument list for the signal handler.
298 	 */
299 	if (p->p_sysent->sv_sigtbl)
300 		if (sig <= p->p_sysent->sv_sigsize)
301 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
302 
303 	bzero(&frame, sizeof(frame));
304 
305 	frame.sf_handler = catcher;
306 	frame.sf_sig = sig;
307 	frame.sf_siginfo = &fp->sf_si;
308 	frame.sf_ucontext = &fp->sf_sc;
309 
310 	/* Fill in POSIX parts */
311 	frame.sf_si.lsi_signo = sig;
312 	frame.sf_si.lsi_code = code;
313 	frame.sf_si.lsi_addr = ksi->ksi_addr;
314 
315 	/*
316 	 * Build the signal context to be used by sigreturn.
317 	 */
318 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
319 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
320 
321 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
322 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
323 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
324 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
325 	PROC_UNLOCK(p);
326 
327 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
328 
329 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
330 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
331 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
332 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
333 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
334 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
335 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
336 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
337 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
338 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
339 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
340 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
341 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
342 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
343 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
344 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
345 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
346 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
347 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
348 
349 #ifdef DEBUG
350 	if (ldebug(rt_sendsig))
351 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
352 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
353 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
354 #endif
355 
356 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
357 		/*
358 		 * Process has trashed its stack; give it an illegal
359 		 * instruction to halt it in its tracks.
360 		 */
361 #ifdef DEBUG
362 		if (ldebug(rt_sendsig))
363 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
364 			    fp, oonstack);
365 #endif
366 		PROC_LOCK(p);
367 		sigexit(td, SIGILL);
368 	}
369 
370 	/*
371 	 * Build context to run handler in.
372 	 */
373 	regs->tf_esp = (int)fp;
374 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
375 	    linux_sznonrtsigcode;
376 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
377 	regs->tf_cs = _ucodesel;
378 	regs->tf_ds = _udatasel;
379 	regs->tf_es = _udatasel;
380 	regs->tf_fs = _udatasel;
381 	regs->tf_ss = _udatasel;
382 	PROC_LOCK(p);
383 	mtx_lock(&psp->ps_mtx);
384 }
385 
386 
387 /*
388  * Send an interrupt to process.
389  *
390  * Stack is set up to allow sigcode stored
391  * in u. to call routine, followed by kcall
392  * to sigreturn routine below.  After sigreturn
393  * resets the signal mask, the stack, and the
394  * frame pointer, it returns to the user
395  * specified pc, psl.
396  */
397 static void
398 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
399 {
400 	struct thread *td = curthread;
401 	struct proc *p = td->td_proc;
402 	struct sigacts *psp;
403 	struct trapframe *regs;
404 	struct l_sigframe *fp, frame;
405 	l_sigset_t lmask;
406 	int sig, code;
407 	int oonstack, i;
408 
409 	PROC_LOCK_ASSERT(p, MA_OWNED);
410 	psp = p->p_sigacts;
411 	sig = ksi->ksi_signo;
412 	code = ksi->ksi_code;
413 	mtx_assert(&psp->ps_mtx, MA_OWNED);
414 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
415 		/* Signal handler installed with SA_SIGINFO. */
416 		linux_rt_sendsig(catcher, ksi, mask);
417 		return;
418 	}
419 	regs = td->td_frame;
420 	oonstack = sigonstack(regs->tf_esp);
421 
422 #ifdef DEBUG
423 	if (ldebug(sendsig))
424 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
425 		    catcher, sig, (void*)mask, code);
426 #endif
427 
428 	/*
429 	 * Allocate space for the signal handler context.
430 	 */
431 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
432 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
433 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
434 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
435 	} else
436 		fp = (struct l_sigframe *)regs->tf_esp - 1;
437 	mtx_unlock(&psp->ps_mtx);
438 	PROC_UNLOCK(p);
439 
440 	/*
441 	 * Build the argument list for the signal handler.
442 	 */
443 	if (p->p_sysent->sv_sigtbl)
444 		if (sig <= p->p_sysent->sv_sigsize)
445 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
446 
447 	bzero(&frame, sizeof(frame));
448 
449 	frame.sf_handler = catcher;
450 	frame.sf_sig = sig;
451 
452 	bsd_to_linux_sigset(mask, &lmask);
453 
454 	/*
455 	 * Build the signal context to be used by sigreturn.
456 	 */
457 	frame.sf_sc.sc_mask   = lmask.__bits[0];
458 	frame.sf_sc.sc_gs     = rgs();
459 	frame.sf_sc.sc_fs     = regs->tf_fs;
460 	frame.sf_sc.sc_es     = regs->tf_es;
461 	frame.sf_sc.sc_ds     = regs->tf_ds;
462 	frame.sf_sc.sc_edi    = regs->tf_edi;
463 	frame.sf_sc.sc_esi    = regs->tf_esi;
464 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
465 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
466 	frame.sf_sc.sc_edx    = regs->tf_edx;
467 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
468 	frame.sf_sc.sc_eax    = regs->tf_eax;
469 	frame.sf_sc.sc_eip    = regs->tf_eip;
470 	frame.sf_sc.sc_cs     = regs->tf_cs;
471 	frame.sf_sc.sc_eflags = regs->tf_eflags;
472 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
473 	frame.sf_sc.sc_ss     = regs->tf_ss;
474 	frame.sf_sc.sc_err    = regs->tf_err;
475 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
476 
477 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
478 		frame.sf_extramask[i] = lmask.__bits[i+1];
479 
480 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
481 		/*
482 		 * Process has trashed its stack; give it an illegal
483 		 * instruction to halt it in its tracks.
484 		 */
485 		PROC_LOCK(p);
486 		sigexit(td, SIGILL);
487 	}
488 
489 	/*
490 	 * Build context to run handler in.
491 	 */
492 	regs->tf_esp = (int)fp;
493 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
494 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
495 	regs->tf_cs = _ucodesel;
496 	regs->tf_ds = _udatasel;
497 	regs->tf_es = _udatasel;
498 	regs->tf_fs = _udatasel;
499 	regs->tf_ss = _udatasel;
500 	PROC_LOCK(p);
501 	mtx_lock(&psp->ps_mtx);
502 }
503 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * args->sfp is the user address of the struct l_sigframe to restore.
 *
 * Returns EFAULT if the frame cannot be copied in, EINVAL if the saved
 * eflags or %cs fail the security checks (a bad %cs additionally posts
 * SIGBUS via trapsignal()), and EJUSTRETURN once the trapframe has been
 * reloaded from the frame.
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	int eflags, i;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations.
	 */
	/* True when ef and oef differ only in bits within PSL_USERCHANGE. */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
	/* True when the selector's privilege level is the user level. */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_eip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/*
	 * Reassemble the Linux signal mask from the context word and the
	 * extra words stored in the frame, then install it under the
	 * process lock.
	 */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	PROC_LOCK(p);
	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context.
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_edi    = frame.sf_sc.sc_edi;
	regs->tf_esi    = frame.sf_sc.sc_esi;
	regs->tf_ebp    = frame.sf_sc.sc_ebp;
	regs->tf_ebx    = frame.sf_sc.sc_ebx;
	regs->tf_edx    = frame.sf_sc.sc_edx;
	regs->tf_ecx    = frame.sf_sc.sc_ecx;
	regs->tf_eax    = frame.sf_sc.sc_eax;
	regs->tf_eip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;

	return (EJUSTRETURN);
}
603 
/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * args->ucp is the user address of the struct l_ucontext to restore.
 *
 * Returns EFAULT if the context cannot be copied in, EINVAL if the
 * saved eflags or %cs fail the security checks (a bad %cs additionally
 * posts SIGBUS via trapsignal()), and EJUSTRETURN once the trapframe
 * and alternate-stack settings have been reloaded.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct proc *p = td->td_proc;
	struct l_ucontext uc;
	struct l_sigcontext *context;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.
	 */
	/* True when ef and oef differ only in bits within PSL_USERCHANGE. */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
	/* True when the selector's privilege level is the user level. */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_eip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Install the saved signal mask under the process lock. */
	PROC_LOCK(p);
	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
	SIG_CANTMASK(td->td_sigmask);
	signotify(td);
	PROC_UNLOCK(p);

	/*
	 * Restore signal context
	 */
	/* %gs was restored by the trampoline. */
	regs->tf_fs     = context->sc_fs;
	regs->tf_es     = context->sc_es;
	regs->tf_ds     = context->sc_ds;
	regs->tf_edi    = context->sc_edi;
	regs->tf_esi    = context->sc_esi;
	regs->tf_ebp    = context->sc_ebp;
	regs->tf_ebx    = context->sc_ebx;
	regs->tf_edx    = context->sc_edx;
	regs->tf_ecx    = context->sc_ecx;
	regs->tf_eax    = context->sc_eax;
	regs->tf_eip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;

	/*
	 * call sigaltstack & ignore results..
	 */
	/* Convert the Linux stack description in the ucontext to a stack_t. */
	lss = &uc.uc_stack;
	ss.ss_sp = lss->ss_sp;
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}
719 
720 /*
721  * MPSAFE
722  */
723 static void
724 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
725 {
726 	args[0] = tf->tf_ebx;
727 	args[1] = tf->tf_ecx;
728 	args[2] = tf->tf_edx;
729 	args[3] = tf->tf_esi;
730 	args[4] = tf->tf_edi;
731 	args[5] = tf->tf_ebp;	/* Unconfirmed */
732 	*params = NULL;		/* no copyin */
733 }
734 
735 /*
736  * If a linux binary is exec'ing something, try this image activator
737  * first.  We override standard shell script execution in order to
738  * be able to modify the interpreter path.  We only do this if a linux
739  * binary is doing the exec, so we do not create an EXEC module for it.
740  */
741 static int	exec_linux_imgact_try(struct image_params *iparams);
742 
743 static int
744 exec_linux_imgact_try(struct image_params *imgp)
745 {
746     const char *head = (const char *)imgp->image_header;
747     char *rpath;
748     int error = -1, len;
749 
750     /*
751      * The interpreter for shell scripts run from a linux binary needs
752      * to be located in /compat/linux if possible in order to recursively
753      * maintain linux path emulation.
754      */
755     if (((const short *)head)[0] == SHELLMAGIC) {
756 	    /*
757 	     * Run our normal shell image activator.  If it succeeds attempt
758 	     * to use the alternate path for the interpreter.  If an alternate
759 	     * path is found, use our stringspace to store it.
760 	     */
761 	    if ((error = exec_shell_imgact(imgp)) == 0) {
762 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
763 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
764 		    if (rpath != NULL) {
765 			    len = strlen(rpath) + 1;
766 
767 			    if (len <= MAXSHELLCMDLEN) {
768 				    memcpy(imgp->interpreter_name, rpath, len);
769 			    }
770 			    free(rpath, M_TEMP);
771 		    }
772 	    }
773     }
774     return(error);
775 }
776 
777 /*
778  * exec_setregs may initialize some registers differently than Linux
779  * does, thus potentially confusing Linux binaries. If necessary, we
780  * override the exec_setregs default(s) here.
781  */
782 static void
783 exec_linux_setregs(struct thread *td, u_long entry,
784 		   u_long stack, u_long ps_strings)
785 {
786 	static const u_short control = __LINUX_NPXCW__;
787 	struct pcb *pcb = td->td_pcb;
788 
789 	exec_setregs(td, entry, stack, ps_strings);
790 
791 	/* Linux sets %gs to 0, we default to _udatasel */
792 	pcb->pcb_gs = 0; load_gs(0);
793 
794 	/* Linux sets the i387 to extended precision. */
795 	fldcw(&control);
796 }
797 
/*
 * System entry vector for Linux a.out binaries.
 *
 * The initializer is positional, so the order must match the member
 * order of struct sysentvec (declared outside this file -- check that
 * declaration before adding or moving entries).  It wires in the Linux
 * syscall table, the signal and errno translation tables, and the
 * fixup/sendsig/prepsyscall/imgact_try/setregs hooks defined above.
 * Compared with elf_linux_sysvec it uses linux_fixup() and has NULL in
 * the slot where the ELF vector has elf32_coredump.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux a.out",
	NULL,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs,
	NULL
};
826 
/*
 * System entry vector for Linux ELF binaries; referenced by the brand
 * descriptors below.
 *
 * The initializer is positional, so the order must match the member
 * order of struct sysentvec (declared outside this file -- check that
 * declaration before adding or moving entries).  It differs from
 * linux_sysvec only in the fixup hook (elf_linux_fixup), the name
 * string and the elf32_coredump entry.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,
	linux_sysent,
	0xff,
	LINUX_SIGTBLSZ,
	bsd_to_linux_signal,
	ELAST + 1,
	bsd_to_linux_errno,
	translate_traps,
	elf_linux_fixup,
	linux_sendsig,
	linux_sigcode,
	&linux_szsigcode,
	linux_prepsyscall,
	"Linux ELF",
	elf32_coredump,
	exec_linux_imgact_try,
	LINUX_MINSIGSTKSZ,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	exec_copyout_strings,
	exec_linux_setregs,
	NULL
};
855 
/*
 * ELF brand descriptors for Linux/i386 binaries.  Both select
 * elf_linux_sysvec and name "/compat/linux"; they differ only in the
 * interpreter path string (ld-linux.so.1 here, ld-linux.so.2 below).
 * The initializers are positional; see the Elf32_Brandinfo declaration
 * (outside this file) for the meaning of each slot.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec,
					NULL,
					BI_CAN_EXEC_DYN,
				 };

/* Same brand, for binaries that request /lib/ld-linux.so.2. */
static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					EM_386,
					"Linux",
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec,
					NULL,
					BI_CAN_EXEC_DYN,
				 };

/*
 * NULL-terminated list of the brands that linux_elf_modevent() inserts
 * on module load and removes on unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
883 
884 static int
885 linux_elf_modevent(module_t mod, int type, void *data)
886 {
887 	Elf32_Brandinfo **brandinfo;
888 	int error;
889 	struct linux_ioctl_handler **lihp;
890 
891 	error = 0;
892 
893 	switch(type) {
894 	case MOD_LOAD:
895 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
896 		     ++brandinfo)
897 			if (elf32_insert_brand_entry(*brandinfo) < 0)
898 				error = EINVAL;
899 		if (error == 0) {
900 			SET_FOREACH(lihp, linux_ioctl_handler_set)
901 				linux_ioctl_register_handler(*lihp);
902 			if (bootverbose)
903 				printf("Linux ELF exec handler installed\n");
904 		} else
905 			printf("cannot insert Linux ELF brand handler\n");
906 		break;
907 	case MOD_UNLOAD:
908 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
909 		     ++brandinfo)
910 			if (elf32_brand_inuse(*brandinfo))
911 				error = EBUSY;
912 		if (error == 0) {
913 			for (brandinfo = &linux_brandlist[0];
914 			     *brandinfo != NULL; ++brandinfo)
915 				if (elf32_remove_brand_entry(*brandinfo) < 0)
916 					error = EINVAL;
917 		}
918 		if (error == 0) {
919 			SET_FOREACH(lihp, linux_ioctl_handler_set)
920 				linux_ioctl_unregister_handler(*lihp);
921 			if (bootverbose)
922 				printf("Linux ELF exec handler removed\n");
923 		} else
924 			printf("Could not deinstall ELF interpreter entry\n");
925 		break;
926 	default:
927 		return EOPNOTSUPP;
928 	}
929 	return error;
930 }
931 
/* Module descriptor: name, event handler and (unused) private data. */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

/* Hook the module into the SI_SUB_EXEC startup stage. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
939