xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 8b3ae668b13db776ced151f20e9ad3d23eca545d)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/exec.h>
35 #include <sys/imgact.h>
36 #include <sys/imgact_aout.h>
37 #include <sys/imgact_elf.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/mutex.h>
43 #include <sys/proc.h>
44 #include <sys/signalvar.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/vnode.h>
49 
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_page.h>
56 #include <vm/vm_param.h>
57 
58 #include <machine/cpu.h>
59 #include <machine/md_var.h>
60 #include <machine/pcb.h>
61 
62 #include <i386/linux/linux.h>
63 #include <i386/linux/linux_proto.h>
64 #include <compat/linux/linux_mib.h>
65 #include <compat/linux/linux_signal.h>
66 #include <compat/linux/linux_util.h>
67 
68 MODULE_VERSION(linux, 1);
69 
70 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
71 
72 #if BYTE_ORDER == LITTLE_ENDIAN
73 #define SHELLMAGIC      0x2123 /* #! */
74 #else
75 #define SHELLMAGIC      0x2321
76 #endif
77 
78 /*
79  * Allow the sendsig functions to use the ldebug() facility
80  * even though they are not syscalls themselves. Map them
81  * to syscall 0. This is slightly less bogus than using
82  * ldebug(sigreturn).
83  */
84 #define	LINUX_SYS_linux_rt_sendsig	0
85 #define	LINUX_SYS_linux_sendsig		0
86 
87 #define	fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
88 #define	__LINUX_NPXCW__		0x37f
89 
90 extern char linux_sigcode[];
91 extern int linux_szsigcode;
92 
93 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
94 
95 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
96 SET_DECLARE(linux_device_handler_set, struct linux_device_handler);
97 
98 static int	linux_fixup(register_t **stack_base,
99 		    struct image_params *iparams);
100 static int	elf_linux_fixup(register_t **stack_base,
101 		    struct image_params *iparams);
102 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
103 		    caddr_t *params);
104 static void     linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
105 static void	exec_linux_setregs(struct thread *td, u_long entry,
106 				   u_long stack, u_long ps_strings);
107 
108 /*
109  * Linux syscalls return negative errno's, we do positive and map them
110  * Reference:
111  *   FreeBSD: src/sys/sys/errno.h
112  *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
113  *            linux-2.6.17.8/include/asm-generic/errno.h
114  */
115 static int bsd_to_linux_errno[ELAST + 1] = {
116 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
117 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
118 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
119 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
120 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
121 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
122 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
123 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
124 	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
125 	 -72, -67, -71
126 };
127 
128 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
129 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
130 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
131 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
132 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
133 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
134 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
135 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
136 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
137 };
138 
139 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
140 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
141 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
142 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
143 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
144 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
145 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
146 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
147 	SIGIO, SIGURG, SIGSYS
148 };
149 
/* Value reported when a FreeBSD trap code has no entry in the table. */
#define LINUX_T_UNKNOWN  255

/*
 * Trap number translation table, indexed by FreeBSD trap code (T_*).
 * Entries are the trap numbers stored in the Linux signal context.
 */
static int _bsd_to_linux_trapcode[] = {
	/*  0              */ LINUX_T_UNKNOWN,
	/*  1 T_PRIVINFLT  */ 6,
	/*  2              */ LINUX_T_UNKNOWN,
	/*  3 T_BPTFLT     */ 3,
	/*  4              */ LINUX_T_UNKNOWN,
	/*  5              */ LINUX_T_UNKNOWN,
	/*  6 T_ARITHTRAP  */ 16,
	/*  7 T_ASTFLT     */ 254,
	/*  8              */ LINUX_T_UNKNOWN,
	/*  9 T_PROTFLT    */ 13,
	/* 10 T_TRCTRAP    */ 1,
	/* 11              */ LINUX_T_UNKNOWN,
	/* 12 T_PAGEFLT    */ 14,
	/* 13              */ LINUX_T_UNKNOWN,
	/* 14 T_ALIGNFLT   */ 17,
	/* 15              */ LINUX_T_UNKNOWN,
	/* 16              */ LINUX_T_UNKNOWN,
	/* 17              */ LINUX_T_UNKNOWN,
	/* 18 T_DIVIDE     */ 0,
	/* 19 T_NMI        */ 2,
	/* 20 T_OFLOW      */ 4,
	/* 21 T_BOUND      */ 5,
	/* 22 T_DNA        */ 7,
	/* 23 T_DOUBLEFLT  */ 8,
	/* 24 T_FPOPFLT    */ 9,
	/* 25 T_TSSFLT     */ 10,
	/* 26 T_SEGNPFLT   */ 11,
	/* 27 T_STKFLT     */ 12,
	/* 28 T_MCHK       */ 18,
	/* 29 T_XMMFLT     */ 19,
	/* 30 T_RESERVED   */ 15
};

/*
 * Look up a FreeBSD trap code; anything past the end of the table
 * yields LINUX_T_UNKNOWN.  The argument is evaluated more than once.
 */
#define bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(*_bsd_to_linux_trapcode) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
188 
189 /*
190  * If FreeBSD & Linux have a difference of opinion about what a trap
191  * means, deal with it here.
192  *
193  * MPSAFE
194  */
195 static int
196 translate_traps(int signal, int trap_code)
197 {
198 	if (signal != SIGBUS)
199 		return signal;
200 	switch (trap_code) {
201 	case T_PROTFLT:
202 	case T_TSSFLT:
203 	case T_DOUBLEFLT:
204 	case T_PAGEFLT:
205 		return SIGSEGV;
206 	default:
207 		return signal;
208 	}
209 }
210 
211 static int
212 linux_fixup(register_t **stack_base, struct image_params *imgp)
213 {
214 	register_t *argv, *envp;
215 
216 	argv = *stack_base;
217 	envp = *stack_base + (imgp->args->argc + 1);
218 	(*stack_base)--;
219 	**stack_base = (intptr_t)(void *)envp;
220 	(*stack_base)--;
221 	**stack_base = (intptr_t)(void *)argv;
222 	(*stack_base)--;
223 	**stack_base = imgp->args->argc;
224 	return 0;
225 }
226 
227 static int
228 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
229 {
230 	Elf32_Auxargs *args;
231 	register_t *pos;
232 
233 	KASSERT(curthread->td_proc == imgp->proc &&
234 	    (curthread->td_proc->p_flag & P_SA) == 0,
235 	    ("unsafe elf_linux_fixup(), should be curproc"));
236 	args = (Elf32_Auxargs *)imgp->auxargs;
237 	pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2);
238 
239 	if (args->trace)
240 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
241 	if (args->execfd != -1)
242 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
243 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
244 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
245 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
246 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
247 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
248 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
249 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
250 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
251 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
252 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
253 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
254 	AUXARGS_ENTRY(pos, AT_NULL, 0);
255 
256 	free(imgp->auxargs, M_TEMP);
257 	imgp->auxargs = NULL;
258 
259 	(*stack_base)--;
260 	**stack_base = (register_t)imgp->args->argc;
261 	return 0;
262 }
263 
264 extern int _ucodesel, _udatasel;
265 extern unsigned long linux_sznonrtsigcode;
266 
267 static void
268 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
269 {
270 	struct thread *td = curthread;
271 	struct proc *p = td->td_proc;
272 	struct sigacts *psp;
273 	struct trapframe *regs;
274 	struct l_rt_sigframe *fp, frame;
275 	int sig, code;
276 	int oonstack;
277 
278 	sig = ksi->ksi_signo;
279 	code = ksi->ksi_code;
280 	PROC_LOCK_ASSERT(p, MA_OWNED);
281 	psp = p->p_sigacts;
282 	mtx_assert(&psp->ps_mtx, MA_OWNED);
283 	regs = td->td_frame;
284 	oonstack = sigonstack(regs->tf_esp);
285 
286 #ifdef DEBUG
287 	if (ldebug(rt_sendsig))
288 		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
289 		    catcher, sig, (void*)mask, code);
290 #endif
291 	/*
292 	 * Allocate space for the signal handler context.
293 	 */
294 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
295 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
296 		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
297 		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
298 	} else
299 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
300 	mtx_unlock(&psp->ps_mtx);
301 
302 	/*
303 	 * Build the argument list for the signal handler.
304 	 */
305 	if (p->p_sysent->sv_sigtbl)
306 		if (sig <= p->p_sysent->sv_sigsize)
307 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
308 
309 	bzero(&frame, sizeof(frame));
310 
311 	frame.sf_handler = catcher;
312 	frame.sf_sig = sig;
313 	frame.sf_siginfo = &fp->sf_si;
314 	frame.sf_ucontext = &fp->sf_sc;
315 
316 	/* Fill in POSIX parts */
317 	frame.sf_si.lsi_signo = sig;
318 	frame.sf_si.lsi_code = code;
319 	frame.sf_si.lsi_addr = ksi->ksi_addr;
320 
321 	/*
322 	 * Build the signal context to be used by sigreturn.
323 	 */
324 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
325 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
326 
327 	frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp;
328 	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
329 	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
330 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
331 	PROC_UNLOCK(p);
332 
333 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
334 
335 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
336 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
337 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
338 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
339 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
340 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
341 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
342 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
343 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
344 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
345 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
346 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
347 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
348 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
349 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
350 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
351 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
352 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
353 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
354 
355 #ifdef DEBUG
356 	if (ldebug(rt_sendsig))
357 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
358 		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
359 		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
360 #endif
361 
362 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
363 		/*
364 		 * Process has trashed its stack; give it an illegal
365 		 * instruction to halt it in its tracks.
366 		 */
367 #ifdef DEBUG
368 		if (ldebug(rt_sendsig))
369 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
370 			    fp, oonstack);
371 #endif
372 		PROC_LOCK(p);
373 		sigexit(td, SIGILL);
374 	}
375 
376 	/*
377 	 * Build context to run handler in.
378 	 */
379 	regs->tf_esp = (int)fp;
380 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
381 	    linux_sznonrtsigcode;
382 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
383 	regs->tf_cs = _ucodesel;
384 	regs->tf_ds = _udatasel;
385 	regs->tf_es = _udatasel;
386 	regs->tf_fs = _udatasel;
387 	regs->tf_ss = _udatasel;
388 	PROC_LOCK(p);
389 	mtx_lock(&psp->ps_mtx);
390 }
391 
392 
393 /*
394  * Send an interrupt to process.
395  *
396  * Stack is set up to allow sigcode stored
397  * in u. to call routine, followed by kcall
398  * to sigreturn routine below.  After sigreturn
399  * resets the signal mask, the stack, and the
400  * frame pointer, it returns to the user
401  * specified pc, psl.
402  */
403 static void
404 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
405 {
406 	struct thread *td = curthread;
407 	struct proc *p = td->td_proc;
408 	struct sigacts *psp;
409 	struct trapframe *regs;
410 	struct l_sigframe *fp, frame;
411 	l_sigset_t lmask;
412 	int sig, code;
413 	int oonstack, i;
414 
415 	PROC_LOCK_ASSERT(p, MA_OWNED);
416 	psp = p->p_sigacts;
417 	sig = ksi->ksi_signo;
418 	code = ksi->ksi_code;
419 	mtx_assert(&psp->ps_mtx, MA_OWNED);
420 	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
421 		/* Signal handler installed with SA_SIGINFO. */
422 		linux_rt_sendsig(catcher, ksi, mask);
423 		return;
424 	}
425 	regs = td->td_frame;
426 	oonstack = sigonstack(regs->tf_esp);
427 
428 #ifdef DEBUG
429 	if (ldebug(sendsig))
430 		printf(ARGS(sendsig, "%p, %d, %p, %u"),
431 		    catcher, sig, (void*)mask, code);
432 #endif
433 
434 	/*
435 	 * Allocate space for the signal handler context.
436 	 */
437 	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
438 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
439 		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
440 		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
441 	} else
442 		fp = (struct l_sigframe *)regs->tf_esp - 1;
443 	mtx_unlock(&psp->ps_mtx);
444 	PROC_UNLOCK(p);
445 
446 	/*
447 	 * Build the argument list for the signal handler.
448 	 */
449 	if (p->p_sysent->sv_sigtbl)
450 		if (sig <= p->p_sysent->sv_sigsize)
451 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
452 
453 	bzero(&frame, sizeof(frame));
454 
455 	frame.sf_handler = catcher;
456 	frame.sf_sig = sig;
457 
458 	bsd_to_linux_sigset(mask, &lmask);
459 
460 	/*
461 	 * Build the signal context to be used by sigreturn.
462 	 */
463 	frame.sf_sc.sc_mask   = lmask.__bits[0];
464 	frame.sf_sc.sc_gs     = rgs();
465 	frame.sf_sc.sc_fs     = regs->tf_fs;
466 	frame.sf_sc.sc_es     = regs->tf_es;
467 	frame.sf_sc.sc_ds     = regs->tf_ds;
468 	frame.sf_sc.sc_edi    = regs->tf_edi;
469 	frame.sf_sc.sc_esi    = regs->tf_esi;
470 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
471 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
472 	frame.sf_sc.sc_edx    = regs->tf_edx;
473 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
474 	frame.sf_sc.sc_eax    = regs->tf_eax;
475 	frame.sf_sc.sc_eip    = regs->tf_eip;
476 	frame.sf_sc.sc_cs     = regs->tf_cs;
477 	frame.sf_sc.sc_eflags = regs->tf_eflags;
478 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
479 	frame.sf_sc.sc_ss     = regs->tf_ss;
480 	frame.sf_sc.sc_err    = regs->tf_err;
481 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
482 
483 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
484 		frame.sf_extramask[i] = lmask.__bits[i+1];
485 
486 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
487 		/*
488 		 * Process has trashed its stack; give it an illegal
489 		 * instruction to halt it in its tracks.
490 		 */
491 		PROC_LOCK(p);
492 		sigexit(td, SIGILL);
493 	}
494 
495 	/*
496 	 * Build context to run handler in.
497 	 */
498 	regs->tf_esp = (int)fp;
499 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
500 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
501 	regs->tf_cs = _ucodesel;
502 	regs->tf_ds = _udatasel;
503 	regs->tf_es = _udatasel;
504 	regs->tf_fs = _udatasel;
505 	regs->tf_ss = _udatasel;
506 	PROC_LOCK(p);
507 	mtx_lock(&psp->ps_mtx);
508 }
509 
510 /*
511  * System call to cleanup state after a signal
512  * has been taken.  Reset signal mask and
513  * stack state from context left by sendsig (above).
514  * Return to previous pc and psl as specified by
515  * context left by sendsig. Check carefully to
516  * make sure that the user has not modified the
517  * psl to gain improper privileges or to cause
518  * a machine fault.
519  */
520 int
521 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
522 {
523 	struct proc *p = td->td_proc;
524 	struct l_sigframe frame;
525 	struct trapframe *regs;
526 	l_sigset_t lmask;
527 	int eflags, i;
528 	ksiginfo_t ksi;
529 
530 	regs = td->td_frame;
531 
532 #ifdef DEBUG
533 	if (ldebug(sigreturn))
534 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
535 #endif
536 	/*
537 	 * The trampoline code hands us the sigframe.
538 	 * It is unsafe to keep track of it ourselves, in the event that a
539 	 * program jumps out of a signal handler.
540 	 */
541 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
542 		return (EFAULT);
543 
544 	/*
545 	 * Check for security violations.
546 	 */
547 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
548 	eflags = frame.sf_sc.sc_eflags;
549 	/*
550 	 * XXX do allow users to change the privileged flag PSL_RF.  The
551 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
552 	 * sometimes set it there too.  tf_eflags is kept in the signal
553 	 * context during signal handling and there is no other place
554 	 * to remember it, so the PSL_RF bit may be corrupted by the
555 	 * signal handler without us knowing.  Corruption of the PSL_RF
556 	 * bit at worst causes one more or one less debugger trap, so
557 	 * allowing it is fairly harmless.
558 	 */
559 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
560 		return(EINVAL);
561 
562 	/*
563 	 * Don't allow users to load a valid privileged %cs.  Let the
564 	 * hardware check for invalid selectors, excess privilege in
565 	 * other selectors, invalid %eip's and invalid %esp's.
566 	 */
567 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
568 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
569 		ksiginfo_init_trap(&ksi);
570 		ksi.ksi_signo = SIGBUS;
571 		ksi.ksi_code = BUS_OBJERR;
572 		ksi.ksi_trapno = T_PROTFLT;
573 		ksi.ksi_addr = (void *)regs->tf_eip;
574 		trapsignal(td, &ksi);
575 		return(EINVAL);
576 	}
577 
578 	lmask.__bits[0] = frame.sf_sc.sc_mask;
579 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
580 		lmask.__bits[i+1] = frame.sf_extramask[i];
581 	PROC_LOCK(p);
582 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
583 	SIG_CANTMASK(td->td_sigmask);
584 	signotify(td);
585 	PROC_UNLOCK(p);
586 
587 	/*
588 	 * Restore signal context.
589 	 */
590 	/* %gs was restored by the trampoline. */
591 	regs->tf_fs     = frame.sf_sc.sc_fs;
592 	regs->tf_es     = frame.sf_sc.sc_es;
593 	regs->tf_ds     = frame.sf_sc.sc_ds;
594 	regs->tf_edi    = frame.sf_sc.sc_edi;
595 	regs->tf_esi    = frame.sf_sc.sc_esi;
596 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
597 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
598 	regs->tf_edx    = frame.sf_sc.sc_edx;
599 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
600 	regs->tf_eax    = frame.sf_sc.sc_eax;
601 	regs->tf_eip    = frame.sf_sc.sc_eip;
602 	regs->tf_cs     = frame.sf_sc.sc_cs;
603 	regs->tf_eflags = eflags;
604 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
605 	regs->tf_ss     = frame.sf_sc.sc_ss;
606 
607 	return (EJUSTRETURN);
608 }
609 
610 /*
611  * System call to cleanup state after a signal
612  * has been taken.  Reset signal mask and
613  * stack state from context left by rt_sendsig (above).
614  * Return to previous pc and psl as specified by
615  * context left by sendsig. Check carefully to
616  * make sure that the user has not modified the
617  * psl to gain improper privileges or to cause
618  * a machine fault.
619  */
620 int
621 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
622 {
623 	struct proc *p = td->td_proc;
624 	struct l_ucontext uc;
625 	struct l_sigcontext *context;
626 	l_stack_t *lss;
627 	stack_t ss;
628 	struct trapframe *regs;
629 	int eflags;
630 	ksiginfo_t ksi;
631 
632 	regs = td->td_frame;
633 
634 #ifdef DEBUG
635 	if (ldebug(rt_sigreturn))
636 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
637 #endif
638 	/*
639 	 * The trampoline code hands us the ucontext.
640 	 * It is unsafe to keep track of it ourselves, in the event that a
641 	 * program jumps out of a signal handler.
642 	 */
643 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
644 		return (EFAULT);
645 
646 	context = &uc.uc_mcontext;
647 
648 	/*
649 	 * Check for security violations.
650 	 */
651 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
652 	eflags = context->sc_eflags;
653 	/*
654 	 * XXX do allow users to change the privileged flag PSL_RF.  The
655 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
656 	 * sometimes set it there too.  tf_eflags is kept in the signal
657 	 * context during signal handling and there is no other place
658 	 * to remember it, so the PSL_RF bit may be corrupted by the
659 	 * signal handler without us knowing.  Corruption of the PSL_RF
660 	 * bit at worst causes one more or one less debugger trap, so
661 	 * allowing it is fairly harmless.
662 	 */
663 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
664 		return(EINVAL);
665 
666 	/*
667 	 * Don't allow users to load a valid privileged %cs.  Let the
668 	 * hardware check for invalid selectors, excess privilege in
669 	 * other selectors, invalid %eip's and invalid %esp's.
670 	 */
671 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
672 	if (!CS_SECURE(context->sc_cs)) {
673 		ksiginfo_init_trap(&ksi);
674 		ksi.ksi_signo = SIGBUS;
675 		ksi.ksi_code = BUS_OBJERR;
676 		ksi.ksi_trapno = T_PROTFLT;
677 		ksi.ksi_addr = (void *)regs->tf_eip;
678 		trapsignal(td, &ksi);
679 		return(EINVAL);
680 	}
681 
682 	PROC_LOCK(p);
683 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
684 	SIG_CANTMASK(td->td_sigmask);
685 	signotify(td);
686 	PROC_UNLOCK(p);
687 
688 	/*
689 	 * Restore signal context
690 	 */
691 	/* %gs was restored by the trampoline. */
692 	regs->tf_fs     = context->sc_fs;
693 	regs->tf_es     = context->sc_es;
694 	regs->tf_ds     = context->sc_ds;
695 	regs->tf_edi    = context->sc_edi;
696 	regs->tf_esi    = context->sc_esi;
697 	regs->tf_ebp    = context->sc_ebp;
698 	regs->tf_ebx    = context->sc_ebx;
699 	regs->tf_edx    = context->sc_edx;
700 	regs->tf_ecx    = context->sc_ecx;
701 	regs->tf_eax    = context->sc_eax;
702 	regs->tf_eip    = context->sc_eip;
703 	regs->tf_cs     = context->sc_cs;
704 	regs->tf_eflags = eflags;
705 	regs->tf_esp    = context->sc_esp_at_signal;
706 	regs->tf_ss     = context->sc_ss;
707 
708 	/*
709 	 * call sigaltstack & ignore results..
710 	 */
711 	lss = &uc.uc_stack;
712 	ss.ss_sp = lss->ss_sp;
713 	ss.ss_size = lss->ss_size;
714 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
715 
716 #ifdef DEBUG
717 	if (ldebug(rt_sigreturn))
718 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
719 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
720 #endif
721 	(void)kern_sigaltstack(td, &ss, NULL);
722 
723 	return (EJUSTRETURN);
724 }
725 
726 /*
727  * MPSAFE
728  */
729 static void
730 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
731 {
732 	args[0] = tf->tf_ebx;
733 	args[1] = tf->tf_ecx;
734 	args[2] = tf->tf_edx;
735 	args[3] = tf->tf_esi;
736 	args[4] = tf->tf_edi;
737 	args[5] = tf->tf_ebp;	/* Unconfirmed */
738 	*params = NULL;		/* no copyin */
739 }
740 
741 /*
742  * If a linux binary is exec'ing something, try this image activator
743  * first.  We override standard shell script execution in order to
744  * be able to modify the interpreter path.  We only do this if a linux
745  * binary is doing the exec, so we do not create an EXEC module for it.
746  */
747 static int	exec_linux_imgact_try(struct image_params *iparams);
748 
749 static int
750 exec_linux_imgact_try(struct image_params *imgp)
751 {
752     const char *head = (const char *)imgp->image_header;
753     char *rpath;
754     int error = -1, len;
755 
756     /*
757      * The interpreter for shell scripts run from a linux binary needs
758      * to be located in /compat/linux if possible in order to recursively
759      * maintain linux path emulation.
760      */
761     if (((const short *)head)[0] == SHELLMAGIC) {
762 	    /*
763 	     * Run our normal shell image activator.  If it succeeds attempt
764 	     * to use the alternate path for the interpreter.  If an alternate
765 	     * path is found, use our stringspace to store it.
766 	     */
767 	    if ((error = exec_shell_imgact(imgp)) == 0) {
768 		    linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
769 			imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0);
770 		    if (rpath != NULL) {
771 			    len = strlen(rpath) + 1;
772 
773 			    if (len <= MAXSHELLCMDLEN) {
774 				    memcpy(imgp->interpreter_name, rpath, len);
775 			    }
776 			    free(rpath, M_TEMP);
777 		    }
778 	    }
779     }
780     return(error);
781 }
782 
783 /*
784  * exec_setregs may initialize some registers differently than Linux
785  * does, thus potentially confusing Linux binaries. If necessary, we
786  * override the exec_setregs default(s) here.
787  */
788 static void
789 exec_linux_setregs(struct thread *td, u_long entry,
790 		   u_long stack, u_long ps_strings)
791 {
792 	static const u_short control = __LINUX_NPXCW__;
793 	struct pcb *pcb = td->td_pcb;
794 
795 	exec_setregs(td, entry, stack, ps_strings);
796 
797 	/* Linux sets %gs to 0, we default to _udatasel */
798 	pcb->pcb_gs = 0; load_gs(0);
799 
800 	/* Linux sets the i387 to extended precision. */
801 	fldcw(&control);
802 }
803 
804 struct sysentvec linux_sysvec = {
805 	LINUX_SYS_MAXSYSCALL,
806 	linux_sysent,
807 	0xff,
808 	LINUX_SIGTBLSZ,
809 	bsd_to_linux_signal,
810 	ELAST + 1,
811 	bsd_to_linux_errno,
812 	translate_traps,
813 	linux_fixup,
814 	linux_sendsig,
815 	linux_sigcode,
816 	&linux_szsigcode,
817 	linux_prepsyscall,
818 	"Linux a.out",
819 	NULL,
820 	exec_linux_imgact_try,
821 	LINUX_MINSIGSTKSZ,
822 	PAGE_SIZE,
823 	VM_MIN_ADDRESS,
824 	VM_MAXUSER_ADDRESS,
825 	USRSTACK,
826 	PS_STRINGS,
827 	VM_PROT_ALL,
828 	exec_copyout_strings,
829 	exec_linux_setregs,
830 	NULL
831 };
832 
833 struct sysentvec elf_linux_sysvec = {
834 	LINUX_SYS_MAXSYSCALL,
835 	linux_sysent,
836 	0xff,
837 	LINUX_SIGTBLSZ,
838 	bsd_to_linux_signal,
839 	ELAST + 1,
840 	bsd_to_linux_errno,
841 	translate_traps,
842 	elf_linux_fixup,
843 	linux_sendsig,
844 	linux_sigcode,
845 	&linux_szsigcode,
846 	linux_prepsyscall,
847 	"Linux ELF",
848 	elf32_coredump,
849 	exec_linux_imgact_try,
850 	LINUX_MINSIGSTKSZ,
851 	PAGE_SIZE,
852 	VM_MIN_ADDRESS,
853 	VM_MAXUSER_ADDRESS,
854 	USRSTACK,
855 	PS_STRINGS,
856 	VM_PROT_ALL,
857 	exec_copyout_strings,
858 	exec_linux_setregs,
859 	NULL
860 };
861 
862 static Elf32_Brandinfo linux_brand = {
863 					ELFOSABI_LINUX,
864 					EM_386,
865 					"Linux",
866 					"/compat/linux",
867 					"/lib/ld-linux.so.1",
868 					&elf_linux_sysvec,
869 					NULL,
870 					BI_CAN_EXEC_DYN,
871 				 };
872 
873 static Elf32_Brandinfo linux_glibc2brand = {
874 					ELFOSABI_LINUX,
875 					EM_386,
876 					"Linux",
877 					"/compat/linux",
878 					"/lib/ld-linux.so.2",
879 					&elf_linux_sysvec,
880 					NULL,
881 					BI_CAN_EXEC_DYN,
882 				 };
883 
884 Elf32_Brandinfo *linux_brandlist[] = {
885 					&linux_brand,
886 					&linux_glibc2brand,
887 					NULL
888 				};
889 
890 static int
891 linux_elf_modevent(module_t mod, int type, void *data)
892 {
893 	Elf32_Brandinfo **brandinfo;
894 	int error;
895 	struct linux_ioctl_handler **lihp;
896 	struct linux_device_handler **ldhp;
897 
898 	error = 0;
899 
900 	switch(type) {
901 	case MOD_LOAD:
902 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
903 		     ++brandinfo)
904 			if (elf32_insert_brand_entry(*brandinfo) < 0)
905 				error = EINVAL;
906 		if (error == 0) {
907 			SET_FOREACH(lihp, linux_ioctl_handler_set)
908 				linux_ioctl_register_handler(*lihp);
909 			SET_FOREACH(ldhp, linux_device_handler_set)
910 				linux_device_register_handler(*ldhp);
911 			if (bootverbose)
912 				printf("Linux ELF exec handler installed\n");
913 		} else
914 			printf("cannot insert Linux ELF brand handler\n");
915 		break;
916 	case MOD_UNLOAD:
917 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
918 		     ++brandinfo)
919 			if (elf32_brand_inuse(*brandinfo))
920 				error = EBUSY;
921 		if (error == 0) {
922 			for (brandinfo = &linux_brandlist[0];
923 			     *brandinfo != NULL; ++brandinfo)
924 				if (elf32_remove_brand_entry(*brandinfo) < 0)
925 					error = EINVAL;
926 		}
927 		if (error == 0) {
928 			SET_FOREACH(lihp, linux_ioctl_handler_set)
929 				linux_ioctl_unregister_handler(*lihp);
930 			SET_FOREACH(ldhp, linux_device_handler_set)
931 				linux_device_unregister_handler(*ldhp);
932 			if (bootverbose)
933 				printf("Linux ELF exec handler removed\n");
934 		} else
935 			printf("Could not deinstall ELF interpreter entry\n");
936 		break;
937 	default:
938 		return EOPNOTSUPP;
939 	}
940 	return error;
941 }
942 
943 static moduledata_t linux_elf_mod = {
944 	"linuxelf",
945 	linux_elf_modevent,
946 	0
947 };
948 
949 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
950