xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision e40db2c46ecb75fdaf399d0a439ae31e501c097c)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/syscallsubr.h>
49 #include <sys/sysent.h>
50 #include <sys/sysproto.h>
51 #include <sys/user.h>
52 #include <sys/vnode.h>
53 
54 #include <vm/vm.h>
55 #include <vm/vm_param.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_extern.h>
58 #include <sys/exec.h>
59 #include <sys/kernel.h>
60 #include <sys/module.h>
61 #include <machine/cpu.h>
62 #include <machine/md_var.h>
63 #include <sys/mutex.h>
64 
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/pmap.h>
68 #include <vm/vm_map.h>
69 #include <vm/vm_object.h>
70 
71 #include <i386/linux/linux.h>
72 #include <i386/linux/linux_proto.h>
73 #include <compat/linux/linux_mib.h>
74 #include <compat/linux/linux_signal.h>
75 #include <compat/linux/linux_util.h>
76 
77 MODULE_VERSION(linux, 1);
78 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
79 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
80 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
81 
82 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
83 
84 #if BYTE_ORDER == LITTLE_ENDIAN
85 #define SHELLMAGIC      0x2123 /* #! */
86 #else
87 #define SHELLMAGIC      0x2321
88 #endif
89 
90 /*
91  * Allow the sendsig functions to use the ldebug() facility
92  * even though they are not syscalls themselves. Map them
93  * to syscall 0. This is slightly less bogus than using
94  * ldebug(sigreturn).
95  */
96 #define	LINUX_SYS_linux_rt_sendsig	0
97 #define	LINUX_SYS_linux_sendsig		0
98 
99 extern char linux_sigcode[];
100 extern int linux_szsigcode;
101 
102 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
103 
104 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
105 
106 static int	linux_fixup(register_t **stack_base,
107 		    struct image_params *iparams);
108 static int	elf_linux_fixup(register_t **stack_base,
109 		    struct image_params *iparams);
110 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
111 		    caddr_t *params);
112 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
113 		    u_long code);
114 
115 /*
116  * Linux syscalls return negative errno's, we do positive and map them
117  */
118 static int bsd_to_linux_errno[ELAST + 1] = {
119 	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
120 	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
121 	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
122 	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
123 	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
124 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
125 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
126 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
127 	-6, -6, -43, -42, -75, -6, -84
128 };
129 
130 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
131 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
132 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
133 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
134 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
135 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
136 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
137 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
138 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
139 };
140 
141 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
142 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
143 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
144 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
145 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
146 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
147 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
148 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
149 	SIGIO, SIGURG, SIGSYS
150 };
151 
/* Value reported for any trap code that has no Linux counterpart listed. */
#define LINUX_T_UNKNOWN  255

/*
 * Native trap code -> Linux trap number, indexed by the native code.
 * The table is only ever read, so it is const.
 */
static const int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};

/*
 * Translate a native trap code into the Linux trap number.
 *
 * code:	native trap code; any value is accepted.
 * Returns the table entry for code, or LINUX_T_UNKNOWN when code lies
 * beyond the end of the table.
 *
 * This is a function rather than a macro so that the argument is
 * evaluated exactly once; the comparison is unsigned, as it was in the
 * macro form (where sizeof forced it).
 */
static __inline int
bsd_to_linux_trapcode(unsigned long code)
{
	if (code < sizeof(_bsd_to_linux_trapcode) /
	    sizeof(_bsd_to_linux_trapcode[0]))
		return (_bsd_to_linux_trapcode[code]);
	return (LINUX_T_UNKNOWN);
}
190 
191 /*
192  * If FreeBSD & Linux have a difference of opinion about what a trap
193  * means, deal with it here.
194  *
195  * MPSAFE
196  */
197 static int
198 translate_traps(int signal, int trap_code)
199 {
200 	if (signal != SIGBUS)
201 		return signal;
202 	switch (trap_code) {
203 	case T_PROTFLT:
204 	case T_TSSFLT:
205 	case T_DOUBLEFLT:
206 	case T_PAGEFLT:
207 		return SIGSEGV;
208 	default:
209 		return signal;
210 	}
211 }
212 
213 static int
214 linux_fixup(register_t **stack_base, struct image_params *imgp)
215 {
216 	register_t *argv, *envp;
217 
218 	argv = *stack_base;
219 	envp = *stack_base + (imgp->argc + 1);
220 	(*stack_base)--;
221 	**stack_base = (intptr_t)(void *)envp;
222 	(*stack_base)--;
223 	**stack_base = (intptr_t)(void *)argv;
224 	(*stack_base)--;
225 	**stack_base = imgp->argc;
226 	return 0;
227 }
228 
229 static int
230 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
231 {
232 	Elf32_Auxargs *args;
233 	register_t *pos;
234 
235 	KASSERT(curthread->td_proc == imgp->proc &&
236 	    (curthread->td_proc->p_flag & P_THREADED) == 0,
237 	    ("unsafe elf_linux_fixup(), should be curproc"));
238 	args = (Elf32_Auxargs *)imgp->auxargs;
239 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
240 
241 	if (args->trace)
242 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
243 	if (args->execfd != -1)
244 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
245 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
246 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
247 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
248 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
249 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
250 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
251 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
252 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
253 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
254 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
255 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
256 	AUXARGS_ENTRY(pos, AT_NULL, 0);
257 
258 	free(imgp->auxargs, M_TEMP);
259 	imgp->auxargs = NULL;
260 
261 	(*stack_base)--;
262 	**stack_base = (register_t)imgp->argc;
263 	return 0;
264 }
265 
266 extern int _ucodesel, _udatasel;
267 extern unsigned long linux_sznonrtsigcode;
268 
269 static void
270 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
271 {
272 	struct thread *td = curthread;
273 	struct proc *p = td->td_proc;
274 	struct trapframe *regs;
275 	struct l_rt_sigframe *fp, frame;
276 	int oonstack;
277 
278 	PROC_LOCK_ASSERT(p, MA_OWNED);
279 	regs = td->td_frame;
280 	oonstack = sigonstack(regs->tf_esp);
281 
282 #ifdef DEBUG
283 	if (ldebug(rt_sendsig))
284 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
285 		    catcher, sig, (void*)mask, code);
286 #endif
287 	/*
288 	 * Allocate space for the signal handler context.
289 	 */
290 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
291 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
292 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
293 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
294 	} else
295 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
296 	PROC_UNLOCK(p);
297 
298 	/*
299 	 * Build the argument list for the signal handler.
300 	 */
301 	if (p->p_sysent->sv_sigtbl)
302 		if (sig <= p->p_sysent->sv_sigsize)
303 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
304 
305 	bzero(&frame, sizeof(frame));
306 
307 	frame.sf_handler = catcher;
308 	frame.sf_sig = sig;
309 	frame.sf_siginfo = &fp->sf_si;
310 	frame.sf_ucontext = &fp->sf_sc;
311 
312 	/* Fill in POSIX parts */
313 	frame.sf_si.lsi_signo = sig;
314 	frame.sf_si.lsi_code = code;
315 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
316 
317 	/*
318 	 * Build the signal context to be used by sigreturn.
319 	 */
320 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
321 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
322 
323 	PROC_LOCK(p);
324 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
325 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
326 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
327 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
328 	PROC_UNLOCK(p);
329 
330 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
331 
332 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
333 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
334 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
335 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
336 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
337 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
338 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
339 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
340 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
341 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
342 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
343 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
344 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
345 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
346 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
347 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
348 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
349 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
350 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
351 
352 #ifdef DEBUG
353 	if (ldebug(rt_sendsig))
354 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
355 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
356 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
357 #endif
358 
359 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
360 		/*
361 		 * Process has trashed its stack; give it an illegal
362 		 * instruction to halt it in its tracks.
363 		 */
364 #ifdef DEBUG
365 		if (ldebug(rt_sendsig))
366 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
367 			    fp, oonstack);
368 #endif
369 		PROC_LOCK(p);
370 		sigexit(td, SIGILL);
371 	}
372 
373 	/*
374 	 * Build context to run handler in.
375 	 */
376 	regs->tf_esp = (int)fp;
377 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
378 	    linux_sznonrtsigcode;
379 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
380 	regs->tf_cs = _ucodesel;
381 	regs->tf_ds = _udatasel;
382 	regs->tf_es = _udatasel;
383 	regs->tf_fs = _udatasel;
384 	regs->tf_ss = _udatasel;
385 	PROC_LOCK(p);
386 }
387 
388 
389 /*
390  * Send an interrupt to process.
391  *
392  * Stack is set up to allow sigcode stored
393  * in u. to call routine, followed by kcall
394  * to sigreturn routine below.  After sigreturn
395  * resets the signal mask, the stack, and the
396  * frame pointer, it returns to the user
397  * specified pc, psl.
398  */
399 static void
400 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
401 {
402 	struct thread *td = curthread;
403 	struct proc *p = td->td_proc;
404 	struct trapframe *regs;
405 	struct l_sigframe *fp, frame;
406 	l_sigset_t lmask;
407 	int oonstack, i;
408 
409 	PROC_LOCK_ASSERT(p, MA_OWNED);
410 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
411 		/* Signal handler installed with SA_SIGINFO. */
412 		linux_rt_sendsig(catcher, sig, mask, code);
413 		return;
414 	}
415 
416 	regs = td->td_frame;
417 	oonstack = sigonstack(regs->tf_esp);
418 
419 #ifdef DEBUG
420 	if (ldebug(sendsig))
421 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
422 		    catcher, sig, (void*)mask, code);
423 #endif
424 
425 	/*
426 	 * Allocate space for the signal handler context.
427 	 */
428 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
429 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
430 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
431 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
432 	} else
433 		fp = (struct l_sigframe *)regs->tf_esp - 1;
434 	PROC_UNLOCK(p);
435 
436 	/*
437 	 * Build the argument list for the signal handler.
438 	 */
439 	if (p->p_sysent->sv_sigtbl)
440 		if (sig <= p->p_sysent->sv_sigsize)
441 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
442 
443 	bzero(&frame, sizeof(frame));
444 
445 	frame.sf_handler = catcher;
446 	frame.sf_sig = sig;
447 
448 	bsd_to_linux_sigset(mask, &lmask);
449 
450 	/*
451 	 * Build the signal context to be used by sigreturn.
452 	 */
453 	frame.sf_sc.sc_mask   = lmask.__bits[0];
454 	frame.sf_sc.sc_gs     = rgs();
455 	frame.sf_sc.sc_fs     = regs->tf_fs;
456 	frame.sf_sc.sc_es     = regs->tf_es;
457 	frame.sf_sc.sc_ds     = regs->tf_ds;
458 	frame.sf_sc.sc_edi    = regs->tf_edi;
459 	frame.sf_sc.sc_esi    = regs->tf_esi;
460 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
461 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
462 	frame.sf_sc.sc_edx    = regs->tf_edx;
463 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
464 	frame.sf_sc.sc_eax    = regs->tf_eax;
465 	frame.sf_sc.sc_eip    = regs->tf_eip;
466 	frame.sf_sc.sc_cs     = regs->tf_cs;
467 	frame.sf_sc.sc_eflags = regs->tf_eflags;
468 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
469 	frame.sf_sc.sc_ss     = regs->tf_ss;
470 	frame.sf_sc.sc_err    = regs->tf_err;
471 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
472 
473 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
474 		frame.sf_extramask[i] = lmask.__bits[i+1];
475 
476 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
477 		/*
478 		 * Process has trashed its stack; give it an illegal
479 		 * instruction to halt it in its tracks.
480 		 */
481 		PROC_LOCK(p);
482 		sigexit(td, SIGILL);
483 	}
484 
485 	/*
486 	 * Build context to run handler in.
487 	 */
488 	regs->tf_esp = (int)fp;
489 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
490 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
491 	regs->tf_cs = _ucodesel;
492 	regs->tf_ds = _udatasel;
493 	regs->tf_es = _udatasel;
494 	regs->tf_fs = _udatasel;
495 	regs->tf_ss = _udatasel;
496 	PROC_LOCK(p);
497 }
498 
499 /*
500  * System call to cleanup state after a signal
501  * has been taken.  Reset signal mask and
502  * stack state from context left by sendsig (above).
503  * Return to previous pc and psl as specified by
504  * context left by sendsig. Check carefully to
505  * make sure that the user has not modified the
506  * psl to gain improper privileges or to cause
507  * a machine fault.
508  */
509 int
510 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
511 {
512 	struct proc *p = td->td_proc;
513 	struct l_sigframe frame;
514 	struct trapframe *regs;
515 	l_sigset_t lmask;
516 	int eflags, i;
517 
518 	regs = td->td_frame;
519 
520 #ifdef DEBUG
521 	if (ldebug(sigreturn))
522 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
523 #endif
524 	/*
525 	 * The trampoline code hands us the sigframe.
526 	 * It is unsafe to keep track of it ourselves, in the event that a
527 	 * program jumps out of a signal handler.
528 	 */
529 	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
530 		return (EFAULT);
531 
532 	/*
533 	 * Check for security violations.
534 	 */
535 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
536 	eflags = frame.sf_sc.sc_eflags;
537 	/*
538 	 * XXX do allow users to change the privileged flag PSL_RF.  The
539 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
540 	 * sometimes set it there too.  tf_eflags is kept in the signal
541 	 * context during signal handling and there is no other place
542 	 * to remember it, so the PSL_RF bit may be corrupted by the
543 	 * signal handler without us knowing.  Corruption of the PSL_RF
544 	 * bit at worst causes one more or one less debugger trap, so
545 	 * allowing it is fairly harmless.
546 	 */
547 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
548 		return(EINVAL);
549 
550 	/*
551 	 * Don't allow users to load a valid privileged %cs.  Let the
552 	 * hardware check for invalid selectors, excess privilege in
553 	 * other selectors, invalid %eip's and invalid %esp's.
554 	 */
555 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
556 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
557 		trapsignal(td, SIGBUS, T_PROTFLT);
558 		return(EINVAL);
559 	}
560 
561 	lmask.__bits[0] = frame.sf_sc.sc_mask;
562 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
563 		lmask.__bits[i+1] = frame.sf_extramask[i];
564 	PROC_LOCK(p);
565 	linux_to_bsd_sigset(&lmask, &td->td_sigmask);
566 	SIG_CANTMASK(td->td_sigmask);
567 	signotify(td);
568 	PROC_UNLOCK(p);
569 
570 	/*
571 	 * Restore signal context.
572 	 */
573 	/* %gs was restored by the trampoline. */
574 	regs->tf_fs     = frame.sf_sc.sc_fs;
575 	regs->tf_es     = frame.sf_sc.sc_es;
576 	regs->tf_ds     = frame.sf_sc.sc_ds;
577 	regs->tf_edi    = frame.sf_sc.sc_edi;
578 	regs->tf_esi    = frame.sf_sc.sc_esi;
579 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
580 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
581 	regs->tf_edx    = frame.sf_sc.sc_edx;
582 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
583 	regs->tf_eax    = frame.sf_sc.sc_eax;
584 	regs->tf_eip    = frame.sf_sc.sc_eip;
585 	regs->tf_cs     = frame.sf_sc.sc_cs;
586 	regs->tf_eflags = eflags;
587 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
588 	regs->tf_ss     = frame.sf_sc.sc_ss;
589 
590 	return (EJUSTRETURN);
591 }
592 
593 /*
594  * System call to cleanup state after a signal
595  * has been taken.  Reset signal mask and
596  * stack state from context left by rt_sendsig (above).
597  * Return to previous pc and psl as specified by
598  * context left by sendsig. Check carefully to
599  * make sure that the user has not modified the
600  * psl to gain improper privileges or to cause
601  * a machine fault.
602  */
603 int
604 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
605 {
606 	struct proc *p = td->td_proc;
607 	struct l_ucontext uc;
608 	struct l_sigcontext *context;
609 	l_stack_t *lss;
610 	stack_t ss;
611 	struct trapframe *regs;
612 	int eflags;
613 
614 	regs = td->td_frame;
615 
616 #ifdef DEBUG
617 	if (ldebug(rt_sigreturn))
618 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
619 #endif
620 	/*
621 	 * The trampoline code hands us the ucontext.
622 	 * It is unsafe to keep track of it ourselves, in the event that a
623 	 * program jumps out of a signal handler.
624 	 */
625 	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
626 		return (EFAULT);
627 
628 	context = &uc.uc_mcontext;
629 
630 	/*
631 	 * Check for security violations.
632 	 */
633 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
634 	eflags = context->sc_eflags;
635 	/*
636 	 * XXX do allow users to change the privileged flag PSL_RF.  The
637 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
638 	 * sometimes set it there too.  tf_eflags is kept in the signal
639 	 * context during signal handling and there is no other place
640 	 * to remember it, so the PSL_RF bit may be corrupted by the
641 	 * signal handler without us knowing.  Corruption of the PSL_RF
642 	 * bit at worst causes one more or one less debugger trap, so
643 	 * allowing it is fairly harmless.
644 	 */
645 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF))
646 		return(EINVAL);
647 
648 	/*
649 	 * Don't allow users to load a valid privileged %cs.  Let the
650 	 * hardware check for invalid selectors, excess privilege in
651 	 * other selectors, invalid %eip's and invalid %esp's.
652 	 */
653 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
654 	if (!CS_SECURE(context->sc_cs)) {
655 		trapsignal(td, SIGBUS, T_PROTFLT);
656 		return(EINVAL);
657 	}
658 
659 	PROC_LOCK(p);
660 	linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask);
661 	SIG_CANTMASK(td->td_sigmask);
662 	signotify(td);
663 	PROC_UNLOCK(p);
664 
665 	/*
666 	 * Restore signal context
667 	 */
668 	/* %gs was restored by the trampoline. */
669 	regs->tf_fs     = context->sc_fs;
670 	regs->tf_es     = context->sc_es;
671 	regs->tf_ds     = context->sc_ds;
672 	regs->tf_edi    = context->sc_edi;
673 	regs->tf_esi    = context->sc_esi;
674 	regs->tf_ebp    = context->sc_ebp;
675 	regs->tf_ebx    = context->sc_ebx;
676 	regs->tf_edx    = context->sc_edx;
677 	regs->tf_ecx    = context->sc_ecx;
678 	regs->tf_eax    = context->sc_eax;
679 	regs->tf_eip    = context->sc_eip;
680 	regs->tf_cs     = context->sc_cs;
681 	regs->tf_eflags = eflags;
682 	regs->tf_esp    = context->sc_esp_at_signal;
683 	regs->tf_ss     = context->sc_ss;
684 
685 	/*
686 	 * call sigaltstack & ignore results..
687 	 */
688 	lss = &uc.uc_stack;
689 	ss.ss_sp = lss->ss_sp;
690 	ss.ss_size = lss->ss_size;
691 	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
692 
693 #ifdef DEBUG
694 	if (ldebug(rt_sigreturn))
695 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
696 		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
697 #endif
698 	(void)kern_sigaltstack(td, &ss, NULL);
699 
700 	return (EJUSTRETURN);
701 }
702 
703 /*
704  * MPSAFE
705  */
706 static void
707 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
708 {
709 	args[0] = tf->tf_ebx;
710 	args[1] = tf->tf_ecx;
711 	args[2] = tf->tf_edx;
712 	args[3] = tf->tf_esi;
713 	args[4] = tf->tf_edi;
714 	args[5] = tf->tf_ebp;	/* Unconfirmed */
715 	*params = NULL;		/* no copyin */
716 }
717 
718 
719 
720 /*
721  * Dump core, into a file named as described in the comments for
722  * expand_name(), unless the process was setuid/setgid.
723  */
724 static int
725 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit)
726 {
727 	struct proc *p = td->td_proc;
728 	struct ucred *cred = td->td_ucred;
729 	struct vmspace *vm = p->p_vmspace;
730 	char *tempuser;
731 	int error;
732 
733 	if (ctob((uarea_pages + kstack_pages) +
734 	    vm->vm_dsize + vm->vm_ssize) >= limit)
735 		return (EFAULT);
736 	tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP,
737 	    M_WAITOK | M_ZERO);
738 	if (tempuser == NULL)
739 		return (ENOMEM);
740 	PROC_LOCK(p);
741 	fill_kinfo_proc(p, &p->p_uarea->u_kproc);
742 	PROC_UNLOCK(p);
743 	bcopy(p->p_uarea, tempuser, sizeof(struct user));
744 	bcopy(td->td_frame,
745 	    tempuser + ctob(uarea_pages) +
746 	    ((caddr_t)td->td_frame - (caddr_t)td->td_kstack),
747 	    sizeof(struct trapframe));
748 	error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser,
749 	    ctob(uarea_pages + kstack_pages),
750 	    (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED,
751 	    (int *)NULL, td);
752 	free(tempuser, M_TEMP);
753 	if (error == 0)
754 		error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr,
755 		    (int)ctob(vm->vm_dsize),
756 		    (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE,
757 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
758 	if (error == 0)
759 		error = vn_rdwr_inchunks(UIO_WRITE, vp,
760 		    (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)),
761 		    round_page(ctob(vm->vm_ssize)),
762 		    (off_t)ctob(uarea_pages + kstack_pages) +
763 			ctob(vm->vm_dsize), UIO_USERSPACE,
764 		    IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td);
765 	return (error);
766 }
767 /*
768  * If a linux binary is exec'ing something, try this image activator
769  * first.  We override standard shell script execution in order to
770  * be able to modify the interpreter path.  We only do this if a linux
771  * binary is doing the exec, so we do not create an EXEC module for it.
772  */
773 static int	exec_linux_imgact_try(struct image_params *iparams);
774 
775 static int
776 exec_linux_imgact_try(struct image_params *imgp)
777 {
778     const char *head = (const char *)imgp->image_header;
779     int error = -1;
780 
781     /*
782      * The interpreter for shell scripts run from a linux binary needs
783      * to be located in /compat/linux if possible in order to recursively
784      * maintain linux path emulation.
785      */
786     if (((const short *)head)[0] == SHELLMAGIC) {
787 	    /*
788 	     * Run our normal shell image activator.  If it succeeds attempt
789 	     * to use the alternate path for the interpreter.  If an alternate
790 	     * path is found, use our stringspace to store it.
791 	     */
792 	    if ((error = exec_shell_imgact(imgp)) == 0) {
793 		    char *rpath = NULL;
794 
795 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
796 			imgp->interpreter_name, &rpath, 0);
797 		    if (rpath != imgp->interpreter_name) {
798 			    int len = strlen(rpath) + 1;
799 
800 			    if (len <= MAXSHELLCMDLEN) {
801 				    memcpy(imgp->interpreter_name, rpath, len);
802 			    }
803 			    free(rpath, M_TEMP);
804 		    }
805 	    }
806     }
807     return(error);
808 }
809 
810 struct sysentvec linux_sysvec = {
811 	LINUX_SYS_MAXSYSCALL,
812 	linux_sysent,
813 	0xff,
814 	LINUX_SIGTBLSZ,
815 	bsd_to_linux_signal,
816 	ELAST + 1,
817 	bsd_to_linux_errno,
818 	translate_traps,
819 	linux_fixup,
820 	linux_sendsig,
821 	linux_sigcode,
822 	&linux_szsigcode,
823 	linux_prepsyscall,
824 	"Linux a.out",
825 	linux_aout_coredump,
826 	exec_linux_imgact_try,
827 	LINUX_MINSIGSTKSZ,
828 	PAGE_SIZE,
829 	VM_MIN_ADDRESS,
830 	VM_MAXUSER_ADDRESS,
831 	USRSTACK,
832 	PS_STRINGS,
833 	VM_PROT_ALL,
834 	exec_copyout_strings,
835 	exec_setregs
836 };
837 
838 struct sysentvec elf_linux_sysvec = {
839 	LINUX_SYS_MAXSYSCALL,
840 	linux_sysent,
841 	0xff,
842 	LINUX_SIGTBLSZ,
843 	bsd_to_linux_signal,
844 	ELAST + 1,
845 	bsd_to_linux_errno,
846 	translate_traps,
847 	elf_linux_fixup,
848 	linux_sendsig,
849 	linux_sigcode,
850 	&linux_szsigcode,
851 	linux_prepsyscall,
852 	"Linux ELF",
853 	elf32_coredump,
854 	exec_linux_imgact_try,
855 	LINUX_MINSIGSTKSZ,
856 	PAGE_SIZE,
857 	VM_MIN_ADDRESS,
858 	VM_MAXUSER_ADDRESS,
859 	USRSTACK,
860 	PS_STRINGS,
861 	VM_PROT_ALL,
862 	exec_copyout_strings,
863 	exec_setregs
864 };
865 
866 static Elf32_Brandinfo linux_brand = {
867 					ELFOSABI_LINUX,
868 					EM_386,
869 					"Linux",
870 					"/compat/linux",
871 					"/lib/ld-linux.so.1",
872 					&elf_linux_sysvec
873 				 };
874 
875 static Elf32_Brandinfo linux_glibc2brand = {
876 					ELFOSABI_LINUX,
877 					EM_386,
878 					"Linux",
879 					"/compat/linux",
880 					"/lib/ld-linux.so.2",
881 					&elf_linux_sysvec
882 				 };
883 
884 Elf32_Brandinfo *linux_brandlist[] = {
885 					&linux_brand,
886 					&linux_glibc2brand,
887 					NULL
888 				};
889 
890 static int
891 linux_elf_modevent(module_t mod, int type, void *data)
892 {
893 	Elf32_Brandinfo **brandinfo;
894 	int error;
895 	struct linux_ioctl_handler **lihp;
896 
897 	error = 0;
898 
899 	switch(type) {
900 	case MOD_LOAD:
901 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
902 		     ++brandinfo)
903 			if (elf32_insert_brand_entry(*brandinfo) < 0)
904 				error = EINVAL;
905 		if (error == 0) {
906 			SET_FOREACH(lihp, linux_ioctl_handler_set)
907 				linux_ioctl_register_handler(*lihp);
908 			if (bootverbose)
909 				printf("Linux ELF exec handler installed\n");
910 		} else
911 			printf("cannot insert Linux ELF brand handler\n");
912 		break;
913 	case MOD_UNLOAD:
914 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
915 		     ++brandinfo)
916 			if (elf32_brand_inuse(*brandinfo))
917 				error = EBUSY;
918 		if (error == 0) {
919 			for (brandinfo = &linux_brandlist[0];
920 			     *brandinfo != NULL; ++brandinfo)
921 				if (elf32_remove_brand_entry(*brandinfo) < 0)
922 					error = EINVAL;
923 		}
924 		if (error == 0) {
925 			SET_FOREACH(lihp, linux_ioctl_handler_set)
926 				linux_ioctl_unregister_handler(*lihp);
927 			if (bootverbose)
928 				printf("Linux ELF exec handler removed\n");
929 			linux_mib_destroy();
930 		} else
931 			printf("Could not deinstall ELF interpreter entry\n");
932 		break;
933 	default:
934 		break;
935 	}
936 	return error;
937 }
938 
939 static moduledata_t linux_elf_mod = {
940 	"linuxelf",
941 	linux_elf_modevent,
942 	0
943 };
944 
945 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
946