xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision ee2ea5ceafed78a5bd9810beb9e3ca927180c226)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 
51 #include <vm/vm.h>
52 #include <vm/vm_param.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_extern.h>
55 #include <sys/exec.h>
56 #include <sys/kernel.h>
57 #include <sys/module.h>
58 #include <machine/cpu.h>
59 #include <sys/mutex.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <compat/linux/linux_signal.h>
64 #include <compat/linux/linux_util.h>
65 
66 MODULE_VERSION(linux, 1);
67 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
68 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
69 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
70 
71 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72 
73 #if BYTE_ORDER == LITTLE_ENDIAN
74 #define SHELLMAGIC      0x2123 /* "#!" as seen when the first two header bytes are read as one short */
75 #else
76 #define SHELLMAGIC      0x2321 /* the same two bytes, "#!", in big-endian byte order */
77 #endif
78 
79 /*
80  * Allow the sendsig functions to use the ldebug() facility
81  * even though they are not syscalls themselves. Map them
82  * to syscall 0. This is slightly less bogus than using
83  * ldebug(sigreturn).
84  */
85 #define	LINUX_SYS_linux_rt_sendsig	0
86 #define	LINUX_SYS_linux_sendsig		0
87 
88 extern char linux_sigcode[];
89 extern int linux_szsigcode;
90 
91 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
92 
93 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
94 
95 static int	linux_fixup(register_t **stack_base,
96 		    struct image_params *iparams);
97 static int	elf_linux_fixup(register_t **stack_base,
98 		    struct image_params *iparams);
99 static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
100 		    caddr_t *params);
101 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
102 		    u_long code);
103 
104 /*
105  * Linux syscalls return negative errno's, we do positive and map them
106  */
107 static int bsd_to_linux_errno[ELAST + 1] = {	/* index: FreeBSD errno; value: negated Linux errno */
108   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,	/* FreeBSD errno 0-9 */
109  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,	/* 10-19 */
110  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,	/* 20-29 */
111  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,	/* 30-39 */
112  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,	/* 40-49 */
113 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,	/* 50-59 */
114 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,	/* 60-69 */
115 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,	/* 70-79 */
116   	-6, -6, -43, -42, -75, -6, -84	/* 80-86; any further slots up to ELAST are implicitly 0 */
117 };
118 
119 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {	/* FreeBSD -> Linux signal numbers; listed in both sysentvecs below */
120 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
121 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,	/* NOTE(review): 0 entries look like signals with no Linux equivalent - confirm */
122 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
123 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
124 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
125 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
126 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
127 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
128 };
129 
130 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {	/* Linux -> FreeBSD signal numbers; non-static and not referenced in this file */
131 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
132 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
133 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
134 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
135 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
136 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
137 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
138 	SIGIO, SIGURG, 0	/* NOTE(review): trailing 0 presumably marks an unmapped Linux signal - confirm */
139 };
140 
141 #define LINUX_T_UNKNOWN  255	/* reported for FreeBSD trap codes the table below does not map */
142 static int _bsd_to_linux_trapcode[] = {	/* index: FreeBSD T_* trap code; value: number stored in sc_trapno */
143 	LINUX_T_UNKNOWN,	/* 0 */
144 	6,			/* 1  T_PRIVINFLT */
145 	LINUX_T_UNKNOWN,	/* 2 */
146 	3,			/* 3  T_BPTFLT */
147 	LINUX_T_UNKNOWN,	/* 4 */
148 	LINUX_T_UNKNOWN,	/* 5 */
149 	16,			/* 6  T_ARITHTRAP */
150 	254,			/* 7  T_ASTFLT */
151 	LINUX_T_UNKNOWN,	/* 8 */
152 	13,			/* 9  T_PROTFLT */
153 	1,			/* 10 T_TRCTRAP */
154 	LINUX_T_UNKNOWN,	/* 11 */
155 	14,			/* 12 T_PAGEFLT */
156 	LINUX_T_UNKNOWN,	/* 13 */
157 	17,			/* 14 T_ALIGNFLT */
158 	LINUX_T_UNKNOWN,	/* 15 */
159 	LINUX_T_UNKNOWN,	/* 16 */
160 	LINUX_T_UNKNOWN,	/* 17 */
161 	0,			/* 18 T_DIVIDE */
162 	2,			/* 19 T_NMI */
163 	4,			/* 20 T_OFLOW */
164 	5,			/* 21 T_BOUND */
165 	7,			/* 22 T_DNA */
166 	8,			/* 23 T_DOUBLEFLT */
167 	9,			/* 24 T_FPOPFLT */
168 	10,			/* 25 T_TSSFLT */
169 	11,			/* 26 T_SEGNPFLT */
170 	12,			/* 27 T_STKFLT */
171 	18,			/* 28 T_MCHK */
172 	19,			/* 29 T_XMMFLT */
173 	15			/* 30 T_RESERVED */
174 };
175 #define bsd_to_linux_trapcode(code) /* bounds-checked lookup; out-of-range codes yield LINUX_T_UNKNOWN */ \
176     ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
177      _bsd_to_linux_trapcode[(code)]: \
178      LINUX_T_UNKNOWN)
179 
180 /*
181  * If FreeBSD & Linux have a difference of opinion about what a trap
182  * means, deal with it here.
183  *
184  * MPSAFE
185  */
186 static int
187 translate_traps(int signal, int trap_code)
188 {
189 	if (signal != SIGBUS)
190 		return signal;
191 	switch (trap_code) {
192 	case T_PROTFLT:
193 	case T_TSSFLT:
194 	case T_DOUBLEFLT:
195 	case T_PAGEFLT:
196 		return SIGSEGV;
197 	default:
198 		return signal;
199 	}
200 }
201 
202 static int
203 linux_fixup(register_t **stack_base, struct image_params *imgp)
204 {
205 	register_t *argv, *envp;
206 
207 	argv = *stack_base;
208 	envp = *stack_base + (imgp->argc + 1);
209 	(*stack_base)--;
210 	**stack_base = (intptr_t)(void *)envp;
211 	(*stack_base)--;
212 	**stack_base = (intptr_t)(void *)argv;
213 	(*stack_base)--;
214 	**stack_base = imgp->argc;
215 	return 0;
216 }
217 
218 static int
219 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
220 {
221 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
222 	register_t *pos;
223 
224 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
225 
226 	if (args->trace) {
227 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
228 	}
229 	if (args->execfd != -1) {
230 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
231 	}
232 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
233 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
234 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
235 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
236 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
237 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
238 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
239 	PROC_LOCK(imgp->proc);
240 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
241 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
242 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
243 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
244 	PROC_UNLOCK(imgp->proc);
245 	AUXARGS_ENTRY(pos, AT_NULL, 0);
246 
247 	free(imgp->auxargs, M_TEMP);
248 	imgp->auxargs = NULL;
249 
250 	(*stack_base)--;
251 	**stack_base = (long)imgp->argc;
252 	return 0;
253 }
254 
255 extern int _ucodesel, _udatasel;
256 extern unsigned long linux_sznonrtsigcode;
257 
258 static void
259 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
260 {
261 	register struct thread *td = curthread;
262 	register struct proc *p = td->td_proc;
263 	register struct trapframe *regs;
264 	struct l_rt_sigframe *fp, frame;
265 	int oonstack;
266 
267 	PROC_LOCK_ASSERT(p, MA_OWNED);
268 	regs = td->td_frame;
269 	oonstack = sigonstack(regs->tf_esp);
270 
271 #ifdef DEBUG
272 	if (ldebug(rt_sendsig))
273 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
274 		    catcher, sig, (void*)mask, code);
275 #endif
276 	/*
277 	 * Allocate space for the signal handler context.
278 	 */
279 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
280 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
281 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
282 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
283 	} else
284 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
285 	PROC_UNLOCK(p);
286 
287 	/*
288 	 * Build the argument list for the signal handler.
289 	 */
290 	if (p->p_sysent->sv_sigtbl)
291 		if (sig <= p->p_sysent->sv_sigsize)
292 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
293 
294 	frame.sf_handler = catcher;
295 	frame.sf_sig = sig;
296 	frame.sf_siginfo = &fp->sf_si;
297 	frame.sf_ucontext = &fp->sf_sc;
298 
299 	/* Fill siginfo structure. */
300 	frame.sf_si.lsi_signo = sig;
301 	frame.sf_si.lsi_code = code;
302 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
303 
304 	/*
305 	 * Build the signal context to be used by sigreturn.
306 	 */
307 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
308 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
309 
310 	PROC_LOCK(p);
311 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
312 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
313 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
314 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
315 	PROC_UNLOCK(p);
316 
317 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
318 
319 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
320 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
321 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
322 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
323 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
324 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
325 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
326 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
327 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
328 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
329 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
330 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
331 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
332 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
333 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
334 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
335 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
336 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
337 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
338 
339 #ifdef DEBUG
340 	if (ldebug(rt_sendsig))
341 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
342 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
343 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
344 #endif
345 
346 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
347 		/*
348 		 * Process has trashed its stack; give it an illegal
349 		 * instruction to halt it in its tracks.
350 		 */
351 #ifdef DEBUG
352 		if (ldebug(rt_sendsig))
353 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
354 			    fp, oonstack);
355 #endif
356 		PROC_LOCK(p);
357 		sigexit(td, SIGILL);
358 	}
359 
360 	/*
361 	 * Build context to run handler in.
362 	 */
363 	regs->tf_esp = (int)fp;
364 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
365 	    linux_sznonrtsigcode;
366 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
367 	regs->tf_cs = _ucodesel;
368 	regs->tf_ds = _udatasel;
369 	regs->tf_es = _udatasel;
370 	regs->tf_fs = _udatasel;
371 	regs->tf_ss = _udatasel;
372 	PROC_LOCK(p);
373 }
374 
375 
376 /*
377  * Send an interrupt to process.
378  *
379  * Stack is set up to allow sigcode stored
380  * in u. to call routine, followed by kcall
381  * to sigreturn routine below.  After sigreturn
382  * resets the signal mask, the stack, and the
383  * frame pointer, it returns to the user
384  * specified pc, psl.
385  */
386 
387 static void
388 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
389 {
390 	register struct thread *td = curthread;
391 	register struct proc *p = td->td_proc;
392 	register struct trapframe *regs;
393 	struct l_sigframe *fp, frame;
394 	l_sigset_t lmask;
395 	int oonstack, i;
396 
397 	PROC_LOCK_ASSERT(p, MA_OWNED);
398 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
399 		/* Signal handler installed with SA_SIGINFO. */
400 		linux_rt_sendsig(catcher, sig, mask, code);
401 		return;
402 	}
403 
404 	regs = td->td_frame;
405 	oonstack = sigonstack(regs->tf_esp);
406 
407 #ifdef DEBUG
408 	if (ldebug(sendsig))
409 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
410 		    catcher, sig, (void*)mask, code);
411 #endif
412 
413 	/*
414 	 * Allocate space for the signal handler context.
415 	 */
416 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
417 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
418 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
419 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
420 	} else
421 		fp = (struct l_sigframe *)regs->tf_esp - 1;
422 	PROC_UNLOCK(p);
423 
424 	/*
425 	 * Build the argument list for the signal handler.
426 	 */
427 	if (p->p_sysent->sv_sigtbl)
428 		if (sig <= p->p_sysent->sv_sigsize)
429 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
430 
431 	frame.sf_handler = catcher;
432 	frame.sf_sig = sig;
433 
434 	bsd_to_linux_sigset(mask, &lmask);
435 
436 	/*
437 	 * Build the signal context to be used by sigreturn.
438 	 */
439 	frame.sf_sc.sc_mask   = lmask.__bits[0];
440 	frame.sf_sc.sc_gs     = rgs();
441 	frame.sf_sc.sc_fs     = regs->tf_fs;
442 	frame.sf_sc.sc_es     = regs->tf_es;
443 	frame.sf_sc.sc_ds     = regs->tf_ds;
444 	frame.sf_sc.sc_edi    = regs->tf_edi;
445 	frame.sf_sc.sc_esi    = regs->tf_esi;
446 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
447 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
448 	frame.sf_sc.sc_edx    = regs->tf_edx;
449 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
450 	frame.sf_sc.sc_eax    = regs->tf_eax;
451 	frame.sf_sc.sc_eip    = regs->tf_eip;
452 	frame.sf_sc.sc_cs     = regs->tf_cs;
453 	frame.sf_sc.sc_eflags = regs->tf_eflags;
454 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
455 	frame.sf_sc.sc_ss     = regs->tf_ss;
456 	frame.sf_sc.sc_err    = regs->tf_err;
457 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
458 
459 	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
460 
461 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
462 		frame.sf_extramask[i] = lmask.__bits[i+1];
463 
464 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
465 		/*
466 		 * Process has trashed its stack; give it an illegal
467 		 * instruction to halt it in its tracks.
468 		 */
469 		PROC_LOCK(p);
470 		sigexit(td, SIGILL);
471 	}
472 
473 	/*
474 	 * Build context to run handler in.
475 	 */
476 	regs->tf_esp = (int)fp;
477 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
478 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
479 	regs->tf_cs = _ucodesel;
480 	regs->tf_ds = _udatasel;
481 	regs->tf_es = _udatasel;
482 	regs->tf_fs = _udatasel;
483 	regs->tf_ss = _udatasel;
484 	PROC_LOCK(p);
485 }
486 
487 /*
488  * System call to cleanup state after a signal
489  * has been taken.  Reset signal mask and
490  * stack state from context left by sendsig (above).
491  * Return to previous pc and psl as specified by
492  * context left by sendsig. Check carefully to
493  * make sure that the user has not modified the
494  * psl to gain improper privileges or to cause
495  * a machine fault.
496  */
497 int
498 linux_sigreturn(td, args)
499 	struct thread *td;
500 	struct linux_sigreturn_args *args;
501 {
502 	struct proc *p = td->td_proc;
503 	struct l_sigframe frame;
504 	register struct trapframe *regs;
505 	l_sigset_t lmask;
506 	int eflags, i;
507 
508 	regs = td->td_frame;
509 
510 #ifdef DEBUG
511 	if (ldebug(sigreturn))
512 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
513 #endif
514 	/*
515 	 * The trampoline code hands us the sigframe.
516 	 * It is unsafe to keep track of it ourselves, in the event that a
517 	 * program jumps out of a signal handler.
518 	 */
519 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
520 		return (EFAULT);
521 
522 	/*
523 	 * Check for security violations.
524 	 */
525 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
526 	eflags = frame.sf_sc.sc_eflags;
527 	/*
528 	 * XXX do allow users to change the privileged flag PSL_RF.  The
529 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
530 	 * sometimes set it there too.  tf_eflags is kept in the signal
531 	 * context during signal handling and there is no other place
532 	 * to remember it, so the PSL_RF bit may be corrupted by the
533 	 * signal handler without us knowing.  Corruption of the PSL_RF
534 	 * bit at worst causes one more or one less debugger trap, so
535 	 * allowing it is fairly harmless.
536 	 */
537 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
538     		return(EINVAL);
539 	}
540 
541 	/*
542 	 * Don't allow users to load a valid privileged %cs.  Let the
543 	 * hardware check for invalid selectors, excess privilege in
544 	 * other selectors, invalid %eip's and invalid %esp's.
545 	 */
546 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
547 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
548 		trapsignal(p, SIGBUS, T_PROTFLT);
549 		return(EINVAL);
550 	}
551 
552 	lmask.__bits[0] = frame.sf_sc.sc_mask;
553 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
554 		lmask.__bits[i+1] = frame.sf_extramask[i];
555 	PROC_LOCK(p);
556 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
557 	SIG_CANTMASK(p->p_sigmask);
558 	signotify(p);
559 	PROC_UNLOCK(p);
560 
561 	/*
562 	 * Restore signal context.
563 	 */
564 	/* %gs was restored by the trampoline. */
565 	regs->tf_fs     = frame.sf_sc.sc_fs;
566 	regs->tf_es     = frame.sf_sc.sc_es;
567 	regs->tf_ds     = frame.sf_sc.sc_ds;
568 	regs->tf_edi    = frame.sf_sc.sc_edi;
569 	regs->tf_esi    = frame.sf_sc.sc_esi;
570 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
571 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
572 	regs->tf_edx    = frame.sf_sc.sc_edx;
573 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
574 	regs->tf_eax    = frame.sf_sc.sc_eax;
575 	regs->tf_eip    = frame.sf_sc.sc_eip;
576 	regs->tf_cs     = frame.sf_sc.sc_cs;
577 	regs->tf_eflags = eflags;
578 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
579 	regs->tf_ss     = frame.sf_sc.sc_ss;
580 
581 	return (EJUSTRETURN);
582 }
583 
584 /*
585  * System call to cleanup state after a signal
586  * has been taken.  Reset signal mask and
587  * stack state from context left by rt_sendsig (above).
588  * Return to previous pc and psl as specified by
589  * context left by sendsig. Check carefully to
590  * make sure that the user has not modified the
591  * psl to gain improper privileges or to cause
592  * a machine fault.
593  */
594 int
595 linux_rt_sigreturn(td, args)
596 	struct thread *td;
597 	struct linux_rt_sigreturn_args *args;
598 {
599 	struct proc *p = td->td_proc;
600 	struct sigaltstack_args sasargs;
601 	struct l_ucontext uc;
602 	struct l_sigcontext *context;
603 	l_stack_t *lss;
604 	stack_t *ss;
605 	register struct trapframe *regs;
606 	int eflags;
607 	caddr_t sg = stackgap_init();
608 
609 	regs = td->td_frame;
610 
611 #ifdef DEBUG
612 	if (ldebug(rt_sigreturn))
613 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
614 #endif
615 	/*
616 	 * The trampoline code hands us the ucontext.
617 	 * It is unsafe to keep track of it ourselves, in the event that a
618 	 * program jumps out of a signal handler.
619 	 */
620 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
621 		return (EFAULT);
622 
623 	context = &uc.uc_mcontext;
624 
625 	/*
626 	 * Check for security violations.
627 	 */
628 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
629 	eflags = context->sc_eflags;
630 	/*
631 	 * XXX do allow users to change the privileged flag PSL_RF.  The
632 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
633 	 * sometimes set it there too.  tf_eflags is kept in the signal
634 	 * context during signal handling and there is no other place
635 	 * to remember it, so the PSL_RF bit may be corrupted by the
636 	 * signal handler without us knowing.  Corruption of the PSL_RF
637 	 * bit at worst causes one more or one less debugger trap, so
638 	 * allowing it is fairly harmless.
639 	 */
640 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
641     		return(EINVAL);
642 	}
643 
644 	/*
645 	 * Don't allow users to load a valid privileged %cs.  Let the
646 	 * hardware check for invalid selectors, excess privilege in
647 	 * other selectors, invalid %eip's and invalid %esp's.
648 	 */
649 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
650 	if (!CS_SECURE(context->sc_cs)) {
651 		trapsignal(p, SIGBUS, T_PROTFLT);
652 		return(EINVAL);
653 	}
654 
655 	PROC_LOCK(p);
656 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
657 	SIG_CANTMASK(p->p_sigmask);
658 	signotify(p);
659 	PROC_UNLOCK(p);
660 
661 	/*
662 	 * Restore signal context
663 	 */
664 	/* %gs was restored by the trampoline. */
665 	regs->tf_fs     = context->sc_fs;
666 	regs->tf_es     = context->sc_es;
667 	regs->tf_ds     = context->sc_ds;
668 	regs->tf_edi    = context->sc_edi;
669 	regs->tf_esi    = context->sc_esi;
670 	regs->tf_ebp    = context->sc_ebp;
671 	regs->tf_ebx    = context->sc_ebx;
672 	regs->tf_edx    = context->sc_edx;
673 	regs->tf_ecx    = context->sc_ecx;
674 	regs->tf_eax    = context->sc_eax;
675 	regs->tf_eip    = context->sc_eip;
676 	regs->tf_cs     = context->sc_cs;
677 	regs->tf_eflags = eflags;
678 	regs->tf_esp    = context->sc_esp_at_signal;
679 	regs->tf_ss     = context->sc_ss;
680 
681 	/*
682 	 * call sigaltstack & ignore results..
683 	 */
684 	ss = stackgap_alloc(&sg, sizeof(stack_t));
685 	lss = &uc.uc_stack;
686 	ss->ss_sp = lss->ss_sp;
687 	ss->ss_size = lss->ss_size;
688 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
689 
690 #ifdef DEBUG
691 	if (ldebug(rt_sigreturn))
692 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
693 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
694 #endif
695 	sasargs.ss = ss;
696 	sasargs.oss = NULL;
697 	(void) sigaltstack(td, &sasargs);
698 
699 	return (EJUSTRETURN);
700 }
701 
702 /*
703  * MPSAFE
704  */
705 static void
706 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
707 {
708 	args[0] = tf->tf_ebx;
709 	args[1] = tf->tf_ecx;
710 	args[2] = tf->tf_edx;
711 	args[3] = tf->tf_esi;
712 	args[4] = tf->tf_edi;
713 	*params = NULL;		/* no copyin */
714 }
715 
716 /*
717  * If a linux binary is exec'ing something, try this image activator
718  * first.  We override standard shell script execution in order to
719  * be able to modify the interpreter path.  We only do this if a linux
720  * binary is doing the exec, so we do not create an EXEC module for it.
721  */
722 static int	exec_linux_imgact_try(struct image_params *iparams);
723 
724 static int
725 exec_linux_imgact_try(imgp)
726     struct image_params *imgp;
727 {
728     const char *head = (const char *)imgp->image_header;
729     int error = -1;
730 
731     /*
732      * The interpreter for shell scripts run from a linux binary needs
733      * to be located in /compat/linux if possible in order to recursively
734      * maintain linux path emulation.
735      */
736     if (((const short *)head)[0] == SHELLMAGIC) {
737 	    /*
738 	     * Run our normal shell image activator.  If it succeeds attempt
739 	     * to use the alternate path for the interpreter.  If an alternate
740 	     * path is found, use our stringspace to store it.
741 	     */
742 	    if ((error = exec_shell_imgact(imgp)) == 0) {
743 		    char *rpath = NULL;
744 
745 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
746 			imgp->interpreter_name, &rpath, 0);
747 		    if (rpath != imgp->interpreter_name) {
748 			    int len = strlen(rpath) + 1;
749 
750 			    if (len <= MAXSHELLCMDLEN) {
751 				    memcpy(imgp->interpreter_name, rpath, len);
752 			    }
753 			    free(rpath, M_TEMP);
754 		    }
755 	    }
756     }
757     return(error);
758 }
759 
760 struct sysentvec linux_sysvec = {	/* Linux a.out ABI; positional initializer, order must match struct sysentvec */
761 	LINUX_SYS_MAXSYSCALL,
762 	linux_sysent,
763 	0xff,
764 	LINUX_SIGTBLSZ,
765 	bsd_to_linux_signal,
766 	ELAST + 1,	/* number of entries in bsd_to_linux_errno */
767 	bsd_to_linux_errno,
768 	translate_traps,
769 	linux_fixup,	/* a.out stack fixup */
770 	linux_sendsig,
771 	linux_sigcode,
772 	&linux_szsigcode,
773 	linux_prepsyscall,
774 	"Linux a.out",
775 	aout_coredump,
776 	exec_linux_imgact_try,
777 	LINUX_MINSIGSTKSZ
778 };
779 
780 struct sysentvec elf_linux_sysvec = {	/* Linux ELF ABI; differs from linux_sysvec only in fixup, name and coredump */
781 	LINUX_SYS_MAXSYSCALL,
782 	linux_sysent,
783 	0xff,
784 	LINUX_SIGTBLSZ,
785 	bsd_to_linux_signal,
786 	ELAST + 1,	/* number of entries in bsd_to_linux_errno */
787 	bsd_to_linux_errno,
788 	translate_traps,
789 	elf_linux_fixup,	/* ELF stack fixup (writes the auxiliary vector) */
790 	linux_sendsig,
791 	linux_sigcode,
792 	&linux_szsigcode,
793 	linux_prepsyscall,
794 	"Linux ELF",
795 	elf_coredump,
796 	exec_linux_imgact_try,
797 	LINUX_MINSIGSTKSZ
798 };
799 
800 static Elf32_Brandinfo linux_brand = {	/* brand entry naming /lib/ld-linux.so.1 */
801 					ELFOSABI_LINUX,
802 					"Linux",
803 					"/compat/linux",
804 					"/lib/ld-linux.so.1",
805 					&elf_linux_sysvec
806 				 };
807 
808 static Elf32_Brandinfo linux_glibc2brand = {	/* same as linux_brand except for the ld-linux.so.2 string */
809 					ELFOSABI_LINUX,
810 					"Linux",
811 					"/compat/linux",
812 					"/lib/ld-linux.so.2",
813 					&elf_linux_sysvec
814 				 };
815 
816 Elf32_Brandinfo *linux_brandlist[] = {	/* NULL-terminated; walked by linux_elf_modevent() on load and unload */
817 					&linux_brand,
818 					&linux_glibc2brand,
819 					NULL
820 				};
821 
822 static int
823 linux_elf_modevent(module_t mod, int type, void *data)
824 {
825 	Elf32_Brandinfo **brandinfo;
826 	int error;
827 	struct linux_ioctl_handler **lihp;
828 
829 	error = 0;
830 
831 	switch(type) {
832 	case MOD_LOAD:
833 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
834 		     ++brandinfo)
835 			if (elf_insert_brand_entry(*brandinfo) < 0)
836 				error = EINVAL;
837 		if (error == 0) {
838 			SET_FOREACH(lihp, linux_ioctl_handler_set)
839 				linux_ioctl_register_handler(*lihp);
840 			if (bootverbose)
841 				printf("Linux ELF exec handler installed\n");
842 		} else
843 			printf("cannot insert Linux ELF brand handler\n");
844 		break;
845 	case MOD_UNLOAD:
846 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
847 		     ++brandinfo)
848 			if (elf_brand_inuse(*brandinfo))
849 				error = EBUSY;
850 		if (error == 0) {
851 			for (brandinfo = &linux_brandlist[0];
852 			     *brandinfo != NULL; ++brandinfo)
853 				if (elf_remove_brand_entry(*brandinfo) < 0)
854 					error = EINVAL;
855 		}
856 		if (error == 0) {
857 			SET_FOREACH(lihp, linux_ioctl_handler_set)
858 				linux_ioctl_unregister_handler(*lihp);
859 			if (bootverbose)
860 				printf("Linux ELF exec handler removed\n");
861 		} else
862 			printf("Could not deinstall ELF interpreter entry\n");
863 		break;
864 	default:
865 		break;
866 	}
867 	return error;
868 }
869 
870 static moduledata_t linux_elf_mod = {	/* ties the "linuxelf" module to its event handler */
871 	"linuxelf",
872 	linux_elf_modevent,	/* handles MOD_LOAD and MOD_UNLOAD */
873 	0
874 };
875 
876 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
877