xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision c17d43407fe04133a94055b0dbc7ea8965654a9f)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mutex.h>
46 #include <sys/proc.h>
47 #include <sys/signalvar.h>
48 #include <sys/sysent.h>
49 #include <sys/sysproto.h>
50 
51 #include <vm/vm.h>
52 #include <vm/vm_param.h>
53 #include <vm/vm_page.h>
54 #include <vm/vm_extern.h>
55 #include <sys/exec.h>
56 #include <sys/kernel.h>
57 #include <sys/module.h>
58 #include <machine/cpu.h>
59 #include <sys/mutex.h>
60 
61 #include <i386/linux/linux.h>
62 #include <i386/linux/linux_proto.h>
63 #include <compat/linux/linux_signal.h>
64 #include <compat/linux/linux_util.h>
65 
66 MODULE_VERSION(linux, 1);
67 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
68 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
69 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
70 
71 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
72 
/*
 * The two bytes "#!" ('#' is 0x23, '!' is 0x21) as they appear when the
 * start of an image header is read as a short on this byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define	SHELLMAGIC	0x2123	/* #! */
#else
#define	SHELLMAGIC	0x2321	/* #! */
#endif
78 
/*
 * The sendsig routines are not system calls themselves, but they still
 * want to use the ldebug() facility.  Map them to syscall 0 for that
 * purpose; this is slightly less bogus than using ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0
87 
88 extern char linux_sigcode[];
89 extern int linux_szsigcode;
90 
91 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
92 
93 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
94 
95 static int	linux_fixup(register_t **stack_base,
96 				 struct image_params *iparams);
97 static int	elf_linux_fixup(register_t **stack_base,
98 				     struct image_params *iparams);
99 static void	linux_prepsyscall(struct trapframe *tf, int *args,
100 				       u_int *code, caddr_t *params);
101 static void     linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
102 				   u_long code);
103 
104 /*
105  * Linux syscalls return negative errno's, we do positive and map them
106  */
107 static int bsd_to_linux_errno[ELAST + 1] = {
108   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
109  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
110  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
111  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
112  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
113 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
114 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
115 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
116   	-6, -6, -43, -42, -75, -6, -84
117 };
118 
119 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
120 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
121 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
122 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
123 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
124 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
125 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
126 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
127 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
128 };
129 
130 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
131 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
132 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
133 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
134 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
135 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
136 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
137 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
138 	SIGIO, SIGURG, 0
139 };
140 
/* Value reported for any BSD trap code that has no translation. */
#define	LINUX_T_UNKNOWN		255

/*
 * Trap number translation table.  The index is the BSD trap code (named
 * in the comment of each translated entry); the value is the trap number
 * stored into the Linux signal context by the sendsig routines.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* [ 0] */
	6,			/* [ 1] T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* [ 2] */
	3,			/* [ 3] T_BPTFLT */
	LINUX_T_UNKNOWN,	/* [ 4] */
	LINUX_T_UNKNOWN,	/* [ 5] */
	16,			/* [ 6] T_ARITHTRAP */
	254,			/* [ 7] T_ASTFLT */
	LINUX_T_UNKNOWN,	/* [ 8] */
	13,			/* [ 9] T_PROTFLT */
	1,			/* [10] T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* [11] */
	14,			/* [12] T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* [13] */
	17,			/* [14] T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* [15] */
	LINUX_T_UNKNOWN,	/* [16] */
	LINUX_T_UNKNOWN,	/* [17] */
	0,			/* [18] T_DIVIDE */
	2,			/* [19] T_NMI */
	4,			/* [20] T_OFLOW */
	5,			/* [21] T_BOUND */
	7,			/* [22] T_DNA */
	8,			/* [23] T_DOUBLEFLT */
	9,			/* [24] T_FPOPFLT */
	10,			/* [25] T_TSSFLT */
	11,			/* [26] T_SEGNPFLT */
	12,			/* [27] T_STKFLT */
	18,			/* [28] T_MCHK */
	19,			/* [29] T_XMMFLT */
	15			/* [30] T_RESERVED */
};

/*
 * Translate a BSD trap code.  Codes past the end of the table yield
 * LINUX_T_UNKNOWN.  The argument is evaluated twice, so it must not have
 * side effects.
 */
#define	bsd_to_linux_trapcode(code)					\
	((code) < sizeof(_bsd_to_linux_trapcode) /			\
	    sizeof(*_bsd_to_linux_trapcode) ?				\
	    _bsd_to_linux_trapcode[(code)] :				\
	    LINUX_T_UNKNOWN)
179 
180 /*
181  * If FreeBSD & Linux have a difference of opinion about what a trap
182  * means, deal with it here.
183  *
184  * MPSAFE
185  */
186 static int
187 translate_traps(int signal, int trap_code)
188 {
189 	if (signal != SIGBUS)
190 		return signal;
191 	switch (trap_code) {
192 	case T_PROTFLT:
193 	case T_TSSFLT:
194 	case T_DOUBLEFLT:
195 	case T_PAGEFLT:
196 		return SIGSEGV;
197 	default:
198 		return signal;
199 	}
200 }
201 
202 static int
203 linux_fixup(register_t **stack_base, struct image_params *imgp)
204 {
205 	register_t *argv, *envp;
206 
207 	argv = *stack_base;
208 	envp = *stack_base + (imgp->argc + 1);
209 	(*stack_base)--;
210 	**stack_base = (intptr_t)(void *)envp;
211 	(*stack_base)--;
212 	**stack_base = (intptr_t)(void *)argv;
213 	(*stack_base)--;
214 	**stack_base = imgp->argc;
215 	return 0;
216 }
217 
218 static int
219 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
220 {
221 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
222 	register_t *pos;
223 
224 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
225 
226 	if (args->trace) {
227 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
228 	}
229 	if (args->execfd != -1) {
230 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
231 	}
232 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
233 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
234 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
235 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
236 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
237 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
238 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
239 	PROC_LOCK(imgp->proc);
240 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
241 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
242 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
243 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
244 	PROC_UNLOCK(imgp->proc);
245 	AUXARGS_ENTRY(pos, AT_NULL, 0);
246 
247 	free(imgp->auxargs, M_TEMP);
248 	imgp->auxargs = NULL;
249 
250 	(*stack_base)--;
251 	**stack_base = (long)imgp->argc;
252 	return 0;
253 }
254 
/*
 * Selectors loaded into the code and data segment registers when a
 * signal handler is entered.
 */
extern int		_ucodesel;
extern int		_udatasel;

/*
 * Offset added to the start of the signal trampoline to find the entry
 * point used for frames built by linux_rt_sendsig().
 */
extern unsigned long	linux_sznonrtsigcode;
257 
258 static void
259 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
260 {
261 	register struct thread *td = curthread;
262 	register struct proc *p = td->td_proc;
263 	register struct trapframe *regs;
264 	struct l_rt_sigframe *fp, frame;
265 	int oonstack;
266 
267 	PROC_LOCK_ASSERT(p, MA_OWNED);
268 	regs = td->td_frame;
269 	oonstack = sigonstack(regs->tf_esp);
270 
271 #ifdef DEBUG
272 	if (ldebug(rt_sendsig))
273 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
274 		    catcher, sig, (void*)mask, code);
275 #endif
276 	/*
277 	 * Allocate space for the signal handler context.
278 	 */
279 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
280 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
281 		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
282 		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
283 	} else
284 		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
285 	PROC_UNLOCK(p);
286 
287 	/*
288 	 * Build the argument list for the signal handler.
289 	 */
290 	if (p->p_sysent->sv_sigtbl)
291 		if (sig <= p->p_sysent->sv_sigsize)
292 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
293 
294 	frame.sf_handler = catcher;
295 	frame.sf_sig = sig;
296 	frame.sf_siginfo = &fp->sf_si;
297 	frame.sf_ucontext = &fp->sf_sc;
298 
299 	/* Fill siginfo structure. */
300 	frame.sf_si.lsi_signo = sig;
301 	frame.sf_si.lsi_code = code;
302 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
303 
304 	/*
305 	 * Build the signal context to be used by sigreturn.
306 	 */
307 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
308 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
309 
310 	PROC_LOCK(p);
311 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
312 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
313 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
314 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
315 	PROC_UNLOCK(p);
316 
317 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
318 
319 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
320 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
321 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
322 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
323 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
324 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
325 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
326 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
327 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
328 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
329 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
330 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
331 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
332 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
333 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
334 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
335 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
336 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
337 	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
338 
339 #ifdef DEBUG
340 	if (ldebug(rt_sendsig))
341 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
342 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
343 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
344 #endif
345 
346 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
347 		/*
348 		 * Process has trashed its stack; give it an illegal
349 		 * instruction to halt it in its tracks.
350 		 */
351 #ifdef DEBUG
352 		if (ldebug(rt_sendsig))
353 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
354 			    fp, oonstack);
355 #endif
356 		PROC_LOCK(p);
357 		sigexit(td, SIGILL);
358 	}
359 
360 	/*
361 	 * Build context to run handler in.
362 	 */
363 	regs->tf_esp = (int)fp;
364 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
365 	    linux_sznonrtsigcode;
366 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
367 	regs->tf_cs = _ucodesel;
368 	regs->tf_ds = _udatasel;
369 	regs->tf_es = _udatasel;
370 	regs->tf_fs = _udatasel;
371 	regs->tf_ss = _udatasel;
372 	PROC_LOCK(p);
373 }
374 
375 
376 /*
377  * Send an interrupt to process.
378  *
379  * Stack is set up to allow sigcode stored
380  * in u. to call routine, followed by kcall
381  * to sigreturn routine below.  After sigreturn
382  * resets the signal mask, the stack, and the
383  * frame pointer, it returns to the user
384  * specified pc, psl.
385  */
386 
387 static void
388 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
389 {
390 	register struct thread *td = curthread;
391 	register struct proc *p = td->td_proc;
392 	register struct trapframe *regs;
393 	struct l_sigframe *fp, frame;
394 	l_sigset_t lmask;
395 	int oonstack, i;
396 
397 	PROC_LOCK_ASSERT(p, MA_OWNED);
398 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
399 		/* Signal handler installed with SA_SIGINFO. */
400 		linux_rt_sendsig(catcher, sig, mask, code);
401 		return;
402 	}
403 
404 	regs = td->td_frame;
405 	oonstack = sigonstack(regs->tf_esp);
406 
407 #ifdef DEBUG
408 	if (ldebug(sendsig))
409 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
410 		    catcher, sig, (void*)mask, code);
411 #endif
412 
413 	/*
414 	 * Allocate space for the signal handler context.
415 	 */
416 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
417 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
418 		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
419 		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
420 	} else
421 		fp = (struct l_sigframe *)regs->tf_esp - 1;
422 	PROC_UNLOCK(p);
423 
424 	/*
425 	 * Build the argument list for the signal handler.
426 	 */
427 	if (p->p_sysent->sv_sigtbl)
428 		if (sig <= p->p_sysent->sv_sigsize)
429 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
430 
431 	frame.sf_handler = catcher;
432 	frame.sf_sig = sig;
433 
434 	bsd_to_linux_sigset(mask, &lmask);
435 
436 	/*
437 	 * Build the signal context to be used by sigreturn.
438 	 */
439 	frame.sf_sc.sc_mask   = lmask.__bits[0];
440 	frame.sf_sc.sc_gs     = rgs();
441 	frame.sf_sc.sc_fs     = regs->tf_fs;
442 	frame.sf_sc.sc_es     = regs->tf_es;
443 	frame.sf_sc.sc_ds     = regs->tf_ds;
444 	frame.sf_sc.sc_edi    = regs->tf_edi;
445 	frame.sf_sc.sc_esi    = regs->tf_esi;
446 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
447 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
448 	frame.sf_sc.sc_edx    = regs->tf_edx;
449 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
450 	frame.sf_sc.sc_eax    = regs->tf_eax;
451 	frame.sf_sc.sc_eip    = regs->tf_eip;
452 	frame.sf_sc.sc_cs     = regs->tf_cs;
453 	frame.sf_sc.sc_eflags = regs->tf_eflags;
454 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
455 	frame.sf_sc.sc_ss     = regs->tf_ss;
456 	frame.sf_sc.sc_err    = regs->tf_err;
457 	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);
458 
459 	bzero(&frame.sf_fpstate, sizeof(struct l_fpstate));
460 
461 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
462 		frame.sf_extramask[i] = lmask.__bits[i+1];
463 
464 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
465 		/*
466 		 * Process has trashed its stack; give it an illegal
467 		 * instruction to halt it in its tracks.
468 		 */
469 		PROC_LOCK(p);
470 		sigexit(td, SIGILL);
471 	}
472 
473 	/*
474 	 * Build context to run handler in.
475 	 */
476 	regs->tf_esp = (int)fp;
477 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
478 	regs->tf_eflags &= ~(PSL_T | PSL_VM);
479 	regs->tf_cs = _ucodesel;
480 	regs->tf_ds = _udatasel;
481 	regs->tf_es = _udatasel;
482 	regs->tf_fs = _udatasel;
483 	regs->tf_ss = _udatasel;
484 	PROC_LOCK(p);
485 }
486 
487 /*
488  * System call to cleanup state after a signal
489  * has been taken.  Reset signal mask and
490  * stack state from context left by sendsig (above).
491  * Return to previous pc and psl as specified by
492  * context left by sendsig. Check carefully to
493  * make sure that the user has not modified the
494  * psl to gain improper privileges or to cause
495  * a machine fault.
496  */
497 int
498 linux_sigreturn(td, args)
499 	struct thread *td;
500 	struct linux_sigreturn_args *args;
501 {
502 	struct proc *p = td->td_proc;
503 	struct l_sigframe frame;
504 	register struct trapframe *regs;
505 	l_sigset_t lmask;
506 	int eflags, i;
507 
508 	regs = td->td_frame;
509 
510 #ifdef DEBUG
511 	if (ldebug(sigreturn))
512 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
513 #endif
514 	/*
515 	 * The trampoline code hands us the sigframe.
516 	 * It is unsafe to keep track of it ourselves, in the event that a
517 	 * program jumps out of a signal handler.
518 	 */
519 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
520 		return (EFAULT);
521 
522 	/*
523 	 * Check for security violations.
524 	 */
525 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
526 	eflags = frame.sf_sc.sc_eflags;
527 	/*
528 	 * XXX do allow users to change the privileged flag PSL_RF.  The
529 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
530 	 * sometimes set it there too.  tf_eflags is kept in the signal
531 	 * context during signal handling and there is no other place
532 	 * to remember it, so the PSL_RF bit may be corrupted by the
533 	 * signal handler without us knowing.  Corruption of the PSL_RF
534 	 * bit at worst causes one more or one less debugger trap, so
535 	 * allowing it is fairly harmless.
536 	 */
537 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
538     		return(EINVAL);
539 	}
540 
541 	/*
542 	 * Don't allow users to load a valid privileged %cs.  Let the
543 	 * hardware check for invalid selectors, excess privilege in
544 	 * other selectors, invalid %eip's and invalid %esp's.
545 	 */
546 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
547 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
548 		trapsignal(p, SIGBUS, T_PROTFLT);
549 		return(EINVAL);
550 	}
551 
552 	lmask.__bits[0] = frame.sf_sc.sc_mask;
553 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
554 		lmask.__bits[i+1] = frame.sf_extramask[i];
555 	PROC_LOCK(p);
556 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
557 	SIG_CANTMASK(p->p_sigmask);
558 	PROC_UNLOCK(p);
559 
560 	/*
561 	 * Restore signal context.
562 	 */
563 	/* %gs was restored by the trampoline. */
564 	regs->tf_fs     = frame.sf_sc.sc_fs;
565 	regs->tf_es     = frame.sf_sc.sc_es;
566 	regs->tf_ds     = frame.sf_sc.sc_ds;
567 	regs->tf_edi    = frame.sf_sc.sc_edi;
568 	regs->tf_esi    = frame.sf_sc.sc_esi;
569 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
570 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
571 	regs->tf_edx    = frame.sf_sc.sc_edx;
572 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
573 	regs->tf_eax    = frame.sf_sc.sc_eax;
574 	regs->tf_eip    = frame.sf_sc.sc_eip;
575 	regs->tf_cs     = frame.sf_sc.sc_cs;
576 	regs->tf_eflags = eflags;
577 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
578 	regs->tf_ss     = frame.sf_sc.sc_ss;
579 
580 	return (EJUSTRETURN);
581 }
582 
583 /*
584  * System call to cleanup state after a signal
585  * has been taken.  Reset signal mask and
586  * stack state from context left by rt_sendsig (above).
587  * Return to previous pc and psl as specified by
588  * context left by sendsig. Check carefully to
589  * make sure that the user has not modified the
590  * psl to gain improper privileges or to cause
591  * a machine fault.
592  */
593 int
594 linux_rt_sigreturn(td, args)
595 	struct thread *td;
596 	struct linux_rt_sigreturn_args *args;
597 {
598 	struct proc *p = td->td_proc;
599 	struct sigaltstack_args sasargs;
600 	struct l_ucontext uc;
601 	struct l_sigcontext *context;
602 	l_stack_t *lss;
603 	stack_t *ss;
604 	register struct trapframe *regs;
605 	int eflags;
606 	caddr_t sg = stackgap_init();
607 
608 	regs = td->td_frame;
609 
610 #ifdef DEBUG
611 	if (ldebug(rt_sigreturn))
612 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
613 #endif
614 	/*
615 	 * The trampoline code hands us the ucontext.
616 	 * It is unsafe to keep track of it ourselves, in the event that a
617 	 * program jumps out of a signal handler.
618 	 */
619 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
620 		return (EFAULT);
621 
622 	context = &uc.uc_mcontext;
623 
624 	/*
625 	 * Check for security violations.
626 	 */
627 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
628 	eflags = context->sc_eflags;
629 	/*
630 	 * XXX do allow users to change the privileged flag PSL_RF.  The
631 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
632 	 * sometimes set it there too.  tf_eflags is kept in the signal
633 	 * context during signal handling and there is no other place
634 	 * to remember it, so the PSL_RF bit may be corrupted by the
635 	 * signal handler without us knowing.  Corruption of the PSL_RF
636 	 * bit at worst causes one more or one less debugger trap, so
637 	 * allowing it is fairly harmless.
638 	 */
639 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
640     		return(EINVAL);
641 	}
642 
643 	/*
644 	 * Don't allow users to load a valid privileged %cs.  Let the
645 	 * hardware check for invalid selectors, excess privilege in
646 	 * other selectors, invalid %eip's and invalid %esp's.
647 	 */
648 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
649 	if (!CS_SECURE(context->sc_cs)) {
650 		trapsignal(p, SIGBUS, T_PROTFLT);
651 		return(EINVAL);
652 	}
653 
654 	PROC_LOCK(p);
655 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
656 	SIG_CANTMASK(p->p_sigmask);
657 	PROC_UNLOCK(p);
658 
659 	/*
660 	 * Restore signal context
661 	 */
662 	/* %gs was restored by the trampoline. */
663 	regs->tf_fs     = context->sc_fs;
664 	regs->tf_es     = context->sc_es;
665 	regs->tf_ds     = context->sc_ds;
666 	regs->tf_edi    = context->sc_edi;
667 	regs->tf_esi    = context->sc_esi;
668 	regs->tf_ebp    = context->sc_ebp;
669 	regs->tf_ebx    = context->sc_ebx;
670 	regs->tf_edx    = context->sc_edx;
671 	regs->tf_ecx    = context->sc_ecx;
672 	regs->tf_eax    = context->sc_eax;
673 	regs->tf_eip    = context->sc_eip;
674 	regs->tf_cs     = context->sc_cs;
675 	regs->tf_eflags = eflags;
676 	regs->tf_esp    = context->sc_esp_at_signal;
677 	regs->tf_ss     = context->sc_ss;
678 
679 	/*
680 	 * call sigaltstack & ignore results..
681 	 */
682 	ss = stackgap_alloc(&sg, sizeof(stack_t));
683 	lss = &uc.uc_stack;
684 	ss->ss_sp = lss->ss_sp;
685 	ss->ss_size = lss->ss_size;
686 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
687 
688 #ifdef DEBUG
689 	if (ldebug(rt_sigreturn))
690 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
691 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
692 #endif
693 	sasargs.ss = ss;
694 	sasargs.oss = NULL;
695 	(void) sigaltstack(td, &sasargs);
696 
697 	return (EJUSTRETURN);
698 }
699 
700 /*
701  * MPSAFE
702  */
703 static void
704 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
705 {
706 	args[0] = tf->tf_ebx;
707 	args[1] = tf->tf_ecx;
708 	args[2] = tf->tf_edx;
709 	args[3] = tf->tf_esi;
710 	args[4] = tf->tf_edi;
711 	*params = NULL;		/* no copyin */
712 }
713 
714 /*
715  * If a linux binary is exec'ing something, try this image activator
716  * first.  We override standard shell script execution in order to
717  * be able to modify the interpreter path.  We only do this if a linux
718  * binary is doing the exec, so we do not create an EXEC module for it.
719  */
720 static int	exec_linux_imgact_try(struct image_params *iparams);
721 
722 static int
723 exec_linux_imgact_try(imgp)
724     struct image_params *imgp;
725 {
726     const char *head = (const char *)imgp->image_header;
727     int error = -1;
728 
729     /*
730      * The interpreter for shell scripts run from a linux binary needs
731      * to be located in /compat/linux if possible in order to recursively
732      * maintain linux path emulation.
733      */
734     if (((const short *)head)[0] == SHELLMAGIC) {
735 	    /*
736 	     * Run our normal shell image activator.  If it succeeds attempt
737 	     * to use the alternate path for the interpreter.  If an alternate
738 	     * path is found, use our stringspace to store it.
739 	     */
740 	    if ((error = exec_shell_imgact(imgp)) == 0) {
741 		    char *rpath = NULL;
742 
743 		    linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL,
744 			imgp->interpreter_name, &rpath, 0);
745 		    if (rpath != imgp->interpreter_name) {
746 			    int len = strlen(rpath) + 1;
747 
748 			    if (len <= MAXSHELLCMDLEN) {
749 				    memcpy(imgp->interpreter_name, rpath, len);
750 			    }
751 			    free(rpath, M_TEMP);
752 		    }
753 	    }
754     }
755     return(error);
756 }
757 
758 struct sysentvec linux_sysvec = {
759 	LINUX_SYS_MAXSYSCALL,
760 	linux_sysent,
761 	0xff,
762 	LINUX_SIGTBLSZ,
763 	bsd_to_linux_signal,
764 	ELAST + 1,
765 	bsd_to_linux_errno,
766 	translate_traps,
767 	linux_fixup,
768 	linux_sendsig,
769 	linux_sigcode,
770 	&linux_szsigcode,
771 	linux_prepsyscall,
772 	"Linux a.out",
773 	aout_coredump,
774 	exec_linux_imgact_try,
775 	LINUX_MINSIGSTKSZ
776 };
777 
778 struct sysentvec elf_linux_sysvec = {
779 	LINUX_SYS_MAXSYSCALL,
780 	linux_sysent,
781 	0xff,
782 	LINUX_SIGTBLSZ,
783 	bsd_to_linux_signal,
784 	ELAST + 1,
785 	bsd_to_linux_errno,
786 	translate_traps,
787 	elf_linux_fixup,
788 	linux_sendsig,
789 	linux_sigcode,
790 	&linux_szsigcode,
791 	linux_prepsyscall,
792 	"Linux ELF",
793 	elf_coredump,
794 	exec_linux_imgact_try,
795 	LINUX_MINSIGSTKSZ
796 };
797 
798 static Elf32_Brandinfo linux_brand = {
799 					ELFOSABI_LINUX,
800 					"Linux",
801 					"/compat/linux",
802 					"/lib/ld-linux.so.1",
803 					&elf_linux_sysvec
804 				 };
805 
806 static Elf32_Brandinfo linux_glibc2brand = {
807 					ELFOSABI_LINUX,
808 					"Linux",
809 					"/compat/linux",
810 					"/lib/ld-linux.so.2",
811 					&elf_linux_sysvec
812 				 };
813 
814 Elf32_Brandinfo *linux_brandlist[] = {
815 					&linux_brand,
816 					&linux_glibc2brand,
817 					NULL
818 				};
819 
820 static int
821 linux_elf_modevent(module_t mod, int type, void *data)
822 {
823 	Elf32_Brandinfo **brandinfo;
824 	int error;
825 	struct linux_ioctl_handler **lihp;
826 
827 	error = 0;
828 
829 	switch(type) {
830 	case MOD_LOAD:
831 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
832 		     ++brandinfo)
833 			if (elf_insert_brand_entry(*brandinfo) < 0)
834 				error = EINVAL;
835 		if (error == 0) {
836 			SET_FOREACH(lihp, linux_ioctl_handler_set)
837 				linux_ioctl_register_handler(*lihp);
838 			if (bootverbose)
839 				printf("Linux ELF exec handler installed\n");
840 		} else
841 			printf("cannot insert Linux ELF brand handler\n");
842 		break;
843 	case MOD_UNLOAD:
844 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
845 		     ++brandinfo)
846 			if (elf_brand_inuse(*brandinfo))
847 				error = EBUSY;
848 		if (error == 0) {
849 			for (brandinfo = &linux_brandlist[0];
850 			     *brandinfo != NULL; ++brandinfo)
851 				if (elf_remove_brand_entry(*brandinfo) < 0)
852 					error = EINVAL;
853 		}
854 		if (error == 0) {
855 			SET_FOREACH(lihp, linux_ioctl_handler_set)
856 				linux_ioctl_unregister_handler(*lihp);
857 			if (bootverbose)
858 				printf("Linux ELF exec handler removed\n");
859 		} else
860 			printf("Could not deinstall ELF interpreter entry\n");
861 		break;
862 	default:
863 		break;
864 	}
865 	return error;
866 }
867 
868 static moduledata_t linux_elf_mod = {
869 	"linuxelf",
870 	linux_elf_modevent,
871 	0
872 };
873 
874 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
875