xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 3a51f88a2721f4cf7aded2a63f20925f8d700a7c)
1 /*-
 * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_aout.h>
42 #include <sys/imgact_elf.h>
43 #include <sys/malloc.h>
44 #include <sys/proc.h>
45 #include <sys/signalvar.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_extern.h>
53 #include <sys/exec.h>
54 #include <sys/kernel.h>
55 #include <sys/module.h>
56 #include <machine/cpu.h>
57 #include <sys/lock.h>
58 #include <sys/mutex.h>
59 
60 #include <i386/linux/linux.h>
61 #include <i386/linux/linux_proto.h>
62 #include <compat/linux/linux_signal.h>
63 #include <compat/linux/linux_util.h>
64 
65 MODULE_VERSION(linux, 1);
66 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
67 MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
68 MODULE_DEPEND(linux, sysvshm, 1, 1, 1);
69 
70 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
71 
72 #if BYTE_ORDER == LITTLE_ENDIAN
73 #define SHELLMAGIC      0x2123 /* #! */
74 #else
75 #define SHELLMAGIC      0x2321
76 #endif
77 
78 extern char linux_sigcode[];
79 extern int linux_szsigcode;
80 
81 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
82 
83 extern struct linker_set linux_ioctl_handler_set;
84 
85 static int	linux_fixup __P((register_t **stack_base,
86 				 struct image_params *iparams));
87 static int	elf_linux_fixup __P((register_t **stack_base,
88 				     struct image_params *iparams));
89 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
90 				       u_int *code, caddr_t *params));
91 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
92 				   u_long code));
93 
94 /*
95  * Linux syscalls return negative errno's, we do positive and map them
96  */
97 static int bsd_to_linux_errno[ELAST + 1] = {
98   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
99  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
100  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
101  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
102  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
103 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
104 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
105 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
106   	-6, -6, -43, -42, -75, -6, -84
107 };
108 
109 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
110 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
111 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
112 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
113 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
114 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
115 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
116 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
117 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
118 };
119 
120 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
121 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
122 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
123 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
124 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
125 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
126 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
127 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
128 	SIGIO, SIGURG, 0
129 };
130 
131 /*
132  * If FreeBSD & Linux have a difference of opinion about what a trap
133  * means, deal with it here.
134  */
135 static int
136 translate_traps(int signal, int trap_code)
137 {
138 	if (signal != SIGBUS)
139 		return signal;
140 	switch (trap_code) {
141 	case T_PROTFLT:
142 	case T_TSSFLT:
143 	case T_DOUBLEFLT:
144 	case T_PAGEFLT:
145 		return SIGSEGV;
146 	default:
147 		return signal;
148 	}
149 }
150 
151 static int
152 linux_fixup(register_t **stack_base, struct image_params *imgp)
153 {
154 	register_t *argv, *envp;
155 
156 	argv = *stack_base;
157 	envp = *stack_base + (imgp->argc + 1);
158 	(*stack_base)--;
159 	**stack_base = (intptr_t)(void *)envp;
160 	(*stack_base)--;
161 	**stack_base = (intptr_t)(void *)argv;
162 	(*stack_base)--;
163 	**stack_base = imgp->argc;
164 	return 0;
165 }
166 
167 static int
168 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
169 {
170 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
171 	register_t *pos;
172 
173 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
174 
175 	if (args->trace) {
176 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
177 	}
178 	if (args->execfd != -1) {
179 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
180 	}
181 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
182 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
183 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
184 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
185 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
186 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
187 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
188 	PROC_LOCK(imgp->proc);
189 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
190 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
191 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
192 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
193 	PROC_UNLOCK(imgp->proc);
194 	AUXARGS_ENTRY(pos, AT_NULL, 0);
195 
196 	free(imgp->auxargs, M_TEMP);
197 	imgp->auxargs = NULL;
198 
199 	(*stack_base)--;
200 	**stack_base = (long)imgp->argc;
201 	return 0;
202 }
203 
204 extern int _ucodesel, _udatasel;
205 extern unsigned long linux_sznonrtsigcode;
206 
207 static void
208 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
209 {
210 	register struct proc *p = curproc;
211 	register struct trapframe *regs;
212 	struct linux_rt_sigframe *fp, frame;
213 	int oonstack;
214 
215 	regs = p->p_md.md_regs;
216 	oonstack = sigonstack(regs->tf_esp);
217 
218 #ifdef DEBUG
219 	if (ldebug(sigreturn))
220 		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
221 		    catcher, sig, (void*)mask, code);
222 #endif
223 	/*
224 	 * Allocate space for the signal handler context.
225 	 */
226 	PROC_LOCK(p);
227 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
228 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
229 		fp = (struct linux_rt_sigframe *)(p->p_sigstk.ss_sp +
230 		    p->p_sigstk.ss_size - sizeof(struct linux_rt_sigframe));
231 	} else
232 		fp = (struct linux_rt_sigframe *)regs->tf_esp - 1;
233 	PROC_UNLOCK(p);
234 
235 	/*
236 	 * grow() will return FALSE if the fp will not fit inside the stack
237 	 *	and the stack can not be grown. useracc will return FALSE
238 	 *	if access is denied.
239 	 */
240 	if ((grow_stack (p, (int)fp) == FALSE) ||
241 	    !useracc((caddr_t)fp, sizeof (struct linux_rt_sigframe),
242 	    VM_PROT_WRITE)) {
243 		/*
244 		 * Process has trashed its stack; give it an illegal
245 		 * instruction to halt it in its tracks.
246 		 */
247 		PROC_LOCK(p);
248 		SIGACTION(p, SIGILL) = SIG_DFL;
249 		SIGDELSET(p->p_sigignore, SIGILL);
250 		SIGDELSET(p->p_sigcatch, SIGILL);
251 		SIGDELSET(p->p_sigmask, SIGILL);
252 #ifdef DEBUG
253 		if (ldebug(sigreturn))
254 			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
255 			    fp, oonstack);
256 #endif
257 		psignal(p, SIGILL);
258 		PROC_UNLOCK(p);
259 		return;
260 	}
261 
262 	/*
263 	 * Build the argument list for the signal handler.
264 	 */
265 	if (p->p_sysent->sv_sigtbl)
266 		if (sig <= p->p_sysent->sv_sigsize)
267 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
268 
269 	frame.sf_handler = catcher;
270 	frame.sf_sig = sig;
271 	frame.sf_siginfo = &fp->sf_si;
272 	frame.sf_ucontext = &fp->sf_sc;
273 
274 	/* Fill siginfo structure. */
275 	frame.sf_si.lsi_signo = sig;
276 	frame.sf_si.lsi_code = code;
277 	frame.sf_si.lsi_addr = (void *)regs->tf_err;
278 
279 	/*
280 	 * Build the signal context to be used by sigreturn.
281 	 */
282 	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
283 	frame.sf_sc.uc_link = NULL;		/* XXX ??? */
284 
285 	PROC_LOCK(p);
286 	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
287 	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
288 	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
289 	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
290 	PROC_UNLOCK(p);
291 
292 	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);
293 
294 	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
295 	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
296 	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
297 	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
298 	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
299 	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
300 	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
301 	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
302 	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
303 	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
304 	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
305 	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
306 	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
307 	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
308 	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
309 	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
310 	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
311 	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
312 	frame.sf_sc.uc_mcontext.sc_trapno = code;	/* XXX ???? */
313 
314 #ifdef DEBUG
315 	if (ldebug(sigreturn))
316 		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
317 		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
318 		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
319 #endif
320 
321 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
322 		/*
323 		 * Process has trashed its stack; give it an illegal
324 		 * instruction to halt it in its tracks.
325 		 */
326 		PROC_LOCK(p);
327 		sigexit(p, SIGILL);
328 		/* NOTREACHED */
329 	}
330 
331 	/*
332 	 * Build context to run handler in.
333 	 */
334 	regs->tf_esp = (int)fp;
335 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
336 	    linux_sznonrtsigcode;
337 	regs->tf_eflags &= ~PSL_VM;
338 	regs->tf_cs = _ucodesel;
339 	regs->tf_ds = _udatasel;
340 	regs->tf_es = _udatasel;
341 	regs->tf_fs = _udatasel;
342 	load_gs(_udatasel);
343 	regs->tf_ss = _udatasel;
344 }
345 
346 
347 /*
348  * Send an interrupt to process.
349  *
350  * Stack is set up to allow sigcode stored
351  * in u. to call routine, followed by kcall
352  * to sigreturn routine below.  After sigreturn
353  * resets the signal mask, the stack, and the
354  * frame pointer, it returns to the user
355  * specified pc, psl.
356  */
357 
358 static void
359 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
360 {
361 	register struct proc *p = curproc;
362 	register struct trapframe *regs;
363 	struct linux_sigframe *fp, frame;
364 	linux_sigset_t lmask;
365 	int oonstack, i;
366 
367 	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
368 		/* Signal handler installed with SA_SIGINFO. */
369 		linux_rt_sendsig(catcher, sig, mask, code);
370 		return;
371 	}
372 
373 	regs = p->p_md.md_regs;
374 	oonstack = sigonstack(regs->tf_esp);
375 
376 #ifdef DEBUG
377 	if (ldebug(sigreturn))
378 		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
379 		    catcher, sig, (void*)mask, code);
380 #endif
381 
382 	/*
383 	 * Allocate space for the signal handler context.
384 	 */
385 	PROC_LOCK(p);
386 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
387 	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
388 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
389 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
390 	} else
391 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
392 	PROC_UNLOCK(p);
393 
394 	/*
395 	 * grow() will return FALSE if the fp will not fit inside the stack
396 	 *	and the stack can not be grown. useracc will return FALSE
397 	 *	if access is denied.
398 	 */
399 	if ((grow_stack (p, (int)fp) == FALSE) ||
400 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
401 	    VM_PROT_WRITE)) {
402 		/*
403 		 * Process has trashed its stack; give it an illegal
404 		 * instruction to halt it in its tracks.
405 		 */
406 		PROC_LOCK(p);
407 		SIGACTION(p, SIGILL) = SIG_DFL;
408 		SIGDELSET(p->p_sigignore, SIGILL);
409 		SIGDELSET(p->p_sigcatch, SIGILL);
410 		SIGDELSET(p->p_sigmask, SIGILL);
411 		psignal(p, SIGILL);
412 		PROC_UNLOCK(p);
413 		return;
414 	}
415 
416 	/*
417 	 * Build the argument list for the signal handler.
418 	 */
419 	if (p->p_sysent->sv_sigtbl)
420 		if (sig <= p->p_sysent->sv_sigsize)
421 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
422 
423 	frame.sf_handler = catcher;
424 	frame.sf_sig = sig;
425 
426 	bsd_to_linux_sigset(mask, &lmask);
427 
428 	/*
429 	 * Build the signal context to be used by sigreturn.
430 	 */
431 	frame.sf_sc.sc_mask   = lmask.__bits[0];
432 	frame.sf_sc.sc_gs     = rgs();
433 	frame.sf_sc.sc_fs     = regs->tf_fs;
434 	frame.sf_sc.sc_es     = regs->tf_es;
435 	frame.sf_sc.sc_ds     = regs->tf_ds;
436 	frame.sf_sc.sc_edi    = regs->tf_edi;
437 	frame.sf_sc.sc_esi    = regs->tf_esi;
438 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
439 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
440 	frame.sf_sc.sc_edx    = regs->tf_edx;
441 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
442 	frame.sf_sc.sc_eax    = regs->tf_eax;
443 	frame.sf_sc.sc_eip    = regs->tf_eip;
444 	frame.sf_sc.sc_cs     = regs->tf_cs;
445 	frame.sf_sc.sc_eflags = regs->tf_eflags;
446 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
447 	frame.sf_sc.sc_ss     = regs->tf_ss;
448 	frame.sf_sc.sc_err    = regs->tf_err;
449 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
450 
451 	bzero(&frame.sf_fpstate, sizeof(struct linux_fpstate));
452 
453 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
454 		frame.sf_extramask[i] = lmask.__bits[i+1];
455 
456 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
457 		/*
458 		 * Process has trashed its stack; give it an illegal
459 		 * instruction to halt it in its tracks.
460 		 */
461 		PROC_LOCK(p);
462 		sigexit(p, SIGILL);
463 		/* NOTREACHED */
464 	}
465 
466 	/*
467 	 * Build context to run handler in.
468 	 */
469 	regs->tf_esp = (int)fp;
470 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
471 	regs->tf_eflags &= ~PSL_VM;
472 	regs->tf_cs = _ucodesel;
473 	regs->tf_ds = _udatasel;
474 	regs->tf_es = _udatasel;
475 	regs->tf_fs = _udatasel;
476 	load_gs(_udatasel);
477 	regs->tf_ss = _udatasel;
478 }
479 
480 /*
481  * System call to cleanup state after a signal
482  * has been taken.  Reset signal mask and
483  * stack state from context left by sendsig (above).
484  * Return to previous pc and psl as specified by
485  * context left by sendsig. Check carefully to
486  * make sure that the user has not modified the
487  * psl to gain improper privileges or to cause
488  * a machine fault.
489  */
490 int
491 linux_sigreturn(p, args)
492 	struct proc *p;
493 	struct linux_sigreturn_args *args;
494 {
495 	struct linux_sigframe frame;
496 	register struct trapframe *regs;
497 	linux_sigset_t lmask;
498 	int eflags, i;
499 
500 	regs = p->p_md.md_regs;
501 
502 #ifdef DEBUG
503 	if (ldebug(sigreturn))
504 		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
505 #endif
506 	/*
507 	 * The trampoline code hands us the sigframe.
508 	 * It is unsafe to keep track of it ourselves, in the event that a
509 	 * program jumps out of a signal handler.
510 	 */
511 	if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0)
512 		return (EFAULT);
513 
514 	/*
515 	 * Check for security violations.
516 	 */
517 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
518 	eflags = frame.sf_sc.sc_eflags;
519 	/*
520 	 * XXX do allow users to change the privileged flag PSL_RF.  The
521 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
522 	 * sometimes set it there too.  tf_eflags is kept in the signal
523 	 * context during signal handling and there is no other place
524 	 * to remember it, so the PSL_RF bit may be corrupted by the
525 	 * signal handler without us knowing.  Corruption of the PSL_RF
526 	 * bit at worst causes one more or one less debugger trap, so
527 	 * allowing it is fairly harmless.
528 	 */
529 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
530     		return(EINVAL);
531 	}
532 
533 	/*
534 	 * Don't allow users to load a valid privileged %cs.  Let the
535 	 * hardware check for invalid selectors, excess privilege in
536 	 * other selectors, invalid %eip's and invalid %esp's.
537 	 */
538 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
539 	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
540 		trapsignal(p, SIGBUS, T_PROTFLT);
541 		return(EINVAL);
542 	}
543 
544 	lmask.__bits[0] = frame.sf_sc.sc_mask;
545 	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
546 		lmask.__bits[i+1] = frame.sf_extramask[i];
547 	PROC_LOCK(p);
548 	linux_to_bsd_sigset(&lmask, &p->p_sigmask);
549 	SIG_CANTMASK(p->p_sigmask);
550 	PROC_UNLOCK(p);
551 
552 	/*
553 	 * Restore signal context.
554 	 */
555 	/* %gs was restored by the trampoline. */
556 	regs->tf_fs     = frame.sf_sc.sc_fs;
557 	regs->tf_es     = frame.sf_sc.sc_es;
558 	regs->tf_ds     = frame.sf_sc.sc_ds;
559 	regs->tf_edi    = frame.sf_sc.sc_edi;
560 	regs->tf_esi    = frame.sf_sc.sc_esi;
561 	regs->tf_ebp    = frame.sf_sc.sc_ebp;
562 	regs->tf_ebx    = frame.sf_sc.sc_ebx;
563 	regs->tf_edx    = frame.sf_sc.sc_edx;
564 	regs->tf_ecx    = frame.sf_sc.sc_ecx;
565 	regs->tf_eax    = frame.sf_sc.sc_eax;
566 	regs->tf_eip    = frame.sf_sc.sc_eip;
567 	regs->tf_cs     = frame.sf_sc.sc_cs;
568 	regs->tf_eflags = eflags;
569 	regs->tf_esp    = frame.sf_sc.sc_esp_at_signal;
570 	regs->tf_ss     = frame.sf_sc.sc_ss;
571 
572 	return (EJUSTRETURN);
573 }
574 
575 /*
576  * System call to cleanup state after a signal
577  * has been taken.  Reset signal mask and
578  * stack state from context left by rt_sendsig (above).
579  * Return to previous pc and psl as specified by
580  * context left by sendsig. Check carefully to
581  * make sure that the user has not modified the
582  * psl to gain improper privileges or to cause
583  * a machine fault.
584  */
585 int
586 linux_rt_sigreturn(p, args)
587 	struct proc *p;
588 	struct linux_rt_sigreturn_args *args;
589 {
590 	struct sigaltstack_args sasargs;
591 	struct linux_ucontext 	 uc;
592 	struct linux_sigcontext *context;
593 	linux_stack_t *lss;
594 	stack_t *ss;
595 	register struct trapframe *regs;
596 	int eflags;
597 	caddr_t sg = stackgap_init();
598 
599 	regs = p->p_md.md_regs;
600 
601 #ifdef DEBUG
602 	if (ldebug(rt_sigreturn))
603 		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
604 #endif
605 	/*
606 	 * The trampoline code hands us the ucontext.
607 	 * It is unsafe to keep track of it ourselves, in the event that a
608 	 * program jumps out of a signal handler.
609 	 */
610 	if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0)
611 		return (EFAULT);
612 
613 	context = &uc.uc_mcontext;
614 
615 	/*
616 	 * Check for security violations.
617 	 */
618 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
619 	eflags = context->sc_eflags;
620 	/*
621 	 * XXX do allow users to change the privileged flag PSL_RF.  The
622 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
623 	 * sometimes set it there too.  tf_eflags is kept in the signal
624 	 * context during signal handling and there is no other place
625 	 * to remember it, so the PSL_RF bit may be corrupted by the
626 	 * signal handler without us knowing.  Corruption of the PSL_RF
627 	 * bit at worst causes one more or one less debugger trap, so
628 	 * allowing it is fairly harmless.
629 	 */
630 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
631     		return(EINVAL);
632 	}
633 
634 	/*
635 	 * Don't allow users to load a valid privileged %cs.  Let the
636 	 * hardware check for invalid selectors, excess privilege in
637 	 * other selectors, invalid %eip's and invalid %esp's.
638 	 */
639 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
640 	if (!CS_SECURE(context->sc_cs)) {
641 		trapsignal(p, SIGBUS, T_PROTFLT);
642 		return(EINVAL);
643 	}
644 
645 	PROC_LOCK(p);
646 	linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask);
647 	SIG_CANTMASK(p->p_sigmask);
648 	PROC_UNLOCK(p);
649 
650 	/*
651 	 * Restore signal context
652 	 */
653 	/* %gs was restored by the trampoline. */
654 	regs->tf_fs     = context->sc_fs;
655 	regs->tf_es     = context->sc_es;
656 	regs->tf_ds     = context->sc_ds;
657 	regs->tf_edi    = context->sc_edi;
658 	regs->tf_esi    = context->sc_esi;
659 	regs->tf_ebp    = context->sc_ebp;
660 	regs->tf_ebx    = context->sc_ebx;
661 	regs->tf_edx    = context->sc_edx;
662 	regs->tf_ecx    = context->sc_ecx;
663 	regs->tf_eax    = context->sc_eax;
664 	regs->tf_eip    = context->sc_eip;
665 	regs->tf_cs     = context->sc_cs;
666 	regs->tf_eflags = eflags;
667 	regs->tf_esp    = context->sc_esp_at_signal;
668 	regs->tf_ss     = context->sc_ss;
669 
670 	/*
671 	 * call sigaltstack & ignore results..
672 	 */
673 	ss = stackgap_alloc(&sg, sizeof(stack_t));
674 	lss = &uc.uc_stack;
675 	ss->ss_sp = lss->ss_sp;
676 	ss->ss_size = lss->ss_size;
677 	ss->ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
678 
679 #ifdef DEBUG
680 	if (ldebug(rt_sigreturn))
681 		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
682 		    ss->ss_flags, ss->ss_sp, ss->ss_size, context->sc_mask);
683 #endif
684 	sasargs.ss = ss;
685 	sasargs.oss = NULL;
686 	(void) sigaltstack(p, &sasargs);
687 
688 	return (EJUSTRETURN);
689 }
690 
691 static void
692 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
693 {
694 	args[0] = tf->tf_ebx;
695 	args[1] = tf->tf_ecx;
696 	args[2] = tf->tf_edx;
697 	args[3] = tf->tf_esi;
698 	args[4] = tf->tf_edi;
699 	*params = NULL;		/* no copyin */
700 }
701 
702 /*
703  * If a linux binary is exec'ing something, try this image activator
704  * first.  We override standard shell script execution in order to
705  * be able to modify the interpreter path.  We only do this if a linux
706  * binary is doing the exec, so we do not create an EXEC module for it.
707  */
708 static int	exec_linux_imgact_try __P((struct image_params *iparams));
709 
710 static int
711 exec_linux_imgact_try(imgp)
712     struct image_params *imgp;
713 {
714     const char *head = (const char *)imgp->image_header;
715     int error = -1;
716 
717     /*
718      * The interpreter for shell scripts run from a linux binary needs
719      * to be located in /compat/linux if possible in order to recursively
720      * maintain linux path emulation.
721      */
722     if (((const short *)head)[0] == SHELLMAGIC) {
723 	    /*
724 	     * Run our normal shell image activator.  If it succeeds attempt
725 	     * to use the alternate path for the interpreter.  If an alternate
726 	     * path is found, use our stringspace to store it.
727 	     */
728 	    if ((error = exec_shell_imgact(imgp)) == 0) {
729 		    char *rpath = NULL;
730 
731 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
732 			imgp->interpreter_name, &rpath, 0);
733 		    if (rpath != imgp->interpreter_name) {
734 			    int len = strlen(rpath) + 1;
735 
736 			    if (len <= MAXSHELLCMDLEN) {
737 				memcpy(imgp->interpreter_name, rpath, len);
738 			    }
739 			    free(rpath, M_TEMP);
740 		    }
741 	    }
742     }
743     return(error);
744 }
745 
746 struct sysentvec linux_sysvec = {
747 	LINUX_SYS_MAXSYSCALL,
748 	linux_sysent,
749 	0xff,
750 	LINUX_SIGTBLSZ,
751 	bsd_to_linux_signal,
752 	ELAST + 1,
753 	bsd_to_linux_errno,
754 	translate_traps,
755 	linux_fixup,
756 	linux_sendsig,
757 	linux_sigcode,
758 	&linux_szsigcode,
759 	linux_prepsyscall,
760 	"Linux a.out",
761 	aout_coredump,
762 	exec_linux_imgact_try,
763 	LINUX_MINSIGSTKSZ
764 };
765 
766 struct sysentvec elf_linux_sysvec = {
767 	LINUX_SYS_MAXSYSCALL,
768 	linux_sysent,
769 	0xff,
770 	LINUX_SIGTBLSZ,
771 	bsd_to_linux_signal,
772 	ELAST + 1,
773 	bsd_to_linux_errno,
774 	translate_traps,
775 	elf_linux_fixup,
776 	linux_sendsig,
777 	linux_sigcode,
778 	&linux_szsigcode,
779 	linux_prepsyscall,
780 	"Linux ELF",
781 	elf_coredump,
782 	exec_linux_imgact_try,
783 	LINUX_MINSIGSTKSZ
784 };
785 
786 static Elf32_Brandinfo linux_brand = {
787 					ELFOSABI_LINUX,
788 					"Linux",
789 					"/compat/linux",
790 					"/lib/ld-linux.so.1",
791 					&elf_linux_sysvec
792 				 };
793 
794 static Elf32_Brandinfo linux_glibc2brand = {
795 					ELFOSABI_LINUX,
796 					"Linux",
797 					"/compat/linux",
798 					"/lib/ld-linux.so.2",
799 					&elf_linux_sysvec
800 				 };
801 
802 Elf32_Brandinfo *linux_brandlist[] = {
803 					&linux_brand,
804 					&linux_glibc2brand,
805 					NULL
806 				};
807 
808 static int
809 linux_elf_modevent(module_t mod, int type, void *data)
810 {
811 	Elf32_Brandinfo **brandinfo;
812 	int error;
813 
814 	error = 0;
815 
816 	switch(type) {
817 	case MOD_LOAD:
818 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
819 		     ++brandinfo)
820 			if (elf_insert_brand_entry(*brandinfo) < 0)
821 				error = EINVAL;
822 		if (error == 0) {
823 			linux_ioctl_register_handlers(
824 				&linux_ioctl_handler_set);
825 			if (bootverbose)
826 				printf("Linux ELF exec handler installed\n");
827 		} else
828 			printf("cannot insert Linux ELF brand handler\n");
829 		break;
830 	case MOD_UNLOAD:
831 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
832 		     ++brandinfo)
833 			if (elf_brand_inuse(*brandinfo))
834 				error = EBUSY;
835 		if (error == 0) {
836 			for (brandinfo = &linux_brandlist[0];
837 			     *brandinfo != NULL; ++brandinfo)
838 				if (elf_remove_brand_entry(*brandinfo) < 0)
839 					error = EINVAL;
840 		}
841 		if (error == 0) {
842 			linux_ioctl_unregister_handlers(
843 				&linux_ioctl_handler_set);
844 			if (bootverbose)
845 				printf("Linux ELF exec handler removed\n");
846 		} else
847 			printf("Could not deinstall ELF interpreter entry\n");
848 		break;
849 	default:
850 		break;
851 	}
852 	return error;
853 }
854 
855 static moduledata_t linux_elf_mod = {
856 	"linuxelf",
857 	linux_elf_modevent,
858 	0
859 };
860 
861 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
862