xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 2aebedc3ad9e722b272254e6dd3a12e399595e57)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: linux_sysvec.c,v 1.41 1998/12/19 02:55:33 julian Exp $
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/systm.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_prot.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_extern.h>
53 #ifdef COMPAT_LINUX_THREADS
54 #include <sys/lock.h>             /* needed, for now, by vm_map.h */
55 #include <vm/vm_map.h>            /* needed, for now, for VM_STACK defines */
56 #endif /* COMPAT_LINUX_THREADS */
57 #include <sys/exec.h>
58 #include <sys/kernel.h>
59 #include <sys/module.h>
60 #include <machine/cpu.h>
61 
62 #include <i386/linux/linux.h>
63 #include <i386/linux/linux_proto.h>
64 
65 static int	linux_fixup __P((long **stack_base,
66 				 struct image_params *iparams));
67 static int	elf_linux_fixup __P((long **stack_base,
68 				     struct image_params *iparams));
69 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
70 				       u_int *code, caddr_t *params));
71 static void     linux_sendsig __P((sig_t catcher, int sig, int mask,
72 				   u_long code));
73 
74 /*
75  * Linux syscalls return negative errno's, we do positive and map them
76  */
77 static int bsd_to_linux_errno[ELAST + 1] = {
78   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
79  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
80  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
81  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
82  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
83 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
84 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
85 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
86   	-6, -6, -43, -42, -75, -6, -84
87 };
88 
89 int bsd_to_linux_signal[NSIG] = {
90 	0, LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT,
91 	LINUX_SIGILL, LINUX_SIGTRAP, LINUX_SIGABRT, 0,
92 	LINUX_SIGFPE, LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV,
93 	0, LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM,
94 	LINUX_SIGURG, LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT,
95 	LINUX_SIGCHLD, LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO,
96 	LINUX_SIGXCPU, LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF,
97 	LINUX_SIGWINCH, 0, LINUX_SIGUSR1, LINUX_SIGUSR2
98 };
99 
100 int linux_to_bsd_signal[LINUX_NSIG] = {
101 	0, SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGTRAP, SIGABRT, SIGBUS,
102 	SIGFPE, SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, SIGPIPE, SIGALRM, SIGTERM,
103 	SIGBUS, SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG,
104 	SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, SIGIO, SIGURG, 0
105 };
106 
107 /*
108  * If FreeBSD & Linux have a difference of opinion about what a trap
109  * means, deal with it here.
110  */
111 static int
112 translate_traps(int signal, int trap_code)
113 {
114 	if (signal != SIGBUS)
115 		return signal;
116 	switch (trap_code) {
117 	case T_PROTFLT:
118 	case T_TSSFLT:
119 	case T_DOUBLEFLT:
120 	case T_PAGEFLT:
121 		return SIGSEGV;
122 	default:
123 		return signal;
124 	}
125 }
126 
127 static int
128 linux_fixup(long **stack_base, struct image_params *imgp)
129 {
130 	long *argv, *envp;
131 
132 	argv = *stack_base;
133 	envp = *stack_base + (imgp->argc + 1);
134 	(*stack_base)--;
135 	**stack_base = (intptr_t)(void *)envp;
136 	(*stack_base)--;
137 	**stack_base = (intptr_t)(void *)argv;
138 	(*stack_base)--;
139 	**stack_base = imgp->argc;
140 	return 0;
141 }
142 
143 static int
144 elf_linux_fixup(long **stack_base, struct image_params *imgp)
145 {
146 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
147 	long *pos;
148 
149 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
150 
151 	if (args->trace) {
152 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
153 	}
154 	if (args->execfd != -1) {
155 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
156 	}
157 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
158 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
159 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
160 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
161 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
162 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
163 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
164 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
165 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
166 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
167 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
168 	AUXARGS_ENTRY(pos, AT_NULL, 0);
169 
170 	free(imgp->auxargs, M_TEMP);
171 	imgp->auxargs = NULL;
172 
173 	(*stack_base)--;
174 	**stack_base = (long)imgp->argc;
175 	return 0;
176 }
177 
178 extern int _ucodesel, _udatasel;
179 
180 /*
181  * Send an interrupt to process.
182  *
183  * Stack is set up to allow sigcode stored
184  * in u. to call routine, followed by kcall
185  * to sigreturn routine below.  After sigreturn
186  * resets the signal mask, the stack, and the
187  * frame pointer, it returns to the user
188  * specified pc, psl.
189  */
190 
191 static void
192 linux_sendsig(sig_t catcher, int sig, int mask, u_long code)
193 {
194 	register struct proc *p = curproc;
195 	register struct trapframe *regs;
196 	struct linux_sigframe *fp, frame;
197 	struct sigacts *psp = p->p_sigacts;
198 	int oonstack;
199 
200 	regs = p->p_md.md_regs;
201 	oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
202 
203 #ifdef DEBUG
204 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %d, %lu)\n",
205 	    (long)p->p_pid, catcher, sig, mask, code);
206 #endif
207 	/*
208 	 * Allocate space for the signal handler context.
209 	 */
210 	if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
211 	    (psp->ps_sigonstack & sigmask(sig))) {
212 		fp = (struct linux_sigframe *)(psp->ps_sigstk.ss_sp +
213 		    psp->ps_sigstk.ss_size - sizeof(struct linux_sigframe));
214 		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
215 	} else {
216 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
217 	}
218 
219 	/*
220 	 * grow() will return FALSE if the fp will not fit inside the stack
221 	 *	and the stack can not be grown. useracc will return FALSE
222 	 *	if access is denied.
223 	 */
224 #ifdef COMPAT_LINUX_THREADS
225 #ifdef USE_VM_STACK
226 #ifndef USE_VM_STACK_FOR_EXEC
227 	if ((((caddr_t)fp > p->p_vmspace->vm_maxsaddr &&
228 	      (caddr_t)fp < (caddr_t)USRSTACK &&
229 	      grow(p, (int)fp) == FALSE) ||
230 	     (((caddr_t)fp <= p->p_vmspace->vm_maxsaddr ||
231 	       (caddr_t)fp >= (caddr_t)USRSTACK) &&
232 	      grow_stack (p, (int)fp) == FALSE)) ||
233 #else
234 	if ((grow_stack (p, (int)fp) == FALSE) ||
235 #endif /* USE_VM_STACK_FOR_EXEC */
236 #else
237 	if ((grow(p, (int)fp) == FALSE) ||
238 #endif /* USE_VM_STACK */
239 #else
240 	if ((grow(p, (int)fp) == FALSE) ||
241 #endif /* COMPAT_LINUX_THREADS */
242 	    (useracc((caddr_t)fp, sizeof (struct linux_sigframe), B_WRITE) == FALSE)) {
243 		/*
244 		 * Process has trashed its stack; give it an illegal
245 		 * instruction to halt it in its tracks.
246 		 */
247 		SIGACTION(p, SIGILL) = SIG_DFL;
248 		sig = sigmask(SIGILL);
249 		p->p_sigignore &= ~sig;
250 		p->p_sigcatch &= ~sig;
251 		p->p_sigmask &= ~sig;
252 		psignal(p, SIGILL);
253 		return;
254 	}
255 
256 	/*
257 	 * Build the argument list for the signal handler.
258 	 */
259 	if (p->p_sysent->sv_sigtbl) {
260 		if (sig < p->p_sysent->sv_sigsize)
261 			sig = p->p_sysent->sv_sigtbl[sig];
262 		else
263 			sig = p->p_sysent->sv_sigsize + 1;
264 	}
265 
266 	frame.sf_handler = catcher;
267 	frame.sf_sig = sig;
268 
269 	/*
270 	 * Build the signal context to be used by sigreturn.
271 	 */
272 	frame.sf_sc.sc_mask   = mask;
273 	__asm("movl %%gs,%w0" : "=r" (frame.sf_sc.sc_gs));
274 	__asm("movl %%fs,%w0" : "=r" (frame.sf_sc.sc_fs));
275 	frame.sf_sc.sc_es     = regs->tf_es;
276 	frame.sf_sc.sc_ds     = regs->tf_ds;
277 	frame.sf_sc.sc_edi    = regs->tf_edi;
278 	frame.sf_sc.sc_esi    = regs->tf_esi;
279 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
280 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
281 	frame.sf_sc.sc_edx    = regs->tf_edx;
282 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
283 	frame.sf_sc.sc_eax    = regs->tf_eax;
284 	frame.sf_sc.sc_eip    = regs->tf_eip;
285 	frame.sf_sc.sc_cs     = regs->tf_cs;
286 	frame.sf_sc.sc_eflags = regs->tf_eflags;
287 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
288 	frame.sf_sc.sc_ss     = regs->tf_ss;
289 	frame.sf_sc.sc_err    = regs->tf_err;
290 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
291 
292 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
293 		/*
294 		 * Process has trashed its stack; give it an illegal
295 		 * instruction to halt it in its tracks.
296 		 */
297 		sigexit(p, SIGILL);
298 		/* NOTREACHED */
299 	}
300 
301 	/*
302 	 * Build context to run handler in.
303 	 */
304 	regs->tf_esp = (int)fp;
305 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
306 	regs->tf_eflags &= ~PSL_VM;
307 	regs->tf_cs = _ucodesel;
308 	regs->tf_ds = _udatasel;
309 	regs->tf_es = _udatasel;
310 	regs->tf_ss = _udatasel;
311 }
312 
313 /*
314  * System call to cleanup state after a signal
315  * has been taken.  Reset signal mask and
316  * stack state from context left by sendsig (above).
317  * Return to previous pc and psl as specified by
318  * context left by sendsig. Check carefully to
319  * make sure that the user has not modified the
320  * psl to gain improper privileges or to cause
321  * a machine fault.
322  */
323 int
324 linux_sigreturn(p, args)
325 	struct proc *p;
326 	struct linux_sigreturn_args *args;
327 {
328 	struct linux_sigcontext *scp, context;
329 	register struct trapframe *regs;
330 	int eflags;
331 
332 	regs = p->p_md.md_regs;
333 
334 #ifdef DEBUG
335 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
336 	    (long)p->p_pid, (void *)args->scp);
337 #endif
338 	/*
339 	 * The trampoline code hands us the context.
340 	 * It is unsafe to keep track of it ourselves, in the event that a
341 	 * program jumps out of a signal handler.
342 	 */
343 	scp = SCARG(args,scp);
344 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
345 		return (EFAULT);
346 
347 	/*
348 	 * Check for security violations.
349 	 */
350 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
351 	eflags = context.sc_eflags;
352 	/*
353 	 * XXX do allow users to change the privileged flag PSL_RF.  The
354 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
355 	 * sometimes set it there too.  tf_eflags is kept in the signal
356 	 * context during signal handling and there is no other place
357 	 * to remember it, so the PSL_RF bit may be corrupted by the
358 	 * signal handler without us knowing.  Corruption of the PSL_RF
359 	 * bit at worst causes one more or one less debugger trap, so
360 	 * allowing it is fairly harmless.
361 	 */
362 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
363     		return(EINVAL);
364 	}
365 
366 	/*
367 	 * Don't allow users to load a valid privileged %cs.  Let the
368 	 * hardware check for invalid selectors, excess privilege in
369 	 * other selectors, invalid %eip's and invalid %esp's.
370 	 */
371 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
372 	if (!CS_SECURE(context.sc_cs)) {
373 		trapsignal(p, SIGBUS, T_PROTFLT);
374 		return(EINVAL);
375 	}
376 
377 	p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
378 	p->p_sigmask = context.sc_mask &~
379 		(sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
380 	/*
381 	 * Restore signal context.
382 	 */
383 	/* %fs and %gs were restored by the trampoline. */
384 	regs->tf_es     = context.sc_es;
385 	regs->tf_ds     = context.sc_ds;
386 	regs->tf_edi    = context.sc_edi;
387 	regs->tf_esi    = context.sc_esi;
388 	regs->tf_ebp    = context.sc_ebp;
389 	regs->tf_ebx    = context.sc_ebx;
390 	regs->tf_edx    = context.sc_edx;
391 	regs->tf_ecx    = context.sc_ecx;
392 	regs->tf_eax    = context.sc_eax;
393 	regs->tf_eip    = context.sc_eip;
394 	regs->tf_cs     = context.sc_cs;
395 	regs->tf_eflags = eflags;
396 	regs->tf_esp    = context.sc_esp_at_signal;
397 	regs->tf_ss     = context.sc_ss;
398 
399 	return (EJUSTRETURN);
400 }
401 
402 static void
403 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
404 {
405 	args[0] = tf->tf_ebx;
406 	args[1] = tf->tf_ecx;
407 	args[2] = tf->tf_edx;
408 	args[3] = tf->tf_esi;
409 	args[4] = tf->tf_edi;
410 	*params = NULL;		/* no copyin */
411 }
412 
413 struct sysentvec linux_sysvec = {
414 	LINUX_SYS_MAXSYSCALL,
415 	linux_sysent,
416 	0xff,
417 	NSIG,
418 	bsd_to_linux_signal,
419 	ELAST + 1,
420 	bsd_to_linux_errno,
421 	translate_traps,
422 	linux_fixup,
423 	linux_sendsig,
424 	linux_sigcode,
425 	&linux_szsigcode,
426 	linux_prepsyscall,
427 	"Linux a.out",
428 	aout_coredump
429 };
430 
431 struct sysentvec elf_linux_sysvec = {
432         LINUX_SYS_MAXSYSCALL,
433         linux_sysent,
434         0xff,
435         NSIG,
436         bsd_to_linux_signal,
437         ELAST + 1,
438         bsd_to_linux_errno,
439         translate_traps,
440         elf_linux_fixup,
441         linux_sendsig,
442         linux_sigcode,
443         &linux_szsigcode,
444         linux_prepsyscall,
445 	"Linux ELF",
446 	elf_coredump
447 };
448 
449 /*
450  * Installed either via SYSINIT() or via LKM stubs.
451  */
452 static Elf32_Brandinfo linux_brand = {
453 					"Linux",
454 					"/compat/linux",
455 					"/lib/ld-linux.so.1",
456 					&elf_linux_sysvec
457 				 };
458 
459 static Elf32_Brandinfo linux_glibc2brand = {
460 					"Linux",
461 					"/compat/linux",
462 					"/lib/ld-linux.so.2",
463 					&elf_linux_sysvec
464 				 };
465 
466 Elf32_Brandinfo *linux_brandlist[] = {
467 					&linux_brand,
468 					&linux_glibc2brand,
469 					NULL
470 				};
471 
472 static int
473 linux_elf_modevent(module_t mod, int type, void *data)
474 {
475 	Elf32_Brandinfo **brandinfo;
476 	int error;
477 
478 	error = 0;
479 
480 	switch(type) {
481 	case MOD_LOAD:
482 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
483 		    ++brandinfo)
484 			if (elf_insert_brand_entry(*brandinfo) < 0)
485 				error = EINVAL;
486 		if (error)
487 			printf("cannot insert Linux elf brand handler\n");
488 		else if (bootverbose)
489 			printf("Linux-ELF exec handler installed\n");
490 		break;
491 	case MOD_UNLOAD:
492 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
493 		    ++brandinfo)
494 			if (elf_remove_brand_entry(*brandinfo) < 0)
495 				error = EINVAL;
496 		if (error)
497 			printf("Could not deinstall ELF interpreter entry\n");
498 		else if (bootverbose)
499 			printf("Linux-elf exec handler removed\n");
500 		break;
501 	default:
502 		break;
503 	}
504 	return error;
505 }
506 static moduledata_t linux_elf_mod = {
507 	"linuxelf",
508 	linux_elf_modevent,
509 	0
510 };
511 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
512