xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision b95b56c7a06bd268299c6d3b1104e09831e600dc)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysent.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/signalvar.h>
46 #include <sys/malloc.h>
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_page.h>
50 #include <vm/vm_extern.h>
51 #include <sys/exec.h>
52 #include <sys/kernel.h>
53 #include <sys/module.h>
54 #include <machine/cpu.h>
55 
56 #include <i386/linux/linux.h>
57 #include <i386/linux/linux_proto.h>
58 #include <i386/linux/linux_util.h>
59 
60 MODULE_VERSION(linux, 1);
61 
62 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
63 
/*
 * The two characters "#!" as they appear when the first two bytes of an
 * image header are read as one host-endian short (see the comparison in
 * exec_linux_imgact_try()).
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC      0x2321
#else
#define SHELLMAGIC      0x2123 /* #! */
#endif
69 
70 extern char linux_sigcode[];
71 extern int linux_szsigcode;
72 
73 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
74 
75 extern struct linker_set linux_ioctl_handler_set;
76 
77 static int	linux_fixup __P((register_t **stack_base,
78 				 struct image_params *iparams));
79 static int	elf_linux_fixup __P((register_t **stack_base,
80 				     struct image_params *iparams));
81 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
82 				       u_int *code, caddr_t *params));
83 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
84 				   u_long code));
85 
86 /*
87  * Linux syscalls return negative errno's, we do positive and map them
88  */
89 static int bsd_to_linux_errno[ELAST + 1] = {
90   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
91  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
92  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
93  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
94  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
95 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
96 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
97 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
98   	-6, -6, -43, -42, -75, -6, -84
99 };
100 
101 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
102 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
103 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
104 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
105 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
106 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
107 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
108 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
109 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
110 };
111 
112 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
113 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
114 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
115 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
116 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
117 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
118 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
119 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
120 	SIGIO, SIGURG, 0
121 };
122 
123 /*
124  * If FreeBSD & Linux have a difference of opinion about what a trap
125  * means, deal with it here.
126  */
127 static int
128 translate_traps(int signal, int trap_code)
129 {
130 	if (signal != SIGBUS)
131 		return signal;
132 	switch (trap_code) {
133 	case T_PROTFLT:
134 	case T_TSSFLT:
135 	case T_DOUBLEFLT:
136 	case T_PAGEFLT:
137 		return SIGSEGV;
138 	default:
139 		return signal;
140 	}
141 }
142 
143 static int
144 linux_fixup(register_t **stack_base, struct image_params *imgp)
145 {
146 	register_t *argv, *envp;
147 
148 	argv = *stack_base;
149 	envp = *stack_base + (imgp->argc + 1);
150 	(*stack_base)--;
151 	**stack_base = (intptr_t)(void *)envp;
152 	(*stack_base)--;
153 	**stack_base = (intptr_t)(void *)argv;
154 	(*stack_base)--;
155 	**stack_base = imgp->argc;
156 	return 0;
157 }
158 
159 static int
160 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
161 {
162 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
163 	register_t *pos;
164 
165 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
166 
167 	if (args->trace) {
168 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
169 	}
170 	if (args->execfd != -1) {
171 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
172 	}
173 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
174 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
175 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
176 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
177 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
178 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
179 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
180 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
181 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
182 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
183 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
184 	AUXARGS_ENTRY(pos, AT_NULL, 0);
185 
186 	free(imgp->auxargs, M_TEMP);
187 	imgp->auxargs = NULL;
188 
189 	(*stack_base)--;
190 	**stack_base = (long)imgp->argc;
191 	return 0;
192 }
193 
194 extern int _ucodesel, _udatasel;
195 
196 /*
197  * Send an interrupt to process.
198  *
199  * Stack is set up to allow sigcode stored
200  * in u. to call routine, followed by kcall
201  * to sigreturn routine below.  After sigreturn
202  * resets the signal mask, the stack, and the
203  * frame pointer, it returns to the user
204  * specified pc, psl.
205  */
206 
207 static void
208 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
209 {
210 	register struct proc *p = curproc;
211 	register struct trapframe *regs;
212 	struct linux_sigframe *fp, frame;
213 	struct sigacts *psp = p->p_sigacts;
214 	int oonstack;
215 
216 	regs = p->p_md.md_regs;
217 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
218 
219 #ifdef DEBUG
220 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
221 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
222 #endif
223 	/*
224 	 * Allocate space for the signal handler context.
225 	 */
226 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
227 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
228 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
229 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
230 		p->p_sigstk.ss_flags |= SS_ONSTACK;
231 	} else {
232 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
233 	}
234 
235 	/*
236 	 * grow() will return FALSE if the fp will not fit inside the stack
237 	 *	and the stack can not be grown. useracc will return FALSE
238 	 *	if access is denied.
239 	 */
240 	if ((grow_stack (p, (int)fp) == FALSE) ||
241 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
242 	    VM_PROT_WRITE)) {
243 		/*
244 		 * Process has trashed its stack; give it an illegal
245 		 * instruction to halt it in its tracks.
246 		 */
247 		SIGACTION(p, SIGILL) = SIG_DFL;
248 		SIGDELSET(p->p_sigignore, SIGILL);
249 		SIGDELSET(p->p_sigcatch, SIGILL);
250 		SIGDELSET(p->p_sigmask, SIGILL);
251 		psignal(p, SIGILL);
252 		return;
253 	}
254 
255 	/*
256 	 * Build the argument list for the signal handler.
257 	 */
258 	if (p->p_sysent->sv_sigtbl)
259 		if (sig <= p->p_sysent->sv_sigsize)
260 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
261 
262 	frame.sf_handler = catcher;
263 	frame.sf_sig = sig;
264 
265 	/*
266 	 * Build the signal context to be used by sigreturn.
267 	 */
268 	frame.sf_sc.sc_mask   = mask->__bits[0];
269 	frame.sf_sc.sc_gs     = rgs();
270 	frame.sf_sc.sc_fs     = regs->tf_fs;
271 	frame.sf_sc.sc_es     = regs->tf_es;
272 	frame.sf_sc.sc_ds     = regs->tf_ds;
273 	frame.sf_sc.sc_edi    = regs->tf_edi;
274 	frame.sf_sc.sc_esi    = regs->tf_esi;
275 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
276 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
277 	frame.sf_sc.sc_edx    = regs->tf_edx;
278 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
279 	frame.sf_sc.sc_eax    = regs->tf_eax;
280 	frame.sf_sc.sc_eip    = regs->tf_eip;
281 	frame.sf_sc.sc_cs     = regs->tf_cs;
282 	frame.sf_sc.sc_eflags = regs->tf_eflags;
283 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
284 	frame.sf_sc.sc_ss     = regs->tf_ss;
285 	frame.sf_sc.sc_err    = regs->tf_err;
286 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
287 
288 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
289 		/*
290 		 * Process has trashed its stack; give it an illegal
291 		 * instruction to halt it in its tracks.
292 		 */
293 		sigexit(p, SIGILL);
294 		/* NOTREACHED */
295 	}
296 
297 	/*
298 	 * Build context to run handler in.
299 	 */
300 	regs->tf_esp = (int)fp;
301 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
302 	regs->tf_eflags &= ~PSL_VM;
303 	regs->tf_cs = _ucodesel;
304 	regs->tf_ds = _udatasel;
305 	regs->tf_es = _udatasel;
306 	regs->tf_fs = _udatasel;
307 	load_gs(_udatasel);
308 	regs->tf_ss = _udatasel;
309 }
310 
311 /*
312  * System call to cleanup state after a signal
313  * has been taken.  Reset signal mask and
314  * stack state from context left by sendsig (above).
315  * Return to previous pc and psl as specified by
316  * context left by sendsig. Check carefully to
317  * make sure that the user has not modified the
318  * psl to gain improper privileges or to cause
319  * a machine fault.
320  */
321 int
322 linux_sigreturn(p, args)
323 	struct proc *p;
324 	struct linux_sigreturn_args *args;
325 {
326 	struct linux_sigcontext *scp, context;
327 	register struct trapframe *regs;
328 	int eflags;
329 
330 	regs = p->p_md.md_regs;
331 
332 #ifdef DEBUG
333 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
334 	    (long)p->p_pid, (void *)args->scp);
335 #endif
336 	/*
337 	 * The trampoline code hands us the context.
338 	 * It is unsafe to keep track of it ourselves, in the event that a
339 	 * program jumps out of a signal handler.
340 	 */
341 	scp = SCARG(args,scp);
342 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
343 		return (EFAULT);
344 
345 	/*
346 	 * Check for security violations.
347 	 */
348 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
349 	eflags = context.sc_eflags;
350 	/*
351 	 * XXX do allow users to change the privileged flag PSL_RF.  The
352 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
353 	 * sometimes set it there too.  tf_eflags is kept in the signal
354 	 * context during signal handling and there is no other place
355 	 * to remember it, so the PSL_RF bit may be corrupted by the
356 	 * signal handler without us knowing.  Corruption of the PSL_RF
357 	 * bit at worst causes one more or one less debugger trap, so
358 	 * allowing it is fairly harmless.
359 	 */
360 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
361     		return(EINVAL);
362 	}
363 
364 	/*
365 	 * Don't allow users to load a valid privileged %cs.  Let the
366 	 * hardware check for invalid selectors, excess privilege in
367 	 * other selectors, invalid %eip's and invalid %esp's.
368 	 */
369 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
370 	if (!CS_SECURE(context.sc_cs)) {
371 		trapsignal(p, SIGBUS, T_PROTFLT);
372 		return(EINVAL);
373 	}
374 
375 	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
376 	SIGSETOLD(p->p_sigmask, context.sc_mask);
377 	SIG_CANTMASK(p->p_sigmask);
378 
379 	/*
380 	 * Restore signal context.
381 	 */
382 	/* %gs was restored by the trampoline. */
383 	regs->tf_fs     = context.sc_fs;
384 	regs->tf_es     = context.sc_es;
385 	regs->tf_ds     = context.sc_ds;
386 	regs->tf_edi    = context.sc_edi;
387 	regs->tf_esi    = context.sc_esi;
388 	regs->tf_ebp    = context.sc_ebp;
389 	regs->tf_ebx    = context.sc_ebx;
390 	regs->tf_edx    = context.sc_edx;
391 	regs->tf_ecx    = context.sc_ecx;
392 	regs->tf_eax    = context.sc_eax;
393 	regs->tf_eip    = context.sc_eip;
394 	regs->tf_cs     = context.sc_cs;
395 	regs->tf_eflags = eflags;
396 	regs->tf_esp    = context.sc_esp_at_signal;
397 	regs->tf_ss     = context.sc_ss;
398 
399 	return (EJUSTRETURN);
400 }
401 
402 static void
403 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
404 {
405 	args[0] = tf->tf_ebx;
406 	args[1] = tf->tf_ecx;
407 	args[2] = tf->tf_edx;
408 	args[3] = tf->tf_esi;
409 	args[4] = tf->tf_edi;
410 	*params = NULL;		/* no copyin */
411 }
412 
413 /*
414  * If a linux binary is exec'ing something, try this image activator
415  * first.  We override standard shell script execution in order to
416  * be able to modify the interpreter path.  We only do this if a linux
417  * binary is doing the exec, so we do not create an EXEC module for it.
418  */
419 static int	exec_linux_imgact_try __P((struct image_params *iparams));
420 
421 static int
422 exec_linux_imgact_try(imgp)
423     struct image_params *imgp;
424 {
425     const char *head = (const char *)imgp->image_header;
426     int error = -1;
427 
428     /*
429      * The interpreter for shell scripts run from a linux binary needs
430      * to be located in /compat/linux if possible in order to recursively
431      * maintain linux path emulation.
432      */
433     if (((const short *)head)[0] == SHELLMAGIC) {
434 	    /*
435 	     * Run our normal shell image activator.  If it succeeds attempt
436 	     * to use the alternate path for the interpreter.  If an alternate
437 	     * path is found, use our stringspace to store it.
438 	     */
439 	    if ((error = exec_shell_imgact(imgp)) == 0) {
440 		    char *rpath = NULL;
441 
442 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
443 			imgp->interpreter_name, &rpath, 0);
444 		    if (rpath != imgp->interpreter_name) {
445 			    int len = strlen(rpath) + 1;
446 
447 			    if (len <= MAXSHELLCMDLEN) {
448 				memcpy(imgp->interpreter_name, rpath, len);
449 			    }
450 			    free(rpath, M_TEMP);
451 		    }
452 	    }
453     }
454     return(error);
455 }
456 
457 struct sysentvec linux_sysvec = {
458 	LINUX_SYS_MAXSYSCALL,
459 	linux_sysent,
460 	0xff,
461 	LINUX_SIGTBLSZ,
462 	bsd_to_linux_signal,
463 	ELAST + 1,
464 	bsd_to_linux_errno,
465 	translate_traps,
466 	linux_fixup,
467 	linux_sendsig,
468 	linux_sigcode,
469 	&linux_szsigcode,
470 	linux_prepsyscall,
471 	"Linux a.out",
472 	aout_coredump,
473 	exec_linux_imgact_try
474 };
475 
476 struct sysentvec elf_linux_sysvec = {
477 	LINUX_SYS_MAXSYSCALL,
478 	linux_sysent,
479 	0xff,
480 	LINUX_SIGTBLSZ,
481 	bsd_to_linux_signal,
482 	ELAST + 1,
483 	bsd_to_linux_errno,
484 	translate_traps,
485 	elf_linux_fixup,
486 	linux_sendsig,
487 	linux_sigcode,
488 	&linux_szsigcode,
489 	linux_prepsyscall,
490 	"Linux ELF",
491 	elf_coredump,
492 	exec_linux_imgact_try
493 };
494 
495 static Elf32_Brandinfo linux_brand = {
496 					ELFOSABI_LINUX,
497 					"/compat/linux",
498 					"/lib/ld-linux.so.1",
499 					&elf_linux_sysvec
500 				 };
501 
502 static Elf32_Brandinfo linux_glibc2brand = {
503 					ELFOSABI_LINUX,
504 					"/compat/linux",
505 					"/lib/ld-linux.so.2",
506 					&elf_linux_sysvec
507 				 };
508 
509 Elf32_Brandinfo *linux_brandlist[] = {
510 					&linux_brand,
511 					&linux_glibc2brand,
512 					NULL
513 				};
514 
515 static int
516 linux_elf_modevent(module_t mod, int type, void *data)
517 {
518 	Elf32_Brandinfo **brandinfo;
519 	int error;
520 
521 	error = 0;
522 
523 	switch(type) {
524 	case MOD_LOAD:
525 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
526 		     ++brandinfo)
527 			if (elf_insert_brand_entry(*brandinfo) < 0)
528 				error = EINVAL;
529 		if (error)
530 			printf("cannot insert Linux elf brand handler\n");
531 		else {
532 			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
533 			if (bootverbose)
534 				printf("Linux-ELF exec handler installed\n");
535 		}
536 		break;
537 	case MOD_UNLOAD:
538 		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
539 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
540 		     ++brandinfo)
541 			if (elf_brand_inuse(*brandinfo))
542 				error = EBUSY;
543 
544 		if (error == 0) {
545 			for (brandinfo = &linux_brandlist[0];
546 			     *brandinfo != NULL; ++brandinfo)
547 				if (elf_remove_brand_entry(*brandinfo) < 0)
548 					error = EINVAL;
549 		}
550 		if (error)
551 			printf("Could not deinstall ELF interpreter entry\n");
552 		else if (bootverbose)
553 			printf("Linux-elf exec handler removed\n");
554 		break;
555 	default:
556 		break;
557 	}
558 	return error;
559 }
560 static moduledata_t linux_elf_mod = {
561 	"linuxelf",
562 	linux_elf_modevent,
563 	0
564 };
565 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
566