xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 2cdbd5eec4e32beddb3adcca014dda56debc6f5b)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysent.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/signalvar.h>
46 #include <sys/malloc.h>
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_page.h>
50 #include <vm/vm_extern.h>
51 #include <sys/exec.h>
52 #include <sys/kernel.h>
53 #include <sys/module.h>
54 #include <machine/cpu.h>
55 
56 #include <i386/linux/linux.h>
57 #include <i386/linux/linux_proto.h>
58 #include <compat/linux/linux_util.h>
59 
/* Declare version 1 of the "linux" module. */
MODULE_VERSION(linux, 1);

/* Malloc type M_LINUX, described as "Linux mode structures". */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two characters "#!" read as one 16-bit value; the constant therefore
 * depends on the host byte order.  exec_linux_imgact_try() compares it
 * against the first short of the image header.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC      0x2123 /* #! */
#else
#define SHELLMAGIC      0x2321
#endif

/*
 * Signal trampoline code and its size; both are defined outside this file
 * and are referenced from the sysentvec initializers below.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, LINUX_SYS_MAXSYSCALL entries. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set handed to the ioctl handler (un)registration routines. */
extern struct linker_set linux_ioctl_handler_set;

/* Forward declarations of the file-local sysentvec callbacks. */
static int	linux_fixup __P((register_t **stack_base,
				 struct image_params *iparams));
static int	elf_linux_fixup __P((register_t **stack_base,
				     struct image_params *iparams));
static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
				       u_int *code, caddr_t *params));
static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
				   u_long code));
85 
86 /*
87  * Linux syscalls return negative errno's, we do positive and map them
88  */
89 static int bsd_to_linux_errno[ELAST + 1] = {
90   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
91  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
92  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
93  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
94  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
95 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
96 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
97 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
98   	-6, -6, -43, -42, -75, -6, -84
99 };
100 
101 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
102 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
103 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
104 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
105 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
106 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
107 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
108 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
109 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
110 };
111 
112 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
113 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
114 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
115 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
116 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
117 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
118 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
119 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
120 	SIGIO, SIGURG, 0
121 };
122 
123 /*
124  * If FreeBSD & Linux have a difference of opinion about what a trap
125  * means, deal with it here.
126  */
127 static int
128 translate_traps(int signal, int trap_code)
129 {
130 	if (signal != SIGBUS)
131 		return signal;
132 	switch (trap_code) {
133 	case T_PROTFLT:
134 	case T_TSSFLT:
135 	case T_DOUBLEFLT:
136 	case T_PAGEFLT:
137 		return SIGSEGV;
138 	default:
139 		return signal;
140 	}
141 }
142 
143 static int
144 linux_fixup(register_t **stack_base, struct image_params *imgp)
145 {
146 	register_t *argv, *envp;
147 
148 	argv = *stack_base;
149 	envp = *stack_base + (imgp->argc + 1);
150 	(*stack_base)--;
151 	**stack_base = (intptr_t)(void *)envp;
152 	(*stack_base)--;
153 	**stack_base = (intptr_t)(void *)argv;
154 	(*stack_base)--;
155 	**stack_base = imgp->argc;
156 	return 0;
157 }
158 
159 static int
160 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
161 {
162 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
163 	register_t *pos;
164 
165 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
166 
167 	if (args->trace) {
168 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
169 	}
170 	if (args->execfd != -1) {
171 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
172 	}
173 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
174 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
175 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
176 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
177 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
178 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
179 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
180 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
181 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
182 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
183 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
184 	AUXARGS_ENTRY(pos, AT_NULL, 0);
185 
186 	free(imgp->auxargs, M_TEMP);
187 	imgp->auxargs = NULL;
188 
189 	(*stack_base)--;
190 	**stack_base = (long)imgp->argc;
191 	return 0;
192 }
193 
194 extern int _ucodesel, _udatasel;
195 
196 /*
197  * Send an interrupt to process.
198  *
199  * Stack is set up to allow sigcode stored
200  * in u. to call routine, followed by kcall
201  * to sigreturn routine below.  After sigreturn
202  * resets the signal mask, the stack, and the
203  * frame pointer, it returns to the user
204  * specified pc, psl.
205  */
206 
207 static void
208 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
209 {
210 	register struct proc *p = curproc;
211 	register struct trapframe *regs;
212 	struct linux_sigframe *fp, frame;
213 	struct sigacts *psp = p->p_sigacts;
214 	int oonstack;
215 
216 	regs = p->p_md.md_regs;
217 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
218 
219 #ifdef DEBUG
220 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
221 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
222 #endif
223 	/*
224 	 * Allocate space for the signal handler context.
225 	 */
226 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
227 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
228 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
229 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
230 		p->p_sigstk.ss_flags |= SS_ONSTACK;
231 	} else {
232 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
233 	}
234 
235 	/*
236 	 * grow() will return FALSE if the fp will not fit inside the stack
237 	 *	and the stack can not be grown. useracc will return FALSE
238 	 *	if access is denied.
239 	 */
240 	if ((grow_stack (p, (int)fp) == FALSE) ||
241 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
242 	    VM_PROT_WRITE)) {
243 		/*
244 		 * Process has trashed its stack; give it an illegal
245 		 * instruction to halt it in its tracks.
246 		 */
247 		SIGACTION(p, SIGILL) = SIG_DFL;
248 		SIGDELSET(p->p_sigignore, SIGILL);
249 		SIGDELSET(p->p_sigcatch, SIGILL);
250 		SIGDELSET(p->p_sigmask, SIGILL);
251 		psignal(p, SIGILL);
252 		return;
253 	}
254 
255 	/*
256 	 * Build the argument list for the signal handler.
257 	 */
258 	if (p->p_sysent->sv_sigtbl)
259 		if (sig <= p->p_sysent->sv_sigsize)
260 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
261 
262 	frame.sf_handler = catcher;
263 	frame.sf_sig = sig;
264 
265 	/*
266 	 * Build the signal context to be used by sigreturn.
267 	 */
268 	frame.sf_sc.sc_mask   = mask->__bits[0];
269 	frame.sf_sc.sc_gs     = rgs();
270 	frame.sf_sc.sc_fs     = regs->tf_fs;
271 	frame.sf_sc.sc_es     = regs->tf_es;
272 	frame.sf_sc.sc_ds     = regs->tf_ds;
273 	frame.sf_sc.sc_edi    = regs->tf_edi;
274 	frame.sf_sc.sc_esi    = regs->tf_esi;
275 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
276 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
277 	frame.sf_sc.sc_edx    = regs->tf_edx;
278 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
279 	frame.sf_sc.sc_eax    = regs->tf_eax;
280 	frame.sf_sc.sc_eip    = regs->tf_eip;
281 	frame.sf_sc.sc_cs     = regs->tf_cs;
282 	frame.sf_sc.sc_eflags = regs->tf_eflags;
283 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
284 	frame.sf_sc.sc_ss     = regs->tf_ss;
285 	frame.sf_sc.sc_err    = regs->tf_err;
286 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
287 
288 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
289 		/*
290 		 * Process has trashed its stack; give it an illegal
291 		 * instruction to halt it in its tracks.
292 		 */
293 		sigexit(p, SIGILL);
294 		/* NOTREACHED */
295 	}
296 
297 	/*
298 	 * Build context to run handler in.
299 	 */
300 	regs->tf_esp = (int)fp;
301 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
302 	regs->tf_eflags &= ~PSL_VM;
303 	regs->tf_cs = _ucodesel;
304 	regs->tf_ds = _udatasel;
305 	regs->tf_es = _udatasel;
306 	regs->tf_fs = _udatasel;
307 	load_gs(_udatasel);
308 	regs->tf_ss = _udatasel;
309 }
310 
311 /*
312  * System call to cleanup state after a signal
313  * has been taken.  Reset signal mask and
314  * stack state from context left by sendsig (above).
315  * Return to previous pc and psl as specified by
316  * context left by sendsig. Check carefully to
317  * make sure that the user has not modified the
318  * psl to gain improper privileges or to cause
319  * a machine fault.
320  */
321 int
322 linux_sigreturn(p, args)
323 	struct proc *p;
324 	struct linux_sigreturn_args *args;
325 {
326 	struct linux_sigcontext context;
327 	register struct trapframe *regs;
328 	int eflags;
329 
330 	regs = p->p_md.md_regs;
331 
332 #ifdef DEBUG
333 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
334 	    (long)p->p_pid, (void *)args->scp);
335 #endif
336 	/*
337 	 * The trampoline code hands us the context.
338 	 * It is unsafe to keep track of it ourselves, in the event that a
339 	 * program jumps out of a signal handler.
340 	 */
341 	if (copyin((caddr_t)args->scp, &context, sizeof(context)) != 0)
342 		return (EFAULT);
343 
344 	/*
345 	 * Check for security violations.
346 	 */
347 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
348 	eflags = context.sc_eflags;
349 	/*
350 	 * XXX do allow users to change the privileged flag PSL_RF.  The
351 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
352 	 * sometimes set it there too.  tf_eflags is kept in the signal
353 	 * context during signal handling and there is no other place
354 	 * to remember it, so the PSL_RF bit may be corrupted by the
355 	 * signal handler without us knowing.  Corruption of the PSL_RF
356 	 * bit at worst causes one more or one less debugger trap, so
357 	 * allowing it is fairly harmless.
358 	 */
359 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
360     		return(EINVAL);
361 	}
362 
363 	/*
364 	 * Don't allow users to load a valid privileged %cs.  Let the
365 	 * hardware check for invalid selectors, excess privilege in
366 	 * other selectors, invalid %eip's and invalid %esp's.
367 	 */
368 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
369 	if (!CS_SECURE(context.sc_cs)) {
370 		trapsignal(p, SIGBUS, T_PROTFLT);
371 		return(EINVAL);
372 	}
373 
374 	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
375 	SIGSETOLD(p->p_sigmask, context.sc_mask);
376 	SIG_CANTMASK(p->p_sigmask);
377 
378 	/*
379 	 * Restore signal context.
380 	 */
381 	/* %gs was restored by the trampoline. */
382 	regs->tf_fs     = context.sc_fs;
383 	regs->tf_es     = context.sc_es;
384 	regs->tf_ds     = context.sc_ds;
385 	regs->tf_edi    = context.sc_edi;
386 	regs->tf_esi    = context.sc_esi;
387 	regs->tf_ebp    = context.sc_ebp;
388 	regs->tf_ebx    = context.sc_ebx;
389 	regs->tf_edx    = context.sc_edx;
390 	regs->tf_ecx    = context.sc_ecx;
391 	regs->tf_eax    = context.sc_eax;
392 	regs->tf_eip    = context.sc_eip;
393 	regs->tf_cs     = context.sc_cs;
394 	regs->tf_eflags = eflags;
395 	regs->tf_esp    = context.sc_esp_at_signal;
396 	regs->tf_ss     = context.sc_ss;
397 
398 	return (EJUSTRETURN);
399 }
400 
401 static void
402 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
403 {
404 	args[0] = tf->tf_ebx;
405 	args[1] = tf->tf_ecx;
406 	args[2] = tf->tf_edx;
407 	args[3] = tf->tf_esi;
408 	args[4] = tf->tf_edi;
409 	*params = NULL;		/* no copyin */
410 }
411 
412 /*
413  * If a linux binary is exec'ing something, try this image activator
414  * first.  We override standard shell script execution in order to
415  * be able to modify the interpreter path.  We only do this if a linux
416  * binary is doing the exec, so we do not create an EXEC module for it.
417  */
418 static int	exec_linux_imgact_try __P((struct image_params *iparams));
419 
420 static int
421 exec_linux_imgact_try(imgp)
422     struct image_params *imgp;
423 {
424     const char *head = (const char *)imgp->image_header;
425     int error = -1;
426 
427     /*
428      * The interpreter for shell scripts run from a linux binary needs
429      * to be located in /compat/linux if possible in order to recursively
430      * maintain linux path emulation.
431      */
432     if (((const short *)head)[0] == SHELLMAGIC) {
433 	    /*
434 	     * Run our normal shell image activator.  If it succeeds attempt
435 	     * to use the alternate path for the interpreter.  If an alternate
436 	     * path is found, use our stringspace to store it.
437 	     */
438 	    if ((error = exec_shell_imgact(imgp)) == 0) {
439 		    char *rpath = NULL;
440 
441 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
442 			imgp->interpreter_name, &rpath, 0);
443 		    if (rpath != imgp->interpreter_name) {
444 			    int len = strlen(rpath) + 1;
445 
446 			    if (len <= MAXSHELLCMDLEN) {
447 				memcpy(imgp->interpreter_name, rpath, len);
448 			    }
449 			    free(rpath, M_TEMP);
450 		    }
451 	    }
452     }
453     return(error);
454 }
455 
/*
 * System entry vector for Linux a.out binaries.  The initializer is
 * positional; the per-slot notes describe the values given here.  Slot
 * meanings not evidenced in this file are marked as presumed -- verify
 * them against the struct sysentvec declaration.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* size of linux_sysent[] */
	linux_sysent,		/* Linux system call table */
	0xff,			/* presumably the syscall number mask */
	LINUX_SIGTBLSZ,		/* size of the signal table */
	bsd_to_linux_signal,	/* BSD -> Linux signal table */
	ELAST + 1,		/* size of the errno table */
	bsd_to_linux_errno,	/* BSD -> Linux errno table */
	translate_traps,	/* trap -> signal adjustment */
	linux_fixup,		/* a.out stack fixup */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* size of the trampoline code */
	linux_prepsyscall,	/* syscall argument fetch */
	"Linux a.out",		/* presumably the ABI name */
	aout_coredump,		/* core dump routine */
	exec_linux_imgact_try	/* image activator tried first */
};
474 
/*
 * System entry vector for Linux ELF binaries.  Identical to the a.out
 * vector except for the stack fixup routine, the name string and the
 * core dump routine.  The initializer is positional; slot meanings not
 * evidenced in this file are marked as presumed -- verify them against
 * the struct sysentvec declaration.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* size of linux_sysent[] */
	linux_sysent,		/* Linux system call table */
	0xff,			/* presumably the syscall number mask */
	LINUX_SIGTBLSZ,		/* size of the signal table */
	bsd_to_linux_signal,	/* BSD -> Linux signal table */
	ELAST + 1,		/* size of the errno table */
	bsd_to_linux_errno,	/* BSD -> Linux errno table */
	translate_traps,	/* trap -> signal adjustment */
	elf_linux_fixup,	/* ELF stack fixup (writes the aux vector) */
	linux_sendsig,		/* signal delivery */
	linux_sigcode,		/* signal trampoline code */
	&linux_szsigcode,	/* size of the trampoline code */
	linux_prepsyscall,	/* syscall argument fetch */
	"Linux ELF",		/* presumably the ABI name */
	elf_coredump,		/* core dump routine */
	exec_linux_imgact_try	/* image activator tried first */
};
493 
/*
 * ELF brand descriptions for Linux binaries.  Both brands share the
 * ELFOSABI_LINUX identifier, the "/compat/linux" path and the Linux ELF
 * sysentvec; they differ only in the dynamic linker path string
 * (ld-linux.so.1 versus ld-linux.so.2).  The initializers are
 * positional -- see Elf32_Brandinfo for the exact field meanings.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec
				 };

/* Same as linux_brand, but naming the ld-linux.so.2 dynamic linker. */
static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec
				 };

/*
 * NULL-terminated list of every brand this module inserts on load and
 * removes on unload.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
513 
514 static int
515 linux_elf_modevent(module_t mod, int type, void *data)
516 {
517 	Elf32_Brandinfo **brandinfo;
518 	int error;
519 
520 	error = 0;
521 
522 	switch(type) {
523 	case MOD_LOAD:
524 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
525 		     ++brandinfo)
526 			if (elf_insert_brand_entry(*brandinfo) < 0)
527 				error = EINVAL;
528 		if (error)
529 			printf("cannot insert Linux elf brand handler\n");
530 		else {
531 			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
532 			if (bootverbose)
533 				printf("Linux-ELF exec handler installed\n");
534 		}
535 		break;
536 	case MOD_UNLOAD:
537 		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
538 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
539 		     ++brandinfo)
540 			if (elf_brand_inuse(*brandinfo))
541 				error = EBUSY;
542 
543 		if (error == 0) {
544 			for (brandinfo = &linux_brandlist[0];
545 			     *brandinfo != NULL; ++brandinfo)
546 				if (elf_remove_brand_entry(*brandinfo) < 0)
547 					error = EINVAL;
548 		}
549 		if (error)
550 			printf("Could not deinstall ELF interpreter entry\n");
551 		else if (bootverbose)
552 			printf("Linux-elf exec handler removed\n");
553 		break;
554 	default:
555 		break;
556 	}
557 	return error;
558 }
/*
 * Module description: the name "linuxelf", the event handler above and
 * a zero third member (no extra data is passed).
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};
/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
565