xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 23f282aa31e9b6fceacd449020e936e98d6f2298)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysent.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/signalvar.h>
46 #include <sys/malloc.h>
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_page.h>
50 #include <vm/vm_extern.h>
51 #include <sys/exec.h>
52 #include <sys/kernel.h>
53 #include <sys/module.h>
54 #include <machine/cpu.h>
55 
56 #include <i386/linux/linux.h>
57 #include <i386/linux/linux_proto.h>
58 #include <i386/linux/linux_util.h>
59 
60 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
61 
62 #if BYTE_ORDER == LITTLE_ENDIAN
63 #define SHELLMAGIC      0x2123 /* #! */
64 #else
65 #define SHELLMAGIC      0x2321
66 #endif
67 
68 extern char linux_sigcode[];
69 extern int linux_szsigcode;
70 
71 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
72 
73 extern struct linker_set linux_ioctl_handler_set;
74 
75 static int	linux_fixup __P((register_t **stack_base,
76 				 struct image_params *iparams));
77 static int	elf_linux_fixup __P((register_t **stack_base,
78 				     struct image_params *iparams));
79 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
80 				       u_int *code, caddr_t *params));
81 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
82 				   u_long code));
83 
84 /*
85  * Linux syscalls return negative errno's, we do positive and map them
86  */
87 static int bsd_to_linux_errno[ELAST + 1] = {
88   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
89  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
90  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
91  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
92  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
93 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
94 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
95 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
96   	-6, -6, -43, -42, -75, -6, -84
97 };
98 
99 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
100 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
101 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
102 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
103 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
104 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
105 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
106 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
107 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
108 };
109 
110 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
111 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
112 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
113 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
114 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
115 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
116 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
117 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
118 	SIGIO, SIGURG, 0
119 };
120 
121 /*
122  * If FreeBSD & Linux have a difference of opinion about what a trap
123  * means, deal with it here.
124  */
125 static int
126 translate_traps(int signal, int trap_code)
127 {
128 	if (signal != SIGBUS)
129 		return signal;
130 	switch (trap_code) {
131 	case T_PROTFLT:
132 	case T_TSSFLT:
133 	case T_DOUBLEFLT:
134 	case T_PAGEFLT:
135 		return SIGSEGV;
136 	default:
137 		return signal;
138 	}
139 }
140 
141 static int
142 linux_fixup(register_t **stack_base, struct image_params *imgp)
143 {
144 	register_t *argv, *envp;
145 
146 	argv = *stack_base;
147 	envp = *stack_base + (imgp->argc + 1);
148 	(*stack_base)--;
149 	**stack_base = (intptr_t)(void *)envp;
150 	(*stack_base)--;
151 	**stack_base = (intptr_t)(void *)argv;
152 	(*stack_base)--;
153 	**stack_base = imgp->argc;
154 	return 0;
155 }
156 
157 static int
158 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
159 {
160 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
161 	register_t *pos;
162 
163 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
164 
165 	if (args->trace) {
166 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
167 	}
168 	if (args->execfd != -1) {
169 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
170 	}
171 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
172 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
173 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
174 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
175 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
176 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
177 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
178 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
179 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
180 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
181 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
182 	AUXARGS_ENTRY(pos, AT_NULL, 0);
183 
184 	free(imgp->auxargs, M_TEMP);
185 	imgp->auxargs = NULL;
186 
187 	(*stack_base)--;
188 	**stack_base = (long)imgp->argc;
189 	return 0;
190 }
191 
192 extern int _ucodesel, _udatasel;
193 
194 /*
195  * Send an interrupt to process.
196  *
197  * Stack is set up to allow sigcode stored
198  * in u. to call routine, followed by kcall
199  * to sigreturn routine below.  After sigreturn
200  * resets the signal mask, the stack, and the
201  * frame pointer, it returns to the user
202  * specified pc, psl.
203  */
204 
205 static void
206 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
207 {
208 	register struct proc *p = curproc;
209 	register struct trapframe *regs;
210 	struct linux_sigframe *fp, frame;
211 	struct sigacts *psp = p->p_sigacts;
212 	int oonstack;
213 
214 	regs = p->p_md.md_regs;
215 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
216 
217 #ifdef DEBUG
218 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
219 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
220 #endif
221 	/*
222 	 * Allocate space for the signal handler context.
223 	 */
224 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
225 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
226 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
227 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
228 		p->p_sigstk.ss_flags |= SS_ONSTACK;
229 	} else {
230 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
231 	}
232 
233 	/*
234 	 * grow() will return FALSE if the fp will not fit inside the stack
235 	 *	and the stack can not be grown. useracc will return FALSE
236 	 *	if access is denied.
237 	 */
238 	if ((grow_stack (p, (int)fp) == FALSE) ||
239 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
240 	    VM_PROT_WRITE)) {
241 		/*
242 		 * Process has trashed its stack; give it an illegal
243 		 * instruction to halt it in its tracks.
244 		 */
245 		SIGACTION(p, SIGILL) = SIG_DFL;
246 		SIGDELSET(p->p_sigignore, SIGILL);
247 		SIGDELSET(p->p_sigcatch, SIGILL);
248 		SIGDELSET(p->p_sigmask, SIGILL);
249 		psignal(p, SIGILL);
250 		return;
251 	}
252 
253 	/*
254 	 * Build the argument list for the signal handler.
255 	 */
256 	if (p->p_sysent->sv_sigtbl)
257 		if (sig <= p->p_sysent->sv_sigsize)
258 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
259 
260 	frame.sf_handler = catcher;
261 	frame.sf_sig = sig;
262 
263 	/*
264 	 * Build the signal context to be used by sigreturn.
265 	 */
266 	frame.sf_sc.sc_mask   = mask->__bits[0];
267 	frame.sf_sc.sc_gs     = rgs();
268 	frame.sf_sc.sc_fs     = regs->tf_fs;
269 	frame.sf_sc.sc_es     = regs->tf_es;
270 	frame.sf_sc.sc_ds     = regs->tf_ds;
271 	frame.sf_sc.sc_edi    = regs->tf_edi;
272 	frame.sf_sc.sc_esi    = regs->tf_esi;
273 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
274 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
275 	frame.sf_sc.sc_edx    = regs->tf_edx;
276 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
277 	frame.sf_sc.sc_eax    = regs->tf_eax;
278 	frame.sf_sc.sc_eip    = regs->tf_eip;
279 	frame.sf_sc.sc_cs     = regs->tf_cs;
280 	frame.sf_sc.sc_eflags = regs->tf_eflags;
281 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
282 	frame.sf_sc.sc_ss     = regs->tf_ss;
283 	frame.sf_sc.sc_err    = regs->tf_err;
284 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
285 
286 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
287 		/*
288 		 * Process has trashed its stack; give it an illegal
289 		 * instruction to halt it in its tracks.
290 		 */
291 		sigexit(p, SIGILL);
292 		/* NOTREACHED */
293 	}
294 
295 	/*
296 	 * Build context to run handler in.
297 	 */
298 	regs->tf_esp = (int)fp;
299 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
300 	regs->tf_eflags &= ~PSL_VM;
301 	regs->tf_cs = _ucodesel;
302 	regs->tf_ds = _udatasel;
303 	regs->tf_es = _udatasel;
304 	regs->tf_fs = _udatasel;
305 	load_gs(_udatasel);
306 	regs->tf_ss = _udatasel;
307 }
308 
309 /*
310  * System call to cleanup state after a signal
311  * has been taken.  Reset signal mask and
312  * stack state from context left by sendsig (above).
313  * Return to previous pc and psl as specified by
314  * context left by sendsig. Check carefully to
315  * make sure that the user has not modified the
316  * psl to gain improper privileges or to cause
317  * a machine fault.
318  */
319 int
320 linux_sigreturn(p, args)
321 	struct proc *p;
322 	struct linux_sigreturn_args *args;
323 {
324 	struct linux_sigcontext *scp, context;
325 	register struct trapframe *regs;
326 	int eflags;
327 
328 	regs = p->p_md.md_regs;
329 
330 #ifdef DEBUG
331 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
332 	    (long)p->p_pid, (void *)args->scp);
333 #endif
334 	/*
335 	 * The trampoline code hands us the context.
336 	 * It is unsafe to keep track of it ourselves, in the event that a
337 	 * program jumps out of a signal handler.
338 	 */
339 	scp = SCARG(args,scp);
340 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
341 		return (EFAULT);
342 
343 	/*
344 	 * Check for security violations.
345 	 */
346 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
347 	eflags = context.sc_eflags;
348 	/*
349 	 * XXX do allow users to change the privileged flag PSL_RF.  The
350 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
351 	 * sometimes set it there too.  tf_eflags is kept in the signal
352 	 * context during signal handling and there is no other place
353 	 * to remember it, so the PSL_RF bit may be corrupted by the
354 	 * signal handler without us knowing.  Corruption of the PSL_RF
355 	 * bit at worst causes one more or one less debugger trap, so
356 	 * allowing it is fairly harmless.
357 	 */
358 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
359     		return(EINVAL);
360 	}
361 
362 	/*
363 	 * Don't allow users to load a valid privileged %cs.  Let the
364 	 * hardware check for invalid selectors, excess privilege in
365 	 * other selectors, invalid %eip's and invalid %esp's.
366 	 */
367 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
368 	if (!CS_SECURE(context.sc_cs)) {
369 		trapsignal(p, SIGBUS, T_PROTFLT);
370 		return(EINVAL);
371 	}
372 
373 	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
374 	SIGSETOLD(p->p_sigmask, context.sc_mask);
375 	SIG_CANTMASK(p->p_sigmask);
376 
377 	/*
378 	 * Restore signal context.
379 	 */
380 	/* %gs was restored by the trampoline. */
381 	regs->tf_fs     = context.sc_fs;
382 	regs->tf_es     = context.sc_es;
383 	regs->tf_ds     = context.sc_ds;
384 	regs->tf_edi    = context.sc_edi;
385 	regs->tf_esi    = context.sc_esi;
386 	regs->tf_ebp    = context.sc_ebp;
387 	regs->tf_ebx    = context.sc_ebx;
388 	regs->tf_edx    = context.sc_edx;
389 	regs->tf_ecx    = context.sc_ecx;
390 	regs->tf_eax    = context.sc_eax;
391 	regs->tf_eip    = context.sc_eip;
392 	regs->tf_cs     = context.sc_cs;
393 	regs->tf_eflags = eflags;
394 	regs->tf_esp    = context.sc_esp_at_signal;
395 	regs->tf_ss     = context.sc_ss;
396 
397 	return (EJUSTRETURN);
398 }
399 
400 static void
401 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
402 {
403 	args[0] = tf->tf_ebx;
404 	args[1] = tf->tf_ecx;
405 	args[2] = tf->tf_edx;
406 	args[3] = tf->tf_esi;
407 	args[4] = tf->tf_edi;
408 	*params = NULL;		/* no copyin */
409 }
410 
411 /*
412  * If a linux binary is exec'ing something, try this image activator
413  * first.  We override standard shell script execution in order to
414  * be able to modify the interpreter path.  We only do this if a linux
415  * binary is doing the exec, so we do not create an EXEC module for it.
416  */
417 static int	exec_linux_imgact_try __P((struct image_params *iparams));
418 
419 static int
420 exec_linux_imgact_try(imgp)
421     struct image_params *imgp;
422 {
423     const char *head = (const char *)imgp->image_header;
424     int error = -1;
425 
426     /*
427      * The interpreter for shell scripts run from a linux binary needs
428      * to be located in /compat/linux if possible in order to recursively
429      * maintain linux path emulation.
430      */
431     if (((const short *)head)[0] == SHELLMAGIC) {
432 	    /*
433 	     * Run our normal shell image activator.  If it succeeds attempt
434 	     * to use the alternate path for the interpreter.  If an alternate
435 	     * path is found, use our stringspace to store it.
436 	     */
437 	    if ((error = exec_shell_imgact(imgp)) == 0) {
438 		    char *rpath = NULL;
439 
440 		    linux_emul_find(imgp->proc, NULL, linux_emul_path,
441 			imgp->interpreter_name, &rpath, 0);
442 		    if (rpath != imgp->interpreter_name) {
443 			    int len = strlen(rpath) + 1;
444 
445 			    if (len <= MAXSHELLCMDLEN) {
446 				memcpy(imgp->interpreter_name, rpath, len);
447 			    }
448 			    free(rpath, M_TEMP);
449 		    }
450 	    }
451     }
452     return(error);
453 }
454 
455 struct sysentvec linux_sysvec = {
456 	LINUX_SYS_MAXSYSCALL,
457 	linux_sysent,
458 	0xff,
459 	LINUX_SIGTBLSZ,
460 	bsd_to_linux_signal,
461 	ELAST + 1,
462 	bsd_to_linux_errno,
463 	translate_traps,
464 	linux_fixup,
465 	linux_sendsig,
466 	linux_sigcode,
467 	&linux_szsigcode,
468 	linux_prepsyscall,
469 	"Linux a.out",
470 	aout_coredump,
471 	exec_linux_imgact_try
472 };
473 
474 struct sysentvec elf_linux_sysvec = {
475 	LINUX_SYS_MAXSYSCALL,
476 	linux_sysent,
477 	0xff,
478 	LINUX_SIGTBLSZ,
479 	bsd_to_linux_signal,
480 	ELAST + 1,
481 	bsd_to_linux_errno,
482 	translate_traps,
483 	elf_linux_fixup,
484 	linux_sendsig,
485 	linux_sigcode,
486 	&linux_szsigcode,
487 	linux_prepsyscall,
488 	"Linux ELF",
489 	elf_coredump,
490 	exec_linux_imgact_try
491 };
492 
493 static Elf32_Brandinfo linux_brand = {
494 					ELFOSABI_LINUX,
495 					"/compat/linux",
496 					"/lib/ld-linux.so.1",
497 					&elf_linux_sysvec
498 				 };
499 
500 static Elf32_Brandinfo linux_glibc2brand = {
501 					ELFOSABI_LINUX,
502 					"/compat/linux",
503 					"/lib/ld-linux.so.2",
504 					&elf_linux_sysvec
505 				 };
506 
507 Elf32_Brandinfo *linux_brandlist[] = {
508 					&linux_brand,
509 					&linux_glibc2brand,
510 					NULL
511 				};
512 
513 static int
514 linux_elf_modevent(module_t mod, int type, void *data)
515 {
516 	Elf32_Brandinfo **brandinfo;
517 	int error;
518 
519 	error = 0;
520 
521 	switch(type) {
522 	case MOD_LOAD:
523 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
524 		     ++brandinfo)
525 			if (elf_insert_brand_entry(*brandinfo) < 0)
526 				error = EINVAL;
527 		if (error)
528 			printf("cannot insert Linux elf brand handler\n");
529 		else {
530 			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
531 			if (bootverbose)
532 				printf("Linux-ELF exec handler installed\n");
533 		}
534 		break;
535 	case MOD_UNLOAD:
536 		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
537 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
538 		     ++brandinfo)
539 			if (elf_brand_inuse(*brandinfo))
540 				error = EBUSY;
541 
542 		if (error == 0) {
543 			for (brandinfo = &linux_brandlist[0];
544 			     *brandinfo != NULL; ++brandinfo)
545 				if (elf_remove_brand_entry(*brandinfo) < 0)
546 					error = EINVAL;
547 		}
548 		if (error)
549 			printf("Could not deinstall ELF interpreter entry\n");
550 		else if (bootverbose)
551 			printf("Linux-elf exec handler removed\n");
552 		break;
553 	default:
554 		break;
555 	}
556 	return error;
557 }
558 static moduledata_t linux_elf_mod = {
559 	"linuxelf",
560 	linux_elf_modevent,
561 	0
562 };
563 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
564