/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $Id: linux_sysvec.c,v 1.41 1998/12/19 02:55:33 julian Exp $
 */

/* XXX we use functions that might not exist. */
#include "opt_compat.h"

#ifndef COMPAT_43
#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif 37 38 #include <sys/param.h> 39 #include <sys/buf.h> 40 #include <sys/proc.h> 41 #include <sys/systm.h> 42 #include <sys/sysent.h> 43 #include <sys/imgact.h> 44 #include <sys/imgact_aout.h> 45 #include <sys/imgact_elf.h> 46 #include <sys/signalvar.h> 47 #include <sys/malloc.h> 48 #include <vm/vm.h> 49 #include <vm/vm_param.h> 50 #include <vm/vm_prot.h> 51 #include <vm/vm_page.h> 52 #include <vm/vm_extern.h> 53 #ifdef COMPAT_LINUX_THREADS 54 #include <sys/lock.h> /* needed, for now, by vm_map.h */ 55 #include <vm/vm_map.h> /* needed, for now, for VM_STACK defines */ 56 #endif /* COMPAT_LINUX_THREADS */ 57 #include <sys/exec.h> 58 #include <sys/kernel.h> 59 #include <sys/module.h> 60 #include <machine/cpu.h> 61 62 #include <i386/linux/linux.h> 63 #include <i386/linux/linux_proto.h> 64 65 static int linux_fixup __P((long **stack_base, 66 struct image_params *iparams)); 67 static int elf_linux_fixup __P((long **stack_base, 68 struct image_params *iparams)); 69 static void linux_prepsyscall __P((struct trapframe *tf, int *args, 70 u_int *code, caddr_t *params)); 71 static void linux_sendsig __P((sig_t catcher, int sig, int mask, 72 u_long code)); 73 74 /* 75 * Linux syscalls return negative errno's, we do positive and map them 76 */ 77 static int bsd_to_linux_errno[ELAST + 1] = { 78 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 79 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 80 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 81 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 82 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 83 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 84 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 85 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 86 -6, -6, -43, -42, -75, -6, -84 87 }; 88 89 int bsd_to_linux_signal[NSIG] = { 90 0, LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, 91 LINUX_SIGILL, LINUX_SIGTRAP, LINUX_SIGABRT, 0, 92 LINUX_SIGFPE, LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 93 0, LINUX_SIGPIPE, LINUX_SIGALRM, 
LINUX_SIGTERM, 94 LINUX_SIGURG, LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, 95 LINUX_SIGCHLD, LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, 96 LINUX_SIGXCPU, LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, 97 LINUX_SIGWINCH, 0, LINUX_SIGUSR1, LINUX_SIGUSR2 98 }; 99 100 int linux_to_bsd_signal[LINUX_NSIG] = { 101 0, SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGTRAP, SIGABRT, SIGBUS, 102 SIGFPE, SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, SIGPIPE, SIGALRM, SIGTERM, 103 SIGBUS, SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGURG, 104 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, SIGIO, SIGURG, 0 105 }; 106 107 /* 108 * If FreeBSD & Linux have a difference of opinion about what a trap 109 * means, deal with it here. 110 */ 111 static int 112 translate_traps(int signal, int trap_code) 113 { 114 if (signal != SIGBUS) 115 return signal; 116 switch (trap_code) { 117 case T_PROTFLT: 118 case T_TSSFLT: 119 case T_DOUBLEFLT: 120 case T_PAGEFLT: 121 return SIGSEGV; 122 default: 123 return signal; 124 } 125 } 126 127 static int 128 linux_fixup(long **stack_base, struct image_params *imgp) 129 { 130 long *argv, *envp; 131 132 argv = *stack_base; 133 envp = *stack_base + (imgp->argc + 1); 134 (*stack_base)--; 135 **stack_base = (intptr_t)(void *)envp; 136 (*stack_base)--; 137 **stack_base = (intptr_t)(void *)argv; 138 (*stack_base)--; 139 **stack_base = imgp->argc; 140 return 0; 141 } 142 143 static int 144 elf_linux_fixup(long **stack_base, struct image_params *imgp) 145 { 146 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs; 147 long *pos; 148 149 pos = *stack_base + (imgp->argc + imgp->envc + 2); 150 151 if (args->trace) { 152 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 153 } 154 if (args->execfd != -1) { 155 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 156 } 157 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 158 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 159 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 160 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 161 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 
162 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 163 AUXARGS_ENTRY(pos, AT_BASE, args->base); 164 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid); 165 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid); 166 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid); 167 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid); 168 AUXARGS_ENTRY(pos, AT_NULL, 0); 169 170 free(imgp->auxargs, M_TEMP); 171 imgp->auxargs = NULL; 172 173 (*stack_base)--; 174 **stack_base = (long)imgp->argc; 175 return 0; 176 } 177 178 extern int _ucodesel, _udatasel; 179 180 /* 181 * Send an interrupt to process. 182 * 183 * Stack is set up to allow sigcode stored 184 * in u. to call routine, followed by kcall 185 * to sigreturn routine below. After sigreturn 186 * resets the signal mask, the stack, and the 187 * frame pointer, it returns to the user 188 * specified pc, psl. 189 */ 190 191 static void 192 linux_sendsig(sig_t catcher, int sig, int mask, u_long code) 193 { 194 register struct proc *p = curproc; 195 register struct trapframe *regs; 196 struct linux_sigframe *fp, frame; 197 struct sigacts *psp = p->p_sigacts; 198 int oonstack; 199 200 regs = p->p_md.md_regs; 201 oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; 202 203 #ifdef DEBUG 204 printf("Linux-emul(%ld): linux_sendsig(%p, %d, %d, %lu)\n", 205 (long)p->p_pid, catcher, sig, mask, code); 206 #endif 207 /* 208 * Allocate space for the signal handler context. 209 */ 210 if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && 211 (psp->ps_sigonstack & sigmask(sig))) { 212 fp = (struct linux_sigframe *)(psp->ps_sigstk.ss_sp + 213 psp->ps_sigstk.ss_size - sizeof(struct linux_sigframe)); 214 psp->ps_sigstk.ss_flags |= SS_ONSTACK; 215 } else { 216 fp = (struct linux_sigframe *)regs->tf_esp - 1; 217 } 218 219 /* 220 * grow() will return FALSE if the fp will not fit inside the stack 221 * and the stack can not be grown. useracc will return FALSE 222 * if access is denied. 
223 */ 224 #ifdef COMPAT_LINUX_THREADS 225 #ifdef USE_VM_STACK 226 #ifndef USE_VM_STACK_FOR_EXEC 227 if ((((caddr_t)fp > p->p_vmspace->vm_maxsaddr && 228 (caddr_t)fp < (caddr_t)USRSTACK && 229 grow(p, (int)fp) == FALSE) || 230 (((caddr_t)fp <= p->p_vmspace->vm_maxsaddr || 231 (caddr_t)fp >= (caddr_t)USRSTACK) && 232 grow_stack (p, (int)fp) == FALSE)) || 233 #else 234 if ((grow_stack (p, (int)fp) == FALSE) || 235 #endif /* USE_VM_STACK_FOR_EXEC */ 236 #else 237 if ((grow(p, (int)fp) == FALSE) || 238 #endif /* USE_VM_STACK */ 239 #else 240 if ((grow(p, (int)fp) == FALSE) || 241 #endif /* COMPAT_LINUX_THREADS */ 242 (useracc((caddr_t)fp, sizeof (struct linux_sigframe), B_WRITE) == FALSE)) { 243 /* 244 * Process has trashed its stack; give it an illegal 245 * instruction to halt it in its tracks. 246 */ 247 SIGACTION(p, SIGILL) = SIG_DFL; 248 sig = sigmask(SIGILL); 249 p->p_sigignore &= ~sig; 250 p->p_sigcatch &= ~sig; 251 p->p_sigmask &= ~sig; 252 psignal(p, SIGILL); 253 return; 254 } 255 256 /* 257 * Build the argument list for the signal handler. 258 */ 259 if (p->p_sysent->sv_sigtbl) { 260 if (sig < p->p_sysent->sv_sigsize) 261 sig = p->p_sysent->sv_sigtbl[sig]; 262 else 263 sig = p->p_sysent->sv_sigsize + 1; 264 } 265 266 frame.sf_handler = catcher; 267 frame.sf_sig = sig; 268 269 /* 270 * Build the signal context to be used by sigreturn. 
271 */ 272 frame.sf_sc.sc_mask = mask; 273 __asm("movl %%gs,%w0" : "=r" (frame.sf_sc.sc_gs)); 274 __asm("movl %%fs,%w0" : "=r" (frame.sf_sc.sc_fs)); 275 frame.sf_sc.sc_es = regs->tf_es; 276 frame.sf_sc.sc_ds = regs->tf_ds; 277 frame.sf_sc.sc_edi = regs->tf_edi; 278 frame.sf_sc.sc_esi = regs->tf_esi; 279 frame.sf_sc.sc_ebp = regs->tf_ebp; 280 frame.sf_sc.sc_ebx = regs->tf_ebx; 281 frame.sf_sc.sc_edx = regs->tf_edx; 282 frame.sf_sc.sc_ecx = regs->tf_ecx; 283 frame.sf_sc.sc_eax = regs->tf_eax; 284 frame.sf_sc.sc_eip = regs->tf_eip; 285 frame.sf_sc.sc_cs = regs->tf_cs; 286 frame.sf_sc.sc_eflags = regs->tf_eflags; 287 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 288 frame.sf_sc.sc_ss = regs->tf_ss; 289 frame.sf_sc.sc_err = regs->tf_err; 290 frame.sf_sc.sc_trapno = code; /* XXX ???? */ 291 292 if (copyout(&frame, fp, sizeof(frame)) != 0) { 293 /* 294 * Process has trashed its stack; give it an illegal 295 * instruction to halt it in its tracks. 296 */ 297 sigexit(p, SIGILL); 298 /* NOTREACHED */ 299 } 300 301 /* 302 * Build context to run handler in. 303 */ 304 regs->tf_esp = (int)fp; 305 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 306 regs->tf_eflags &= ~PSL_VM; 307 regs->tf_cs = _ucodesel; 308 regs->tf_ds = _udatasel; 309 regs->tf_es = _udatasel; 310 regs->tf_ss = _udatasel; 311 } 312 313 /* 314 * System call to cleanup state after a signal 315 * has been taken. Reset signal mask and 316 * stack state from context left by sendsig (above). 317 * Return to previous pc and psl as specified by 318 * context left by sendsig. Check carefully to 319 * make sure that the user has not modified the 320 * psl to gain improper privileges or to cause 321 * a machine fault. 
322 */ 323 int 324 linux_sigreturn(p, args) 325 struct proc *p; 326 struct linux_sigreturn_args *args; 327 { 328 struct linux_sigcontext *scp, context; 329 register struct trapframe *regs; 330 int eflags; 331 332 regs = p->p_md.md_regs; 333 334 #ifdef DEBUG 335 printf("Linux-emul(%ld): linux_sigreturn(%p)\n", 336 (long)p->p_pid, (void *)args->scp); 337 #endif 338 /* 339 * The trampoline code hands us the context. 340 * It is unsafe to keep track of it ourselves, in the event that a 341 * program jumps out of a signal handler. 342 */ 343 scp = SCARG(args,scp); 344 if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0) 345 return (EFAULT); 346 347 /* 348 * Check for security violations. 349 */ 350 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 351 eflags = context.sc_eflags; 352 /* 353 * XXX do allow users to change the privileged flag PSL_RF. The 354 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 355 * sometimes set it there too. tf_eflags is kept in the signal 356 * context during signal handling and there is no other place 357 * to remember it, so the PSL_RF bit may be corrupted by the 358 * signal handler without us knowing. Corruption of the PSL_RF 359 * bit at worst causes one more or one less debugger trap, so 360 * allowing it is fairly harmless. 361 */ 362 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { 363 return(EINVAL); 364 } 365 366 /* 367 * Don't allow users to load a valid privileged %cs. Let the 368 * hardware check for invalid selectors, excess privilege in 369 * other selectors, invalid %eip's and invalid %esp's. 370 */ 371 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 372 if (!CS_SECURE(context.sc_cs)) { 373 trapsignal(p, SIGBUS, T_PROTFLT); 374 return(EINVAL); 375 } 376 377 p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; 378 p->p_sigmask = context.sc_mask &~ 379 (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP)); 380 /* 381 * Restore signal context. 
382 */ 383 /* %fs and %gs were restored by the trampoline. */ 384 regs->tf_es = context.sc_es; 385 regs->tf_ds = context.sc_ds; 386 regs->tf_edi = context.sc_edi; 387 regs->tf_esi = context.sc_esi; 388 regs->tf_ebp = context.sc_ebp; 389 regs->tf_ebx = context.sc_ebx; 390 regs->tf_edx = context.sc_edx; 391 regs->tf_ecx = context.sc_ecx; 392 regs->tf_eax = context.sc_eax; 393 regs->tf_eip = context.sc_eip; 394 regs->tf_cs = context.sc_cs; 395 regs->tf_eflags = eflags; 396 regs->tf_esp = context.sc_esp_at_signal; 397 regs->tf_ss = context.sc_ss; 398 399 return (EJUSTRETURN); 400 } 401 402 static void 403 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 404 { 405 args[0] = tf->tf_ebx; 406 args[1] = tf->tf_ecx; 407 args[2] = tf->tf_edx; 408 args[3] = tf->tf_esi; 409 args[4] = tf->tf_edi; 410 *params = NULL; /* no copyin */ 411 } 412 413 struct sysentvec linux_sysvec = { 414 LINUX_SYS_MAXSYSCALL, 415 linux_sysent, 416 0xff, 417 NSIG, 418 bsd_to_linux_signal, 419 ELAST + 1, 420 bsd_to_linux_errno, 421 translate_traps, 422 linux_fixup, 423 linux_sendsig, 424 linux_sigcode, 425 &linux_szsigcode, 426 linux_prepsyscall, 427 "Linux a.out", 428 aout_coredump 429 }; 430 431 struct sysentvec elf_linux_sysvec = { 432 LINUX_SYS_MAXSYSCALL, 433 linux_sysent, 434 0xff, 435 NSIG, 436 bsd_to_linux_signal, 437 ELAST + 1, 438 bsd_to_linux_errno, 439 translate_traps, 440 elf_linux_fixup, 441 linux_sendsig, 442 linux_sigcode, 443 &linux_szsigcode, 444 linux_prepsyscall, 445 "Linux ELF", 446 elf_coredump 447 }; 448 449 /* 450 * Installed either via SYSINIT() or via LKM stubs. 
451 */ 452 static Elf32_Brandinfo linux_brand = { 453 "Linux", 454 "/compat/linux", 455 "/lib/ld-linux.so.1", 456 &elf_linux_sysvec 457 }; 458 459 static Elf32_Brandinfo linux_glibc2brand = { 460 "Linux", 461 "/compat/linux", 462 "/lib/ld-linux.so.2", 463 &elf_linux_sysvec 464 }; 465 466 Elf32_Brandinfo *linux_brandlist[] = { 467 &linux_brand, 468 &linux_glibc2brand, 469 NULL 470 }; 471 472 static int 473 linux_elf_modevent(module_t mod, int type, void *data) 474 { 475 Elf32_Brandinfo **brandinfo; 476 int error; 477 478 error = 0; 479 480 switch(type) { 481 case MOD_LOAD: 482 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 483 ++brandinfo) 484 if (elf_insert_brand_entry(*brandinfo) < 0) 485 error = EINVAL; 486 if (error) 487 printf("cannot insert Linux elf brand handler\n"); 488 else if (bootverbose) 489 printf("Linux-ELF exec handler installed\n"); 490 break; 491 case MOD_UNLOAD: 492 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 493 ++brandinfo) 494 if (elf_remove_brand_entry(*brandinfo) < 0) 495 error = EINVAL; 496 if (error) 497 printf("Could not deinstall ELF interpreter entry\n"); 498 else if (bootverbose) 499 printf("Linux-elf exec handler removed\n"); 500 break; 501 default: 502 break; 503 } 504 return error; 505 } 506 static moduledata_t linux_elf_mod = { 507 "linuxelf", 508 linux_elf_modevent, 509 0 510 }; 511 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 512