1 /*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/fcntl.h> 36 #include <sys/imgact.h> 37 #include <sys/imgact_aout.h> 38 #include <sys/imgact_elf.h> 39 #include <sys/kernel.h> 40 #include <sys/lock.h> 41 #include <sys/malloc.h> 42 #include <sys/module.h> 43 #include <sys/mutex.h> 44 #include <sys/proc.h> 45 #include <sys/signalvar.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sysent.h> 48 #include <sys/sysproto.h> 49 #include <sys/vnode.h> 50 #include <sys/eventhandler.h> 51 52 #include <vm/vm.h> 53 #include <vm/pmap.h> 54 #include <vm/vm_extern.h> 55 #include <vm/vm_map.h> 56 #include <vm/vm_object.h> 57 #include <vm/vm_page.h> 58 #include <vm/vm_param.h> 59 60 #include <machine/cpu.h> 61 #include <machine/cputypes.h> 62 #include <machine/md_var.h> 63 #include <machine/pcb.h> 64 65 #include <i386/linux/linux.h> 66 #include <i386/linux/linux_proto.h> 67 #include <compat/linux/linux_emul.h> 68 #include <compat/linux/linux_mib.h> 69 #include <compat/linux/linux_misc.h> 70 #include <compat/linux/linux_signal.h> 71 #include <compat/linux/linux_util.h> 72 73 MODULE_VERSION(linux, 1); 74 75 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 76 77 #if BYTE_ORDER == LITTLE_ENDIAN 78 #define SHELLMAGIC 0x2123 /* #! */ 79 #else 80 #define SHELLMAGIC 0x2321 81 #endif 82 83 /* 84 * Allow the sendsig functions to use the ldebug() facility 85 * even though they are not syscalls themselves. Map them 86 * to syscall 0. This is slightly less bogus than using 87 * ldebug(sigreturn). 88 */ 89 #define LINUX_SYS_linux_rt_sendsig 0 90 #define LINUX_SYS_linux_sendsig 0 91 92 extern char linux_sigcode[]; 93 extern int linux_szsigcode; 94 95 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 96 97 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 98 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 99 100 static int linux_fixup(register_t **stack_base, 101 struct image_params *iparams); 102 static int elf_linux_fixup(register_t **stack_base, 103 struct image_params *iparams); 104 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 105 caddr_t *params); 106 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 107 static void exec_linux_setregs(struct thread *td, u_long entry, 108 u_long stack, u_long ps_strings); 109 static register_t *linux_copyout_strings(struct image_params *imgp); 110 111 static int linux_szplatform; 112 const char *linux_platform; 113 114 extern LIST_HEAD(futex_list, futex) futex_list; 115 extern struct sx futex_sx; 116 117 static eventhandler_tag linux_exit_tag; 118 static eventhandler_tag linux_schedtail_tag; 119 static eventhandler_tag linux_exec_tag; 120 121 /* 122 * Linux syscalls return negative errno's, we do positive and map them 123 * Reference: 124 * FreeBSD: src/sys/sys/errno.h 125 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 126 * linux-2.6.17.8/include/asm-generic/errno.h 127 */ 128 static int bsd_to_linux_errno[ELAST + 1] = { 129 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 130 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 131 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 132 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 133 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 134 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 135 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 136 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 137 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 138 -72, -67, -71 139 }; 140 141 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 142 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 143 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 144 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 145 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 146 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 147 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 148 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 149 0, LINUX_SIGUSR1, LINUX_SIGUSR2 150 }; 151 152 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 153 SIGHUP, SIGINT, SIGQUIT, SIGILL, 154 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 155 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 156 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 157 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 158 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 159 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 160 SIGIO, SIGURG, SIGSYS 161 }; 162 163 #define LINUX_T_UNKNOWN 255 164 static int _bsd_to_linux_trapcode[] = { 165 LINUX_T_UNKNOWN, /* 0 */ 166 6, /* 1 T_PRIVINFLT */ 167 LINUX_T_UNKNOWN, /* 2 */ 168 3, /* 3 T_BPTFLT */ 169 LINUX_T_UNKNOWN, /* 4 */ 170 LINUX_T_UNKNOWN, /* 5 */ 171 16, /* 6 T_ARITHTRAP */ 172 254, /* 7 T_ASTFLT */ 173 LINUX_T_UNKNOWN, /* 8 */ 174 13, /* 9 T_PROTFLT */ 175 1, /* 10 T_TRCTRAP */ 176 LINUX_T_UNKNOWN, /* 11 */ 177 14, /* 12 T_PAGEFLT */ 178 LINUX_T_UNKNOWN, /* 13 */ 179 17, /* 14 T_ALIGNFLT */ 180 LINUX_T_UNKNOWN, /* 15 */ 181 LINUX_T_UNKNOWN, /* 16 */ 182 LINUX_T_UNKNOWN, /* 17 */ 183 0, /* 18 T_DIVIDE */ 184 2, /* 19 T_NMI */ 185 4, /* 20 T_OFLOW */ 186 5, /* 21 T_BOUND */ 187 7, /* 22 T_DNA */ 188 8, /* 23 T_DOUBLEFLT */ 189 9, /* 24 T_FPOPFLT */ 190 10, /* 25 T_TSSFLT */ 191 11, /* 26 T_SEGNPFLT */ 192 12, /* 27 T_STKFLT */ 193 18, /* 28 T_MCHK */ 194 19, /* 29 T_XMMFLT */ 195 15 /* 30 T_RESERVED */ 196 }; 197 #define bsd_to_linux_trapcode(code) \ 198 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 199 _bsd_to_linux_trapcode[(code)]: \ 200 LINUX_T_UNKNOWN) 201 202 /* 203 * If FreeBSD & Linux have a difference of opinion about what a trap 204 * means, deal with it here. 205 * 206 * MPSAFE 207 */ 208 static int 209 translate_traps(int signal, int trap_code) 210 { 211 if (signal != SIGBUS) 212 return signal; 213 switch (trap_code) { 214 case T_PROTFLT: 215 case T_TSSFLT: 216 case T_DOUBLEFLT: 217 case T_PAGEFLT: 218 return SIGSEGV; 219 default: 220 return signal; 221 } 222 } 223 224 static int 225 linux_fixup(register_t **stack_base, struct image_params *imgp) 226 { 227 register_t *argv, *envp; 228 229 argv = *stack_base; 230 envp = *stack_base + (imgp->args->argc + 1); 231 (*stack_base)--; 232 **stack_base = (intptr_t)(void *)envp; 233 (*stack_base)--; 234 **stack_base = (intptr_t)(void *)argv; 235 (*stack_base)--; 236 **stack_base = imgp->args->argc; 237 return (0); 238 } 239 240 static int 241 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 242 { 243 struct proc *p; 244 Elf32_Auxargs *args; 245 Elf32_Addr *uplatform; 246 struct ps_strings *arginfo; 247 register_t *pos; 248 249 KASSERT(curthread->td_proc == imgp->proc, 250 ("unsafe elf_linux_fixup(), should be curproc")); 251 252 p = imgp->proc; 253 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 254 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode - 255 linux_szplatform); 256 args = (Elf32_Auxargs *)imgp->auxargs; 257 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 258 259 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); 260 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, hz); 261 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 262 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 263 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 264 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 265 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 266 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 267 AUXARGS_ENTRY(pos, AT_BASE, args->base); 268 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, 0); 269 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 270 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 271 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 272 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 273 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 274 if (args->execfd != -1) 275 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 276 AUXARGS_ENTRY(pos, AT_NULL, 0); 277 278 free(imgp->auxargs, M_TEMP); 279 imgp->auxargs = NULL; 280 281 (*stack_base)--; 282 **stack_base = (register_t)imgp->args->argc; 283 return (0); 284 } 285 286 /* 287 * Copied from kern/kern_exec.c 288 */ 289 static register_t * 290 linux_copyout_strings(struct image_params *imgp) 291 { 292 int argc, envc; 293 char **vectp; 294 char *stringp, *destp; 295 register_t *stack_base; 296 struct ps_strings *arginfo; 297 struct proc *p; 298 299 /* 300 * Calculate string base and vector table pointers. 301 * Also deal with signal trampoline code for this exec type. 302 */ 303 p = imgp->proc; 304 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 305 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE - 306 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace), 307 sizeof(char *)); 308 309 /* 310 * install sigcode 311 */ 312 copyout(p->p_sysent->sv_sigcode, ((caddr_t)arginfo - 313 linux_szsigcode), linux_szsigcode); 314 315 /* 316 * install LINUX_PLATFORM 317 */ 318 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode - 319 linux_szplatform), linux_szplatform); 320 321 /* 322 * If we have a valid auxargs ptr, prepare some room 323 * on the stack. 324 */ 325 if (imgp->auxargs) { 326 /* 327 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 328 * lower compatibility. 329 */ 330 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 331 (LINUX_AT_COUNT * 2); 332 /* 333 * The '+ 2' is for the null pointers at the end of each of 334 * the arg and env vector sets,and imgp->auxarg_size is room 335 * for argument of Runtime loader. 336 */ 337 vectp = (char **)(destp - (imgp->args->argc + 338 imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); 339 } else { 340 /* 341 * The '+ 2' is for the null pointers at the end of each of 342 * the arg and env vector sets 343 */ 344 vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * 345 sizeof(char *)); 346 } 347 348 /* 349 * vectp also becomes our initial stack base 350 */ 351 stack_base = (register_t *)vectp; 352 353 stringp = imgp->args->begin_argv; 354 argc = imgp->args->argc; 355 envc = imgp->args->envc; 356 357 /* 358 * Copy out strings - arguments and environment. 359 */ 360 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 361 362 /* 363 * Fill in "ps_strings" struct for ps, w, etc. 364 */ 365 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 366 suword(&arginfo->ps_nargvstr, argc); 367 368 /* 369 * Fill in argument portion of vector table. 370 */ 371 for (; argc > 0; --argc) { 372 suword(vectp++, (long)(intptr_t)destp); 373 while (*stringp++ != 0) 374 destp++; 375 destp++; 376 } 377 378 /* a null vector table pointer separates the argp's from the envp's */ 379 suword(vectp++, 0); 380 381 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 382 suword(&arginfo->ps_nenvstr, envc); 383 384 /* 385 * Fill in environment portion of vector table. 386 */ 387 for (; envc > 0; --envc) { 388 suword(vectp++, (long)(intptr_t)destp); 389 while (*stringp++ != 0) 390 destp++; 391 destp++; 392 } 393 394 /* end of vector table is a null pointer */ 395 suword(vectp, 0); 396 397 return (stack_base); 398 } 399 400 401 402 extern int _ucodesel, _udatasel; 403 extern unsigned long linux_sznonrtsigcode; 404 405 static void 406 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 407 { 408 struct thread *td = curthread; 409 struct proc *p = td->td_proc; 410 struct sigacts *psp; 411 struct trapframe *regs; 412 struct l_rt_sigframe *fp, frame; 413 int sig, code; 414 int oonstack; 415 416 sig = ksi->ksi_signo; 417 code = ksi->ksi_code; 418 PROC_LOCK_ASSERT(p, MA_OWNED); 419 psp = p->p_sigacts; 420 mtx_assert(&psp->ps_mtx, MA_OWNED); 421 regs = td->td_frame; 422 oonstack = sigonstack(regs->tf_esp); 423 424 #ifdef DEBUG 425 if (ldebug(rt_sendsig)) 426 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 427 catcher, sig, (void*)mask, code); 428 #endif 429 /* 430 * Allocate space for the signal handler context. 431 */ 432 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 433 SIGISMEMBER(psp->ps_sigonstack, sig)) { 434 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 435 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 436 } else 437 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 438 mtx_unlock(&psp->ps_mtx); 439 440 /* 441 * Build the argument list for the signal handler. 442 */ 443 if (p->p_sysent->sv_sigtbl) 444 if (sig <= p->p_sysent->sv_sigsize) 445 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 446 447 bzero(&frame, sizeof(frame)); 448 449 frame.sf_handler = catcher; 450 frame.sf_sig = sig; 451 frame.sf_siginfo = &fp->sf_si; 452 frame.sf_ucontext = &fp->sf_sc; 453 454 /* Fill in POSIX parts */ 455 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 456 457 /* 458 * Build the signal context to be used by sigreturn. 459 */ 460 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 461 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 462 463 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 464 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 465 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 466 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 467 PROC_UNLOCK(p); 468 469 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 470 471 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 472 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 473 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 474 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 475 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 476 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 477 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 478 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 479 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 480 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 481 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 482 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 483 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 484 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 485 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 486 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 487 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 488 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 489 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 490 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 491 492 #ifdef DEBUG 493 if (ldebug(rt_sendsig)) 494 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 495 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 496 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 497 #endif 498 499 if (copyout(&frame, fp, sizeof(frame)) != 0) { 500 /* 501 * Process has trashed its stack; give it an illegal 502 * instruction to halt it in its tracks. 503 */ 504 #ifdef DEBUG 505 if (ldebug(rt_sendsig)) 506 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 507 fp, oonstack); 508 #endif 509 PROC_LOCK(p); 510 sigexit(td, SIGILL); 511 } 512 513 /* 514 * Build context to run handler in. 515 */ 516 regs->tf_esp = (int)fp; 517 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 518 linux_sznonrtsigcode; 519 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 520 regs->tf_cs = _ucodesel; 521 regs->tf_ds = _udatasel; 522 regs->tf_es = _udatasel; 523 regs->tf_fs = _udatasel; 524 regs->tf_ss = _udatasel; 525 PROC_LOCK(p); 526 mtx_lock(&psp->ps_mtx); 527 } 528 529 530 /* 531 * Send an interrupt to process. 532 * 533 * Stack is set up to allow sigcode stored 534 * in u. to call routine, followed by kcall 535 * to sigreturn routine below. After sigreturn 536 * resets the signal mask, the stack, and the 537 * frame pointer, it returns to the user 538 * specified pc, psl. 539 */ 540 static void 541 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 542 { 543 struct thread *td = curthread; 544 struct proc *p = td->td_proc; 545 struct sigacts *psp; 546 struct trapframe *regs; 547 struct l_sigframe *fp, frame; 548 l_sigset_t lmask; 549 int sig, code; 550 int oonstack, i; 551 552 PROC_LOCK_ASSERT(p, MA_OWNED); 553 psp = p->p_sigacts; 554 sig = ksi->ksi_signo; 555 code = ksi->ksi_code; 556 mtx_assert(&psp->ps_mtx, MA_OWNED); 557 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 558 /* Signal handler installed with SA_SIGINFO. */ 559 linux_rt_sendsig(catcher, ksi, mask); 560 return; 561 } 562 regs = td->td_frame; 563 oonstack = sigonstack(regs->tf_esp); 564 565 #ifdef DEBUG 566 if (ldebug(sendsig)) 567 printf(ARGS(sendsig, "%p, %d, %p, %u"), 568 catcher, sig, (void*)mask, code); 569 #endif 570 571 /* 572 * Allocate space for the signal handler context. 573 */ 574 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 575 SIGISMEMBER(psp->ps_sigonstack, sig)) { 576 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 577 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 578 } else 579 fp = (struct l_sigframe *)regs->tf_esp - 1; 580 mtx_unlock(&psp->ps_mtx); 581 PROC_UNLOCK(p); 582 583 /* 584 * Build the argument list for the signal handler. 585 */ 586 if (p->p_sysent->sv_sigtbl) 587 if (sig <= p->p_sysent->sv_sigsize) 588 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 589 590 bzero(&frame, sizeof(frame)); 591 592 frame.sf_handler = catcher; 593 frame.sf_sig = sig; 594 595 bsd_to_linux_sigset(mask, &lmask); 596 597 /* 598 * Build the signal context to be used by sigreturn. 599 */ 600 frame.sf_sc.sc_mask = lmask.__bits[0]; 601 frame.sf_sc.sc_gs = rgs(); 602 frame.sf_sc.sc_fs = regs->tf_fs; 603 frame.sf_sc.sc_es = regs->tf_es; 604 frame.sf_sc.sc_ds = regs->tf_ds; 605 frame.sf_sc.sc_edi = regs->tf_edi; 606 frame.sf_sc.sc_esi = regs->tf_esi; 607 frame.sf_sc.sc_ebp = regs->tf_ebp; 608 frame.sf_sc.sc_ebx = regs->tf_ebx; 609 frame.sf_sc.sc_edx = regs->tf_edx; 610 frame.sf_sc.sc_ecx = regs->tf_ecx; 611 frame.sf_sc.sc_eax = regs->tf_eax; 612 frame.sf_sc.sc_eip = regs->tf_eip; 613 frame.sf_sc.sc_cs = regs->tf_cs; 614 frame.sf_sc.sc_eflags = regs->tf_eflags; 615 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 616 frame.sf_sc.sc_ss = regs->tf_ss; 617 frame.sf_sc.sc_err = regs->tf_err; 618 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 619 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 620 621 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 622 frame.sf_extramask[i] = lmask.__bits[i+1]; 623 624 if (copyout(&frame, fp, sizeof(frame)) != 0) { 625 /* 626 * Process has trashed its stack; give it an illegal 627 * instruction to halt it in its tracks. 628 */ 629 PROC_LOCK(p); 630 sigexit(td, SIGILL); 631 } 632 633 /* 634 * Build context to run handler in. 635 */ 636 regs->tf_esp = (int)fp; 637 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 638 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 639 regs->tf_cs = _ucodesel; 640 regs->tf_ds = _udatasel; 641 regs->tf_es = _udatasel; 642 regs->tf_fs = _udatasel; 643 regs->tf_ss = _udatasel; 644 PROC_LOCK(p); 645 mtx_lock(&psp->ps_mtx); 646 } 647 648 /* 649 * System call to cleanup state after a signal 650 * has been taken. Reset signal mask and 651 * stack state from context left by sendsig (above). 652 * Return to previous pc and psl as specified by 653 * context left by sendsig. Check carefully to 654 * make sure that the user has not modified the 655 * psl to gain improper privileges or to cause 656 * a machine fault. 657 */ 658 int 659 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 660 { 661 struct proc *p = td->td_proc; 662 struct l_sigframe frame; 663 struct trapframe *regs; 664 l_sigset_t lmask; 665 int eflags, i; 666 ksiginfo_t ksi; 667 668 regs = td->td_frame; 669 670 #ifdef DEBUG 671 if (ldebug(sigreturn)) 672 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 673 #endif 674 /* 675 * The trampoline code hands us the sigframe. 676 * It is unsafe to keep track of it ourselves, in the event that a 677 * program jumps out of a signal handler. 678 */ 679 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 680 return (EFAULT); 681 682 /* 683 * Check for security violations. 684 */ 685 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 686 eflags = frame.sf_sc.sc_eflags; 687 /* 688 * XXX do allow users to change the privileged flag PSL_RF. The 689 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 690 * sometimes set it there too. tf_eflags is kept in the signal 691 * context during signal handling and there is no other place 692 * to remember it, so the PSL_RF bit may be corrupted by the 693 * signal handler without us knowing. Corruption of the PSL_RF 694 * bit at worst causes one more or one less debugger trap, so 695 * allowing it is fairly harmless. 696 */ 697 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 698 return(EINVAL); 699 700 /* 701 * Don't allow users to load a valid privileged %cs. Let the 702 * hardware check for invalid selectors, excess privilege in 703 * other selectors, invalid %eip's and invalid %esp's. 704 */ 705 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 706 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 707 ksiginfo_init_trap(&ksi); 708 ksi.ksi_signo = SIGBUS; 709 ksi.ksi_code = BUS_OBJERR; 710 ksi.ksi_trapno = T_PROTFLT; 711 ksi.ksi_addr = (void *)regs->tf_eip; 712 trapsignal(td, &ksi); 713 return(EINVAL); 714 } 715 716 lmask.__bits[0] = frame.sf_sc.sc_mask; 717 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 718 lmask.__bits[i+1] = frame.sf_extramask[i]; 719 PROC_LOCK(p); 720 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 721 SIG_CANTMASK(td->td_sigmask); 722 signotify(td); 723 PROC_UNLOCK(p); 724 725 /* 726 * Restore signal context. 727 */ 728 /* %gs was restored by the trampoline. */ 729 regs->tf_fs = frame.sf_sc.sc_fs; 730 regs->tf_es = frame.sf_sc.sc_es; 731 regs->tf_ds = frame.sf_sc.sc_ds; 732 regs->tf_edi = frame.sf_sc.sc_edi; 733 regs->tf_esi = frame.sf_sc.sc_esi; 734 regs->tf_ebp = frame.sf_sc.sc_ebp; 735 regs->tf_ebx = frame.sf_sc.sc_ebx; 736 regs->tf_edx = frame.sf_sc.sc_edx; 737 regs->tf_ecx = frame.sf_sc.sc_ecx; 738 regs->tf_eax = frame.sf_sc.sc_eax; 739 regs->tf_eip = frame.sf_sc.sc_eip; 740 regs->tf_cs = frame.sf_sc.sc_cs; 741 regs->tf_eflags = eflags; 742 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 743 regs->tf_ss = frame.sf_sc.sc_ss; 744 745 return (EJUSTRETURN); 746 } 747 748 /* 749 * System call to cleanup state after a signal 750 * has been taken. Reset signal mask and 751 * stack state from context left by rt_sendsig (above). 752 * Return to previous pc and psl as specified by 753 * context left by sendsig. Check carefully to 754 * make sure that the user has not modified the 755 * psl to gain improper privileges or to cause 756 * a machine fault. 757 */ 758 int 759 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 760 { 761 struct proc *p = td->td_proc; 762 struct l_ucontext uc; 763 struct l_sigcontext *context; 764 l_stack_t *lss; 765 stack_t ss; 766 struct trapframe *regs; 767 int eflags; 768 ksiginfo_t ksi; 769 770 regs = td->td_frame; 771 772 #ifdef DEBUG 773 if (ldebug(rt_sigreturn)) 774 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 775 #endif 776 /* 777 * The trampoline code hands us the ucontext. 778 * It is unsafe to keep track of it ourselves, in the event that a 779 * program jumps out of a signal handler. 780 */ 781 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 782 return (EFAULT); 783 784 context = &uc.uc_mcontext; 785 786 /* 787 * Check for security violations. 788 */ 789 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 790 eflags = context->sc_eflags; 791 /* 792 * XXX do allow users to change the privileged flag PSL_RF. The 793 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 794 * sometimes set it there too. tf_eflags is kept in the signal 795 * context during signal handling and there is no other place 796 * to remember it, so the PSL_RF bit may be corrupted by the 797 * signal handler without us knowing. Corruption of the PSL_RF 798 * bit at worst causes one more or one less debugger trap, so 799 * allowing it is fairly harmless. 800 */ 801 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 802 return(EINVAL); 803 804 /* 805 * Don't allow users to load a valid privileged %cs. Let the 806 * hardware check for invalid selectors, excess privilege in 807 * other selectors, invalid %eip's and invalid %esp's. 808 */ 809 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 810 if (!CS_SECURE(context->sc_cs)) { 811 ksiginfo_init_trap(&ksi); 812 ksi.ksi_signo = SIGBUS; 813 ksi.ksi_code = BUS_OBJERR; 814 ksi.ksi_trapno = T_PROTFLT; 815 ksi.ksi_addr = (void *)regs->tf_eip; 816 trapsignal(td, &ksi); 817 return(EINVAL); 818 } 819 820 PROC_LOCK(p); 821 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 822 SIG_CANTMASK(td->td_sigmask); 823 signotify(td); 824 PROC_UNLOCK(p); 825 826 /* 827 * Restore signal context 828 */ 829 /* %gs was restored by the trampoline. */ 830 regs->tf_fs = context->sc_fs; 831 regs->tf_es = context->sc_es; 832 regs->tf_ds = context->sc_ds; 833 regs->tf_edi = context->sc_edi; 834 regs->tf_esi = context->sc_esi; 835 regs->tf_ebp = context->sc_ebp; 836 regs->tf_ebx = context->sc_ebx; 837 regs->tf_edx = context->sc_edx; 838 regs->tf_ecx = context->sc_ecx; 839 regs->tf_eax = context->sc_eax; 840 regs->tf_eip = context->sc_eip; 841 regs->tf_cs = context->sc_cs; 842 regs->tf_eflags = eflags; 843 regs->tf_esp = context->sc_esp_at_signal; 844 regs->tf_ss = context->sc_ss; 845 846 /* 847 * call sigaltstack & ignore results.. 848 */ 849 lss = &uc.uc_stack; 850 ss.ss_sp = lss->ss_sp; 851 ss.ss_size = lss->ss_size; 852 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 853 854 #ifdef DEBUG 855 if (ldebug(rt_sigreturn)) 856 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 857 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 858 #endif 859 (void)kern_sigaltstack(td, &ss, NULL); 860 861 return (EJUSTRETURN); 862 } 863 864 /* 865 * MPSAFE 866 */ 867 static void 868 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 869 { 870 args[0] = tf->tf_ebx; 871 args[1] = tf->tf_ecx; 872 args[2] = tf->tf_edx; 873 args[3] = tf->tf_esi; 874 args[4] = tf->tf_edi; 875 args[5] = tf->tf_ebp; /* Unconfirmed */ 876 *params = NULL; /* no copyin */ 877 } 878 879 /* 880 * If a linux binary is exec'ing something, try this image activator 881 * first. We override standard shell script execution in order to 882 * be able to modify the interpreter path. We only do this if a linux 883 * binary is doing the exec, so we do not create an EXEC module for it. 884 */ 885 static int exec_linux_imgact_try(struct image_params *iparams); 886 887 static int 888 exec_linux_imgact_try(struct image_params *imgp) 889 { 890 const char *head = (const char *)imgp->image_header; 891 char *rpath; 892 int error = -1, len; 893 894 /* 895 * The interpreter for shell scripts run from a linux binary needs 896 * to be located in /compat/linux if possible in order to recursively 897 * maintain linux path emulation. 898 */ 899 if (((const short *)head)[0] == SHELLMAGIC) { 900 /* 901 * Run our normal shell image activator. If it succeeds attempt 902 * to use the alternate path for the interpreter. If an alternate 903 * path is found, use our stringspace to store it. 904 */ 905 if ((error = exec_shell_imgact(imgp)) == 0) { 906 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 907 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); 908 if (rpath != NULL) { 909 len = strlen(rpath) + 1; 910 911 if (len <= MAXSHELLCMDLEN) { 912 memcpy(imgp->interpreter_name, rpath, len); 913 } 914 free(rpath, M_TEMP); 915 } 916 } 917 } 918 return(error); 919 } 920 921 /* 922 * exec_setregs may initialize some registers differently than Linux 923 * does, thus potentially confusing Linux binaries. If necessary, we 924 * override the exec_setregs default(s) here. 925 */ 926 static void 927 exec_linux_setregs(struct thread *td, u_long entry, 928 u_long stack, u_long ps_strings) 929 { 930 struct pcb *pcb = td->td_pcb; 931 932 exec_setregs(td, entry, stack, ps_strings); 933 934 /* Linux sets %gs to 0, we default to _udatasel */ 935 pcb->pcb_gs = 0; 936 load_gs(0); 937 938 pcb->pcb_initial_npxcw = __LINUX_NPXCW__; 939 } 940 941 static void 942 linux_get_machine(const char **dst) 943 { 944 945 switch (cpu_class) { 946 case CPUCLASS_686: 947 *dst = "i686"; 948 break; 949 case CPUCLASS_586: 950 *dst = "i586"; 951 break; 952 case CPUCLASS_486: 953 *dst = "i486"; 954 break; 955 default: 956 *dst = "i386"; 957 } 958 } 959 960 struct sysentvec linux_sysvec = { 961 .sv_size = LINUX_SYS_MAXSYSCALL, 962 .sv_table = linux_sysent, 963 .sv_mask = 0, 964 .sv_sigsize = LINUX_SIGTBLSZ, 965 .sv_sigtbl = bsd_to_linux_signal, 966 .sv_errsize = ELAST + 1, 967 .sv_errtbl = bsd_to_linux_errno, 968 .sv_transtrap = translate_traps, 969 .sv_fixup = linux_fixup, 970 .sv_sendsig = linux_sendsig, 971 .sv_sigcode = linux_sigcode, 972 .sv_szsigcode = &linux_szsigcode, 973 .sv_prepsyscall = linux_prepsyscall, 974 .sv_name = "Linux a.out", 975 .sv_coredump = NULL, 976 .sv_imgact_try = exec_linux_imgact_try, 977 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 978 .sv_pagesize = PAGE_SIZE, 979 .sv_minuser = VM_MIN_ADDRESS, 980 .sv_maxuser = VM_MAXUSER_ADDRESS, 981 .sv_usrstack = USRSTACK, 982 .sv_psstrings = PS_STRINGS, 983 .sv_stackprot = VM_PROT_ALL, 984 .sv_copyout_strings = exec_copyout_strings, 985 .sv_setregs = exec_linux_setregs, 986 .sv_fixlimit = NULL, 987 .sv_maxssiz = NULL, 988 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32 989 }; 990 991 struct sysentvec elf_linux_sysvec = { 992 .sv_size = LINUX_SYS_MAXSYSCALL, 993 .sv_table = linux_sysent, 994 .sv_mask = 0, 995 .sv_sigsize = LINUX_SIGTBLSZ, 996 .sv_sigtbl = bsd_to_linux_signal, 997 .sv_errsize = ELAST + 1, 998 .sv_errtbl = bsd_to_linux_errno, 999 .sv_transtrap = translate_traps, 1000 .sv_fixup = elf_linux_fixup, 1001 .sv_sendsig = linux_sendsig, 1002 .sv_sigcode = linux_sigcode, 1003 .sv_szsigcode = &linux_szsigcode, 1004 .sv_prepsyscall = linux_prepsyscall, 1005 .sv_name = "Linux ELF", 1006 .sv_coredump = elf32_coredump, 1007 .sv_imgact_try = exec_linux_imgact_try, 1008 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1009 .sv_pagesize = PAGE_SIZE, 1010 .sv_minuser = VM_MIN_ADDRESS, 1011 .sv_maxuser = VM_MAXUSER_ADDRESS, 1012 .sv_usrstack = USRSTACK, 1013 .sv_psstrings = PS_STRINGS, 1014 .sv_stackprot = VM_PROT_ALL, 1015 .sv_copyout_strings = linux_copyout_strings, 1016 .sv_setregs = exec_linux_setregs, 1017 .sv_fixlimit = NULL, 1018 .sv_maxssiz = NULL, 1019 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 1020 }; 1021 1022 static char GNULINUX_ABI_VENDOR[] = "GNU"; 1023 1024 static Elf_Brandnote linux_brandnote = { 1025 .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR), 1026 .hdr.n_descsz = 16, 1027 .hdr.n_type = 1, 1028 .vendor = GNULINUX_ABI_VENDOR, 1029 .flags = 0 1030 }; 1031 1032 static Elf32_Brandinfo linux_brand = { 1033 .brand = ELFOSABI_LINUX, 1034 .machine = EM_386, 1035 .compat_3_brand = "Linux", 1036 .emul_path = "/compat/linux", 1037 .interp_path = "/lib/ld-linux.so.1", 1038 .sysvec = &elf_linux_sysvec, 1039 .interp_newpath = NULL, 1040 .brand_note = &linux_brandnote, 1041 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1042 }; 1043 1044 static Elf32_Brandinfo linux_glibc2brand = { 1045 .brand = ELFOSABI_LINUX, 1046 .machine = EM_386, 1047 .compat_3_brand = "Linux", 1048 .emul_path = "/compat/linux", 1049 .interp_path = "/lib/ld-linux.so.2", 1050 .sysvec = &elf_linux_sysvec, 1051 .interp_newpath = NULL, 1052 .brand_note = &linux_brandnote, 1053 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1054 }; 1055 1056 Elf32_Brandinfo *linux_brandlist[] = { 1057 &linux_brand, 1058 &linux_glibc2brand, 1059 NULL 1060 }; 1061 1062 static int 1063 linux_elf_modevent(module_t mod, int type, void *data) 1064 { 1065 Elf32_Brandinfo **brandinfo; 1066 int error; 1067 struct linux_ioctl_handler **lihp; 1068 struct linux_device_handler **ldhp; 1069 1070 error = 0; 1071 1072 switch(type) { 1073 case MOD_LOAD: 1074 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1075 ++brandinfo) 1076 if (elf32_insert_brand_entry(*brandinfo) < 0) 1077 error = EINVAL; 1078 if (error == 0) { 1079 SET_FOREACH(lihp, linux_ioctl_handler_set) 1080 linux_ioctl_register_handler(*lihp); 1081 SET_FOREACH(ldhp, linux_device_handler_set) 1082 linux_device_register_handler(*ldhp); 1083 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 1084 sx_init(&emul_shared_lock, "emuldata->shared lock"); 1085 LIST_INIT(&futex_list); 1086 sx_init(&futex_sx, "futex protection lock"); 1087 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 1088 NULL, 1000); 1089 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, 1090 NULL, 1000); 1091 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 1092 NULL, 1000); 1093 linux_get_machine(&linux_platform); 1094 linux_szplatform = roundup(strlen(linux_platform) + 1, 1095 sizeof(char *)); 1096 if (bootverbose) 1097 printf("Linux ELF exec handler installed\n"); 1098 } else 1099 printf("cannot insert Linux ELF brand handler\n"); 1100 break; 1101 case MOD_UNLOAD: 1102 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1103 ++brandinfo) 1104 if (elf32_brand_inuse(*brandinfo)) 1105 error = EBUSY; 1106 if (error == 0) { 1107 for (brandinfo = &linux_brandlist[0]; 1108 *brandinfo != NULL; ++brandinfo) 1109 if (elf32_remove_brand_entry(*brandinfo) < 0) 1110 error = EINVAL; 1111 } 1112 if (error == 0) { 1113 SET_FOREACH(lihp, linux_ioctl_handler_set) 1114 linux_ioctl_unregister_handler(*lihp); 1115 SET_FOREACH(ldhp, linux_device_handler_set) 1116 linux_device_unregister_handler(*ldhp); 1117 mtx_destroy(&emul_lock); 1118 sx_destroy(&emul_shared_lock); 1119 sx_destroy(&futex_sx); 1120 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1121 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 1122 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1123 if (bootverbose) 1124 printf("Linux ELF exec handler removed\n"); 1125 } else 1126 printf("Could not deinstall ELF interpreter entry\n"); 1127 break; 1128 default: 1129 return EOPNOTSUPP; 1130 } 1131 return error; 1132 } 1133 1134 static moduledata_t linux_elf_mod = { 1135 "linuxelf", 1136 linux_elf_modevent, 1137 0 1138 }; 1139 1140 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1141