1 /*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/imgact.h> 36 #include <sys/imgact_aout.h> 37 #include <sys/imgact_elf.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/module.h> 42 #include <sys/mutex.h> 43 #include <sys/proc.h> 44 #include <sys/signalvar.h> 45 #include <sys/syscallsubr.h> 46 #include <sys/sysent.h> 47 #include <sys/sysproto.h> 48 #include <sys/vnode.h> 49 #include <sys/eventhandler.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 #include <vm/vm_extern.h> 54 #include <vm/vm_map.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_param.h> 58 59 #include <machine/cpu.h> 60 #include <machine/md_var.h> 61 #include <machine/pcb.h> 62 63 #include <i386/linux/linux.h> 64 #include <i386/linux/linux_proto.h> 65 #include <compat/linux/linux_emul.h> 66 #include <compat/linux/linux_mib.h> 67 #include <compat/linux/linux_signal.h> 68 #include <compat/linux/linux_util.h> 69 70 MODULE_VERSION(linux, 1); 71 72 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 73 74 #if BYTE_ORDER == LITTLE_ENDIAN 75 #define SHELLMAGIC 0x2123 /* #! */ 76 #else 77 #define SHELLMAGIC 0x2321 78 #endif 79 80 /* 81 * Allow the sendsig functions to use the ldebug() facility 82 * even though they are not syscalls themselves. Map them 83 * to syscall 0. This is slightly less bogus than using 84 * ldebug(sigreturn). 85 */ 86 #define LINUX_SYS_linux_rt_sendsig 0 87 #define LINUX_SYS_linux_sendsig 0 88 89 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 90 #define __LINUX_NPXCW__ 0x37f 91 92 extern char linux_sigcode[]; 93 extern int linux_szsigcode; 94 95 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 96 97 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 98 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 99 100 static int linux_fixup(register_t **stack_base, 101 struct image_params *iparams); 102 static int elf_linux_fixup(register_t **stack_base, 103 struct image_params *iparams); 104 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 105 caddr_t *params); 106 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 107 static void exec_linux_setregs(struct thread *td, u_long entry, 108 u_long stack, u_long ps_strings); 109 110 extern LIST_HEAD(futex_list, futex) futex_list; 111 extern struct sx futex_sx; 112 113 static eventhandler_tag linux_exit_tag; 114 static eventhandler_tag linux_schedtail_tag; 115 static eventhandler_tag linux_exec_tag; 116 117 /* 118 * Linux syscalls return negative errno's, we do positive and map them 119 * Reference: 120 * FreeBSD: src/sys/sys/errno.h 121 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 122 * linux-2.6.17.8/include/asm-generic/errno.h 123 */ 124 static int bsd_to_linux_errno[ELAST + 1] = { 125 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 126 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 127 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 128 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 129 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 130 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 131 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 132 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 133 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 134 -72, -67, -71 135 }; 136 137 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 138 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 139 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 140 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 141 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 142 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 143 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 144 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 145 0, LINUX_SIGUSR1, LINUX_SIGUSR2 146 }; 147 148 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 149 SIGHUP, SIGINT, SIGQUIT, SIGILL, 150 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 151 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 152 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 153 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 154 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 155 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 156 SIGIO, SIGURG, SIGSYS 157 }; 158 159 #define LINUX_T_UNKNOWN 255 160 static int _bsd_to_linux_trapcode[] = { 161 LINUX_T_UNKNOWN, /* 0 */ 162 6, /* 1 T_PRIVINFLT */ 163 LINUX_T_UNKNOWN, /* 2 */ 164 3, /* 3 T_BPTFLT */ 165 LINUX_T_UNKNOWN, /* 4 */ 166 LINUX_T_UNKNOWN, /* 5 */ 167 16, /* 6 T_ARITHTRAP */ 168 254, /* 7 T_ASTFLT */ 169 LINUX_T_UNKNOWN, /* 8 */ 170 13, /* 9 T_PROTFLT */ 171 1, /* 10 T_TRCTRAP */ 172 LINUX_T_UNKNOWN, /* 11 */ 173 14, /* 12 T_PAGEFLT */ 174 LINUX_T_UNKNOWN, /* 13 */ 175 17, /* 14 T_ALIGNFLT */ 176 LINUX_T_UNKNOWN, /* 15 */ 177 LINUX_T_UNKNOWN, /* 16 */ 178 LINUX_T_UNKNOWN, /* 17 */ 179 0, /* 18 T_DIVIDE */ 180 2, /* 19 T_NMI */ 181 4, /* 20 T_OFLOW */ 182 5, /* 21 T_BOUND */ 183 7, /* 22 T_DNA */ 184 8, /* 23 T_DOUBLEFLT */ 185 9, /* 24 T_FPOPFLT */ 186 10, /* 25 T_TSSFLT */ 187 11, /* 26 T_SEGNPFLT */ 188 12, /* 27 T_STKFLT */ 189 18, /* 28 T_MCHK */ 190 19, /* 29 T_XMMFLT */ 191 15 /* 30 T_RESERVED */ 192 }; 193 #define bsd_to_linux_trapcode(code) \ 194 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 195 _bsd_to_linux_trapcode[(code)]: \ 196 LINUX_T_UNKNOWN) 197 198 /* 199 * If FreeBSD & Linux have a difference of opinion about what a trap 200 * means, deal with it here. 201 * 202 * MPSAFE 203 */ 204 static int 205 translate_traps(int signal, int trap_code) 206 { 207 if (signal != SIGBUS) 208 return signal; 209 switch (trap_code) { 210 case T_PROTFLT: 211 case T_TSSFLT: 212 case T_DOUBLEFLT: 213 case T_PAGEFLT: 214 return SIGSEGV; 215 default: 216 return signal; 217 } 218 } 219 220 static int 221 linux_fixup(register_t **stack_base, struct image_params *imgp) 222 { 223 register_t *argv, *envp; 224 225 argv = *stack_base; 226 envp = *stack_base + (imgp->args->argc + 1); 227 (*stack_base)--; 228 **stack_base = (intptr_t)(void *)envp; 229 (*stack_base)--; 230 **stack_base = (intptr_t)(void *)argv; 231 (*stack_base)--; 232 **stack_base = imgp->args->argc; 233 return 0; 234 } 235 236 static int 237 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 238 { 239 Elf32_Auxargs *args; 240 register_t *pos; 241 242 KASSERT(curthread->td_proc == imgp->proc, 243 ("unsafe elf_linux_fixup(), should be curproc")); 244 args = (Elf32_Auxargs *)imgp->auxargs; 245 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 246 247 if (args->trace) 248 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 249 if (args->execfd != -1) 250 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 251 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 252 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 253 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 254 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 255 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 256 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 257 AUXARGS_ENTRY(pos, AT_BASE, args->base); 258 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 259 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 260 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 261 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 262 AUXARGS_ENTRY(pos, AT_NULL, 0); 263 264 free(imgp->auxargs, M_TEMP); 265 imgp->auxargs = NULL; 266 267 (*stack_base)--; 268 **stack_base = (register_t)imgp->args->argc; 269 return 0; 270 } 271 272 extern int _ucodesel, _udatasel; 273 extern unsigned long linux_sznonrtsigcode; 274 275 static void 276 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 277 { 278 struct thread *td = curthread; 279 struct proc *p = td->td_proc; 280 struct sigacts *psp; 281 struct trapframe *regs; 282 struct l_rt_sigframe *fp, frame; 283 int sig, code; 284 int oonstack; 285 286 sig = ksi->ksi_signo; 287 code = ksi->ksi_code; 288 PROC_LOCK_ASSERT(p, MA_OWNED); 289 psp = p->p_sigacts; 290 mtx_assert(&psp->ps_mtx, MA_OWNED); 291 regs = td->td_frame; 292 oonstack = sigonstack(regs->tf_esp); 293 294 #ifdef DEBUG 295 if (ldebug(rt_sendsig)) 296 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 297 catcher, sig, (void*)mask, code); 298 #endif 299 /* 300 * Allocate space for the signal handler context. 301 */ 302 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 303 SIGISMEMBER(psp->ps_sigonstack, sig)) { 304 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 305 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 306 } else 307 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 308 mtx_unlock(&psp->ps_mtx); 309 310 /* 311 * Build the argument list for the signal handler. 312 */ 313 if (p->p_sysent->sv_sigtbl) 314 if (sig <= p->p_sysent->sv_sigsize) 315 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 316 317 bzero(&frame, sizeof(frame)); 318 319 frame.sf_handler = catcher; 320 frame.sf_sig = sig; 321 frame.sf_siginfo = &fp->sf_si; 322 frame.sf_ucontext = &fp->sf_sc; 323 324 /* Fill in POSIX parts */ 325 frame.sf_si.lsi_signo = sig; 326 frame.sf_si.lsi_code = code; 327 frame.sf_si.lsi_addr = ksi->ksi_addr; 328 329 /* 330 * Build the signal context to be used by sigreturn. 331 */ 332 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 333 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 334 335 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 336 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 337 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 338 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 339 PROC_UNLOCK(p); 340 341 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 342 343 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 344 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 345 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 346 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 347 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 348 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 349 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 350 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 351 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 352 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 353 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 354 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 355 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 356 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 357 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 358 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 359 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 360 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 361 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 362 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 363 364 #ifdef DEBUG 365 if (ldebug(rt_sendsig)) 366 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 367 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 368 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 369 #endif 370 371 if (copyout(&frame, fp, sizeof(frame)) != 0) { 372 /* 373 * Process has trashed its stack; give it an illegal 374 * instruction to halt it in its tracks. 375 */ 376 #ifdef DEBUG 377 if (ldebug(rt_sendsig)) 378 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 379 fp, oonstack); 380 #endif 381 PROC_LOCK(p); 382 sigexit(td, SIGILL); 383 } 384 385 /* 386 * Build context to run handler in. 387 */ 388 regs->tf_esp = (int)fp; 389 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 390 linux_sznonrtsigcode; 391 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 392 regs->tf_cs = _ucodesel; 393 regs->tf_ds = _udatasel; 394 regs->tf_es = _udatasel; 395 regs->tf_fs = _udatasel; 396 regs->tf_ss = _udatasel; 397 PROC_LOCK(p); 398 mtx_lock(&psp->ps_mtx); 399 } 400 401 402 /* 403 * Send an interrupt to process. 404 * 405 * Stack is set up to allow sigcode stored 406 * in u. to call routine, followed by kcall 407 * to sigreturn routine below. After sigreturn 408 * resets the signal mask, the stack, and the 409 * frame pointer, it returns to the user 410 * specified pc, psl. 411 */ 412 static void 413 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 414 { 415 struct thread *td = curthread; 416 struct proc *p = td->td_proc; 417 struct sigacts *psp; 418 struct trapframe *regs; 419 struct l_sigframe *fp, frame; 420 l_sigset_t lmask; 421 int sig, code; 422 int oonstack, i; 423 424 PROC_LOCK_ASSERT(p, MA_OWNED); 425 psp = p->p_sigacts; 426 sig = ksi->ksi_signo; 427 code = ksi->ksi_code; 428 mtx_assert(&psp->ps_mtx, MA_OWNED); 429 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 430 /* Signal handler installed with SA_SIGINFO. */ 431 linux_rt_sendsig(catcher, ksi, mask); 432 return; 433 } 434 regs = td->td_frame; 435 oonstack = sigonstack(regs->tf_esp); 436 437 #ifdef DEBUG 438 if (ldebug(sendsig)) 439 printf(ARGS(sendsig, "%p, %d, %p, %u"), 440 catcher, sig, (void*)mask, code); 441 #endif 442 443 /* 444 * Allocate space for the signal handler context. 445 */ 446 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 447 SIGISMEMBER(psp->ps_sigonstack, sig)) { 448 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 449 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 450 } else 451 fp = (struct l_sigframe *)regs->tf_esp - 1; 452 mtx_unlock(&psp->ps_mtx); 453 PROC_UNLOCK(p); 454 455 /* 456 * Build the argument list for the signal handler. 457 */ 458 if (p->p_sysent->sv_sigtbl) 459 if (sig <= p->p_sysent->sv_sigsize) 460 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 461 462 bzero(&frame, sizeof(frame)); 463 464 frame.sf_handler = catcher; 465 frame.sf_sig = sig; 466 467 bsd_to_linux_sigset(mask, &lmask); 468 469 /* 470 * Build the signal context to be used by sigreturn. 471 */ 472 frame.sf_sc.sc_mask = lmask.__bits[0]; 473 frame.sf_sc.sc_gs = rgs(); 474 frame.sf_sc.sc_fs = regs->tf_fs; 475 frame.sf_sc.sc_es = regs->tf_es; 476 frame.sf_sc.sc_ds = regs->tf_ds; 477 frame.sf_sc.sc_edi = regs->tf_edi; 478 frame.sf_sc.sc_esi = regs->tf_esi; 479 frame.sf_sc.sc_ebp = regs->tf_ebp; 480 frame.sf_sc.sc_ebx = regs->tf_ebx; 481 frame.sf_sc.sc_edx = regs->tf_edx; 482 frame.sf_sc.sc_ecx = regs->tf_ecx; 483 frame.sf_sc.sc_eax = regs->tf_eax; 484 frame.sf_sc.sc_eip = regs->tf_eip; 485 frame.sf_sc.sc_cs = regs->tf_cs; 486 frame.sf_sc.sc_eflags = regs->tf_eflags; 487 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 488 frame.sf_sc.sc_ss = regs->tf_ss; 489 frame.sf_sc.sc_err = regs->tf_err; 490 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 491 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 492 493 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 494 frame.sf_extramask[i] = lmask.__bits[i+1]; 495 496 if (copyout(&frame, fp, sizeof(frame)) != 0) { 497 /* 498 * Process has trashed its stack; give it an illegal 499 * instruction to halt it in its tracks. 500 */ 501 PROC_LOCK(p); 502 sigexit(td, SIGILL); 503 } 504 505 /* 506 * Build context to run handler in. 507 */ 508 regs->tf_esp = (int)fp; 509 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 510 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 511 regs->tf_cs = _ucodesel; 512 regs->tf_ds = _udatasel; 513 regs->tf_es = _udatasel; 514 regs->tf_fs = _udatasel; 515 regs->tf_ss = _udatasel; 516 PROC_LOCK(p); 517 mtx_lock(&psp->ps_mtx); 518 } 519 520 /* 521 * System call to cleanup state after a signal 522 * has been taken. Reset signal mask and 523 * stack state from context left by sendsig (above). 524 * Return to previous pc and psl as specified by 525 * context left by sendsig. Check carefully to 526 * make sure that the user has not modified the 527 * psl to gain improper privileges or to cause 528 * a machine fault. 529 */ 530 int 531 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 532 { 533 struct proc *p = td->td_proc; 534 struct l_sigframe frame; 535 struct trapframe *regs; 536 l_sigset_t lmask; 537 int eflags, i; 538 ksiginfo_t ksi; 539 540 regs = td->td_frame; 541 542 #ifdef DEBUG 543 if (ldebug(sigreturn)) 544 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 545 #endif 546 /* 547 * The trampoline code hands us the sigframe. 548 * It is unsafe to keep track of it ourselves, in the event that a 549 * program jumps out of a signal handler. 550 */ 551 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 552 return (EFAULT); 553 554 /* 555 * Check for security violations. 556 */ 557 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 558 eflags = frame.sf_sc.sc_eflags; 559 /* 560 * XXX do allow users to change the privileged flag PSL_RF. The 561 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 562 * sometimes set it there too. tf_eflags is kept in the signal 563 * context during signal handling and there is no other place 564 * to remember it, so the PSL_RF bit may be corrupted by the 565 * signal handler without us knowing. Corruption of the PSL_RF 566 * bit at worst causes one more or one less debugger trap, so 567 * allowing it is fairly harmless. 568 */ 569 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 570 return(EINVAL); 571 572 /* 573 * Don't allow users to load a valid privileged %cs. Let the 574 * hardware check for invalid selectors, excess privilege in 575 * other selectors, invalid %eip's and invalid %esp's. 576 */ 577 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 578 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 579 ksiginfo_init_trap(&ksi); 580 ksi.ksi_signo = SIGBUS; 581 ksi.ksi_code = BUS_OBJERR; 582 ksi.ksi_trapno = T_PROTFLT; 583 ksi.ksi_addr = (void *)regs->tf_eip; 584 trapsignal(td, &ksi); 585 return(EINVAL); 586 } 587 588 lmask.__bits[0] = frame.sf_sc.sc_mask; 589 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 590 lmask.__bits[i+1] = frame.sf_extramask[i]; 591 PROC_LOCK(p); 592 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 593 SIG_CANTMASK(td->td_sigmask); 594 signotify(td); 595 PROC_UNLOCK(p); 596 597 /* 598 * Restore signal context. 599 */ 600 /* %gs was restored by the trampoline. */ 601 regs->tf_fs = frame.sf_sc.sc_fs; 602 regs->tf_es = frame.sf_sc.sc_es; 603 regs->tf_ds = frame.sf_sc.sc_ds; 604 regs->tf_edi = frame.sf_sc.sc_edi; 605 regs->tf_esi = frame.sf_sc.sc_esi; 606 regs->tf_ebp = frame.sf_sc.sc_ebp; 607 regs->tf_ebx = frame.sf_sc.sc_ebx; 608 regs->tf_edx = frame.sf_sc.sc_edx; 609 regs->tf_ecx = frame.sf_sc.sc_ecx; 610 regs->tf_eax = frame.sf_sc.sc_eax; 611 regs->tf_eip = frame.sf_sc.sc_eip; 612 regs->tf_cs = frame.sf_sc.sc_cs; 613 regs->tf_eflags = eflags; 614 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 615 regs->tf_ss = frame.sf_sc.sc_ss; 616 617 return (EJUSTRETURN); 618 } 619 620 /* 621 * System call to cleanup state after a signal 622 * has been taken. Reset signal mask and 623 * stack state from context left by rt_sendsig (above). 624 * Return to previous pc and psl as specified by 625 * context left by sendsig. Check carefully to 626 * make sure that the user has not modified the 627 * psl to gain improper privileges or to cause 628 * a machine fault. 629 */ 630 int 631 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 632 { 633 struct proc *p = td->td_proc; 634 struct l_ucontext uc; 635 struct l_sigcontext *context; 636 l_stack_t *lss; 637 stack_t ss; 638 struct trapframe *regs; 639 int eflags; 640 ksiginfo_t ksi; 641 642 regs = td->td_frame; 643 644 #ifdef DEBUG 645 if (ldebug(rt_sigreturn)) 646 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 647 #endif 648 /* 649 * The trampoline code hands us the ucontext. 650 * It is unsafe to keep track of it ourselves, in the event that a 651 * program jumps out of a signal handler. 652 */ 653 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 654 return (EFAULT); 655 656 context = &uc.uc_mcontext; 657 658 /* 659 * Check for security violations. 660 */ 661 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 662 eflags = context->sc_eflags; 663 /* 664 * XXX do allow users to change the privileged flag PSL_RF. The 665 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 666 * sometimes set it there too. tf_eflags is kept in the signal 667 * context during signal handling and there is no other place 668 * to remember it, so the PSL_RF bit may be corrupted by the 669 * signal handler without us knowing. Corruption of the PSL_RF 670 * bit at worst causes one more or one less debugger trap, so 671 * allowing it is fairly harmless. 672 */ 673 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 674 return(EINVAL); 675 676 /* 677 * Don't allow users to load a valid privileged %cs. Let the 678 * hardware check for invalid selectors, excess privilege in 679 * other selectors, invalid %eip's and invalid %esp's. 680 */ 681 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 682 if (!CS_SECURE(context->sc_cs)) { 683 ksiginfo_init_trap(&ksi); 684 ksi.ksi_signo = SIGBUS; 685 ksi.ksi_code = BUS_OBJERR; 686 ksi.ksi_trapno = T_PROTFLT; 687 ksi.ksi_addr = (void *)regs->tf_eip; 688 trapsignal(td, &ksi); 689 return(EINVAL); 690 } 691 692 PROC_LOCK(p); 693 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 694 SIG_CANTMASK(td->td_sigmask); 695 signotify(td); 696 PROC_UNLOCK(p); 697 698 /* 699 * Restore signal context 700 */ 701 /* %gs was restored by the trampoline. */ 702 regs->tf_fs = context->sc_fs; 703 regs->tf_es = context->sc_es; 704 regs->tf_ds = context->sc_ds; 705 regs->tf_edi = context->sc_edi; 706 regs->tf_esi = context->sc_esi; 707 regs->tf_ebp = context->sc_ebp; 708 regs->tf_ebx = context->sc_ebx; 709 regs->tf_edx = context->sc_edx; 710 regs->tf_ecx = context->sc_ecx; 711 regs->tf_eax = context->sc_eax; 712 regs->tf_eip = context->sc_eip; 713 regs->tf_cs = context->sc_cs; 714 regs->tf_eflags = eflags; 715 regs->tf_esp = context->sc_esp_at_signal; 716 regs->tf_ss = context->sc_ss; 717 718 /* 719 * call sigaltstack & ignore results.. 720 */ 721 lss = &uc.uc_stack; 722 ss.ss_sp = lss->ss_sp; 723 ss.ss_size = lss->ss_size; 724 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 725 726 #ifdef DEBUG 727 if (ldebug(rt_sigreturn)) 728 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 729 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 730 #endif 731 (void)kern_sigaltstack(td, &ss, NULL); 732 733 return (EJUSTRETURN); 734 } 735 736 /* 737 * MPSAFE 738 */ 739 static void 740 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 741 { 742 args[0] = tf->tf_ebx; 743 args[1] = tf->tf_ecx; 744 args[2] = tf->tf_edx; 745 args[3] = tf->tf_esi; 746 args[4] = tf->tf_edi; 747 args[5] = tf->tf_ebp; /* Unconfirmed */ 748 *params = NULL; /* no copyin */ 749 } 750 751 /* 752 * If a linux binary is exec'ing something, try this image activator 753 * first. We override standard shell script execution in order to 754 * be able to modify the interpreter path. We only do this if a linux 755 * binary is doing the exec, so we do not create an EXEC module for it. 756 */ 757 static int exec_linux_imgact_try(struct image_params *iparams); 758 759 static int 760 exec_linux_imgact_try(struct image_params *imgp) 761 { 762 const char *head = (const char *)imgp->image_header; 763 char *rpath; 764 int error = -1, len; 765 766 /* 767 * The interpreter for shell scripts run from a linux binary needs 768 * to be located in /compat/linux if possible in order to recursively 769 * maintain linux path emulation. 770 */ 771 if (((const short *)head)[0] == SHELLMAGIC) { 772 /* 773 * Run our normal shell image activator. If it succeeds attempt 774 * to use the alternate path for the interpreter. If an alternate 775 * path is found, use our stringspace to store it. 776 */ 777 if ((error = exec_shell_imgact(imgp)) == 0) { 778 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 779 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 780 if (rpath != NULL) { 781 len = strlen(rpath) + 1; 782 783 if (len <= MAXSHELLCMDLEN) { 784 memcpy(imgp->interpreter_name, rpath, len); 785 } 786 free(rpath, M_TEMP); 787 } 788 } 789 } 790 return(error); 791 } 792 793 /* 794 * exec_setregs may initialize some registers differently than Linux 795 * does, thus potentially confusing Linux binaries. If necessary, we 796 * override the exec_setregs default(s) here. 797 */ 798 static void 799 exec_linux_setregs(struct thread *td, u_long entry, 800 u_long stack, u_long ps_strings) 801 { 802 static const u_short control = __LINUX_NPXCW__; 803 struct pcb *pcb = td->td_pcb; 804 805 exec_setregs(td, entry, stack, ps_strings); 806 807 /* Linux sets %gs to 0, we default to _udatasel */ 808 pcb->pcb_gs = 0; load_gs(0); 809 810 /* Linux sets the i387 to extended precision. */ 811 fldcw(&control); 812 } 813 814 struct sysentvec linux_sysvec = { 815 LINUX_SYS_MAXSYSCALL, 816 linux_sysent, 817 0, 818 LINUX_SIGTBLSZ, 819 bsd_to_linux_signal, 820 ELAST + 1, 821 bsd_to_linux_errno, 822 translate_traps, 823 linux_fixup, 824 linux_sendsig, 825 linux_sigcode, 826 &linux_szsigcode, 827 linux_prepsyscall, 828 "Linux a.out", 829 NULL, 830 exec_linux_imgact_try, 831 LINUX_MINSIGSTKSZ, 832 PAGE_SIZE, 833 VM_MIN_ADDRESS, 834 VM_MAXUSER_ADDRESS, 835 USRSTACK, 836 PS_STRINGS, 837 VM_PROT_ALL, 838 exec_copyout_strings, 839 exec_linux_setregs, 840 NULL 841 }; 842 843 struct sysentvec elf_linux_sysvec = { 844 LINUX_SYS_MAXSYSCALL, 845 linux_sysent, 846 0, 847 LINUX_SIGTBLSZ, 848 bsd_to_linux_signal, 849 ELAST + 1, 850 bsd_to_linux_errno, 851 translate_traps, 852 elf_linux_fixup, 853 linux_sendsig, 854 linux_sigcode, 855 &linux_szsigcode, 856 linux_prepsyscall, 857 "Linux ELF", 858 elf32_coredump, 859 exec_linux_imgact_try, 860 LINUX_MINSIGSTKSZ, 861 PAGE_SIZE, 862 VM_MIN_ADDRESS, 863 VM_MAXUSER_ADDRESS, 864 USRSTACK, 865 PS_STRINGS, 866 VM_PROT_ALL, 867 exec_copyout_strings, 868 exec_linux_setregs, 869 NULL 870 }; 871 872 static Elf32_Brandinfo linux_brand = { 873 ELFOSABI_LINUX, 874 EM_386, 875 "Linux", 876 "/compat/linux", 877 "/lib/ld-linux.so.1", 878 &elf_linux_sysvec, 879 NULL, 880 BI_CAN_EXEC_DYN, 881 }; 882 883 static Elf32_Brandinfo linux_glibc2brand = { 884 ELFOSABI_LINUX, 885 EM_386, 886 "Linux", 887 "/compat/linux", 888 "/lib/ld-linux.so.2", 889 &elf_linux_sysvec, 890 NULL, 891 BI_CAN_EXEC_DYN, 892 }; 893 894 Elf32_Brandinfo *linux_brandlist[] = { 895 &linux_brand, 896 &linux_glibc2brand, 897 NULL 898 }; 899 900 static int 901 linux_elf_modevent(module_t mod, int type, void *data) 902 { 903 Elf32_Brandinfo **brandinfo; 904 int error; 905 struct linux_ioctl_handler **lihp; 906 struct linux_device_handler **ldhp; 907 908 error = 0; 909 910 switch(type) { 911 case MOD_LOAD: 912 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 913 ++brandinfo) 914 if (elf32_insert_brand_entry(*brandinfo) < 0) 915 error = EINVAL; 916 if (error == 0) { 917 SET_FOREACH(lihp, linux_ioctl_handler_set) 918 linux_ioctl_register_handler(*lihp); 919 SET_FOREACH(ldhp, linux_device_handler_set) 920 linux_device_register_handler(*ldhp); 921 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 922 sx_init(&emul_shared_lock, "emuldata->shared lock"); 923 LIST_INIT(&futex_list); 924 sx_init(&futex_sx, "futex protection lock"); 925 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 926 NULL, 1000); 927 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, 928 NULL, 1000); 929 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 930 NULL, 1000); 931 if (bootverbose) 932 printf("Linux ELF exec handler installed\n"); 933 } else 934 printf("cannot insert Linux ELF brand handler\n"); 935 break; 936 case MOD_UNLOAD: 937 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 938 ++brandinfo) 939 if (elf32_brand_inuse(*brandinfo)) 940 error = EBUSY; 941 if (error == 0) { 942 for (brandinfo = &linux_brandlist[0]; 943 *brandinfo != NULL; ++brandinfo) 944 if (elf32_remove_brand_entry(*brandinfo) < 0) 945 error = EINVAL; 946 } 947 if (error == 0) { 948 SET_FOREACH(lihp, linux_ioctl_handler_set) 949 linux_ioctl_unregister_handler(*lihp); 950 SET_FOREACH(ldhp, linux_device_handler_set) 951 linux_device_unregister_handler(*ldhp); 952 mtx_destroy(&emul_lock); 953 sx_destroy(&emul_shared_lock); 954 sx_destroy(&futex_sx); 955 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 956 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 957 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 958 if (bootverbose) 959 printf("Linux ELF exec handler removed\n"); 960 } else 961 printf("Could not deinstall ELF interpreter entry\n"); 962 break; 963 default: 964 return EOPNOTSUPP; 965 } 966 return error; 967 } 968 969 static moduledata_t linux_elf_mod = { 970 "linuxelf", 971 linux_elf_modevent, 972 0 973 }; 974 975 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 976