1 /*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/imgact.h> 36 #include <sys/imgact_aout.h> 37 #include <sys/imgact_elf.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/module.h> 42 #include <sys/mutex.h> 43 #include <sys/proc.h> 44 #include <sys/signalvar.h> 45 #include <sys/syscallsubr.h> 46 #include <sys/sysent.h> 47 #include <sys/sysproto.h> 48 #include <sys/vnode.h> 49 #include <sys/eventhandler.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 #include <vm/vm_extern.h> 54 #include <vm/vm_map.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_param.h> 58 59 #include <machine/cpu.h> 60 #include <machine/md_var.h> 61 #include <machine/pcb.h> 62 63 #include <i386/linux/linux.h> 64 #include <i386/linux/linux_proto.h> 65 #include <compat/linux/linux_emul.h> 66 #include <compat/linux/linux_mib.h> 67 #include <compat/linux/linux_signal.h> 68 #include <compat/linux/linux_util.h> 69 70 MODULE_VERSION(linux, 1); 71 72 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 73 74 #if BYTE_ORDER == LITTLE_ENDIAN 75 #define SHELLMAGIC 0x2123 /* #! */ 76 #else 77 #define SHELLMAGIC 0x2321 78 #endif 79 80 /* 81 * Allow the sendsig functions to use the ldebug() facility 82 * even though they are not syscalls themselves. Map them 83 * to syscall 0. This is slightly less bogus than using 84 * ldebug(sigreturn). 85 */ 86 #define LINUX_SYS_linux_rt_sendsig 0 87 #define LINUX_SYS_linux_sendsig 0 88 89 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 90 #define __LINUX_NPXCW__ 0x37f 91 92 extern char linux_sigcode[]; 93 extern int linux_szsigcode; 94 95 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 96 97 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 98 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 99 100 static int linux_fixup(register_t **stack_base, 101 struct image_params *iparams); 102 static int elf_linux_fixup(register_t **stack_base, 103 struct image_params *iparams); 104 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 105 caddr_t *params); 106 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 107 static void exec_linux_setregs(struct thread *td, u_long entry, 108 u_long stack, u_long ps_strings); 109 110 extern LIST_HEAD(futex_list, futex) futex_list; 111 extern struct sx futex_sx; 112 113 static eventhandler_tag linux_exit_tag; 114 static eventhandler_tag linux_schedtail_tag; 115 static eventhandler_tag linux_exec_tag; 116 117 /* 118 * Linux syscalls return negative errno's, we do positive and map them 119 * Reference: 120 * FreeBSD: src/sys/sys/errno.h 121 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 122 * linux-2.6.17.8/include/asm-generic/errno.h 123 */ 124 static int bsd_to_linux_errno[ELAST + 1] = { 125 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 126 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 127 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 128 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 129 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 130 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 131 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 132 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 133 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 134 -72, -67, -71 135 }; 136 137 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 138 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 139 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 140 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 141 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 142 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 143 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 144 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 145 0, LINUX_SIGUSR1, LINUX_SIGUSR2 146 }; 147 148 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 149 SIGHUP, SIGINT, SIGQUIT, SIGILL, 150 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 151 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 152 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 153 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 154 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 155 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 156 SIGIO, SIGURG, SIGSYS 157 }; 158 159 #define LINUX_T_UNKNOWN 255 160 static int _bsd_to_linux_trapcode[] = { 161 LINUX_T_UNKNOWN, /* 0 */ 162 6, /* 1 T_PRIVINFLT */ 163 LINUX_T_UNKNOWN, /* 2 */ 164 3, /* 3 T_BPTFLT */ 165 LINUX_T_UNKNOWN, /* 4 */ 166 LINUX_T_UNKNOWN, /* 5 */ 167 16, /* 6 T_ARITHTRAP */ 168 254, /* 7 T_ASTFLT */ 169 LINUX_T_UNKNOWN, /* 8 */ 170 13, /* 9 T_PROTFLT */ 171 1, /* 10 T_TRCTRAP */ 172 LINUX_T_UNKNOWN, /* 11 */ 173 14, /* 12 T_PAGEFLT */ 174 LINUX_T_UNKNOWN, /* 13 */ 175 17, /* 14 T_ALIGNFLT */ 176 LINUX_T_UNKNOWN, /* 15 */ 177 LINUX_T_UNKNOWN, /* 16 */ 178 LINUX_T_UNKNOWN, /* 17 */ 179 0, /* 18 T_DIVIDE */ 180 2, /* 19 T_NMI */ 181 4, /* 20 T_OFLOW */ 182 5, /* 21 T_BOUND */ 183 7, /* 22 T_DNA */ 184 8, /* 23 T_DOUBLEFLT */ 185 9, /* 24 T_FPOPFLT */ 186 10, /* 25 T_TSSFLT */ 187 11, /* 26 T_SEGNPFLT */ 188 12, /* 27 T_STKFLT */ 189 18, /* 28 T_MCHK */ 190 19, /* 29 T_XMMFLT */ 191 15 /* 30 T_RESERVED */ 192 }; 193 #define bsd_to_linux_trapcode(code) \ 194 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 195 _bsd_to_linux_trapcode[(code)]: \ 196 LINUX_T_UNKNOWN) 197 198 /* 199 * If FreeBSD & Linux have a difference of opinion about what a trap 200 * means, deal with it here. 201 * 202 * MPSAFE 203 */ 204 static int 205 translate_traps(int signal, int trap_code) 206 { 207 if (signal != SIGBUS) 208 return signal; 209 switch (trap_code) { 210 case T_PROTFLT: 211 case T_TSSFLT: 212 case T_DOUBLEFLT: 213 case T_PAGEFLT: 214 return SIGSEGV; 215 default: 216 return signal; 217 } 218 } 219 220 static int 221 linux_fixup(register_t **stack_base, struct image_params *imgp) 222 { 223 register_t *argv, *envp; 224 225 argv = *stack_base; 226 envp = *stack_base + (imgp->args->argc + 1); 227 (*stack_base)--; 228 **stack_base = (intptr_t)(void *)envp; 229 (*stack_base)--; 230 **stack_base = (intptr_t)(void *)argv; 231 (*stack_base)--; 232 **stack_base = imgp->args->argc; 233 return 0; 234 } 235 236 static int 237 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 238 { 239 Elf32_Auxargs *args; 240 register_t *pos; 241 242 KASSERT(curthread->td_proc == imgp->proc && 243 (curthread->td_proc->p_flag & P_SA) == 0, 244 ("unsafe elf_linux_fixup(), should be curproc")); 245 args = (Elf32_Auxargs *)imgp->auxargs; 246 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 247 248 if (args->trace) 249 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 250 if (args->execfd != -1) 251 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 252 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 253 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 254 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 255 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 256 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 257 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 258 AUXARGS_ENTRY(pos, AT_BASE, args->base); 259 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 260 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 261 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 262 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 263 AUXARGS_ENTRY(pos, AT_NULL, 0); 264 265 free(imgp->auxargs, M_TEMP); 266 imgp->auxargs = NULL; 267 268 (*stack_base)--; 269 **stack_base = (register_t)imgp->args->argc; 270 return 0; 271 } 272 273 extern int _ucodesel, _udatasel; 274 extern unsigned long linux_sznonrtsigcode; 275 276 static void 277 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 278 { 279 struct thread *td = curthread; 280 struct proc *p = td->td_proc; 281 struct sigacts *psp; 282 struct trapframe *regs; 283 struct l_rt_sigframe *fp, frame; 284 int sig, code; 285 int oonstack; 286 287 sig = ksi->ksi_signo; 288 code = ksi->ksi_code; 289 PROC_LOCK_ASSERT(p, MA_OWNED); 290 psp = p->p_sigacts; 291 mtx_assert(&psp->ps_mtx, MA_OWNED); 292 regs = td->td_frame; 293 oonstack = sigonstack(regs->tf_esp); 294 295 #ifdef DEBUG 296 if (ldebug(rt_sendsig)) 297 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 298 catcher, sig, (void*)mask, code); 299 #endif 300 /* 301 * Allocate space for the signal handler context. 302 */ 303 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 304 SIGISMEMBER(psp->ps_sigonstack, sig)) { 305 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 306 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 307 } else 308 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 309 mtx_unlock(&psp->ps_mtx); 310 311 /* 312 * Build the argument list for the signal handler. 313 */ 314 if (p->p_sysent->sv_sigtbl) 315 if (sig <= p->p_sysent->sv_sigsize) 316 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 317 318 bzero(&frame, sizeof(frame)); 319 320 frame.sf_handler = catcher; 321 frame.sf_sig = sig; 322 frame.sf_siginfo = &fp->sf_si; 323 frame.sf_ucontext = &fp->sf_sc; 324 325 /* Fill in POSIX parts */ 326 frame.sf_si.lsi_signo = sig; 327 frame.sf_si.lsi_code = code; 328 frame.sf_si.lsi_addr = ksi->ksi_addr; 329 330 /* 331 * Build the signal context to be used by sigreturn. 332 */ 333 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 334 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 335 336 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 337 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 338 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 339 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 340 PROC_UNLOCK(p); 341 342 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 343 344 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 345 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 346 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 347 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 348 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 349 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 350 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 351 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 352 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 353 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 354 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 355 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 356 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 357 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 358 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 359 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 360 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 361 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 362 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 363 364 #ifdef DEBUG 365 if (ldebug(rt_sendsig)) 366 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 367 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 368 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 369 #endif 370 371 if (copyout(&frame, fp, sizeof(frame)) != 0) { 372 /* 373 * Process has trashed its stack; give it an illegal 374 * instruction to halt it in its tracks. 375 */ 376 #ifdef DEBUG 377 if (ldebug(rt_sendsig)) 378 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 379 fp, oonstack); 380 #endif 381 PROC_LOCK(p); 382 sigexit(td, SIGILL); 383 } 384 385 /* 386 * Build context to run handler in. 387 */ 388 regs->tf_esp = (int)fp; 389 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 390 linux_sznonrtsigcode; 391 regs->tf_eflags &= ~(PSL_T | PSL_VM); 392 regs->tf_cs = _ucodesel; 393 regs->tf_ds = _udatasel; 394 regs->tf_es = _udatasel; 395 regs->tf_fs = _udatasel; 396 regs->tf_ss = _udatasel; 397 PROC_LOCK(p); 398 mtx_lock(&psp->ps_mtx); 399 } 400 401 402 /* 403 * Send an interrupt to process. 404 * 405 * Stack is set up to allow sigcode stored 406 * in u. to call routine, followed by kcall 407 * to sigreturn routine below. After sigreturn 408 * resets the signal mask, the stack, and the 409 * frame pointer, it returns to the user 410 * specified pc, psl. 411 */ 412 static void 413 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 414 { 415 struct thread *td = curthread; 416 struct proc *p = td->td_proc; 417 struct sigacts *psp; 418 struct trapframe *regs; 419 struct l_sigframe *fp, frame; 420 l_sigset_t lmask; 421 int sig, code; 422 int oonstack, i; 423 424 PROC_LOCK_ASSERT(p, MA_OWNED); 425 psp = p->p_sigacts; 426 sig = ksi->ksi_signo; 427 code = ksi->ksi_code; 428 mtx_assert(&psp->ps_mtx, MA_OWNED); 429 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 430 /* Signal handler installed with SA_SIGINFO. */ 431 linux_rt_sendsig(catcher, ksi, mask); 432 return; 433 } 434 regs = td->td_frame; 435 oonstack = sigonstack(regs->tf_esp); 436 437 #ifdef DEBUG 438 if (ldebug(sendsig)) 439 printf(ARGS(sendsig, "%p, %d, %p, %u"), 440 catcher, sig, (void*)mask, code); 441 #endif 442 443 /* 444 * Allocate space for the signal handler context. 445 */ 446 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 447 SIGISMEMBER(psp->ps_sigonstack, sig)) { 448 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 449 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 450 } else 451 fp = (struct l_sigframe *)regs->tf_esp - 1; 452 mtx_unlock(&psp->ps_mtx); 453 PROC_UNLOCK(p); 454 455 /* 456 * Build the argument list for the signal handler. 457 */ 458 if (p->p_sysent->sv_sigtbl) 459 if (sig <= p->p_sysent->sv_sigsize) 460 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 461 462 bzero(&frame, sizeof(frame)); 463 464 frame.sf_handler = catcher; 465 frame.sf_sig = sig; 466 467 bsd_to_linux_sigset(mask, &lmask); 468 469 /* 470 * Build the signal context to be used by sigreturn. 471 */ 472 frame.sf_sc.sc_mask = lmask.__bits[0]; 473 frame.sf_sc.sc_gs = rgs(); 474 frame.sf_sc.sc_fs = regs->tf_fs; 475 frame.sf_sc.sc_es = regs->tf_es; 476 frame.sf_sc.sc_ds = regs->tf_ds; 477 frame.sf_sc.sc_edi = regs->tf_edi; 478 frame.sf_sc.sc_esi = regs->tf_esi; 479 frame.sf_sc.sc_ebp = regs->tf_ebp; 480 frame.sf_sc.sc_ebx = regs->tf_ebx; 481 frame.sf_sc.sc_edx = regs->tf_edx; 482 frame.sf_sc.sc_ecx = regs->tf_ecx; 483 frame.sf_sc.sc_eax = regs->tf_eax; 484 frame.sf_sc.sc_eip = regs->tf_eip; 485 frame.sf_sc.sc_cs = regs->tf_cs; 486 frame.sf_sc.sc_eflags = regs->tf_eflags; 487 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 488 frame.sf_sc.sc_ss = regs->tf_ss; 489 frame.sf_sc.sc_err = regs->tf_err; 490 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 491 492 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 493 frame.sf_extramask[i] = lmask.__bits[i+1]; 494 495 if (copyout(&frame, fp, sizeof(frame)) != 0) { 496 /* 497 * Process has trashed its stack; give it an illegal 498 * instruction to halt it in its tracks. 499 */ 500 PROC_LOCK(p); 501 sigexit(td, SIGILL); 502 } 503 504 /* 505 * Build context to run handler in. 506 */ 507 regs->tf_esp = (int)fp; 508 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 509 regs->tf_eflags &= ~(PSL_T | PSL_VM); 510 regs->tf_cs = _ucodesel; 511 regs->tf_ds = _udatasel; 512 regs->tf_es = _udatasel; 513 regs->tf_fs = _udatasel; 514 regs->tf_ss = _udatasel; 515 PROC_LOCK(p); 516 mtx_lock(&psp->ps_mtx); 517 } 518 519 /* 520 * System call to cleanup state after a signal 521 * has been taken. Reset signal mask and 522 * stack state from context left by sendsig (above). 523 * Return to previous pc and psl as specified by 524 * context left by sendsig. Check carefully to 525 * make sure that the user has not modified the 526 * psl to gain improper privileges or to cause 527 * a machine fault. 528 */ 529 int 530 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 531 { 532 struct proc *p = td->td_proc; 533 struct l_sigframe frame; 534 struct trapframe *regs; 535 l_sigset_t lmask; 536 int eflags, i; 537 ksiginfo_t ksi; 538 539 regs = td->td_frame; 540 541 #ifdef DEBUG 542 if (ldebug(sigreturn)) 543 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 544 #endif 545 /* 546 * The trampoline code hands us the sigframe. 547 * It is unsafe to keep track of it ourselves, in the event that a 548 * program jumps out of a signal handler. 549 */ 550 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 551 return (EFAULT); 552 553 /* 554 * Check for security violations. 555 */ 556 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 557 eflags = frame.sf_sc.sc_eflags; 558 /* 559 * XXX do allow users to change the privileged flag PSL_RF. The 560 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 561 * sometimes set it there too. tf_eflags is kept in the signal 562 * context during signal handling and there is no other place 563 * to remember it, so the PSL_RF bit may be corrupted by the 564 * signal handler without us knowing. Corruption of the PSL_RF 565 * bit at worst causes one more or one less debugger trap, so 566 * allowing it is fairly harmless. 567 */ 568 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 569 return(EINVAL); 570 571 /* 572 * Don't allow users to load a valid privileged %cs. Let the 573 * hardware check for invalid selectors, excess privilege in 574 * other selectors, invalid %eip's and invalid %esp's. 575 */ 576 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 577 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 578 ksiginfo_init_trap(&ksi); 579 ksi.ksi_signo = SIGBUS; 580 ksi.ksi_code = BUS_OBJERR; 581 ksi.ksi_trapno = T_PROTFLT; 582 ksi.ksi_addr = (void *)regs->tf_eip; 583 trapsignal(td, &ksi); 584 return(EINVAL); 585 } 586 587 lmask.__bits[0] = frame.sf_sc.sc_mask; 588 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 589 lmask.__bits[i+1] = frame.sf_extramask[i]; 590 PROC_LOCK(p); 591 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 592 SIG_CANTMASK(td->td_sigmask); 593 signotify(td); 594 PROC_UNLOCK(p); 595 596 /* 597 * Restore signal context. 598 */ 599 /* %gs was restored by the trampoline. */ 600 regs->tf_fs = frame.sf_sc.sc_fs; 601 regs->tf_es = frame.sf_sc.sc_es; 602 regs->tf_ds = frame.sf_sc.sc_ds; 603 regs->tf_edi = frame.sf_sc.sc_edi; 604 regs->tf_esi = frame.sf_sc.sc_esi; 605 regs->tf_ebp = frame.sf_sc.sc_ebp; 606 regs->tf_ebx = frame.sf_sc.sc_ebx; 607 regs->tf_edx = frame.sf_sc.sc_edx; 608 regs->tf_ecx = frame.sf_sc.sc_ecx; 609 regs->tf_eax = frame.sf_sc.sc_eax; 610 regs->tf_eip = frame.sf_sc.sc_eip; 611 regs->tf_cs = frame.sf_sc.sc_cs; 612 regs->tf_eflags = eflags; 613 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 614 regs->tf_ss = frame.sf_sc.sc_ss; 615 616 return (EJUSTRETURN); 617 } 618 619 /* 620 * System call to cleanup state after a signal 621 * has been taken. Reset signal mask and 622 * stack state from context left by rt_sendsig (above). 623 * Return to previous pc and psl as specified by 624 * context left by sendsig. Check carefully to 625 * make sure that the user has not modified the 626 * psl to gain improper privileges or to cause 627 * a machine fault. 628 */ 629 int 630 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 631 { 632 struct proc *p = td->td_proc; 633 struct l_ucontext uc; 634 struct l_sigcontext *context; 635 l_stack_t *lss; 636 stack_t ss; 637 struct trapframe *regs; 638 int eflags; 639 ksiginfo_t ksi; 640 641 regs = td->td_frame; 642 643 #ifdef DEBUG 644 if (ldebug(rt_sigreturn)) 645 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 646 #endif 647 /* 648 * The trampoline code hands us the ucontext. 649 * It is unsafe to keep track of it ourselves, in the event that a 650 * program jumps out of a signal handler. 651 */ 652 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 653 return (EFAULT); 654 655 context = &uc.uc_mcontext; 656 657 /* 658 * Check for security violations. 659 */ 660 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 661 eflags = context->sc_eflags; 662 /* 663 * XXX do allow users to change the privileged flag PSL_RF. The 664 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 665 * sometimes set it there too. tf_eflags is kept in the signal 666 * context during signal handling and there is no other place 667 * to remember it, so the PSL_RF bit may be corrupted by the 668 * signal handler without us knowing. Corruption of the PSL_RF 669 * bit at worst causes one more or one less debugger trap, so 670 * allowing it is fairly harmless. 671 */ 672 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 673 return(EINVAL); 674 675 /* 676 * Don't allow users to load a valid privileged %cs. Let the 677 * hardware check for invalid selectors, excess privilege in 678 * other selectors, invalid %eip's and invalid %esp's. 679 */ 680 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 681 if (!CS_SECURE(context->sc_cs)) { 682 ksiginfo_init_trap(&ksi); 683 ksi.ksi_signo = SIGBUS; 684 ksi.ksi_code = BUS_OBJERR; 685 ksi.ksi_trapno = T_PROTFLT; 686 ksi.ksi_addr = (void *)regs->tf_eip; 687 trapsignal(td, &ksi); 688 return(EINVAL); 689 } 690 691 PROC_LOCK(p); 692 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 693 SIG_CANTMASK(td->td_sigmask); 694 signotify(td); 695 PROC_UNLOCK(p); 696 697 /* 698 * Restore signal context 699 */ 700 /* %gs was restored by the trampoline. */ 701 regs->tf_fs = context->sc_fs; 702 regs->tf_es = context->sc_es; 703 regs->tf_ds = context->sc_ds; 704 regs->tf_edi = context->sc_edi; 705 regs->tf_esi = context->sc_esi; 706 regs->tf_ebp = context->sc_ebp; 707 regs->tf_ebx = context->sc_ebx; 708 regs->tf_edx = context->sc_edx; 709 regs->tf_ecx = context->sc_ecx; 710 regs->tf_eax = context->sc_eax; 711 regs->tf_eip = context->sc_eip; 712 regs->tf_cs = context->sc_cs; 713 regs->tf_eflags = eflags; 714 regs->tf_esp = context->sc_esp_at_signal; 715 regs->tf_ss = context->sc_ss; 716 717 /* 718 * call sigaltstack & ignore results.. 719 */ 720 lss = &uc.uc_stack; 721 ss.ss_sp = lss->ss_sp; 722 ss.ss_size = lss->ss_size; 723 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 724 725 #ifdef DEBUG 726 if (ldebug(rt_sigreturn)) 727 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 728 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 729 #endif 730 (void)kern_sigaltstack(td, &ss, NULL); 731 732 return (EJUSTRETURN); 733 } 734 735 /* 736 * MPSAFE 737 */ 738 static void 739 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 740 { 741 args[0] = tf->tf_ebx; 742 args[1] = tf->tf_ecx; 743 args[2] = tf->tf_edx; 744 args[3] = tf->tf_esi; 745 args[4] = tf->tf_edi; 746 args[5] = tf->tf_ebp; /* Unconfirmed */ 747 *params = NULL; /* no copyin */ 748 } 749 750 /* 751 * If a linux binary is exec'ing something, try this image activator 752 * first. We override standard shell script execution in order to 753 * be able to modify the interpreter path. We only do this if a linux 754 * binary is doing the exec, so we do not create an EXEC module for it. 755 */ 756 static int exec_linux_imgact_try(struct image_params *iparams); 757 758 static int 759 exec_linux_imgact_try(struct image_params *imgp) 760 { 761 const char *head = (const char *)imgp->image_header; 762 char *rpath; 763 int error = -1, len; 764 765 /* 766 * The interpreter for shell scripts run from a linux binary needs 767 * to be located in /compat/linux if possible in order to recursively 768 * maintain linux path emulation. 769 */ 770 if (((const short *)head)[0] == SHELLMAGIC) { 771 /* 772 * Run our normal shell image activator. If it succeeds attempt 773 * to use the alternate path for the interpreter. If an alternate 774 * path is found, use our stringspace to store it. 775 */ 776 if ((error = exec_shell_imgact(imgp)) == 0) { 777 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 778 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 779 if (rpath != NULL) { 780 len = strlen(rpath) + 1; 781 782 if (len <= MAXSHELLCMDLEN) { 783 memcpy(imgp->interpreter_name, rpath, len); 784 } 785 free(rpath, M_TEMP); 786 } 787 } 788 } 789 return(error); 790 } 791 792 /* 793 * exec_setregs may initialize some registers differently than Linux 794 * does, thus potentially confusing Linux binaries. If necessary, we 795 * override the exec_setregs default(s) here. 796 */ 797 static void 798 exec_linux_setregs(struct thread *td, u_long entry, 799 u_long stack, u_long ps_strings) 800 { 801 static const u_short control = __LINUX_NPXCW__; 802 struct pcb *pcb = td->td_pcb; 803 804 exec_setregs(td, entry, stack, ps_strings); 805 806 /* Linux sets %gs to 0, we default to _udatasel */ 807 pcb->pcb_gs = 0; load_gs(0); 808 809 /* Linux sets the i387 to extended precision. */ 810 fldcw(&control); 811 } 812 813 struct sysentvec linux_sysvec = { 814 LINUX_SYS_MAXSYSCALL, 815 linux_sysent, 816 0, 817 LINUX_SIGTBLSZ, 818 bsd_to_linux_signal, 819 ELAST + 1, 820 bsd_to_linux_errno, 821 translate_traps, 822 linux_fixup, 823 linux_sendsig, 824 linux_sigcode, 825 &linux_szsigcode, 826 linux_prepsyscall, 827 "Linux a.out", 828 NULL, 829 exec_linux_imgact_try, 830 LINUX_MINSIGSTKSZ, 831 PAGE_SIZE, 832 VM_MIN_ADDRESS, 833 VM_MAXUSER_ADDRESS, 834 USRSTACK, 835 PS_STRINGS, 836 VM_PROT_ALL, 837 exec_copyout_strings, 838 exec_linux_setregs, 839 NULL 840 }; 841 842 struct sysentvec elf_linux_sysvec = { 843 LINUX_SYS_MAXSYSCALL, 844 linux_sysent, 845 0, 846 LINUX_SIGTBLSZ, 847 bsd_to_linux_signal, 848 ELAST + 1, 849 bsd_to_linux_errno, 850 translate_traps, 851 elf_linux_fixup, 852 linux_sendsig, 853 linux_sigcode, 854 &linux_szsigcode, 855 linux_prepsyscall, 856 "Linux ELF", 857 elf32_coredump, 858 exec_linux_imgact_try, 859 LINUX_MINSIGSTKSZ, 860 PAGE_SIZE, 861 VM_MIN_ADDRESS, 862 VM_MAXUSER_ADDRESS, 863 USRSTACK, 864 PS_STRINGS, 865 VM_PROT_ALL, 866 exec_copyout_strings, 867 exec_linux_setregs, 868 NULL 869 }; 870 871 static Elf32_Brandinfo linux_brand = { 872 ELFOSABI_LINUX, 873 EM_386, 874 "Linux", 875 "/compat/linux", 876 "/lib/ld-linux.so.1", 877 &elf_linux_sysvec, 878 NULL, 879 BI_CAN_EXEC_DYN, 880 }; 881 882 static Elf32_Brandinfo linux_glibc2brand = { 883 ELFOSABI_LINUX, 884 EM_386, 885 "Linux", 886 "/compat/linux", 887 "/lib/ld-linux.so.2", 888 &elf_linux_sysvec, 889 NULL, 890 BI_CAN_EXEC_DYN, 891 }; 892 893 Elf32_Brandinfo *linux_brandlist[] = { 894 &linux_brand, 895 &linux_glibc2brand, 896 NULL 897 }; 898 899 static int 900 linux_elf_modevent(module_t mod, int type, void *data) 901 { 902 Elf32_Brandinfo **brandinfo; 903 int error; 904 struct linux_ioctl_handler **lihp; 905 struct linux_device_handler **ldhp; 906 907 error = 0; 908 909 switch(type) { 910 case MOD_LOAD: 911 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 912 ++brandinfo) 913 if (elf32_insert_brand_entry(*brandinfo) < 0) 914 error = EINVAL; 915 if (error == 0) { 916 SET_FOREACH(lihp, linux_ioctl_handler_set) 917 linux_ioctl_register_handler(*lihp); 918 SET_FOREACH(ldhp, linux_device_handler_set) 919 linux_device_register_handler(*ldhp); 920 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 921 sx_init(&emul_shared_lock, "emuldata->shared lock"); 922 LIST_INIT(&futex_list); 923 sx_init(&futex_sx, "futex protection lock"); 924 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 925 NULL, 1000); 926 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, 927 NULL, 1000); 928 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 929 NULL, 1000); 930 if (bootverbose) 931 printf("Linux ELF exec handler installed\n"); 932 } else 933 printf("cannot insert Linux ELF brand handler\n"); 934 break; 935 case MOD_UNLOAD: 936 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 937 ++brandinfo) 938 if (elf32_brand_inuse(*brandinfo)) 939 error = EBUSY; 940 if (error == 0) { 941 for (brandinfo = &linux_brandlist[0]; 942 *brandinfo != NULL; ++brandinfo) 943 if (elf32_remove_brand_entry(*brandinfo) < 0) 944 error = EINVAL; 945 } 946 if (error == 0) { 947 SET_FOREACH(lihp, linux_ioctl_handler_set) 948 linux_ioctl_unregister_handler(*lihp); 949 SET_FOREACH(ldhp, linux_device_handler_set) 950 linux_device_unregister_handler(*ldhp); 951 mtx_destroy(&emul_lock); 952 sx_destroy(&emul_shared_lock); 953 sx_destroy(&futex_sx); 954 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 955 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 956 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 957 if (bootverbose) 958 printf("Linux ELF exec handler removed\n"); 959 } else 960 printf("Could not deinstall ELF interpreter entry\n"); 961 break; 962 default: 963 return EOPNOTSUPP; 964 } 965 return error; 966 } 967 968 static moduledata_t linux_elf_mod = { 969 "linuxelf", 970 linux_elf_modevent, 971 0 972 }; 973 974 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 975