1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 #include "opt_compat.h" 36 37 #ifndef COMPAT_FREEBSD32 38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 
39 #endif 40 41 #define __ELF_WORD_SIZE 32 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/exec.h> 46 #include <sys/fcntl.h> 47 #include <sys/imgact.h> 48 #include <sys/imgact_elf.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/module.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/sysctl.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/sysent.h> 60 #include <sys/sysproto.h> 61 #include <sys/vnode.h> 62 #include <sys/eventhandler.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_extern.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_object.h> 69 #include <vm/vm_page.h> 70 #include <vm/vm_param.h> 71 72 #include <machine/cpu.h> 73 #include <machine/md_var.h> 74 #include <machine/pcb.h> 75 #include <machine/specialreg.h> 76 77 #include <amd64/linux32/linux.h> 78 #include <amd64/linux32/linux32_proto.h> 79 #include <compat/linux/linux_emul.h> 80 #include <compat/linux/linux_futex.h> 81 #include <compat/linux/linux_ioctl.h> 82 #include <compat/linux/linux_mib.h> 83 #include <compat/linux/linux_misc.h> 84 #include <compat/linux/linux_signal.h> 85 #include <compat/linux/linux_util.h> 86 #include <compat/linux/linux_vdso.h> 87 88 MODULE_VERSION(linux, 1); 89 90 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 91 92 #define AUXARGS_ENTRY_32(pos, id, val) \ 93 do { \ 94 suword32(pos++, id); \ 95 suword32(pos++, val); \ 96 } while (0) 97 98 #if BYTE_ORDER == LITTLE_ENDIAN 99 #define SHELLMAGIC 0x2123 /* #! */ 100 #else 101 #define SHELLMAGIC 0x2321 102 #endif 103 104 /* 105 * Allow the sendsig functions to use the ldebug() facility 106 * even though they are not syscalls themselves. Map them 107 * to syscall 0. This is slightly less bogus than using 108 * ldebug(sigreturn). 
109 */ 110 #define LINUX_SYS_linux_rt_sendsig 0 111 #define LINUX_SYS_linux_sendsig 0 112 113 const char *linux_kplatform; 114 static int linux_szsigcode; 115 static vm_object_t linux_shared_page_obj; 116 static char *linux_shared_page_mapping; 117 extern char _binary_linux32_locore_o_start; 118 extern char _binary_linux32_locore_o_end; 119 120 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 121 122 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 123 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 124 125 static int elf_linux_fixup(register_t **stack_base, 126 struct image_params *iparams); 127 static register_t *linux_copyout_strings(struct image_params *imgp); 128 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 129 static void exec_linux_setregs(struct thread *td, 130 struct image_params *imgp, u_long stack); 131 static void linux32_fixlimit(struct rlimit *rl, int which); 132 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 133 static void linux_vdso_install(void *param); 134 static void linux_vdso_deinstall(void *param); 135 136 static eventhandler_tag linux_exit_tag; 137 static eventhandler_tag linux_exec_tag; 138 static eventhandler_tag linux_thread_dtor_tag; 139 140 /* 141 * Linux syscalls return negative errno's, we do positive and map them 142 * Reference: 143 * FreeBSD: src/sys/sys/errno.h 144 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 145 * linux-2.6.17.8/include/asm-generic/errno.h 146 */ 147 static int bsd_to_linux_errno[ELAST + 1] = { 148 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 149 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 150 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 151 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 152 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 153 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 154 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 155 -116, -66, -6, -6, -6, -6, -6, -37, 
-38, -9, 156 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 157 -72, -67, -71 158 }; 159 160 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 161 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 162 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 163 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 164 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 165 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 166 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 167 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 168 0, LINUX_SIGUSR1, LINUX_SIGUSR2 169 }; 170 171 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 172 SIGHUP, SIGINT, SIGQUIT, SIGILL, 173 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 174 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 175 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 176 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 177 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 178 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 179 SIGIO, SIGURG, SIGSYS 180 }; 181 182 #define LINUX_T_UNKNOWN 255 183 static int _bsd_to_linux_trapcode[] = { 184 LINUX_T_UNKNOWN, /* 0 */ 185 6, /* 1 T_PRIVINFLT */ 186 LINUX_T_UNKNOWN, /* 2 */ 187 3, /* 3 T_BPTFLT */ 188 LINUX_T_UNKNOWN, /* 4 */ 189 LINUX_T_UNKNOWN, /* 5 */ 190 16, /* 6 T_ARITHTRAP */ 191 254, /* 7 T_ASTFLT */ 192 LINUX_T_UNKNOWN, /* 8 */ 193 13, /* 9 T_PROTFLT */ 194 1, /* 10 T_TRCTRAP */ 195 LINUX_T_UNKNOWN, /* 11 */ 196 14, /* 12 T_PAGEFLT */ 197 LINUX_T_UNKNOWN, /* 13 */ 198 17, /* 14 T_ALIGNFLT */ 199 LINUX_T_UNKNOWN, /* 15 */ 200 LINUX_T_UNKNOWN, /* 16 */ 201 LINUX_T_UNKNOWN, /* 17 */ 202 0, /* 18 T_DIVIDE */ 203 2, /* 19 T_NMI */ 204 4, /* 20 T_OFLOW */ 205 5, /* 21 T_BOUND */ 206 7, /* 22 T_DNA */ 207 8, /* 23 T_DOUBLEFLT */ 208 9, /* 24 T_FPOPFLT */ 209 10, /* 25 T_TSSFLT */ 210 11, /* 26 T_SEGNPFLT */ 211 12, /* 27 T_STKFLT */ 212 18, /* 28 T_MCHK */ 213 19, /* 29 T_XMMFLT */ 214 15 /* 30 T_RESERVED */ 215 }; 216 #define bsd_to_linux_trapcode(code) \ 217 
((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 218 _bsd_to_linux_trapcode[(code)]: \ 219 LINUX_T_UNKNOWN) 220 221 struct linux32_ps_strings { 222 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 223 u_int ps_nargvstr; /* the number of argument strings */ 224 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 225 u_int ps_nenvstr; /* the number of environment strings */ 226 }; 227 228 LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 229 LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 230 LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 231 LINUX_VDSO_SYM_CHAR(linux_platform); 232 233 /* 234 * If FreeBSD & Linux have a difference of opinion about what a trap 235 * means, deal with it here. 236 * 237 * MPSAFE 238 */ 239 static int 240 translate_traps(int signal, int trap_code) 241 { 242 if (signal != SIGBUS) 243 return signal; 244 switch (trap_code) { 245 case T_PROTFLT: 246 case T_TSSFLT: 247 case T_DOUBLEFLT: 248 case T_PAGEFLT: 249 return SIGSEGV; 250 default: 251 return signal; 252 } 253 } 254 255 static int 256 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 257 { 258 Elf32_Auxargs *args; 259 Elf32_Addr *base; 260 Elf32_Addr *pos; 261 struct linux32_ps_strings *arginfo; 262 263 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 264 265 KASSERT(curthread->td_proc == imgp->proc, 266 ("unsafe elf_linux_fixup(), should be curproc")); 267 base = (Elf32_Addr *)*stack_base; 268 args = (Elf32_Auxargs *)imgp->auxargs; 269 pos = base + (imgp->args->argc + imgp->args->envc + 2); 270 271 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 272 imgp->proc->p_sysent->sv_shared_page_base); 273 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 274 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 275 276 /* 277 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 278 * as it has appeared in the 2.4.0-rc7 first time. 
279 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 280 * glibc falls back to the hard-coded CLK_TCK value when aux entry 281 * is not present. 282 * Also see linux_times() implementation. 283 */ 284 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 285 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 286 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 287 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 288 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 289 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 290 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 291 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 292 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 293 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 294 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 295 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 296 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 297 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 298 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 299 if (args->execfd != -1) 300 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 301 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 302 303 free(imgp->auxargs, M_TEMP); 304 imgp->auxargs = NULL; 305 306 base--; 307 suword32(base, (uint32_t)imgp->args->argc); 308 *stack_base = (register_t *)base; 309 return (0); 310 } 311 312 static void 313 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 314 { 315 struct thread *td = curthread; 316 struct proc *p = td->td_proc; 317 struct sigacts *psp; 318 struct trapframe *regs; 319 struct l_rt_sigframe *fp, frame; 320 int oonstack; 321 int sig; 322 int code; 323 324 sig = ksi->ksi_signo; 325 code = ksi->ksi_code; 326 PROC_LOCK_ASSERT(p, MA_OWNED); 327 psp = p->p_sigacts; 328 mtx_assert(&psp->ps_mtx, MA_OWNED); 329 regs = td->td_frame; 330 oonstack = sigonstack(regs->tf_rsp); 331 332 #ifdef DEBUG 333 if (ldebug(rt_sendsig)) 334 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 335 catcher, sig, 
(void*)mask, code); 336 #endif 337 /* 338 * Allocate space for the signal handler context. 339 */ 340 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 341 SIGISMEMBER(psp->ps_sigonstack, sig)) { 342 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 343 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 344 } else 345 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 346 mtx_unlock(&psp->ps_mtx); 347 348 /* 349 * Build the argument list for the signal handler. 350 */ 351 if (p->p_sysent->sv_sigtbl) 352 if (sig <= p->p_sysent->sv_sigsize) 353 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 354 355 bzero(&frame, sizeof(frame)); 356 357 frame.sf_handler = PTROUT(catcher); 358 frame.sf_sig = sig; 359 frame.sf_siginfo = PTROUT(&fp->sf_si); 360 frame.sf_ucontext = PTROUT(&fp->sf_sc); 361 362 /* Fill in POSIX parts */ 363 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 364 365 /* 366 * Build the signal context to be used by sigreturn 367 * and libgcc unwind. 368 */ 369 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 370 frame.sf_sc.uc_link = 0; /* XXX ??? */ 371 372 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 373 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 374 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 375 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 376 PROC_UNLOCK(p); 377 378 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 379 380 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 381 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 382 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 383 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 384 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 385 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 386 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 387 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 388 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 389 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 390 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 391 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 392 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 393 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 394 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 395 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 396 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 397 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 398 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 399 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 400 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 401 402 #ifdef DEBUG 403 if (ldebug(rt_sendsig)) 404 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 405 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 406 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 407 #endif 408 409 if (copyout(&frame, fp, sizeof(frame)) != 0) { 410 /* 411 * Process has trashed its stack; give it an illegal 412 * instruction to halt it in its tracks. 413 */ 414 #ifdef DEBUG 415 if (ldebug(rt_sendsig)) 416 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 417 fp, oonstack); 418 #endif 419 PROC_LOCK(p); 420 sigexit(td, SIGILL); 421 } 422 423 /* 424 * Build context to run handler in. 
425 */ 426 regs->tf_rsp = PTROUT(fp); 427 regs->tf_rip = linux32_rt_sigcode; 428 regs->tf_rflags &= ~(PSL_T | PSL_D); 429 regs->tf_cs = _ucode32sel; 430 regs->tf_ss = _udatasel; 431 regs->tf_ds = _udatasel; 432 regs->tf_es = _udatasel; 433 regs->tf_fs = _ufssel; 434 regs->tf_gs = _ugssel; 435 regs->tf_flags = TF_HASSEGS; 436 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 437 PROC_LOCK(p); 438 mtx_lock(&psp->ps_mtx); 439 } 440 441 442 /* 443 * Send an interrupt to process. 444 * 445 * Stack is set up to allow sigcode stored 446 * in u. to call routine, followed by kcall 447 * to sigreturn routine below. After sigreturn 448 * resets the signal mask, the stack, and the 449 * frame pointer, it returns to the user 450 * specified pc, psl. 451 */ 452 static void 453 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 454 { 455 struct thread *td = curthread; 456 struct proc *p = td->td_proc; 457 struct sigacts *psp; 458 struct trapframe *regs; 459 struct l_sigframe *fp, frame; 460 l_sigset_t lmask; 461 int oonstack, i; 462 int sig, code; 463 464 sig = ksi->ksi_signo; 465 code = ksi->ksi_code; 466 PROC_LOCK_ASSERT(p, MA_OWNED); 467 psp = p->p_sigacts; 468 mtx_assert(&psp->ps_mtx, MA_OWNED); 469 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 470 /* Signal handler installed with SA_SIGINFO. */ 471 linux_rt_sendsig(catcher, ksi, mask); 472 return; 473 } 474 475 regs = td->td_frame; 476 oonstack = sigonstack(regs->tf_rsp); 477 478 #ifdef DEBUG 479 if (ldebug(sendsig)) 480 printf(ARGS(sendsig, "%p, %d, %p, %u"), 481 catcher, sig, (void*)mask, code); 482 #endif 483 484 /* 485 * Allocate space for the signal handler context. 
486 */ 487 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 488 SIGISMEMBER(psp->ps_sigonstack, sig)) { 489 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 490 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 491 } else 492 fp = (struct l_sigframe *)regs->tf_rsp - 1; 493 mtx_unlock(&psp->ps_mtx); 494 PROC_UNLOCK(p); 495 496 /* 497 * Build the argument list for the signal handler. 498 */ 499 if (p->p_sysent->sv_sigtbl) 500 if (sig <= p->p_sysent->sv_sigsize) 501 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 502 503 bzero(&frame, sizeof(frame)); 504 505 frame.sf_handler = PTROUT(catcher); 506 frame.sf_sig = sig; 507 508 bsd_to_linux_sigset(mask, &lmask); 509 510 /* 511 * Build the signal context to be used by sigreturn. 512 */ 513 frame.sf_sc.sc_mask = lmask.__bits[0]; 514 frame.sf_sc.sc_gs = regs->tf_gs; 515 frame.sf_sc.sc_fs = regs->tf_fs; 516 frame.sf_sc.sc_es = regs->tf_es; 517 frame.sf_sc.sc_ds = regs->tf_ds; 518 frame.sf_sc.sc_edi = regs->tf_rdi; 519 frame.sf_sc.sc_esi = regs->tf_rsi; 520 frame.sf_sc.sc_ebp = regs->tf_rbp; 521 frame.sf_sc.sc_ebx = regs->tf_rbx; 522 frame.sf_sc.sc_esp = regs->tf_rsp; 523 frame.sf_sc.sc_edx = regs->tf_rdx; 524 frame.sf_sc.sc_ecx = regs->tf_rcx; 525 frame.sf_sc.sc_eax = regs->tf_rax; 526 frame.sf_sc.sc_eip = regs->tf_rip; 527 frame.sf_sc.sc_cs = regs->tf_cs; 528 frame.sf_sc.sc_eflags = regs->tf_rflags; 529 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 530 frame.sf_sc.sc_ss = regs->tf_ss; 531 frame.sf_sc.sc_err = regs->tf_err; 532 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 533 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 534 535 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 536 frame.sf_extramask[i] = lmask.__bits[i+1]; 537 538 if (copyout(&frame, fp, sizeof(frame)) != 0) { 539 /* 540 * Process has trashed its stack; give it an illegal 541 * instruction to halt it in its tracks. 542 */ 543 PROC_LOCK(p); 544 sigexit(td, SIGILL); 545 } 546 547 /* 548 * Build context to run handler in. 
549 */ 550 regs->tf_rsp = PTROUT(fp); 551 regs->tf_rip = linux32_sigcode; 552 regs->tf_rflags &= ~(PSL_T | PSL_D); 553 regs->tf_cs = _ucode32sel; 554 regs->tf_ss = _udatasel; 555 regs->tf_ds = _udatasel; 556 regs->tf_es = _udatasel; 557 regs->tf_fs = _ufssel; 558 regs->tf_gs = _ugssel; 559 regs->tf_flags = TF_HASSEGS; 560 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 561 PROC_LOCK(p); 562 mtx_lock(&psp->ps_mtx); 563 } 564 565 /* 566 * System call to cleanup state after a signal 567 * has been taken. Reset signal mask and 568 * stack state from context left by sendsig (above). 569 * Return to previous pc and psl as specified by 570 * context left by sendsig. Check carefully to 571 * make sure that the user has not modified the 572 * psl to gain improper privileges or to cause 573 * a machine fault. 574 */ 575 int 576 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 577 { 578 struct l_sigframe frame; 579 struct trapframe *regs; 580 sigset_t bmask; 581 l_sigset_t lmask; 582 int eflags, i; 583 ksiginfo_t ksi; 584 585 regs = td->td_frame; 586 587 #ifdef DEBUG 588 if (ldebug(sigreturn)) 589 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 590 #endif 591 /* 592 * The trampoline code hands us the sigframe. 593 * It is unsafe to keep track of it ourselves, in the event that a 594 * program jumps out of a signal handler. 595 */ 596 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 597 return (EFAULT); 598 599 /* 600 * Check for security violations. 601 */ 602 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 603 eflags = frame.sf_sc.sc_eflags; 604 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 605 return(EINVAL); 606 607 /* 608 * Don't allow users to load a valid privileged %cs. Let the 609 * hardware check for invalid selectors, excess privilege in 610 * other selectors, invalid %eip's and invalid %esp's. 
611 */ 612 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 613 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 614 ksiginfo_init_trap(&ksi); 615 ksi.ksi_signo = SIGBUS; 616 ksi.ksi_code = BUS_OBJERR; 617 ksi.ksi_trapno = T_PROTFLT; 618 ksi.ksi_addr = (void *)regs->tf_rip; 619 trapsignal(td, &ksi); 620 return(EINVAL); 621 } 622 623 lmask.__bits[0] = frame.sf_sc.sc_mask; 624 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 625 lmask.__bits[i+1] = frame.sf_extramask[i]; 626 linux_to_bsd_sigset(&lmask, &bmask); 627 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 628 629 /* 630 * Restore signal context. 631 */ 632 regs->tf_rdi = frame.sf_sc.sc_edi; 633 regs->tf_rsi = frame.sf_sc.sc_esi; 634 regs->tf_rbp = frame.sf_sc.sc_ebp; 635 regs->tf_rbx = frame.sf_sc.sc_ebx; 636 regs->tf_rdx = frame.sf_sc.sc_edx; 637 regs->tf_rcx = frame.sf_sc.sc_ecx; 638 regs->tf_rax = frame.sf_sc.sc_eax; 639 regs->tf_rip = frame.sf_sc.sc_eip; 640 regs->tf_cs = frame.sf_sc.sc_cs; 641 regs->tf_ds = frame.sf_sc.sc_ds; 642 regs->tf_es = frame.sf_sc.sc_es; 643 regs->tf_fs = frame.sf_sc.sc_fs; 644 regs->tf_gs = frame.sf_sc.sc_gs; 645 regs->tf_rflags = eflags; 646 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 647 regs->tf_ss = frame.sf_sc.sc_ss; 648 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 649 650 return (EJUSTRETURN); 651 } 652 653 /* 654 * System call to cleanup state after a signal 655 * has been taken. Reset signal mask and 656 * stack state from context left by rt_sendsig (above). 657 * Return to previous pc and psl as specified by 658 * context left by sendsig. Check carefully to 659 * make sure that the user has not modified the 660 * psl to gain improper privileges or to cause 661 * a machine fault. 
662 */ 663 int 664 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 665 { 666 struct l_ucontext uc; 667 struct l_sigcontext *context; 668 sigset_t bmask; 669 l_stack_t *lss; 670 stack_t ss; 671 struct trapframe *regs; 672 int eflags; 673 ksiginfo_t ksi; 674 675 regs = td->td_frame; 676 677 #ifdef DEBUG 678 if (ldebug(rt_sigreturn)) 679 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 680 #endif 681 /* 682 * The trampoline code hands us the ucontext. 683 * It is unsafe to keep track of it ourselves, in the event that a 684 * program jumps out of a signal handler. 685 */ 686 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 687 return (EFAULT); 688 689 context = &uc.uc_mcontext; 690 691 /* 692 * Check for security violations. 693 */ 694 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 695 eflags = context->sc_eflags; 696 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 697 return(EINVAL); 698 699 /* 700 * Don't allow users to load a valid privileged %cs. Let the 701 * hardware check for invalid selectors, excess privilege in 702 * other selectors, invalid %eip's and invalid %esp's. 
703 */ 704 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 705 if (!CS_SECURE(context->sc_cs)) { 706 ksiginfo_init_trap(&ksi); 707 ksi.ksi_signo = SIGBUS; 708 ksi.ksi_code = BUS_OBJERR; 709 ksi.ksi_trapno = T_PROTFLT; 710 ksi.ksi_addr = (void *)regs->tf_rip; 711 trapsignal(td, &ksi); 712 return(EINVAL); 713 } 714 715 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 716 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 717 718 /* 719 * Restore signal context 720 */ 721 regs->tf_gs = context->sc_gs; 722 regs->tf_fs = context->sc_fs; 723 regs->tf_es = context->sc_es; 724 regs->tf_ds = context->sc_ds; 725 regs->tf_rdi = context->sc_edi; 726 regs->tf_rsi = context->sc_esi; 727 regs->tf_rbp = context->sc_ebp; 728 regs->tf_rbx = context->sc_ebx; 729 regs->tf_rdx = context->sc_edx; 730 regs->tf_rcx = context->sc_ecx; 731 regs->tf_rax = context->sc_eax; 732 regs->tf_rip = context->sc_eip; 733 regs->tf_cs = context->sc_cs; 734 regs->tf_rflags = eflags; 735 regs->tf_rsp = context->sc_esp_at_signal; 736 regs->tf_ss = context->sc_ss; 737 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 738 739 /* 740 * call sigaltstack & ignore results.. 
741 */ 742 lss = &uc.uc_stack; 743 ss.ss_sp = PTRIN(lss->ss_sp); 744 ss.ss_size = lss->ss_size; 745 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 746 747 #ifdef DEBUG 748 if (ldebug(rt_sigreturn)) 749 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 750 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 751 #endif 752 (void)kern_sigaltstack(td, &ss, NULL); 753 754 return (EJUSTRETURN); 755 } 756 757 static int 758 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 759 { 760 struct proc *p; 761 struct trapframe *frame; 762 763 p = td->td_proc; 764 frame = td->td_frame; 765 766 sa->args[0] = frame->tf_rbx; 767 sa->args[1] = frame->tf_rcx; 768 sa->args[2] = frame->tf_rdx; 769 sa->args[3] = frame->tf_rsi; 770 sa->args[4] = frame->tf_rdi; 771 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 772 sa->code = frame->tf_rax; 773 774 if (sa->code >= p->p_sysent->sv_size) 775 sa->callp = &p->p_sysent->sv_table[0]; 776 else 777 sa->callp = &p->p_sysent->sv_table[sa->code]; 778 sa->narg = sa->callp->sy_narg; 779 780 td->td_retval[0] = 0; 781 td->td_retval[1] = frame->tf_rdx; 782 783 return (0); 784 } 785 786 /* 787 * If a linux binary is exec'ing something, try this image activator 788 * first. We override standard shell script execution in order to 789 * be able to modify the interpreter path. We only do this if a linux 790 * binary is doing the exec, so we do not create an EXEC module for it. 791 */ 792 static int exec_linux_imgact_try(struct image_params *iparams); 793 794 static int 795 exec_linux_imgact_try(struct image_params *imgp) 796 { 797 const char *head = (const char *)imgp->image_header; 798 char *rpath; 799 int error = -1; 800 801 /* 802 * The interpreter for shell scripts run from a linux binary needs 803 * to be located in /compat/linux if possible in order to recursively 804 * maintain linux path emulation. 805 */ 806 if (((const short *)head)[0] == SHELLMAGIC) { 807 /* 808 * Run our normal shell image activator. 
If it succeeds attempt 809 * to use the alternate path for the interpreter. If an 810 * alternate * path is found, use our stringspace to store it. 811 */ 812 if ((error = exec_shell_imgact(imgp)) == 0) { 813 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 814 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 815 AT_FDCWD); 816 if (rpath != NULL) 817 imgp->args->fname_buf = 818 imgp->interpreter_name = rpath; 819 } 820 } 821 return (error); 822 } 823 824 /* 825 * Clear registers on exec 826 * XXX copied from ia32_signal.c. 827 */ 828 static void 829 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 830 { 831 struct trapframe *regs = td->td_frame; 832 struct pcb *pcb = td->td_pcb; 833 834 mtx_lock(&dt_lock); 835 if (td->td_proc->p_md.md_ldt != NULL) 836 user_ldt_free(td); 837 else 838 mtx_unlock(&dt_lock); 839 840 critical_enter(); 841 wrmsr(MSR_FSBASE, 0); 842 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 843 pcb->pcb_fsbase = 0; 844 pcb->pcb_gsbase = 0; 845 critical_exit(); 846 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 847 848 bzero((char *)regs, sizeof(struct trapframe)); 849 regs->tf_rip = imgp->entry_addr; 850 regs->tf_rsp = stack; 851 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 852 regs->tf_gs = _ugssel; 853 regs->tf_fs = _ufssel; 854 regs->tf_es = _udatasel; 855 regs->tf_ds = _udatasel; 856 regs->tf_ss = _udatasel; 857 regs->tf_flags = TF_HASSEGS; 858 regs->tf_cs = _ucode32sel; 859 regs->tf_rbx = imgp->ps_strings; 860 861 fpstate_drop(td); 862 863 /* Do full restore on return so that we can change to a different %cs */ 864 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 865 td->td_retval[1] = 0; 866 } 867 868 /* 869 * XXX copied from ia32_sysvec.c. 
 */
/*
 * Copy the argument and environment strings of a new image to its user
 * stack and build the 32-bit argv/envp pointer vectors below them.
 *
 * The strings are placed below the ps_strings block at LINUX32_PS_STRINGS
 * (less SPARE_USRSPACE), the pointer vectors below the strings, and room
 * for the ELF auxiliary entries is reserved when imgp->auxargs is set.
 * The ps_strings fields are filled with the vector addresses and counts.
 *
 * Returns the user address of the first argv slot, which the comment
 * below calls the initial stack base.
 *
 * NOTE(review): the results of copyout() and suword32() are not checked,
 * so a fault while writing the new stack goes unreported here.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	u_int32_t *vectp;
	char *stringp, *destp;
	u_int32_t *stack_base;
	struct linux32_ps_strings *arginfo;

	/*
	 * Calculate string base and vector table pointers.
	 */
	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
	destp =	(caddr_t)arginfo - SPARE_USRSPACE -
	    roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *));

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets,and imgp->auxarg_size is room
		 * for argument of Runtime loader.
		 */
		vectp = (u_int32_t *) (destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) *
		    sizeof(u_int32_t));

	} else
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (u_int32_t *)(destp - (imgp->args->argc +
		    imgp->args->envc + 2) * sizeof(u_int32_t));

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;
	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.
	 * stringp walks the kernel copy while destp tracks the matching
	 * user address, so both advance past each string in step.
	 */
	for (; argc > 0; --argc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword32(vectp++, 0);

	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword32(vectp, 0);

	return ((register_t *)stack_base);
}

static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Ceilings applied by linux32_fixlimit(); each is exported read-write
 * under compat.linux32 and a value of 0 disables the clamp.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");

/*
 * Clamp a resource limit in place.  RLIMIT_DATA, RLIMIT_STACK and
 * RLIMIT_VMEM have rlim_cur and rlim_max lowered to the matching
 * linux32_max* value when that value is non-zero; any other resource
 * is left untouched.
 */
static void
linux32_fixlimit(struct rlimit *rl, int which)
{

	switch (which) {
	case RLIMIT_DATA:
		if (linux32_maxdsiz != 0) {
			if (rl->rlim_cur > linux32_maxdsiz)
				rl->rlim_cur = linux32_maxdsiz;
			if (rl->rlim_max > linux32_maxdsiz)
				rl->rlim_max = linux32_maxdsiz;
		}
		break;
	case RLIMIT_STACK:
		if (linux32_maxssiz != 0) {
			if (rl->rlim_cur > linux32_maxssiz)
				rl->rlim_cur = linux32_maxssiz;
			if (rl->rlim_max > linux32_maxssiz)
				rl->rlim_max = linux32_maxssiz;
		}
		break;
	case RLIMIT_VMEM:
		if (linux32_maxvmem != 0) {
			if (rl->rlim_cur > linux32_maxvmem)
				rl->rlim_cur = linux32_maxvmem;
			if (rl->rlim_max > linux32_maxvmem)
				rl->rlim_max = linux32_maxvmem;
		}
		break;
	}
}

/*
 * System entry vector for 32-bit Linux ELF images.  It ties together the
 * tables and handlers defined in this file; both brand entries below
 * point at it.
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_sigsize	= LINUX_SIGTBLSZ,
	.sv_sigtbl	= bsd_to_linux_signal,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= bsd_to_linux_errno,
	.sv_transtrap	= translate_traps,
	.sv_fixup	= elf_linux_fixup,
	.sv_sendsig	= linux_sendsig,
	.sv_sigcode	= &_binary_linux32_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_prepsyscall	= NULL,
	.sv_name	= "Linux ELF32",
	.sv_coredump	= elf32_coredump,
	.sv_imgact_try	= exec_linux_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_pagesize	= PAGE_SIZE,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= LINUX32_MAXUSER,
	.sv_usrstack	= LINUX32_USRSTACK,
	.sv_psstrings	= LINUX32_PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= exec_linux_setregs,
	.sv_fixlimit	= linux32_fixlimit,
	.sv_maxssiz	= &linux32_maxssiz,
	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
	.sv_syscallnames = NULL,
	.sv_shared_page_base = LINUX32_SHAREDPAGE,
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
};

/*
 * SYSINIT hook: install the linux32 locore image into the shared page.
 *
 * Computes the image size from its start/end markers and panics if it
 * exceeds sv_shared_page_len, then fixes up and relocates the image
 * through the __elfN(linux_vdso_*) helpers, copies it into the shared
 * page mapping and records the shared page object in the sysvec.
 * linux_kplatform is pointed at the platform string inside that mapping.
 * param is unused.
 */
static void
linux_vdso_install(void *param)
{

	linux_szsigcode = (&_binary_linux32_locore_o_end -
	    &_binary_linux32_locore_o_start);

	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
		panic("Linux invalid vdso size\n");

	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);

	linux_shared_page_obj = __elfN(linux_shared_page_init)
	    (&linux_shared_page_mapping);

	__elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE);

	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
	    linux_szsigcode);
	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;

	/* Translate the user-visible address into the kernel mapping. */
	linux_kplatform = linux_shared_page_mapping +
	    (linux_platform - (caddr_t)LINUX32_SHAREDPAGE);
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
    (sysinit_cfunc_t)linux_vdso_install, NULL);

/*
 * SYSUNINIT hook: hand the shared page object created by
 * linux_vdso_install() to __elfN(linux_shared_page_fini).  param is unused.
 *
 * NOTE(review): the ';' after the closing brace is a stray empty
 * declaration.
 */
static void
linux_vdso_deinstall(void *param)
{

	__elfN(linux_shared_page_fini)(linux_shared_page_obj);
};
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
    (sysinit_cfunc_t)linux_vdso_deinstall, NULL);

/* Vendor name and first descriptor word expected in the brand note. */
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;

/*
 * Derive an osrel value from a brand note.
 *
 * The descriptor words are located after the note header and the name,
 * whose size is rounded up to sizeof(Elf32_Addr).  If the first word is
 * not GNULINUX_ABI_DESC the note is rejected with FALSE; otherwise *osrel
 * is computed from the next three words and TRUE is returned.
 */
static boolean_t
linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
	const Elf32_Word *desc;
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));

	desc = (const Elf32_Word *)p;
	if (desc[0] != GNULINUX_ABI_DESC)
		return (FALSE);

	/*
	 * For linux we encode osrel as follows (see linux_mib.c):
	 * VVVMMMIII (version, major, minor), see linux_mib.c.
	 */
	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];

	return (TRUE);
}

/* Brand note description shared by both brand entries below. */
static Elf_Brandnote linux32_brandnote = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= linux32_trans_osrel
};

/* Brand entry for images whose interpreter is /lib/ld-linux.so.1. */
static Elf32_Brandinfo linux_brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.1",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

/* Brand entry for images whose interpreter is /lib/ld-linux.so.2. */
static Elf32_Brandinfo linux_glibc2brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.2",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

/* NULL-terminated list walked by linux_elf_modevent(). */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};

static int
linux_elf_modevent(module_t mod, int type, void *data)
{
	Elf32_Brandinfo **brandinfo;
	int error;
	struct linux_ioctl_handler **lihp;
	struct linux_device_handler **ldhp;

	error = 0;

	switch(type) {
	case MOD_LOAD:
		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
		     ++brandinfo)
			if (elf32_insert_brand_entry(*brandinfo) < 0)
				error = EINVAL;
		if (error == 0) {
			SET_FOREACH(lihp, linux_ioctl_handler_set)
				linux_ioctl_register_handler(*lihp);
			SET_FOREACH(ldhp, linux_device_handler_set)
				linux_device_register_handler(*ldhp);
			LIST_INIT(&futex_list);
			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
			linux_exit_tag =
EVENTHANDLER_REGISTER(process_exit, 1173 linux_proc_exit, NULL, 1000); 1174 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1175 linux_proc_exec, NULL, 1000); 1176 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, 1177 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); 1178 linux_osd_jail_register(); 1179 stclohz = (stathz ? stathz : hz); 1180 if (bootverbose) 1181 printf("Linux ELF exec handler installed\n"); 1182 } else 1183 printf("cannot insert Linux ELF brand handler\n"); 1184 break; 1185 case MOD_UNLOAD: 1186 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1187 ++brandinfo) 1188 if (elf32_brand_inuse(*brandinfo)) 1189 error = EBUSY; 1190 if (error == 0) { 1191 for (brandinfo = &linux_brandlist[0]; 1192 *brandinfo != NULL; ++brandinfo) 1193 if (elf32_remove_brand_entry(*brandinfo) < 0) 1194 error = EINVAL; 1195 } 1196 if (error == 0) { 1197 SET_FOREACH(lihp, linux_ioctl_handler_set) 1198 linux_ioctl_unregister_handler(*lihp); 1199 SET_FOREACH(ldhp, linux_device_handler_set) 1200 linux_device_unregister_handler(*ldhp); 1201 mtx_destroy(&futex_mtx); 1202 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1203 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1204 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); 1205 linux_osd_jail_deregister(); 1206 if (bootverbose) 1207 printf("Linux ELF exec handler removed\n"); 1208 } else 1209 printf("Could not deinstall ELF interpreter entry\n"); 1210 break; 1211 default: 1212 return (EOPNOTSUPP); 1213 } 1214 return (error); 1215 } 1216 1217 static moduledata_t linux_elf_mod = { 1218 "linuxelf", 1219 linux_elf_modevent, 1220 0 1221 }; 1222 1223 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1224