1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 #include "opt_compat.h" 36 37 #ifndef COMPAT_FREEBSD32 38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 39 #endif 40 41 #define __ELF_WORD_SIZE 32 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/exec.h> 46 #include <sys/fcntl.h> 47 #include <sys/imgact.h> 48 #include <sys/imgact_elf.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/module.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/sysctl.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/sysent.h> 60 #include <sys/sysproto.h> 61 #include <sys/vnode.h> 62 #include <sys/eventhandler.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_extern.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_object.h> 69 #include <vm/vm_page.h> 70 #include <vm/vm_param.h> 71 72 #include <machine/cpu.h> 73 #include <machine/md_var.h> 74 #include <machine/pcb.h> 75 #include <machine/specialreg.h> 76 77 #include <amd64/linux32/linux.h> 78 #include <amd64/linux32/linux32_proto.h> 79 #include <compat/linux/linux_emul.h> 80 #include <compat/linux/linux_futex.h> 81 #include <compat/linux/linux_ioctl.h> 82 #include <compat/linux/linux_mib.h> 83 #include <compat/linux/linux_misc.h> 84 #include <compat/linux/linux_signal.h> 85 #include <compat/linux/linux_util.h> 86 #include <compat/linux/linux_vdso.h> 87 88 MODULE_VERSION(linux, 1); 89 90 #define AUXARGS_ENTRY_32(pos, id, val) \ 91 do { \ 92 suword32(pos++, id); \ 93 suword32(pos++, val); \ 94 } while (0) 95 96 #if BYTE_ORDER == LITTLE_ENDIAN 97 #define SHELLMAGIC 0x2123 /* #! */ 98 #else 99 #define SHELLMAGIC 0x2321 100 #endif 101 102 /* 103 * Allow the sendsig functions to use the ldebug() facility 104 * even though they are not syscalls themselves. Map them 105 * to syscall 0. This is slightly less bogus than using 106 * ldebug(sigreturn). 107 */ 108 #define LINUX_SYS_linux_rt_sendsig 0 109 #define LINUX_SYS_linux_sendsig 0 110 111 const char *linux_kplatform; 112 static int linux_szsigcode; 113 static vm_object_t linux_shared_page_obj; 114 static char *linux_shared_page_mapping; 115 extern char _binary_linux32_locore_o_start; 116 extern char _binary_linux32_locore_o_end; 117 118 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 119 120 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 121 122 static int elf_linux_fixup(register_t **stack_base, 123 struct image_params *iparams); 124 static register_t *linux_copyout_strings(struct image_params *imgp); 125 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 126 static void exec_linux_setregs(struct thread *td, 127 struct image_params *imgp, u_long stack); 128 static void linux32_fixlimit(struct rlimit *rl, int which); 129 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 130 static void linux_vdso_install(void *param); 131 static void linux_vdso_deinstall(void *param); 132 133 static eventhandler_tag linux_exit_tag; 134 static eventhandler_tag linux_exec_tag; 135 static eventhandler_tag linux_thread_dtor_tag; 136 137 /* 138 * Linux syscalls return negative errno's, we do positive and map them 139 * Reference: 140 * FreeBSD: src/sys/sys/errno.h 141 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 142 * linux-2.6.17.8/include/asm-generic/errno.h 143 */ 144 static int bsd_to_linux_errno[ELAST + 1] = { 145 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 146 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 147 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 148 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 149 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 150 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 151 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 152 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 153 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 154 -72, -67, -71 155 }; 156 157 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 158 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 159 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 160 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 161 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 162 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 163 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 164 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 165 0, LINUX_SIGUSR1, LINUX_SIGUSR2 166 }; 167 168 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 169 SIGHUP, SIGINT, SIGQUIT, SIGILL, 170 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 171 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 172 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 173 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 174 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 175 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 176 SIGIO, SIGURG, SIGSYS 177 }; 178 179 #define LINUX_T_UNKNOWN 255 180 static int _bsd_to_linux_trapcode[] = { 181 LINUX_T_UNKNOWN, /* 0 */ 182 6, /* 1 T_PRIVINFLT */ 183 LINUX_T_UNKNOWN, /* 2 */ 184 3, /* 3 T_BPTFLT */ 185 LINUX_T_UNKNOWN, /* 4 */ 186 LINUX_T_UNKNOWN, /* 5 */ 187 16, /* 6 T_ARITHTRAP */ 188 254, /* 7 T_ASTFLT */ 189 LINUX_T_UNKNOWN, /* 8 */ 190 13, /* 9 T_PROTFLT */ 191 1, /* 10 T_TRCTRAP */ 192 LINUX_T_UNKNOWN, /* 11 */ 193 14, /* 12 T_PAGEFLT */ 194 LINUX_T_UNKNOWN, /* 13 */ 195 17, /* 14 T_ALIGNFLT */ 196 LINUX_T_UNKNOWN, /* 15 */ 197 LINUX_T_UNKNOWN, /* 16 */ 198 LINUX_T_UNKNOWN, /* 17 */ 199 0, /* 18 T_DIVIDE */ 200 2, /* 19 T_NMI */ 201 4, /* 20 T_OFLOW */ 202 5, /* 21 T_BOUND */ 203 7, /* 22 T_DNA */ 204 8, /* 23 T_DOUBLEFLT */ 205 9, /* 24 T_FPOPFLT */ 206 10, /* 25 T_TSSFLT */ 207 11, /* 26 T_SEGNPFLT */ 208 12, /* 27 T_STKFLT */ 209 18, /* 28 T_MCHK */ 210 19, /* 29 T_XMMFLT */ 211 15 /* 30 T_RESERVED */ 212 }; 213 #define bsd_to_linux_trapcode(code) \ 214 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 215 _bsd_to_linux_trapcode[(code)]: \ 216 LINUX_T_UNKNOWN) 217 218 struct linux32_ps_strings { 219 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 220 u_int ps_nargvstr; /* the number of argument strings */ 221 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 222 u_int ps_nenvstr; /* the number of environment strings */ 223 }; 224 225 LINUX_VDSO_SYM_INTPTR(linux32_sigcode); 226 LINUX_VDSO_SYM_INTPTR(linux32_rt_sigcode); 227 LINUX_VDSO_SYM_INTPTR(linux32_vsyscall); 228 LINUX_VDSO_SYM_CHAR(linux_platform); 229 230 /* 231 * If FreeBSD & Linux have a difference of opinion about what a trap 232 * means, deal with it here. 233 * 234 * MPSAFE 235 */ 236 static int 237 translate_traps(int signal, int trap_code) 238 { 239 if (signal != SIGBUS) 240 return signal; 241 switch (trap_code) { 242 case T_PROTFLT: 243 case T_TSSFLT: 244 case T_DOUBLEFLT: 245 case T_PAGEFLT: 246 return SIGSEGV; 247 default: 248 return signal; 249 } 250 } 251 252 static int 253 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 254 { 255 Elf32_Auxargs *args; 256 Elf32_Addr *base; 257 Elf32_Addr *pos; 258 struct linux32_ps_strings *arginfo; 259 260 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 261 262 KASSERT(curthread->td_proc == imgp->proc, 263 ("unsafe elf_linux_fixup(), should be curproc")); 264 base = (Elf32_Addr *)*stack_base; 265 args = (Elf32_Auxargs *)imgp->auxargs; 266 pos = base + (imgp->args->argc + imgp->args->envc + 2); 267 268 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, 269 imgp->proc->p_sysent->sv_shared_page_base); 270 AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); 271 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 272 273 /* 274 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 275 * as it has appeared in the 2.4.0-rc7 first time. 276 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 277 * glibc falls back to the hard-coded CLK_TCK value when aux entry 278 * is not present. 279 * Also see linux_times() implementation. 280 */ 281 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 282 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 283 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 284 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 285 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 286 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 287 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 288 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 289 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 290 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 291 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 292 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 293 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 294 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 295 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 296 if (args->execfd != -1) 297 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 298 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 299 300 free(imgp->auxargs, M_TEMP); 301 imgp->auxargs = NULL; 302 303 base--; 304 suword32(base, (uint32_t)imgp->args->argc); 305 *stack_base = (register_t *)base; 306 return (0); 307 } 308 309 static void 310 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 311 { 312 struct thread *td = curthread; 313 struct proc *p = td->td_proc; 314 struct sigacts *psp; 315 struct trapframe *regs; 316 struct l_rt_sigframe *fp, frame; 317 int oonstack; 318 int sig; 319 int code; 320 321 sig = ksi->ksi_signo; 322 code = ksi->ksi_code; 323 PROC_LOCK_ASSERT(p, MA_OWNED); 324 psp = p->p_sigacts; 325 mtx_assert(&psp->ps_mtx, MA_OWNED); 326 regs = td->td_frame; 327 oonstack = sigonstack(regs->tf_rsp); 328 329 #ifdef DEBUG 330 if (ldebug(rt_sendsig)) 331 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 332 catcher, sig, (void*)mask, code); 333 #endif 334 /* 335 * Allocate space for the signal handler context. 336 */ 337 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 338 SIGISMEMBER(psp->ps_sigonstack, sig)) { 339 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 340 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 341 } else 342 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 343 mtx_unlock(&psp->ps_mtx); 344 345 /* 346 * Build the argument list for the signal handler. 347 */ 348 if (p->p_sysent->sv_sigtbl) 349 if (sig <= p->p_sysent->sv_sigsize) 350 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 351 352 bzero(&frame, sizeof(frame)); 353 354 frame.sf_handler = PTROUT(catcher); 355 frame.sf_sig = sig; 356 frame.sf_siginfo = PTROUT(&fp->sf_si); 357 frame.sf_ucontext = PTROUT(&fp->sf_sc); 358 359 /* Fill in POSIX parts */ 360 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 361 362 /* 363 * Build the signal context to be used by sigreturn 364 * and libgcc unwind. 365 */ 366 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 367 frame.sf_sc.uc_link = 0; /* XXX ??? */ 368 369 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 370 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 371 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 372 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 373 PROC_UNLOCK(p); 374 375 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 376 377 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 378 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 379 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 380 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 381 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 382 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; 383 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 384 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 385 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 386 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 387 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 388 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 389 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 390 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 391 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 392 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 393 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 394 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 395 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 396 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 397 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 398 399 #ifdef DEBUG 400 if (ldebug(rt_sendsig)) 401 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 402 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 403 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 404 #endif 405 406 if (copyout(&frame, fp, sizeof(frame)) != 0) { 407 /* 408 * Process has trashed its stack; give it an illegal 409 * instruction to halt it in its tracks. 410 */ 411 #ifdef DEBUG 412 if (ldebug(rt_sendsig)) 413 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 414 fp, oonstack); 415 #endif 416 PROC_LOCK(p); 417 sigexit(td, SIGILL); 418 } 419 420 /* 421 * Build context to run handler in. 422 */ 423 regs->tf_rsp = PTROUT(fp); 424 regs->tf_rip = linux32_rt_sigcode; 425 regs->tf_rflags &= ~(PSL_T | PSL_D); 426 regs->tf_cs = _ucode32sel; 427 regs->tf_ss = _udatasel; 428 regs->tf_ds = _udatasel; 429 regs->tf_es = _udatasel; 430 regs->tf_fs = _ufssel; 431 regs->tf_gs = _ugssel; 432 regs->tf_flags = TF_HASSEGS; 433 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 434 PROC_LOCK(p); 435 mtx_lock(&psp->ps_mtx); 436 } 437 438 439 /* 440 * Send an interrupt to process. 441 * 442 * Stack is set up to allow sigcode stored 443 * in u. to call routine, followed by kcall 444 * to sigreturn routine below. After sigreturn 445 * resets the signal mask, the stack, and the 446 * frame pointer, it returns to the user 447 * specified pc, psl. 448 */ 449 static void 450 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 451 { 452 struct thread *td = curthread; 453 struct proc *p = td->td_proc; 454 struct sigacts *psp; 455 struct trapframe *regs; 456 struct l_sigframe *fp, frame; 457 l_sigset_t lmask; 458 int oonstack, i; 459 int sig, code; 460 461 sig = ksi->ksi_signo; 462 code = ksi->ksi_code; 463 PROC_LOCK_ASSERT(p, MA_OWNED); 464 psp = p->p_sigacts; 465 mtx_assert(&psp->ps_mtx, MA_OWNED); 466 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 467 /* Signal handler installed with SA_SIGINFO. */ 468 linux_rt_sendsig(catcher, ksi, mask); 469 return; 470 } 471 472 regs = td->td_frame; 473 oonstack = sigonstack(regs->tf_rsp); 474 475 #ifdef DEBUG 476 if (ldebug(sendsig)) 477 printf(ARGS(sendsig, "%p, %d, %p, %u"), 478 catcher, sig, (void*)mask, code); 479 #endif 480 481 /* 482 * Allocate space for the signal handler context. 483 */ 484 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 485 SIGISMEMBER(psp->ps_sigonstack, sig)) { 486 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 487 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 488 } else 489 fp = (struct l_sigframe *)regs->tf_rsp - 1; 490 mtx_unlock(&psp->ps_mtx); 491 PROC_UNLOCK(p); 492 493 /* 494 * Build the argument list for the signal handler. 495 */ 496 if (p->p_sysent->sv_sigtbl) 497 if (sig <= p->p_sysent->sv_sigsize) 498 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 499 500 bzero(&frame, sizeof(frame)); 501 502 frame.sf_handler = PTROUT(catcher); 503 frame.sf_sig = sig; 504 505 bsd_to_linux_sigset(mask, &lmask); 506 507 /* 508 * Build the signal context to be used by sigreturn. 509 */ 510 frame.sf_sc.sc_mask = lmask.__bits[0]; 511 frame.sf_sc.sc_gs = regs->tf_gs; 512 frame.sf_sc.sc_fs = regs->tf_fs; 513 frame.sf_sc.sc_es = regs->tf_es; 514 frame.sf_sc.sc_ds = regs->tf_ds; 515 frame.sf_sc.sc_edi = regs->tf_rdi; 516 frame.sf_sc.sc_esi = regs->tf_rsi; 517 frame.sf_sc.sc_ebp = regs->tf_rbp; 518 frame.sf_sc.sc_ebx = regs->tf_rbx; 519 frame.sf_sc.sc_esp = regs->tf_rsp; 520 frame.sf_sc.sc_edx = regs->tf_rdx; 521 frame.sf_sc.sc_ecx = regs->tf_rcx; 522 frame.sf_sc.sc_eax = regs->tf_rax; 523 frame.sf_sc.sc_eip = regs->tf_rip; 524 frame.sf_sc.sc_cs = regs->tf_cs; 525 frame.sf_sc.sc_eflags = regs->tf_rflags; 526 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 527 frame.sf_sc.sc_ss = regs->tf_ss; 528 frame.sf_sc.sc_err = regs->tf_err; 529 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 530 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 531 532 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 533 frame.sf_extramask[i] = lmask.__bits[i+1]; 534 535 if (copyout(&frame, fp, sizeof(frame)) != 0) { 536 /* 537 * Process has trashed its stack; give it an illegal 538 * instruction to halt it in its tracks. 539 */ 540 PROC_LOCK(p); 541 sigexit(td, SIGILL); 542 } 543 544 /* 545 * Build context to run handler in. 546 */ 547 regs->tf_rsp = PTROUT(fp); 548 regs->tf_rip = linux32_sigcode; 549 regs->tf_rflags &= ~(PSL_T | PSL_D); 550 regs->tf_cs = _ucode32sel; 551 regs->tf_ss = _udatasel; 552 regs->tf_ds = _udatasel; 553 regs->tf_es = _udatasel; 554 regs->tf_fs = _ufssel; 555 regs->tf_gs = _ugssel; 556 regs->tf_flags = TF_HASSEGS; 557 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 558 PROC_LOCK(p); 559 mtx_lock(&psp->ps_mtx); 560 } 561 562 /* 563 * System call to cleanup state after a signal 564 * has been taken. Reset signal mask and 565 * stack state from context left by sendsig (above). 566 * Return to previous pc and psl as specified by 567 * context left by sendsig. Check carefully to 568 * make sure that the user has not modified the 569 * psl to gain improper privileges or to cause 570 * a machine fault. 571 */ 572 int 573 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 574 { 575 struct l_sigframe frame; 576 struct trapframe *regs; 577 sigset_t bmask; 578 l_sigset_t lmask; 579 int eflags, i; 580 ksiginfo_t ksi; 581 582 regs = td->td_frame; 583 584 #ifdef DEBUG 585 if (ldebug(sigreturn)) 586 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 587 #endif 588 /* 589 * The trampoline code hands us the sigframe. 590 * It is unsafe to keep track of it ourselves, in the event that a 591 * program jumps out of a signal handler. 592 */ 593 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 594 return (EFAULT); 595 596 /* 597 * Check for security violations. 598 */ 599 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 600 eflags = frame.sf_sc.sc_eflags; 601 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 602 return(EINVAL); 603 604 /* 605 * Don't allow users to load a valid privileged %cs. Let the 606 * hardware check for invalid selectors, excess privilege in 607 * other selectors, invalid %eip's and invalid %esp's. 608 */ 609 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 610 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 611 ksiginfo_init_trap(&ksi); 612 ksi.ksi_signo = SIGBUS; 613 ksi.ksi_code = BUS_OBJERR; 614 ksi.ksi_trapno = T_PROTFLT; 615 ksi.ksi_addr = (void *)regs->tf_rip; 616 trapsignal(td, &ksi); 617 return(EINVAL); 618 } 619 620 lmask.__bits[0] = frame.sf_sc.sc_mask; 621 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 622 lmask.__bits[i+1] = frame.sf_extramask[i]; 623 linux_to_bsd_sigset(&lmask, &bmask); 624 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 625 626 /* 627 * Restore signal context. 628 */ 629 regs->tf_rdi = frame.sf_sc.sc_edi; 630 regs->tf_rsi = frame.sf_sc.sc_esi; 631 regs->tf_rbp = frame.sf_sc.sc_ebp; 632 regs->tf_rbx = frame.sf_sc.sc_ebx; 633 regs->tf_rdx = frame.sf_sc.sc_edx; 634 regs->tf_rcx = frame.sf_sc.sc_ecx; 635 regs->tf_rax = frame.sf_sc.sc_eax; 636 regs->tf_rip = frame.sf_sc.sc_eip; 637 regs->tf_cs = frame.sf_sc.sc_cs; 638 regs->tf_ds = frame.sf_sc.sc_ds; 639 regs->tf_es = frame.sf_sc.sc_es; 640 regs->tf_fs = frame.sf_sc.sc_fs; 641 regs->tf_gs = frame.sf_sc.sc_gs; 642 regs->tf_rflags = eflags; 643 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 644 regs->tf_ss = frame.sf_sc.sc_ss; 645 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 646 647 return (EJUSTRETURN); 648 } 649 650 /* 651 * System call to cleanup state after a signal 652 * has been taken. Reset signal mask and 653 * stack state from context left by rt_sendsig (above). 654 * Return to previous pc and psl as specified by 655 * context left by sendsig. Check carefully to 656 * make sure that the user has not modified the 657 * psl to gain improper privileges or to cause 658 * a machine fault. 659 */ 660 int 661 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 662 { 663 struct l_ucontext uc; 664 struct l_sigcontext *context; 665 sigset_t bmask; 666 l_stack_t *lss; 667 stack_t ss; 668 struct trapframe *regs; 669 int eflags; 670 ksiginfo_t ksi; 671 672 regs = td->td_frame; 673 674 #ifdef DEBUG 675 if (ldebug(rt_sigreturn)) 676 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 677 #endif 678 /* 679 * The trampoline code hands us the ucontext. 680 * It is unsafe to keep track of it ourselves, in the event that a 681 * program jumps out of a signal handler. 682 */ 683 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 684 return (EFAULT); 685 686 context = &uc.uc_mcontext; 687 688 /* 689 * Check for security violations. 690 */ 691 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 692 eflags = context->sc_eflags; 693 if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) 694 return(EINVAL); 695 696 /* 697 * Don't allow users to load a valid privileged %cs. Let the 698 * hardware check for invalid selectors, excess privilege in 699 * other selectors, invalid %eip's and invalid %esp's. 700 */ 701 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 702 if (!CS_SECURE(context->sc_cs)) { 703 ksiginfo_init_trap(&ksi); 704 ksi.ksi_signo = SIGBUS; 705 ksi.ksi_code = BUS_OBJERR; 706 ksi.ksi_trapno = T_PROTFLT; 707 ksi.ksi_addr = (void *)regs->tf_rip; 708 trapsignal(td, &ksi); 709 return(EINVAL); 710 } 711 712 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 713 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 714 715 /* 716 * Restore signal context 717 */ 718 regs->tf_gs = context->sc_gs; 719 regs->tf_fs = context->sc_fs; 720 regs->tf_es = context->sc_es; 721 regs->tf_ds = context->sc_ds; 722 regs->tf_rdi = context->sc_edi; 723 regs->tf_rsi = context->sc_esi; 724 regs->tf_rbp = context->sc_ebp; 725 regs->tf_rbx = context->sc_ebx; 726 regs->tf_rdx = context->sc_edx; 727 regs->tf_rcx = context->sc_ecx; 728 regs->tf_rax = context->sc_eax; 729 regs->tf_rip = context->sc_eip; 730 regs->tf_cs = context->sc_cs; 731 regs->tf_rflags = eflags; 732 regs->tf_rsp = context->sc_esp_at_signal; 733 regs->tf_ss = context->sc_ss; 734 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 735 736 /* 737 * call sigaltstack & ignore results.. 738 */ 739 lss = &uc.uc_stack; 740 ss.ss_sp = PTRIN(lss->ss_sp); 741 ss.ss_size = lss->ss_size; 742 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 743 744 #ifdef DEBUG 745 if (ldebug(rt_sigreturn)) 746 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 747 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 748 #endif 749 (void)kern_sigaltstack(td, &ss, NULL); 750 751 return (EJUSTRETURN); 752 } 753 754 static int 755 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 756 { 757 struct proc *p; 758 struct trapframe *frame; 759 760 p = td->td_proc; 761 frame = td->td_frame; 762 763 sa->args[0] = frame->tf_rbx; 764 sa->args[1] = frame->tf_rcx; 765 sa->args[2] = frame->tf_rdx; 766 sa->args[3] = frame->tf_rsi; 767 sa->args[4] = frame->tf_rdi; 768 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 769 sa->code = frame->tf_rax; 770 771 if (sa->code >= p->p_sysent->sv_size) 772 sa->callp = &p->p_sysent->sv_table[0]; 773 else 774 sa->callp = &p->p_sysent->sv_table[sa->code]; 775 sa->narg = sa->callp->sy_narg; 776 777 td->td_retval[0] = 0; 778 td->td_retval[1] = frame->tf_rdx; 779 780 return (0); 781 } 782 783 /* 784 * If a linux binary is exec'ing something, try this image activator 785 * first. We override standard shell script execution in order to 786 * be able to modify the interpreter path. We only do this if a linux 787 * binary is doing the exec, so we do not create an EXEC module for it. 788 */ 789 static int exec_linux_imgact_try(struct image_params *iparams); 790 791 static int 792 exec_linux_imgact_try(struct image_params *imgp) 793 { 794 const char *head = (const char *)imgp->image_header; 795 char *rpath; 796 int error = -1; 797 798 /* 799 * The interpreter for shell scripts run from a linux binary needs 800 * to be located in /compat/linux if possible in order to recursively 801 * maintain linux path emulation. 802 */ 803 if (((const short *)head)[0] == SHELLMAGIC) { 804 /* 805 * Run our normal shell image activator. If it succeeds attempt 806 * to use the alternate path for the interpreter. If an 807 * alternate * path is found, use our stringspace to store it. 808 */ 809 if ((error = exec_shell_imgact(imgp)) == 0) { 810 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 811 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 812 AT_FDCWD); 813 if (rpath != NULL) 814 imgp->args->fname_buf = 815 imgp->interpreter_name = rpath; 816 } 817 } 818 return (error); 819 } 820 821 /* 822 * Clear registers on exec 823 * XXX copied from ia32_signal.c. 824 */ 825 static void 826 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 827 { 828 struct trapframe *regs = td->td_frame; 829 struct pcb *pcb = td->td_pcb; 830 831 mtx_lock(&dt_lock); 832 if (td->td_proc->p_md.md_ldt != NULL) 833 user_ldt_free(td); 834 else 835 mtx_unlock(&dt_lock); 836 837 critical_enter(); 838 wrmsr(MSR_FSBASE, 0); 839 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 840 pcb->pcb_fsbase = 0; 841 pcb->pcb_gsbase = 0; 842 critical_exit(); 843 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 844 845 bzero((char *)regs, sizeof(struct trapframe)); 846 regs->tf_rip = imgp->entry_addr; 847 regs->tf_rsp = stack; 848 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 849 regs->tf_gs = _ugssel; 850 regs->tf_fs = _ufssel; 851 regs->tf_es = _udatasel; 852 regs->tf_ds = _udatasel; 853 regs->tf_ss = _udatasel; 854 regs->tf_flags = TF_HASSEGS; 855 regs->tf_cs = _ucode32sel; 856 regs->tf_rbx = imgp->ps_strings; 857 858 fpstate_drop(td); 859 860 /* Do full restore on return so that we can change to a different %cs */ 861 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 862 td->td_retval[1] = 0; 863 } 864 865 /* 866 * XXX copied from ia32_sysvec.c. 867 */ 868 static register_t * 869 linux_copyout_strings(struct image_params *imgp) 870 { 871 int argc, envc; 872 u_int32_t *vectp; 873 char *stringp, *destp; 874 u_int32_t *stack_base; 875 struct linux32_ps_strings *arginfo; 876 877 /* 878 * Calculate string base and vector table pointers. 879 */ 880 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 881 destp = (caddr_t)arginfo - SPARE_USRSPACE - 882 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 883 884 /* 885 * If we have a valid auxargs ptr, prepare some room 886 * on the stack. 887 */ 888 if (imgp->auxargs) { 889 /* 890 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 891 * lower compatibility. 892 */ 893 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 894 (LINUX_AT_COUNT * 2); 895 /* 896 * The '+ 2' is for the null pointers at the end of each of 897 * the arg and env vector sets,and imgp->auxarg_size is room 898 * for argument of Runtime loader. 899 */ 900 vectp = (u_int32_t *) (destp - (imgp->args->argc + 901 imgp->args->envc + 2 + imgp->auxarg_size) * 902 sizeof(u_int32_t)); 903 904 } else 905 /* 906 * The '+ 2' is for the null pointers at the end of each of 907 * the arg and env vector sets 908 */ 909 vectp = (u_int32_t *)(destp - (imgp->args->argc + 910 imgp->args->envc + 2) * sizeof(u_int32_t)); 911 912 /* 913 * vectp also becomes our initial stack base 914 */ 915 stack_base = vectp; 916 917 stringp = imgp->args->begin_argv; 918 argc = imgp->args->argc; 919 envc = imgp->args->envc; 920 /* 921 * Copy out strings - arguments and environment. 922 */ 923 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 924 925 /* 926 * Fill in "ps_strings" struct for ps, w, etc. 927 */ 928 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 929 suword32(&arginfo->ps_nargvstr, argc); 930 931 /* 932 * Fill in argument portion of vector table. 933 */ 934 for (; argc > 0; --argc) { 935 suword32(vectp++, (uint32_t)(intptr_t)destp); 936 while (*stringp++ != 0) 937 destp++; 938 destp++; 939 } 940 941 /* a null vector table pointer separates the argp's from the envp's */ 942 suword32(vectp++, 0); 943 944 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 945 suword32(&arginfo->ps_nenvstr, envc); 946 947 /* 948 * Fill in environment portion of vector table. 949 */ 950 for (; envc > 0; --envc) { 951 suword32(vectp++, (uint32_t)(intptr_t)destp); 952 while (*stringp++ != 0) 953 destp++; 954 destp++; 955 } 956 957 /* end of vector table is a null pointer */ 958 suword32(vectp, 0); 959 960 return ((register_t *)stack_base); 961 } 962 963 static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 964 "32-bit Linux emulation"); 965 966 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 967 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 968 &linux32_maxdsiz, 0, ""); 969 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 970 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 971 &linux32_maxssiz, 0, ""); 972 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 973 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 974 &linux32_maxvmem, 0, ""); 975 976 #if defined(DEBUG) 977 SYSCTL_PROC(_compat_linux32, OID_AUTO, debug, 978 CTLTYPE_STRING | CTLFLAG_RW, 979 0, 0, linux_sysctl_debug, "A", 980 "Linux debugging control"); 981 #endif 982 983 static void 984 linux32_fixlimit(struct rlimit *rl, int which) 985 { 986 987 switch (which) { 988 case RLIMIT_DATA: 989 if (linux32_maxdsiz != 0) { 990 if (rl->rlim_cur > linux32_maxdsiz) 991 rl->rlim_cur = linux32_maxdsiz; 992 if (rl->rlim_max > linux32_maxdsiz) 993 rl->rlim_max = linux32_maxdsiz; 994 } 995 break; 996 case RLIMIT_STACK: 997 if (linux32_maxssiz != 0) { 998 if (rl->rlim_cur > linux32_maxssiz) 999 rl->rlim_cur = linux32_maxssiz; 1000 if (rl->rlim_max > linux32_maxssiz) 1001 rl->rlim_max = linux32_maxssiz; 1002 } 1003 break; 1004 case RLIMIT_VMEM: 1005 if (linux32_maxvmem != 0) { 1006 if (rl->rlim_cur > linux32_maxvmem) 1007 rl->rlim_cur = linux32_maxvmem; 1008 if (rl->rlim_max > linux32_maxvmem) 1009 rl->rlim_max = linux32_maxvmem; 1010 } 1011 break; 1012 } 1013 } 1014 1015 struct sysentvec elf_linux_sysvec = { 1016 .sv_size = LINUX_SYS_MAXSYSCALL, 1017 .sv_table = linux_sysent, 1018 .sv_mask = 0, 1019 .sv_sigsize = LINUX_SIGTBLSZ, 1020 .sv_sigtbl = bsd_to_linux_signal, 1021 .sv_errsize = ELAST + 1, 1022 .sv_errtbl = bsd_to_linux_errno, 1023 .sv_transtrap = translate_traps, 1024 .sv_fixup = elf_linux_fixup, 1025 .sv_sendsig = linux_sendsig, 1026 .sv_sigcode = &_binary_linux32_locore_o_start, 1027 .sv_szsigcode = &linux_szsigcode, 1028 .sv_prepsyscall = NULL, 1029 .sv_name = "Linux ELF32", 1030 .sv_coredump = elf32_coredump, 1031 .sv_imgact_try = exec_linux_imgact_try, 1032 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1033 .sv_pagesize = PAGE_SIZE, 1034 .sv_minuser = VM_MIN_ADDRESS, 1035 .sv_maxuser = LINUX32_MAXUSER, 1036 .sv_usrstack = LINUX32_USRSTACK, 1037 .sv_psstrings = LINUX32_PS_STRINGS, 1038 .sv_stackprot = VM_PROT_ALL, 1039 .sv_copyout_strings = linux_copyout_strings, 1040 .sv_setregs = exec_linux_setregs, 1041 .sv_fixlimit = linux32_fixlimit, 1042 .sv_maxssiz = &linux32_maxssiz, 1043 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1044 .sv_set_syscall_retval = cpu_set_syscall_retval, 1045 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1046 .sv_syscallnames = NULL, 1047 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1048 .sv_shared_page_len = PAGE_SIZE, 1049 .sv_schedtail = linux_schedtail, 1050 .sv_thread_detach = linux_thread_detach, 1051 }; 1052 1053 static void 1054 linux_vdso_install(void *param) 1055 { 1056 1057 linux_szsigcode = (&_binary_linux32_locore_o_end - 1058 &_binary_linux32_locore_o_start); 1059 1060 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 1061 panic("Linux invalid vdso size\n"); 1062 1063 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 1064 1065 linux_shared_page_obj = __elfN(linux_shared_page_init) 1066 (&linux_shared_page_mapping); 1067 1068 __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); 1069 1070 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 1071 linux_szsigcode); 1072 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 1073 1074 linux_kplatform = linux_shared_page_mapping + 1075 (linux_platform - (caddr_t)LINUX32_SHAREDPAGE); 1076 } 1077 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 1078 (sysinit_cfunc_t)linux_vdso_install, NULL); 1079 1080 static void 1081 linux_vdso_deinstall(void *param) 1082 { 1083 1084 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 1085 }; 1086 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 1087 (sysinit_cfunc_t)linux_vdso_deinstall, NULL); 1088 1089 static char GNU_ABI_VENDOR[] = "GNU"; 1090 static int GNULINUX_ABI_DESC = 0; 1091 1092 static boolean_t 1093 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1094 { 1095 const Elf32_Word *desc; 1096 uintptr_t p; 1097 1098 p = (uintptr_t)(note + 1); 1099 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1100 1101 desc = (const Elf32_Word *)p; 1102 if (desc[0] != GNULINUX_ABI_DESC) 1103 return (FALSE); 1104 1105 /* 1106 * For linux we encode osrel as follows (see linux_mib.c): 1107 * VVVMMMIII (version, major, minor), see linux_mib.c. 1108 */ 1109 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1110 1111 return (TRUE); 1112 } 1113 1114 static Elf_Brandnote linux32_brandnote = { 1115 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1116 .hdr.n_descsz = 16, /* XXX at least 16 */ 1117 .hdr.n_type = 1, 1118 .vendor = GNU_ABI_VENDOR, 1119 .flags = BN_TRANSLATE_OSREL, 1120 .trans_osrel = linux32_trans_osrel 1121 }; 1122 1123 static Elf32_Brandinfo linux_brand = { 1124 .brand = ELFOSABI_LINUX, 1125 .machine = EM_386, 1126 .compat_3_brand = "Linux", 1127 .emul_path = "/compat/linux", 1128 .interp_path = "/lib/ld-linux.so.1", 1129 .sysvec = &elf_linux_sysvec, 1130 .interp_newpath = NULL, 1131 .brand_note = &linux32_brandnote, 1132 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1133 }; 1134 1135 static Elf32_Brandinfo linux_glibc2brand = { 1136 .brand = ELFOSABI_LINUX, 1137 .machine = EM_386, 1138 .compat_3_brand = "Linux", 1139 .emul_path = "/compat/linux", 1140 .interp_path = "/lib/ld-linux.so.2", 1141 .sysvec = &elf_linux_sysvec, 1142 .interp_newpath = NULL, 1143 .brand_note = &linux32_brandnote, 1144 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1145 }; 1146 1147 Elf32_Brandinfo *linux_brandlist[] = { 1148 &linux_brand, 1149 &linux_glibc2brand, 1150 NULL 1151 }; 1152 1153 static int 1154 linux_elf_modevent(module_t mod, int type, void *data) 1155 { 1156 Elf32_Brandinfo **brandinfo; 1157 int error; 1158 struct linux_ioctl_handler **lihp; 1159 1160 error = 0; 1161 1162 switch(type) { 1163 case MOD_LOAD: 1164 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1165 ++brandinfo) 1166 if (elf32_insert_brand_entry(*brandinfo) < 0) 1167 error = EINVAL; 1168 if (error == 0) { 1169 SET_FOREACH(lihp, linux_ioctl_handler_set) 1170 linux_ioctl_register_handler(*lihp); 1171 LIST_INIT(&futex_list); 1172 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1173 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, 1174 linux_proc_exit, NULL, 1000); 1175 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1176 linux_proc_exec, NULL, 1000); 1177 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, 1178 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); 1179 stclohz = (stathz ? stathz : hz); 1180 if (bootverbose) 1181 printf("Linux ELF exec handler installed\n"); 1182 } else 1183 printf("cannot insert Linux ELF brand handler\n"); 1184 break; 1185 case MOD_UNLOAD: 1186 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1187 ++brandinfo) 1188 if (elf32_brand_inuse(*brandinfo)) 1189 error = EBUSY; 1190 if (error == 0) { 1191 for (brandinfo = &linux_brandlist[0]; 1192 *brandinfo != NULL; ++brandinfo) 1193 if (elf32_remove_brand_entry(*brandinfo) < 0) 1194 error = EINVAL; 1195 } 1196 if (error == 0) { 1197 SET_FOREACH(lihp, linux_ioctl_handler_set) 1198 linux_ioctl_unregister_handler(*lihp); 1199 mtx_destroy(&futex_mtx); 1200 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1201 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1202 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); 1203 if (bootverbose) 1204 printf("Linux ELF exec handler removed\n"); 1205 } else 1206 printf("Could not deinstall ELF interpreter entry\n"); 1207 break; 1208 default: 1209 return (EOPNOTSUPP); 1210 } 1211 return (error); 1212 } 1213 1214 static moduledata_t linux_elf_mod = { 1215 "linuxelf", 1216 linux_elf_modevent, 1217 0 1218 }; 1219 1220 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1221 MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); 1222