/*-
 * Copyright (c) 2004 Tim J. Robbins
 * Copyright (c) 2003 Peter Wemm
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1998-1999 Andrew Gallatin
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_compat.h"

#ifndef COMPAT_FREEBSD32
#error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!"
#endif

#define	__ELF_WORD_SIZE	32

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>
#include <sys/eventhandler.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>

#include <amd64/linux32/linux.h>
#include <amd64/linux32/linux32_proto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_futex.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>

MODULE_VERSION(linux, 1);

MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * Emit one (id, val) auxiliary vector pair with two suword32() stores,
 * advancing pos by two 32-bit words.  pos is post-incremented inside the
 * macro, so it must be a plain modifiable lvalue.
 */
#define	AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * The two bytes "#!" read as a 16-bit integer; the value depends on the
 * byte order of the host.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig	0

const char *linux_platform = "i686";
static int linux_szplatform;
extern char linux_sigcode[];
extern int linux_szsigcode;

extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
SET_DECLARE(linux_device_handler_set, struct linux_device_handler);

static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static register_t *linux_copyout_strings(struct image_params *imgp);
static void	linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	exec_linux_setregs(struct thread *td,
		    struct image_params *imgp, u_long stack);
static void	linux32_fixlimit(struct rlimit *rl, int which);
static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel);

static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_exec_tag;

/*
 * Linux syscalls return negative errno's, we do positive and map them
 * Reference:
 *   FreeBSD: src/sys/sys/errno.h
 *   Linux:   linux-2.6.17.8/include/asm-generic/errno-base.h
 *            linux-2.6.17.8/include/asm-generic/errno.h
 *
 * The table is indexed by the FreeBSD errno value (0 .. ELAST) and holds
 * the already-negated Linux value.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	  -6,  -6, -43, -42, -75,-125, -84, -95, -16, -74,
	 -72, -67, -71
};

/*
 * FreeBSD -> Linux signal numbers.  The sendsig routines below index this
 * table (through sv_sigtbl) with _SIG_IDX(sig).  A 0 entry marks a slot
 * for which no Linux signal is listed.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};

/*
 * Linux -> FreeBSD signal numbers; the reverse direction of the table
 * above.  Not referenced in this part of the file.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};

/*
 * FreeBSD trap code -> value stored in the Linux sigcontext sc_trapno
 * field.  Codes without a listed counterpart map to LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above; any code at or beyond the end
 * of the table yields LINUX_T_UNKNOWN.  The argument is evaluated twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)

/*
 * Layout of the ps_strings area at LINUX32_PS_STRINGS as written by
 * linux_copyout_strings(); the two pointer members are stored as 32-bit
 * values.
 */
struct linux32_ps_strings {
	u_int32_t ps_argvstr;	/* first of 0 or more argument strings */
	u_int ps_nargvstr;	/* the number of argument strings */
	u_int32_t ps_envstr;	/* first of 0 or more environment strings */
	u_int ps_nenvstr;	/* the number of environment strings */
};

/*
 * If FreeBSD & Linux have a difference of opinion about what a trap
 * means, deal with it here.
 *
 * Only SIGBUS is remapped: for the protection, TSS, double and page fault
 * trap codes it becomes SIGSEGV.  Every other signal/trap combination is
 * returned unchanged.
 *
 * MPSAFE
 */
static int
translate_traps(int signal, int trap_code)
{
	if (signal != SIGBUS)
		return signal;
	switch (trap_code) {
	case T_PROTFLT:
	case T_TSSFLT:
	case T_DOUBLEFLT:
	case T_PAGEFLT:
		return SIGSEGV;
	default:
		return signal;
	}
}

/*
 * Installed as sv_fixup.  Writes the ELF auxiliary vector right after the
 * argv/envp pointer arrays (base + argc + envc + 2), releases
 * imgp->auxargs, stores argc one word below the original base and hands
 * the adjusted base back through *stack_base.  Always returns 0.
 */
static int
elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
{
	Elf32_Auxargs *args;
	Elf32_Addr *base;
	Elf32_Addr *pos, *uplatform;
	struct linux32_ps_strings *arginfo;

	/*
	 * Same address that linux_copyout_strings() copies the platform
	 * string to: below ps_strings and the signal trampoline.
	 */
	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
	uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode -
	    linux_szplatform);

	KASSERT(curthread->td_proc == imgp->proc,
	    ("unsafe elf_linux_fixup(), should be curproc"));
	base = (Elf32_Addr *)*stack_base;
	args = (Elf32_Auxargs *)imgp->auxargs;
	/* The '+ 2' skips the NULL terminators of the argv and envp arrays. */
	pos = base + (imgp->args->argc + imgp->args->envc + 2);

	AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature);

	/*
	 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0,
	 * as it has appeared in the 2.4.0-rc7 first time.
	 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK),
	 * glibc falls back to the hard-coded CLK_TCK value when aux entry
	 * is not present.
	 * Also see linux_times() implementation.
	 */
	if (linux_kernver(curthread) >= LINUX_KERNVER_2004000)
		AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz);
	AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY_32(pos, AT_BASE, args->base);
	AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0);
	/*
	 * NOTE(review): AT_EUID/AT_EGID are filled from cr_svuid/cr_svgid
	 * rather than from a separate effective-id field -- confirm that
	 * this is intended.
	 */
	AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
	AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
	AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
	AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
	AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform));
	if (args->execfd != -1)
		AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY_32(pos, AT_NULL, 0);

	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	/* argc goes in the word immediately below the argv array. */
	base--;
	suword32(base, (uint32_t)imgp->args->argc);
	*stack_base = (register_t *)base;
	return 0;
}

extern unsigned long linux_sznonrtsigcode;

/*
 * Signal delivery for handlers installed with SA_SIGINFO; reached from
 * linux_sendsig().  Builds an l_rt_sigframe (siginfo plus ucontext) in a
 * local copy, copies it out to the chosen stack and points the trap frame
 * at the rt entry of the signal trampoline.
 *
 * Entered with the proc lock and ps_mtx held (both asserted).  Both are
 * released while the frame is built and copied out, and both are taken
 * again before returning.
 */
static void
linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;
	int sig;
	int code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context.
	 * The frame goes at the top of the alternate stack when one is
	 * configured, we are not already on it and the signal asked for
	 * it; otherwise directly below the current stack pointer.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);

	/*
	 * Build the argument list for the signal handler.
	 * The signal number is translated through sv_sigtbl when it is in
	 * range of the table.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	/* The pointers stored in the frame refer to the user-space copy. */
	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;
	frame.sf_siginfo = PTROUT(&fp->sf_si);
	frame.sf_ucontext = PTROUT(&fp->sf_sc);

	/* Fill in POSIX parts */
	ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = 0;		/* XXX ??? */

	frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
	frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Save the interrupted register state from the trap frame. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_rdi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_rsi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_rdx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_rax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_rip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_gs     = regs->tf_gs;
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp,
		    td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * The entry point is linux_sznonrtsigcode bytes into the
	 * trampoline, whose start is sv_szsigcode bytes below ps_strings.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* Re-take the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}


/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * Installed as sv_sendsig.  Signals whose handler is in ps_siginfo are
 * passed on to linux_rt_sendsig(); all others get the l_sigframe built
 * here.  Entered with the proc lock and ps_mtx held (both asserted);
 * both are dropped around the frame construction and copyout and taken
 * again before returning.
 */
static void
linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct sigacts *psp;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;
	int sig, code;

	sig = ksi->ksi_signo;
	code = ksi->ksi_code;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	psp = p->p_sigacts;
	mtx_assert(&psp->ps_mtx, MA_OWNED);
	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, ksi, mask);
		return;
	}

	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_rsp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %u"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 * Same placement rule as in linux_rt_sendsig(): top of the
	 * alternate stack when applicable, else below the current %rsp.
	 */
	if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(td->td_sigstk.ss_sp +
		    td->td_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_rsp - 1;
	mtx_unlock(&psp->ps_mtx);
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = PTROUT(catcher);
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = regs->tf_gs;
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_rdi;
	frame.sf_sc.sc_esi    = regs->tf_rsi;
	frame.sf_sc.sc_ebp    = regs->tf_rbp;
	frame.sf_sc.sc_ebx    = regs->tf_rbx;
	frame.sf_sc.sc_edx    = regs->tf_rdx;
	frame.sf_sc.sc_ecx    = regs->tf_rcx;
	frame.sf_sc.sc_eax    = regs->tf_rax;
	frame.sf_sc.sc_eip    = regs->tf_rip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_rflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_rsp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_cr2    = (u_int32_t)(uintptr_t)ksi->ksi_addr;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first travel in sf_extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * Execution resumes at the start of the signal trampoline, which
	 * sits sv_szsigcode bytes below ps_strings.
	 */
	regs->tf_rsp = PTROUT(fp);
	regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_rflags &= ~(PSL_T | PSL_D);
	regs->tf_cs = _ucode32sel;
	regs->tf_ss = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _ufssel;
	regs->tf_gs = _ugssel;
	regs->tf_flags = TF_HASSEGS;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);
	/* Re-take the locks the caller expects to still hold. */
	PROC_LOCK(p);
	mtx_lock(&psp->ps_mtx);
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT when the frame cannot be copied in, EINVAL when the
 * saved eflags or %cs fail the checks below, and EJUSTRETURN once the
 * trap frame has been restored.
 */
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct l_sigframe frame;
	struct trapframe *regs;
	sigset_t bmask;
	l_sigset_t lmask;
	int eflags, i;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(sigreturn))
		printf(ARGS(sigreturn, "%p"), (void *)args->sfp);
#endif
	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/*
	 * Check for security violations.
	 * EFLAGS_SECURE() is true when the two values differ only in bits
	 * covered by PSL_USERCHANGE.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	/* Reassemble the Linux mask from sc_mask plus the extra words. */
	lmask.__bits[0] = frame.sf_sc.sc_mask;
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		lmask.__bits[i+1] = frame.sf_extramask[i];
	linux_to_bsd_sigset(&lmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);

	/*
	 * Restore signal context.
	 */
	regs->tf_rdi    = frame.sf_sc.sc_edi;
	regs->tf_rsi    = frame.sf_sc.sc_esi;
	regs->tf_rbp    = frame.sf_sc.sc_ebp;
	regs->tf_rbx    = frame.sf_sc.sc_ebx;
	regs->tf_rdx    = frame.sf_sc.sc_edx;
	regs->tf_rcx    = frame.sf_sc.sc_ecx;
	regs->tf_rax    = frame.sf_sc.sc_eax;
	regs->tf_rip    = frame.sf_sc.sc_eip;
	regs->tf_cs     = frame.sf_sc.sc_cs;
	regs->tf_ds     = frame.sf_sc.sc_ds;
	regs->tf_es     = frame.sf_sc.sc_es;
	regs->tf_fs     = frame.sf_sc.sc_fs;
	regs->tf_gs     = frame.sf_sc.sc_gs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss     = frame.sf_sc.sc_ss;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);

	return (EJUSTRETURN);
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 *
 * Returns EFAULT when the ucontext cannot be copied in, EINVAL when the
 * saved eflags or %cs fail the checks below, and EJUSTRETURN once the
 * trap frame and the alternate stack settings have been restored.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct l_ucontext uc;
	struct l_sigcontext *context;
	sigset_t bmask;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp);
#endif
	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/*
	 * Check for security violations.
	 * EFLAGS_SECURE() is true when the two values differ only in bits
	 * covered by PSL_USERCHANGE.
	 */
#define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	/*
	 * XXX do allow users to change the privileged flag PSL_RF.  The
	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
	 * sometimes set it there too.  tf_eflags is kept in the signal
	 * context during signal handling and there is no other place
	 * to remember it, so the PSL_RF bit may be corrupted by the
	 * signal handler without us knowing.  Corruption of the PSL_RF
	 * bit at worst causes one more or one less debugger trap, so
	 * allowing it is fairly harmless.
	 */
	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF))
		return(EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_rip;
		trapsignal(td, &ksi);
		return(EINVAL);
	}

	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);

	/*
	 * Restore signal context
	 */
	regs->tf_gs	= context->sc_gs;
	regs->tf_fs	= context->sc_fs;
	regs->tf_es	= context->sc_es;
	regs->tf_ds	= context->sc_ds;
	regs->tf_rdi    = context->sc_edi;
	regs->tf_rsi    = context->sc_esi;
	regs->tf_rbp    = context->sc_ebp;
	regs->tf_rbx    = context->sc_ebx;
	regs->tf_rdx    = context->sc_edx;
	regs->tf_rcx    = context->sc_ecx;
	regs->tf_rax    = context->sc_eax;
	regs->tf_rip    = context->sc_eip;
	regs->tf_cs     = context->sc_cs;
	regs->tf_rflags = eflags;
	regs->tf_rsp    = context->sc_esp_at_signal;
	regs->tf_ss     = context->sc_ss;
	set_pcb_flags(td->td_pcb, PCB_FULL_IRET);

	/*
	 * call sigaltstack & ignore results..
	 */
	lss = &uc.uc_stack;
	ss.ss_sp = PTRIN(lss->ss_sp);
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

#ifdef DEBUG
	if (ldebug(rt_sigreturn))
		printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"),
		    ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask);
#endif
	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}

/*
 * Installed as sv_fetch_syscall_args.  Takes the syscall number from %rax
 * and the six argument slots from %rbx, %rcx, %rdx, %rsi, %rdi and %rbp
 * of the trap frame.  A syscall number at or beyond sv_size is routed to
 * entry 0 of the syscall table.  Always returns 0.
 */
static int
linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa)
{
	struct proc *p;
	struct trapframe *frame;

	p = td->td_proc;
	frame = td->td_frame;

	sa->args[0] = frame->tf_rbx;
	sa->args[1] = frame->tf_rcx;
	sa->args[2] = frame->tf_rdx;
	sa->args[3] = frame->tf_rsi;
	sa->args[4] = frame->tf_rdi;
	sa->args[5] = frame->tf_rbp;	/* Unconfirmed */
	sa->code = frame->tf_rax;

	if (sa->code >= p->p_sysent->sv_size)
		sa->callp = &p->p_sysent->sv_table[0];
	else
		sa->callp = &p->p_sysent->sv_table[sa->code];
	sa->narg = sa->callp->sy_narg;

	/* Preset the return values; retval[1] starts out as the current %rdx. */
	td->td_retval[0] = 0;
	td->td_retval[1] = frame->tf_rdx;

	return (0);
}

/*
 * If a linux binary is exec'ing something, try this image activator
 * first.  We override standard shell script execution in order to
 * be able to modify the interpreter path.  We only do this if a linux
 * binary is doing the exec, so we do not create an EXEC module for it.
 *
 * Returns -1 when the image header does not start with SHELLMAGIC,
 * otherwise whatever exec_shell_imgact() returned.
 */
static int	exec_linux_imgact_try(struct image_params *iparams);

static int
exec_linux_imgact_try(struct image_params *imgp)
{
	const char *head = (const char *)imgp->image_header;
	char *rpath;
	int error = -1;

	/*
	 * The interpreter for shell scripts run from a linux binary needs
	 * to be located in /compat/linux if possible in order to recursively
	 * maintain linux path emulation.
	 */
	if (((const short *)head)[0] == SHELLMAGIC) {
		/*
		 * Run our normal shell image activator.  If it succeeds attempt
		 * to use the alternate path for the interpreter.  If an
		 * alternate path is found, use our stringspace to store it.
		 */
		if ((error = exec_shell_imgact(imgp)) == 0) {
			/*
			 * NOTE(review): rpath is only tested afterwards, so
			 * this relies on linux_emul_convpath() always
			 * storing through &rpath -- confirm.
			 */
			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
			    AT_FDCWD);
			if (rpath != NULL)
				imgp->args->fname_buf =
				    imgp->interpreter_name = rpath;
		}
	}
	return (error);
}

/*
 * Clear registers on exec
 * XXX copied from ia32_signal.c.
 *
 * Installed as sv_setregs.  Drops any per-process LDT, zeroes the FS/GS
 * bases, resets the trap frame to start at imgp->entry_addr with the
 * given stack pointer and the 32-bit user selectors, and passes
 * imgp->ps_strings to the new image in %rbx.
 */
static void
exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack)
{
	struct trapframe *regs = td->td_frame;
	struct pcb *pcb = td->td_pcb;

	/*
	 * dt_lock is only unlocked here on the no-LDT path.
	 * NOTE(review): this presumes user_ldt_free() releases dt_lock
	 * itself -- confirm.
	 */
	mtx_lock(&dt_lock);
	if (td->td_proc->p_md.md_ldt != NULL)
		user_ldt_free(td);
	else
		mtx_unlock(&dt_lock);

	critical_enter();
	wrmsr(MSR_FSBASE, 0);
	wrmsr(MSR_KGSBASE, 0);	/* User value while we're in the kernel */
	pcb->pcb_fsbase = 0;
	pcb->pcb_gsbase = 0;
	critical_exit();
	pcb->pcb_initial_fpucw = __LINUX_NPXCW__;

	bzero((char *)regs, sizeof(struct trapframe));
	regs->tf_rip = imgp->entry_addr;
	regs->tf_rsp = stack;
	/*
	 * NOTE(review): the frame was zeroed by the bzero() above, so
	 * (regs->tf_rflags & PSL_T) is always 0 at this point.
	 */
	regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T);
	regs->tf_gs = _ugssel;
	regs->tf_fs = _ufssel;
	regs->tf_es = _udatasel;
	regs->tf_ds = _udatasel;
	regs->tf_ss = _udatasel;
	regs->tf_flags = TF_HASSEGS;
	regs->tf_cs = _ucode32sel;
	regs->tf_rbx = imgp->ps_strings;

	fpstate_drop(td);

	/* Do full restore on return so that we can change to a different %cs */
	set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET);
	clear_pcb_flags(pcb, PCB_GS32BIT);
	td->td_retval[1] = 0;
}

/*
 * XXX copied from ia32_sysvec.c.
 *
 * Installed as sv_copyout_strings.  Populates the area below
 * LINUX32_PS_STRINGS for a new image: the signal trampoline, the platform
 * string, the argument/environment strings and the 32-bit argv/envp
 * pointer vectors, and fills in the ps_strings structure.  Returns the
 * address of the vector table, which is the initial stack base.
 *
 * NOTE(review): the results of copyout() and suword32() are not checked
 * anywhere in this function.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	u_int32_t *vectp;
	char *stringp, *destp;
	u_int32_t *stack_base;
	struct linux32_ps_strings *arginfo;

	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
	    sizeof(char *));

	/*
	 * install sigcode
	 */
	copyout(imgp->proc->p_sysent->sv_sigcode,
	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);

	/*
	 * Install LINUX_PLATFORM
	 */
	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
	    linux_szplatform), linux_szplatform);

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets,and imgp->auxarg_size is room
		 * for argument of Runtime loader.
		 */
		vectp = (u_int32_t *) (destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) *
		    sizeof(u_int32_t));

	} else
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (u_int32_t *)(destp - (imgp->args->argc +
		    imgp->args->envc + 2) * sizeof(u_int32_t));

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;
	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.
	 * stringp walks the kernel copy of the strings while destp tracks
	 * the matching address in the copy made above.
	 */
	for (; argc > 0; --argc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword32(vectp++, 0);

	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword32(vectp, 0);

	return ((register_t *)stack_base);
}

SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Read-write sysctl knobs under the node declared above; they are the
 * ceilings that linux32_fixlimit() applies.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");

/*
 * Installed as sv_fixlimit.  Clamps both rlim_cur and rlim_max of the
 * data, stack and vmem limits to the matching knob above.  A knob value
 * of 0 disables clamping for that resource; other resources are left
 * untouched.
 */
static void
linux32_fixlimit(struct rlimit *rl, int which)
{

	switch (which) {
	case RLIMIT_DATA:
		if (linux32_maxdsiz != 0) {
			if (rl->rlim_cur > linux32_maxdsiz)
				rl->rlim_cur = linux32_maxdsiz;
			if (rl->rlim_max > linux32_maxdsiz)
				rl->rlim_max = linux32_maxdsiz;
		}
		break;
	case RLIMIT_STACK:
		if (linux32_maxssiz != 0) {
			if (rl->rlim_cur > linux32_maxssiz)
				rl->rlim_cur = linux32_maxssiz;
			if (rl->rlim_max > linux32_maxssiz)
				rl->rlim_max = linux32_maxssiz;
		}
		break;
	case RLIMIT_VMEM:
		if (linux32_maxvmem != 0) {
			if (rl->rlim_cur > linux32_maxvmem)
				rl->rlim_cur = linux32_maxvmem;
			if (rl->rlim_max > linux32_maxvmem)
				rl->rlim_max = linux32_maxvmem;
		}
		break;
	}
}

/*
 * System entry vector for the "Linux ELF32" ABI.  It ties together the
 * translation tables and the callbacks defined in this file.
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_sigsize	= LINUX_SIGTBLSZ,
	.sv_sigtbl	= bsd_to_linux_signal,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= bsd_to_linux_errno,
	.sv_transtrap	= translate_traps,
	.sv_fixup	= elf_linux_fixup,
	.sv_sendsig	= linux_sendsig,
	.sv_sigcode	= linux_sigcode,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_prepsyscall	= NULL,
	.sv_name	= "Linux ELF32",
	.sv_coredump	= elf32_coredump,
	.sv_imgact_try	= exec_linux_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_pagesize	= PAGE_SIZE,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= LINUX32_USRSTACK,
	.sv_usrstack	= LINUX32_USRSTACK,
	.sv_psstrings	= LINUX32_PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= exec_linux_setregs,
	.sv_fixlimit	= linux32_fixlimit,
	.sv_maxssiz	= &linux32_maxssiz,
	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
	.sv_syscallnames = NULL,
	.sv_schedtail	= linux_schedtail,
};

static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;

static boolean_t
linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
	const Elf32_Word *desc;
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));

	desc = (const Elf32_Word *)p;
	if (desc[0]
!= GNULINUX_ABI_DESC) 1082 return (FALSE); 1083 1084 /* 1085 * For linux we encode osrel as follows (see linux_mib.c): 1086 * VVVMMMIII (version, major, minor), see linux_mib.c. 1087 */ 1088 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1089 1090 return (TRUE); 1091 } 1092 1093 static Elf_Brandnote linux32_brandnote = { 1094 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1095 .hdr.n_descsz = 16, /* XXX at least 16 */ 1096 .hdr.n_type = 1, 1097 .vendor = GNU_ABI_VENDOR, 1098 .flags = BN_TRANSLATE_OSREL, 1099 .trans_osrel = linux32_trans_osrel 1100 }; 1101 1102 static Elf32_Brandinfo linux_brand = { 1103 .brand = ELFOSABI_LINUX, 1104 .machine = EM_386, 1105 .compat_3_brand = "Linux", 1106 .emul_path = "/compat/linux", 1107 .interp_path = "/lib/ld-linux.so.1", 1108 .sysvec = &elf_linux_sysvec, 1109 .interp_newpath = NULL, 1110 .brand_note = &linux32_brandnote, 1111 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1112 }; 1113 1114 static Elf32_Brandinfo linux_glibc2brand = { 1115 .brand = ELFOSABI_LINUX, 1116 .machine = EM_386, 1117 .compat_3_brand = "Linux", 1118 .emul_path = "/compat/linux", 1119 .interp_path = "/lib/ld-linux.so.2", 1120 .sysvec = &elf_linux_sysvec, 1121 .interp_newpath = NULL, 1122 .brand_note = &linux32_brandnote, 1123 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1124 }; 1125 1126 Elf32_Brandinfo *linux_brandlist[] = { 1127 &linux_brand, 1128 &linux_glibc2brand, 1129 NULL 1130 }; 1131 1132 static int 1133 linux_elf_modevent(module_t mod, int type, void *data) 1134 { 1135 Elf32_Brandinfo **brandinfo; 1136 int error; 1137 struct linux_ioctl_handler **lihp; 1138 struct linux_device_handler **ldhp; 1139 1140 error = 0; 1141 1142 switch(type) { 1143 case MOD_LOAD: 1144 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1145 ++brandinfo) 1146 if (elf32_insert_brand_entry(*brandinfo) < 0) 1147 error = EINVAL; 1148 if (error == 0) { 1149 SET_FOREACH(lihp, linux_ioctl_handler_set) 1150 linux_ioctl_register_handler(*lihp); 1151 SET_FOREACH(ldhp, 
linux_device_handler_set) 1152 linux_device_register_handler(*ldhp); 1153 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 1154 sx_init(&emul_shared_lock, "emuldata->shared lock"); 1155 LIST_INIT(&futex_list); 1156 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1157 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, 1158 linux_proc_exit, NULL, 1000); 1159 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1160 linux_proc_exec, NULL, 1000); 1161 linux_szplatform = roundup(strlen(linux_platform) + 1, 1162 sizeof(char *)); 1163 linux_osd_jail_register(); 1164 stclohz = (stathz ? stathz : hz); 1165 if (bootverbose) 1166 printf("Linux ELF exec handler installed\n"); 1167 } else 1168 printf("cannot insert Linux ELF brand handler\n"); 1169 break; 1170 case MOD_UNLOAD: 1171 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1172 ++brandinfo) 1173 if (elf32_brand_inuse(*brandinfo)) 1174 error = EBUSY; 1175 if (error == 0) { 1176 for (brandinfo = &linux_brandlist[0]; 1177 *brandinfo != NULL; ++brandinfo) 1178 if (elf32_remove_brand_entry(*brandinfo) < 0) 1179 error = EINVAL; 1180 } 1181 if (error == 0) { 1182 SET_FOREACH(lihp, linux_ioctl_handler_set) 1183 linux_ioctl_unregister_handler(*lihp); 1184 SET_FOREACH(ldhp, linux_device_handler_set) 1185 linux_device_unregister_handler(*ldhp); 1186 mtx_destroy(&emul_lock); 1187 sx_destroy(&emul_shared_lock); 1188 mtx_destroy(&futex_mtx); 1189 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1190 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1191 linux_osd_jail_deregister(); 1192 if (bootverbose) 1193 printf("Linux ELF exec handler removed\n"); 1194 } else 1195 printf("Could not deinstall ELF interpreter entry\n"); 1196 break; 1197 default: 1198 return EOPNOTSUPP; 1199 } 1200 return error; 1201 } 1202 1203 static moduledata_t linux_elf_mod = { 1204 "linuxelf", 1205 linux_elf_modevent, 1206 0 1207 }; 1208 1209 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1210