1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1994-1996 Søren Schmidt 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/fcntl.h> 36 #include <sys/imgact.h> 37 #include <sys/imgact_aout.h> 38 #include <sys/imgact_elf.h> 39 #include <sys/kernel.h> 40 #include <sys/lock.h> 41 #include <sys/malloc.h> 42 #include <sys/module.h> 43 #include <sys/mutex.h> 44 #include <sys/proc.h> 45 #include <sys/signalvar.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sysctl.h> 48 #include <sys/sysent.h> 49 #include <sys/sysproto.h> 50 #include <sys/vnode.h> 51 #include <sys/eventhandler.h> 52 53 #include <vm/vm.h> 54 #include <vm/pmap.h> 55 #include <vm/vm_extern.h> 56 #include <vm/vm_map.h> 57 #include <vm/vm_object.h> 58 #include <vm/vm_page.h> 59 #include <vm/vm_param.h> 60 61 #include <machine/cpu.h> 62 #include <machine/cputypes.h> 63 #include <machine/md_var.h> 64 #include <machine/pcb.h> 65 #include <machine/trap.h> 66 67 #include <i386/linux/linux.h> 68 #include <i386/linux/linux_proto.h> 69 #include <compat/linux/linux_emul.h> 70 #include <compat/linux/linux_futex.h> 71 #include <compat/linux/linux_ioctl.h> 72 #include <compat/linux/linux_mib.h> 73 #include <compat/linux/linux_misc.h> 74 #include <compat/linux/linux_signal.h> 75 #include <compat/linux/linux_util.h> 76 #include <compat/linux/linux_vdso.h> 77 78 MODULE_VERSION(linux, 1); 79 80 #if defined(DEBUG) 81 SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, 82 linux_sysctl_debug, "A", "Linux debugging control"); 83 #endif 84 85 /* 86 * Allow the sendsig functions to use the ldebug() facility even though they 87 * are not syscalls themselves. Map them to syscall 0. This is slightly less 88 * bogus than using ldebug(sigreturn). 89 */ 90 #define LINUX_SYS_linux_rt_sendsig 0 91 #define LINUX_SYS_linux_sendsig 0 92 93 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) 94 95 static int linux_szsigcode; 96 static vm_object_t linux_shared_page_obj; 97 static char *linux_shared_page_mapping; 98 extern char _binary_linux_locore_o_start; 99 extern char _binary_linux_locore_o_end; 100 101 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 102 103 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 104 105 static int linux_fixup(register_t **stack_base, 106 struct image_params *iparams); 107 static int linux_fixup_elf(register_t **stack_base, 108 struct image_params *iparams); 109 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 110 static void linux_exec_setregs(struct thread *td, 111 struct image_params *imgp, u_long stack); 112 static register_t *linux_copyout_strings(struct image_params *imgp); 113 static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); 114 static void linux_vdso_install(void *param); 115 static void linux_vdso_deinstall(void *param); 116 117 static int linux_szplatform; 118 const char *linux_kplatform; 119 120 static eventhandler_tag linux_exit_tag; 121 static eventhandler_tag linux_exec_tag; 122 static eventhandler_tag linux_thread_dtor_tag; 123 124 #define LINUX_T_UNKNOWN 255 125 static int _bsd_to_linux_trapcode[] = { 126 LINUX_T_UNKNOWN, /* 0 */ 127 6, /* 1 T_PRIVINFLT */ 128 LINUX_T_UNKNOWN, /* 2 */ 129 3, /* 3 T_BPTFLT */ 130 LINUX_T_UNKNOWN, /* 4 */ 131 LINUX_T_UNKNOWN, /* 5 */ 132 16, /* 6 T_ARITHTRAP */ 133 254, /* 7 T_ASTFLT */ 134 LINUX_T_UNKNOWN, /* 8 */ 135 13, /* 9 T_PROTFLT */ 136 1, /* 10 T_TRCTRAP */ 137 LINUX_T_UNKNOWN, /* 11 */ 138 14, /* 12 T_PAGEFLT */ 139 LINUX_T_UNKNOWN, /* 13 */ 140 17, /* 14 T_ALIGNFLT */ 141 LINUX_T_UNKNOWN, /* 15 */ 142 LINUX_T_UNKNOWN, /* 16 */ 143 LINUX_T_UNKNOWN, /* 17 */ 144 0, /* 18 T_DIVIDE */ 145 2, /* 19 T_NMI */ 146 4, /* 20 T_OFLOW */ 147 5, /* 21 T_BOUND */ 148 7, /* 22 T_DNA */ 149 8, /* 23 T_DOUBLEFLT */ 150 9, /* 24 T_FPOPFLT */ 151 10, /* 25 T_TSSFLT */ 152 11, /* 26 T_SEGNPFLT */ 153 12, /* 27 T_STKFLT */ 154 18, /* 28 T_MCHK */ 155 19, /* 29 T_XMMFLT */ 156 15 /* 30 T_RESERVED */ 157 }; 158 #define bsd_to_linux_trapcode(code) \ 159 ((code)<nitems(_bsd_to_linux_trapcode)? \ 160 _bsd_to_linux_trapcode[(code)]: \ 161 LINUX_T_UNKNOWN) 162 163 LINUX_VDSO_SYM_INTPTR(linux_sigcode); 164 LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode); 165 LINUX_VDSO_SYM_INTPTR(linux_vsyscall); 166 167 /* 168 * If FreeBSD & Linux have a difference of opinion about what a trap 169 * means, deal with it here. 170 * 171 * MPSAFE 172 */ 173 static int 174 linux_translate_traps(int signal, int trap_code) 175 { 176 if (signal != SIGBUS) 177 return (signal); 178 switch (trap_code) { 179 case T_PROTFLT: 180 case T_TSSFLT: 181 case T_DOUBLEFLT: 182 case T_PAGEFLT: 183 return (SIGSEGV); 184 default: 185 return (signal); 186 } 187 } 188 189 static int 190 linux_fixup(register_t **stack_base, struct image_params *imgp) 191 { 192 register_t *argv, *envp; 193 194 argv = *stack_base; 195 envp = *stack_base + (imgp->args->argc + 1); 196 (*stack_base)--; 197 suword(*stack_base, (intptr_t)(void *)envp); 198 (*stack_base)--; 199 suword(*stack_base, (intptr_t)(void *)argv); 200 (*stack_base)--; 201 suword(*stack_base, imgp->args->argc); 202 return (0); 203 } 204 205 static int 206 linux_fixup_elf(register_t **stack_base, struct image_params *imgp) 207 { 208 struct proc *p; 209 Elf32_Auxargs *args; 210 Elf32_Auxinfo *argarray, *pos; 211 Elf32_Addr *auxbase, *uplatform; 212 struct ps_strings *arginfo; 213 int error, issetugid; 214 215 KASSERT(curthread->td_proc == imgp->proc, 216 ("unsafe linux_fixup_elf(), should be curproc")); 217 218 p = imgp->proc; 219 issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; 220 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 221 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); 222 args = (Elf32_Auxargs *)imgp->auxargs; 223 auxbase = *stack_base + imgp->args->argc + 1 + imgp->args->envc + 1; 224 argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, 225 M_WAITOK | M_ZERO); 226 227 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, 228 imgp->proc->p_sysent->sv_shared_page_base); 229 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); 230 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); 231 232 /* 233 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 234 * as it has appeared in the 2.4.0-rc7 first time. 235 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 236 * glibc falls back to the hard-coded CLK_TCK value when aux entry 237 * is not present. 238 * Also see linux_times() implementation. 239 */ 240 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 241 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); 242 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 243 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 244 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 245 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 246 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 247 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 248 AUXARGS_ENTRY(pos, AT_BASE, args->base); 249 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); 250 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 251 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 252 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 253 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 254 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 255 AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); 256 if (imgp->execpathp != 0) 257 AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); 258 if (args->execfd != -1) 259 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 260 AUXARGS_ENTRY(pos, AT_NULL, 0); 261 262 free(imgp->auxargs, M_TEMP); 263 imgp->auxargs = NULL; 264 KASSERT((pos - argarray) / sizeof(*pos) <= LINUX_AT_COUNT, 265 ("Too many auxargs")); 266 267 error = copyout(argarray, auxbase, sizeof(*argarray) * LINUX_AT_COUNT); 268 free(argarray, M_TEMP); 269 if (error != 0) 270 return (error); 271 272 (*stack_base)--; 273 if (suword(*stack_base, (register_t)imgp->args->argc) == -1) 274 return (EFAULT); 275 return (0); 276 } 277 278 /* 279 * Copied from kern/kern_exec.c 280 */ 281 static register_t * 282 linux_copyout_strings(struct image_params *imgp) 283 { 284 int argc, envc; 285 char **vectp; 286 char *stringp, *destp; 287 register_t *stack_base; 288 struct ps_strings *arginfo; 289 char canary[LINUX_AT_RANDOM_LEN]; 290 size_t execpath_len; 291 struct proc *p; 292 293 /* Calculate string base and vector table pointers. */ 294 p = imgp->proc; 295 if (imgp->execpath != NULL && imgp->auxargs != NULL) 296 execpath_len = strlen(imgp->execpath) + 1; 297 else 298 execpath_len = 0; 299 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 300 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform - 301 roundup(sizeof(canary), sizeof(char *)) - 302 roundup(execpath_len, sizeof(char *)) - 303 roundup(ARG_MAX - imgp->args->stringspace, sizeof(char *)); 304 305 /* Install LINUX_PLATFORM. */ 306 copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform), 307 linux_szplatform); 308 309 if (execpath_len != 0) { 310 imgp->execpathp = (uintptr_t)arginfo - 311 linux_szplatform - execpath_len; 312 copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); 313 } 314 315 /* Prepare the canary for SSP. */ 316 arc4rand(canary, sizeof(canary), 0); 317 imgp->canary = (uintptr_t)arginfo - linux_szplatform - 318 roundup(execpath_len, sizeof(char *)) - 319 roundup(sizeof(canary), sizeof(char *)); 320 copyout(canary, (void *)imgp->canary, sizeof(canary)); 321 322 vectp = (char **)destp; 323 if (imgp->auxargs) { 324 /* 325 * Allocate room on the stack for the ELF auxargs 326 * array. It has LINUX_AT_COUNT entries. 327 */ 328 vectp -= howmany(LINUX_AT_COUNT * sizeof(Elf32_Auxinfo), 329 sizeof(*vectp)); 330 } 331 332 /* 333 * Allocate room for the argv[] and env vectors including the 334 * terminating NULL pointers. 335 */ 336 vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; 337 338 /* vectp also becomes our initial stack base. */ 339 stack_base = (register_t *)vectp; 340 341 stringp = imgp->args->begin_argv; 342 argc = imgp->args->argc; 343 envc = imgp->args->envc; 344 345 /* Copy out strings - arguments and environment. */ 346 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 347 348 /* Fill in "ps_strings" struct for ps, w, etc. */ 349 suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); 350 suword(&arginfo->ps_nargvstr, argc); 351 352 /* Fill in argument portion of vector table. */ 353 for (; argc > 0; --argc) { 354 suword(vectp++, (long)(intptr_t)destp); 355 while (*stringp++ != 0) 356 destp++; 357 destp++; 358 } 359 360 /* A null vector table pointer separates the argp's from the envp's. */ 361 suword(vectp++, 0); 362 363 suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); 364 suword(&arginfo->ps_nenvstr, envc); 365 366 /* Fill in environment portion of vector table. */ 367 for (; envc > 0; --envc) { 368 suword(vectp++, (long)(intptr_t)destp); 369 while (*stringp++ != 0) 370 destp++; 371 destp++; 372 } 373 374 /* The end of the vector table is a null pointer. */ 375 suword(vectp, 0); 376 377 return (stack_base); 378 } 379 380 static void 381 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 382 { 383 struct thread *td = curthread; 384 struct proc *p = td->td_proc; 385 struct sigacts *psp; 386 struct trapframe *regs; 387 struct l_rt_sigframe *fp, frame; 388 int sig, code; 389 int oonstack; 390 391 sig = ksi->ksi_signo; 392 code = ksi->ksi_code; 393 PROC_LOCK_ASSERT(p, MA_OWNED); 394 psp = p->p_sigacts; 395 mtx_assert(&psp->ps_mtx, MA_OWNED); 396 regs = td->td_frame; 397 oonstack = sigonstack(regs->tf_esp); 398 399 #ifdef DEBUG 400 if (ldebug(rt_sendsig)) 401 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 402 catcher, sig, (void*)mask, code); 403 #endif 404 /* Allocate space for the signal handler context. */ 405 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 406 SIGISMEMBER(psp->ps_sigonstack, sig)) { 407 fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + 408 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 409 } else 410 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 411 mtx_unlock(&psp->ps_mtx); 412 413 /* Build the argument list for the signal handler. */ 414 sig = bsd_to_linux_signal(sig); 415 416 bzero(&frame, sizeof(frame)); 417 418 frame.sf_handler = catcher; 419 frame.sf_sig = sig; 420 frame.sf_siginfo = &fp->sf_si; 421 frame.sf_ucontext = &fp->sf_sc; 422 423 /* Fill in POSIX parts. */ 424 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 425 426 /* Build the signal context to be used by sigreturn. */ 427 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 428 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 429 430 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 431 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 432 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 433 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 434 PROC_UNLOCK(p); 435 436 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 437 438 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; 439 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 440 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 441 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 442 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 443 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 444 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 445 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 446 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 447 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; 448 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 449 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 450 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 451 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 452 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 453 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 454 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 455 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 456 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 457 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 458 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 459 460 #ifdef DEBUG 461 if (ldebug(rt_sendsig)) 462 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 463 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 464 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 465 #endif 466 467 if (copyout(&frame, fp, sizeof(frame)) != 0) { 468 /* 469 * Process has trashed its stack; give it an illegal 470 * instruction to halt it in its tracks. 471 */ 472 #ifdef DEBUG 473 if (ldebug(rt_sendsig)) 474 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 475 fp, oonstack); 476 #endif 477 PROC_LOCK(p); 478 sigexit(td, SIGILL); 479 } 480 481 /* Build context to run handler in. */ 482 regs->tf_esp = (int)fp; 483 regs->tf_eip = linux_rt_sigcode; 484 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 485 regs->tf_cs = _ucodesel; 486 regs->tf_ds = _udatasel; 487 regs->tf_es = _udatasel; 488 regs->tf_fs = _udatasel; 489 regs->tf_ss = _udatasel; 490 PROC_LOCK(p); 491 mtx_lock(&psp->ps_mtx); 492 } 493 494 495 /* 496 * Send an interrupt to process. 497 * 498 * Stack is set up to allow sigcode stored 499 * in u. to call routine, followed by kcall 500 * to sigreturn routine below. After sigreturn 501 * resets the signal mask, the stack, and the 502 * frame pointer, it returns to the user 503 * specified pc, psl. 504 */ 505 static void 506 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 507 { 508 struct thread *td = curthread; 509 struct proc *p = td->td_proc; 510 struct sigacts *psp; 511 struct trapframe *regs; 512 struct l_sigframe *fp, frame; 513 l_sigset_t lmask; 514 int sig, code; 515 int oonstack; 516 517 PROC_LOCK_ASSERT(p, MA_OWNED); 518 psp = p->p_sigacts; 519 sig = ksi->ksi_signo; 520 code = ksi->ksi_code; 521 mtx_assert(&psp->ps_mtx, MA_OWNED); 522 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 523 /* Signal handler installed with SA_SIGINFO. */ 524 linux_rt_sendsig(catcher, ksi, mask); 525 return; 526 } 527 regs = td->td_frame; 528 oonstack = sigonstack(regs->tf_esp); 529 530 #ifdef DEBUG 531 if (ldebug(sendsig)) 532 printf(ARGS(sendsig, "%p, %d, %p, %u"), 533 catcher, sig, (void*)mask, code); 534 #endif 535 536 /* Allocate space for the signal handler context. */ 537 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 538 SIGISMEMBER(psp->ps_sigonstack, sig)) { 539 fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + 540 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 541 } else 542 fp = (struct l_sigframe *)regs->tf_esp - 1; 543 mtx_unlock(&psp->ps_mtx); 544 PROC_UNLOCK(p); 545 546 /* Build the argument list for the signal handler. */ 547 sig = bsd_to_linux_signal(sig); 548 549 bzero(&frame, sizeof(frame)); 550 551 frame.sf_handler = catcher; 552 frame.sf_sig = sig; 553 554 bsd_to_linux_sigset(mask, &lmask); 555 556 /* Build the signal context to be used by sigreturn. */ 557 frame.sf_sc.sc_mask = lmask.__mask; 558 frame.sf_sc.sc_gs = rgs(); 559 frame.sf_sc.sc_fs = regs->tf_fs; 560 frame.sf_sc.sc_es = regs->tf_es; 561 frame.sf_sc.sc_ds = regs->tf_ds; 562 frame.sf_sc.sc_edi = regs->tf_edi; 563 frame.sf_sc.sc_esi = regs->tf_esi; 564 frame.sf_sc.sc_ebp = regs->tf_ebp; 565 frame.sf_sc.sc_ebx = regs->tf_ebx; 566 frame.sf_sc.sc_esp = regs->tf_esp; 567 frame.sf_sc.sc_edx = regs->tf_edx; 568 frame.sf_sc.sc_ecx = regs->tf_ecx; 569 frame.sf_sc.sc_eax = regs->tf_eax; 570 frame.sf_sc.sc_eip = regs->tf_eip; 571 frame.sf_sc.sc_cs = regs->tf_cs; 572 frame.sf_sc.sc_eflags = regs->tf_eflags; 573 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 574 frame.sf_sc.sc_ss = regs->tf_ss; 575 frame.sf_sc.sc_err = regs->tf_err; 576 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 577 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 578 579 frame.sf_extramask[0] = lmask.__mask; 580 581 if (copyout(&frame, fp, sizeof(frame)) != 0) { 582 /* 583 * Process has trashed its stack; give it an illegal 584 * instruction to halt it in its tracks. 585 */ 586 PROC_LOCK(p); 587 sigexit(td, SIGILL); 588 } 589 590 /* Build context to run handler in. */ 591 regs->tf_esp = (int)fp; 592 regs->tf_eip = linux_sigcode; 593 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 594 regs->tf_cs = _ucodesel; 595 regs->tf_ds = _udatasel; 596 regs->tf_es = _udatasel; 597 regs->tf_fs = _udatasel; 598 regs->tf_ss = _udatasel; 599 PROC_LOCK(p); 600 mtx_lock(&psp->ps_mtx); 601 } 602 603 /* 604 * System call to cleanup state after a signal 605 * has been taken. Reset signal mask and 606 * stack state from context left by sendsig (above). 607 * Return to previous pc and psl as specified by 608 * context left by sendsig. Check carefully to 609 * make sure that the user has not modified the 610 * psl to gain improper privileges or to cause 611 * a machine fault. 612 */ 613 int 614 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 615 { 616 struct l_sigframe frame; 617 struct trapframe *regs; 618 l_sigset_t lmask; 619 sigset_t bmask; 620 int eflags; 621 ksiginfo_t ksi; 622 623 regs = td->td_frame; 624 625 #ifdef DEBUG 626 if (ldebug(sigreturn)) 627 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 628 #endif 629 /* 630 * The trampoline code hands us the sigframe. 631 * It is unsafe to keep track of it ourselves, in the event that a 632 * program jumps out of a signal handler. 633 */ 634 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 635 return (EFAULT); 636 637 /* Check for security violations. */ 638 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 639 eflags = frame.sf_sc.sc_eflags; 640 if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) 641 return (EINVAL); 642 643 /* 644 * Don't allow users to load a valid privileged %cs. Let the 645 * hardware check for invalid selectors, excess privilege in 646 * other selectors, invalid %eip's and invalid %esp's. 647 */ 648 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 649 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 650 ksiginfo_init_trap(&ksi); 651 ksi.ksi_signo = SIGBUS; 652 ksi.ksi_code = BUS_OBJERR; 653 ksi.ksi_trapno = T_PROTFLT; 654 ksi.ksi_addr = (void *)regs->tf_eip; 655 trapsignal(td, &ksi); 656 return (EINVAL); 657 } 658 659 lmask.__mask = frame.sf_sc.sc_mask; 660 linux_to_bsd_sigset(&lmask, &bmask); 661 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 662 663 /* Restore signal context. */ 664 /* %gs was restored by the trampoline. */ 665 regs->tf_fs = frame.sf_sc.sc_fs; 666 regs->tf_es = frame.sf_sc.sc_es; 667 regs->tf_ds = frame.sf_sc.sc_ds; 668 regs->tf_edi = frame.sf_sc.sc_edi; 669 regs->tf_esi = frame.sf_sc.sc_esi; 670 regs->tf_ebp = frame.sf_sc.sc_ebp; 671 regs->tf_ebx = frame.sf_sc.sc_ebx; 672 regs->tf_edx = frame.sf_sc.sc_edx; 673 regs->tf_ecx = frame.sf_sc.sc_ecx; 674 regs->tf_eax = frame.sf_sc.sc_eax; 675 regs->tf_eip = frame.sf_sc.sc_eip; 676 regs->tf_cs = frame.sf_sc.sc_cs; 677 regs->tf_eflags = eflags; 678 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 679 regs->tf_ss = frame.sf_sc.sc_ss; 680 681 return (EJUSTRETURN); 682 } 683 684 /* 685 * System call to cleanup state after a signal 686 * has been taken. Reset signal mask and 687 * stack state from context left by rt_sendsig (above). 688 * Return to previous pc and psl as specified by 689 * context left by sendsig. Check carefully to 690 * make sure that the user has not modified the 691 * psl to gain improper privileges or to cause 692 * a machine fault. 693 */ 694 int 695 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 696 { 697 struct l_ucontext uc; 698 struct l_sigcontext *context; 699 sigset_t bmask; 700 l_stack_t *lss; 701 stack_t ss; 702 struct trapframe *regs; 703 int eflags; 704 ksiginfo_t ksi; 705 706 regs = td->td_frame; 707 708 #ifdef DEBUG 709 if (ldebug(rt_sigreturn)) 710 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 711 #endif 712 /* 713 * The trampoline code hands us the ucontext. 714 * It is unsafe to keep track of it ourselves, in the event that a 715 * program jumps out of a signal handler. 716 */ 717 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 718 return (EFAULT); 719 720 context = &uc.uc_mcontext; 721 722 /* Check for security violations. */ 723 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 724 eflags = context->sc_eflags; 725 if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) 726 return (EINVAL); 727 728 /* 729 * Don't allow users to load a valid privileged %cs. Let the 730 * hardware check for invalid selectors, excess privilege in 731 * other selectors, invalid %eip's and invalid %esp's. 732 */ 733 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 734 if (!CS_SECURE(context->sc_cs)) { 735 ksiginfo_init_trap(&ksi); 736 ksi.ksi_signo = SIGBUS; 737 ksi.ksi_code = BUS_OBJERR; 738 ksi.ksi_trapno = T_PROTFLT; 739 ksi.ksi_addr = (void *)regs->tf_eip; 740 trapsignal(td, &ksi); 741 return (EINVAL); 742 } 743 744 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 745 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 746 747 /* Restore signal context. */ 748 /* %gs was restored by the trampoline. */ 749 regs->tf_fs = context->sc_fs; 750 regs->tf_es = context->sc_es; 751 regs->tf_ds = context->sc_ds; 752 regs->tf_edi = context->sc_edi; 753 regs->tf_esi = context->sc_esi; 754 regs->tf_ebp = context->sc_ebp; 755 regs->tf_ebx = context->sc_ebx; 756 regs->tf_edx = context->sc_edx; 757 regs->tf_ecx = context->sc_ecx; 758 regs->tf_eax = context->sc_eax; 759 regs->tf_eip = context->sc_eip; 760 regs->tf_cs = context->sc_cs; 761 regs->tf_eflags = eflags; 762 regs->tf_esp = context->sc_esp_at_signal; 763 regs->tf_ss = context->sc_ss; 764 765 /* Call sigaltstack & ignore results. */ 766 lss = &uc.uc_stack; 767 ss.ss_sp = lss->ss_sp; 768 ss.ss_size = lss->ss_size; 769 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 770 771 #ifdef DEBUG 772 if (ldebug(rt_sigreturn)) 773 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 774 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 775 #endif 776 (void)kern_sigaltstack(td, &ss, NULL); 777 778 return (EJUSTRETURN); 779 } 780 781 static int 782 linux_fetch_syscall_args(struct thread *td) 783 { 784 struct proc *p; 785 struct trapframe *frame; 786 struct syscall_args *sa; 787 788 p = td->td_proc; 789 frame = td->td_frame; 790 sa = &td->td_sa; 791 792 sa->code = frame->tf_eax; 793 sa->args[0] = frame->tf_ebx; 794 sa->args[1] = frame->tf_ecx; 795 sa->args[2] = frame->tf_edx; 796 sa->args[3] = frame->tf_esi; 797 sa->args[4] = frame->tf_edi; 798 sa->args[5] = frame->tf_ebp; /* Unconfirmed */ 799 800 if (sa->code >= p->p_sysent->sv_size) 801 /* nosys */ 802 sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; 803 else 804 sa->callp = &p->p_sysent->sv_table[sa->code]; 805 sa->narg = sa->callp->sy_narg; 806 807 td->td_retval[0] = 0; 808 td->td_retval[1] = frame->tf_edx; 809 810 return (0); 811 } 812 813 /* 814 * exec_setregs may initialize some registers differently than Linux 815 * does, thus potentially confusing Linux binaries. If necessary, we 816 * override the exec_setregs default(s) here. 817 */ 818 static void 819 linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) 820 { 821 struct pcb *pcb = td->td_pcb; 822 823 exec_setregs(td, imgp, stack); 824 825 /* Linux sets %gs to 0, we default to _udatasel. */ 826 pcb->pcb_gs = 0; 827 load_gs(0); 828 829 pcb->pcb_initial_npxcw = __LINUX_NPXCW__; 830 } 831 832 static void 833 linux_get_machine(const char **dst) 834 { 835 836 switch (cpu_class) { 837 case CPUCLASS_686: 838 *dst = "i686"; 839 break; 840 case CPUCLASS_586: 841 *dst = "i586"; 842 break; 843 case CPUCLASS_486: 844 *dst = "i486"; 845 break; 846 default: 847 *dst = "i386"; 848 } 849 } 850 851 struct sysentvec linux_sysvec = { 852 .sv_size = LINUX_SYS_MAXSYSCALL, 853 .sv_table = linux_sysent, 854 .sv_mask = 0, 855 .sv_errsize = ELAST + 1, 856 .sv_errtbl = linux_errtbl, 857 .sv_transtrap = linux_translate_traps, 858 .sv_fixup = linux_fixup, 859 .sv_sendsig = linux_sendsig, 860 .sv_sigcode = &_binary_linux_locore_o_start, 861 .sv_szsigcode = &linux_szsigcode, 862 .sv_name = "Linux a.out", 863 .sv_coredump = NULL, 864 .sv_imgact_try = linux_exec_imgact_try, 865 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 866 .sv_pagesize = PAGE_SIZE, 867 .sv_minuser = VM_MIN_ADDRESS, 868 .sv_maxuser = VM_MAXUSER_ADDRESS, 869 .sv_usrstack = LINUX_USRSTACK, 870 .sv_psstrings = PS_STRINGS, 871 .sv_stackprot = VM_PROT_ALL, 872 .sv_copyout_strings = exec_copyout_strings, 873 .sv_setregs = linux_exec_setregs, 874 .sv_fixlimit = NULL, 875 .sv_maxssiz = NULL, 876 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, 877 .sv_set_syscall_retval = cpu_set_syscall_retval, 878 .sv_fetch_syscall_args = linux_fetch_syscall_args, 879 .sv_syscallnames = NULL, 880 .sv_shared_page_base = LINUX_SHAREDPAGE, 881 .sv_shared_page_len = PAGE_SIZE, 882 .sv_schedtail = linux_schedtail, 883 .sv_thread_detach = linux_thread_detach, 884 .sv_trap = NULL, 885 }; 886 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); 887 888 struct sysentvec elf_linux_sysvec = { 889 .sv_size = LINUX_SYS_MAXSYSCALL, 890 .sv_table = linux_sysent, 891 .sv_mask = 0, 892 .sv_errsize = ELAST + 1, 893 .sv_errtbl = linux_errtbl, 894 .sv_transtrap = linux_translate_traps, 895 .sv_fixup = linux_fixup_elf, 896 .sv_sendsig = linux_sendsig, 897 .sv_sigcode = &_binary_linux_locore_o_start, 898 .sv_szsigcode = &linux_szsigcode, 899 .sv_name = "Linux ELF", 900 .sv_coredump = elf32_coredump, 901 .sv_imgact_try = linux_exec_imgact_try, 902 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 903 .sv_pagesize = PAGE_SIZE, 904 .sv_minuser = VM_MIN_ADDRESS, 905 .sv_maxuser = VM_MAXUSER_ADDRESS, 906 .sv_usrstack = LINUX_USRSTACK, 907 .sv_psstrings = LINUX_PS_STRINGS, 908 .sv_stackprot = VM_PROT_ALL, 909 .sv_copyout_strings = linux_copyout_strings, 910 .sv_setregs = linux_exec_setregs, 911 .sv_fixlimit = NULL, 912 .sv_maxssiz = NULL, 913 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, 914 .sv_set_syscall_retval = cpu_set_syscall_retval, 915 .sv_fetch_syscall_args = linux_fetch_syscall_args, 916 .sv_syscallnames = NULL, 917 .sv_shared_page_base = LINUX_SHAREDPAGE, 918 .sv_shared_page_len = PAGE_SIZE, 919 .sv_schedtail = linux_schedtail, 920 .sv_thread_detach = linux_thread_detach, 921 .sv_trap = NULL, 922 }; 923 924 static void 925 linux_vdso_install(void *param) 926 { 927 928 linux_szsigcode = (&_binary_linux_locore_o_end - 929 &_binary_linux_locore_o_start); 930 931 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 932 panic("Linux invalid vdso size\n"); 933 934 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 935 936 linux_shared_page_obj = __elfN(linux_shared_page_init) 937 (&linux_shared_page_mapping); 938 939 __elfN(linux_vdso_reloc)(&elf_linux_sysvec); 940 941 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 942 linux_szsigcode); 943 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 944 } 945 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 946 linux_vdso_install, NULL); 947 948 static void 949 linux_vdso_deinstall(void *param) 950 { 951 952 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 953 } 954 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 955 linux_vdso_deinstall, NULL); 956 957 static char GNU_ABI_VENDOR[] = "GNU"; 958 static int GNULINUX_ABI_DESC = 0; 959 960 static bool 961 linux_trans_osrel(const Elf_Note *note, int32_t *osrel) 962 { 963 const Elf32_Word *desc; 964 uintptr_t p; 965 966 p = (uintptr_t)(note + 1); 967 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 968 969 desc = (const Elf32_Word *)p; 970 if (desc[0] != GNULINUX_ABI_DESC) 971 return (false); 972 973 /* 974 * For Linux we encode osrel as follows (see linux_mib.c): 975 * VVVMMMIII (version, major, minor), see linux_mib.c. 976 */ 977 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 978 979 return (true); 980 } 981 982 static Elf_Brandnote linux_brandnote = { 983 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 984 .hdr.n_descsz = 16, /* XXX at least 16 */ 985 .hdr.n_type = 1, 986 .vendor = GNU_ABI_VENDOR, 987 .flags = BN_TRANSLATE_OSREL, 988 .trans_osrel = linux_trans_osrel 989 }; 990 991 static Elf32_Brandinfo linux_brand = { 992 .brand = ELFOSABI_LINUX, 993 .machine = EM_386, 994 .compat_3_brand = "Linux", 995 .emul_path = "/compat/linux", 996 .interp_path = "/lib/ld-linux.so.1", 997 .sysvec = &elf_linux_sysvec, 998 .interp_newpath = NULL, 999 .brand_note = &linux_brandnote, 1000 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1001 }; 1002 1003 static Elf32_Brandinfo linux_glibc2brand = { 1004 .brand = ELFOSABI_LINUX, 1005 .machine = EM_386, 1006 .compat_3_brand = "Linux", 1007 .emul_path = "/compat/linux", 1008 .interp_path = "/lib/ld-linux.so.2", 1009 .sysvec = &elf_linux_sysvec, 1010 .interp_newpath = NULL, 1011 .brand_note = &linux_brandnote, 1012 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1013 }; 1014 1015 static Elf32_Brandinfo linux_muslbrand = { 1016 .brand = ELFOSABI_LINUX, 1017 .machine = EM_386, 1018 .compat_3_brand = "Linux", 1019 .emul_path = "/compat/linux", 1020 .interp_path = "/lib/ld-musl-i386.so.1", 1021 .sysvec = &elf_linux_sysvec, 1022 .interp_newpath = NULL, 1023 .brand_note = &linux_brandnote, 1024 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1025 }; 1026 1027 Elf32_Brandinfo *linux_brandlist[] = { 1028 &linux_brand, 1029 &linux_glibc2brand, 1030 &linux_muslbrand, 1031 NULL 1032 }; 1033 1034 static int 1035 linux_elf_modevent(module_t mod, int type, void *data) 1036 { 1037 Elf32_Brandinfo **brandinfo; 1038 int error; 1039 struct linux_ioctl_handler **lihp; 1040 1041 error = 0; 1042 1043 switch(type) { 1044 case MOD_LOAD: 1045 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1046 ++brandinfo) 1047 if (elf32_insert_brand_entry(*brandinfo) < 0) 1048 error = EINVAL; 1049 if (error == 0) { 1050 SET_FOREACH(lihp, linux_ioctl_handler_set) 1051 linux_ioctl_register_handler(*lihp); 1052 LIST_INIT(&futex_list); 1053 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1054 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 1055 NULL, 1000); 1056 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 1057 NULL, 1000); 1058 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, 1059 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); 1060 linux_get_machine(&linux_kplatform); 1061 linux_szplatform = roundup(strlen(linux_kplatform) + 1, 1062 sizeof(char *)); 1063 linux_osd_jail_register(); 1064 stclohz = (stathz ? stathz : hz); 1065 if (bootverbose) 1066 printf("Linux ELF exec handler installed\n"); 1067 } else 1068 printf("cannot insert Linux ELF brand handler\n"); 1069 break; 1070 case MOD_UNLOAD: 1071 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1072 ++brandinfo) 1073 if (elf32_brand_inuse(*brandinfo)) 1074 error = EBUSY; 1075 if (error == 0) { 1076 for (brandinfo = &linux_brandlist[0]; 1077 *brandinfo != NULL; ++brandinfo) 1078 if (elf32_remove_brand_entry(*brandinfo) < 0) 1079 error = EINVAL; 1080 } 1081 if (error == 0) { 1082 SET_FOREACH(lihp, linux_ioctl_handler_set) 1083 linux_ioctl_unregister_handler(*lihp); 1084 mtx_destroy(&futex_mtx); 1085 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1086 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1087 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); 1088 linux_osd_jail_deregister(); 1089 if (bootverbose) 1090 printf("Linux ELF exec handler removed\n"); 1091 } else 1092 printf("Could not deinstall ELF interpreter entry\n"); 1093 break; 1094 default: 1095 return (EOPNOTSUPP); 1096 } 1097 return (error); 1098 } 1099 1100 static moduledata_t linux_elf_mod = { 1101 "linuxelf", 1102 linux_elf_modevent, 1103 0 1104 }; 1105 1106 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1107 FEATURE(linux, "Linux 32bit support"); 1108