1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1994-1996 Søren Schmidt 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/fcntl.h> 36 #include <sys/imgact.h> 37 #include <sys/imgact_aout.h> 38 #include <sys/imgact_elf.h> 39 #include <sys/kernel.h> 40 #include <sys/lock.h> 41 #include <sys/malloc.h> 42 #include <sys/module.h> 43 #include <sys/mutex.h> 44 #include <sys/proc.h> 45 #include <sys/signalvar.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sysctl.h> 48 #include <sys/sysent.h> 49 #include <sys/sysproto.h> 50 #include <sys/vnode.h> 51 #include <sys/eventhandler.h> 52 53 #include <vm/vm.h> 54 #include <vm/pmap.h> 55 #include <vm/vm_extern.h> 56 #include <vm/vm_map.h> 57 #include <vm/vm_object.h> 58 #include <vm/vm_page.h> 59 #include <vm/vm_param.h> 60 61 #include <machine/cpu.h> 62 #include <machine/cputypes.h> 63 #include <machine/md_var.h> 64 #include <machine/pcb.h> 65 #include <machine/trap.h> 66 67 #include <i386/linux/linux.h> 68 #include <i386/linux/linux_proto.h> 69 #include <compat/linux/linux_emul.h> 70 #include <compat/linux/linux_ioctl.h> 71 #include <compat/linux/linux_mib.h> 72 #include <compat/linux/linux_misc.h> 73 #include <compat/linux/linux_signal.h> 74 #include <compat/linux/linux_util.h> 75 #include <compat/linux/linux_vdso.h> 76 77 MODULE_VERSION(linux, 1); 78 79 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) 80 81 static int linux_szsigcode; 82 static vm_object_t linux_shared_page_obj; 83 static char *linux_shared_page_mapping; 84 extern char _binary_linux_locore_o_start; 85 extern char _binary_linux_locore_o_end; 86 87 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 88 89 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 90 91 static int linux_fixup(uintptr_t *stack_base, 92 struct image_params *iparams); 93 static int linux_fixup_elf(uintptr_t *stack_base, 94 struct image_params *iparams); 95 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 96 static void linux_exec_setregs(struct thread *td, 97 struct image_params *imgp, uintptr_t stack); 98 static int linux_copyout_strings(struct image_params *imgp, 99 uintptr_t *stack_base); 100 static bool linux_trans_osrel(const Elf_Note *note, int32_t *osrel); 101 static void linux_vdso_install(void *param); 102 static void linux_vdso_deinstall(void *param); 103 104 static int linux_szplatform; 105 const char *linux_kplatform; 106 107 static eventhandler_tag linux_exit_tag; 108 static eventhandler_tag linux_exec_tag; 109 static eventhandler_tag linux_thread_dtor_tag; 110 111 #define LINUX_T_UNKNOWN 255 112 static int _bsd_to_linux_trapcode[] = { 113 LINUX_T_UNKNOWN, /* 0 */ 114 6, /* 1 T_PRIVINFLT */ 115 LINUX_T_UNKNOWN, /* 2 */ 116 3, /* 3 T_BPTFLT */ 117 LINUX_T_UNKNOWN, /* 4 */ 118 LINUX_T_UNKNOWN, /* 5 */ 119 16, /* 6 T_ARITHTRAP */ 120 254, /* 7 T_ASTFLT */ 121 LINUX_T_UNKNOWN, /* 8 */ 122 13, /* 9 T_PROTFLT */ 123 1, /* 10 T_TRCTRAP */ 124 LINUX_T_UNKNOWN, /* 11 */ 125 14, /* 12 T_PAGEFLT */ 126 LINUX_T_UNKNOWN, /* 13 */ 127 17, /* 14 T_ALIGNFLT */ 128 LINUX_T_UNKNOWN, /* 15 */ 129 LINUX_T_UNKNOWN, /* 16 */ 130 LINUX_T_UNKNOWN, /* 17 */ 131 0, /* 18 T_DIVIDE */ 132 2, /* 19 T_NMI */ 133 4, /* 20 T_OFLOW */ 134 5, /* 21 T_BOUND */ 135 7, /* 22 T_DNA */ 136 8, /* 23 T_DOUBLEFLT */ 137 9, /* 24 T_FPOPFLT */ 138 10, /* 25 T_TSSFLT */ 139 11, /* 26 T_SEGNPFLT */ 140 12, /* 27 T_STKFLT */ 141 18, /* 28 T_MCHK */ 142 19, /* 29 T_XMMFLT */ 143 15 /* 30 T_RESERVED */ 144 }; 145 #define bsd_to_linux_trapcode(code) \ 146 ((code)<nitems(_bsd_to_linux_trapcode)? \ 147 _bsd_to_linux_trapcode[(code)]: \ 148 LINUX_T_UNKNOWN) 149 150 LINUX_VDSO_SYM_INTPTR(linux_sigcode); 151 LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode); 152 LINUX_VDSO_SYM_INTPTR(linux_vsyscall); 153 154 /* 155 * If FreeBSD & Linux have a difference of opinion about what a trap 156 * means, deal with it here. 157 * 158 * MPSAFE 159 */ 160 static int 161 linux_translate_traps(int signal, int trap_code) 162 { 163 if (signal != SIGBUS) 164 return (signal); 165 switch (trap_code) { 166 case T_PROTFLT: 167 case T_TSSFLT: 168 case T_DOUBLEFLT: 169 case T_PAGEFLT: 170 return (SIGSEGV); 171 default: 172 return (signal); 173 } 174 } 175 176 static int 177 linux_fixup(uintptr_t *stack_base, struct image_params *imgp) 178 { 179 register_t *base, *argv, *envp; 180 181 base = (register_t *)*stack_base; 182 argv = base; 183 envp = base + (imgp->args->argc + 1); 184 base--; 185 suword(base, (intptr_t)envp); 186 base--; 187 suword(base, (intptr_t)argv); 188 base--; 189 suword(base, imgp->args->argc); 190 *stack_base = (uintptr_t)base; 191 return (0); 192 } 193 194 static int 195 linux_copyout_auxargs(struct image_params *imgp, uintptr_t *base) 196 { 197 struct proc *p; 198 Elf32_Auxargs *args; 199 Elf32_Auxinfo *argarray, *pos; 200 Elf32_Addr *uplatform; 201 struct ps_strings *arginfo; 202 u_long auxlen; 203 int error, issetugid; 204 205 p = imgp->proc; 206 issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; 207 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 208 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); 209 args = (Elf32_Auxargs *)imgp->auxargs; 210 argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, 211 M_WAITOK | M_ZERO); 212 213 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, 214 imgp->proc->p_sysent->sv_shared_page_base); 215 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); 216 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); 217 218 /* 219 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 220 * as it has appeared in the 2.4.0-rc7 first time. 221 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 222 * glibc falls back to the hard-coded CLK_TCK value when aux entry 223 * is not present. 224 * Also see linux_times() implementation. 225 */ 226 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 227 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); 228 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 229 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 230 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 231 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 232 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 233 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 234 AUXARGS_ENTRY(pos, AT_BASE, args->base); 235 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); 236 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 237 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 238 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 239 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 240 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 241 AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); 242 if (imgp->execpathp != 0) 243 AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); 244 if (args->execfd != -1) 245 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 246 AUXARGS_ENTRY(pos, AT_NULL, 0); 247 248 free(imgp->auxargs, M_TEMP); 249 imgp->auxargs = NULL; 250 KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); 251 252 auxlen = sizeof(*argarray) * (pos - argarray); 253 *base -= auxlen; 254 error = copyout(argarray, (void *)*base, auxlen); 255 free(argarray, M_TEMP); 256 return (error); 257 } 258 259 static int 260 linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) 261 { 262 register_t *base; 263 264 base = (register_t *)*stack_base; 265 base--; 266 if (suword(base, (register_t)imgp->args->argc) == -1) 267 return (EFAULT); 268 *stack_base = (uintptr_t)base; 269 return (0); 270 } 271 272 /* 273 * Copied from kern/kern_exec.c 274 */ 275 static int 276 linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) 277 { 278 int argc, envc, error; 279 char **vectp; 280 char *stringp; 281 uintptr_t destp, ustringp; 282 struct ps_strings *arginfo; 283 char canary[LINUX_AT_RANDOM_LEN]; 284 size_t execpath_len; 285 struct proc *p; 286 287 /* Calculate string base and vector table pointers. */ 288 p = imgp->proc; 289 if (imgp->execpath != NULL && imgp->auxargs != NULL) 290 execpath_len = strlen(imgp->execpath) + 1; 291 else 292 execpath_len = 0; 293 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 294 destp = (uintptr_t)arginfo; 295 296 /* Install LINUX_PLATFORM. */ 297 destp -= linux_szplatform; 298 destp = rounddown2(destp, sizeof(void *)); 299 error = copyout(linux_kplatform, (void *)destp, linux_szplatform); 300 if (error != 0) 301 return (error); 302 303 if (execpath_len != 0) { 304 destp -= execpath_len; 305 destp = rounddown2(destp, sizeof(void *)); 306 imgp->execpathp = destp; 307 error = copyout(imgp->execpath, (void *)destp, execpath_len); 308 if (error != 0) 309 return (error); 310 } 311 312 /* Prepare the canary for SSP. */ 313 arc4rand(canary, sizeof(canary), 0); 314 destp -= roundup(sizeof(canary), sizeof(void *)); 315 imgp->canary = destp; 316 error = copyout(canary, (void *)destp, sizeof(canary)); 317 if (error != 0) 318 return (error); 319 320 /* Allocate room for the argument and environment strings. */ 321 destp -= ARG_MAX - imgp->args->stringspace; 322 destp = rounddown2(destp, sizeof(void *)); 323 ustringp = destp; 324 325 if (imgp->auxargs) { 326 error = imgp->sysent->sv_copyout_auxargs(imgp, &destp); 327 if (error != 0) 328 return (error); 329 } 330 331 vectp = (char **)destp; 332 333 /* 334 * Allocate room for the argv[] and env vectors including the 335 * terminating NULL pointers. 336 */ 337 vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; 338 339 /* vectp also becomes our initial stack base. */ 340 *stack_base = (uintptr_t)vectp; 341 342 stringp = imgp->args->begin_argv; 343 argc = imgp->args->argc; 344 envc = imgp->args->envc; 345 346 /* Copy out strings - arguments and environment. */ 347 error = copyout(stringp, (void *)ustringp, 348 ARG_MAX - imgp->args->stringspace); 349 if (error != 0) 350 return (error); 351 352 /* Fill in "ps_strings" struct for ps, w, etc. */ 353 if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || 354 suword(&arginfo->ps_nargvstr, argc) != 0) 355 return (EFAULT); 356 357 /* Fill in argument portion of vector table. */ 358 for (; argc > 0; --argc) { 359 if (suword(vectp++, ustringp) != 0) 360 return (EFAULT); 361 while (*stringp++ != 0) 362 ustringp++; 363 ustringp++; 364 } 365 366 /* A null vector table pointer separates the argp's from the envp's. */ 367 if (suword(vectp++, 0) != 0) 368 return (EFAULT); 369 370 if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || 371 suword(&arginfo->ps_nenvstr, envc) != 0) 372 return (EFAULT); 373 374 /* Fill in environment portion of vector table. */ 375 for (; envc > 0; --envc) { 376 if (suword(vectp++, ustringp) != 0) 377 return (EFAULT); 378 while (*stringp++ != 0) 379 ustringp++; 380 ustringp++; 381 } 382 383 /* The end of the vector table is a null pointer. */ 384 if (suword(vectp, 0) != 0) 385 return (EFAULT); 386 387 return (0); 388 } 389 390 static void 391 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 392 { 393 struct thread *td = curthread; 394 struct proc *p = td->td_proc; 395 struct sigacts *psp; 396 struct trapframe *regs; 397 struct l_rt_sigframe *fp, frame; 398 int sig, code; 399 int oonstack; 400 401 sig = ksi->ksi_signo; 402 code = ksi->ksi_code; 403 PROC_LOCK_ASSERT(p, MA_OWNED); 404 psp = p->p_sigacts; 405 mtx_assert(&psp->ps_mtx, MA_OWNED); 406 regs = td->td_frame; 407 oonstack = sigonstack(regs->tf_esp); 408 409 /* Allocate space for the signal handler context. */ 410 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 411 SIGISMEMBER(psp->ps_sigonstack, sig)) { 412 fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + 413 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 414 } else 415 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 416 mtx_unlock(&psp->ps_mtx); 417 418 /* Build the argument list for the signal handler. */ 419 sig = bsd_to_linux_signal(sig); 420 421 bzero(&frame, sizeof(frame)); 422 423 frame.sf_handler = catcher; 424 frame.sf_sig = sig; 425 frame.sf_siginfo = &fp->sf_si; 426 frame.sf_ucontext = &fp->sf_sc; 427 428 /* Fill in POSIX parts. */ 429 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 430 431 /* Build the signal context to be used by sigreturn. */ 432 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 433 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 434 435 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 436 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 437 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 438 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 439 PROC_UNLOCK(p); 440 441 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 442 443 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; 444 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 445 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 446 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 447 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 448 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 449 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 450 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 451 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 452 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; 453 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 454 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 455 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 456 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 457 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 458 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 459 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 460 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 461 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 462 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 463 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 464 465 if (copyout(&frame, fp, sizeof(frame)) != 0) { 466 /* 467 * Process has trashed its stack; give it an illegal 468 * instruction to halt it in its tracks. 469 */ 470 PROC_LOCK(p); 471 sigexit(td, SIGILL); 472 } 473 474 /* Build context to run handler in. */ 475 regs->tf_esp = (int)fp; 476 regs->tf_eip = linux_rt_sigcode; 477 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 478 regs->tf_cs = _ucodesel; 479 regs->tf_ds = _udatasel; 480 regs->tf_es = _udatasel; 481 regs->tf_fs = _udatasel; 482 regs->tf_ss = _udatasel; 483 PROC_LOCK(p); 484 mtx_lock(&psp->ps_mtx); 485 } 486 487 488 /* 489 * Send an interrupt to process. 490 * 491 * Stack is set up to allow sigcode stored 492 * in u. to call routine, followed by kcall 493 * to sigreturn routine below. After sigreturn 494 * resets the signal mask, the stack, and the 495 * frame pointer, it returns to the user 496 * specified pc, psl. 497 */ 498 static void 499 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 500 { 501 struct thread *td = curthread; 502 struct proc *p = td->td_proc; 503 struct sigacts *psp; 504 struct trapframe *regs; 505 struct l_sigframe *fp, frame; 506 l_sigset_t lmask; 507 int sig, code; 508 int oonstack; 509 510 PROC_LOCK_ASSERT(p, MA_OWNED); 511 psp = p->p_sigacts; 512 sig = ksi->ksi_signo; 513 code = ksi->ksi_code; 514 mtx_assert(&psp->ps_mtx, MA_OWNED); 515 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 516 /* Signal handler installed with SA_SIGINFO. */ 517 linux_rt_sendsig(catcher, ksi, mask); 518 return; 519 } 520 regs = td->td_frame; 521 oonstack = sigonstack(regs->tf_esp); 522 523 /* Allocate space for the signal handler context. */ 524 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 525 SIGISMEMBER(psp->ps_sigonstack, sig)) { 526 fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + 527 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 528 } else 529 fp = (struct l_sigframe *)regs->tf_esp - 1; 530 mtx_unlock(&psp->ps_mtx); 531 PROC_UNLOCK(p); 532 533 /* Build the argument list for the signal handler. */ 534 sig = bsd_to_linux_signal(sig); 535 536 bzero(&frame, sizeof(frame)); 537 538 frame.sf_handler = catcher; 539 frame.sf_sig = sig; 540 541 bsd_to_linux_sigset(mask, &lmask); 542 543 /* Build the signal context to be used by sigreturn. */ 544 frame.sf_sc.sc_mask = lmask.__mask; 545 frame.sf_sc.sc_gs = rgs(); 546 frame.sf_sc.sc_fs = regs->tf_fs; 547 frame.sf_sc.sc_es = regs->tf_es; 548 frame.sf_sc.sc_ds = regs->tf_ds; 549 frame.sf_sc.sc_edi = regs->tf_edi; 550 frame.sf_sc.sc_esi = regs->tf_esi; 551 frame.sf_sc.sc_ebp = regs->tf_ebp; 552 frame.sf_sc.sc_ebx = regs->tf_ebx; 553 frame.sf_sc.sc_esp = regs->tf_esp; 554 frame.sf_sc.sc_edx = regs->tf_edx; 555 frame.sf_sc.sc_ecx = regs->tf_ecx; 556 frame.sf_sc.sc_eax = regs->tf_eax; 557 frame.sf_sc.sc_eip = regs->tf_eip; 558 frame.sf_sc.sc_cs = regs->tf_cs; 559 frame.sf_sc.sc_eflags = regs->tf_eflags; 560 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 561 frame.sf_sc.sc_ss = regs->tf_ss; 562 frame.sf_sc.sc_err = regs->tf_err; 563 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 564 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 565 566 frame.sf_extramask[0] = lmask.__mask; 567 568 if (copyout(&frame, fp, sizeof(frame)) != 0) { 569 /* 570 * Process has trashed its stack; give it an illegal 571 * instruction to halt it in its tracks. 572 */ 573 PROC_LOCK(p); 574 sigexit(td, SIGILL); 575 } 576 577 /* Build context to run handler in. */ 578 regs->tf_esp = (int)fp; 579 regs->tf_eip = linux_sigcode; 580 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 581 regs->tf_cs = _ucodesel; 582 regs->tf_ds = _udatasel; 583 regs->tf_es = _udatasel; 584 regs->tf_fs = _udatasel; 585 regs->tf_ss = _udatasel; 586 PROC_LOCK(p); 587 mtx_lock(&psp->ps_mtx); 588 } 589 590 /* 591 * System call to cleanup state after a signal 592 * has been taken. Reset signal mask and 593 * stack state from context left by sendsig (above). 594 * Return to previous pc and psl as specified by 595 * context left by sendsig. Check carefully to 596 * make sure that the user has not modified the 597 * psl to gain improper privileges or to cause 598 * a machine fault. 599 */ 600 int 601 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 602 { 603 struct l_sigframe frame; 604 struct trapframe *regs; 605 l_sigset_t lmask; 606 sigset_t bmask; 607 int eflags; 608 ksiginfo_t ksi; 609 610 regs = td->td_frame; 611 612 /* 613 * The trampoline code hands us the sigframe. 614 * It is unsafe to keep track of it ourselves, in the event that a 615 * program jumps out of a signal handler. 616 */ 617 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 618 return (EFAULT); 619 620 /* Check for security violations. */ 621 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 622 eflags = frame.sf_sc.sc_eflags; 623 if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) 624 return (EINVAL); 625 626 /* 627 * Don't allow users to load a valid privileged %cs. Let the 628 * hardware check for invalid selectors, excess privilege in 629 * other selectors, invalid %eip's and invalid %esp's. 630 */ 631 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 632 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 633 ksiginfo_init_trap(&ksi); 634 ksi.ksi_signo = SIGBUS; 635 ksi.ksi_code = BUS_OBJERR; 636 ksi.ksi_trapno = T_PROTFLT; 637 ksi.ksi_addr = (void *)regs->tf_eip; 638 trapsignal(td, &ksi); 639 return (EINVAL); 640 } 641 642 lmask.__mask = frame.sf_sc.sc_mask; 643 linux_to_bsd_sigset(&lmask, &bmask); 644 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 645 646 /* Restore signal context. */ 647 /* %gs was restored by the trampoline. */ 648 regs->tf_fs = frame.sf_sc.sc_fs; 649 regs->tf_es = frame.sf_sc.sc_es; 650 regs->tf_ds = frame.sf_sc.sc_ds; 651 regs->tf_edi = frame.sf_sc.sc_edi; 652 regs->tf_esi = frame.sf_sc.sc_esi; 653 regs->tf_ebp = frame.sf_sc.sc_ebp; 654 regs->tf_ebx = frame.sf_sc.sc_ebx; 655 regs->tf_edx = frame.sf_sc.sc_edx; 656 regs->tf_ecx = frame.sf_sc.sc_ecx; 657 regs->tf_eax = frame.sf_sc.sc_eax; 658 regs->tf_eip = frame.sf_sc.sc_eip; 659 regs->tf_cs = frame.sf_sc.sc_cs; 660 regs->tf_eflags = eflags; 661 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 662 regs->tf_ss = frame.sf_sc.sc_ss; 663 664 return (EJUSTRETURN); 665 } 666 667 /* 668 * System call to cleanup state after a signal 669 * has been taken. Reset signal mask and 670 * stack state from context left by rt_sendsig (above). 671 * Return to previous pc and psl as specified by 672 * context left by sendsig. Check carefully to 673 * make sure that the user has not modified the 674 * psl to gain improper privileges or to cause 675 * a machine fault. 676 */ 677 int 678 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 679 { 680 struct l_ucontext uc; 681 struct l_sigcontext *context; 682 sigset_t bmask; 683 l_stack_t *lss; 684 stack_t ss; 685 struct trapframe *regs; 686 int eflags; 687 ksiginfo_t ksi; 688 689 regs = td->td_frame; 690 691 /* 692 * The trampoline code hands us the ucontext. 693 * It is unsafe to keep track of it ourselves, in the event that a 694 * program jumps out of a signal handler. 695 */ 696 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 697 return (EFAULT); 698 699 context = &uc.uc_mcontext; 700 701 /* Check for security violations. */ 702 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 703 eflags = context->sc_eflags; 704 if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) 705 return (EINVAL); 706 707 /* 708 * Don't allow users to load a valid privileged %cs. Let the 709 * hardware check for invalid selectors, excess privilege in 710 * other selectors, invalid %eip's and invalid %esp's. 711 */ 712 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 713 if (!CS_SECURE(context->sc_cs)) { 714 ksiginfo_init_trap(&ksi); 715 ksi.ksi_signo = SIGBUS; 716 ksi.ksi_code = BUS_OBJERR; 717 ksi.ksi_trapno = T_PROTFLT; 718 ksi.ksi_addr = (void *)regs->tf_eip; 719 trapsignal(td, &ksi); 720 return (EINVAL); 721 } 722 723 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 724 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 725 726 /* Restore signal context. */ 727 /* %gs was restored by the trampoline. */ 728 regs->tf_fs = context->sc_fs; 729 regs->tf_es = context->sc_es; 730 regs->tf_ds = context->sc_ds; 731 regs->tf_edi = context->sc_edi; 732 regs->tf_esi = context->sc_esi; 733 regs->tf_ebp = context->sc_ebp; 734 regs->tf_ebx = context->sc_ebx; 735 regs->tf_edx = context->sc_edx; 736 regs->tf_ecx = context->sc_ecx; 737 regs->tf_eax = context->sc_eax; 738 regs->tf_eip = context->sc_eip; 739 regs->tf_cs = context->sc_cs; 740 regs->tf_eflags = eflags; 741 regs->tf_esp = context->sc_esp_at_signal; 742 regs->tf_ss = context->sc_ss; 743 744 /* Call sigaltstack & ignore results. */ 745 lss = &uc.uc_stack; 746 ss.ss_sp = lss->ss_sp; 747 ss.ss_size = lss->ss_size; 748 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 749 750 (void)kern_sigaltstack(td, &ss, NULL); 751 752 return (EJUSTRETURN); 753 } 754 755 static int 756 linux_fetch_syscall_args(struct thread *td) 757 { 758 struct proc *p; 759 struct trapframe *frame; 760 struct syscall_args *sa; 761 762 p = td->td_proc; 763 frame = td->td_frame; 764 sa = &td->td_sa; 765 766 sa->code = frame->tf_eax; 767 sa->args[0] = frame->tf_ebx; 768 sa->args[1] = frame->tf_ecx; 769 sa->args[2] = frame->tf_edx; 770 sa->args[3] = frame->tf_esi; 771 sa->args[4] = frame->tf_edi; 772 sa->args[5] = frame->tf_ebp; /* Unconfirmed */ 773 774 if (sa->code >= p->p_sysent->sv_size) 775 /* nosys */ 776 sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; 777 else 778 sa->callp = &p->p_sysent->sv_table[sa->code]; 779 sa->narg = sa->callp->sy_narg; 780 781 td->td_retval[0] = 0; 782 td->td_retval[1] = frame->tf_edx; 783 784 return (0); 785 } 786 787 /* 788 * exec_setregs may initialize some registers differently than Linux 789 * does, thus potentially confusing Linux binaries. If necessary, we 790 * override the exec_setregs default(s) here. 791 */ 792 static void 793 linux_exec_setregs(struct thread *td, struct image_params *imgp, 794 uintptr_t stack) 795 { 796 struct pcb *pcb = td->td_pcb; 797 798 exec_setregs(td, imgp, stack); 799 800 /* Linux sets %gs to 0, we default to _udatasel. */ 801 pcb->pcb_gs = 0; 802 load_gs(0); 803 804 pcb->pcb_initial_npxcw = __LINUX_NPXCW__; 805 } 806 807 static void 808 linux_get_machine(const char **dst) 809 { 810 811 switch (cpu_class) { 812 case CPUCLASS_686: 813 *dst = "i686"; 814 break; 815 case CPUCLASS_586: 816 *dst = "i586"; 817 break; 818 case CPUCLASS_486: 819 *dst = "i486"; 820 break; 821 default: 822 *dst = "i386"; 823 } 824 } 825 826 struct sysentvec linux_sysvec = { 827 .sv_size = LINUX_SYS_MAXSYSCALL, 828 .sv_table = linux_sysent, 829 .sv_errsize = ELAST + 1, 830 .sv_errtbl = linux_errtbl, 831 .sv_transtrap = linux_translate_traps, 832 .sv_fixup = linux_fixup, 833 .sv_sendsig = linux_sendsig, 834 .sv_sigcode = &_binary_linux_locore_o_start, 835 .sv_szsigcode = &linux_szsigcode, 836 .sv_name = "Linux a.out", 837 .sv_coredump = NULL, 838 .sv_imgact_try = linux_exec_imgact_try, 839 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 840 .sv_minuser = VM_MIN_ADDRESS, 841 .sv_maxuser = VM_MAXUSER_ADDRESS, 842 .sv_usrstack = LINUX_USRSTACK, 843 .sv_psstrings = PS_STRINGS, 844 .sv_stackprot = VM_PROT_ALL, 845 .sv_copyout_strings = exec_copyout_strings, 846 .sv_setregs = linux_exec_setregs, 847 .sv_fixlimit = NULL, 848 .sv_maxssiz = NULL, 849 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, 850 .sv_set_syscall_retval = cpu_set_syscall_retval, 851 .sv_fetch_syscall_args = linux_fetch_syscall_args, 852 .sv_syscallnames = NULL, 853 .sv_shared_page_base = LINUX_SHAREDPAGE, 854 .sv_shared_page_len = PAGE_SIZE, 855 .sv_schedtail = linux_schedtail, 856 .sv_thread_detach = linux_thread_detach, 857 .sv_trap = NULL, 858 }; 859 INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); 860 861 struct sysentvec elf_linux_sysvec = { 862 .sv_size = LINUX_SYS_MAXSYSCALL, 863 .sv_table = linux_sysent, 864 .sv_errsize = ELAST + 1, 865 .sv_errtbl = linux_errtbl, 866 .sv_transtrap = linux_translate_traps, 867 .sv_fixup = linux_fixup_elf, 868 .sv_sendsig = linux_sendsig, 869 .sv_sigcode = &_binary_linux_locore_o_start, 870 .sv_szsigcode = &linux_szsigcode, 871 .sv_name = "Linux ELF", 872 .sv_coredump = elf32_coredump, 873 .sv_imgact_try = linux_exec_imgact_try, 874 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 875 .sv_minuser = VM_MIN_ADDRESS, 876 .sv_maxuser = VM_MAXUSER_ADDRESS, 877 .sv_usrstack = LINUX_USRSTACK, 878 .sv_psstrings = LINUX_PS_STRINGS, 879 .sv_stackprot = VM_PROT_ALL, 880 .sv_copyout_auxargs = linux_copyout_auxargs, 881 .sv_copyout_strings = linux_copyout_strings, 882 .sv_setregs = linux_exec_setregs, 883 .sv_fixlimit = NULL, 884 .sv_maxssiz = NULL, 885 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, 886 .sv_set_syscall_retval = cpu_set_syscall_retval, 887 .sv_fetch_syscall_args = linux_fetch_syscall_args, 888 .sv_syscallnames = NULL, 889 .sv_shared_page_base = LINUX_SHAREDPAGE, 890 .sv_shared_page_len = PAGE_SIZE, 891 .sv_schedtail = linux_schedtail, 892 .sv_thread_detach = linux_thread_detach, 893 .sv_trap = NULL, 894 }; 895 896 static void 897 linux_vdso_install(void *param) 898 { 899 900 linux_szsigcode = (&_binary_linux_locore_o_end - 901 &_binary_linux_locore_o_start); 902 903 if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) 904 panic("Linux invalid vdso size\n"); 905 906 __elfN(linux_vdso_fixup)(&elf_linux_sysvec); 907 908 linux_shared_page_obj = __elfN(linux_shared_page_init) 909 (&linux_shared_page_mapping); 910 911 __elfN(linux_vdso_reloc)(&elf_linux_sysvec); 912 913 bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, 914 linux_szsigcode); 915 elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; 916 } 917 SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, 918 linux_vdso_install, NULL); 919 920 static void 921 linux_vdso_deinstall(void *param) 922 { 923 924 __elfN(linux_shared_page_fini)(linux_shared_page_obj); 925 } 926 SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, 927 linux_vdso_deinstall, NULL); 928 929 static char GNU_ABI_VENDOR[] = "GNU"; 930 static int GNULINUX_ABI_DESC = 0; 931 932 static bool 933 linux_trans_osrel(const Elf_Note *note, int32_t *osrel) 934 { 935 const Elf32_Word *desc; 936 uintptr_t p; 937 938 p = (uintptr_t)(note + 1); 939 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 940 941 desc = (const Elf32_Word *)p; 942 if (desc[0] != GNULINUX_ABI_DESC) 943 return (false); 944 945 /* 946 * For Linux we encode osrel using the Linux convention of 947 * (version << 16) | (major << 8) | (minor) 948 * See macro in linux_mib.h 949 */ 950 *osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]); 951 952 return (true); 953 } 954 955 static Elf_Brandnote linux_brandnote = { 956 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 957 .hdr.n_descsz = 16, /* XXX at least 16 */ 958 .hdr.n_type = 1, 959 .vendor = GNU_ABI_VENDOR, 960 .flags = BN_TRANSLATE_OSREL, 961 .trans_osrel = linux_trans_osrel 962 }; 963 964 static Elf32_Brandinfo linux_brand = { 965 .brand = ELFOSABI_LINUX, 966 .machine = EM_386, 967 .compat_3_brand = "Linux", 968 .emul_path = "/compat/linux", 969 .interp_path = "/lib/ld-linux.so.1", 970 .sysvec = &elf_linux_sysvec, 971 .interp_newpath = NULL, 972 .brand_note = &linux_brandnote, 973 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 974 }; 975 976 static Elf32_Brandinfo linux_glibc2brand = { 977 .brand = ELFOSABI_LINUX, 978 .machine = EM_386, 979 .compat_3_brand = "Linux", 980 .emul_path = "/compat/linux", 981 .interp_path = "/lib/ld-linux.so.2", 982 .sysvec = &elf_linux_sysvec, 983 .interp_newpath = NULL, 984 .brand_note = &linux_brandnote, 985 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 986 }; 987 988 static Elf32_Brandinfo linux_muslbrand = { 989 .brand = ELFOSABI_LINUX, 990 .machine = EM_386, 991 .compat_3_brand = "Linux", 992 .emul_path = "/compat/linux", 993 .interp_path = "/lib/ld-musl-i386.so.1", 994 .sysvec = &elf_linux_sysvec, 995 .interp_newpath = NULL, 996 .brand_note = &linux_brandnote, 997 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 998 }; 999 1000 Elf32_Brandinfo *linux_brandlist[] = { 1001 &linux_brand, 1002 &linux_glibc2brand, 1003 &linux_muslbrand, 1004 NULL 1005 }; 1006 1007 static int 1008 linux_elf_modevent(module_t mod, int type, void *data) 1009 { 1010 Elf32_Brandinfo **brandinfo; 1011 int error; 1012 struct linux_ioctl_handler **lihp; 1013 1014 error = 0; 1015 1016 switch(type) { 1017 case MOD_LOAD: 1018 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1019 ++brandinfo) 1020 if (elf32_insert_brand_entry(*brandinfo) < 0) 1021 error = EINVAL; 1022 if (error == 0) { 1023 SET_FOREACH(lihp, linux_ioctl_handler_set) 1024 linux_ioctl_register_handler(*lihp); 1025 LIST_INIT(&futex_list); 1026 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1027 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 1028 NULL, 1000); 1029 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 1030 NULL, 1000); 1031 linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, 1032 linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); 1033 linux_get_machine(&linux_kplatform); 1034 linux_szplatform = roundup(strlen(linux_kplatform) + 1, 1035 sizeof(char *)); 1036 linux_dev_shm_create(); 1037 linux_osd_jail_register(); 1038 stclohz = (stathz ? stathz : hz); 1039 if (bootverbose) 1040 printf("Linux ELF exec handler installed\n"); 1041 } else 1042 printf("cannot insert Linux ELF brand handler\n"); 1043 break; 1044 case MOD_UNLOAD: 1045 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1046 ++brandinfo) 1047 if (elf32_brand_inuse(*brandinfo)) 1048 error = EBUSY; 1049 if (error == 0) { 1050 for (brandinfo = &linux_brandlist[0]; 1051 *brandinfo != NULL; ++brandinfo) 1052 if (elf32_remove_brand_entry(*brandinfo) < 0) 1053 error = EINVAL; 1054 } 1055 if (error == 0) { 1056 SET_FOREACH(lihp, linux_ioctl_handler_set) 1057 linux_ioctl_unregister_handler(*lihp); 1058 mtx_destroy(&futex_mtx); 1059 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1060 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1061 EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); 1062 linux_dev_shm_destroy(); 1063 linux_osd_jail_deregister(); 1064 if (bootverbose) 1065 printf("Linux ELF exec handler removed\n"); 1066 } else 1067 printf("Could not deinstall ELF interpreter entry\n"); 1068 break; 1069 default: 1070 return (EOPNOTSUPP); 1071 } 1072 return (error); 1073 } 1074 1075 static moduledata_t linux_elf_mod = { 1076 "linuxelf", 1077 linux_elf_modevent, 1078 0 1079 }; 1080 1081 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1082 FEATURE(linux, "Linux 32bit support"); 1083