1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2000 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/capsicum.h> 34 #include <sys/fcntl.h> 35 #include <sys/file.h> 36 #include <sys/imgact.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mman.h> 40 #include <sys/mutex.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/queue.h> 44 #include <sys/resource.h> 45 #include <sys/resourcevar.h> 46 #include <sys/sched.h> 47 #include <sys/signalvar.h> 48 #include <sys/syscallsubr.h> 49 #include <sys/sysproto.h> 50 #include <sys/systm.h> 51 #include <sys/sx.h> 52 #include <sys/unistd.h> 53 #include <sys/wait.h> 54 55 #include <machine/frame.h> 56 #include <machine/psl.h> 57 #include <machine/segments.h> 58 #include <machine/sysarch.h> 59 60 #include <vm/pmap.h> 61 #include <vm/vm.h> 62 #include <vm/vm_map.h> 63 64 #include <security/audit/audit.h> 65 66 #include <i386/linux/linux.h> 67 #include <i386/linux/linux_proto.h> 68 #include <compat/linux/linux_emul.h> 69 #include <compat/linux/linux_fork.h> 70 #include <compat/linux/linux_ipc.h> 71 #include <compat/linux/linux_misc.h> 72 #include <compat/linux/linux_mmap.h> 73 #include <compat/linux/linux_signal.h> 74 #include <compat/linux/linux_util.h> 75 76 #include <i386/include/pcb.h> /* needed for pcb definition in linux_set_thread_area */ 77 78 #include "opt_posix.h" 79 80 struct l_descriptor { 81 l_uint entry_number; 82 l_ulong base_addr; 83 l_uint limit; 84 l_uint seg_32bit:1; 85 l_uint contents:2; 86 l_uint read_exec_only:1; 87 l_uint limit_in_pages:1; 88 l_uint seg_not_present:1; 89 l_uint useable:1; 90 }; 91 92 struct l_old_select_argv { 93 l_int nfds; 94 l_fd_set *readfds; 95 l_fd_set *writefds; 96 l_fd_set *exceptfds; 97 struct l_timeval *timeout; 98 }; 99 100 int 101 linux_execve(struct thread *td, struct linux_execve_args *args) 102 { 103 struct image_args eargs; 104 char *newpath; 105 int error; 106 107 if (!LUSECONVPATH(td)) { 108 error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, 109 args->argp, args->envp); 110 } else { 111 LCONVPATHEXIST(args->path, &newpath); 112 error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, 113 args->argp, args->envp); 114 LFREEPATH(newpath); 115 } 116 if (error == 0) 117 error = linux_common_execve(td, &eargs); 118 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 119 return (error); 120 } 121 122 struct l_ipc_kludge { 123 struct l_msgbuf *msgp; 124 l_long msgtyp; 125 }; 126 127 int 128 linux_ipc(struct thread *td, struct linux_ipc_args *args) 129 { 130 131 switch (args->what & 0xFFFF) { 132 case LINUX_SEMOP: { 133 134 return (kern_semop(td, args->arg1, PTRIN(args->ptr), 135 args->arg2, NULL)); 136 } 137 case LINUX_SEMGET: { 138 struct linux_semget_args a; 139 140 a.key = args->arg1; 141 a.nsems = args->arg2; 142 a.semflg = args->arg3; 143 return (linux_semget(td, &a)); 144 } 145 case LINUX_SEMCTL: { 146 struct linux_semctl_args a; 147 int error; 148 149 a.semid = args->arg1; 150 a.semnum = args->arg2; 151 a.cmd = args->arg3; 152 error = copyin(PTRIN(args->ptr), &a.arg, sizeof(a.arg)); 153 if (error) 154 return (error); 155 return (linux_semctl(td, &a)); 156 } 157 case LINUX_SEMTIMEDOP: { 158 struct linux_semtimedop_args a; 159 160 a.semid = args->arg1; 161 a.tsops = PTRIN(args->ptr); 162 a.nsops = args->arg2; 163 a.timeout = PTRIN(args->arg5); 164 return (linux_semtimedop(td, &a)); 165 } 166 case LINUX_MSGSND: { 167 struct linux_msgsnd_args a; 168 169 a.msqid = args->arg1; 170 a.msgp = PTRIN(args->ptr); 171 a.msgsz = args->arg2; 172 a.msgflg = args->arg3; 173 return (linux_msgsnd(td, &a)); 174 } 175 case LINUX_MSGRCV: { 176 struct linux_msgrcv_args a; 177 178 a.msqid = args->arg1; 179 a.msgsz = args->arg2; 180 a.msgflg = args->arg3; 181 if ((args->what >> 16) == 0) { 182 struct l_ipc_kludge tmp; 183 int error; 184 185 if (args->ptr == 0) 186 return (EINVAL); 187 error = copyin(PTRIN(args->ptr), &tmp, sizeof(tmp)); 188 if (error) 189 return (error); 190 a.msgp = PTRIN(tmp.msgp); 191 a.msgtyp = tmp.msgtyp; 192 } else { 193 a.msgp = PTRIN(args->ptr); 194 a.msgtyp = args->arg5; 195 } 196 return (linux_msgrcv(td, &a)); 197 } 198 case LINUX_MSGGET: { 199 struct linux_msgget_args a; 200 201 a.key = args->arg1; 202 a.msgflg = args->arg2; 203 return (linux_msgget(td, &a)); 204 } 205 case LINUX_MSGCTL: { 206 struct linux_msgctl_args a; 207 208 a.msqid = args->arg1; 209 a.cmd = args->arg2; 210 a.buf = PTRIN(args->ptr); 211 return (linux_msgctl(td, &a)); 212 } 213 case LINUX_SHMAT: { 214 struct linux_shmat_args a; 215 l_uintptr_t addr; 216 int error; 217 218 a.shmid = args->arg1; 219 a.shmaddr = PTRIN(args->ptr); 220 a.shmflg = args->arg2; 221 error = linux_shmat(td, &a); 222 if (error != 0) 223 return (error); 224 addr = td->td_retval[0]; 225 error = copyout(&addr, PTRIN(args->arg3), sizeof(addr)); 226 td->td_retval[0] = 0; 227 return (error); 228 } 229 case LINUX_SHMDT: { 230 struct linux_shmdt_args a; 231 232 a.shmaddr = PTRIN(args->ptr); 233 return (linux_shmdt(td, &a)); 234 } 235 case LINUX_SHMGET: { 236 struct linux_shmget_args a; 237 238 a.key = args->arg1; 239 a.size = args->arg2; 240 a.shmflg = args->arg3; 241 return (linux_shmget(td, &a)); 242 } 243 case LINUX_SHMCTL: { 244 struct linux_shmctl_args a; 245 246 a.shmid = args->arg1; 247 a.cmd = args->arg2; 248 a.buf = PTRIN(args->ptr); 249 return (linux_shmctl(td, &a)); 250 } 251 default: 252 break; 253 } 254 255 return (EINVAL); 256 } 257 258 int 259 linux_old_select(struct thread *td, struct linux_old_select_args *args) 260 { 261 struct l_old_select_argv linux_args; 262 struct linux_select_args newsel; 263 int error; 264 265 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 266 if (error) 267 return (error); 268 269 newsel.nfds = linux_args.nfds; 270 newsel.readfds = linux_args.readfds; 271 newsel.writefds = linux_args.writefds; 272 newsel.exceptfds = linux_args.exceptfds; 273 newsel.timeout = linux_args.timeout; 274 return (linux_select(td, &newsel)); 275 } 276 277 int 278 linux_set_cloned_tls(struct thread *td, void *desc) 279 { 280 struct segment_descriptor sd; 281 struct l_user_desc info; 282 int idx, error; 283 int a[2]; 284 285 error = copyin(desc, &info, sizeof(struct l_user_desc)); 286 if (error) { 287 linux_msg(td, "set_cloned_tls copyin failed!"); 288 } else { 289 idx = info.entry_number; 290 291 /* 292 * looks like we're getting the idx we returned 293 * in the set_thread_area() syscall 294 */ 295 if (idx != 6 && idx != 3) { 296 linux_msg(td, "set_cloned_tls resetting idx!"); 297 idx = 3; 298 } 299 300 /* this doesnt happen in practice */ 301 if (idx == 6) { 302 /* we might copy out the entry_number as 3 */ 303 info.entry_number = 3; 304 error = copyout(&info, desc, sizeof(struct l_user_desc)); 305 if (error) 306 linux_msg(td, "set_cloned_tls copyout failed!"); 307 } 308 309 a[0] = LINUX_LDT_entry_a(&info); 310 a[1] = LINUX_LDT_entry_b(&info); 311 312 memcpy(&sd, &a, sizeof(a)); 313 /* set %gs */ 314 td->td_pcb->pcb_gsd = sd; 315 td->td_pcb->pcb_gs = GSEL(GUGS_SEL, SEL_UPL); 316 } 317 318 return (error); 319 } 320 321 int 322 linux_set_upcall(struct thread *td, register_t stack) 323 { 324 325 if (stack) 326 td->td_frame->tf_esp = stack; 327 328 /* 329 * The newly created Linux thread returns 330 * to the user space by the same path that a parent do. 331 */ 332 td->td_frame->tf_eax = 0; 333 return (0); 334 } 335 336 int 337 linux_mmap2(struct thread *td, struct linux_mmap2_args *args) 338 { 339 340 return (linux_mmap_common(td, args->addr, args->len, args->prot, 341 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 342 PAGE_SIZE)); 343 } 344 345 int 346 linux_mmap(struct thread *td, struct linux_mmap_args *args) 347 { 348 int error; 349 struct l_mmap_argv linux_args; 350 351 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 352 if (error) 353 return (error); 354 355 return (linux_mmap_common(td, linux_args.addr, linux_args.len, 356 linux_args.prot, linux_args.flags, linux_args.fd, 357 (uint32_t)linux_args.pgoff)); 358 } 359 360 int 361 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 362 { 363 364 return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); 365 } 366 367 int 368 linux_madvise(struct thread *td, struct linux_madvise_args *uap) 369 { 370 371 return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); 372 } 373 374 int 375 linux_ioperm(struct thread *td, struct linux_ioperm_args *args) 376 { 377 int error; 378 struct i386_ioperm_args iia; 379 380 iia.start = args->start; 381 iia.length = args->length; 382 iia.enable = args->enable; 383 error = i386_set_ioperm(td, &iia); 384 return (error); 385 } 386 387 int 388 linux_iopl(struct thread *td, struct linux_iopl_args *args) 389 { 390 int error; 391 392 if (args->level < 0 || args->level > 3) 393 return (EINVAL); 394 if ((error = priv_check(td, PRIV_IO)) != 0) 395 return (error); 396 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 397 return (error); 398 td->td_frame->tf_eflags = (td->td_frame->tf_eflags & ~PSL_IOPL) | 399 (args->level * (PSL_IOPL / 3)); 400 return (0); 401 } 402 403 int 404 linux_modify_ldt(struct thread *td, struct linux_modify_ldt_args *uap) 405 { 406 int error; 407 struct i386_ldt_args ldt; 408 struct l_descriptor ld; 409 union descriptor desc; 410 int size, written; 411 412 switch (uap->func) { 413 case 0x00: /* read_ldt */ 414 ldt.start = 0; 415 ldt.descs = uap->ptr; 416 ldt.num = uap->bytecount / sizeof(union descriptor); 417 error = i386_get_ldt(td, &ldt); 418 td->td_retval[0] *= sizeof(union descriptor); 419 break; 420 case 0x02: /* read_default_ldt = 0 */ 421 size = 5*sizeof(struct l_desc_struct); 422 if (size > uap->bytecount) 423 size = uap->bytecount; 424 for (written = error = 0; written < size && error == 0; written++) 425 error = subyte((char *)uap->ptr + written, 0); 426 td->td_retval[0] = written; 427 break; 428 case 0x01: /* write_ldt */ 429 case 0x11: /* write_ldt */ 430 if (uap->bytecount != sizeof(ld)) 431 return (EINVAL); 432 433 error = copyin(uap->ptr, &ld, sizeof(ld)); 434 if (error) 435 return (error); 436 437 ldt.start = ld.entry_number; 438 ldt.descs = &desc; 439 ldt.num = 1; 440 desc.sd.sd_lolimit = (ld.limit & 0x0000ffff); 441 desc.sd.sd_hilimit = (ld.limit & 0x000f0000) >> 16; 442 desc.sd.sd_lobase = (ld.base_addr & 0x00ffffff); 443 desc.sd.sd_hibase = (ld.base_addr & 0xff000000) >> 24; 444 desc.sd.sd_type = SDT_MEMRO | ((ld.read_exec_only ^ 1) << 1) | 445 (ld.contents << 2); 446 desc.sd.sd_dpl = 3; 447 desc.sd.sd_p = (ld.seg_not_present ^ 1); 448 desc.sd.sd_xx = 0; 449 desc.sd.sd_def32 = ld.seg_32bit; 450 desc.sd.sd_gran = ld.limit_in_pages; 451 error = i386_set_ldt(td, &ldt, &desc); 452 break; 453 default: 454 error = ENOSYS; 455 break; 456 } 457 458 if (error == EOPNOTSUPP) { 459 linux_msg(td, "modify_ldt needs kernel option USER_LDT"); 460 error = ENOSYS; 461 } 462 463 return (error); 464 } 465 466 int 467 linux_sigaction(struct thread *td, struct linux_sigaction_args *args) 468 { 469 l_osigaction_t osa; 470 l_sigaction_t act, oact; 471 int error; 472 473 if (args->nsa != NULL) { 474 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 475 if (error) 476 return (error); 477 act.lsa_handler = osa.lsa_handler; 478 act.lsa_flags = osa.lsa_flags; 479 act.lsa_restorer = osa.lsa_restorer; 480 LINUX_SIGEMPTYSET(act.lsa_mask); 481 act.lsa_mask.__mask = osa.lsa_mask; 482 } 483 484 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 485 args->osa ? &oact : NULL); 486 487 if (args->osa != NULL && !error) { 488 osa.lsa_handler = oact.lsa_handler; 489 osa.lsa_flags = oact.lsa_flags; 490 osa.lsa_restorer = oact.lsa_restorer; 491 osa.lsa_mask = oact.lsa_mask.__mask; 492 error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 493 } 494 495 return (error); 496 } 497 498 /* 499 * Linux has two extra args, restart and oldmask. We dont use these, 500 * but it seems that "restart" is actually a context pointer that 501 * enables the signal to happen with a different register set. 502 */ 503 int 504 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 505 { 506 sigset_t sigmask; 507 l_sigset_t mask; 508 509 LINUX_SIGEMPTYSET(mask); 510 mask.__mask = args->mask; 511 linux_to_bsd_sigset(&mask, &sigmask); 512 return (kern_sigsuspend(td, sigmask)); 513 } 514 515 int 516 linux_pause(struct thread *td, struct linux_pause_args *args) 517 { 518 struct proc *p = td->td_proc; 519 sigset_t sigmask; 520 521 PROC_LOCK(p); 522 sigmask = td->td_sigmask; 523 PROC_UNLOCK(p); 524 return (kern_sigsuspend(td, sigmask)); 525 } 526 527 int 528 linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args) 529 { 530 struct l_user_desc info; 531 int error; 532 int idx; 533 int a[2]; 534 struct segment_descriptor sd; 535 536 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 537 if (error) 538 return (error); 539 540 idx = info.entry_number; 541 /* 542 * Semantics of Linux version: every thread in the system has array of 543 * 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This 544 * syscall loads one of the selected tls decriptors with a value and 545 * also loads GDT descriptors 6, 7 and 8 with the content of the 546 * per-thread descriptors. 547 * 548 * Semantics of FreeBSD version: I think we can ignore that Linux has 3 549 * per-thread descriptors and use just the 1st one. The tls_array[] 550 * is used only in set/get-thread_area() syscalls and for loading the 551 * GDT descriptors. In FreeBSD we use just one GDT descriptor for TLS 552 * so we will load just one. 553 * 554 * XXX: this doesn't work when a user space process tries to use more 555 * than 1 TLS segment. Comment in the Linux sources says wine might do 556 * this. 557 */ 558 559 /* 560 * we support just GLIBC TLS now 561 * we should let 3 proceed as well because we use this segment so 562 * if code does two subsequent calls it should succeed 563 */ 564 if (idx != 6 && idx != -1 && idx != 3) 565 return (EINVAL); 566 567 /* 568 * we have to copy out the GDT entry we use 569 * FreeBSD uses GDT entry #3 for storing %gs so load that 570 * 571 * XXX: what if a user space program doesn't check this value and tries 572 * to use 6, 7 or 8? 573 */ 574 idx = info.entry_number = 3; 575 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 576 if (error) 577 return (error); 578 579 if (LINUX_LDT_empty(&info)) { 580 a[0] = 0; 581 a[1] = 0; 582 } else { 583 a[0] = LINUX_LDT_entry_a(&info); 584 a[1] = LINUX_LDT_entry_b(&info); 585 } 586 587 memcpy(&sd, &a, sizeof(a)); 588 /* this is taken from i386 version of cpu_set_user_tls() */ 589 critical_enter(); 590 /* set %gs */ 591 td->td_pcb->pcb_gsd = sd; 592 PCPU_GET(fsgs_gdt)[1] = sd; 593 load_gs(GSEL(GUGS_SEL, SEL_UPL)); 594 critical_exit(); 595 596 return (0); 597 } 598 599 int 600 linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args) 601 { 602 603 struct l_user_desc info; 604 int error; 605 int idx; 606 struct l_desc_struct desc; 607 struct segment_descriptor sd; 608 609 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 610 if (error) 611 return (error); 612 613 idx = info.entry_number; 614 /* XXX: I am not sure if we want 3 to be allowed too. */ 615 if (idx != 6 && idx != 3) 616 return (EINVAL); 617 618 idx = 3; 619 620 memset(&info, 0, sizeof(info)); 621 622 sd = PCPU_GET(fsgs_gdt)[1]; 623 624 memcpy(&desc, &sd, sizeof(desc)); 625 626 info.entry_number = idx; 627 info.base_addr = LINUX_GET_BASE(&desc); 628 info.limit = LINUX_GET_LIMIT(&desc); 629 info.seg_32bit = LINUX_GET_32BIT(&desc); 630 info.contents = LINUX_GET_CONTENTS(&desc); 631 info.read_exec_only = !LINUX_GET_WRITABLE(&desc); 632 info.limit_in_pages = LINUX_GET_LIMIT_PAGES(&desc); 633 info.seg_not_present = !LINUX_GET_PRESENT(&desc); 634 info.useable = LINUX_GET_USEABLE(&desc); 635 636 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 637 if (error) 638 return (EFAULT); 639 640 return (0); 641 } 642 643 /* XXX: this wont work with module - convert it */ 644 int 645 linux_mq_open(struct thread *td, struct linux_mq_open_args *args) 646 { 647 #ifdef P1003_1B_MQUEUE 648 return (sys_kmq_open(td, (struct kmq_open_args *)args)); 649 #else 650 return (ENOSYS); 651 #endif 652 } 653 654 int 655 linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) 656 { 657 #ifdef P1003_1B_MQUEUE 658 return (sys_kmq_unlink(td, (struct kmq_unlink_args *)args)); 659 #else 660 return (ENOSYS); 661 #endif 662 } 663 664 int 665 linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) 666 { 667 #ifdef P1003_1B_MQUEUE 668 return (sys_kmq_timedsend(td, (struct kmq_timedsend_args *)args)); 669 #else 670 return (ENOSYS); 671 #endif 672 } 673 674 int 675 linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) 676 { 677 #ifdef P1003_1B_MQUEUE 678 return (sys_kmq_timedreceive(td, (struct kmq_timedreceive_args *)args)); 679 #else 680 return (ENOSYS); 681 #endif 682 } 683 684 int 685 linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) 686 { 687 #ifdef P1003_1B_MQUEUE 688 return (sys_kmq_notify(td, (struct kmq_notify_args *)args)); 689 #else 690 return (ENOSYS); 691 #endif 692 } 693 694 int 695 linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) 696 { 697 #ifdef P1003_1B_MQUEUE 698 return (sys_kmq_setattr(td, (struct kmq_setattr_args *)args)); 699 #else 700 return (ENOSYS); 701 #endif 702 } 703