1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2002 Doug Rabson 4 * Copyright (c) 2000 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/kernel.h> 36 #include <sys/systm.h> 37 #include <sys/capability.h> 38 #include <sys/file.h> 39 #include <sys/fcntl.h> 40 #include <sys/clock.h> 41 #include <sys/imgact.h> 42 #include <sys/limits.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mman.h> 46 #include <sys/mutex.h> 47 #include <sys/priv.h> 48 #include <sys/proc.h> 49 #include <sys/resource.h> 50 #include <sys/resourcevar.h> 51 #include <sys/sched.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/sysproto.h> 54 #include <sys/unistd.h> 55 #include <sys/wait.h> 56 57 #include <machine/frame.h> 58 #include <machine/pcb.h> 59 #include <machine/psl.h> 60 #include <machine/segments.h> 61 #include <machine/specialreg.h> 62 63 #include <vm/vm.h> 64 #include <vm/pmap.h> 65 #include <vm/vm_map.h> 66 67 #include <compat/freebsd32/freebsd32_util.h> 68 #include <amd64/linux32/linux.h> 69 #include <amd64/linux32/linux32_proto.h> 70 #include <compat/linux/linux_ipc.h> 71 #include <compat/linux/linux_misc.h> 72 #include <compat/linux/linux_signal.h> 73 #include <compat/linux/linux_util.h> 74 #include <compat/linux/linux_emul.h> 75 76 struct l_old_select_argv { 77 l_int nfds; 78 l_uintptr_t readfds; 79 l_uintptr_t writefds; 80 l_uintptr_t exceptfds; 81 l_uintptr_t timeout; 82 } __packed; 83 84 int 85 linux_to_bsd_sigaltstack(int lsa) 86 { 87 int bsa = 0; 88 89 if (lsa & LINUX_SS_DISABLE) 90 bsa |= SS_DISABLE; 91 if (lsa & LINUX_SS_ONSTACK) 92 bsa |= SS_ONSTACK; 93 return (bsa); 94 } 95 96 static int linux_mmap_common(struct thread *td, l_uintptr_t addr, 97 l_size_t len, l_int prot, l_int flags, l_int fd, 98 l_loff_t pos); 99 100 int 101 bsd_to_linux_sigaltstack(int bsa) 102 { 103 int lsa = 0; 104 105 if (bsa & SS_DISABLE) 106 lsa |= LINUX_SS_DISABLE; 107 if (bsa & SS_ONSTACK) 108 lsa |= LINUX_SS_ONSTACK; 109 return (lsa); 110 } 111 112 static void 113 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 114 { 115 116 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 117 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 118 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 119 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 120 lru->ru_maxrss = ru->ru_maxrss; 121 lru->ru_ixrss = ru->ru_ixrss; 122 lru->ru_idrss = ru->ru_idrss; 123 lru->ru_isrss = ru->ru_isrss; 124 lru->ru_minflt = ru->ru_minflt; 125 lru->ru_majflt = ru->ru_majflt; 126 lru->ru_nswap = ru->ru_nswap; 127 lru->ru_inblock = ru->ru_inblock; 128 lru->ru_oublock = ru->ru_oublock; 129 lru->ru_msgsnd = ru->ru_msgsnd; 130 lru->ru_msgrcv = ru->ru_msgrcv; 131 lru->ru_nsignals = ru->ru_nsignals; 132 lru->ru_nvcsw = ru->ru_nvcsw; 133 lru->ru_nivcsw = ru->ru_nivcsw; 134 } 135 136 int 137 linux_execve(struct thread *td, struct linux_execve_args *args) 138 { 139 struct image_args eargs; 140 char *path; 141 int error; 142 143 LCONVPATHEXIST(td, args->path, &path); 144 145 #ifdef DEBUG 146 if (ldebug(execve)) 147 printf(ARGS(execve, "%s"), path); 148 #endif 149 150 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 151 args->argp, args->envp); 152 free(path, M_TEMP); 153 if (error == 0) 154 error = kern_execve(td, &eargs, NULL); 155 if (error == 0) 156 /* Linux process can execute FreeBSD one, do not attempt 157 * to create emuldata for such process using 158 * linux_proc_init, this leads to a panic on KASSERT 159 * because such process has p->p_emuldata == NULL. 160 */ 161 if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) 162 error = linux_proc_init(td, 0, 0); 163 return (error); 164 } 165 166 CTASSERT(sizeof(struct l_iovec32) == 8); 167 168 static int 169 linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 170 { 171 struct l_iovec32 iov32; 172 struct iovec *iov; 173 struct uio *uio; 174 uint32_t iovlen; 175 int error, i; 176 177 *uiop = NULL; 178 if (iovcnt > UIO_MAXIOV) 179 return (EINVAL); 180 iovlen = iovcnt * sizeof(struct iovec); 181 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 182 iov = (struct iovec *)(uio + 1); 183 for (i = 0; i < iovcnt; i++) { 184 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 185 if (error) { 186 free(uio, M_IOV); 187 return (error); 188 } 189 iov[i].iov_base = PTRIN(iov32.iov_base); 190 iov[i].iov_len = iov32.iov_len; 191 } 192 uio->uio_iov = iov; 193 uio->uio_iovcnt = iovcnt; 194 uio->uio_segflg = UIO_USERSPACE; 195 uio->uio_offset = -1; 196 uio->uio_resid = 0; 197 for (i = 0; i < iovcnt; i++) { 198 if (iov->iov_len > INT_MAX - uio->uio_resid) { 199 free(uio, M_IOV); 200 return (EINVAL); 201 } 202 uio->uio_resid += iov->iov_len; 203 iov++; 204 } 205 *uiop = uio; 206 return (0); 207 } 208 209 int 210 linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 211 int error) 212 { 213 struct l_iovec32 iov32; 214 struct iovec *iov; 215 uint32_t iovlen; 216 int i; 217 218 *iovp = NULL; 219 if (iovcnt > UIO_MAXIOV) 220 return (error); 221 iovlen = iovcnt * sizeof(struct iovec); 222 iov = malloc(iovlen, M_IOV, M_WAITOK); 223 for (i = 0; i < iovcnt; i++) { 224 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 225 if (error) { 226 free(iov, M_IOV); 227 return (error); 228 } 229 iov[i].iov_base = PTRIN(iov32.iov_base); 230 iov[i].iov_len = iov32.iov_len; 231 } 232 *iovp = iov; 233 return(0); 234 235 } 236 237 int 238 linux_readv(struct thread *td, struct linux_readv_args *uap) 239 { 240 struct uio *auio; 241 int error; 242 243 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 244 if (error) 245 return (error); 246 error = kern_readv(td, uap->fd, auio); 247 free(auio, M_IOV); 248 return (error); 249 } 250 251 int 252 linux_writev(struct thread *td, struct linux_writev_args *uap) 253 { 254 struct uio *auio; 255 int error; 256 257 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 258 if (error) 259 return (error); 260 error = kern_writev(td, uap->fd, auio); 261 free(auio, M_IOV); 262 return (error); 263 } 264 265 struct l_ipc_kludge { 266 l_uintptr_t msgp; 267 l_long msgtyp; 268 } __packed; 269 270 int 271 linux_ipc(struct thread *td, struct linux_ipc_args *args) 272 { 273 274 switch (args->what & 0xFFFF) { 275 case LINUX_SEMOP: { 276 struct linux_semop_args a; 277 278 a.semid = args->arg1; 279 a.tsops = args->ptr; 280 a.nsops = args->arg2; 281 return (linux_semop(td, &a)); 282 } 283 case LINUX_SEMGET: { 284 struct linux_semget_args a; 285 286 a.key = args->arg1; 287 a.nsems = args->arg2; 288 a.semflg = args->arg3; 289 return (linux_semget(td, &a)); 290 } 291 case LINUX_SEMCTL: { 292 struct linux_semctl_args a; 293 int error; 294 295 a.semid = args->arg1; 296 a.semnum = args->arg2; 297 a.cmd = args->arg3; 298 error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 299 if (error) 300 return (error); 301 return (linux_semctl(td, &a)); 302 } 303 case LINUX_MSGSND: { 304 struct linux_msgsnd_args a; 305 306 a.msqid = args->arg1; 307 a.msgp = args->ptr; 308 a.msgsz = args->arg2; 309 a.msgflg = args->arg3; 310 return (linux_msgsnd(td, &a)); 311 } 312 case LINUX_MSGRCV: { 313 struct linux_msgrcv_args a; 314 315 a.msqid = args->arg1; 316 a.msgsz = args->arg2; 317 a.msgflg = args->arg3; 318 if ((args->what >> 16) == 0) { 319 struct l_ipc_kludge tmp; 320 int error; 321 322 if (args->ptr == 0) 323 return (EINVAL); 324 error = copyin(args->ptr, &tmp, sizeof(tmp)); 325 if (error) 326 return (error); 327 a.msgp = PTRIN(tmp.msgp); 328 a.msgtyp = tmp.msgtyp; 329 } else { 330 a.msgp = args->ptr; 331 a.msgtyp = args->arg5; 332 } 333 return (linux_msgrcv(td, &a)); 334 } 335 case LINUX_MSGGET: { 336 struct linux_msgget_args a; 337 338 a.key = args->arg1; 339 a.msgflg = args->arg2; 340 return (linux_msgget(td, &a)); 341 } 342 case LINUX_MSGCTL: { 343 struct linux_msgctl_args a; 344 345 a.msqid = args->arg1; 346 a.cmd = args->arg2; 347 a.buf = args->ptr; 348 return (linux_msgctl(td, &a)); 349 } 350 case LINUX_SHMAT: { 351 struct linux_shmat_args a; 352 353 a.shmid = args->arg1; 354 a.shmaddr = args->ptr; 355 a.shmflg = args->arg2; 356 a.raddr = PTRIN((l_uint)args->arg3); 357 return (linux_shmat(td, &a)); 358 } 359 case LINUX_SHMDT: { 360 struct linux_shmdt_args a; 361 362 a.shmaddr = args->ptr; 363 return (linux_shmdt(td, &a)); 364 } 365 case LINUX_SHMGET: { 366 struct linux_shmget_args a; 367 368 a.key = args->arg1; 369 a.size = args->arg2; 370 a.shmflg = args->arg3; 371 return (linux_shmget(td, &a)); 372 } 373 case LINUX_SHMCTL: { 374 struct linux_shmctl_args a; 375 376 a.shmid = args->arg1; 377 a.cmd = args->arg2; 378 a.buf = args->ptr; 379 return (linux_shmctl(td, &a)); 380 } 381 default: 382 break; 383 } 384 385 return (EINVAL); 386 } 387 388 int 389 linux_old_select(struct thread *td, struct linux_old_select_args *args) 390 { 391 struct l_old_select_argv linux_args; 392 struct linux_select_args newsel; 393 int error; 394 395 #ifdef DEBUG 396 if (ldebug(old_select)) 397 printf(ARGS(old_select, "%p"), args->ptr); 398 #endif 399 400 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 401 if (error) 402 return (error); 403 404 newsel.nfds = linux_args.nfds; 405 newsel.readfds = PTRIN(linux_args.readfds); 406 newsel.writefds = PTRIN(linux_args.writefds); 407 newsel.exceptfds = PTRIN(linux_args.exceptfds); 408 newsel.timeout = PTRIN(linux_args.timeout); 409 return (linux_select(td, &newsel)); 410 } 411 412 int 413 linux_set_cloned_tls(struct thread *td, void *desc) 414 { 415 struct user_segment_descriptor sd; 416 struct l_user_desc info; 417 struct pcb *pcb; 418 int error; 419 int a[2]; 420 421 error = copyin(desc, &info, sizeof(struct l_user_desc)); 422 if (error) { 423 printf(LMSG("copyin failed!")); 424 } else { 425 /* We might copy out the entry_number as GUGS32_SEL. */ 426 info.entry_number = GUGS32_SEL; 427 error = copyout(&info, desc, sizeof(struct l_user_desc)); 428 if (error) 429 printf(LMSG("copyout failed!")); 430 431 a[0] = LINUX_LDT_entry_a(&info); 432 a[1] = LINUX_LDT_entry_b(&info); 433 434 memcpy(&sd, &a, sizeof(a)); 435 #ifdef DEBUG 436 if (ldebug(clone)) 437 printf("Segment created in clone with " 438 "CLONE_SETTLS: lobase: %x, hibase: %x, " 439 "lolimit: %x, hilimit: %x, type: %i, " 440 "dpl: %i, p: %i, xx: %i, long: %i, " 441 "def32: %i, gran: %i\n", sd.sd_lobase, 442 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 443 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 444 sd.sd_long, sd.sd_def32, sd.sd_gran); 445 #endif 446 pcb = td->td_pcb; 447 pcb->pcb_gsbase = (register_t)info.base_addr; 448 /* XXXKIB pcb->pcb_gs32sd = sd; */ 449 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 450 set_pcb_flags(pcb, PCB_GS32BIT | PCB_32BIT); 451 } 452 453 return (error); 454 } 455 456 int 457 linux_set_upcall_kse(struct thread *td, register_t stack) 458 { 459 460 td->td_frame->tf_rsp = stack; 461 462 return (0); 463 } 464 465 #define STACK_SIZE (2 * 1024 * 1024) 466 #define GUARD_SIZE (4 * PAGE_SIZE) 467 468 int 469 linux_mmap2(struct thread *td, struct linux_mmap2_args *args) 470 { 471 472 #ifdef DEBUG 473 if (ldebug(mmap2)) 474 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 475 args->addr, args->len, args->prot, 476 args->flags, args->fd, args->pgoff); 477 #endif 478 479 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 480 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 481 PAGE_SIZE)); 482 } 483 484 int 485 linux_mmap(struct thread *td, struct linux_mmap_args *args) 486 { 487 int error; 488 struct l_mmap_argv linux_args; 489 490 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 491 if (error) 492 return (error); 493 494 #ifdef DEBUG 495 if (ldebug(mmap)) 496 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 497 linux_args.addr, linux_args.len, linux_args.prot, 498 linux_args.flags, linux_args.fd, linux_args.pgoff); 499 #endif 500 501 return (linux_mmap_common(td, linux_args.addr, linux_args.len, 502 linux_args.prot, linux_args.flags, linux_args.fd, 503 (uint32_t)linux_args.pgoff)); 504 } 505 506 static int 507 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 508 l_int flags, l_int fd, l_loff_t pos) 509 { 510 struct proc *p = td->td_proc; 511 struct mmap_args /* { 512 caddr_t addr; 513 size_t len; 514 int prot; 515 int flags; 516 int fd; 517 long pad; 518 off_t pos; 519 } */ bsd_args; 520 int error; 521 struct file *fp; 522 523 error = 0; 524 bsd_args.flags = 0; 525 fp = NULL; 526 527 /* 528 * Linux mmap(2): 529 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 530 */ 531 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 532 return (EINVAL); 533 534 if (flags & LINUX_MAP_SHARED) 535 bsd_args.flags |= MAP_SHARED; 536 if (flags & LINUX_MAP_PRIVATE) 537 bsd_args.flags |= MAP_PRIVATE; 538 if (flags & LINUX_MAP_FIXED) 539 bsd_args.flags |= MAP_FIXED; 540 if (flags & LINUX_MAP_ANON) { 541 /* Enforce pos to be on page boundary, then ignore. */ 542 if ((pos & PAGE_MASK) != 0) 543 return (EINVAL); 544 pos = 0; 545 bsd_args.flags |= MAP_ANON; 546 } else 547 bsd_args.flags |= MAP_NOSYNC; 548 if (flags & LINUX_MAP_GROWSDOWN) 549 bsd_args.flags |= MAP_STACK; 550 551 /* 552 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 553 * on Linux/i386. We do this to ensure maximum compatibility. 554 * Linux/ia64 does the same in i386 emulation mode. 555 */ 556 bsd_args.prot = prot; 557 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 558 bsd_args.prot |= PROT_READ | PROT_EXEC; 559 560 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 561 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 562 if (bsd_args.fd != -1) { 563 /* 564 * Linux follows Solaris mmap(2) description: 565 * The file descriptor fildes is opened with 566 * read permission, regardless of the 567 * protection options specified. 568 */ 569 570 if ((error = fget(td, bsd_args.fd, CAP_MMAP, &fp)) != 0) 571 return (error); 572 if (fp->f_type != DTYPE_VNODE) { 573 fdrop(fp, td); 574 return (EINVAL); 575 } 576 577 /* Linux mmap() just fails for O_WRONLY files */ 578 if (!(fp->f_flag & FREAD)) { 579 fdrop(fp, td); 580 return (EACCES); 581 } 582 583 fdrop(fp, td); 584 } 585 586 if (flags & LINUX_MAP_GROWSDOWN) { 587 /* 588 * The Linux MAP_GROWSDOWN option does not limit auto 589 * growth of the region. Linux mmap with this option 590 * takes as addr the inital BOS, and as len, the initial 591 * region size. It can then grow down from addr without 592 * limit. However, Linux threads has an implicit internal 593 * limit to stack size of STACK_SIZE. Its just not 594 * enforced explicitly in Linux. But, here we impose 595 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 596 * region, since we can do this with our mmap. 597 * 598 * Our mmap with MAP_STACK takes addr as the maximum 599 * downsize limit on BOS, and as len the max size of 600 * the region. It then maps the top SGROWSIZ bytes, 601 * and auto grows the region down, up to the limit 602 * in addr. 603 * 604 * If we don't use the MAP_STACK option, the effect 605 * of this code is to allocate a stack region of a 606 * fixed size of (STACK_SIZE - GUARD_SIZE). 607 */ 608 609 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 610 /* 611 * Some Linux apps will attempt to mmap 612 * thread stacks near the top of their 613 * address space. If their TOS is greater 614 * than vm_maxsaddr, vm_map_growstack() 615 * will confuse the thread stack with the 616 * process stack and deliver a SEGV if they 617 * attempt to grow the thread stack past their 618 * current stacksize rlimit. To avoid this, 619 * adjust vm_maxsaddr upwards to reflect 620 * the current stacksize rlimit rather 621 * than the maximum possible stacksize. 622 * It would be better to adjust the 623 * mmap'ed region, but some apps do not check 624 * mmap's return value. 625 */ 626 PROC_LOCK(p); 627 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 628 lim_cur(p, RLIMIT_STACK); 629 PROC_UNLOCK(p); 630 } 631 632 /* 633 * This gives us our maximum stack size and a new BOS. 634 * If we're using VM_STACK, then mmap will just map 635 * the top SGROWSIZ bytes, and let the stack grow down 636 * to the limit at BOS. If we're not using VM_STACK 637 * we map the full stack, since we don't have a way 638 * to autogrow it. 639 */ 640 if (len > STACK_SIZE - GUARD_SIZE) { 641 bsd_args.addr = (caddr_t)PTRIN(addr); 642 bsd_args.len = len; 643 } else { 644 bsd_args.addr = (caddr_t)PTRIN(addr) - 645 (STACK_SIZE - GUARD_SIZE - len); 646 bsd_args.len = STACK_SIZE - GUARD_SIZE; 647 } 648 } else { 649 bsd_args.addr = (caddr_t)PTRIN(addr); 650 bsd_args.len = len; 651 } 652 bsd_args.pos = pos; 653 654 #ifdef DEBUG 655 if (ldebug(mmap)) 656 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 657 __func__, 658 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 659 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 660 #endif 661 error = sys_mmap(td, &bsd_args); 662 #ifdef DEBUG 663 if (ldebug(mmap)) 664 printf("-> %s() return: 0x%x (0x%08x)\n", 665 __func__, error, (u_int)td->td_retval[0]); 666 #endif 667 return (error); 668 } 669 670 int 671 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 672 { 673 struct mprotect_args bsd_args; 674 675 bsd_args.addr = uap->addr; 676 bsd_args.len = uap->len; 677 bsd_args.prot = uap->prot; 678 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 679 bsd_args.prot |= PROT_READ | PROT_EXEC; 680 return (sys_mprotect(td, &bsd_args)); 681 } 682 683 int 684 linux_iopl(struct thread *td, struct linux_iopl_args *args) 685 { 686 int error; 687 688 if (args->level < 0 || args->level > 3) 689 return (EINVAL); 690 if ((error = priv_check(td, PRIV_IO)) != 0) 691 return (error); 692 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 693 return (error); 694 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 695 (args->level * (PSL_IOPL / 3)); 696 697 return (0); 698 } 699 700 int 701 linux_pipe(struct thread *td, struct linux_pipe_args *args) 702 { 703 int error; 704 int fildes[2]; 705 706 #ifdef DEBUG 707 if (ldebug(pipe)) 708 printf(ARGS(pipe, "*")); 709 #endif 710 711 error = kern_pipe(td, fildes); 712 if (error) 713 return (error); 714 715 /* XXX: Close descriptors on error. */ 716 return (copyout(fildes, args->pipefds, sizeof fildes)); 717 } 718 719 int 720 linux_sigaction(struct thread *td, struct linux_sigaction_args *args) 721 { 722 l_osigaction_t osa; 723 l_sigaction_t act, oact; 724 int error; 725 726 #ifdef DEBUG 727 if (ldebug(sigaction)) 728 printf(ARGS(sigaction, "%d, %p, %p"), 729 args->sig, (void *)args->nsa, (void *)args->osa); 730 #endif 731 732 if (args->nsa != NULL) { 733 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 734 if (error) 735 return (error); 736 act.lsa_handler = osa.lsa_handler; 737 act.lsa_flags = osa.lsa_flags; 738 act.lsa_restorer = osa.lsa_restorer; 739 LINUX_SIGEMPTYSET(act.lsa_mask); 740 act.lsa_mask.__bits[0] = osa.lsa_mask; 741 } 742 743 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 744 args->osa ? &oact : NULL); 745 746 if (args->osa != NULL && !error) { 747 osa.lsa_handler = oact.lsa_handler; 748 osa.lsa_flags = oact.lsa_flags; 749 osa.lsa_restorer = oact.lsa_restorer; 750 osa.lsa_mask = oact.lsa_mask.__bits[0]; 751 error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 752 } 753 754 return (error); 755 } 756 757 /* 758 * Linux has two extra args, restart and oldmask. We don't use these, 759 * but it seems that "restart" is actually a context pointer that 760 * enables the signal to happen with a different register set. 761 */ 762 int 763 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 764 { 765 sigset_t sigmask; 766 l_sigset_t mask; 767 768 #ifdef DEBUG 769 if (ldebug(sigsuspend)) 770 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 771 #endif 772 773 LINUX_SIGEMPTYSET(mask); 774 mask.__bits[0] = args->mask; 775 linux_to_bsd_sigset(&mask, &sigmask); 776 return (kern_sigsuspend(td, sigmask)); 777 } 778 779 int 780 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 781 { 782 l_sigset_t lmask; 783 sigset_t sigmask; 784 int error; 785 786 #ifdef DEBUG 787 if (ldebug(rt_sigsuspend)) 788 printf(ARGS(rt_sigsuspend, "%p, %d"), 789 (void *)uap->newset, uap->sigsetsize); 790 #endif 791 792 if (uap->sigsetsize != sizeof(l_sigset_t)) 793 return (EINVAL); 794 795 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 796 if (error) 797 return (error); 798 799 linux_to_bsd_sigset(&lmask, &sigmask); 800 return (kern_sigsuspend(td, sigmask)); 801 } 802 803 int 804 linux_pause(struct thread *td, struct linux_pause_args *args) 805 { 806 struct proc *p = td->td_proc; 807 sigset_t sigmask; 808 809 #ifdef DEBUG 810 if (ldebug(pause)) 811 printf(ARGS(pause, "")); 812 #endif 813 814 PROC_LOCK(p); 815 sigmask = td->td_sigmask; 816 PROC_UNLOCK(p); 817 return (kern_sigsuspend(td, sigmask)); 818 } 819 820 int 821 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 822 { 823 stack_t ss, oss; 824 l_stack_t lss; 825 int error; 826 827 #ifdef DEBUG 828 if (ldebug(sigaltstack)) 829 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 830 #endif 831 832 if (uap->uss != NULL) { 833 error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 834 if (error) 835 return (error); 836 837 ss.ss_sp = PTRIN(lss.ss_sp); 838 ss.ss_size = lss.ss_size; 839 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 840 } 841 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 842 (uap->uoss != NULL) ? &oss : NULL); 843 if (!error && uap->uoss != NULL) { 844 lss.ss_sp = PTROUT(oss.ss_sp); 845 lss.ss_size = oss.ss_size; 846 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 847 error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 848 } 849 850 return (error); 851 } 852 853 int 854 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 855 { 856 struct ftruncate_args sa; 857 858 #ifdef DEBUG 859 if (ldebug(ftruncate64)) 860 printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 861 (intmax_t)args->length); 862 #endif 863 864 sa.fd = args->fd; 865 sa.length = args->length; 866 return sys_ftruncate(td, &sa); 867 } 868 869 int 870 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 871 { 872 struct timeval atv; 873 l_timeval atv32; 874 struct timezone rtz; 875 int error = 0; 876 877 if (uap->tp) { 878 microtime(&atv); 879 atv32.tv_sec = atv.tv_sec; 880 atv32.tv_usec = atv.tv_usec; 881 error = copyout(&atv32, uap->tp, sizeof(atv32)); 882 } 883 if (error == 0 && uap->tzp != NULL) { 884 rtz.tz_minuteswest = tz_minuteswest; 885 rtz.tz_dsttime = tz_dsttime; 886 error = copyout(&rtz, uap->tzp, sizeof(rtz)); 887 } 888 return (error); 889 } 890 891 int 892 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 893 { 894 l_timeval atv32; 895 struct timeval atv, *tvp; 896 struct timezone atz, *tzp; 897 int error; 898 899 if (uap->tp) { 900 error = copyin(uap->tp, &atv32, sizeof(atv32)); 901 if (error) 902 return (error); 903 atv.tv_sec = atv32.tv_sec; 904 atv.tv_usec = atv32.tv_usec; 905 tvp = &atv; 906 } else 907 tvp = NULL; 908 if (uap->tzp) { 909 error = copyin(uap->tzp, &atz, sizeof(atz)); 910 if (error) 911 return (error); 912 tzp = &atz; 913 } else 914 tzp = NULL; 915 return (kern_settimeofday(td, tvp, tzp)); 916 } 917 918 int 919 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 920 { 921 struct l_rusage s32; 922 struct rusage s; 923 int error; 924 925 error = kern_getrusage(td, uap->who, &s); 926 if (error != 0) 927 return (error); 928 if (uap->rusage != NULL) { 929 bsd_to_linux_rusage(&s, &s32); 930 error = copyout(&s32, uap->rusage, sizeof(s32)); 931 } 932 return (error); 933 } 934 935 int 936 linux_sched_rr_get_interval(struct thread *td, 937 struct linux_sched_rr_get_interval_args *uap) 938 { 939 struct timespec ts; 940 struct l_timespec ts32; 941 int error; 942 943 error = kern_sched_rr_get_interval(td, uap->pid, &ts); 944 if (error != 0) 945 return (error); 946 ts32.tv_sec = ts.tv_sec; 947 ts32.tv_nsec = ts.tv_nsec; 948 return (copyout(&ts32, uap->interval, sizeof(ts32))); 949 } 950 951 int 952 linux_set_thread_area(struct thread *td, 953 struct linux_set_thread_area_args *args) 954 { 955 struct l_user_desc info; 956 struct user_segment_descriptor sd; 957 struct pcb *pcb; 958 int a[2]; 959 int error; 960 961 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 962 if (error) 963 return (error); 964 965 #ifdef DEBUG 966 if (ldebug(set_thread_area)) 967 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 968 "%i, %i, %i"), info.entry_number, info.base_addr, 969 info.limit, info.seg_32bit, info.contents, 970 info.read_exec_only, info.limit_in_pages, 971 info.seg_not_present, info.useable); 972 #endif 973 974 /* 975 * Semantics of Linux version: every thread in the system has array 976 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 977 * This syscall loads one of the selected TLS decriptors with a value 978 * and also loads GDT descriptors 6, 7 and 8 with the content of 979 * the per-thread descriptors. 980 * 981 * Semantics of FreeBSD version: I think we can ignore that Linux has 982 * three per-thread descriptors and use just the first one. 983 * The tls_array[] is used only in [gs]et_thread_area() syscalls and 984 * for loading the GDT descriptors. We use just one GDT descriptor 985 * for TLS, so we will load just one. 986 * 987 * XXX: This doesn't work when a user space process tries to use more 988 * than one TLS segment. Comment in the Linux source says wine might 989 * do this. 990 */ 991 992 /* 993 * GLIBC reads current %gs and call set_thread_area() with it. 994 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 995 * we use these segments. 996 */ 997 switch (info.entry_number) { 998 case GUGS32_SEL: 999 case GUDATA_SEL: 1000 case 6: 1001 case -1: 1002 info.entry_number = GUGS32_SEL; 1003 break; 1004 default: 1005 return (EINVAL); 1006 } 1007 1008 /* 1009 * We have to copy out the GDT entry we use. 1010 * 1011 * XXX: What if a user space program does not check the return value 1012 * and tries to use 6, 7 or 8? 1013 */ 1014 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 1015 if (error) 1016 return (error); 1017 1018 if (LINUX_LDT_empty(&info)) { 1019 a[0] = 0; 1020 a[1] = 0; 1021 } else { 1022 a[0] = LINUX_LDT_entry_a(&info); 1023 a[1] = LINUX_LDT_entry_b(&info); 1024 } 1025 1026 memcpy(&sd, &a, sizeof(a)); 1027 #ifdef DEBUG 1028 if (ldebug(set_thread_area)) 1029 printf("Segment created in set_thread_area: " 1030 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 1031 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 1032 "def32: %i, gran: %i\n", 1033 sd.sd_lobase, 1034 sd.sd_hibase, 1035 sd.sd_lolimit, 1036 sd.sd_hilimit, 1037 sd.sd_type, 1038 sd.sd_dpl, 1039 sd.sd_p, 1040 sd.sd_xx, 1041 sd.sd_long, 1042 sd.sd_def32, 1043 sd.sd_gran); 1044 #endif 1045 1046 pcb = td->td_pcb; 1047 pcb->pcb_gsbase = (register_t)info.base_addr; 1048 set_pcb_flags(pcb, PCB_32BIT | PCB_GS32BIT); 1049 update_gdt_gsbase(td, info.base_addr); 1050 1051 return (0); 1052 } 1053 1054 int 1055 linux_wait4(struct thread *td, struct linux_wait4_args *args) 1056 { 1057 int error, options; 1058 struct rusage ru, *rup; 1059 struct l_rusage lru; 1060 1061 #ifdef DEBUG 1062 if (ldebug(wait4)) 1063 printf(ARGS(wait4, "%d, %p, %d, %p"), 1064 args->pid, (void *)args->status, args->options, 1065 (void *)args->rusage); 1066 #endif 1067 1068 options = (args->options & (WNOHANG | WUNTRACED)); 1069 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1070 if (args->options & __WCLONE) 1071 options |= WLINUXCLONE; 1072 1073 if (args->rusage != NULL) 1074 rup = &ru; 1075 else 1076 rup = NULL; 1077 error = linux_common_wait(td, args->pid, args->status, options, rup); 1078 if (error) 1079 return (error); 1080 if (args->rusage != NULL) { 1081 bsd_to_linux_rusage(rup, &lru); 1082 error = copyout(&lru, args->rusage, sizeof(lru)); 1083 } 1084 1085 return (error); 1086 } 1087