1 /*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $Id: linux_misc.c,v 1.50 1998/12/30 21:01:33 sos Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/sysproto.h> 34 #include <sys/kernel.h> 35 #include <sys/mman.h> 36 #include <sys/proc.h> 37 #include <sys/fcntl.h> 38 #include <sys/imgact_aout.h> 39 #include <sys/mount.h> 40 #include <sys/namei.h> 41 #include <sys/resourcevar.h> 42 #include <sys/stat.h> 43 #include <sys/sysctl.h> 44 #ifdef COMPAT_LINUX_THREADS 45 #include <sys/unistd.h> 46 #endif /* COMPAT_LINUX_THREADS */ 47 #include <sys/vnode.h> 48 #include <sys/wait.h> 49 #include <sys/time.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 #include <vm/vm_kern.h> 54 #include <vm/vm_prot.h> 55 #include <vm/vm_map.h> 56 #include <vm/vm_extern.h> 57 58 #include <machine/frame.h> 59 #include <machine/psl.h> 60 61 #include <i386/linux/linux.h> 62 #include <i386/linux/linux_proto.h> 63 #include <i386/linux/linux_util.h> 64 65 int 66 linux_alarm(struct proc *p, struct linux_alarm_args *args) 67 { 68 struct itimerval it, old_it; 69 struct timeval tv; 70 int s; 71 72 #ifdef DEBUG 73 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 74 #endif 75 if (args->secs > 100000000) 76 return EINVAL; 77 it.it_value.tv_sec = (long)args->secs; 78 it.it_value.tv_usec = 0; 79 it.it_interval.tv_sec = 0; 80 it.it_interval.tv_usec = 0; 81 s = splsoftclock(); 82 old_it = p->p_realtimer; 83 getmicrouptime(&tv); 84 if (timevalisset(&old_it.it_value)) 85 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 86 if (it.it_value.tv_sec != 0) { 87 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 88 timevaladd(&it.it_value, &tv); 89 } 90 p->p_realtimer = it; 91 splx(s); 92 if (timevalcmp(&old_it.it_value, &tv, >)) { 93 timevalsub(&old_it.it_value, &tv); 94 if (old_it.it_value.tv_usec != 0) 95 old_it.it_value.tv_sec++; 96 p->p_retval[0] = old_it.it_value.tv_sec; 97 } 98 return 0; 99 } 100 101 int 102 linux_brk(struct proc *p, struct linux_brk_args *args) 103 { 104 #if 0 105 struct vmspace *vm = p->p_vmspace; 106 vm_offset_t new, old; 107 int error; 108 109 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 110 return EINVAL; 111 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 112 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 113 return ENOMEM; 114 115 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 116 new = round_page((vm_offset_t)args->dsend); 117 p->p_retval[0] = old; 118 if ((new-old) > 0) { 119 if (swap_pager_full) 120 return ENOMEM; 121 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 122 VM_PROT_ALL, VM_PROT_ALL, 0); 123 if (error) 124 return error; 125 vm->vm_dsize += btoc((new-old)); 126 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 127 } 128 return 0; 129 #else 130 struct vmspace *vm = p->p_vmspace; 131 vm_offset_t new, old; 132 struct obreak_args /* { 133 char * nsize; 134 } */ tmp; 135 136 #ifdef DEBUG 137 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 138 #endif 139 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 140 new = (vm_offset_t)args->dsend; 141 tmp.nsize = (char *) new; 142 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 143 p->p_retval[0] = (int)new; 144 else 145 p->p_retval[0] = (int)old; 146 147 return 0; 148 #endif 149 } 150 151 int 152 linux_uselib(struct proc *p, struct linux_uselib_args *args) 153 { 154 struct nameidata ni; 155 struct vnode *vp; 156 struct exec *a_out; 157 struct vattr attr; 158 vm_offset_t vmaddr; 159 unsigned long file_offset; 160 vm_offset_t buffer; 161 unsigned long bss_size; 162 int error; 163 caddr_t sg; 164 int locked; 165 166 sg = stackgap_init(); 167 CHECKALTEXIST(p, &sg, args->library); 168 169 #ifdef DEBUG 170 printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library); 171 #endif 172 173 a_out = NULL; 174 locked = 0; 175 vp = NULL; 176 177 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p); 178 if (error = namei(&ni)) 179 goto cleanup; 180 181 vp = ni.ni_vp; 182 if (vp == NULL) { 183 error = ENOEXEC; /* ?? */ 184 goto cleanup; 185 } 186 187 /* 188 * From here on down, we have a locked vnode that must be unlocked. 189 */ 190 locked++; 191 192 /* 193 * Writable? 194 */ 195 if (vp->v_writecount) { 196 error = ETXTBSY; 197 goto cleanup; 198 } 199 200 /* 201 * Executable? 202 */ 203 if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p)) 204 goto cleanup; 205 206 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 207 ((attr.va_mode & 0111) == 0) || 208 (attr.va_type != VREG)) { 209 error = ENOEXEC; 210 goto cleanup; 211 } 212 213 /* 214 * Sensible size? 215 */ 216 if (attr.va_size == 0) { 217 error = ENOEXEC; 218 goto cleanup; 219 } 220 221 /* 222 * Can we access it? 223 */ 224 if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) 225 goto cleanup; 226 227 if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) 228 goto cleanup; 229 230 /* 231 * Lock no longer needed 232 */ 233 VOP_UNLOCK(vp, 0, p); 234 locked = 0; 235 236 /* 237 * Pull in executable header into kernel_map 238 */ 239 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 240 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 241 if (error) 242 goto cleanup; 243 244 /* 245 * Is it a Linux binary ? 246 */ 247 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 248 error = ENOEXEC; 249 goto cleanup; 250 } 251 252 /* While we are here, we should REALLY do some more checks */ 253 254 /* 255 * Set file/virtual offset based on a.out variant. 256 */ 257 switch ((int)(a_out->a_magic & 0xffff)) { 258 case 0413: /* ZMAGIC */ 259 file_offset = 1024; 260 break; 261 case 0314: /* QMAGIC */ 262 file_offset = 0; 263 break; 264 default: 265 error = ENOEXEC; 266 goto cleanup; 267 } 268 269 bss_size = round_page(a_out->a_bss); 270 271 /* 272 * Check various fields in header for validity/bounds. 273 */ 274 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 275 error = ENOEXEC; 276 goto cleanup; 277 } 278 279 /* text + data can't exceed file size */ 280 if (a_out->a_data + a_out->a_text > attr.va_size) { 281 error = EFAULT; 282 goto cleanup; 283 } 284 285 /* 286 * text/data/bss must not exceed limits 287 * XXX: this is not complete. it should check current usage PLUS 288 * the resources needed by this library. 289 */ 290 if (a_out->a_text > MAXTSIZ || 291 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 292 error = ENOMEM; 293 goto cleanup; 294 } 295 296 /* 297 * prevent more writers 298 */ 299 vp->v_flag |= VTEXT; 300 301 /* 302 * Check if file_offset page aligned,. 303 * Currently we cannot handle misalinged file offsets, 304 * and so we read in the entire image (what a waste). 305 */ 306 if (file_offset & PAGE_MASK) { 307 #ifdef DEBUG 308 printf("uselib: Non page aligned binary %lu\n", file_offset); 309 #endif 310 /* 311 * Map text+data read/write/execute 312 */ 313 314 /* a_entry is the load address and is page aligned */ 315 vmaddr = trunc_page(a_out->a_entry); 316 317 /* get anon user mapping, read+write+execute */ 318 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 319 a_out->a_text + a_out->a_data, FALSE, 320 VM_PROT_ALL, VM_PROT_ALL, 0); 321 if (error) 322 goto cleanup; 323 324 /* map file into kernel_map */ 325 error = vm_mmap(kernel_map, &buffer, 326 round_page(a_out->a_text + a_out->a_data + file_offset), 327 VM_PROT_READ, VM_PROT_READ, 0, 328 (caddr_t)vp, trunc_page(file_offset)); 329 if (error) 330 goto cleanup; 331 332 /* copy from kernel VM space to user space */ 333 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 334 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 335 336 /* release temporary kernel space */ 337 vm_map_remove(kernel_map, buffer, 338 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 339 340 if (error) 341 goto cleanup; 342 } 343 else { 344 #ifdef DEBUG 345 printf("uselib: Page aligned binary %lu\n", file_offset); 346 #endif 347 /* 348 * for QMAGIC, a_entry is 20 bytes beyond the load address 349 * to skip the executable header 350 */ 351 vmaddr = trunc_page(a_out->a_entry); 352 353 /* 354 * Map it all into the process's space as a single copy-on-write 355 * "data" segment. 356 */ 357 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 358 a_out->a_text + a_out->a_data, 359 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 360 (caddr_t)vp, file_offset); 361 if (error) 362 goto cleanup; 363 } 364 #ifdef DEBUG 365 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 366 #endif 367 if (bss_size != 0) { 368 /* 369 * Calculate BSS start address 370 */ 371 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 372 373 /* 374 * allocate some 'anon' space 375 */ 376 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 377 bss_size, FALSE, 378 VM_PROT_ALL, VM_PROT_ALL, 0); 379 if (error) 380 goto cleanup; 381 } 382 383 cleanup: 384 /* 385 * Unlock vnode if needed 386 */ 387 if (locked) 388 VOP_UNLOCK(vp, 0, p); 389 390 /* 391 * Release the kernel mapping. 392 */ 393 if (a_out) 394 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 395 396 return error; 397 } 398 399 /* XXX move */ 400 struct linux_select_argv { 401 int nfds; 402 fd_set *readfds; 403 fd_set *writefds; 404 fd_set *exceptfds; 405 struct timeval *timeout; 406 }; 407 408 int 409 linux_select(struct proc *p, struct linux_select_args *args) 410 { 411 struct linux_select_argv linux_args; 412 struct linux_newselect_args newsel; 413 int error; 414 415 #ifdef SELECT_DEBUG 416 printf("Linux-emul(%d): select(%x)\n", 417 p->p_pid, args->ptr); 418 #endif 419 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 420 sizeof(linux_args)))) 421 return error; 422 423 newsel.nfds = linux_args.nfds; 424 newsel.readfds = linux_args.readfds; 425 newsel.writefds = linux_args.writefds; 426 newsel.exceptfds = linux_args.exceptfds; 427 newsel.timeout = linux_args.timeout; 428 429 return linux_newselect(p, &newsel); 430 } 431 432 int 433 linux_newselect(struct proc *p, struct linux_newselect_args *args) 434 { 435 struct select_args bsa; 436 struct timeval tv0, tv1, utv, *tvp; 437 caddr_t sg; 438 int error; 439 440 #ifdef DEBUG 441 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 442 (long)p->p_pid, args->nfds, (void *)args->readfds, 443 (void *)args->writefds, (void *)args->exceptfds, 444 (void *)args->timeout); 445 #endif 446 error = 0; 447 bsa.nd = args->nfds; 448 bsa.in = args->readfds; 449 bsa.ou = args->writefds; 450 bsa.ex = args->exceptfds; 451 bsa.tv = args->timeout; 452 453 /* 454 * Store current time for computation of the amount of 455 * time left. 456 */ 457 if (args->timeout) { 458 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 459 goto select_out; 460 #ifdef DEBUG 461 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 462 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 463 #endif 464 if (itimerfix(&utv)) { 465 /* 466 * The timeval was invalid. Convert it to something 467 * valid that will act as it does under Linux. 468 */ 469 sg = stackgap_init(); 470 tvp = stackgap_alloc(&sg, sizeof(utv)); 471 utv.tv_sec += utv.tv_usec / 1000000; 472 utv.tv_usec %= 1000000; 473 if (utv.tv_usec < 0) { 474 utv.tv_sec -= 1; 475 utv.tv_usec += 1000000; 476 } 477 if (utv.tv_sec < 0) 478 timevalclear(&utv); 479 if ((error = copyout(&utv, tvp, sizeof(utv)))) 480 goto select_out; 481 bsa.tv = tvp; 482 } 483 microtime(&tv0); 484 } 485 486 error = select(p, &bsa); 487 #ifdef DEBUG 488 printf("Linux-emul(%d): real select returns %d\n", 489 p->p_pid, error); 490 #endif 491 492 if (error) { 493 /* 494 * See fs/select.c in the Linux kernel. Without this, 495 * Maelstrom doesn't work. 496 */ 497 if (error == ERESTART) 498 error = EINTR; 499 goto select_out; 500 } 501 502 if (args->timeout) { 503 if (p->p_retval[0]) { 504 /* 505 * Compute how much time was left of the timeout, 506 * by subtracting the current time and the time 507 * before we started the call, and subtracting 508 * that result from the user-supplied value. 509 */ 510 microtime(&tv1); 511 timevalsub(&tv1, &tv0); 512 timevalsub(&utv, &tv1); 513 if (utv.tv_sec < 0) 514 timevalclear(&utv); 515 } else 516 timevalclear(&utv); 517 #ifdef DEBUG 518 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 519 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 520 #endif 521 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 522 goto select_out; 523 } 524 525 select_out: 526 #ifdef DEBUG 527 printf("Linux-emul(%d): newselect_out -> %d\n", 528 p->p_pid, error); 529 #endif 530 return error; 531 } 532 533 int 534 linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 535 { 536 struct proc *curproc; 537 538 #ifdef DEBUG 539 printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid); 540 #endif 541 if (args->pid != p->p_pid) { 542 if (!(curproc = pfind(args->pid))) 543 return ESRCH; 544 } 545 else 546 curproc = p; 547 p->p_retval[0] = curproc->p_pgid; 548 return 0; 549 } 550 551 int 552 linux_fork(struct proc *p, struct linux_fork_args *args) 553 { 554 int error; 555 556 #ifdef DEBUG 557 printf("Linux-emul(%d): fork()\n", p->p_pid); 558 #endif 559 if (error = fork(p, (struct fork_args *)args)) 560 return error; 561 if (p->p_retval[1] == 1) 562 p->p_retval[0] = 0; 563 return 0; 564 } 565 566 #ifndef COMPAT_LINUX_THREADS 567 int 568 linux_clone(struct proc *p, struct linux_clone_args *args) 569 { 570 printf("linux_clone(%d): Not enabled\n", p->p_pid); 571 return (EOPNOTSUPP); 572 } 573 574 #else 575 #define CLONE_VM 0x100 576 #define CLONE_FS 0x200 577 #define CLONE_FILES 0x400 578 #define CLONE_SIGHAND 0x800 579 #define CLONE_PID 0x1000 580 581 int 582 linux_clone(struct proc *p, struct linux_clone_args *args) 583 { 584 int error, ff = RFPROC; 585 struct proc *p2; 586 int exit_signal; 587 vm_offset_t start; 588 struct rfork_args rf_args; 589 590 #ifdef SMP 591 printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid); 592 return (EOPNOTSUPP); 593 #endif 594 #ifdef DEBUG 595 if (args->flags & CLONE_PID) 596 printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid); 597 printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid, 598 (unsigned int)args->flags, (unsigned int)args->stack); 599 #endif 600 601 if (!args->stack) 602 return (EINVAL); 603 exit_signal = args->flags & 0x000000ff; 604 if (exit_signal >= LINUX_NSIG) 605 return EINVAL; 606 exit_signal = linux_to_bsd_signal[exit_signal]; 607 608 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 609 ff |= RFTHREAD; 610 611 if (args->flags & CLONE_VM) 612 ff |= RFMEM; 613 if (args->flags & CLONE_SIGHAND) 614 ff |= RFSIGSHARE; 615 if (!(args->flags & CLONE_FILES)) 616 ff |= RFFDG; 617 618 error = 0; 619 start = 0; 620 621 rf_args.flags = ff; 622 if (error = rfork(p, &rf_args)) 623 return error; 624 625 p2 = pfind(p->p_retval[0]); 626 if (p2 == 0) 627 return ESRCH; 628 629 p2->p_sigparent = exit_signal; 630 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 631 632 #ifdef DEBUG 633 printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid); 634 #endif 635 return 0; 636 } 637 638 #endif /* COMPAT_LINUX_THREADS */ 639 /* XXX move */ 640 struct linux_mmap_argv { 641 linux_caddr_t addr; 642 int len; 643 int prot; 644 int flags; 645 int fd; 646 int pos; 647 }; 648 649 #ifdef COMPAT_LINUX_THREADS 650 #define STACK_SIZE (2 * 1024 * 1024) 651 #define GUARD_SIZE (4 * PAGE_SIZE) 652 653 #endif /* COMPAT_LINUX_THREADS */ 654 int 655 linux_mmap(struct proc *p, struct linux_mmap_args *args) 656 { 657 struct mmap_args /* { 658 caddr_t addr; 659 size_t len; 660 int prot; 661 int flags; 662 int fd; 663 long pad; 664 off_t pos; 665 } */ bsd_args; 666 int error; 667 struct linux_mmap_argv linux_args; 668 669 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 670 sizeof(linux_args)))) 671 return error; 672 #ifdef DEBUG 673 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n", 674 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 675 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 676 #endif 677 bsd_args.flags = 0; 678 if (linux_args.flags & LINUX_MAP_SHARED) 679 bsd_args.flags |= MAP_SHARED; 680 if (linux_args.flags & LINUX_MAP_PRIVATE) 681 bsd_args.flags |= MAP_PRIVATE; 682 if (linux_args.flags & LINUX_MAP_FIXED) 683 bsd_args.flags |= MAP_FIXED; 684 if (linux_args.flags & LINUX_MAP_ANON) 685 bsd_args.flags |= MAP_ANON; 686 #ifndef COMPAT_LINUX_THREADS 687 bsd_args.addr = linux_args.addr; 688 bsd_args.len = linux_args.len; 689 #else 690 691 #ifndef VM_STACK 692 /* Linux Threads will map into the proc stack space, unless 693 * we prevent it. This causes problems if we're not using 694 * our VM_STACK options. 695 */ 696 if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ)) 697 return (EINVAL); 698 #endif 699 700 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 701 702 #ifdef VM_STACK 703 bsd_args.flags |= MAP_STACK; 704 #endif 705 706 /* The linux MAP_GROWSDOWN option does not limit auto 707 * growth of the region. Linux mmap with this option 708 * takes as addr the inital BOS, and as len, the initial 709 * region size. It can then grow down from addr without 710 * limit. However, linux threads has an implicit internal 711 * limit to stack size of STACK_SIZE. Its just not 712 * enforced explicitly in linux. But, here we impose 713 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 714 * region, since we can do this with our mmap. 715 * 716 * Our mmap with MAP_STACK takes addr as the maximum 717 * downsize limit on BOS, and as len the max size of 718 * the region. It them maps the top SGROWSIZ bytes, 719 * and autgrows the region down, up to the limit 720 * in addr. 721 * 722 * If we don't use the MAP_STACK option, the effect 723 * of this code is to allocate a stack region of a 724 * fixed size of (STACK_SIZE - GUARD_SIZE). 725 */ 726 727 /* This gives us TOS */ 728 bsd_args.addr = linux_args.addr + linux_args.len; 729 730 /* This gives us our maximum stack size */ 731 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 732 bsd_args.len = linux_args.len; 733 else 734 bsd_args.len = STACK_SIZE - GUARD_SIZE; 735 736 /* This gives us a new BOS. If we're using VM_STACK, then 737 * mmap will just map the top SGROWSIZ bytes, and let 738 * the stack grow down to the limit at BOS. If we're 739 * not using VM_STACK we map the full stack, since we 740 * don't have a way to autogrow it. 741 */ 742 bsd_args.addr -= bsd_args.len; 743 744 } else { 745 bsd_args.addr = linux_args.addr; 746 bsd_args.len = linux_args.len; 747 } 748 #endif /* COMPAT_LINUX_THREADS */ 749 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 750 bsd_args.fd = linux_args.fd; 751 bsd_args.pos = linux_args.pos; 752 bsd_args.pad = 0; 753 return mmap(p, &bsd_args); 754 } 755 756 int 757 linux_mremap(struct proc *p, struct linux_mremap_args *args) 758 { 759 struct munmap_args /* { 760 void *addr; 761 size_t len; 762 } */ bsd_args; 763 int error = 0; 764 765 #ifdef DEBUG 766 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 767 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 768 args->flags); 769 #endif 770 args->new_len = round_page(args->new_len); 771 args->old_len = round_page(args->old_len); 772 773 if (args->new_len > args->old_len) { 774 p->p_retval[0] = 0; 775 return ENOMEM; 776 } 777 778 if (args->new_len < args->old_len) { 779 bsd_args.addr = args->addr + args->new_len; 780 bsd_args.len = args->old_len - args->new_len; 781 error = munmap(p, &bsd_args); 782 } 783 784 p->p_retval[0] = error ? 0 : (int)args->addr; 785 return error; 786 } 787 788 int 789 linux_msync(struct proc *p, struct linux_msync_args *args) 790 { 791 struct msync_args bsd_args; 792 793 bsd_args.addr = args->addr; 794 bsd_args.len = args->len; 795 bsd_args.flags = 0; /* XXX ignore */ 796 797 return msync(p, &bsd_args); 798 } 799 800 int 801 linux_pipe(struct proc *p, struct linux_pipe_args *args) 802 { 803 int error; 804 int reg_edx; 805 806 #ifdef DEBUG 807 printf("Linux-emul(%d): pipe(*)\n", p->p_pid); 808 #endif 809 reg_edx = p->p_retval[1]; 810 if (error = pipe(p, 0)) { 811 p->p_retval[1] = reg_edx; 812 return error; 813 } 814 815 if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) { 816 p->p_retval[1] = reg_edx; 817 return error; 818 } 819 820 p->p_retval[1] = reg_edx; 821 p->p_retval[0] = 0; 822 return 0; 823 } 824 825 int 826 linux_time(struct proc *p, struct linux_time_args *args) 827 { 828 struct timeval tv; 829 linux_time_t tm; 830 int error; 831 832 #ifdef DEBUG 833 printf("Linux-emul(%d): time(*)\n", p->p_pid); 834 #endif 835 microtime(&tv); 836 tm = tv.tv_sec; 837 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 838 return error; 839 p->p_retval[0] = tm; 840 return 0; 841 } 842 843 struct linux_times_argv { 844 long tms_utime; 845 long tms_stime; 846 long tms_cutime; 847 long tms_cstime; 848 }; 849 850 #define CLK_TCK 100 /* Linux uses 100 */ 851 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 852 853 int 854 linux_times(struct proc *p, struct linux_times_args *args) 855 { 856 struct timeval tv; 857 struct linux_times_argv tms; 858 struct rusage ru; 859 int error; 860 861 #ifdef DEBUG 862 printf("Linux-emul(%d): times(*)\n", p->p_pid); 863 #endif 864 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 865 866 tms.tms_utime = CONVTCK(ru.ru_utime); 867 tms.tms_stime = CONVTCK(ru.ru_stime); 868 869 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 870 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 871 872 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 873 sizeof(struct linux_times_argv)))) 874 return error; 875 876 microuptime(&tv); 877 p->p_retval[0] = (int)CONVTCK(tv); 878 return 0; 879 } 880 881 /* XXX move */ 882 struct linux_newuname_t { 883 char sysname[65]; 884 char nodename[65]; 885 char release[65]; 886 char version[65]; 887 char machine[65]; 888 char domainname[65]; 889 }; 890 891 int 892 linux_newuname(struct proc *p, struct linux_newuname_args *args) 893 { 894 struct linux_newuname_t linux_newuname; 895 896 #ifdef DEBUG 897 printf("Linux-emul(%d): newuname(*)\n", p->p_pid); 898 #endif 899 bzero(&linux_newuname, sizeof(struct linux_newuname_t)); 900 strncpy(linux_newuname.sysname, ostype, 901 sizeof(linux_newuname.sysname) - 1); 902 strncpy(linux_newuname.nodename, hostname, 903 sizeof(linux_newuname.nodename) - 1); 904 strncpy(linux_newuname.release, osrelease, 905 sizeof(linux_newuname.release) - 1); 906 strncpy(linux_newuname.version, version, 907 sizeof(linux_newuname.version) - 1); 908 strncpy(linux_newuname.machine, machine, 909 sizeof(linux_newuname.machine) - 1); 910 strncpy(linux_newuname.domainname, domainname, 911 sizeof(linux_newuname.domainname) - 1); 912 return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf, 913 sizeof(struct linux_newuname_t))); 914 } 915 916 struct linux_utimbuf { 917 linux_time_t l_actime; 918 linux_time_t l_modtime; 919 }; 920 921 int 922 linux_utime(struct proc *p, struct linux_utime_args *args) 923 { 924 struct utimes_args /* { 925 char *path; 926 struct timeval *tptr; 927 } */ bsdutimes; 928 struct timeval tv[2], *tvp; 929 struct linux_utimbuf lut; 930 int error; 931 caddr_t sg; 932 933 sg = stackgap_init(); 934 CHECKALTEXIST(p, &sg, args->fname); 935 936 #ifdef DEBUG 937 printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname); 938 #endif 939 if (args->times) { 940 if ((error = copyin(args->times, &lut, sizeof lut))) 941 return error; 942 tv[0].tv_sec = lut.l_actime; 943 tv[0].tv_usec = 0; 944 tv[1].tv_sec = lut.l_modtime; 945 tv[1].tv_usec = 0; 946 /* so that utimes can copyin */ 947 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 948 if ((error = copyout(tv, tvp, sizeof(tv)))) 949 return error; 950 bsdutimes.tptr = tvp; 951 } else 952 bsdutimes.tptr = NULL; 953 954 bsdutimes.path = args->fname; 955 return utimes(p, &bsdutimes); 956 } 957 958 int 959 linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 960 { 961 struct wait_args /* { 962 int pid; 963 int *status; 964 int options; 965 struct rusage *rusage; 966 } */ tmp; 967 int error, tmpstat; 968 969 #ifdef DEBUG 970 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 971 (long)p->p_pid, args->pid, (void *)args->status, args->options); 972 #endif 973 tmp.pid = args->pid; 974 tmp.status = args->status; 975 #ifndef COMPAT_LINUX_THREADS 976 tmp.options = args->options; 977 #else 978 /* This filters out the linux option _WCLONE. I don't 979 * think we need it, but I could be wrong. If we need 980 * it, we need to fix wait4, since it will give us an 981 * error return of EINVAL if we pass in _WCLONE, and 982 * of course, it won't do anything with it. 983 */ 984 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 985 #endif /* COMPAT_LINUX_THREADS */ 986 tmp.rusage = NULL; 987 988 if (error = wait4(p, &tmp)) 989 #ifndef COMPAT_LINUX_THREADS 990 return error; 991 #else 992 return error; 993 #endif /* COMPAT_LINUX_THREADS */ 994 if (args->status) { 995 if (error = copyin(args->status, &tmpstat, sizeof(int))) 996 return error; 997 if (WIFSIGNALED(tmpstat)) 998 tmpstat = (tmpstat & 0xffffff80) | 999 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1000 else if (WIFSTOPPED(tmpstat)) 1001 tmpstat = (tmpstat & 0xffff00ff) | 1002 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1003 return copyout(&tmpstat, args->status, sizeof(int)); 1004 } else 1005 return 0; 1006 } 1007 1008 int 1009 linux_wait4(struct proc *p, struct linux_wait4_args *args) 1010 { 1011 struct wait_args /* { 1012 int pid; 1013 int *status; 1014 int options; 1015 struct rusage *rusage; 1016 } */ tmp; 1017 int error, tmpstat; 1018 1019 #ifdef DEBUG 1020 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 1021 (long)p->p_pid, args->pid, (void *)args->status, args->options, 1022 (void *)args->rusage); 1023 #endif 1024 tmp.pid = args->pid; 1025 tmp.status = args->status; 1026 #ifndef COMPAT_LINUX_THREADS 1027 tmp.options = args->options; 1028 #else 1029 /* This filters out the linux option _WCLONE. I don't 1030 * think we need it, but I could be wrong. If we need 1031 * it, we need to fix wait4, since it will give us an 1032 * error return of EINVAL if we pass in _WCLONE, and 1033 * of course, it won't do anything with it. 1034 */ 1035 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1036 #endif /* COMPAT_LINUX_THREADS */ 1037 tmp.rusage = args->rusage; 1038 1039 if (error = wait4(p, &tmp)) 1040 return error; 1041 1042 p->p_siglist &= ~sigmask(SIGCHLD); 1043 1044 if (args->status) { 1045 if (error = copyin(args->status, &tmpstat, sizeof(int))) 1046 return error; 1047 if (WIFSIGNALED(tmpstat)) 1048 tmpstat = (tmpstat & 0xffffff80) | 1049 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1050 else if (WIFSTOPPED(tmpstat)) 1051 tmpstat = (tmpstat & 0xffff00ff) | 1052 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1053 return copyout(&tmpstat, args->status, sizeof(int)); 1054 } else 1055 return 0; 1056 } 1057 1058 int 1059 linux_mknod(struct proc *p, struct linux_mknod_args *args) 1060 { 1061 caddr_t sg; 1062 struct mknod_args bsd_mknod; 1063 struct mkfifo_args bsd_mkfifo; 1064 1065 sg = stackgap_init(); 1066 1067 CHECKALTCREAT(p, &sg, args->path); 1068 1069 #ifdef DEBUG 1070 printf("Linux-emul(%d): mknod(%s, %d, %d)\n", 1071 p->p_pid, args->path, args->mode, args->dev); 1072 #endif 1073 1074 if (args->mode & S_IFIFO) { 1075 bsd_mkfifo.path = args->path; 1076 bsd_mkfifo.mode = args->mode; 1077 return mkfifo(p, &bsd_mkfifo); 1078 } else { 1079 bsd_mknod.path = args->path; 1080 bsd_mknod.mode = args->mode; 1081 bsd_mknod.dev = args->dev; 1082 return mknod(p, &bsd_mknod); 1083 } 1084 } 1085 1086 /* 1087 * UGH! This is just about the dumbest idea I've ever heard!! 1088 */ 1089 int 1090 linux_personality(struct proc *p, struct linux_personality_args *args) 1091 { 1092 #ifdef DEBUG 1093 printf("Linux-emul(%d): personality(%d)\n", 1094 p->p_pid, args->per); 1095 #endif 1096 if (args->per != 0) 1097 return EINVAL; 1098 1099 /* Yes Jim, it's still a Linux... */ 1100 p->p_retval[0] = 0; 1101 return 0; 1102 } 1103 1104 /* 1105 * Wrappers for get/setitimer for debugging.. 1106 */ 1107 int 1108 linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1109 { 1110 struct setitimer_args bsa; 1111 struct itimerval foo; 1112 int error; 1113 1114 #ifdef DEBUG 1115 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1116 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1117 #endif 1118 bsa.which = args->which; 1119 bsa.itv = args->itv; 1120 bsa.oitv = args->oitv; 1121 if (args->itv) { 1122 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1123 sizeof(foo)))) 1124 return error; 1125 #ifdef DEBUG 1126 printf("setitimer: value: sec: %ld, usec: %ld\n", 1127 foo.it_value.tv_sec, foo.it_value.tv_usec); 1128 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1129 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1130 #endif 1131 } 1132 return setitimer(p, &bsa); 1133 } 1134 1135 int 1136 linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1137 { 1138 struct getitimer_args bsa; 1139 #ifdef DEBUG 1140 printf("Linux-emul(%ld): getitimer(%p)\n", 1141 (long)p->p_pid, (void *)args->itv); 1142 #endif 1143 bsa.which = args->which; 1144 bsa.itv = args->itv; 1145 return getitimer(p, &bsa); 1146 } 1147 1148 int 1149 linux_iopl(struct proc *p, struct linux_iopl_args *args) 1150 { 1151 int error; 1152 1153 error = suser(p->p_ucred, &p->p_acflag); 1154 if (error != 0) 1155 return error; 1156 if (securelevel > 0) 1157 return EPERM; 1158 p->p_md.md_regs->tf_eflags |= PSL_IOPL; 1159 return 0; 1160 } 1161 1162 int 1163 linux_nice(struct proc *p, struct linux_nice_args *args) 1164 { 1165 struct setpriority_args bsd_args; 1166 1167 bsd_args.which = PRIO_PROCESS; 1168 bsd_args.who = 0; /* current process */ 1169 bsd_args.prio = args->inc; 1170 return setpriority(p, &bsd_args); 1171 } 1172 1173 int 1174 linux_setgroups(p, uap) 1175 struct proc *p; 1176 struct linux_setgroups_args *uap; 1177 { 1178 struct pcred *pc = p->p_cred; 1179 linux_gid_t linux_gidset[NGROUPS]; 1180 gid_t *bsd_gidset; 1181 int ngrp, error; 1182 1183 if ((error = suser(pc->pc_ucred, &p->p_acflag))) 1184 return error; 1185 1186 if (uap->gidsetsize > NGROUPS) 1187 return EINVAL; 1188 1189 ngrp = uap->gidsetsize; 1190 pc->pc_ucred = crcopy(pc->pc_ucred); 1191 if (ngrp >= 1) { 1192 if ((error = copyin((caddr_t)uap->gidset, 1193 (caddr_t)linux_gidset, 1194 ngrp * sizeof(linux_gid_t)))) 1195 return error; 1196 1197 pc->pc_ucred->cr_ngroups = ngrp; 1198 1199 bsd_gidset = pc->pc_ucred->cr_groups; 1200 ngrp--; 1201 while (ngrp >= 0) { 1202 bsd_gidset[ngrp] = linux_gidset[ngrp]; 1203 ngrp--; 1204 } 1205 } 1206 else 1207 pc->pc_ucred->cr_ngroups = 1; 1208 1209 setsugid(p); 1210 return 0; 1211 } 1212 1213 int 1214 linux_getgroups(p, uap) 1215 struct proc *p; 1216 struct linux_getgroups_args *uap; 1217 { 1218 struct pcred *pc = p->p_cred; 1219 linux_gid_t linux_gidset[NGROUPS]; 1220 gid_t *bsd_gidset; 1221 int ngrp, error; 1222 1223 if ((ngrp = uap->gidsetsize) == 0) { 1224 p->p_retval[0] = pc->pc_ucred->cr_ngroups; 1225 return 0; 1226 } 1227 1228 if (ngrp < pc->pc_ucred->cr_ngroups) 1229 return EINVAL; 1230 1231 ngrp = 0; 1232 bsd_gidset = pc->pc_ucred->cr_groups; 1233 while (ngrp < pc->pc_ucred->cr_ngroups) { 1234 linux_gidset[ngrp] = bsd_gidset[ngrp]; 1235 ngrp++; 1236 } 1237 1238 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1239 ngrp * sizeof(linux_gid_t)))) 1240 return error; 1241 1242 p->p_retval[0] = ngrp; 1243 return (0); 1244 } 1245