1 /*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $Id: linux_misc.c,v 1.49 1998/12/24 21:21:20 julian Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/sysproto.h> 34 #include <sys/kernel.h> 35 #include <sys/mman.h> 36 #include <sys/proc.h> 37 #include <sys/fcntl.h> 38 #include <sys/imgact_aout.h> 39 #include <sys/mount.h> 40 #include <sys/namei.h> 41 #include <sys/resourcevar.h> 42 #include <sys/stat.h> 43 #include <sys/sysctl.h> 44 #ifdef COMPAT_LINUX_THREADS 45 #include <sys/unistd.h> 46 #endif /* COMPAT_LINUX_THREADS */ 47 #include <sys/vnode.h> 48 #include <sys/wait.h> 49 #include <sys/time.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 #include <vm/vm_kern.h> 54 #include <vm/vm_prot.h> 55 #include <vm/vm_map.h> 56 #include <vm/vm_extern.h> 57 58 #include <machine/frame.h> 59 #include <machine/psl.h> 60 61 #include <i386/linux/linux.h> 62 #include <i386/linux/linux_proto.h> 63 #include <i386/linux/linux_util.h> 64 65 int 66 linux_alarm(struct proc *p, struct linux_alarm_args *args) 67 { 68 struct itimerval it, old_it; 69 struct timeval tv; 70 int s; 71 72 #ifdef DEBUG 73 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 74 #endif 75 if (args->secs > 100000000) 76 return EINVAL; 77 it.it_value.tv_sec = (long)args->secs; 78 it.it_value.tv_usec = 0; 79 it.it_interval.tv_sec = 0; 80 it.it_interval.tv_usec = 0; 81 s = splsoftclock(); 82 old_it = p->p_realtimer; 83 getmicrouptime(&tv); 84 if (timevalisset(&old_it.it_value)) 85 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 86 if (it.it_value.tv_sec != 0) { 87 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 88 timevaladd(&it.it_value, &tv); 89 } 90 p->p_realtimer = it; 91 splx(s); 92 if (timevalcmp(&old_it.it_value, &tv, >)) { 93 timevalsub(&old_it.it_value, &tv); 94 if (old_it.it_value.tv_usec != 0) 95 old_it.it_value.tv_sec++; 96 p->p_retval[0] = old_it.it_value.tv_sec; 97 } 98 return 0; 99 } 100 101 int 102 linux_brk(struct proc *p, struct linux_brk_args *args) 103 { 104 #if 0 105 struct vmspace *vm = p->p_vmspace; 106 vm_offset_t new, old; 107 int error; 108 109 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 110 return EINVAL; 111 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 112 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 113 return ENOMEM; 114 115 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 116 new = round_page((vm_offset_t)args->dsend); 117 p->p_retval[0] = old; 118 if ((new-old) > 0) { 119 if (swap_pager_full) 120 return ENOMEM; 121 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 122 VM_PROT_ALL, VM_PROT_ALL, 0); 123 if (error) 124 return error; 125 vm->vm_dsize += btoc((new-old)); 126 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 127 } 128 return 0; 129 #else 130 struct vmspace *vm = p->p_vmspace; 131 vm_offset_t new, old; 132 struct obreak_args /* { 133 char * nsize; 134 } */ tmp; 135 136 #ifdef DEBUG 137 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 138 #endif 139 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 140 new = (vm_offset_t)args->dsend; 141 tmp.nsize = (char *) new; 142 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 143 p->p_retval[0] = (int)new; 144 else 145 p->p_retval[0] = (int)old; 146 147 return 0; 148 #endif 149 } 150 151 int 152 linux_uselib(struct proc *p, struct linux_uselib_args *args) 153 { 154 struct nameidata ni; 155 struct vnode *vp; 156 struct exec *a_out; 157 struct vattr attr; 158 vm_offset_t vmaddr; 159 unsigned long file_offset; 160 vm_offset_t buffer; 161 unsigned long bss_size; 162 int error; 163 caddr_t sg; 164 int locked; 165 166 sg = stackgap_init(); 167 CHECKALTEXIST(p, &sg, args->library); 168 169 #ifdef DEBUG 170 printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library); 171 #endif 172 173 a_out = NULL; 174 locked = 0; 175 vp = NULL; 176 177 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p); 178 if (error = namei(&ni)) 179 goto cleanup; 180 181 vp = ni.ni_vp; 182 if (vp == NULL) { 183 error = ENOEXEC; /* ?? */ 184 goto cleanup; 185 } 186 187 /* 188 * From here on down, we have a locked vnode that must be unlocked. 189 */ 190 locked++; 191 192 /* 193 * Writable? 194 */ 195 if (vp->v_writecount) { 196 error = ETXTBSY; 197 goto cleanup; 198 } 199 200 /* 201 * Executable? 202 */ 203 if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p)) 204 goto cleanup; 205 206 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 207 ((attr.va_mode & 0111) == 0) || 208 (attr.va_type != VREG)) { 209 error = ENOEXEC; 210 goto cleanup; 211 } 212 213 /* 214 * Sensible size? 215 */ 216 if (attr.va_size == 0) { 217 error = ENOEXEC; 218 goto cleanup; 219 } 220 221 /* 222 * Can we access it? 223 */ 224 if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) 225 goto cleanup; 226 227 if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) 228 goto cleanup; 229 230 /* 231 * Lock no longer needed 232 */ 233 VOP_UNLOCK(vp, 0, p); 234 locked = 0; 235 236 /* 237 * Pull in executable header into kernel_map 238 */ 239 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 240 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 241 if (error) 242 goto cleanup; 243 244 /* 245 * Is it a Linux binary ? 246 */ 247 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 248 error = ENOEXEC; 249 goto cleanup; 250 } 251 252 /* While we are here, we should REALLY do some more checks */ 253 254 /* 255 * Set file/virtual offset based on a.out variant. 256 */ 257 switch ((int)(a_out->a_magic & 0xffff)) { 258 case 0413: /* ZMAGIC */ 259 file_offset = 1024; 260 break; 261 case 0314: /* QMAGIC */ 262 file_offset = 0; 263 break; 264 default: 265 error = ENOEXEC; 266 goto cleanup; 267 } 268 269 bss_size = round_page(a_out->a_bss); 270 271 /* 272 * Check various fields in header for validity/bounds. 273 */ 274 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 275 error = ENOEXEC; 276 goto cleanup; 277 } 278 279 /* text + data can't exceed file size */ 280 if (a_out->a_data + a_out->a_text > attr.va_size) { 281 error = EFAULT; 282 goto cleanup; 283 } 284 285 /* 286 * text/data/bss must not exceed limits 287 * XXX: this is not complete. it should check current usage PLUS 288 * the resources needed by this library. 289 */ 290 if (a_out->a_text > MAXTSIZ || 291 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 292 error = ENOMEM; 293 goto cleanup; 294 } 295 296 /* 297 * prevent more writers 298 */ 299 vp->v_flag |= VTEXT; 300 301 /* 302 * Check if file_offset page aligned,. 303 * Currently we cannot handle misalinged file offsets, 304 * and so we read in the entire image (what a waste). 305 */ 306 if (file_offset & PAGE_MASK) { 307 #ifdef DEBUG 308 printf("uselib: Non page aligned binary %lu\n", file_offset); 309 #endif 310 /* 311 * Map text+data read/write/execute 312 */ 313 314 /* a_entry is the load address and is page aligned */ 315 vmaddr = trunc_page(a_out->a_entry); 316 317 /* get anon user mapping, read+write+execute */ 318 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 319 a_out->a_text + a_out->a_data, FALSE, 320 VM_PROT_ALL, VM_PROT_ALL, 0); 321 if (error) 322 goto cleanup; 323 324 /* map file into kernel_map */ 325 error = vm_mmap(kernel_map, &buffer, 326 round_page(a_out->a_text + a_out->a_data + file_offset), 327 VM_PROT_READ, VM_PROT_READ, 0, 328 (caddr_t)vp, trunc_page(file_offset)); 329 if (error) 330 goto cleanup; 331 332 /* copy from kernel VM space to user space */ 333 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 334 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 335 336 /* release temporary kernel space */ 337 vm_map_remove(kernel_map, buffer, 338 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 339 340 if (error) 341 goto cleanup; 342 } 343 else { 344 #ifdef DEBUG 345 printf("uselib: Page aligned binary %lu\n", file_offset); 346 #endif 347 /* 348 * for QMAGIC, a_entry is 20 bytes beyond the load address 349 * to skip the executable header 350 */ 351 vmaddr = trunc_page(a_out->a_entry); 352 353 /* 354 * Map it all into the process's space as a single copy-on-write 355 * "data" segment. 356 */ 357 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 358 a_out->a_text + a_out->a_data, 359 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 360 (caddr_t)vp, file_offset); 361 if (error) 362 goto cleanup; 363 } 364 #ifdef DEBUG 365 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 366 #endif 367 if (bss_size != 0) { 368 /* 369 * Calculate BSS start address 370 */ 371 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 372 373 /* 374 * allocate some 'anon' space 375 */ 376 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 377 bss_size, FALSE, 378 VM_PROT_ALL, VM_PROT_ALL, 0); 379 if (error) 380 goto cleanup; 381 } 382 383 cleanup: 384 /* 385 * Unlock vnode if needed 386 */ 387 if (locked) 388 VOP_UNLOCK(vp, 0, p); 389 390 /* 391 * Release the kernel mapping. 392 */ 393 if (a_out) 394 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 395 396 return error; 397 } 398 399 /* XXX move */ 400 struct linux_select_argv { 401 int nfds; 402 fd_set *readfds; 403 fd_set *writefds; 404 fd_set *exceptfds; 405 struct timeval *timeout; 406 }; 407 408 int 409 linux_select(struct proc *p, struct linux_select_args *args) 410 { 411 struct linux_select_argv linux_args; 412 struct linux_newselect_args newsel; 413 int error; 414 415 #ifdef SELECT_DEBUG 416 printf("Linux-emul(%d): select(%x)\n", 417 p->p_pid, args->ptr); 418 #endif 419 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 420 sizeof(linux_args)))) 421 return error; 422 423 newsel.nfds = linux_args.nfds; 424 newsel.readfds = linux_args.readfds; 425 newsel.writefds = linux_args.writefds; 426 newsel.exceptfds = linux_args.exceptfds; 427 newsel.timeout = linux_args.timeout; 428 429 return linux_newselect(p, &newsel); 430 } 431 432 int 433 linux_newselect(struct proc *p, struct linux_newselect_args *args) 434 { 435 struct select_args bsa; 436 struct timeval tv0, tv1, utv, *tvp; 437 caddr_t sg; 438 int error; 439 440 #ifdef DEBUG 441 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 442 (long)p->p_pid, args->nfds, (void *)args->readfds, 443 (void *)args->writefds, (void *)args->exceptfds, 444 (void *)args->timeout); 445 #endif 446 error = 0; 447 bsa.nd = args->nfds; 448 bsa.in = args->readfds; 449 bsa.ou = args->writefds; 450 bsa.ex = args->exceptfds; 451 bsa.tv = args->timeout; 452 453 /* 454 * Store current time for computation of the amount of 455 * time left. 456 */ 457 if (args->timeout) { 458 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 459 goto select_out; 460 #ifdef DEBUG 461 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 462 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 463 #endif 464 if (itimerfix(&utv)) { 465 /* 466 * The timeval was invalid. Convert it to something 467 * valid that will act as it does under Linux. 468 */ 469 sg = stackgap_init(); 470 tvp = stackgap_alloc(&sg, sizeof(utv)); 471 utv.tv_sec += utv.tv_usec / 1000000; 472 utv.tv_usec %= 1000000; 473 if (utv.tv_usec < 0) { 474 utv.tv_sec -= 1; 475 utv.tv_usec += 1000000; 476 } 477 if (utv.tv_sec < 0) 478 timevalclear(&utv); 479 if ((error = copyout(&utv, tvp, sizeof(utv)))) 480 goto select_out; 481 bsa.tv = tvp; 482 } 483 microtime(&tv0); 484 } 485 486 error = select(p, &bsa); 487 #ifdef DEBUG 488 printf("Linux-emul(%d): real select returns %d\n", 489 p->p_pid, error); 490 #endif 491 492 if (error) { 493 /* 494 * See fs/select.c in the Linux kernel. Without this, 495 * Maelstrom doesn't work. 496 */ 497 if (error == ERESTART) 498 error = EINTR; 499 goto select_out; 500 } 501 502 if (args->timeout) { 503 if (p->p_retval[0]) { 504 /* 505 * Compute how much time was left of the timeout, 506 * by subtracting the current time and the time 507 * before we started the call, and subtracting 508 * that result from the user-supplied value. 509 */ 510 microtime(&tv1); 511 timevalsub(&tv1, &tv0); 512 timevalsub(&utv, &tv1); 513 if (utv.tv_sec < 0) 514 timevalclear(&utv); 515 } else 516 timevalclear(&utv); 517 #ifdef DEBUG 518 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 519 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 520 #endif 521 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 522 goto select_out; 523 } 524 525 select_out: 526 #ifdef DEBUG 527 printf("Linux-emul(%d): newselect_out -> %d\n", 528 p->p_pid, error); 529 #endif 530 return error; 531 } 532 533 int 534 linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 535 { 536 struct proc *curproc; 537 538 #ifdef DEBUG 539 printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid); 540 #endif 541 if (args->pid != p->p_pid) { 542 if (!(curproc = pfind(args->pid))) 543 return ESRCH; 544 } 545 else 546 curproc = p; 547 p->p_retval[0] = curproc->p_pgid; 548 return 0; 549 } 550 551 int 552 linux_fork(struct proc *p, struct linux_fork_args *args) 553 { 554 int error; 555 556 #ifdef DEBUG 557 printf("Linux-emul(%d): fork()\n", p->p_pid); 558 #endif 559 if (error = fork(p, (struct fork_args *)args)) 560 return error; 561 if (p->p_retval[1] == 1) 562 p->p_retval[0] = 0; 563 return 0; 564 } 565 566 #ifndef COMPAT_LINUX_THREADS 567 int 568 linux_clone(struct proc *p, struct linux_clone_args *args) 569 { 570 printf("linux_clone(%d): Not enabled\n", p->p_pid); 571 return (EOPNOTSUPP); 572 } 573 574 #else 575 #define CLONE_VM 0x100 576 #define CLONE_FS 0x200 577 #define CLONE_FILES 0x400 578 #define CLONE_SIGHAND 0x800 579 #define CLONE_PID 0x1000 580 581 int 582 linux_clone(struct proc *p, struct linux_clone_args *args) 583 { 584 int error, ff = RFPROC; 585 struct proc *p2; 586 int exit_signal; 587 vm_offset_t start; 588 struct rfork_args rf_args; 589 590 #ifdef SMP 591 printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid); 592 return (EOPNOTSUPP); 593 #endif 594 #ifdef DEBUG 595 if (args->flags & CLONE_PID) 596 printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid); 597 printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid, 598 (unsigned int)args->flags, (unsigned int)args->stack); 599 #endif 600 601 if (!args->stack) 602 return (EINVAL); 603 exit_signal = args->flags & 0x000000ff; 604 if (exit_signal >= LINUX_NSIG) 605 return EINVAL; 606 exit_signal = linux_to_bsd_signal[exit_signal]; 607 608 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 609 ff |= RFTHREAD; 610 611 if (args->flags & CLONE_VM) 612 ff |= RFMEM; 613 if (args->flags & CLONE_SIGHAND) 614 ff |= RFSIGSHARE; 615 if (!(args->flags & CLONE_FILES)) 616 ff |= RFFDG; 617 618 error = 0; 619 start = 0; 620 621 rf_args.flags = ff; 622 if (error = rfork(p, &rf_args)) 623 return error; 624 625 p2 = pfind(p->p_retval[0]); 626 if (p2 == 0) 627 return ESRCH; 628 629 p2->p_sigparent = exit_signal; 630 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 631 632 #ifdef DEBUG 633 printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid); 634 #endif 635 return 0; 636 } 637 638 #endif /* COMPAT_LINUX_THREADS */ 639 /* XXX move */ 640 struct linux_mmap_argv { 641 linux_caddr_t addr; 642 int len; 643 int prot; 644 int flags; 645 int fd; 646 int pos; 647 }; 648 649 #ifdef COMPAT_LINUX_THREADS 650 #define STACK_SIZE (2 * 1024 * 1024) 651 #define GUARD_SIZE (4 * PAGE_SIZE) 652 653 #endif /* COMPAT_LINUX_THREADS */ 654 int 655 linux_mmap(struct proc *p, struct linux_mmap_args *args) 656 { 657 struct mmap_args /* { 658 caddr_t addr; 659 size_t len; 660 int prot; 661 int flags; 662 int fd; 663 long pad; 664 off_t pos; 665 } */ bsd_args; 666 int error; 667 struct linux_mmap_argv linux_args; 668 669 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 670 sizeof(linux_args)))) 671 return error; 672 #ifdef DEBUG 673 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n", 674 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 675 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 676 #endif 677 bsd_args.flags = 0; 678 if (linux_args.flags & LINUX_MAP_SHARED) 679 bsd_args.flags |= MAP_SHARED; 680 if (linux_args.flags & LINUX_MAP_PRIVATE) 681 bsd_args.flags |= MAP_PRIVATE; 682 if (linux_args.flags & LINUX_MAP_FIXED) 683 bsd_args.flags |= MAP_FIXED; 684 if (linux_args.flags & LINUX_MAP_ANON) 685 bsd_args.flags |= MAP_ANON; 686 #ifndef COMPAT_LINUX_THREADS 687 bsd_args.addr = linux_args.addr; 688 bsd_args.len = linux_args.len; 689 #else 690 691 /*#if !defined(USE_VM_STACK) && !defined(USE_VM_STACK_FOR_EXEC)*/ 692 /* Linux Threads will map into the proc stack space, unless 693 we prevent it. This causes problems if we're not using 694 our VM_STACK options. 695 */ 696 if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ)) 697 return (EINVAL); 698 /*#endif*/ 699 700 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 701 702 #ifdef USE_VM_STACK 703 /* USE_VM_STACK is defined (or not) in vm/vm_map.h */ 704 bsd_args.flags |= MAP_STACK; 705 #endif 706 707 /* The linux MAP_GROWSDOWN option does not limit auto 708 growth of the region. Linux mmap with this option 709 takes as addr the inital BOS, and as len, the initial 710 region size. It can then grow down from addr without 711 limit. However, linux threads has an implicit internal 712 limit to stack size of STACK_SIZE. Its just not 713 enforced explicitly in linux. But, here we impose 714 a limit of (STACK_SIZE - GUARD_SIZE) on the stack 715 region, since we can do this with our mmap. 716 717 Our mmap with MAP_STACK takes addr as the maximum 718 downsize limit on BOS, and as len the max size of 719 the region. It them maps the top SGROWSIZ bytes, 720 and autgrows the region down, up to the limit 721 in addr. 722 723 If we don't use the MAP_STACK option, the effect 724 of this code is to allocate a stack region of a 725 fixed size of (STACK_SIZE - GUARD_SIZE). 726 */ 727 728 /* This gives us TOS */ 729 bsd_args.addr = linux_args.addr + linux_args.len; 730 731 /* This gives us our maximum stack size */ 732 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 733 bsd_args.len = linux_args.len; 734 else 735 bsd_args.len = STACK_SIZE - GUARD_SIZE; 736 737 /* This gives us a new BOS. If we're using VM_STACK, then 738 mmap will just map the top SGROWSIZ bytes, and let 739 the stack grow down to the limit at BOS. If we're 740 not using VM_STACK we map the full stack, since we 741 don't have a way to autogrow it. 742 */ 743 bsd_args.addr -= bsd_args.len; 744 745 } else { 746 bsd_args.addr = linux_args.addr; 747 bsd_args.len = linux_args.len; 748 } 749 #endif /* COMPAT_LINUX_THREADS */ 750 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 751 bsd_args.fd = linux_args.fd; 752 bsd_args.pos = linux_args.pos; 753 bsd_args.pad = 0; 754 return mmap(p, &bsd_args); 755 } 756 757 int 758 linux_mremap(struct proc *p, struct linux_mremap_args *args) 759 { 760 struct munmap_args /* { 761 void *addr; 762 size_t len; 763 } */ bsd_args; 764 int error = 0; 765 766 #ifdef DEBUG 767 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 768 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 769 args->flags); 770 #endif 771 args->new_len = round_page(args->new_len); 772 args->old_len = round_page(args->old_len); 773 774 if (args->new_len > args->old_len) { 775 p->p_retval[0] = 0; 776 return ENOMEM; 777 } 778 779 if (args->new_len < args->old_len) { 780 bsd_args.addr = args->addr + args->new_len; 781 bsd_args.len = args->old_len - args->new_len; 782 error = munmap(p, &bsd_args); 783 } 784 785 p->p_retval[0] = error ? 0 : (int)args->addr; 786 return error; 787 } 788 789 int 790 linux_msync(struct proc *p, struct linux_msync_args *args) 791 { 792 struct msync_args bsd_args; 793 794 bsd_args.addr = args->addr; 795 bsd_args.len = args->len; 796 bsd_args.flags = 0; /* XXX ignore */ 797 798 return msync(p, &bsd_args); 799 } 800 801 int 802 linux_pipe(struct proc *p, struct linux_pipe_args *args) 803 { 804 int error; 805 int reg_edx; 806 807 #ifdef DEBUG 808 printf("Linux-emul(%d): pipe(*)\n", p->p_pid); 809 #endif 810 reg_edx = p->p_retval[1]; 811 if (error = pipe(p, 0)) { 812 p->p_retval[1] = reg_edx; 813 return error; 814 } 815 816 if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) { 817 p->p_retval[1] = reg_edx; 818 return error; 819 } 820 821 p->p_retval[1] = reg_edx; 822 p->p_retval[0] = 0; 823 return 0; 824 } 825 826 int 827 linux_time(struct proc *p, struct linux_time_args *args) 828 { 829 struct timeval tv; 830 linux_time_t tm; 831 int error; 832 833 #ifdef DEBUG 834 printf("Linux-emul(%d): time(*)\n", p->p_pid); 835 #endif 836 microtime(&tv); 837 tm = tv.tv_sec; 838 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 839 return error; 840 p->p_retval[0] = tm; 841 return 0; 842 } 843 844 struct linux_times_argv { 845 long tms_utime; 846 long tms_stime; 847 long tms_cutime; 848 long tms_cstime; 849 }; 850 851 #define CLK_TCK 100 /* Linux uses 100 */ 852 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 853 854 int 855 linux_times(struct proc *p, struct linux_times_args *args) 856 { 857 struct timeval tv; 858 struct linux_times_argv tms; 859 struct rusage ru; 860 int error; 861 862 #ifdef DEBUG 863 printf("Linux-emul(%d): times(*)\n", p->p_pid); 864 #endif 865 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 866 867 tms.tms_utime = CONVTCK(ru.ru_utime); 868 tms.tms_stime = CONVTCK(ru.ru_stime); 869 870 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 871 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 872 873 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 874 sizeof(struct linux_times_argv)))) 875 return error; 876 877 microuptime(&tv); 878 p->p_retval[0] = (int)CONVTCK(tv); 879 return 0; 880 } 881 882 /* XXX move */ 883 struct linux_newuname_t { 884 char sysname[65]; 885 char nodename[65]; 886 char release[65]; 887 char version[65]; 888 char machine[65]; 889 char domainname[65]; 890 }; 891 892 int 893 linux_newuname(struct proc *p, struct linux_newuname_args *args) 894 { 895 struct linux_newuname_t linux_newuname; 896 897 #ifdef DEBUG 898 printf("Linux-emul(%d): newuname(*)\n", p->p_pid); 899 #endif 900 bzero(&linux_newuname, sizeof(struct linux_newuname_t)); 901 strncpy(linux_newuname.sysname, ostype, 902 sizeof(linux_newuname.sysname) - 1); 903 strncpy(linux_newuname.nodename, hostname, 904 sizeof(linux_newuname.nodename) - 1); 905 strncpy(linux_newuname.release, osrelease, 906 sizeof(linux_newuname.release) - 1); 907 strncpy(linux_newuname.version, version, 908 sizeof(linux_newuname.version) - 1); 909 strncpy(linux_newuname.machine, machine, 910 sizeof(linux_newuname.machine) - 1); 911 strncpy(linux_newuname.domainname, domainname, 912 sizeof(linux_newuname.domainname) - 1); 913 return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf, 914 sizeof(struct linux_newuname_t))); 915 } 916 917 struct linux_utimbuf { 918 linux_time_t l_actime; 919 linux_time_t l_modtime; 920 }; 921 922 int 923 linux_utime(struct proc *p, struct linux_utime_args *args) 924 { 925 struct utimes_args /* { 926 char *path; 927 struct timeval *tptr; 928 } */ bsdutimes; 929 struct timeval tv[2], *tvp; 930 struct linux_utimbuf lut; 931 int error; 932 caddr_t sg; 933 934 sg = stackgap_init(); 935 CHECKALTEXIST(p, &sg, args->fname); 936 937 #ifdef DEBUG 938 printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname); 939 #endif 940 if (args->times) { 941 if ((error = copyin(args->times, &lut, sizeof lut))) 942 return error; 943 tv[0].tv_sec = lut.l_actime; 944 tv[0].tv_usec = 0; 945 tv[1].tv_sec = lut.l_modtime; 946 tv[1].tv_usec = 0; 947 /* so that utimes can copyin */ 948 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 949 if ((error = copyout(tv, tvp, sizeof(tv)))) 950 return error; 951 bsdutimes.tptr = tvp; 952 } else 953 bsdutimes.tptr = NULL; 954 955 bsdutimes.path = args->fname; 956 return utimes(p, &bsdutimes); 957 } 958 959 int 960 linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 961 { 962 struct wait_args /* { 963 int pid; 964 int *status; 965 int options; 966 struct rusage *rusage; 967 } */ tmp; 968 int error, tmpstat; 969 970 #ifdef DEBUG 971 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 972 (long)p->p_pid, args->pid, (void *)args->status, args->options); 973 #endif 974 tmp.pid = args->pid; 975 tmp.status = args->status; 976 #ifndef COMPAT_LINUX_THREADS 977 tmp.options = args->options; 978 #else 979 /* This filters out the linux option _WCLONE. I don't 980 think we need it, but I could be wrong. If we need 981 it, we need to fix wait4, since it will give us an 982 error return of EINVAL if we pass in _WCLONE, and 983 of course, it won't do anything with it. 984 */ 985 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 986 #endif /* COMPAT_LINUX_THREADS */ 987 tmp.rusage = NULL; 988 989 if (error = wait4(p, &tmp)) 990 #ifndef COMPAT_LINUX_THREADS 991 return error; 992 #else 993 return error; 994 #endif /* COMPAT_LINUX_THREADS */ 995 if (args->status) { 996 if (error = copyin(args->status, &tmpstat, sizeof(int))) 997 return error; 998 if (WIFSIGNALED(tmpstat)) 999 tmpstat = (tmpstat & 0xffffff80) | 1000 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1001 else if (WIFSTOPPED(tmpstat)) 1002 tmpstat = (tmpstat & 0xffff00ff) | 1003 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1004 return copyout(&tmpstat, args->status, sizeof(int)); 1005 } else 1006 return 0; 1007 } 1008 1009 int 1010 linux_wait4(struct proc *p, struct linux_wait4_args *args) 1011 { 1012 struct wait_args /* { 1013 int pid; 1014 int *status; 1015 int options; 1016 struct rusage *rusage; 1017 } */ tmp; 1018 int error, tmpstat; 1019 1020 #ifdef DEBUG 1021 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 1022 (long)p->p_pid, args->pid, (void *)args->status, args->options, 1023 (void *)args->rusage); 1024 #endif 1025 tmp.pid = args->pid; 1026 tmp.status = args->status; 1027 #ifndef COMPAT_LINUX_THREADS 1028 tmp.options = args->options; 1029 #else 1030 /* This filters out the linux option _WCLONE. I don't 1031 think we need it, but I could be wrong. If we need 1032 it, we need to fix wait4, since it will give us an 1033 error return of EINVAL if we pass in _WCLONE, and 1034 of course, it won't do anything with it. 1035 */ 1036 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1037 #endif /* COMPAT_LINUX_THREADS */ 1038 tmp.rusage = args->rusage; 1039 1040 if (error = wait4(p, &tmp)) 1041 return error; 1042 1043 p->p_siglist &= ~sigmask(SIGCHLD); 1044 1045 if (args->status) { 1046 if (error = copyin(args->status, &tmpstat, sizeof(int))) 1047 return error; 1048 if (WIFSIGNALED(tmpstat)) 1049 tmpstat = (tmpstat & 0xffffff80) | 1050 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1051 else if (WIFSTOPPED(tmpstat)) 1052 tmpstat = (tmpstat & 0xffff00ff) | 1053 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1054 return copyout(&tmpstat, args->status, sizeof(int)); 1055 } else 1056 return 0; 1057 } 1058 1059 int 1060 linux_mknod(struct proc *p, struct linux_mknod_args *args) 1061 { 1062 caddr_t sg; 1063 struct mknod_args bsd_mknod; 1064 struct mkfifo_args bsd_mkfifo; 1065 1066 sg = stackgap_init(); 1067 1068 CHECKALTCREAT(p, &sg, args->path); 1069 1070 #ifdef DEBUG 1071 printf("Linux-emul(%d): mknod(%s, %d, %d)\n", 1072 p->p_pid, args->path, args->mode, args->dev); 1073 #endif 1074 1075 if (args->mode & S_IFIFO) { 1076 bsd_mkfifo.path = args->path; 1077 bsd_mkfifo.mode = args->mode; 1078 return mkfifo(p, &bsd_mkfifo); 1079 } else { 1080 bsd_mknod.path = args->path; 1081 bsd_mknod.mode = args->mode; 1082 bsd_mknod.dev = args->dev; 1083 return mknod(p, &bsd_mknod); 1084 } 1085 } 1086 1087 /* 1088 * UGH! This is just about the dumbest idea I've ever heard!! 1089 */ 1090 int 1091 linux_personality(struct proc *p, struct linux_personality_args *args) 1092 { 1093 #ifdef DEBUG 1094 printf("Linux-emul(%d): personality(%d)\n", 1095 p->p_pid, args->per); 1096 #endif 1097 if (args->per != 0) 1098 return EINVAL; 1099 1100 /* Yes Jim, it's still a Linux... */ 1101 p->p_retval[0] = 0; 1102 return 0; 1103 } 1104 1105 /* 1106 * Wrappers for get/setitimer for debugging.. 1107 */ 1108 int 1109 linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1110 { 1111 struct setitimer_args bsa; 1112 struct itimerval foo; 1113 int error; 1114 1115 #ifdef DEBUG 1116 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1117 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1118 #endif 1119 bsa.which = args->which; 1120 bsa.itv = args->itv; 1121 bsa.oitv = args->oitv; 1122 if (args->itv) { 1123 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1124 sizeof(foo)))) 1125 return error; 1126 #ifdef DEBUG 1127 printf("setitimer: value: sec: %ld, usec: %ld\n", 1128 foo.it_value.tv_sec, foo.it_value.tv_usec); 1129 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1130 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1131 #endif 1132 } 1133 return setitimer(p, &bsa); 1134 } 1135 1136 int 1137 linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1138 { 1139 struct getitimer_args bsa; 1140 #ifdef DEBUG 1141 printf("Linux-emul(%ld): getitimer(%p)\n", 1142 (long)p->p_pid, (void *)args->itv); 1143 #endif 1144 bsa.which = args->which; 1145 bsa.itv = args->itv; 1146 return getitimer(p, &bsa); 1147 } 1148 1149 int 1150 linux_iopl(struct proc *p, struct linux_iopl_args *args) 1151 { 1152 int error; 1153 1154 error = suser(p->p_ucred, &p->p_acflag); 1155 if (error != 0) 1156 return error; 1157 if (securelevel > 0) 1158 return EPERM; 1159 p->p_md.md_regs->tf_eflags |= PSL_IOPL; 1160 return 0; 1161 } 1162 1163 int 1164 linux_nice(struct proc *p, struct linux_nice_args *args) 1165 { 1166 struct setpriority_args bsd_args; 1167 1168 bsd_args.which = PRIO_PROCESS; 1169 bsd_args.who = 0; /* current process */ 1170 bsd_args.prio = args->inc; 1171 return setpriority(p, &bsd_args); 1172 } 1173 1174 int 1175 linux_setgroups(p, uap) 1176 struct proc *p; 1177 struct linux_setgroups_args *uap; 1178 { 1179 struct pcred *pc = p->p_cred; 1180 linux_gid_t linux_gidset[NGROUPS]; 1181 gid_t *bsd_gidset; 1182 int ngrp, error; 1183 1184 if ((error = suser(pc->pc_ucred, &p->p_acflag))) 1185 return error; 1186 1187 if (uap->gidsetsize > NGROUPS) 1188 return EINVAL; 1189 1190 ngrp = uap->gidsetsize; 1191 pc->pc_ucred = crcopy(pc->pc_ucred); 1192 if (ngrp >= 1) { 1193 if ((error = copyin((caddr_t)uap->gidset, 1194 (caddr_t)linux_gidset, 1195 ngrp * sizeof(linux_gid_t)))) 1196 return error; 1197 1198 pc->pc_ucred->cr_ngroups = ngrp; 1199 1200 bsd_gidset = pc->pc_ucred->cr_groups; 1201 ngrp--; 1202 while (ngrp >= 0) { 1203 bsd_gidset[ngrp] = linux_gidset[ngrp]; 1204 ngrp--; 1205 } 1206 } 1207 else 1208 pc->pc_ucred->cr_ngroups = 1; 1209 1210 setsugid(p); 1211 return 0; 1212 } 1213 1214 int 1215 linux_getgroups(p, uap) 1216 struct proc *p; 1217 struct linux_getgroups_args *uap; 1218 { 1219 struct pcred *pc = p->p_cred; 1220 linux_gid_t linux_gidset[NGROUPS]; 1221 gid_t *bsd_gidset; 1222 int ngrp, error; 1223 1224 if ((ngrp = uap->gidsetsize) == 0) { 1225 p->p_retval[0] = pc->pc_ucred->cr_ngroups; 1226 return 0; 1227 } 1228 1229 if (ngrp < pc->pc_ucred->cr_ngroups) 1230 return EINVAL; 1231 1232 ngrp = 0; 1233 bsd_gidset = pc->pc_ucred->cr_groups; 1234 while (ngrp < pc->pc_ucred->cr_ngroups) { 1235 linux_gidset[ngrp] = bsd_gidset[ngrp]; 1236 ngrp++; 1237 } 1238 1239 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1240 ngrp * sizeof(linux_gid_t)))) 1241 return error; 1242 1243 p->p_retval[0] = ngrp; 1244 return (0); 1245 } 1246