1 /*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include "opt_compat.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/sysproto.h> 36 #include <sys/kernel.h> 37 #include <sys/mman.h> 38 #include <sys/proc.h> 39 #include <sys/fcntl.h> 40 #include <sys/imgact_aout.h> 41 #include <sys/mount.h> 42 #include <sys/namei.h> 43 #include <sys/resourcevar.h> 44 #include <sys/stat.h> 45 #include <sys/sysctl.h> 46 #include <sys/unistd.h> 47 #include <sys/vnode.h> 48 #include <sys/wait.h> 49 #include <sys/time.h> 50 51 #include <vm/vm.h> 52 #include <vm/pmap.h> 53 #include <vm/vm_kern.h> 54 #include <vm/vm_prot.h> 55 #include <vm/vm_map.h> 56 #include <vm/vm_extern.h> 57 58 #include <machine/frame.h> 59 #include <machine/psl.h> 60 61 #include <i386/linux/linux.h> 62 #include <i386/linux/linux_proto.h> 63 #include <i386/linux/linux_util.h> 64 #include <i386/linux/linux_mib.h> 65 66 #include <posix4/sched.h> 67 68 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = 69 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 70 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 71 RLIMIT_MEMLOCK, -1 72 }; 73 74 int 75 linux_alarm(struct proc *p, struct linux_alarm_args *args) 76 { 77 struct itimerval it, old_it; 78 struct timeval tv; 79 int s; 80 81 #ifdef DEBUG 82 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 83 #endif 84 if (args->secs > 100000000) 85 return EINVAL; 86 it.it_value.tv_sec = (long)args->secs; 87 it.it_value.tv_usec = 0; 88 it.it_interval.tv_sec = 0; 89 it.it_interval.tv_usec = 0; 90 s = splsoftclock(); 91 old_it = p->p_realtimer; 92 getmicrouptime(&tv); 93 if (timevalisset(&old_it.it_value)) 94 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 95 if (it.it_value.tv_sec != 0) { 96 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 97 timevaladd(&it.it_value, &tv); 98 } 99 p->p_realtimer = it; 100 splx(s); 101 if (timevalcmp(&old_it.it_value, &tv, >)) { 102 timevalsub(&old_it.it_value, &tv); 103 if (old_it.it_value.tv_usec != 0) 104 old_it.it_value.tv_sec++; 105 p->p_retval[0] = old_it.it_value.tv_sec; 106 } 107 return 0; 108 } 109 110 int 111 linux_brk(struct proc *p, struct linux_brk_args *args) 112 { 113 #if 0 114 struct vmspace *vm = p->p_vmspace; 115 vm_offset_t new, old; 116 int error; 117 118 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 119 return EINVAL; 120 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 121 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 122 return ENOMEM; 123 124 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 125 new = round_page((vm_offset_t)args->dsend); 126 p->p_retval[0] = old; 127 if ((new-old) > 0) { 128 if (swap_pager_full) 129 return ENOMEM; 130 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 131 VM_PROT_ALL, VM_PROT_ALL, 0); 132 if (error) 133 return error; 134 vm->vm_dsize += btoc((new-old)); 135 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 136 } 137 return 0; 138 #else 139 struct vmspace *vm = p->p_vmspace; 140 vm_offset_t new, old; 141 struct obreak_args /* { 142 char * nsize; 143 } */ tmp; 144 145 #ifdef DEBUG 146 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 147 #endif 148 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 149 new = (vm_offset_t)args->dsend; 150 tmp.nsize = (char *) new; 151 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 152 p->p_retval[0] = (int)new; 153 else 154 p->p_retval[0] = (int)old; 155 156 return 0; 157 #endif 158 } 159 160 int 161 linux_uselib(struct proc *p, struct linux_uselib_args *args) 162 { 163 struct nameidata ni; 164 struct vnode *vp; 165 struct exec *a_out; 166 struct vattr attr; 167 vm_offset_t vmaddr; 168 unsigned long file_offset; 169 vm_offset_t buffer; 170 unsigned long bss_size; 171 int error; 172 caddr_t sg; 173 int locked; 174 175 sg = stackgap_init(); 176 CHECKALTEXIST(p, &sg, args->library); 177 178 #ifdef DEBUG 179 printf("Linux-emul(%ld): uselib(%s)\n", (long)p->p_pid, args->library); 180 #endif 181 182 a_out = NULL; 183 locked = 0; 184 vp = NULL; 185 186 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p); 187 error = namei(&ni); 188 if (error) 189 goto cleanup; 190 191 vp = ni.ni_vp; 192 if (vp == NULL) { 193 error = ENOEXEC; /* ?? */ 194 goto cleanup; 195 } 196 197 /* 198 * From here on down, we have a locked vnode that must be unlocked. 199 */ 200 locked++; 201 202 /* 203 * Writable? 204 */ 205 if (vp->v_writecount) { 206 error = ETXTBSY; 207 goto cleanup; 208 } 209 210 /* 211 * Executable? 212 */ 213 error = VOP_GETATTR(vp, &attr, p->p_ucred, p); 214 if (error) 215 goto cleanup; 216 217 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 218 ((attr.va_mode & 0111) == 0) || 219 (attr.va_type != VREG)) { 220 error = ENOEXEC; 221 goto cleanup; 222 } 223 224 /* 225 * Sensible size? 226 */ 227 if (attr.va_size == 0) { 228 error = ENOEXEC; 229 goto cleanup; 230 } 231 232 /* 233 * Can we access it? 234 */ 235 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); 236 if (error) 237 goto cleanup; 238 239 error = VOP_OPEN(vp, FREAD, p->p_ucred, p); 240 if (error) 241 goto cleanup; 242 243 /* 244 * Lock no longer needed 245 */ 246 VOP_UNLOCK(vp, 0, p); 247 locked = 0; 248 249 /* 250 * Pull in executable header into kernel_map 251 */ 252 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 253 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 254 if (error) 255 goto cleanup; 256 257 /* 258 * Is it a Linux binary ? 259 */ 260 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 261 error = ENOEXEC; 262 goto cleanup; 263 } 264 265 /* While we are here, we should REALLY do some more checks */ 266 267 /* 268 * Set file/virtual offset based on a.out variant. 269 */ 270 switch ((int)(a_out->a_magic & 0xffff)) { 271 case 0413: /* ZMAGIC */ 272 file_offset = 1024; 273 break; 274 case 0314: /* QMAGIC */ 275 file_offset = 0; 276 break; 277 default: 278 error = ENOEXEC; 279 goto cleanup; 280 } 281 282 bss_size = round_page(a_out->a_bss); 283 284 /* 285 * Check various fields in header for validity/bounds. 286 */ 287 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 288 error = ENOEXEC; 289 goto cleanup; 290 } 291 292 /* text + data can't exceed file size */ 293 if (a_out->a_data + a_out->a_text > attr.va_size) { 294 error = EFAULT; 295 goto cleanup; 296 } 297 298 /* 299 * text/data/bss must not exceed limits 300 * XXX: this is not complete. it should check current usage PLUS 301 * the resources needed by this library. 302 */ 303 if (a_out->a_text > MAXTSIZ || 304 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 305 error = ENOMEM; 306 goto cleanup; 307 } 308 309 /* 310 * prevent more writers 311 */ 312 vp->v_flag |= VTEXT; 313 314 /* 315 * Check if file_offset page aligned,. 316 * Currently we cannot handle misalinged file offsets, 317 * and so we read in the entire image (what a waste). 318 */ 319 if (file_offset & PAGE_MASK) { 320 #ifdef DEBUG 321 printf("uselib: Non page aligned binary %lu\n", file_offset); 322 #endif 323 /* 324 * Map text+data read/write/execute 325 */ 326 327 /* a_entry is the load address and is page aligned */ 328 vmaddr = trunc_page(a_out->a_entry); 329 330 /* get anon user mapping, read+write+execute */ 331 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 332 a_out->a_text + a_out->a_data, FALSE, 333 VM_PROT_ALL, VM_PROT_ALL, 0); 334 if (error) 335 goto cleanup; 336 337 /* map file into kernel_map */ 338 error = vm_mmap(kernel_map, &buffer, 339 round_page(a_out->a_text + a_out->a_data + file_offset), 340 VM_PROT_READ, VM_PROT_READ, 0, 341 (caddr_t)vp, trunc_page(file_offset)); 342 if (error) 343 goto cleanup; 344 345 /* copy from kernel VM space to user space */ 346 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 347 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 348 349 /* release temporary kernel space */ 350 vm_map_remove(kernel_map, buffer, 351 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 352 353 if (error) 354 goto cleanup; 355 } 356 else { 357 #ifdef DEBUG 358 printf("uselib: Page aligned binary %lu\n", file_offset); 359 #endif 360 /* 361 * for QMAGIC, a_entry is 20 bytes beyond the load address 362 * to skip the executable header 363 */ 364 vmaddr = trunc_page(a_out->a_entry); 365 366 /* 367 * Map it all into the process's space as a single copy-on-write 368 * "data" segment. 369 */ 370 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 371 a_out->a_text + a_out->a_data, 372 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 373 (caddr_t)vp, file_offset); 374 if (error) 375 goto cleanup; 376 } 377 #ifdef DEBUG 378 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 379 #endif 380 if (bss_size != 0) { 381 /* 382 * Calculate BSS start address 383 */ 384 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 385 386 /* 387 * allocate some 'anon' space 388 */ 389 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 390 bss_size, FALSE, 391 VM_PROT_ALL, VM_PROT_ALL, 0); 392 if (error) 393 goto cleanup; 394 } 395 396 cleanup: 397 /* 398 * Unlock vnode if needed 399 */ 400 if (locked) 401 VOP_UNLOCK(vp, 0, p); 402 403 /* 404 * Release the kernel mapping. 405 */ 406 if (a_out) 407 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 408 409 return error; 410 } 411 412 /* XXX move */ 413 struct linux_select_argv { 414 int nfds; 415 fd_set *readfds; 416 fd_set *writefds; 417 fd_set *exceptfds; 418 struct timeval *timeout; 419 }; 420 421 int 422 linux_select(struct proc *p, struct linux_select_args *args) 423 { 424 struct linux_select_argv linux_args; 425 struct linux_newselect_args newsel; 426 int error; 427 428 #ifdef SELECT_DEBUG 429 printf("Linux-emul(%ld): select(%x)\n", (long)p->p_pid, args->ptr); 430 #endif 431 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 432 sizeof(linux_args)))) 433 return error; 434 435 newsel.nfds = linux_args.nfds; 436 newsel.readfds = linux_args.readfds; 437 newsel.writefds = linux_args.writefds; 438 newsel.exceptfds = linux_args.exceptfds; 439 newsel.timeout = linux_args.timeout; 440 441 return linux_newselect(p, &newsel); 442 } 443 444 int 445 linux_newselect(struct proc *p, struct linux_newselect_args *args) 446 { 447 struct select_args bsa; 448 struct timeval tv0, tv1, utv, *tvp; 449 caddr_t sg; 450 int error; 451 452 #ifdef DEBUG 453 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 454 (long)p->p_pid, args->nfds, (void *)args->readfds, 455 (void *)args->writefds, (void *)args->exceptfds, 456 (void *)args->timeout); 457 #endif 458 error = 0; 459 bsa.nd = args->nfds; 460 bsa.in = args->readfds; 461 bsa.ou = args->writefds; 462 bsa.ex = args->exceptfds; 463 bsa.tv = args->timeout; 464 465 /* 466 * Store current time for computation of the amount of 467 * time left. 468 */ 469 if (args->timeout) { 470 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 471 goto select_out; 472 #ifdef DEBUG 473 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 474 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 475 #endif 476 if (itimerfix(&utv)) { 477 /* 478 * The timeval was invalid. Convert it to something 479 * valid that will act as it does under Linux. 480 */ 481 sg = stackgap_init(); 482 tvp = stackgap_alloc(&sg, sizeof(utv)); 483 utv.tv_sec += utv.tv_usec / 1000000; 484 utv.tv_usec %= 1000000; 485 if (utv.tv_usec < 0) { 486 utv.tv_sec -= 1; 487 utv.tv_usec += 1000000; 488 } 489 if (utv.tv_sec < 0) 490 timevalclear(&utv); 491 if ((error = copyout(&utv, tvp, sizeof(utv)))) 492 goto select_out; 493 bsa.tv = tvp; 494 } 495 microtime(&tv0); 496 } 497 498 error = select(p, &bsa); 499 #ifdef DEBUG 500 printf("Linux-emul(%ld): real select returns %d\n", (long)p->p_pid, error); 501 #endif 502 503 if (error) { 504 /* 505 * See fs/select.c in the Linux kernel. Without this, 506 * Maelstrom doesn't work. 507 */ 508 if (error == ERESTART) 509 error = EINTR; 510 goto select_out; 511 } 512 513 if (args->timeout) { 514 if (p->p_retval[0]) { 515 /* 516 * Compute how much time was left of the timeout, 517 * by subtracting the current time and the time 518 * before we started the call, and subtracting 519 * that result from the user-supplied value. 520 */ 521 microtime(&tv1); 522 timevalsub(&tv1, &tv0); 523 timevalsub(&utv, &tv1); 524 if (utv.tv_sec < 0) 525 timevalclear(&utv); 526 } else 527 timevalclear(&utv); 528 #ifdef DEBUG 529 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 530 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 531 #endif 532 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 533 goto select_out; 534 } 535 536 select_out: 537 #ifdef DEBUG 538 printf("Linux-emul(%ld): newselect_out -> %d\n", (long)p->p_pid, error); 539 #endif 540 return error; 541 } 542 543 int 544 linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 545 { 546 struct proc *curp; 547 548 #ifdef DEBUG 549 printf("Linux-emul(%ld): getpgid(%d)\n", (long)p->p_pid, args->pid); 550 #endif 551 if (args->pid != p->p_pid) { 552 if (!(curp = pfind(args->pid))) 553 return ESRCH; 554 } 555 else 556 curp = p; 557 p->p_retval[0] = curp->p_pgid; 558 return 0; 559 } 560 561 int 562 linux_fork(struct proc *p, struct linux_fork_args *args) 563 { 564 int error; 565 566 #ifdef DEBUG 567 printf("Linux-emul(%ld): fork()\n", (long)p->p_pid); 568 #endif 569 if ((error = fork(p, (struct fork_args *)args)) != 0) 570 return error; 571 if (p->p_retval[1] == 1) 572 p->p_retval[0] = 0; 573 return 0; 574 } 575 576 int 577 linux_vfork(struct proc *p, struct linux_vfork_args *args) 578 { 579 int error; 580 581 #ifdef DEBUG 582 printf("Linux-emul(%ld): vfork()\n", (long)p->p_pid); 583 #endif 584 585 if ((error = vfork(p, (struct vfork_args *)args)) != 0) 586 return error; 587 /* Are we the child? */ 588 if (p->p_retval[1] == 1) 589 p->p_retval[0] = 0; 590 return 0; 591 } 592 593 #define CLONE_VM 0x100 594 #define CLONE_FS 0x200 595 #define CLONE_FILES 0x400 596 #define CLONE_SIGHAND 0x800 597 #define CLONE_PID 0x1000 598 599 int 600 linux_clone(struct proc *p, struct linux_clone_args *args) 601 { 602 int error, ff = RFPROC; 603 struct proc *p2; 604 int exit_signal; 605 vm_offset_t start; 606 struct rfork_args rf_args; 607 608 #ifdef DEBUG 609 if (args->flags & CLONE_PID) 610 printf("linux_clone(%ld): CLONE_PID not yet supported\n", 611 (long)p->p_pid); 612 printf("linux_clone(%ld): invoked with flags %x and stack %x\n", 613 (long)p->p_pid, (unsigned int)args->flags, 614 (unsigned int)args->stack); 615 #endif 616 617 if (!args->stack) 618 return (EINVAL); 619 620 exit_signal = args->flags & 0x000000ff; 621 if (exit_signal >= LINUX_NSIG) 622 return EINVAL; 623 exit_signal = linux_to_bsd_signal[exit_signal]; 624 625 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 626 ff |= RFTHREAD; 627 628 if (args->flags & CLONE_VM) 629 ff |= RFMEM; 630 if (args->flags & CLONE_SIGHAND) 631 ff |= RFSIGSHARE; 632 if (!(args->flags & CLONE_FILES)) 633 ff |= RFFDG; 634 635 error = 0; 636 start = 0; 637 638 rf_args.flags = ff; 639 if ((error = rfork(p, &rf_args)) != 0) 640 return error; 641 642 p2 = pfind(p->p_retval[0]); 643 if (p2 == 0) 644 return ESRCH; 645 646 p2->p_sigparent = exit_signal; 647 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 648 649 #ifdef DEBUG 650 printf ("linux_clone(%ld): successful rfork to %ld\n", 651 (long)p->p_pid, (long)p2->p_pid); 652 #endif 653 return 0; 654 } 655 656 /* XXX move */ 657 struct linux_mmap_argv { 658 linux_caddr_t addr; 659 int len; 660 int prot; 661 int flags; 662 int fd; 663 int pos; 664 }; 665 666 #define STACK_SIZE (2 * 1024 * 1024) 667 #define GUARD_SIZE (4 * PAGE_SIZE) 668 int 669 linux_mmap(struct proc *p, struct linux_mmap_args *args) 670 { 671 struct mmap_args /* { 672 caddr_t addr; 673 size_t len; 674 int prot; 675 int flags; 676 int fd; 677 long pad; 678 off_t pos; 679 } */ bsd_args; 680 int error; 681 struct linux_mmap_argv linux_args; 682 683 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 684 sizeof(linux_args)))) 685 return error; 686 #ifdef DEBUG 687 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n", 688 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 689 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 690 #endif 691 bsd_args.flags = 0; 692 if (linux_args.flags & LINUX_MAP_SHARED) 693 bsd_args.flags |= MAP_SHARED; 694 if (linux_args.flags & LINUX_MAP_PRIVATE) 695 bsd_args.flags |= MAP_PRIVATE; 696 if (linux_args.flags & LINUX_MAP_FIXED) 697 bsd_args.flags |= MAP_FIXED; 698 if (linux_args.flags & LINUX_MAP_ANON) 699 bsd_args.flags |= MAP_ANON; 700 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 701 bsd_args.flags |= MAP_STACK; 702 703 /* The linux MAP_GROWSDOWN option does not limit auto 704 * growth of the region. Linux mmap with this option 705 * takes as addr the inital BOS, and as len, the initial 706 * region size. It can then grow down from addr without 707 * limit. However, linux threads has an implicit internal 708 * limit to stack size of STACK_SIZE. Its just not 709 * enforced explicitly in linux. But, here we impose 710 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 711 * region, since we can do this with our mmap. 712 * 713 * Our mmap with MAP_STACK takes addr as the maximum 714 * downsize limit on BOS, and as len the max size of 715 * the region. It them maps the top SGROWSIZ bytes, 716 * and autgrows the region down, up to the limit 717 * in addr. 718 * 719 * If we don't use the MAP_STACK option, the effect 720 * of this code is to allocate a stack region of a 721 * fixed size of (STACK_SIZE - GUARD_SIZE). 722 */ 723 724 /* This gives us TOS */ 725 bsd_args.addr = linux_args.addr + linux_args.len; 726 727 /* This gives us our maximum stack size */ 728 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 729 bsd_args.len = linux_args.len; 730 else 731 bsd_args.len = STACK_SIZE - GUARD_SIZE; 732 733 /* This gives us a new BOS. If we're using VM_STACK, then 734 * mmap will just map the top SGROWSIZ bytes, and let 735 * the stack grow down to the limit at BOS. If we're 736 * not using VM_STACK we map the full stack, since we 737 * don't have a way to autogrow it. 738 */ 739 bsd_args.addr -= bsd_args.len; 740 741 } else { 742 bsd_args.addr = linux_args.addr; 743 bsd_args.len = linux_args.len; 744 } 745 746 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 747 bsd_args.fd = linux_args.fd; 748 bsd_args.pos = linux_args.pos; 749 bsd_args.pad = 0; 750 return mmap(p, &bsd_args); 751 } 752 753 int 754 linux_mremap(struct proc *p, struct linux_mremap_args *args) 755 { 756 struct munmap_args /* { 757 void *addr; 758 size_t len; 759 } */ bsd_args; 760 int error = 0; 761 762 #ifdef DEBUG 763 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 764 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 765 args->flags); 766 #endif 767 args->new_len = round_page(args->new_len); 768 args->old_len = round_page(args->old_len); 769 770 if (args->new_len > args->old_len) { 771 p->p_retval[0] = 0; 772 return ENOMEM; 773 } 774 775 if (args->new_len < args->old_len) { 776 bsd_args.addr = args->addr + args->new_len; 777 bsd_args.len = args->old_len - args->new_len; 778 error = munmap(p, &bsd_args); 779 } 780 781 p->p_retval[0] = error ? 0 : (int)args->addr; 782 return error; 783 } 784 785 int 786 linux_msync(struct proc *p, struct linux_msync_args *args) 787 { 788 struct msync_args bsd_args; 789 790 bsd_args.addr = args->addr; 791 bsd_args.len = args->len; 792 bsd_args.flags = 0; /* XXX ignore */ 793 794 return msync(p, &bsd_args); 795 } 796 797 int 798 linux_pipe(struct proc *p, struct linux_pipe_args *args) 799 { 800 int error; 801 int reg_edx; 802 803 #ifdef DEBUG 804 printf("Linux-emul(%ld): pipe(*)\n", (long)p->p_pid); 805 #endif 806 reg_edx = p->p_retval[1]; 807 error = pipe(p, 0); 808 if (error) { 809 p->p_retval[1] = reg_edx; 810 return error; 811 } 812 813 error = copyout(p->p_retval, args->pipefds, 2*sizeof(int)); 814 if (error) { 815 p->p_retval[1] = reg_edx; 816 return error; 817 } 818 819 p->p_retval[1] = reg_edx; 820 p->p_retval[0] = 0; 821 return 0; 822 } 823 824 int 825 linux_time(struct proc *p, struct linux_time_args *args) 826 { 827 struct timeval tv; 828 linux_time_t tm; 829 int error; 830 831 #ifdef DEBUG 832 printf("Linux-emul(%ld): time(*)\n", (long)p->p_pid); 833 #endif 834 microtime(&tv); 835 tm = tv.tv_sec; 836 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 837 return error; 838 p->p_retval[0] = tm; 839 return 0; 840 } 841 842 struct linux_times_argv { 843 long tms_utime; 844 long tms_stime; 845 long tms_cutime; 846 long tms_cstime; 847 }; 848 849 #define CLK_TCK 100 /* Linux uses 100 */ 850 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 851 852 int 853 linux_times(struct proc *p, struct linux_times_args *args) 854 { 855 struct timeval tv; 856 struct linux_times_argv tms; 857 struct rusage ru; 858 int error; 859 860 #ifdef DEBUG 861 printf("Linux-emul(%ld): times(*)\n", (long)p->p_pid); 862 #endif 863 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 864 865 tms.tms_utime = CONVTCK(ru.ru_utime); 866 tms.tms_stime = CONVTCK(ru.ru_stime); 867 868 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 869 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 870 871 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 872 sizeof(struct linux_times_argv)))) 873 return error; 874 875 microuptime(&tv); 876 p->p_retval[0] = (int)CONVTCK(tv); 877 return 0; 878 } 879 880 int 881 linux_newuname(struct proc *p, struct linux_newuname_args *args) 882 { 883 struct linux_new_utsname utsname; 884 char *osrelease, *osname; 885 886 #ifdef DEBUG 887 printf("Linux-emul(%ld): newuname(*)\n", (long)p->p_pid); 888 #endif 889 890 osname = linux_get_osname(p); 891 osrelease = linux_get_osrelease(p); 892 893 bzero(&utsname, sizeof(struct linux_new_utsname)); 894 strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1); 895 strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1); 896 strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1); 897 strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1); 898 strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1); 899 strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1); 900 901 return (copyout((caddr_t)&utsname, (caddr_t)args->buf, 902 sizeof(struct linux_new_utsname))); 903 } 904 905 struct linux_utimbuf { 906 linux_time_t l_actime; 907 linux_time_t l_modtime; 908 }; 909 910 int 911 linux_utime(struct proc *p, struct linux_utime_args *args) 912 { 913 struct utimes_args /* { 914 char *path; 915 struct timeval *tptr; 916 } */ bsdutimes; 917 struct timeval tv[2], *tvp; 918 struct linux_utimbuf lut; 919 int error; 920 caddr_t sg; 921 922 sg = stackgap_init(); 923 CHECKALTEXIST(p, &sg, args->fname); 924 925 #ifdef DEBUG 926 printf("Linux-emul(%ld): utime(%s, *)\n", (long)p->p_pid, args->fname); 927 #endif 928 if (args->times) { 929 if ((error = copyin(args->times, &lut, sizeof lut))) 930 return error; 931 tv[0].tv_sec = lut.l_actime; 932 tv[0].tv_usec = 0; 933 tv[1].tv_sec = lut.l_modtime; 934 tv[1].tv_usec = 0; 935 /* so that utimes can copyin */ 936 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 937 if ((error = copyout(tv, tvp, sizeof(tv)))) 938 return error; 939 bsdutimes.tptr = tvp; 940 } else 941 bsdutimes.tptr = NULL; 942 943 bsdutimes.path = args->fname; 944 return utimes(p, &bsdutimes); 945 } 946 947 #define __WCLONE 0x80000000 948 949 int 950 linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 951 { 952 struct wait_args /* { 953 int pid; 954 int *status; 955 int options; 956 struct rusage *rusage; 957 } */ tmp; 958 int error, tmpstat; 959 960 #ifdef DEBUG 961 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 962 (long)p->p_pid, args->pid, (void *)args->status, args->options); 963 #endif 964 tmp.pid = args->pid; 965 tmp.status = args->status; 966 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 967 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 968 if (args->options & __WCLONE) 969 tmp.options |= WLINUXCLONE; 970 tmp.rusage = NULL; 971 972 if ((error = wait4(p, &tmp)) != 0) 973 return error; 974 975 if (args->status) { 976 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 977 return error; 978 if (WIFSIGNALED(tmpstat)) 979 tmpstat = (tmpstat & 0xffffff80) | 980 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 981 else if (WIFSTOPPED(tmpstat)) 982 tmpstat = (tmpstat & 0xffff00ff) | 983 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 984 return copyout(&tmpstat, args->status, sizeof(int)); 985 } else 986 return 0; 987 } 988 989 int 990 linux_wait4(struct proc *p, struct linux_wait4_args *args) 991 { 992 struct wait_args /* { 993 int pid; 994 int *status; 995 int options; 996 struct rusage *rusage; 997 } */ tmp; 998 int error, tmpstat; 999 1000 #ifdef DEBUG 1001 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 1002 (long)p->p_pid, args->pid, (void *)args->status, args->options, 1003 (void *)args->rusage); 1004 #endif 1005 tmp.pid = args->pid; 1006 tmp.status = args->status; 1007 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1008 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1009 if (args->options & __WCLONE) 1010 tmp.options |= WLINUXCLONE; 1011 tmp.rusage = args->rusage; 1012 1013 if ((error = wait4(p, &tmp)) != 0) 1014 return error; 1015 1016 p->p_siglist &= ~sigmask(SIGCHLD); 1017 1018 if (args->status) { 1019 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 1020 return error; 1021 if (WIFSIGNALED(tmpstat)) 1022 tmpstat = (tmpstat & 0xffffff80) | 1023 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1024 else if (WIFSTOPPED(tmpstat)) 1025 tmpstat = (tmpstat & 0xffff00ff) | 1026 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1027 return copyout(&tmpstat, args->status, sizeof(int)); 1028 } else 1029 return 0; 1030 } 1031 1032 int 1033 linux_mknod(struct proc *p, struct linux_mknod_args *args) 1034 { 1035 caddr_t sg; 1036 struct mknod_args bsd_mknod; 1037 struct mkfifo_args bsd_mkfifo; 1038 1039 sg = stackgap_init(); 1040 1041 CHECKALTCREAT(p, &sg, args->path); 1042 1043 #ifdef DEBUG 1044 printf("Linux-emul(%ld): mknod(%s, %d, %d)\n", 1045 (long)p->p_pid, args->path, args->mode, args->dev); 1046 #endif 1047 1048 if (args->mode & S_IFIFO) { 1049 bsd_mkfifo.path = args->path; 1050 bsd_mkfifo.mode = args->mode; 1051 return mkfifo(p, &bsd_mkfifo); 1052 } else { 1053 bsd_mknod.path = args->path; 1054 bsd_mknod.mode = args->mode; 1055 bsd_mknod.dev = args->dev; 1056 return mknod(p, &bsd_mknod); 1057 } 1058 } 1059 1060 /* 1061 * UGH! This is just about the dumbest idea I've ever heard!! 1062 */ 1063 int 1064 linux_personality(struct proc *p, struct linux_personality_args *args) 1065 { 1066 #ifdef DEBUG 1067 printf("Linux-emul(%ld): personality(%d)\n", 1068 (long)p->p_pid, args->per); 1069 #endif 1070 if (args->per != 0) 1071 return EINVAL; 1072 1073 /* Yes Jim, it's still a Linux... */ 1074 p->p_retval[0] = 0; 1075 return 0; 1076 } 1077 1078 /* 1079 * Wrappers for get/setitimer for debugging.. 1080 */ 1081 int 1082 linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1083 { 1084 struct setitimer_args bsa; 1085 struct itimerval foo; 1086 int error; 1087 1088 #ifdef DEBUG 1089 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1090 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1091 #endif 1092 bsa.which = args->which; 1093 bsa.itv = args->itv; 1094 bsa.oitv = args->oitv; 1095 if (args->itv) { 1096 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1097 sizeof(foo)))) 1098 return error; 1099 #ifdef DEBUG 1100 printf("setitimer: value: sec: %ld, usec: %ld\n", 1101 foo.it_value.tv_sec, foo.it_value.tv_usec); 1102 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1103 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1104 #endif 1105 } 1106 return setitimer(p, &bsa); 1107 } 1108 1109 int 1110 linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1111 { 1112 struct getitimer_args bsa; 1113 #ifdef DEBUG 1114 printf("Linux-emul(%ld): getitimer(%p)\n", 1115 (long)p->p_pid, (void *)args->itv); 1116 #endif 1117 bsa.which = args->which; 1118 bsa.itv = args->itv; 1119 return getitimer(p, &bsa); 1120 } 1121 1122 int 1123 linux_iopl(struct proc *p, struct linux_iopl_args *args) 1124 { 1125 int error; 1126 1127 error = suser(p); 1128 if (error != 0) 1129 return error; 1130 if (securelevel > 0) 1131 return EPERM; 1132 p->p_md.md_regs->tf_eflags |= PSL_IOPL; 1133 return 0; 1134 } 1135 1136 int 1137 linux_nice(struct proc *p, struct linux_nice_args *args) 1138 { 1139 struct setpriority_args bsd_args; 1140 1141 bsd_args.which = PRIO_PROCESS; 1142 bsd_args.who = 0; /* current process */ 1143 bsd_args.prio = args->inc; 1144 return setpriority(p, &bsd_args); 1145 } 1146 1147 int 1148 linux_setgroups(p, uap) 1149 struct proc *p; 1150 struct linux_setgroups_args *uap; 1151 { 1152 struct pcred *pc; 1153 linux_gid_t linux_gidset[NGROUPS]; 1154 gid_t *bsd_gidset; 1155 int ngrp, error; 1156 1157 pc = p->p_cred; 1158 ngrp = uap->gidsetsize; 1159 1160 /* 1161 * cr_groups[0] holds egid. Setting the whole set from 1162 * the supplied set will cause egid to be changed too. 1163 * Keep cr_groups[0] unchanged to prevent that. 1164 */ 1165 1166 if ((error = suser(p)) != 0) 1167 return (error); 1168 1169 if (ngrp >= NGROUPS) 1170 return (EINVAL); 1171 1172 pc->pc_ucred = crcopy(pc->pc_ucred); 1173 if (ngrp > 0) { 1174 error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset, 1175 ngrp * sizeof(linux_gid_t)); 1176 if (error) 1177 return (error); 1178 1179 pc->pc_ucred->cr_ngroups = ngrp + 1; 1180 1181 bsd_gidset = pc->pc_ucred->cr_groups; 1182 ngrp--; 1183 while (ngrp >= 0) { 1184 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1185 ngrp--; 1186 } 1187 } 1188 else 1189 pc->pc_ucred->cr_ngroups = 1; 1190 1191 setsugid(p); 1192 return (0); 1193 } 1194 1195 int 1196 linux_getgroups(p, uap) 1197 struct proc *p; 1198 struct linux_getgroups_args *uap; 1199 { 1200 struct pcred *pc; 1201 linux_gid_t linux_gidset[NGROUPS]; 1202 gid_t *bsd_gidset; 1203 int bsd_gidsetsz, ngrp, error; 1204 1205 pc = p->p_cred; 1206 bsd_gidset = pc->pc_ucred->cr_groups; 1207 bsd_gidsetsz = pc->pc_ucred->cr_ngroups - 1; 1208 1209 /* 1210 * cr_groups[0] holds egid. Returning the whole set 1211 * here will cause a duplicate. Exclude cr_groups[0] 1212 * to prevent that. 1213 */ 1214 1215 if ((ngrp = uap->gidsetsize) == 0) { 1216 p->p_retval[0] = bsd_gidsetsz; 1217 return (0); 1218 } 1219 1220 if (ngrp < bsd_gidsetsz) 1221 return (EINVAL); 1222 1223 ngrp = 0; 1224 while (ngrp < bsd_gidsetsz) { 1225 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1226 ngrp++; 1227 } 1228 1229 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1230 ngrp * sizeof(linux_gid_t)))) 1231 return (error); 1232 1233 p->p_retval[0] = ngrp; 1234 return (0); 1235 } 1236 1237 int 1238 linux_setrlimit(p, uap) 1239 struct proc *p; 1240 struct linux_setrlimit_args *uap; 1241 { 1242 struct osetrlimit_args bsd; 1243 1244 #ifdef DEBUG 1245 printf("Linux-emul(%ld): setrlimit(%d, %p)\n", 1246 (long)p->p_pid, uap->resource, (void *)uap->rlim); 1247 #endif 1248 1249 if (uap->resource >= LINUX_RLIM_NLIMITS) 1250 return EINVAL; 1251 1252 bsd.which = linux_to_bsd_resource[uap->resource]; 1253 1254 if (bsd.which == -1) 1255 return EINVAL; 1256 1257 bsd.rlp = uap->rlim; 1258 return osetrlimit(p, &bsd); 1259 } 1260 1261 int 1262 linux_getrlimit(p, uap) 1263 struct proc *p; 1264 struct linux_getrlimit_args *uap; 1265 { 1266 struct ogetrlimit_args bsd; 1267 1268 #ifdef DEBUG 1269 printf("Linux-emul(%ld): getrlimit(%d, %p)\n", 1270 (long)p->p_pid, uap->resource, (void *)uap->rlim); 1271 #endif 1272 1273 if (uap->resource >= LINUX_RLIM_NLIMITS) 1274 return EINVAL; 1275 1276 bsd.which = linux_to_bsd_resource[uap->resource]; 1277 1278 if (bsd.which == -1) 1279 return EINVAL; 1280 1281 bsd.rlp = uap->rlim; 1282 return ogetrlimit(p, &bsd); 1283 } 1284 1285 int 1286 linux_sched_setscheduler(p, uap) 1287 struct proc *p; 1288 struct linux_sched_setscheduler_args *uap; 1289 { 1290 struct sched_setscheduler_args bsd; 1291 1292 #ifdef DEBUG 1293 printf("Linux-emul(%ld): sched_setscheduler(%d, %d, %p)\n", 1294 (long)p->p_pid, uap->pid, uap->policy, (void *)uap->param); 1295 #endif 1296 1297 switch (uap->policy) { 1298 case LINUX_SCHED_OTHER: 1299 bsd.policy = SCHED_OTHER; 1300 break; 1301 case LINUX_SCHED_FIFO: 1302 bsd.policy = SCHED_FIFO; 1303 break; 1304 case LINUX_SCHED_RR: 1305 bsd.policy = SCHED_RR; 1306 break; 1307 default: 1308 return EINVAL; 1309 } 1310 1311 bsd.pid = uap->pid; 1312 bsd.param = uap->param; 1313 return sched_setscheduler(p, &bsd); 1314 } 1315 1316 int 1317 linux_sched_getscheduler(p, uap) 1318 struct proc *p; 1319 struct linux_sched_getscheduler_args *uap; 1320 { 1321 struct sched_getscheduler_args bsd; 1322 int error; 1323 1324 #ifdef DEBUG 1325 printf("Linux-emul(%ld): sched_getscheduler(%d)\n", 1326 (long)p->p_pid, uap->pid); 1327 #endif 1328 1329 bsd.pid = uap->pid; 1330 error = sched_getscheduler(p, &bsd); 1331 1332 switch (p->p_retval[0]) { 1333 case SCHED_OTHER: 1334 p->p_retval[0] = LINUX_SCHED_OTHER; 1335 break; 1336 case SCHED_FIFO: 1337 p->p_retval[0] = LINUX_SCHED_FIFO; 1338 break; 1339 case SCHED_RR: 1340 p->p_retval[0] = LINUX_SCHED_RR; 1341 break; 1342 } 1343 1344 return error; 1345 } 1346