/*-
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_mac.h" 34 35 #include <sys/param.h> 36 #include <sys/blist.h> 37 #include <sys/fcntl.h> 38 #if defined(__i386__) || defined(__alpha__) 39 #include <sys/imgact_aout.h> 40 #endif 41 #include <sys/jail.h> 42 #include <sys/kernel.h> 43 #include <sys/limits.h> 44 #include <sys/lock.h> 45 #include <sys/mac.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mount.h> 49 #include <sys/mutex.h> 50 #include <sys/namei.h> 51 #include <sys/proc.h> 52 #include <sys/reboot.h> 53 #include <sys/resourcevar.h> 54 #include <sys/signalvar.h> 55 #include <sys/stat.h> 56 #include <sys/syscallsubr.h> 57 #include <sys/sysctl.h> 58 #include <sys/sysproto.h> 59 #include <sys/systm.h> 60 #include <sys/time.h> 61 #include <sys/vmmeter.h> 62 #include <sys/vnode.h> 63 #include <sys/wait.h> 64 65 #include <vm/vm.h> 66 #include <vm/pmap.h> 67 #include <vm/vm_kern.h> 68 #include <vm/vm_map.h> 69 #include <vm/vm_extern.h> 70 #include <vm/vm_object.h> 71 #include <vm/swap_pager.h> 72 73 #include <posix4/sched.h> 74 75 #include "opt_compat.h" 76 77 #include <compat/linux/linux_sysproto.h> 78 79 #ifdef COMPAT_LINUX32 80 #include <machine/../linux32/linux.h> 81 #include <machine/../linux32/linux32_proto.h> 82 #else 83 #include <machine/../linux/linux.h> 84 #include <machine/../linux/linux_proto.h> 85 #endif 86 87 #include <compat/linux/linux_mib.h> 88 #include <compat/linux/linux_util.h> 89 90 #ifdef __i386__ 91 #include <machine/cputypes.h> 92 #endif 93 94 #ifdef __alpha__ 95 #define BSD_TO_LINUX_SIGNAL(sig) (sig) 96 #else 97 #define BSD_TO_LINUX_SIGNAL(sig) \ 98 (((sig) <= LINUX_SIGTBLSZ) ? 
bsd_to_linux_signal[_SIG_IDX(sig)] : sig) 99 #endif 100 101 #ifndef __alpha__ 102 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 103 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 104 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 105 RLIMIT_MEMLOCK, -1 106 }; 107 #endif /*!__alpha__*/ 108 109 struct l_sysinfo { 110 l_long uptime; /* Seconds since boot */ 111 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 112 #define LINUX_SYSINFO_LOADS_SCALE 65536 113 l_ulong totalram; /* Total usable main memory size */ 114 l_ulong freeram; /* Available memory size */ 115 l_ulong sharedram; /* Amount of shared memory */ 116 l_ulong bufferram; /* Memory used by buffers */ 117 l_ulong totalswap; /* Total swap space size */ 118 l_ulong freeswap; /* swap space still available */ 119 l_ushort procs; /* Number of current processes */ 120 l_ulong totalbig; 121 l_ulong freebig; 122 l_uint mem_unit; 123 char _f[6]; /* Pads structure to 64 bytes */ 124 }; 125 #ifndef __alpha__ 126 int 127 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 128 { 129 struct l_sysinfo sysinfo; 130 vm_object_t object; 131 int i, j; 132 struct timespec ts; 133 134 getnanouptime(&ts); 135 if (ts.tv_nsec != 0) 136 ts.tv_sec++; 137 sysinfo.uptime = ts.tv_sec; 138 139 /* Use the information from the mib to get our load averages */ 140 for (i = 0; i < 3; i++) 141 sysinfo.loads[i] = averunnable.ldavg[i] * 142 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 143 144 sysinfo.totalram = physmem * PAGE_SIZE; 145 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 146 147 sysinfo.sharedram = 0; 148 mtx_lock(&vm_object_list_mtx); 149 TAILQ_FOREACH(object, &vm_object_list, object_list) 150 if (object->shadow_count > 1) 151 sysinfo.sharedram += object->resident_page_count; 152 mtx_unlock(&vm_object_list_mtx); 153 154 sysinfo.sharedram *= PAGE_SIZE; 155 sysinfo.bufferram = 0; 156 157 swap_pager_status(&i, &j); 158 sysinfo.totalswap= i * PAGE_SIZE; 159 
sysinfo.freeswap = (i - j) * PAGE_SIZE; 160 161 sysinfo.procs = nprocs; 162 163 /* The following are only present in newer Linux kernels. */ 164 sysinfo.totalbig = 0; 165 sysinfo.freebig = 0; 166 sysinfo.mem_unit = 1; 167 168 return copyout(&sysinfo, args->info, sizeof(sysinfo)); 169 } 170 #endif /*!__alpha__*/ 171 172 #ifndef __alpha__ 173 int 174 linux_alarm(struct thread *td, struct linux_alarm_args *args) 175 { 176 struct itimerval it, old_it; 177 int error; 178 179 #ifdef DEBUG 180 if (ldebug(alarm)) 181 printf(ARGS(alarm, "%u"), args->secs); 182 #endif 183 184 if (args->secs > 100000000) 185 return (EINVAL); 186 187 it.it_value.tv_sec = (long)args->secs; 188 it.it_value.tv_usec = 0; 189 it.it_interval.tv_sec = 0; 190 it.it_interval.tv_usec = 0; 191 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 192 if (error) 193 return (error); 194 if (timevalisset(&old_it.it_value)) { 195 if (old_it.it_value.tv_usec != 0) 196 old_it.it_value.tv_sec++; 197 td->td_retval[0] = old_it.it_value.tv_sec; 198 } 199 return (0); 200 } 201 #endif /*!__alpha__*/ 202 203 int 204 linux_brk(struct thread *td, struct linux_brk_args *args) 205 { 206 struct vmspace *vm = td->td_proc->p_vmspace; 207 vm_offset_t new, old; 208 struct obreak_args /* { 209 char * nsize; 210 } */ tmp; 211 212 #ifdef DEBUG 213 if (ldebug(brk)) 214 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 215 #endif 216 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 217 new = (vm_offset_t)args->dsend; 218 tmp.nsize = (char *) new; 219 if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp)) 220 td->td_retval[0] = (long)new; 221 else 222 td->td_retval[0] = (long)old; 223 224 return 0; 225 } 226 227 #if defined(__i386__) || defined(__alpha__) 228 229 int 230 linux_uselib(struct thread *td, struct linux_uselib_args *args) 231 { 232 struct nameidata ni; 233 struct vnode *vp; 234 struct exec *a_out; 235 struct vattr attr; 236 vm_offset_t vmaddr; 237 unsigned long file_offset; 238 vm_offset_t buffer; 239 
unsigned long bss_size; 240 char *library; 241 int error; 242 int locked; 243 244 LCONVPATHEXIST(td, args->library, &library); 245 246 #ifdef DEBUG 247 if (ldebug(uselib)) 248 printf(ARGS(uselib, "%s"), library); 249 #endif 250 251 a_out = NULL; 252 locked = 0; 253 vp = NULL; 254 255 /* 256 * XXX: This code should make use of vn_open(), rather than doing 257 * all this stuff itself. 258 */ 259 NDINIT(&ni, LOOKUP, ISOPEN|FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td); 260 error = namei(&ni); 261 LFREEPATH(library); 262 if (error) 263 goto cleanup; 264 265 vp = ni.ni_vp; 266 /* 267 * XXX - This looks like a bogus check. A LOCKLEAF namei should not 268 * succeed without returning a vnode. 269 */ 270 if (vp == NULL) { 271 error = ENOEXEC; /* ?? */ 272 goto cleanup; 273 } 274 NDFREE(&ni, NDF_ONLY_PNBUF); 275 276 /* 277 * From here on down, we have a locked vnode that must be unlocked. 278 */ 279 locked++; 280 281 /* Writable? */ 282 if (vp->v_writecount) { 283 error = ETXTBSY; 284 goto cleanup; 285 } 286 287 /* Executable? */ 288 error = VOP_GETATTR(vp, &attr, td->td_ucred, td); 289 if (error) 290 goto cleanup; 291 292 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 293 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 294 error = ENOEXEC; 295 goto cleanup; 296 } 297 298 /* Sensible size? */ 299 if (attr.va_size == 0) { 300 error = ENOEXEC; 301 goto cleanup; 302 } 303 304 /* Can we access it? */ 305 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 306 if (error) 307 goto cleanup; 308 309 /* 310 * XXX: This should use vn_open() so that it is properly authorized, 311 * and to reduce code redundancy all over the place here. 
312 */ 313 #ifdef MAC 314 error = mac_check_vnode_open(td->td_ucred, vp, FREAD); 315 if (error) 316 goto cleanup; 317 #endif 318 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); 319 if (error) 320 goto cleanup; 321 322 /* Pull in executable header into kernel_map */ 323 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 324 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 325 /* 326 * Lock no longer needed 327 */ 328 locked = 0; 329 VOP_UNLOCK(vp, 0, td); 330 331 if (error) 332 goto cleanup; 333 334 /* Is it a Linux binary ? */ 335 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 336 error = ENOEXEC; 337 goto cleanup; 338 } 339 340 /* 341 * While we are here, we should REALLY do some more checks 342 */ 343 344 /* Set file/virtual offset based on a.out variant. */ 345 switch ((int)(a_out->a_magic & 0xffff)) { 346 case 0413: /* ZMAGIC */ 347 file_offset = 1024; 348 break; 349 case 0314: /* QMAGIC */ 350 file_offset = 0; 351 break; 352 default: 353 error = ENOEXEC; 354 goto cleanup; 355 } 356 357 bss_size = round_page(a_out->a_bss); 358 359 /* Check various fields in header for validity/bounds. */ 360 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 361 error = ENOEXEC; 362 goto cleanup; 363 } 364 365 /* text + data can't exceed file size */ 366 if (a_out->a_data + a_out->a_text > attr.va_size) { 367 error = EFAULT; 368 goto cleanup; 369 } 370 371 /* 372 * text/data/bss must not exceed limits 373 * XXX - this is not complete. it should check current usage PLUS 374 * the resources needed by this library. 375 */ 376 PROC_LOCK(td->td_proc); 377 if (a_out->a_text > maxtsiz || 378 a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA)) { 379 PROC_UNLOCK(td->td_proc); 380 error = ENOMEM; 381 goto cleanup; 382 } 383 PROC_UNLOCK(td->td_proc); 384 385 mp_fixme("Unlocked vflags access."); 386 /* prevent more writers */ 387 vp->v_vflag |= VV_TEXT; 388 389 /* 390 * Check if file_offset page aligned. 
Currently we cannot handle 391 * misalinged file offsets, and so we read in the entire image 392 * (what a waste). 393 */ 394 if (file_offset & PAGE_MASK) { 395 #ifdef DEBUG 396 printf("uselib: Non page aligned binary %lu\n", file_offset); 397 #endif 398 /* Map text+data read/write/execute */ 399 400 /* a_entry is the load address and is page aligned */ 401 vmaddr = trunc_page(a_out->a_entry); 402 403 /* get anon user mapping, read+write+execute */ 404 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 405 &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL, 406 VM_PROT_ALL, 0); 407 if (error) 408 goto cleanup; 409 410 /* map file into kernel_map */ 411 error = vm_mmap(kernel_map, &buffer, 412 round_page(a_out->a_text + a_out->a_data + file_offset), 413 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 414 trunc_page(file_offset)); 415 if (error) 416 goto cleanup; 417 418 /* copy from kernel VM space to user space */ 419 error = copyout(PTRIN(buffer + file_offset), 420 (void *)vmaddr, a_out->a_text + a_out->a_data); 421 422 /* release temporary kernel space */ 423 vm_map_remove(kernel_map, buffer, buffer + 424 round_page(a_out->a_text + a_out->a_data + file_offset)); 425 426 if (error) 427 goto cleanup; 428 } else { 429 #ifdef DEBUG 430 printf("uselib: Page aligned binary %lu\n", file_offset); 431 #endif 432 /* 433 * for QMAGIC, a_entry is 20 bytes beyond the load address 434 * to skip the executable header 435 */ 436 vmaddr = trunc_page(a_out->a_entry); 437 438 /* 439 * Map it all into the process's space as a single 440 * copy-on-write "data" segment. 
441 */ 442 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 443 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 444 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 445 if (error) 446 goto cleanup; 447 } 448 #ifdef DEBUG 449 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0], 450 ((long*)vmaddr)[1]); 451 #endif 452 if (bss_size != 0) { 453 /* Calculate BSS start address */ 454 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 455 a_out->a_data; 456 457 /* allocate some 'anon' space */ 458 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 459 &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); 460 if (error) 461 goto cleanup; 462 } 463 464 cleanup: 465 /* Unlock vnode if needed */ 466 if (locked) 467 VOP_UNLOCK(vp, 0, td); 468 469 /* Release the kernel mapping. */ 470 if (a_out) 471 vm_map_remove(kernel_map, (vm_offset_t)a_out, 472 (vm_offset_t)a_out + PAGE_SIZE); 473 474 return error; 475 } 476 477 #endif /* __i386__ || __alpha__ */ 478 479 int 480 linux_select(struct thread *td, struct linux_select_args *args) 481 { 482 l_timeval ltv; 483 struct timeval tv0, tv1, utv, *tvp; 484 int error; 485 486 #ifdef DEBUG 487 if (ldebug(select)) 488 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 489 (void *)args->readfds, (void *)args->writefds, 490 (void *)args->exceptfds, (void *)args->timeout); 491 #endif 492 493 /* 494 * Store current time for computation of the amount of 495 * time left. 496 */ 497 if (args->timeout) { 498 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 499 goto select_out; 500 utv.tv_sec = ltv.tv_sec; 501 utv.tv_usec = ltv.tv_usec; 502 #ifdef DEBUG 503 if (ldebug(select)) 504 printf(LMSG("incoming timeout (%ld/%ld)"), 505 utv.tv_sec, utv.tv_usec); 506 #endif 507 508 if (itimerfix(&utv)) { 509 /* 510 * The timeval was invalid. Convert it to something 511 * valid that will act as it does under Linux. 
512 */ 513 utv.tv_sec += utv.tv_usec / 1000000; 514 utv.tv_usec %= 1000000; 515 if (utv.tv_usec < 0) { 516 utv.tv_sec -= 1; 517 utv.tv_usec += 1000000; 518 } 519 if (utv.tv_sec < 0) 520 timevalclear(&utv); 521 } 522 microtime(&tv0); 523 tvp = &utv; 524 } else 525 tvp = NULL; 526 527 error = kern_select(td, args->nfds, args->readfds, args->writefds, 528 args->exceptfds, tvp); 529 530 #ifdef DEBUG 531 if (ldebug(select)) 532 printf(LMSG("real select returns %d"), error); 533 #endif 534 if (error) { 535 /* 536 * See fs/select.c in the Linux kernel. Without this, 537 * Maelstrom doesn't work. 538 */ 539 if (error == ERESTART) 540 error = EINTR; 541 goto select_out; 542 } 543 544 if (args->timeout) { 545 if (td->td_retval[0]) { 546 /* 547 * Compute how much time was left of the timeout, 548 * by subtracting the current time and the time 549 * before we started the call, and subtracting 550 * that result from the user-supplied value. 551 */ 552 microtime(&tv1); 553 timevalsub(&tv1, &tv0); 554 timevalsub(&utv, &tv1); 555 if (utv.tv_sec < 0) 556 timevalclear(&utv); 557 } else 558 timevalclear(&utv); 559 #ifdef DEBUG 560 if (ldebug(select)) 561 printf(LMSG("outgoing timeout (%ld/%ld)"), 562 utv.tv_sec, utv.tv_usec); 563 #endif 564 ltv.tv_sec = utv.tv_sec; 565 ltv.tv_usec = utv.tv_usec; 566 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 567 goto select_out; 568 } 569 570 select_out: 571 #ifdef DEBUG 572 if (ldebug(select)) 573 printf(LMSG("select_out -> %d"), error); 574 #endif 575 return error; 576 } 577 578 int 579 linux_mremap(struct thread *td, struct linux_mremap_args *args) 580 { 581 struct munmap_args /* { 582 void *addr; 583 size_t len; 584 } */ bsd_args; 585 int error = 0; 586 587 #ifdef DEBUG 588 if (ldebug(mremap)) 589 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 590 (void *)(uintptr_t)args->addr, 591 (unsigned long)args->old_len, 592 (unsigned long)args->new_len, 593 (unsigned long)args->flags); 594 #endif 595 args->new_len = round_page(args->new_len); 
596 args->old_len = round_page(args->old_len); 597 598 if (args->new_len > args->old_len) { 599 td->td_retval[0] = 0; 600 return ENOMEM; 601 } 602 603 if (args->new_len < args->old_len) { 604 bsd_args.addr = 605 (caddr_t)((uintptr_t)args->addr + args->new_len); 606 bsd_args.len = args->old_len - args->new_len; 607 error = munmap(td, &bsd_args); 608 } 609 610 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 611 return error; 612 } 613 614 #define LINUX_MS_ASYNC 0x0001 615 #define LINUX_MS_INVALIDATE 0x0002 616 #define LINUX_MS_SYNC 0x0004 617 618 int 619 linux_msync(struct thread *td, struct linux_msync_args *args) 620 { 621 struct msync_args bsd_args; 622 623 bsd_args.addr = (caddr_t)(uintptr_t)args->addr; 624 bsd_args.len = (uintptr_t)args->len; 625 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 626 627 return msync(td, &bsd_args); 628 } 629 630 #ifndef __alpha__ 631 int 632 linux_time(struct thread *td, struct linux_time_args *args) 633 { 634 struct timeval tv; 635 l_time_t tm; 636 int error; 637 638 #ifdef DEBUG 639 if (ldebug(time)) 640 printf(ARGS(time, "*")); 641 #endif 642 643 microtime(&tv); 644 tm = tv.tv_sec; 645 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 646 return error; 647 td->td_retval[0] = tm; 648 return 0; 649 } 650 #endif /*!__alpha__*/ 651 652 struct l_times_argv { 653 l_long tms_utime; 654 l_long tms_stime; 655 l_long tms_cutime; 656 l_long tms_cstime; 657 }; 658 659 #ifdef __alpha__ 660 #define CLK_TCK 1024 /* Linux uses 1024 on alpha */ 661 #else 662 #define CLK_TCK 100 /* Linux uses 100 */ 663 #endif 664 665 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 666 667 int 668 linux_times(struct thread *td, struct linux_times_args *args) 669 { 670 struct timeval tv, utime, stime, cutime, cstime; 671 struct l_times_argv tms; 672 struct proc *p; 673 int error; 674 675 #ifdef DEBUG 676 if (ldebug(times)) 677 printf(ARGS(times, "*")); 678 #endif 679 680 p = td->td_proc; 681 PROC_LOCK(p); 682 calcru(p, 
&utime, &stime); 683 calccru(p, &cutime, &cstime); 684 PROC_UNLOCK(p); 685 686 tms.tms_utime = CONVTCK(utime); 687 tms.tms_stime = CONVTCK(stime); 688 689 tms.tms_cutime = CONVTCK(cutime); 690 tms.tms_cstime = CONVTCK(cstime); 691 692 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 693 return error; 694 695 microuptime(&tv); 696 td->td_retval[0] = (int)CONVTCK(tv); 697 return 0; 698 } 699 700 int 701 linux_newuname(struct thread *td, struct linux_newuname_args *args) 702 { 703 struct l_new_utsname utsname; 704 char osname[LINUX_MAX_UTSNAME]; 705 char osrelease[LINUX_MAX_UTSNAME]; 706 char *p; 707 708 #ifdef DEBUG 709 if (ldebug(newuname)) 710 printf(ARGS(newuname, "*")); 711 #endif 712 713 linux_get_osname(td, osname); 714 linux_get_osrelease(td, osrelease); 715 716 bzero(&utsname, sizeof(utsname)); 717 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 718 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 719 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 720 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 721 for (p = utsname.version; *p != '\0'; ++p) 722 if (*p == '\n') { 723 *p = '\0'; 724 break; 725 } 726 #ifdef __i386__ 727 { 728 const char *class; 729 switch (cpu_class) { 730 case CPUCLASS_686: 731 class = "i686"; 732 break; 733 case CPUCLASS_586: 734 class = "i586"; 735 break; 736 case CPUCLASS_486: 737 class = "i486"; 738 break; 739 default: 740 class = "i386"; 741 } 742 strlcpy(utsname.machine, class, LINUX_MAX_UTSNAME); 743 } 744 #elif defined(__amd64__) /* XXX: Linux can change 'personality'. 
*/ 745 #ifdef COMPAT_LINUX32 746 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 747 #else 748 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 749 #endif /* COMPAT_LINUX32 */ 750 #else /* something other than i386 or amd64 - assume we and Linux agree */ 751 strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME); 752 #endif /* __i386__ */ 753 strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME); 754 755 return (copyout(&utsname, args->buf, sizeof(utsname))); 756 } 757 758 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 759 struct l_utimbuf { 760 l_time_t l_actime; 761 l_time_t l_modtime; 762 }; 763 764 int 765 linux_utime(struct thread *td, struct linux_utime_args *args) 766 { 767 struct timeval tv[2], *tvp; 768 struct l_utimbuf lut; 769 char *fname; 770 int error; 771 772 LCONVPATHEXIST(td, args->fname, &fname); 773 774 #ifdef DEBUG 775 if (ldebug(utime)) 776 printf(ARGS(utime, "%s, *"), fname); 777 #endif 778 779 if (args->times) { 780 if ((error = copyin(args->times, &lut, sizeof lut))) { 781 LFREEPATH(fname); 782 return error; 783 } 784 tv[0].tv_sec = lut.l_actime; 785 tv[0].tv_usec = 0; 786 tv[1].tv_sec = lut.l_modtime; 787 tv[1].tv_usec = 0; 788 tvp = tv; 789 } else 790 tvp = NULL; 791 792 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 793 LFREEPATH(fname); 794 return (error); 795 } 796 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 797 798 #define __WCLONE 0x80000000 799 800 #ifndef __alpha__ 801 int 802 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 803 { 804 int error, options, tmpstat; 805 806 #ifdef DEBUG 807 if (ldebug(waitpid)) 808 printf(ARGS(waitpid, "%d, %p, %d"), 809 args->pid, (void *)args->status, args->options); 810 #endif 811 812 options = (args->options & (WNOHANG | WUNTRACED)); 813 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 814 if (args->options & __WCLONE) 815 options |= WLINUXCLONE; 816 817 error = kern_wait(td, args->pid, &tmpstat, 
options, NULL); 818 if (error) 819 return error; 820 821 if (args->status) { 822 tmpstat &= 0xffff; 823 if (WIFSIGNALED(tmpstat)) 824 tmpstat = (tmpstat & 0xffffff80) | 825 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 826 else if (WIFSTOPPED(tmpstat)) 827 tmpstat = (tmpstat & 0xffff00ff) | 828 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 829 return copyout(&tmpstat, args->status, sizeof(int)); 830 } 831 832 return 0; 833 } 834 #endif /*!__alpha__*/ 835 836 int 837 linux_wait4(struct thread *td, struct linux_wait4_args *args) 838 { 839 int error, options, tmpstat; 840 struct rusage ru, *rup; 841 struct proc *p; 842 843 #ifdef DEBUG 844 if (ldebug(wait4)) 845 printf(ARGS(wait4, "%d, %p, %d, %p"), 846 args->pid, (void *)args->status, args->options, 847 (void *)args->rusage); 848 #endif 849 850 options = (args->options & (WNOHANG | WUNTRACED)); 851 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 852 if (args->options & __WCLONE) 853 options |= WLINUXCLONE; 854 855 if (args->rusage != NULL) 856 rup = &ru; 857 else 858 rup = NULL; 859 error = kern_wait(td, args->pid, &tmpstat, options, rup); 860 if (error) 861 return error; 862 863 p = td->td_proc; 864 PROC_LOCK(p); 865 sigqueue_delete(&p->p_sigqueue, SIGCHLD); 866 PROC_UNLOCK(p); 867 868 if (args->status) { 869 tmpstat &= 0xffff; 870 if (WIFSIGNALED(tmpstat)) 871 tmpstat = (tmpstat & 0xffffff80) | 872 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 873 else if (WIFSTOPPED(tmpstat)) 874 tmpstat = (tmpstat & 0xffff00ff) | 875 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 876 error = copyout(&tmpstat, args->status, sizeof(int)); 877 } 878 if (args->rusage != NULL && error == 0) 879 error = copyout(&ru, args->rusage, sizeof(ru)); 880 881 return (error); 882 } 883 884 int 885 linux_mknod(struct thread *td, struct linux_mknod_args *args) 886 { 887 char *path; 888 int error; 889 890 LCONVPATHCREAT(td, args->path, &path); 891 892 #ifdef DEBUG 893 if (ldebug(mknod)) 894 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, 
args->dev); 895 #endif 896 897 if (args->mode & S_IFIFO) 898 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 899 else 900 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 901 args->dev); 902 LFREEPATH(path); 903 return (error); 904 } 905 906 /* 907 * UGH! This is just about the dumbest idea I've ever heard!! 908 */ 909 int 910 linux_personality(struct thread *td, struct linux_personality_args *args) 911 { 912 #ifdef DEBUG 913 if (ldebug(personality)) 914 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 915 #endif 916 #ifndef __alpha__ 917 if (args->per != 0) 918 return EINVAL; 919 #endif 920 921 /* Yes Jim, it's still a Linux... */ 922 td->td_retval[0] = 0; 923 return 0; 924 } 925 926 struct l_itimerval { 927 l_timeval it_interval; 928 l_timeval it_value; 929 }; 930 931 #define B2L_ITIMERVAL(bip, lip) \ 932 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 933 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 934 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 935 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 936 937 int 938 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 939 { 940 int error; 941 struct l_itimerval ls; 942 struct itimerval aitv, oitv; 943 944 #ifdef DEBUG 945 if (ldebug(setitimer)) 946 printf(ARGS(setitimer, "%p, %p"), 947 (void *)uap->itv, (void *)uap->oitv); 948 #endif 949 950 if (uap->itv == NULL) { 951 uap->itv = uap->oitv; 952 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 953 } 954 955 error = copyin(uap->itv, &ls, sizeof(ls)); 956 if (error != 0) 957 return (error); 958 B2L_ITIMERVAL(&aitv, &ls); 959 #ifdef DEBUG 960 if (ldebug(setitimer)) { 961 printf("setitimer: value: sec: %ld, usec: %ld\n", 962 aitv.it_value.tv_sec, aitv.it_value.tv_usec); 963 printf("setitimer: interval: sec: %ld, usec: %ld\n", 964 aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 965 } 966 #endif 967 error = kern_setitimer(td, uap->which, &aitv, &oitv); 968 if (error != 0 || 
uap->oitv == NULL) 969 return (error); 970 B2L_ITIMERVAL(&ls, &oitv); 971 972 return (copyout(&ls, uap->oitv, sizeof(ls))); 973 } 974 975 int 976 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 977 { 978 int error; 979 struct l_itimerval ls; 980 struct itimerval aitv; 981 982 #ifdef DEBUG 983 if (ldebug(getitimer)) 984 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 985 #endif 986 error = kern_getitimer(td, uap->which, &aitv); 987 if (error != 0) 988 return (error); 989 B2L_ITIMERVAL(&ls, &aitv); 990 return (copyout(&ls, uap->itv, sizeof(ls))); 991 } 992 993 #ifndef __alpha__ 994 int 995 linux_nice(struct thread *td, struct linux_nice_args *args) 996 { 997 struct setpriority_args bsd_args; 998 999 bsd_args.which = PRIO_PROCESS; 1000 bsd_args.who = 0; /* current process */ 1001 bsd_args.prio = args->inc; 1002 return setpriority(td, &bsd_args); 1003 } 1004 #endif /*!__alpha__*/ 1005 1006 int 1007 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1008 { 1009 struct ucred *newcred, *oldcred; 1010 l_gid_t linux_gidset[NGROUPS]; 1011 gid_t *bsd_gidset; 1012 int ngrp, error; 1013 struct proc *p; 1014 1015 ngrp = args->gidsetsize; 1016 if (ngrp < 0 || ngrp >= NGROUPS) 1017 return (EINVAL); 1018 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1019 if (error) 1020 return (error); 1021 newcred = crget(); 1022 p = td->td_proc; 1023 PROC_LOCK(p); 1024 oldcred = p->p_ucred; 1025 1026 /* 1027 * cr_groups[0] holds egid. Setting the whole set from 1028 * the supplied set will cause egid to be changed too. 1029 * Keep cr_groups[0] unchanged to prevent that. 
1030 */ 1031 1032 if ((error = suser_cred(oldcred, SUSER_ALLOWJAIL)) != 0) { 1033 PROC_UNLOCK(p); 1034 crfree(newcred); 1035 return (error); 1036 } 1037 1038 crcopy(newcred, oldcred); 1039 if (ngrp > 0) { 1040 newcred->cr_ngroups = ngrp + 1; 1041 1042 bsd_gidset = newcred->cr_groups; 1043 ngrp--; 1044 while (ngrp >= 0) { 1045 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1046 ngrp--; 1047 } 1048 } 1049 else 1050 newcred->cr_ngroups = 1; 1051 1052 setsugid(p); 1053 p->p_ucred = newcred; 1054 PROC_UNLOCK(p); 1055 crfree(oldcred); 1056 return (0); 1057 } 1058 1059 int 1060 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1061 { 1062 struct ucred *cred; 1063 l_gid_t linux_gidset[NGROUPS]; 1064 gid_t *bsd_gidset; 1065 int bsd_gidsetsz, ngrp, error; 1066 1067 cred = td->td_ucred; 1068 bsd_gidset = cred->cr_groups; 1069 bsd_gidsetsz = cred->cr_ngroups - 1; 1070 1071 /* 1072 * cr_groups[0] holds egid. Returning the whole set 1073 * here will cause a duplicate. Exclude cr_groups[0] 1074 * to prevent that. 
1075 */ 1076 1077 if ((ngrp = args->gidsetsize) == 0) { 1078 td->td_retval[0] = bsd_gidsetsz; 1079 return (0); 1080 } 1081 1082 if (ngrp < bsd_gidsetsz) 1083 return (EINVAL); 1084 1085 ngrp = 0; 1086 while (ngrp < bsd_gidsetsz) { 1087 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1088 ngrp++; 1089 } 1090 1091 if ((error = copyout(linux_gidset, args->grouplist, 1092 ngrp * sizeof(l_gid_t)))) 1093 return (error); 1094 1095 td->td_retval[0] = ngrp; 1096 return (0); 1097 } 1098 1099 #ifndef __alpha__ 1100 int 1101 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1102 { 1103 struct rlimit bsd_rlim; 1104 struct l_rlimit rlim; 1105 u_int which; 1106 int error; 1107 1108 #ifdef DEBUG 1109 if (ldebug(setrlimit)) 1110 printf(ARGS(setrlimit, "%d, %p"), 1111 args->resource, (void *)args->rlim); 1112 #endif 1113 1114 if (args->resource >= LINUX_RLIM_NLIMITS) 1115 return (EINVAL); 1116 1117 which = linux_to_bsd_resource[args->resource]; 1118 if (which == -1) 1119 return (EINVAL); 1120 1121 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1122 if (error) 1123 return (error); 1124 1125 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1126 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1127 return (kern_setrlimit(td, which, &bsd_rlim)); 1128 } 1129 1130 int 1131 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1132 { 1133 struct l_rlimit rlim; 1134 struct proc *p = td->td_proc; 1135 struct rlimit bsd_rlim; 1136 u_int which; 1137 1138 #ifdef DEBUG 1139 if (ldebug(old_getrlimit)) 1140 printf(ARGS(old_getrlimit, "%d, %p"), 1141 args->resource, (void *)args->rlim); 1142 #endif 1143 1144 if (args->resource >= LINUX_RLIM_NLIMITS) 1145 return (EINVAL); 1146 1147 which = linux_to_bsd_resource[args->resource]; 1148 if (which == -1) 1149 return (EINVAL); 1150 1151 PROC_LOCK(p); 1152 lim_rlimit(p, which, &bsd_rlim); 1153 PROC_UNLOCK(p); 1154 1155 #ifdef COMPAT_LINUX32 1156 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1157 if (rlim.rlim_cur == UINT_MAX) 
1158 rlim.rlim_cur = INT_MAX; 1159 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1160 if (rlim.rlim_max == UINT_MAX) 1161 rlim.rlim_max = INT_MAX; 1162 #else 1163 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1164 if (rlim.rlim_cur == ULONG_MAX) 1165 rlim.rlim_cur = LONG_MAX; 1166 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1167 if (rlim.rlim_max == ULONG_MAX) 1168 rlim.rlim_max = LONG_MAX; 1169 #endif 1170 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1171 } 1172 1173 int 1174 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1175 { 1176 struct l_rlimit rlim; 1177 struct proc *p = td->td_proc; 1178 struct rlimit bsd_rlim; 1179 u_int which; 1180 1181 #ifdef DEBUG 1182 if (ldebug(getrlimit)) 1183 printf(ARGS(getrlimit, "%d, %p"), 1184 args->resource, (void *)args->rlim); 1185 #endif 1186 1187 if (args->resource >= LINUX_RLIM_NLIMITS) 1188 return (EINVAL); 1189 1190 which = linux_to_bsd_resource[args->resource]; 1191 if (which == -1) 1192 return (EINVAL); 1193 1194 PROC_LOCK(p); 1195 lim_rlimit(p, which, &bsd_rlim); 1196 PROC_UNLOCK(p); 1197 1198 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1199 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1200 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1201 } 1202 #endif /*!__alpha__*/ 1203 1204 int 1205 linux_sched_setscheduler(struct thread *td, 1206 struct linux_sched_setscheduler_args *args) 1207 { 1208 struct sched_setscheduler_args bsd; 1209 1210 #ifdef DEBUG 1211 if (ldebug(sched_setscheduler)) 1212 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1213 args->pid, args->policy, (const void *)args->param); 1214 #endif 1215 1216 switch (args->policy) { 1217 case LINUX_SCHED_OTHER: 1218 bsd.policy = SCHED_OTHER; 1219 break; 1220 case LINUX_SCHED_FIFO: 1221 bsd.policy = SCHED_FIFO; 1222 break; 1223 case LINUX_SCHED_RR: 1224 bsd.policy = SCHED_RR; 1225 break; 1226 default: 1227 return EINVAL; 1228 } 1229 1230 bsd.pid = args->pid; 1231 bsd.param = (struct sched_param *)args->param; 1232 
return sched_setscheduler(td, &bsd); 1233 } 1234 1235 int 1236 linux_sched_getscheduler(struct thread *td, 1237 struct linux_sched_getscheduler_args *args) 1238 { 1239 struct sched_getscheduler_args bsd; 1240 int error; 1241 1242 #ifdef DEBUG 1243 if (ldebug(sched_getscheduler)) 1244 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1245 #endif 1246 1247 bsd.pid = args->pid; 1248 error = sched_getscheduler(td, &bsd); 1249 1250 switch (td->td_retval[0]) { 1251 case SCHED_OTHER: 1252 td->td_retval[0] = LINUX_SCHED_OTHER; 1253 break; 1254 case SCHED_FIFO: 1255 td->td_retval[0] = LINUX_SCHED_FIFO; 1256 break; 1257 case SCHED_RR: 1258 td->td_retval[0] = LINUX_SCHED_RR; 1259 break; 1260 } 1261 1262 return error; 1263 } 1264 1265 int 1266 linux_sched_get_priority_max(struct thread *td, 1267 struct linux_sched_get_priority_max_args *args) 1268 { 1269 struct sched_get_priority_max_args bsd; 1270 1271 #ifdef DEBUG 1272 if (ldebug(sched_get_priority_max)) 1273 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1274 #endif 1275 1276 switch (args->policy) { 1277 case LINUX_SCHED_OTHER: 1278 bsd.policy = SCHED_OTHER; 1279 break; 1280 case LINUX_SCHED_FIFO: 1281 bsd.policy = SCHED_FIFO; 1282 break; 1283 case LINUX_SCHED_RR: 1284 bsd.policy = SCHED_RR; 1285 break; 1286 default: 1287 return EINVAL; 1288 } 1289 return sched_get_priority_max(td, &bsd); 1290 } 1291 1292 int 1293 linux_sched_get_priority_min(struct thread *td, 1294 struct linux_sched_get_priority_min_args *args) 1295 { 1296 struct sched_get_priority_min_args bsd; 1297 1298 #ifdef DEBUG 1299 if (ldebug(sched_get_priority_min)) 1300 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1301 #endif 1302 1303 switch (args->policy) { 1304 case LINUX_SCHED_OTHER: 1305 bsd.policy = SCHED_OTHER; 1306 break; 1307 case LINUX_SCHED_FIFO: 1308 bsd.policy = SCHED_FIFO; 1309 break; 1310 case LINUX_SCHED_RR: 1311 bsd.policy = SCHED_RR; 1312 break; 1313 default: 1314 return EINVAL; 1315 } 1316 return 
sched_get_priority_min(td, &bsd); 1317 } 1318 1319 #define REBOOT_CAD_ON 0x89abcdef 1320 #define REBOOT_CAD_OFF 0 1321 #define REBOOT_HALT 0xcdef0123 1322 1323 int 1324 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1325 { 1326 struct reboot_args bsd_args; 1327 1328 #ifdef DEBUG 1329 if (ldebug(reboot)) 1330 printf(ARGS(reboot, "0x%x"), args->cmd); 1331 #endif 1332 if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF) 1333 return (0); 1334 bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0; 1335 return (reboot(td, &bsd_args)); 1336 } 1337 1338 #ifndef __alpha__ 1339 1340 /* 1341 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1342 * td->td_retval[1] when COMPAT_43 is defined. This 1343 * globbers registers that are assumed to be preserved. The following 1344 * lightweight syscalls fixes this. See also linux_getgid16() and 1345 * linux_getuid16() in linux_uid16.c. 1346 * 1347 * linux_getpid() - MP SAFE 1348 * linux_getgid() - MP SAFE 1349 * linux_getuid() - MP SAFE 1350 */ 1351 1352 int 1353 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1354 { 1355 1356 td->td_retval[0] = td->td_proc->p_pid; 1357 return (0); 1358 } 1359 1360 int 1361 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1362 { 1363 1364 td->td_retval[0] = td->td_ucred->cr_rgid; 1365 return (0); 1366 } 1367 1368 int 1369 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1370 { 1371 1372 td->td_retval[0] = td->td_ucred->cr_ruid; 1373 return (0); 1374 } 1375 1376 #endif /*!__alpha__*/ 1377 1378 int 1379 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1380 { 1381 struct getsid_args bsd; 1382 bsd.pid = args->pid; 1383 return getsid(td, &bsd); 1384 } 1385 1386 int 1387 linux_nosys(struct thread *td, struct nosys_args *ignore) 1388 { 1389 1390 return (ENOSYS); 1391 } 1392 1393 int 1394 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1395 { 1396 struct getpriority_args 
bsd_args; 1397 int error; 1398 1399 bsd_args.which = args->which; 1400 bsd_args.who = args->who; 1401 error = getpriority(td, &bsd_args); 1402 td->td_retval[0] = 20 - td->td_retval[0]; 1403 return error; 1404 } 1405