/*-
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_compat.h" 34 #include "opt_mac.h" 35 36 #include <sys/param.h> 37 #include <sys/blist.h> 38 #include <sys/fcntl.h> 39 #if defined(__i386__) 40 #include <sys/imgact_aout.h> 41 #endif 42 #include <sys/jail.h> 43 #include <sys/kernel.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/mac.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/mutex.h> 51 #include <sys/namei.h> 52 #include <sys/proc.h> 53 #include <sys/reboot.h> 54 #include <sys/resourcevar.h> 55 #include <sys/signalvar.h> 56 #include <sys/stat.h> 57 #include <sys/syscallsubr.h> 58 #include <sys/sysctl.h> 59 #include <sys/sysproto.h> 60 #include <sys/systm.h> 61 #include <sys/time.h> 62 #include <sys/vmmeter.h> 63 #include <sys/vnode.h> 64 #include <sys/wait.h> 65 66 #include <vm/vm.h> 67 #include <vm/pmap.h> 68 #include <vm/vm_kern.h> 69 #include <vm/vm_map.h> 70 #include <vm/vm_extern.h> 71 #include <vm/vm_object.h> 72 #include <vm/swap_pager.h> 73 74 #include <posix4/sched.h> 75 76 #include <compat/linux/linux_sysproto.h> 77 78 #ifdef COMPAT_LINUX32 79 #include <machine/../linux32/linux.h> 80 #include <machine/../linux32/linux32_proto.h> 81 #else 82 #include <machine/../linux/linux.h> 83 #include <machine/../linux/linux_proto.h> 84 #endif 85 86 #include <compat/linux/linux_mib.h> 87 #include <compat/linux/linux_util.h> 88 89 #ifdef __i386__ 90 #include <machine/cputypes.h> 91 #endif 92 93 #define BSD_TO_LINUX_SIGNAL(sig) \ 94 (((sig) <= LINUX_SIGTBLSZ) ? 
bsd_to_linux_signal[_SIG_IDX(sig)] : sig) 95 96 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 97 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 98 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 99 RLIMIT_MEMLOCK, -1 100 }; 101 102 struct l_sysinfo { 103 l_long uptime; /* Seconds since boot */ 104 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 105 #define LINUX_SYSINFO_LOADS_SCALE 65536 106 l_ulong totalram; /* Total usable main memory size */ 107 l_ulong freeram; /* Available memory size */ 108 l_ulong sharedram; /* Amount of shared memory */ 109 l_ulong bufferram; /* Memory used by buffers */ 110 l_ulong totalswap; /* Total swap space size */ 111 l_ulong freeswap; /* swap space still available */ 112 l_ushort procs; /* Number of current processes */ 113 l_ulong totalbig; 114 l_ulong freebig; 115 l_uint mem_unit; 116 char _f[6]; /* Pads structure to 64 bytes */ 117 }; 118 int 119 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 120 { 121 struct l_sysinfo sysinfo; 122 vm_object_t object; 123 int i, j; 124 struct timespec ts; 125 126 getnanouptime(&ts); 127 if (ts.tv_nsec != 0) 128 ts.tv_sec++; 129 sysinfo.uptime = ts.tv_sec; 130 131 /* Use the information from the mib to get our load averages */ 132 for (i = 0; i < 3; i++) 133 sysinfo.loads[i] = averunnable.ldavg[i] * 134 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 135 136 sysinfo.totalram = physmem * PAGE_SIZE; 137 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 138 139 sysinfo.sharedram = 0; 140 mtx_lock(&vm_object_list_mtx); 141 TAILQ_FOREACH(object, &vm_object_list, object_list) 142 if (object->shadow_count > 1) 143 sysinfo.sharedram += object->resident_page_count; 144 mtx_unlock(&vm_object_list_mtx); 145 146 sysinfo.sharedram *= PAGE_SIZE; 147 sysinfo.bufferram = 0; 148 149 swap_pager_status(&i, &j); 150 sysinfo.totalswap= i * PAGE_SIZE; 151 sysinfo.freeswap = (i - j) * PAGE_SIZE; 152 153 sysinfo.procs = nprocs; 154 155 /* The 
following are only present in newer Linux kernels. */ 156 sysinfo.totalbig = 0; 157 sysinfo.freebig = 0; 158 sysinfo.mem_unit = 1; 159 160 return copyout(&sysinfo, args->info, sizeof(sysinfo)); 161 } 162 163 int 164 linux_alarm(struct thread *td, struct linux_alarm_args *args) 165 { 166 struct itimerval it, old_it; 167 int error; 168 169 #ifdef DEBUG 170 if (ldebug(alarm)) 171 printf(ARGS(alarm, "%u"), args->secs); 172 #endif 173 174 if (args->secs > 100000000) 175 return (EINVAL); 176 177 it.it_value.tv_sec = (long)args->secs; 178 it.it_value.tv_usec = 0; 179 it.it_interval.tv_sec = 0; 180 it.it_interval.tv_usec = 0; 181 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 182 if (error) 183 return (error); 184 if (timevalisset(&old_it.it_value)) { 185 if (old_it.it_value.tv_usec != 0) 186 old_it.it_value.tv_sec++; 187 td->td_retval[0] = old_it.it_value.tv_sec; 188 } 189 return (0); 190 } 191 192 int 193 linux_brk(struct thread *td, struct linux_brk_args *args) 194 { 195 struct vmspace *vm = td->td_proc->p_vmspace; 196 vm_offset_t new, old; 197 struct obreak_args /* { 198 char * nsize; 199 } */ tmp; 200 201 #ifdef DEBUG 202 if (ldebug(brk)) 203 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 204 #endif 205 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 206 new = (vm_offset_t)args->dsend; 207 tmp.nsize = (char *) new; 208 if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp)) 209 td->td_retval[0] = (long)new; 210 else 211 td->td_retval[0] = (long)old; 212 213 return 0; 214 } 215 216 #if defined(__i386__) 217 /* XXX: what about amd64/linux32? 
*/ 218 219 int 220 linux_uselib(struct thread *td, struct linux_uselib_args *args) 221 { 222 struct nameidata ni; 223 struct vnode *vp; 224 struct exec *a_out; 225 struct vattr attr; 226 vm_offset_t vmaddr; 227 unsigned long file_offset; 228 vm_offset_t buffer; 229 unsigned long bss_size; 230 char *library; 231 int error; 232 int locked; 233 234 LCONVPATHEXIST(td, args->library, &library); 235 236 #ifdef DEBUG 237 if (ldebug(uselib)) 238 printf(ARGS(uselib, "%s"), library); 239 #endif 240 241 a_out = NULL; 242 locked = 0; 243 vp = NULL; 244 245 /* 246 * XXX: This code should make use of vn_open(), rather than doing 247 * all this stuff itself. 248 */ 249 NDINIT(&ni, LOOKUP, ISOPEN|FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td); 250 error = namei(&ni); 251 LFREEPATH(library); 252 if (error) 253 goto cleanup; 254 255 vp = ni.ni_vp; 256 /* 257 * XXX - This looks like a bogus check. A LOCKLEAF namei should not 258 * succeed without returning a vnode. 259 */ 260 if (vp == NULL) { 261 error = ENOEXEC; /* ?? */ 262 goto cleanup; 263 } 264 NDFREE(&ni, NDF_ONLY_PNBUF); 265 266 /* 267 * From here on down, we have a locked vnode that must be unlocked. 268 */ 269 locked++; 270 271 /* Writable? */ 272 if (vp->v_writecount) { 273 error = ETXTBSY; 274 goto cleanup; 275 } 276 277 /* Executable? */ 278 error = VOP_GETATTR(vp, &attr, td->td_ucred, td); 279 if (error) 280 goto cleanup; 281 282 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 283 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 284 error = ENOEXEC; 285 goto cleanup; 286 } 287 288 /* Sensible size? */ 289 if (attr.va_size == 0) { 290 error = ENOEXEC; 291 goto cleanup; 292 } 293 294 /* Can we access it? */ 295 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 296 if (error) 297 goto cleanup; 298 299 /* 300 * XXX: This should use vn_open() so that it is properly authorized, 301 * and to reduce code redundancy all over the place here. 
302 */ 303 #ifdef MAC 304 error = mac_check_vnode_open(td->td_ucred, vp, FREAD); 305 if (error) 306 goto cleanup; 307 #endif 308 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); 309 if (error) 310 goto cleanup; 311 312 /* Pull in executable header into kernel_map */ 313 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 314 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 315 /* 316 * Lock no longer needed 317 */ 318 locked = 0; 319 VOP_UNLOCK(vp, 0, td); 320 321 if (error) 322 goto cleanup; 323 324 /* Is it a Linux binary ? */ 325 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 326 error = ENOEXEC; 327 goto cleanup; 328 } 329 330 /* 331 * While we are here, we should REALLY do some more checks 332 */ 333 334 /* Set file/virtual offset based on a.out variant. */ 335 switch ((int)(a_out->a_magic & 0xffff)) { 336 case 0413: /* ZMAGIC */ 337 file_offset = 1024; 338 break; 339 case 0314: /* QMAGIC */ 340 file_offset = 0; 341 break; 342 default: 343 error = ENOEXEC; 344 goto cleanup; 345 } 346 347 bss_size = round_page(a_out->a_bss); 348 349 /* Check various fields in header for validity/bounds. */ 350 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 351 error = ENOEXEC; 352 goto cleanup; 353 } 354 355 /* text + data can't exceed file size */ 356 if (a_out->a_data + a_out->a_text > attr.va_size) { 357 error = EFAULT; 358 goto cleanup; 359 } 360 361 /* 362 * text/data/bss must not exceed limits 363 * XXX - this is not complete. it should check current usage PLUS 364 * the resources needed by this library. 365 */ 366 PROC_LOCK(td->td_proc); 367 if (a_out->a_text > maxtsiz || 368 a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA)) { 369 PROC_UNLOCK(td->td_proc); 370 error = ENOMEM; 371 goto cleanup; 372 } 373 PROC_UNLOCK(td->td_proc); 374 375 mp_fixme("Unlocked vflags access."); 376 /* prevent more writers */ 377 vp->v_vflag |= VV_TEXT; 378 379 /* 380 * Check if file_offset page aligned. 
Currently we cannot handle 381 * misalinged file offsets, and so we read in the entire image 382 * (what a waste). 383 */ 384 if (file_offset & PAGE_MASK) { 385 #ifdef DEBUG 386 printf("uselib: Non page aligned binary %lu\n", file_offset); 387 #endif 388 /* Map text+data read/write/execute */ 389 390 /* a_entry is the load address and is page aligned */ 391 vmaddr = trunc_page(a_out->a_entry); 392 393 /* get anon user mapping, read+write+execute */ 394 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 395 &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL, 396 VM_PROT_ALL, 0); 397 if (error) 398 goto cleanup; 399 400 /* map file into kernel_map */ 401 error = vm_mmap(kernel_map, &buffer, 402 round_page(a_out->a_text + a_out->a_data + file_offset), 403 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 404 trunc_page(file_offset)); 405 if (error) 406 goto cleanup; 407 408 /* copy from kernel VM space to user space */ 409 error = copyout(PTRIN(buffer + file_offset), 410 (void *)vmaddr, a_out->a_text + a_out->a_data); 411 412 /* release temporary kernel space */ 413 vm_map_remove(kernel_map, buffer, buffer + 414 round_page(a_out->a_text + a_out->a_data + file_offset)); 415 416 if (error) 417 goto cleanup; 418 } else { 419 #ifdef DEBUG 420 printf("uselib: Page aligned binary %lu\n", file_offset); 421 #endif 422 /* 423 * for QMAGIC, a_entry is 20 bytes beyond the load address 424 * to skip the executable header 425 */ 426 vmaddr = trunc_page(a_out->a_entry); 427 428 /* 429 * Map it all into the process's space as a single 430 * copy-on-write "data" segment. 
431 */ 432 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 433 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 434 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 435 if (error) 436 goto cleanup; 437 } 438 #ifdef DEBUG 439 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0], 440 ((long*)vmaddr)[1]); 441 #endif 442 if (bss_size != 0) { 443 /* Calculate BSS start address */ 444 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 445 a_out->a_data; 446 447 /* allocate some 'anon' space */ 448 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 449 &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); 450 if (error) 451 goto cleanup; 452 } 453 454 cleanup: 455 /* Unlock vnode if needed */ 456 if (locked) 457 VOP_UNLOCK(vp, 0, td); 458 459 /* Release the kernel mapping. */ 460 if (a_out) 461 vm_map_remove(kernel_map, (vm_offset_t)a_out, 462 (vm_offset_t)a_out + PAGE_SIZE); 463 464 return error; 465 } 466 467 #endif /* __i386__ */ 468 469 int 470 linux_select(struct thread *td, struct linux_select_args *args) 471 { 472 l_timeval ltv; 473 struct timeval tv0, tv1, utv, *tvp; 474 int error; 475 476 #ifdef DEBUG 477 if (ldebug(select)) 478 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 479 (void *)args->readfds, (void *)args->writefds, 480 (void *)args->exceptfds, (void *)args->timeout); 481 #endif 482 483 /* 484 * Store current time for computation of the amount of 485 * time left. 486 */ 487 if (args->timeout) { 488 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 489 goto select_out; 490 utv.tv_sec = ltv.tv_sec; 491 utv.tv_usec = ltv.tv_usec; 492 #ifdef DEBUG 493 if (ldebug(select)) 494 printf(LMSG("incoming timeout (%jd/%ld)"), 495 (intmax_t)utv.tv_sec, utv.tv_usec); 496 #endif 497 498 if (itimerfix(&utv)) { 499 /* 500 * The timeval was invalid. Convert it to something 501 * valid that will act as it does under Linux. 
502 */ 503 utv.tv_sec += utv.tv_usec / 1000000; 504 utv.tv_usec %= 1000000; 505 if (utv.tv_usec < 0) { 506 utv.tv_sec -= 1; 507 utv.tv_usec += 1000000; 508 } 509 if (utv.tv_sec < 0) 510 timevalclear(&utv); 511 } 512 microtime(&tv0); 513 tvp = &utv; 514 } else 515 tvp = NULL; 516 517 error = kern_select(td, args->nfds, args->readfds, args->writefds, 518 args->exceptfds, tvp); 519 520 #ifdef DEBUG 521 if (ldebug(select)) 522 printf(LMSG("real select returns %d"), error); 523 #endif 524 if (error) { 525 /* 526 * See fs/select.c in the Linux kernel. Without this, 527 * Maelstrom doesn't work. 528 */ 529 if (error == ERESTART) 530 error = EINTR; 531 goto select_out; 532 } 533 534 if (args->timeout) { 535 if (td->td_retval[0]) { 536 /* 537 * Compute how much time was left of the timeout, 538 * by subtracting the current time and the time 539 * before we started the call, and subtracting 540 * that result from the user-supplied value. 541 */ 542 microtime(&tv1); 543 timevalsub(&tv1, &tv0); 544 timevalsub(&utv, &tv1); 545 if (utv.tv_sec < 0) 546 timevalclear(&utv); 547 } else 548 timevalclear(&utv); 549 #ifdef DEBUG 550 if (ldebug(select)) 551 printf(LMSG("outgoing timeout (%jd/%ld)"), 552 (intmax_t)utv.tv_sec, utv.tv_usec); 553 #endif 554 ltv.tv_sec = utv.tv_sec; 555 ltv.tv_usec = utv.tv_usec; 556 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 557 goto select_out; 558 } 559 560 select_out: 561 #ifdef DEBUG 562 if (ldebug(select)) 563 printf(LMSG("select_out -> %d"), error); 564 #endif 565 return error; 566 } 567 568 int 569 linux_mremap(struct thread *td, struct linux_mremap_args *args) 570 { 571 struct munmap_args /* { 572 void *addr; 573 size_t len; 574 } */ bsd_args; 575 int error = 0; 576 577 #ifdef DEBUG 578 if (ldebug(mremap)) 579 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 580 (void *)(uintptr_t)args->addr, 581 (unsigned long)args->old_len, 582 (unsigned long)args->new_len, 583 (unsigned long)args->flags); 584 #endif 585 args->new_len = 
round_page(args->new_len); 586 args->old_len = round_page(args->old_len); 587 588 if (args->new_len > args->old_len) { 589 td->td_retval[0] = 0; 590 return ENOMEM; 591 } 592 593 if (args->new_len < args->old_len) { 594 bsd_args.addr = 595 (caddr_t)((uintptr_t)args->addr + args->new_len); 596 bsd_args.len = args->old_len - args->new_len; 597 error = munmap(td, &bsd_args); 598 } 599 600 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 601 return error; 602 } 603 604 #define LINUX_MS_ASYNC 0x0001 605 #define LINUX_MS_INVALIDATE 0x0002 606 #define LINUX_MS_SYNC 0x0004 607 608 int 609 linux_msync(struct thread *td, struct linux_msync_args *args) 610 { 611 struct msync_args bsd_args; 612 613 bsd_args.addr = (caddr_t)(uintptr_t)args->addr; 614 bsd_args.len = (uintptr_t)args->len; 615 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 616 617 return msync(td, &bsd_args); 618 } 619 620 int 621 linux_time(struct thread *td, struct linux_time_args *args) 622 { 623 struct timeval tv; 624 l_time_t tm; 625 int error; 626 627 #ifdef DEBUG 628 if (ldebug(time)) 629 printf(ARGS(time, "*")); 630 #endif 631 632 microtime(&tv); 633 tm = tv.tv_sec; 634 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 635 return error; 636 td->td_retval[0] = tm; 637 return 0; 638 } 639 640 struct l_times_argv { 641 l_long tms_utime; 642 l_long tms_stime; 643 l_long tms_cutime; 644 l_long tms_cstime; 645 }; 646 647 #define CLK_TCK 100 /* Linux uses 100 */ 648 649 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 650 651 int 652 linux_times(struct thread *td, struct linux_times_args *args) 653 { 654 struct timeval tv, utime, stime, cutime, cstime; 655 struct l_times_argv tms; 656 struct proc *p; 657 int error; 658 659 #ifdef DEBUG 660 if (ldebug(times)) 661 printf(ARGS(times, "*")); 662 #endif 663 664 if (args->buf != NULL) { 665 p = td->td_proc; 666 PROC_LOCK(p); 667 calcru(p, &utime, &stime); 668 calccru(p, &cutime, &cstime); 669 PROC_UNLOCK(p); 670 671 tms.tms_utime = 
CONVTCK(utime); 672 tms.tms_stime = CONVTCK(stime); 673 674 tms.tms_cutime = CONVTCK(cutime); 675 tms.tms_cstime = CONVTCK(cstime); 676 677 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 678 return error; 679 } 680 681 microuptime(&tv); 682 td->td_retval[0] = (int)CONVTCK(tv); 683 return 0; 684 } 685 686 int 687 linux_newuname(struct thread *td, struct linux_newuname_args *args) 688 { 689 struct l_new_utsname utsname; 690 char osname[LINUX_MAX_UTSNAME]; 691 char osrelease[LINUX_MAX_UTSNAME]; 692 char *p; 693 694 #ifdef DEBUG 695 if (ldebug(newuname)) 696 printf(ARGS(newuname, "*")); 697 #endif 698 699 linux_get_osname(td, osname); 700 linux_get_osrelease(td, osrelease); 701 702 bzero(&utsname, sizeof(utsname)); 703 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 704 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 705 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 706 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 707 for (p = utsname.version; *p != '\0'; ++p) 708 if (*p == '\n') { 709 *p = '\0'; 710 break; 711 } 712 #ifdef __i386__ 713 { 714 const char *class; 715 switch (cpu_class) { 716 case CPUCLASS_686: 717 class = "i686"; 718 break; 719 case CPUCLASS_586: 720 class = "i586"; 721 break; 722 case CPUCLASS_486: 723 class = "i486"; 724 break; 725 default: 726 class = "i386"; 727 } 728 strlcpy(utsname.machine, class, LINUX_MAX_UTSNAME); 729 } 730 #elif defined(__amd64__) /* XXX: Linux can change 'personality'. 
*/ 731 #ifdef COMPAT_LINUX32 732 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 733 #else 734 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 735 #endif /* COMPAT_LINUX32 */ 736 #else /* something other than i386 or amd64 - assume we and Linux agree */ 737 strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME); 738 #endif /* __i386__ */ 739 strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME); 740 741 return (copyout(&utsname, args->buf, sizeof(utsname))); 742 } 743 744 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 745 struct l_utimbuf { 746 l_time_t l_actime; 747 l_time_t l_modtime; 748 }; 749 750 int 751 linux_utime(struct thread *td, struct linux_utime_args *args) 752 { 753 struct timeval tv[2], *tvp; 754 struct l_utimbuf lut; 755 char *fname; 756 int error; 757 758 LCONVPATHEXIST(td, args->fname, &fname); 759 760 #ifdef DEBUG 761 if (ldebug(utime)) 762 printf(ARGS(utime, "%s, *"), fname); 763 #endif 764 765 if (args->times) { 766 if ((error = copyin(args->times, &lut, sizeof lut))) { 767 LFREEPATH(fname); 768 return error; 769 } 770 tv[0].tv_sec = lut.l_actime; 771 tv[0].tv_usec = 0; 772 tv[1].tv_sec = lut.l_modtime; 773 tv[1].tv_usec = 0; 774 tvp = tv; 775 } else 776 tvp = NULL; 777 778 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 779 LFREEPATH(fname); 780 return (error); 781 } 782 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 783 784 #define __WCLONE 0x80000000 785 786 int 787 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 788 { 789 int error, options, tmpstat; 790 791 #ifdef DEBUG 792 if (ldebug(waitpid)) 793 printf(ARGS(waitpid, "%d, %p, %d"), 794 args->pid, (void *)args->status, args->options); 795 #endif 796 797 options = (args->options & (WNOHANG | WUNTRACED)); 798 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 799 if (args->options & __WCLONE) 800 options |= WLINUXCLONE; 801 802 error = kern_wait(td, args->pid, &tmpstat, options, NULL); 803 if 
(error) 804 return error; 805 806 if (args->status) { 807 tmpstat &= 0xffff; 808 if (WIFSIGNALED(tmpstat)) 809 tmpstat = (tmpstat & 0xffffff80) | 810 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 811 else if (WIFSTOPPED(tmpstat)) 812 tmpstat = (tmpstat & 0xffff00ff) | 813 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 814 return copyout(&tmpstat, args->status, sizeof(int)); 815 } 816 817 return 0; 818 } 819 820 int 821 linux_wait4(struct thread *td, struct linux_wait4_args *args) 822 { 823 int error, options, tmpstat; 824 struct rusage ru, *rup; 825 struct proc *p; 826 827 #ifdef DEBUG 828 if (ldebug(wait4)) 829 printf(ARGS(wait4, "%d, %p, %d, %p"), 830 args->pid, (void *)args->status, args->options, 831 (void *)args->rusage); 832 #endif 833 834 options = (args->options & (WNOHANG | WUNTRACED)); 835 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 836 if (args->options & __WCLONE) 837 options |= WLINUXCLONE; 838 839 if (args->rusage != NULL) 840 rup = &ru; 841 else 842 rup = NULL; 843 error = kern_wait(td, args->pid, &tmpstat, options, rup); 844 if (error) 845 return error; 846 847 p = td->td_proc; 848 PROC_LOCK(p); 849 sigqueue_delete(&p->p_sigqueue, SIGCHLD); 850 PROC_UNLOCK(p); 851 852 if (args->status) { 853 tmpstat &= 0xffff; 854 if (WIFSIGNALED(tmpstat)) 855 tmpstat = (tmpstat & 0xffffff80) | 856 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 857 else if (WIFSTOPPED(tmpstat)) 858 tmpstat = (tmpstat & 0xffff00ff) | 859 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 860 error = copyout(&tmpstat, args->status, sizeof(int)); 861 } 862 if (args->rusage != NULL && error == 0) 863 error = copyout(&ru, args->rusage, sizeof(ru)); 864 865 return (error); 866 } 867 868 int 869 linux_mknod(struct thread *td, struct linux_mknod_args *args) 870 { 871 char *path; 872 int error; 873 874 LCONVPATHCREAT(td, args->path, &path); 875 876 #ifdef DEBUG 877 if (ldebug(mknod)) 878 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); 879 #endif 880 881 if (args->mode & 
S_IFIFO) 882 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 883 else 884 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 885 args->dev); 886 LFREEPATH(path); 887 return (error); 888 } 889 890 /* 891 * UGH! This is just about the dumbest idea I've ever heard!! 892 */ 893 int 894 linux_personality(struct thread *td, struct linux_personality_args *args) 895 { 896 #ifdef DEBUG 897 if (ldebug(personality)) 898 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 899 #endif 900 if (args->per != 0) 901 return EINVAL; 902 903 /* Yes Jim, it's still a Linux... */ 904 td->td_retval[0] = 0; 905 return 0; 906 } 907 908 struct l_itimerval { 909 l_timeval it_interval; 910 l_timeval it_value; 911 }; 912 913 #define B2L_ITIMERVAL(bip, lip) \ 914 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 915 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 916 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 917 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 918 919 int 920 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 921 { 922 int error; 923 struct l_itimerval ls; 924 struct itimerval aitv, oitv; 925 926 #ifdef DEBUG 927 if (ldebug(setitimer)) 928 printf(ARGS(setitimer, "%p, %p"), 929 (void *)uap->itv, (void *)uap->oitv); 930 #endif 931 932 if (uap->itv == NULL) { 933 uap->itv = uap->oitv; 934 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 935 } 936 937 error = copyin(uap->itv, &ls, sizeof(ls)); 938 if (error != 0) 939 return (error); 940 B2L_ITIMERVAL(&aitv, &ls); 941 #ifdef DEBUG 942 if (ldebug(setitimer)) { 943 printf("setitimer: value: sec: %jd, usec: %ld\n", 944 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 945 printf("setitimer: interval: sec: %jd, usec: %ld\n", 946 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 947 } 948 #endif 949 error = kern_setitimer(td, uap->which, &aitv, &oitv); 950 if (error != 0 || uap->oitv == NULL) 951 return (error); 952 B2L_ITIMERVAL(&ls, 
&oitv); 953 954 return (copyout(&ls, uap->oitv, sizeof(ls))); 955 } 956 957 int 958 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 959 { 960 int error; 961 struct l_itimerval ls; 962 struct itimerval aitv; 963 964 #ifdef DEBUG 965 if (ldebug(getitimer)) 966 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 967 #endif 968 error = kern_getitimer(td, uap->which, &aitv); 969 if (error != 0) 970 return (error); 971 B2L_ITIMERVAL(&ls, &aitv); 972 return (copyout(&ls, uap->itv, sizeof(ls))); 973 } 974 975 int 976 linux_nice(struct thread *td, struct linux_nice_args *args) 977 { 978 struct setpriority_args bsd_args; 979 980 bsd_args.which = PRIO_PROCESS; 981 bsd_args.who = 0; /* current process */ 982 bsd_args.prio = args->inc; 983 return setpriority(td, &bsd_args); 984 } 985 986 int 987 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 988 { 989 struct ucred *newcred, *oldcred; 990 l_gid_t linux_gidset[NGROUPS]; 991 gid_t *bsd_gidset; 992 int ngrp, error; 993 struct proc *p; 994 995 ngrp = args->gidsetsize; 996 if (ngrp < 0 || ngrp >= NGROUPS) 997 return (EINVAL); 998 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 999 if (error) 1000 return (error); 1001 newcred = crget(); 1002 p = td->td_proc; 1003 PROC_LOCK(p); 1004 oldcred = p->p_ucred; 1005 1006 /* 1007 * cr_groups[0] holds egid. Setting the whole set from 1008 * the supplied set will cause egid to be changed too. 1009 * Keep cr_groups[0] unchanged to prevent that. 
1010 */ 1011 1012 if ((error = suser_cred(oldcred, SUSER_ALLOWJAIL)) != 0) { 1013 PROC_UNLOCK(p); 1014 crfree(newcred); 1015 return (error); 1016 } 1017 1018 crcopy(newcred, oldcred); 1019 if (ngrp > 0) { 1020 newcred->cr_ngroups = ngrp + 1; 1021 1022 bsd_gidset = newcred->cr_groups; 1023 ngrp--; 1024 while (ngrp >= 0) { 1025 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1026 ngrp--; 1027 } 1028 } 1029 else 1030 newcred->cr_ngroups = 1; 1031 1032 setsugid(p); 1033 p->p_ucred = newcred; 1034 PROC_UNLOCK(p); 1035 crfree(oldcred); 1036 return (0); 1037 } 1038 1039 int 1040 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1041 { 1042 struct ucred *cred; 1043 l_gid_t linux_gidset[NGROUPS]; 1044 gid_t *bsd_gidset; 1045 int bsd_gidsetsz, ngrp, error; 1046 1047 cred = td->td_ucred; 1048 bsd_gidset = cred->cr_groups; 1049 bsd_gidsetsz = cred->cr_ngroups - 1; 1050 1051 /* 1052 * cr_groups[0] holds egid. Returning the whole set 1053 * here will cause a duplicate. Exclude cr_groups[0] 1054 * to prevent that. 
1055 */ 1056 1057 if ((ngrp = args->gidsetsize) == 0) { 1058 td->td_retval[0] = bsd_gidsetsz; 1059 return (0); 1060 } 1061 1062 if (ngrp < bsd_gidsetsz) 1063 return (EINVAL); 1064 1065 ngrp = 0; 1066 while (ngrp < bsd_gidsetsz) { 1067 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1068 ngrp++; 1069 } 1070 1071 if ((error = copyout(linux_gidset, args->grouplist, 1072 ngrp * sizeof(l_gid_t)))) 1073 return (error); 1074 1075 td->td_retval[0] = ngrp; 1076 return (0); 1077 } 1078 1079 int 1080 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1081 { 1082 struct rlimit bsd_rlim; 1083 struct l_rlimit rlim; 1084 u_int which; 1085 int error; 1086 1087 #ifdef DEBUG 1088 if (ldebug(setrlimit)) 1089 printf(ARGS(setrlimit, "%d, %p"), 1090 args->resource, (void *)args->rlim); 1091 #endif 1092 1093 if (args->resource >= LINUX_RLIM_NLIMITS) 1094 return (EINVAL); 1095 1096 which = linux_to_bsd_resource[args->resource]; 1097 if (which == -1) 1098 return (EINVAL); 1099 1100 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1101 if (error) 1102 return (error); 1103 1104 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1105 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1106 return (kern_setrlimit(td, which, &bsd_rlim)); 1107 } 1108 1109 int 1110 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1111 { 1112 struct l_rlimit rlim; 1113 struct proc *p = td->td_proc; 1114 struct rlimit bsd_rlim; 1115 u_int which; 1116 1117 #ifdef DEBUG 1118 if (ldebug(old_getrlimit)) 1119 printf(ARGS(old_getrlimit, "%d, %p"), 1120 args->resource, (void *)args->rlim); 1121 #endif 1122 1123 if (args->resource >= LINUX_RLIM_NLIMITS) 1124 return (EINVAL); 1125 1126 which = linux_to_bsd_resource[args->resource]; 1127 if (which == -1) 1128 return (EINVAL); 1129 1130 PROC_LOCK(p); 1131 lim_rlimit(p, which, &bsd_rlim); 1132 PROC_UNLOCK(p); 1133 1134 #ifdef COMPAT_LINUX32 1135 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1136 if (rlim.rlim_cur == UINT_MAX) 1137 rlim.rlim_cur = 
INT_MAX; 1138 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1139 if (rlim.rlim_max == UINT_MAX) 1140 rlim.rlim_max = INT_MAX; 1141 #else 1142 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1143 if (rlim.rlim_cur == ULONG_MAX) 1144 rlim.rlim_cur = LONG_MAX; 1145 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1146 if (rlim.rlim_max == ULONG_MAX) 1147 rlim.rlim_max = LONG_MAX; 1148 #endif 1149 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1150 } 1151 1152 int 1153 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1154 { 1155 struct l_rlimit rlim; 1156 struct proc *p = td->td_proc; 1157 struct rlimit bsd_rlim; 1158 u_int which; 1159 1160 #ifdef DEBUG 1161 if (ldebug(getrlimit)) 1162 printf(ARGS(getrlimit, "%d, %p"), 1163 args->resource, (void *)args->rlim); 1164 #endif 1165 1166 if (args->resource >= LINUX_RLIM_NLIMITS) 1167 return (EINVAL); 1168 1169 which = linux_to_bsd_resource[args->resource]; 1170 if (which == -1) 1171 return (EINVAL); 1172 1173 PROC_LOCK(p); 1174 lim_rlimit(p, which, &bsd_rlim); 1175 PROC_UNLOCK(p); 1176 1177 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1178 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1179 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1180 } 1181 1182 int 1183 linux_sched_setscheduler(struct thread *td, 1184 struct linux_sched_setscheduler_args *args) 1185 { 1186 struct sched_setscheduler_args bsd; 1187 1188 #ifdef DEBUG 1189 if (ldebug(sched_setscheduler)) 1190 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1191 args->pid, args->policy, (const void *)args->param); 1192 #endif 1193 1194 switch (args->policy) { 1195 case LINUX_SCHED_OTHER: 1196 bsd.policy = SCHED_OTHER; 1197 break; 1198 case LINUX_SCHED_FIFO: 1199 bsd.policy = SCHED_FIFO; 1200 break; 1201 case LINUX_SCHED_RR: 1202 bsd.policy = SCHED_RR; 1203 break; 1204 default: 1205 return EINVAL; 1206 } 1207 1208 bsd.pid = args->pid; 1209 bsd.param = (struct sched_param *)args->param; 1210 return sched_setscheduler(td, &bsd); 1211 } 1212 
1213 int 1214 linux_sched_getscheduler(struct thread *td, 1215 struct linux_sched_getscheduler_args *args) 1216 { 1217 struct sched_getscheduler_args bsd; 1218 int error; 1219 1220 #ifdef DEBUG 1221 if (ldebug(sched_getscheduler)) 1222 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1223 #endif 1224 1225 bsd.pid = args->pid; 1226 error = sched_getscheduler(td, &bsd); 1227 1228 switch (td->td_retval[0]) { 1229 case SCHED_OTHER: 1230 td->td_retval[0] = LINUX_SCHED_OTHER; 1231 break; 1232 case SCHED_FIFO: 1233 td->td_retval[0] = LINUX_SCHED_FIFO; 1234 break; 1235 case SCHED_RR: 1236 td->td_retval[0] = LINUX_SCHED_RR; 1237 break; 1238 } 1239 1240 return error; 1241 } 1242 1243 int 1244 linux_sched_get_priority_max(struct thread *td, 1245 struct linux_sched_get_priority_max_args *args) 1246 { 1247 struct sched_get_priority_max_args bsd; 1248 1249 #ifdef DEBUG 1250 if (ldebug(sched_get_priority_max)) 1251 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1252 #endif 1253 1254 switch (args->policy) { 1255 case LINUX_SCHED_OTHER: 1256 bsd.policy = SCHED_OTHER; 1257 break; 1258 case LINUX_SCHED_FIFO: 1259 bsd.policy = SCHED_FIFO; 1260 break; 1261 case LINUX_SCHED_RR: 1262 bsd.policy = SCHED_RR; 1263 break; 1264 default: 1265 return EINVAL; 1266 } 1267 return sched_get_priority_max(td, &bsd); 1268 } 1269 1270 int 1271 linux_sched_get_priority_min(struct thread *td, 1272 struct linux_sched_get_priority_min_args *args) 1273 { 1274 struct sched_get_priority_min_args bsd; 1275 1276 #ifdef DEBUG 1277 if (ldebug(sched_get_priority_min)) 1278 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1279 #endif 1280 1281 switch (args->policy) { 1282 case LINUX_SCHED_OTHER: 1283 bsd.policy = SCHED_OTHER; 1284 break; 1285 case LINUX_SCHED_FIFO: 1286 bsd.policy = SCHED_FIFO; 1287 break; 1288 case LINUX_SCHED_RR: 1289 bsd.policy = SCHED_RR; 1290 break; 1291 default: 1292 return EINVAL; 1293 } 1294 return sched_get_priority_min(td, &bsd); 1295 } 1296 1297 #define 
REBOOT_CAD_ON 0x89abcdef 1298 #define REBOOT_CAD_OFF 0 1299 #define REBOOT_HALT 0xcdef0123 1300 1301 int 1302 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1303 { 1304 struct reboot_args bsd_args; 1305 1306 #ifdef DEBUG 1307 if (ldebug(reboot)) 1308 printf(ARGS(reboot, "0x%x"), args->cmd); 1309 #endif 1310 if (args->cmd == REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF) 1311 return (0); 1312 bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0; 1313 return (reboot(td, &bsd_args)); 1314 } 1315 1316 1317 /* 1318 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1319 * td->td_retval[1] when COMPAT_43 is defined. This 1320 * globbers registers that are assumed to be preserved. The following 1321 * lightweight syscalls fixes this. See also linux_getgid16() and 1322 * linux_getuid16() in linux_uid16.c. 1323 * 1324 * linux_getpid() - MP SAFE 1325 * linux_getgid() - MP SAFE 1326 * linux_getuid() - MP SAFE 1327 */ 1328 1329 int 1330 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1331 { 1332 1333 td->td_retval[0] = td->td_proc->p_pid; 1334 return (0); 1335 } 1336 1337 int 1338 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1339 { 1340 1341 td->td_retval[0] = td->td_ucred->cr_rgid; 1342 return (0); 1343 } 1344 1345 int 1346 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1347 { 1348 1349 td->td_retval[0] = td->td_ucred->cr_ruid; 1350 return (0); 1351 } 1352 1353 1354 int 1355 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1356 { 1357 struct getsid_args bsd; 1358 bsd.pid = args->pid; 1359 return getsid(td, &bsd); 1360 } 1361 1362 int 1363 linux_nosys(struct thread *td, struct nosys_args *ignore) 1364 { 1365 1366 return (ENOSYS); 1367 } 1368 1369 int 1370 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1371 { 1372 struct getpriority_args bsd_args; 1373 int error; 1374 1375 bsd_args.which = args->which; 1376 bsd_args.who = args->who; 1377 error = 
getpriority(td, &bsd_args); 1378 td->td_retval[0] = 20 - td->td_retval[0]; 1379 return error; 1380 } 1381 1382 int 1383 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1384 { 1385 int name[2]; 1386 int error; 1387 1388 name[0] = CTL_KERN; 1389 name[1] = KERN_HOSTNAME; 1390 if ((error = suser_cred(td->td_ucred, SUSER_ALLOWJAIL))) 1391 return (error); 1392 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1393 args->len, 0, 0)); 1394 } 1395 1396