1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/blist.h>
#include <sys/fcntl.h>
#if defined(__i386__)
#include <sys/imgact_aout.h>
#endif
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/racct.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <sys/cpuset.h>
#include <sys/uio.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

/**
 * Special DTrace provider for the linuxulator.
 *
 * In this file we define the provider for the entire linuxulator. All
 * modules (= files of the linuxulator) use it.
 *
 * We define a different name depending on the emulated bitsize, see
 * ../../<ARCH>/linux{,32}/linux.h, e.g.:
 *	native bitsize		= linuxulator
 *	amd64, 32bit emulation	= linuxulator32
 */
LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);

int stclohz;				/* Statistics clock frequency */

/* Maps Linux RLIMIT_* indices to their FreeBSD counterparts. */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

/* Linux sysinfo(2) result layout, copied out to user space verbatim. */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalbig;
	l_ulong		freebig;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_nsec_valid(l_long);

/*
 * Linux sysinfo(2): fill in a struct l_sysinfo with uptime, load
 * averages, memory, swap and process counts and copy it to user space.
 */
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	vm_object_t object;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	/* Round the uptime up to whole seconds. */
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE;

	/*
	 * Count pages resident in VM objects with more than one shadow
	 * as "shared" memory.
	 */
	sysinfo.sharedram = 0;
	mtx_lock(&vm_object_list_mtx);
	TAILQ_FOREACH(object, &vm_object_list, object_list)
		if (object->shadow_count > 1)
			sysinfo.sharedram += object->resident_page_count;
	mtx_unlock(&vm_object_list_mtx);

	sysinfo.sharedram *= PAGE_SIZE;
	sysinfo.bufferram = 0;

	/* i = total swap pages, j = used swap pages. */
	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/* The following are only present in newer Linux kernels. */
	sysinfo.totalbig = 0;
	sysinfo.freebig = 0;
	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux alarm(2): arm the real-time interval timer and return the
 * number of seconds that were left on the previously armed timer.
 */
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error;

#ifdef DEBUG
	if (ldebug(alarm))
		printf(ARGS(alarm, "%u"), args->secs);
#endif
	secs = args->secs;
	/*
	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit to avoid error from it.
	 *
	 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
	 * platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	/*
	 * Round the remaining time up: never report 0 seconds for a
	 * still-armed timer, and round half a second or more upwards.
	 */
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

/*
 * Linux brk(2): returns the new break on success, the current break on
 * failure (Linux brk never returns an error to the caller).
 */
int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	vm_offset_t new, old;
	struct break_args /* {
		char * nsize;
	} */ tmp;

#ifdef DEBUG
	if (ldebug(brk))
		printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend);
#endif
	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (vm_offset_t)args->dsend;
	tmp.nsize = (char *)new;
	if (((caddr_t)new > vm->vm_daddr) && !sys_break(td, &tmp))
		td->td_retval[0] = (long)new;
	else
		td->td_retval[0] = (long)old;

	return (0);
}

#if defined(__i386__)
/* XXX: what about amd64/linux32?
 */

/*
 * Linux uselib(2): load an a.out shared library into the calling
 * process's address space.  Performs exec-like permission checks on the
 * vnode, validates the a.out header, then maps text+data (and zero-fill
 * bss) at the library's load address.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error, locked, writecount;

	LCONVPATHEXIST(td, args->library, &library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), library);
#endif

	a_out = NULL;
	locked = 0;
	vp = NULL;

	NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = 1;

	/* Writable? */
	error = VOP_GET_WRITECOUNT(vp, &writecount);
	if (error != 0)
		goto cleanup;
	if (writecount != 0) {
		error = ETXTBSY;
		goto cleanup;
	}

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCESS is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 * XXX: Note that if any of the VM operations fail below we don't
	 * clear this flag.
	 */
	VOP_SET_TEXT(vp);

	/*
	 * Lock no longer needed
	 */
	locked = 0;
	VOP_UNLOCK(vp, 0);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
#ifdef DEBUG
		printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		/* A short read means a truncated image. */
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
#ifdef DEBUG
		printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
	}
#ifdef DEBUG
	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0],
	    ((long *)vmaddr)[1]);
#endif
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unlock vnode if needed */
	if (locked)
		VOP_UNLOCK(vp, 0);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux select(2): unlike FreeBSD, Linux writes the remaining time back
 * into the user-supplied timeout, so compute and copy it out afterwards.
 */
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

#ifdef DEBUG
	if (ldebug(select))
		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
		    (void *)args->readfds, (void *)args->writefds,
		    (void *)args->exceptfds, (void *)args->timeout);
#endif

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;
#ifdef DEBUG
		if (ldebug(select))
			printf(LMSG("incoming timeout (%jd/%ld)"),
			    (intmax_t)utv.tv_sec, utv.tv_usec);
#endif

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid. Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);

#ifdef DEBUG
	if (ldebug(select))
		printf(LMSG("real select returns %d"), error);
#endif
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
#ifdef DEBUG
		if (ldebug(select))
			printf(LMSG("outgoing timeout (%jd/%ld)"),
			    (intmax_t)utv.tv_sec, utv.tv_usec);
#endif
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
#ifdef DEBUG
	if (ldebug(select))
		printf(LMSG("select_out -> %d"), error);
#endif
	return (error);
}
#endif

/*
 * Linux mremap(2), shrink-only emulation: growing a mapping returns
 * ENOMEM; shrinking unmaps the tail.  Returns the (unchanged) address
 * in td_retval[0] on success, 0 on failure.
 */
int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

#ifdef DEBUG
	if (ldebug(mremap))
		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
		    (void *)(uintptr_t)args->addr,
		    (unsigned long)args->old_len,
		    (unsigned long)args->new_len,
		    (unsigned long)args->flags);
#endif

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}

#define LINUX_MS_ASYNC       0x0001
#define LINUX_MS_INVALIDATE  0x0002
#define LINUX_MS_SYNC        0x0004

/*
 * Linux msync(2).  LINUX_MS_SYNC is stripped because the FreeBSD MS_SYNC
 * flag value differs; the remaining flag bits line up.
 */
int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux time(2): return seconds since the Epoch, optionally storing the
 * value through args->tm as well.
 */
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

#ifdef DEBUG
	if (ldebug(time))
		printf(ARGS(time, "*"));
#endif

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

/* Linux times(2) result layout. */
struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 */
#define	CLK_TCK		100

#define	CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

/* Pick the tick conversion based on the emulated kernel version. */
#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?	\
			    CONVNTCK(r) : CONVOTCK(r))

/*
 * Linux times(2): report process and children CPU times in clock ticks
 * and return the system uptime in ticks.
 */
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

#ifdef DEBUG
	if (ldebug(times))
		printf(ARGS(times, "*"));
#endif

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

/*
 * Linux uname(2) (new format): fill a struct l_new_utsname from the
 * emulated OS name/release, credential host/domain names, and platform.
 */
int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

#ifdef DEBUG
	if (ldebug(newuname))
		printf(ARGS(newuname, "*"));
#endif

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	/* Truncate the kernel version string at its first newline. */
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
	strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

/* Linux utime(2) argument layout. */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utime(2): set access/modification times from a struct utimbuf,
 * or to the current time when args->times is NULL.
 */
int
linux_utime(struct thread *td,
    struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), fname);
#endif

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
	    UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utimes(2): like utime(2) but takes an array of two timevals
 * (access, modification) with microsecond resolution.
 */
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utimes))
		printf(ARGS(utimes, "%s, *"), fname);
#endif

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
	    tvp, UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

/*
 * Returns 0 when nsec is a valid utimensat(2) nanosecond field (either
 * one of the special LINUX_UTIME_* values or within [0, 999999999]),
 * 1 otherwise.
 */
static int
linux_utimensat_nsec_valid(l_long nsec)
{

	if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW)
		return (0);
	if (nsec >= 0 && nsec <= 999999999)
		return (0);
	return (1);
}

/*
 * Linux utimensat(2): nanosecond-resolution file time update, with
 * translation of the UTIME_NOW/UTIME_OMIT sentinels.
 */
int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp = NULL;
	char *path = NULL;
	int error, dfd, flags = 0;

	/* Translate the Linux "current directory" fd sentinel. */
	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

#ifdef DEBUG
	if (ldebug(utimensat))
		printf(ARGS(utimensat, "%d, *"), dfd);
#endif

	if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 ||
		    linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0)
			return (EINVAL);

		times[0].tv_sec = l_times[0].tv_sec;
		switch (l_times[0].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[0].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[0].tv_nsec = UTIME_NOW;
			break;
		default:
			times[0].tv_nsec = l_times[0].tv_nsec;
		}

		times[1].tv_sec = l_times[1].tv_sec;
		switch (l_times[1].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[1].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[1].tv_nsec = UTIME_NOW;
			break;
		default:
			times[1].tv_nsec = l_times[1].tv_nsec;
			break;
		}
		timesp = times;

		/* This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (times[0].tv_nsec == UTIME_OMIT &&
		    times[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (args->pathname != NULL)
		LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
	else if (args->flags != 0)
		return (EINVAL);

	if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;

	if (path == NULL)
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux futimesat(2): utimes(2) relative to a directory fd.
 */
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
	LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);

#ifdef DEBUG
	if (ldebug(futimesat))
		printf(ARGS(futimesat, "%s, *"), fname);
#endif

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

/*
 * Common helper for the Linux wait family: perform the native wait and
 * rewrite the status word so its embedded signal numbers use Linux
 * signal values before copying it out.
 */
int
linux_common_wait(struct thread *td, int pid, int *status,
    int options, struct rusage *ru)
{
	int error, tmpstat;

	error = kern_wait(td, pid, &tmpstat, options, ru);
	if (error)
		return (error);

	if (status) {
		tmpstat &= 0xffff;
		if (WIFSIGNALED(tmpstat))
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		else if (WIFSTOPPED(tmpstat))
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
		else if
		    (WIFCONTINUED(tmpstat))
			tmpstat = 0xffff;
		error = copyout(&tmpstat, status, sizeof(int));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux waitpid(2): thin wrapper over wait4 with a NULL rusage.
 */
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

#ifdef DEBUG
	if (ldebug(waitpid))
		printf(ARGS(waitpid, "%d, %p, %d"),
		    args->pid, (void *)args->status, args->options);
#endif

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * Linux wait4(2): validate and translate the option flags, wait, and
 * optionally copy out the children's resource usage.
 */
int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	int error, options;
	struct rusage ru, *rup;

#ifdef DEBUG
	if (ldebug(wait4))
		printf(ARGS(wait4, "%d, %p, %d, %p"),
		    args->pid, (void *)args->status, args->options,
		    (void *)args->rusage);
#endif
	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = WEXITED;
	linux_to_bsd_waitopts(args->options, &options);

	if (args->rusage != NULL)
		rup = &ru;
	else
		rup = NULL;
	error = linux_common_wait(td, args->pid, args->status, options, rup);
	if (error != 0)
		return (error);
	if (args->rusage != NULL)
		error = linux_copyout_rusage(&ru, args->rusage);
	return (error);
}

/*
 * Linux waitid(2): wait by id type (all/pid/pgid) and optionally copy
 * out rusage and a translated siginfo describing the state change.
 */
int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	/* At least one state-change class must be requested. */
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		/* td_retval[0] == 0 means nothing was waited for. */
		if (td->td_retval[0] == 0)
			bzero(&lsi, sizeof(lsi));
		else {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux mknod(2): dispatch on the file type bits; FIFOs and sockets map
 * to mkfifo, devices to mknod, and regular files to an O_CREAT open.
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;

	LCONVPATHCREAT(td, args->path, &path);

#ifdef DEBUG
	if (ldebug(mknod))
		printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode,
		    (uintmax_t)args->dev);
#endif

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT
		    | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}
#endif

/*
 * Linux mknodat(2): same dispatch as mknod(2), relative to a directory
 * file descriptor.
 */
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
	LCONVPATHCREAT_AT(td, args->filename, &path, dfd);

#ifdef DEBUG
	if (ldebug(mknodat))
		printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev);
#endif

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

#ifdef DEBUG
	if (ldebug(personality))
		printf(ARGS(personality, "%u"), args->per);
#endif

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	/* 0xffffffff queries the current persona without changing it. */
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

/* Copy an itimerval field-by-field between BSD and Linux layouts. */
#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;

/*
 * Linux setitimer(2): translate the Linux itimerval, arm the timer, and
 * optionally copy out the previous setting.
 */
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

#ifdef DEBUG
	if (ldebug(setitimer))
		printf(ARGS(setitimer, "%p, %p"),
		    (void *)uap->itv, (void *)uap->oitv);
#endif

	/* A NULL new value degenerates into a getitimer() call. */
	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
#ifdef DEBUG
	if (ldebug(setitimer)) {
		printf("setitimer: value: sec: %jd, usec: %ld\n",
		    (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec);
		printf("setitimer: interval: sec: %jd, usec: %ld\n",
		    (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec);
	}
#endif
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
1281 } 1282 1283 int 1284 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1285 { 1286 int error; 1287 struct l_itimerval ls; 1288 struct itimerval aitv; 1289 1290 #ifdef DEBUG 1291 if (ldebug(getitimer)) 1292 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1293 #endif 1294 error = kern_getitimer(td, uap->which, &aitv); 1295 if (error != 0) 1296 return (error); 1297 B2L_ITIMERVAL(&ls, &aitv); 1298 return (copyout(&ls, uap->itv, sizeof(ls))); 1299 } 1300 1301 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1302 int 1303 linux_nice(struct thread *td, struct linux_nice_args *args) 1304 { 1305 struct setpriority_args bsd_args; 1306 1307 bsd_args.which = PRIO_PROCESS; 1308 bsd_args.who = 0; /* current process */ 1309 bsd_args.prio = args->inc; 1310 return (sys_setpriority(td, &bsd_args)); 1311 } 1312 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1313 1314 int 1315 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1316 { 1317 struct ucred *newcred, *oldcred; 1318 l_gid_t *linux_gidset; 1319 gid_t *bsd_gidset; 1320 int ngrp, error; 1321 struct proc *p; 1322 1323 ngrp = args->gidsetsize; 1324 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1325 return (EINVAL); 1326 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1327 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1328 if (error) 1329 goto out; 1330 newcred = crget(); 1331 crextend(newcred, ngrp + 1); 1332 p = td->td_proc; 1333 PROC_LOCK(p); 1334 oldcred = p->p_ucred; 1335 crcopy(newcred, oldcred); 1336 1337 /* 1338 * cr_groups[0] holds egid. Setting the whole set from 1339 * the supplied set will cause egid to be changed too. 1340 * Keep cr_groups[0] unchanged to prevent that. 
1341 */ 1342 1343 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1344 PROC_UNLOCK(p); 1345 crfree(newcred); 1346 goto out; 1347 } 1348 1349 if (ngrp > 0) { 1350 newcred->cr_ngroups = ngrp + 1; 1351 1352 bsd_gidset = newcred->cr_groups; 1353 ngrp--; 1354 while (ngrp >= 0) { 1355 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1356 ngrp--; 1357 } 1358 } else 1359 newcred->cr_ngroups = 1; 1360 1361 setsugid(p); 1362 proc_set_cred(p, newcred); 1363 PROC_UNLOCK(p); 1364 crfree(oldcred); 1365 error = 0; 1366 out: 1367 free(linux_gidset, M_LINUX); 1368 return (error); 1369 } 1370 1371 int 1372 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1373 { 1374 struct ucred *cred; 1375 l_gid_t *linux_gidset; 1376 gid_t *bsd_gidset; 1377 int bsd_gidsetsz, ngrp, error; 1378 1379 cred = td->td_ucred; 1380 bsd_gidset = cred->cr_groups; 1381 bsd_gidsetsz = cred->cr_ngroups - 1; 1382 1383 /* 1384 * cr_groups[0] holds egid. Returning the whole set 1385 * here will cause a duplicate. Exclude cr_groups[0] 1386 * to prevent that. 
1387 */ 1388 1389 if ((ngrp = args->gidsetsize) == 0) { 1390 td->td_retval[0] = bsd_gidsetsz; 1391 return (0); 1392 } 1393 1394 if (ngrp < bsd_gidsetsz) 1395 return (EINVAL); 1396 1397 ngrp = 0; 1398 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1399 M_LINUX, M_WAITOK); 1400 while (ngrp < bsd_gidsetsz) { 1401 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1402 ngrp++; 1403 } 1404 1405 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1406 free(linux_gidset, M_LINUX); 1407 if (error) 1408 return (error); 1409 1410 td->td_retval[0] = ngrp; 1411 return (0); 1412 } 1413 1414 int 1415 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1416 { 1417 struct rlimit bsd_rlim; 1418 struct l_rlimit rlim; 1419 u_int which; 1420 int error; 1421 1422 #ifdef DEBUG 1423 if (ldebug(setrlimit)) 1424 printf(ARGS(setrlimit, "%d, %p"), 1425 args->resource, (void *)args->rlim); 1426 #endif 1427 1428 if (args->resource >= LINUX_RLIM_NLIMITS) 1429 return (EINVAL); 1430 1431 which = linux_to_bsd_resource[args->resource]; 1432 if (which == -1) 1433 return (EINVAL); 1434 1435 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1436 if (error) 1437 return (error); 1438 1439 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1440 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1441 return (kern_setrlimit(td, which, &bsd_rlim)); 1442 } 1443 1444 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1445 int 1446 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1447 { 1448 struct l_rlimit rlim; 1449 struct rlimit bsd_rlim; 1450 u_int which; 1451 1452 #ifdef DEBUG 1453 if (ldebug(old_getrlimit)) 1454 printf(ARGS(old_getrlimit, "%d, %p"), 1455 args->resource, (void *)args->rlim); 1456 #endif 1457 1458 if (args->resource >= LINUX_RLIM_NLIMITS) 1459 return (EINVAL); 1460 1461 which = linux_to_bsd_resource[args->resource]; 1462 if (which == -1) 1463 return (EINVAL); 1464 1465 lim_rlimit(td, which, &bsd_rlim); 1466 1467 
#ifdef COMPAT_LINUX32 1468 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1469 if (rlim.rlim_cur == UINT_MAX) 1470 rlim.rlim_cur = INT_MAX; 1471 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1472 if (rlim.rlim_max == UINT_MAX) 1473 rlim.rlim_max = INT_MAX; 1474 #else 1475 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1476 if (rlim.rlim_cur == ULONG_MAX) 1477 rlim.rlim_cur = LONG_MAX; 1478 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1479 if (rlim.rlim_max == ULONG_MAX) 1480 rlim.rlim_max = LONG_MAX; 1481 #endif 1482 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1483 } 1484 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1485 1486 int 1487 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1488 { 1489 struct l_rlimit rlim; 1490 struct rlimit bsd_rlim; 1491 u_int which; 1492 1493 #ifdef DEBUG 1494 if (ldebug(getrlimit)) 1495 printf(ARGS(getrlimit, "%d, %p"), 1496 args->resource, (void *)args->rlim); 1497 #endif 1498 1499 if (args->resource >= LINUX_RLIM_NLIMITS) 1500 return (EINVAL); 1501 1502 which = linux_to_bsd_resource[args->resource]; 1503 if (which == -1) 1504 return (EINVAL); 1505 1506 lim_rlimit(td, which, &bsd_rlim); 1507 1508 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1509 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1510 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1511 } 1512 1513 int 1514 linux_sched_setscheduler(struct thread *td, 1515 struct linux_sched_setscheduler_args *args) 1516 { 1517 struct sched_param sched_param; 1518 struct thread *tdt; 1519 int error, policy; 1520 1521 #ifdef DEBUG 1522 if (ldebug(sched_setscheduler)) 1523 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1524 args->pid, args->policy, (const void *)args->param); 1525 #endif 1526 1527 switch (args->policy) { 1528 case LINUX_SCHED_OTHER: 1529 policy = SCHED_OTHER; 1530 break; 1531 case LINUX_SCHED_FIFO: 1532 policy = SCHED_FIFO; 1533 break; 1534 case LINUX_SCHED_RR: 1535 policy = SCHED_RR; 1536 break; 1537 default: 1538 return 
(EINVAL); 1539 } 1540 1541 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1542 if (error) 1543 return (error); 1544 1545 tdt = linux_tdfind(td, args->pid, -1); 1546 if (tdt == NULL) 1547 return (ESRCH); 1548 1549 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1550 PROC_UNLOCK(tdt->td_proc); 1551 return (error); 1552 } 1553 1554 int 1555 linux_sched_getscheduler(struct thread *td, 1556 struct linux_sched_getscheduler_args *args) 1557 { 1558 struct thread *tdt; 1559 int error, policy; 1560 1561 #ifdef DEBUG 1562 if (ldebug(sched_getscheduler)) 1563 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1564 #endif 1565 1566 tdt = linux_tdfind(td, args->pid, -1); 1567 if (tdt == NULL) 1568 return (ESRCH); 1569 1570 error = kern_sched_getscheduler(td, tdt, &policy); 1571 PROC_UNLOCK(tdt->td_proc); 1572 1573 switch (policy) { 1574 case SCHED_OTHER: 1575 td->td_retval[0] = LINUX_SCHED_OTHER; 1576 break; 1577 case SCHED_FIFO: 1578 td->td_retval[0] = LINUX_SCHED_FIFO; 1579 break; 1580 case SCHED_RR: 1581 td->td_retval[0] = LINUX_SCHED_RR; 1582 break; 1583 } 1584 return (error); 1585 } 1586 1587 int 1588 linux_sched_get_priority_max(struct thread *td, 1589 struct linux_sched_get_priority_max_args *args) 1590 { 1591 struct sched_get_priority_max_args bsd; 1592 1593 #ifdef DEBUG 1594 if (ldebug(sched_get_priority_max)) 1595 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1596 #endif 1597 1598 switch (args->policy) { 1599 case LINUX_SCHED_OTHER: 1600 bsd.policy = SCHED_OTHER; 1601 break; 1602 case LINUX_SCHED_FIFO: 1603 bsd.policy = SCHED_FIFO; 1604 break; 1605 case LINUX_SCHED_RR: 1606 bsd.policy = SCHED_RR; 1607 break; 1608 default: 1609 return (EINVAL); 1610 } 1611 return (sys_sched_get_priority_max(td, &bsd)); 1612 } 1613 1614 int 1615 linux_sched_get_priority_min(struct thread *td, 1616 struct linux_sched_get_priority_min_args *args) 1617 { 1618 struct sched_get_priority_min_args bsd; 1619 1620 #ifdef DEBUG 1621 if 
(ldebug(sched_get_priority_min)) 1622 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1623 #endif 1624 1625 switch (args->policy) { 1626 case LINUX_SCHED_OTHER: 1627 bsd.policy = SCHED_OTHER; 1628 break; 1629 case LINUX_SCHED_FIFO: 1630 bsd.policy = SCHED_FIFO; 1631 break; 1632 case LINUX_SCHED_RR: 1633 bsd.policy = SCHED_RR; 1634 break; 1635 default: 1636 return (EINVAL); 1637 } 1638 return (sys_sched_get_priority_min(td, &bsd)); 1639 } 1640 1641 #define REBOOT_CAD_ON 0x89abcdef 1642 #define REBOOT_CAD_OFF 0 1643 #define REBOOT_HALT 0xcdef0123 1644 #define REBOOT_RESTART 0x01234567 1645 #define REBOOT_RESTART2 0xA1B2C3D4 1646 #define REBOOT_POWEROFF 0x4321FEDC 1647 #define REBOOT_MAGIC1 0xfee1dead 1648 #define REBOOT_MAGIC2 0x28121969 1649 #define REBOOT_MAGIC2A 0x05121996 1650 #define REBOOT_MAGIC2B 0x16041998 1651 1652 int 1653 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1654 { 1655 struct reboot_args bsd_args; 1656 1657 #ifdef DEBUG 1658 if (ldebug(reboot)) 1659 printf(ARGS(reboot, "0x%x"), args->cmd); 1660 #endif 1661 1662 if (args->magic1 != REBOOT_MAGIC1) 1663 return (EINVAL); 1664 1665 switch (args->magic2) { 1666 case REBOOT_MAGIC2: 1667 case REBOOT_MAGIC2A: 1668 case REBOOT_MAGIC2B: 1669 break; 1670 default: 1671 return (EINVAL); 1672 } 1673 1674 switch (args->cmd) { 1675 case REBOOT_CAD_ON: 1676 case REBOOT_CAD_OFF: 1677 return (priv_check(td, PRIV_REBOOT)); 1678 case REBOOT_HALT: 1679 bsd_args.opt = RB_HALT; 1680 break; 1681 case REBOOT_RESTART: 1682 case REBOOT_RESTART2: 1683 bsd_args.opt = 0; 1684 break; 1685 case REBOOT_POWEROFF: 1686 bsd_args.opt = RB_POWEROFF; 1687 break; 1688 default: 1689 return (EINVAL); 1690 } 1691 return (sys_reboot(td, &bsd_args)); 1692 } 1693 1694 1695 /* 1696 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1697 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that 1698 * are assumed to be preserved. The following lightweight syscalls fixes 1699 * this. 
See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1700 * 1701 * linux_getpid() - MP SAFE 1702 * linux_getgid() - MP SAFE 1703 * linux_getuid() - MP SAFE 1704 */ 1705 1706 int 1707 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1708 { 1709 1710 #ifdef DEBUG 1711 if (ldebug(getpid)) 1712 printf(ARGS(getpid, "")); 1713 #endif 1714 td->td_retval[0] = td->td_proc->p_pid; 1715 1716 return (0); 1717 } 1718 1719 int 1720 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1721 { 1722 struct linux_emuldata *em; 1723 1724 #ifdef DEBUG 1725 if (ldebug(gettid)) 1726 printf(ARGS(gettid, "")); 1727 #endif 1728 1729 em = em_find(td); 1730 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1731 1732 td->td_retval[0] = em->em_tid; 1733 1734 return (0); 1735 } 1736 1737 1738 int 1739 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1740 { 1741 1742 #ifdef DEBUG 1743 if (ldebug(getppid)) 1744 printf(ARGS(getppid, "")); 1745 #endif 1746 1747 td->td_retval[0] = kern_getppid(td); 1748 return (0); 1749 } 1750 1751 int 1752 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1753 { 1754 1755 #ifdef DEBUG 1756 if (ldebug(getgid)) 1757 printf(ARGS(getgid, "")); 1758 #endif 1759 1760 td->td_retval[0] = td->td_ucred->cr_rgid; 1761 return (0); 1762 } 1763 1764 int 1765 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1766 { 1767 1768 #ifdef DEBUG 1769 if (ldebug(getuid)) 1770 printf(ARGS(getuid, "")); 1771 #endif 1772 1773 td->td_retval[0] = td->td_ucred->cr_ruid; 1774 return (0); 1775 } 1776 1777 1778 int 1779 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1780 { 1781 struct getsid_args bsd; 1782 1783 #ifdef DEBUG 1784 if (ldebug(getsid)) 1785 printf(ARGS(getsid, "%i"), args->pid); 1786 #endif 1787 1788 bsd.pid = args->pid; 1789 return (sys_getsid(td, &bsd)); 1790 } 1791 1792 int 1793 linux_nosys(struct thread *td, struct nosys_args *ignore) 1794 { 1795 1796 return (ENOSYS); 1797 } 
1798 1799 int 1800 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1801 { 1802 struct getpriority_args bsd_args; 1803 int error; 1804 1805 #ifdef DEBUG 1806 if (ldebug(getpriority)) 1807 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1808 #endif 1809 1810 bsd_args.which = args->which; 1811 bsd_args.who = args->who; 1812 error = sys_getpriority(td, &bsd_args); 1813 td->td_retval[0] = 20 - td->td_retval[0]; 1814 return (error); 1815 } 1816 1817 int 1818 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1819 { 1820 int name[2]; 1821 1822 #ifdef DEBUG 1823 if (ldebug(sethostname)) 1824 printf(ARGS(sethostname, "*, %i"), args->len); 1825 #endif 1826 1827 name[0] = CTL_KERN; 1828 name[1] = KERN_HOSTNAME; 1829 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1830 args->len, 0, 0)); 1831 } 1832 1833 int 1834 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1835 { 1836 int name[2]; 1837 1838 #ifdef DEBUG 1839 if (ldebug(setdomainname)) 1840 printf(ARGS(setdomainname, "*, %i"), args->len); 1841 #endif 1842 1843 name[0] = CTL_KERN; 1844 name[1] = KERN_NISDOMAINNAME; 1845 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1846 args->len, 0, 0)); 1847 } 1848 1849 int 1850 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1851 { 1852 1853 #ifdef DEBUG 1854 if (ldebug(exit_group)) 1855 printf(ARGS(exit_group, "%i"), args->error_code); 1856 #endif 1857 1858 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1859 args->error_code); 1860 1861 /* 1862 * XXX: we should send a signal to the parent if 1863 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1864 * as it doesnt occur often. 
1865 */ 1866 exit1(td, args->error_code, 0); 1867 /* NOTREACHED */ 1868 } 1869 1870 #define _LINUX_CAPABILITY_VERSION 0x19980330 1871 1872 struct l_user_cap_header { 1873 l_int version; 1874 l_int pid; 1875 }; 1876 1877 struct l_user_cap_data { 1878 l_int effective; 1879 l_int permitted; 1880 l_int inheritable; 1881 }; 1882 1883 int 1884 linux_capget(struct thread *td, struct linux_capget_args *args) 1885 { 1886 struct l_user_cap_header luch; 1887 struct l_user_cap_data lucd; 1888 int error; 1889 1890 if (args->hdrp == NULL) 1891 return (EFAULT); 1892 1893 error = copyin(args->hdrp, &luch, sizeof(luch)); 1894 if (error != 0) 1895 return (error); 1896 1897 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1898 luch.version = _LINUX_CAPABILITY_VERSION; 1899 error = copyout(&luch, args->hdrp, sizeof(luch)); 1900 if (error) 1901 return (error); 1902 return (EINVAL); 1903 } 1904 1905 if (luch.pid) 1906 return (EPERM); 1907 1908 if (args->datap) { 1909 /* 1910 * The current implementation doesn't support setting 1911 * a capability (it's essentially a stub) so indicate 1912 * that no capabilities are currently set or available 1913 * to request. 
1914 */ 1915 bzero (&lucd, sizeof(lucd)); 1916 error = copyout(&lucd, args->datap, sizeof(lucd)); 1917 } 1918 1919 return (error); 1920 } 1921 1922 int 1923 linux_capset(struct thread *td, struct linux_capset_args *args) 1924 { 1925 struct l_user_cap_header luch; 1926 struct l_user_cap_data lucd; 1927 int error; 1928 1929 if (args->hdrp == NULL || args->datap == NULL) 1930 return (EFAULT); 1931 1932 error = copyin(args->hdrp, &luch, sizeof(luch)); 1933 if (error != 0) 1934 return (error); 1935 1936 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1937 luch.version = _LINUX_CAPABILITY_VERSION; 1938 error = copyout(&luch, args->hdrp, sizeof(luch)); 1939 if (error) 1940 return (error); 1941 return (EINVAL); 1942 } 1943 1944 if (luch.pid) 1945 return (EPERM); 1946 1947 error = copyin(args->datap, &lucd, sizeof(lucd)); 1948 if (error != 0) 1949 return (error); 1950 1951 /* We currently don't support setting any capabilities. */ 1952 if (lucd.effective || lucd.permitted || lucd.inheritable) { 1953 linux_msg(td, 1954 "capset effective=0x%x, permitted=0x%x, " 1955 "inheritable=0x%x is not implemented", 1956 (int)lucd.effective, (int)lucd.permitted, 1957 (int)lucd.inheritable); 1958 return (EPERM); 1959 } 1960 1961 return (0); 1962 } 1963 1964 int 1965 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1966 { 1967 int error = 0, max_size; 1968 struct proc *p = td->td_proc; 1969 char comm[LINUX_MAX_COMM_LEN]; 1970 struct linux_emuldata *em; 1971 int pdeath_signal; 1972 1973 #ifdef DEBUG 1974 if (ldebug(prctl)) 1975 printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, 1976 (uintmax_t)args->arg2, (uintmax_t)args->arg3, 1977 (uintmax_t)args->arg4, (uintmax_t)args->arg5); 1978 #endif 1979 1980 switch (args->option) { 1981 case LINUX_PR_SET_PDEATHSIG: 1982 if (!LINUX_SIG_VALID(args->arg2)) 1983 return (EINVAL); 1984 em = em_find(td); 1985 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1986 em->pdeath_signal = args->arg2; 1987 break; 1988 case 
LINUX_PR_GET_PDEATHSIG: 1989 em = em_find(td); 1990 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1991 pdeath_signal = em->pdeath_signal; 1992 error = copyout(&pdeath_signal, 1993 (void *)(register_t)args->arg2, 1994 sizeof(pdeath_signal)); 1995 break; 1996 case LINUX_PR_GET_KEEPCAPS: 1997 /* 1998 * Indicate that we always clear the effective and 1999 * permitted capability sets when the user id becomes 2000 * non-zero (actually the capability sets are simply 2001 * always zero in the current implementation). 2002 */ 2003 td->td_retval[0] = 0; 2004 break; 2005 case LINUX_PR_SET_KEEPCAPS: 2006 /* 2007 * Ignore requests to keep the effective and permitted 2008 * capability sets when the user id becomes non-zero. 2009 */ 2010 break; 2011 case LINUX_PR_SET_NAME: 2012 /* 2013 * To be on the safe side we need to make sure to not 2014 * overflow the size a Linux program expects. We already 2015 * do this here in the copyin, so that we don't need to 2016 * check on copyout. 2017 */ 2018 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2019 error = copyinstr((void *)(register_t)args->arg2, comm, 2020 max_size, NULL); 2021 2022 /* Linux silently truncates the name if it is too long. */ 2023 if (error == ENAMETOOLONG) { 2024 /* 2025 * XXX: copyinstr() isn't documented to populate the 2026 * array completely, so do a copyin() to be on the 2027 * safe side. This should be changed in case 2028 * copyinstr() is changed to guarantee this. 
2029 */ 2030 error = copyin((void *)(register_t)args->arg2, comm, 2031 max_size - 1); 2032 comm[max_size - 1] = '\0'; 2033 } 2034 if (error) 2035 return (error); 2036 2037 PROC_LOCK(p); 2038 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2039 PROC_UNLOCK(p); 2040 break; 2041 case LINUX_PR_GET_NAME: 2042 PROC_LOCK(p); 2043 strlcpy(comm, p->p_comm, sizeof(comm)); 2044 PROC_UNLOCK(p); 2045 error = copyout(comm, (void *)(register_t)args->arg2, 2046 strlen(comm) + 1); 2047 break; 2048 default: 2049 error = EINVAL; 2050 break; 2051 } 2052 2053 return (error); 2054 } 2055 2056 int 2057 linux_sched_setparam(struct thread *td, 2058 struct linux_sched_setparam_args *uap) 2059 { 2060 struct sched_param sched_param; 2061 struct thread *tdt; 2062 int error; 2063 2064 #ifdef DEBUG 2065 if (ldebug(sched_setparam)) 2066 printf(ARGS(sched_setparam, "%d, *"), uap->pid); 2067 #endif 2068 2069 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2070 if (error) 2071 return (error); 2072 2073 tdt = linux_tdfind(td, uap->pid, -1); 2074 if (tdt == NULL) 2075 return (ESRCH); 2076 2077 error = kern_sched_setparam(td, tdt, &sched_param); 2078 PROC_UNLOCK(tdt->td_proc); 2079 return (error); 2080 } 2081 2082 int 2083 linux_sched_getparam(struct thread *td, 2084 struct linux_sched_getparam_args *uap) 2085 { 2086 struct sched_param sched_param; 2087 struct thread *tdt; 2088 int error; 2089 2090 #ifdef DEBUG 2091 if (ldebug(sched_getparam)) 2092 printf(ARGS(sched_getparam, "%d, *"), uap->pid); 2093 #endif 2094 2095 tdt = linux_tdfind(td, uap->pid, -1); 2096 if (tdt == NULL) 2097 return (ESRCH); 2098 2099 error = kern_sched_getparam(td, tdt, &sched_param); 2100 PROC_UNLOCK(tdt->td_proc); 2101 if (error == 0) 2102 error = copyout(&sched_param, uap->param, 2103 sizeof(sched_param)); 2104 return (error); 2105 } 2106 2107 /* 2108 * Get affinity of a process. 
2109 */ 2110 int 2111 linux_sched_getaffinity(struct thread *td, 2112 struct linux_sched_getaffinity_args *args) 2113 { 2114 int error; 2115 struct thread *tdt; 2116 2117 #ifdef DEBUG 2118 if (ldebug(sched_getaffinity)) 2119 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 2120 args->len); 2121 #endif 2122 if (args->len < sizeof(cpuset_t)) 2123 return (EINVAL); 2124 2125 tdt = linux_tdfind(td, args->pid, -1); 2126 if (tdt == NULL) 2127 return (ESRCH); 2128 2129 PROC_UNLOCK(tdt->td_proc); 2130 2131 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2132 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); 2133 if (error == 0) 2134 td->td_retval[0] = sizeof(cpuset_t); 2135 2136 return (error); 2137 } 2138 2139 /* 2140 * Set affinity of a process. 2141 */ 2142 int 2143 linux_sched_setaffinity(struct thread *td, 2144 struct linux_sched_setaffinity_args *args) 2145 { 2146 struct thread *tdt; 2147 2148 #ifdef DEBUG 2149 if (ldebug(sched_setaffinity)) 2150 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 2151 args->len); 2152 #endif 2153 if (args->len < sizeof(cpuset_t)) 2154 return (EINVAL); 2155 2156 tdt = linux_tdfind(td, args->pid, -1); 2157 if (tdt == NULL) 2158 return (ESRCH); 2159 2160 PROC_UNLOCK(tdt->td_proc); 2161 2162 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2163 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); 2164 } 2165 2166 struct linux_rlimit64 { 2167 uint64_t rlim_cur; 2168 uint64_t rlim_max; 2169 }; 2170 2171 int 2172 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2173 { 2174 struct rlimit rlim, nrlim; 2175 struct linux_rlimit64 lrlim; 2176 struct proc *p; 2177 u_int which; 2178 int flags; 2179 int error; 2180 2181 #ifdef DEBUG 2182 if (ldebug(prlimit64)) 2183 printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, 2184 args->resource, (void *)args->new, (void *)args->old); 2185 #endif 2186 2187 if (args->resource >= LINUX_RLIM_NLIMITS) 2188 return 
(EINVAL); 2189 2190 which = linux_to_bsd_resource[args->resource]; 2191 if (which == -1) 2192 return (EINVAL); 2193 2194 if (args->new != NULL) { 2195 /* 2196 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2197 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2198 * as INFINITY so we do not need a conversion even. 2199 */ 2200 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2201 if (error != 0) 2202 return (error); 2203 } 2204 2205 flags = PGET_HOLD | PGET_NOTWEXIT; 2206 if (args->new != NULL) 2207 flags |= PGET_CANDEBUG; 2208 else 2209 flags |= PGET_CANSEE; 2210 error = pget(args->pid, flags, &p); 2211 if (error != 0) 2212 return (error); 2213 2214 if (args->old != NULL) { 2215 PROC_LOCK(p); 2216 lim_rlimit_proc(p, which, &rlim); 2217 PROC_UNLOCK(p); 2218 if (rlim.rlim_cur == RLIM_INFINITY) 2219 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2220 else 2221 lrlim.rlim_cur = rlim.rlim_cur; 2222 if (rlim.rlim_max == RLIM_INFINITY) 2223 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2224 else 2225 lrlim.rlim_max = rlim.rlim_max; 2226 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2227 if (error != 0) 2228 goto out; 2229 } 2230 2231 if (args->new != NULL) 2232 error = kern_proc_setrlimit(td, p, which, &nrlim); 2233 2234 out: 2235 PRELE(p); 2236 return (error); 2237 } 2238 2239 int 2240 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2241 { 2242 struct timeval utv, tv0, tv1, *tvp; 2243 struct l_pselect6arg lpse6; 2244 struct l_timespec lts; 2245 struct timespec uts; 2246 l_sigset_t l_ss; 2247 sigset_t *ssp; 2248 sigset_t ss; 2249 int error; 2250 2251 ssp = NULL; 2252 if (args->sig != NULL) { 2253 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2254 if (error != 0) 2255 return (error); 2256 if (lpse6.ss_len != sizeof(l_ss)) 2257 return (EINVAL); 2258 if (lpse6.ss != 0) { 2259 error = copyin(PTRIN(lpse6.ss), &l_ss, 2260 sizeof(l_ss)); 2261 if (error != 0) 2262 return (error); 2263 linux_to_bsd_sigset(&l_ss, &ss); 2264 ssp = &ss; 2265 } 2266 } 
2267 2268 /* 2269 * Currently glibc changes nanosecond number to microsecond. 2270 * This mean losing precision but for now it is hardly seen. 2271 */ 2272 if (args->tsp != NULL) { 2273 error = copyin(args->tsp, <s, sizeof(lts)); 2274 if (error != 0) 2275 return (error); 2276 error = linux_to_native_timespec(&uts, <s); 2277 if (error != 0) 2278 return (error); 2279 2280 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2281 if (itimerfix(&utv)) 2282 return (EINVAL); 2283 2284 microtime(&tv0); 2285 tvp = &utv; 2286 } else 2287 tvp = NULL; 2288 2289 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2290 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2291 2292 if (error == 0 && args->tsp != NULL) { 2293 if (td->td_retval[0] != 0) { 2294 /* 2295 * Compute how much time was left of the timeout, 2296 * by subtracting the current time and the time 2297 * before we started the call, and subtracting 2298 * that result from the user-supplied value. 2299 */ 2300 2301 microtime(&tv1); 2302 timevalsub(&tv1, &tv0); 2303 timevalsub(&utv, &tv1); 2304 if (utv.tv_sec < 0) 2305 timevalclear(&utv); 2306 } else 2307 timevalclear(&utv); 2308 2309 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2310 2311 error = native_to_linux_timespec(<s, &uts); 2312 if (error == 0) 2313 error = copyout(<s, args->tsp, sizeof(lts)); 2314 } 2315 2316 return (error); 2317 } 2318 2319 int 2320 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2321 { 2322 struct timespec ts0, ts1; 2323 struct l_timespec lts; 2324 struct timespec uts, *tsp; 2325 l_sigset_t l_ss; 2326 sigset_t *ssp; 2327 sigset_t ss; 2328 int error; 2329 2330 if (args->sset != NULL) { 2331 if (args->ssize != sizeof(l_ss)) 2332 return (EINVAL); 2333 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2334 if (error) 2335 return (error); 2336 linux_to_bsd_sigset(&l_ss, &ss); 2337 ssp = &ss; 2338 } else 2339 ssp = NULL; 2340 if (args->tsp != NULL) { 2341 error = copyin(args->tsp, <s, sizeof(lts)); 2342 if (error) 2343 return (error); 2344 error = 
linux_to_native_timespec(&uts, <s); 2345 if (error != 0) 2346 return (error); 2347 2348 nanotime(&ts0); 2349 tsp = &uts; 2350 } else 2351 tsp = NULL; 2352 2353 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2354 2355 if (error == 0 && args->tsp != NULL) { 2356 if (td->td_retval[0]) { 2357 nanotime(&ts1); 2358 timespecsub(&ts1, &ts0); 2359 timespecsub(&uts, &ts1); 2360 if (uts.tv_sec < 0) 2361 timespecclear(&uts); 2362 } else 2363 timespecclear(&uts); 2364 2365 error = native_to_linux_timespec(<s, &uts); 2366 if (error == 0) 2367 error = copyout(<s, args->tsp, sizeof(lts)); 2368 } 2369 2370 return (error); 2371 } 2372 2373 #if defined(DEBUG) || defined(KTR) 2374 /* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ 2375 2376 #ifdef COMPAT_LINUX32 2377 #define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL 2378 #else 2379 #define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL 2380 #endif 2381 2382 u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))]; 2383 2384 static int 2385 linux_debug(int syscall, int toggle, int global) 2386 { 2387 2388 if (global) { 2389 char c = toggle ? 0 : 0xff; 2390 2391 memset(linux_debug_map, c, sizeof(linux_debug_map)); 2392 return (0); 2393 } 2394 if (syscall < 0 || syscall >= L_MAXSYSCALL) 2395 return (EINVAL); 2396 if (toggle) 2397 clrbit(linux_debug_map, syscall); 2398 else 2399 setbit(linux_debug_map, syscall); 2400 return (0); 2401 } 2402 #undef L_MAXSYSCALL 2403 2404 /* 2405 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 2406 * 2407 * E.g.: sysctl linux.debug=21.0 2408 * 2409 * As a special case, syscall "all" will apply to all syscalls globally. 
2410 */ 2411 #define LINUX_MAX_DEBUGSTR 16 2412 int 2413 linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 2414 { 2415 char value[LINUX_MAX_DEBUGSTR], *p; 2416 int error, sysc, toggle; 2417 int global = 0; 2418 2419 value[0] = '\0'; 2420 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 2421 if (error || req->newptr == NULL) 2422 return (error); 2423 for (p = value; *p != '\0' && *p != '.'; p++); 2424 if (*p == '\0') 2425 return (EINVAL); 2426 *p++ = '\0'; 2427 sysc = strtol(value, NULL, 0); 2428 toggle = strtol(p, NULL, 0); 2429 if (strcmp(value, "all") == 0) 2430 global = 1; 2431 error = linux_debug(sysc, toggle, global); 2432 return (error); 2433 } 2434 2435 #endif /* DEBUG || KTR */ 2436 2437 int 2438 linux_sched_rr_get_interval(struct thread *td, 2439 struct linux_sched_rr_get_interval_args *uap) 2440 { 2441 struct timespec ts; 2442 struct l_timespec lts; 2443 struct thread *tdt; 2444 int error; 2445 2446 /* 2447 * According to man in case the invalid pid specified 2448 * EINVAL should be returned. 2449 */ 2450 if (uap->pid < 0) 2451 return (EINVAL); 2452 2453 tdt = linux_tdfind(td, uap->pid, -1); 2454 if (tdt == NULL) 2455 return (ESRCH); 2456 2457 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2458 PROC_UNLOCK(tdt->td_proc); 2459 if (error != 0) 2460 return (error); 2461 error = native_to_linux_timespec(<s, &ts); 2462 if (error != 0) 2463 return (error); 2464 return (copyout(<s, uap->interval, sizeof(lts))); 2465 } 2466 2467 /* 2468 * In case when the Linux thread is the initial thread in 2469 * the thread group thread id is equal to the process id. 2470 * Glibc depends on this magic (assert in pthread_getattr_np.c). 
2471 */ 2472 struct thread * 2473 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2474 { 2475 struct linux_emuldata *em; 2476 struct thread *tdt; 2477 struct proc *p; 2478 2479 tdt = NULL; 2480 if (tid == 0 || tid == td->td_tid) { 2481 tdt = td; 2482 PROC_LOCK(tdt->td_proc); 2483 } else if (tid > PID_MAX) 2484 tdt = tdfind(tid, pid); 2485 else { 2486 /* 2487 * Initial thread where the tid equal to the pid. 2488 */ 2489 p = pfind(tid); 2490 if (p != NULL) { 2491 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2492 /* 2493 * p is not a Linuxulator process. 2494 */ 2495 PROC_UNLOCK(p); 2496 return (NULL); 2497 } 2498 FOREACH_THREAD_IN_PROC(p, tdt) { 2499 em = em_find(tdt); 2500 if (tid == em->em_tid) 2501 return (tdt); 2502 } 2503 PROC_UNLOCK(p); 2504 } 2505 return (NULL); 2506 } 2507 2508 return (tdt); 2509 } 2510 2511 void 2512 linux_to_bsd_waitopts(int options, int *bsdopts) 2513 { 2514 2515 if (options & LINUX_WNOHANG) 2516 *bsdopts |= WNOHANG; 2517 if (options & LINUX_WUNTRACED) 2518 *bsdopts |= WUNTRACED; 2519 if (options & LINUX_WEXITED) 2520 *bsdopts |= WEXITED; 2521 if (options & LINUX_WCONTINUED) 2522 *bsdopts |= WCONTINUED; 2523 if (options & LINUX_WNOWAIT) 2524 *bsdopts |= WNOWAIT; 2525 2526 if (options & __WCLONE) 2527 *bsdopts |= WLINUXCLONE; 2528 } 2529 2530 int 2531 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2532 { 2533 struct uio uio; 2534 struct iovec iov; 2535 int error; 2536 2537 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2538 return (EINVAL); 2539 if (args->count > INT_MAX) 2540 args->count = INT_MAX; 2541 2542 iov.iov_base = args->buf; 2543 iov.iov_len = args->count; 2544 2545 uio.uio_iov = &iov; 2546 uio.uio_iovcnt = 1; 2547 uio.uio_resid = iov.iov_len; 2548 uio.uio_segflg = UIO_USERSPACE; 2549 uio.uio_rw = UIO_READ; 2550 uio.uio_td = td; 2551 2552 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2553 if (error == 0) 2554 td->td_retval[0] = args->count - uio.uio_resid; 2555 return 
(error); 2556 } 2557 2558 int 2559 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2560 { 2561 2562 /* Needs to be page-aligned */ 2563 if (args->start & PAGE_MASK) 2564 return (EINVAL); 2565 return (kern_mincore(td, args->start, args->len, args->vec)); 2566 } 2567