1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/mutex.h> 51 #include <sys/namei.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/reboot.h> 55 #include <sys/racct.h> 56 #include <sys/random.h> 57 #include <sys/resourcevar.h> 58 #include <sys/sched.h> 59 #include <sys/sdt.h> 60 #include <sys/signalvar.h> 61 #include <sys/stat.h> 62 #include <sys/syscallsubr.h> 63 #include <sys/sysctl.h> 64 #include <sys/sysproto.h> 65 #include <sys/systm.h> 66 #include <sys/time.h> 67 #include <sys/vmmeter.h> 68 #include <sys/vnode.h> 69 #include <sys/wait.h> 70 #include <sys/cpuset.h> 71 #include <sys/uio.h> 72 73 #include <security/mac/mac_framework.h> 74 75 #include <vm/vm.h> 76 #include <vm/pmap.h> 77 #include <vm/vm_kern.h> 78 #include <vm/vm_map.h> 79 #include <vm/vm_extern.h> 80 #include <vm/vm_object.h> 81 #include <vm/swap_pager.h> 82 83 #ifdef COMPAT_LINUX32 84 #include <machine/../linux32/linux.h> 85 #include <machine/../linux32/linux32_proto.h> 86 #else 87 #include <machine/../linux/linux.h> 88 #include <machine/../linux/linux_proto.h> 89 #endif 90 91 #include <compat/linux/linux_dtrace.h> 92 #include <compat/linux/linux_file.h> 93 #include <compat/linux/linux_mib.h> 94 #include <compat/linux/linux_signal.h> 95 #include <compat/linux/linux_timer.h> 96 #include <compat/linux/linux_util.h> 97 #include <compat/linux/linux_sysproto.h> 98 #include <compat/linux/linux_emul.h> 99 #include <compat/linux/linux_misc.h> 100 101 /** 102 * Special DTrace provider for the linuxulator. 103 * 104 * In this file we define the provider for the entire linuxulator. All 105 * modules (= files of the linuxulator) use it. 106 * 107 * We define a different name depending on the emulated bitsize, see 108 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 109 * native bitsize = linuxulator 110 * amd64, 32bit emulation = linuxulator32 111 */ 112 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 113 114 int stclohz; /* Statistics clock frequency */ 115 116 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 117 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 118 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 119 RLIMIT_MEMLOCK, RLIMIT_AS 120 }; 121 122 struct l_sysinfo { 123 l_long uptime; /* Seconds since boot */ 124 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 125 #define LINUX_SYSINFO_LOADS_SCALE 65536 126 l_ulong totalram; /* Total usable main memory size */ 127 l_ulong freeram; /* Available memory size */ 128 l_ulong sharedram; /* Amount of shared memory */ 129 l_ulong bufferram; /* Memory used by buffers */ 130 l_ulong totalswap; /* Total swap space size */ 131 l_ulong freeswap; /* swap space still available */ 132 l_ushort procs; /* Number of current processes */ 133 l_ushort pads; 134 l_ulong totalbig; 135 l_ulong freebig; 136 l_uint mem_unit; 137 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 138 }; 139 140 struct l_pselect6arg { 141 l_uintptr_t ss; 142 l_size_t ss_len; 143 }; 144 145 static int linux_utimensat_nsec_valid(l_long); 146 147 148 int 149 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 150 { 151 struct l_sysinfo sysinfo; 152 vm_object_t object; 153 int i, j; 154 struct timespec ts; 155 156 bzero(&sysinfo, sizeof(sysinfo)); 157 getnanouptime(&ts); 158 if (ts.tv_nsec != 0) 159 ts.tv_sec++; 160 sysinfo.uptime = ts.tv_sec; 161 162 /* Use the information from the mib to get our load averages */ 163 for (i = 0; i < 3; i++) 164 sysinfo.loads[i] = averunnable.ldavg[i] * 165 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 166 167 sysinfo.totalram = physmem * PAGE_SIZE; 168 sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE; 169 170 sysinfo.sharedram = 0; 171 mtx_lock(&vm_object_list_mtx); 172 TAILQ_FOREACH(object, &vm_object_list, object_list) 173 if (object->shadow_count > 1) 174 sysinfo.sharedram += object->resident_page_count; 175 mtx_unlock(&vm_object_list_mtx); 176 177 sysinfo.sharedram *= PAGE_SIZE; 178 sysinfo.bufferram = 0; 179 180 swap_pager_status(&i, &j); 181 sysinfo.totalswap = i * PAGE_SIZE; 182 sysinfo.freeswap = (i - j) * PAGE_SIZE; 183 184 sysinfo.procs = nprocs; 185 186 /* The following are only present in newer Linux kernels. */ 187 sysinfo.totalbig = 0; 188 sysinfo.freebig = 0; 189 sysinfo.mem_unit = 1; 190 191 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 192 } 193 194 int 195 linux_alarm(struct thread *td, struct linux_alarm_args *args) 196 { 197 struct itimerval it, old_it; 198 u_int secs; 199 int error; 200 201 #ifdef DEBUG 202 if (ldebug(alarm)) 203 printf(ARGS(alarm, "%u"), args->secs); 204 #endif 205 secs = args->secs; 206 /* 207 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 208 * to match kern_setitimer()'s limit to avoid error from it. 209 * 210 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 211 * platforms. 212 */ 213 if (secs > INT32_MAX / 2) 214 secs = INT32_MAX / 2; 215 216 it.it_value.tv_sec = secs; 217 it.it_value.tv_usec = 0; 218 timevalclear(&it.it_interval); 219 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 220 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 221 222 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 223 old_it.it_value.tv_usec >= 500000) 224 old_it.it_value.tv_sec++; 225 td->td_retval[0] = old_it.it_value.tv_sec; 226 return (0); 227 } 228 229 int 230 linux_brk(struct thread *td, struct linux_brk_args *args) 231 { 232 struct vmspace *vm = td->td_proc->p_vmspace; 233 vm_offset_t new, old; 234 struct obreak_args /* { 235 char * nsize; 236 } */ tmp; 237 238 #ifdef DEBUG 239 if (ldebug(brk)) 240 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 241 #endif 242 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 243 new = (vm_offset_t)args->dsend; 244 tmp.nsize = (char *)new; 245 if (((caddr_t)new > vm->vm_daddr) && !sys_obreak(td, &tmp)) 246 td->td_retval[0] = (long)new; 247 else 248 td->td_retval[0] = (long)old; 249 250 return (0); 251 } 252 253 #if defined(__i386__) 254 /* XXX: what about amd64/linux32? */ 255 256 int 257 linux_uselib(struct thread *td, struct linux_uselib_args *args) 258 { 259 struct nameidata ni; 260 struct vnode *vp; 261 struct exec *a_out; 262 struct vattr attr; 263 vm_offset_t vmaddr; 264 unsigned long file_offset; 265 unsigned long bss_size; 266 char *library; 267 ssize_t aresid; 268 int error, locked, writecount; 269 270 LCONVPATHEXIST(td, args->library, &library); 271 272 #ifdef DEBUG 273 if (ldebug(uselib)) 274 printf(ARGS(uselib, "%s"), library); 275 #endif 276 277 a_out = NULL; 278 locked = 0; 279 vp = NULL; 280 281 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 282 UIO_SYSSPACE, library, td); 283 error = namei(&ni); 284 LFREEPATH(library); 285 if (error) 286 goto cleanup; 287 288 vp = ni.ni_vp; 289 NDFREE(&ni, NDF_ONLY_PNBUF); 290 291 /* 292 * From here on down, we have a locked vnode that must be unlocked. 293 * XXX: The code below largely duplicates exec_check_permissions(). 294 */ 295 locked = 1; 296 297 /* Writable? */ 298 error = VOP_GET_WRITECOUNT(vp, &writecount); 299 if (error != 0) 300 goto cleanup; 301 if (writecount != 0) { 302 error = ETXTBSY; 303 goto cleanup; 304 } 305 306 /* Executable? */ 307 error = VOP_GETATTR(vp, &attr, td->td_ucred); 308 if (error) 309 goto cleanup; 310 311 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 312 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 313 /* EACCESS is what exec(2) returns. */ 314 error = ENOEXEC; 315 goto cleanup; 316 } 317 318 /* Sensible size? */ 319 if (attr.va_size == 0) { 320 error = ENOEXEC; 321 goto cleanup; 322 } 323 324 /* Can we access it? */ 325 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 326 if (error) 327 goto cleanup; 328 329 /* 330 * XXX: This should use vn_open() so that it is properly authorized, 331 * and to reduce code redundancy all over the place here. 332 * XXX: Not really, it duplicates far more of exec_check_permissions() 333 * than vn_open(). 334 */ 335 #ifdef MAC 336 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 337 if (error) 338 goto cleanup; 339 #endif 340 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 341 if (error) 342 goto cleanup; 343 344 /* Pull in executable header into exec_map */ 345 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 346 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 347 if (error) 348 goto cleanup; 349 350 /* Is it a Linux binary ? */ 351 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 352 error = ENOEXEC; 353 goto cleanup; 354 } 355 356 /* 357 * While we are here, we should REALLY do some more checks 358 */ 359 360 /* Set file/virtual offset based on a.out variant. */ 361 switch ((int)(a_out->a_magic & 0xffff)) { 362 case 0413: /* ZMAGIC */ 363 file_offset = 1024; 364 break; 365 case 0314: /* QMAGIC */ 366 file_offset = 0; 367 break; 368 default: 369 error = ENOEXEC; 370 goto cleanup; 371 } 372 373 bss_size = round_page(a_out->a_bss); 374 375 /* Check various fields in header for validity/bounds. */ 376 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 377 error = ENOEXEC; 378 goto cleanup; 379 } 380 381 /* text + data can't exceed file size */ 382 if (a_out->a_data + a_out->a_text > attr.va_size) { 383 error = EFAULT; 384 goto cleanup; 385 } 386 387 /* 388 * text/data/bss must not exceed limits 389 * XXX - this is not complete. it should check current usage PLUS 390 * the resources needed by this library. 391 */ 392 PROC_LOCK(td->td_proc); 393 if (a_out->a_text > maxtsiz || 394 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 395 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 396 bss_size) != 0) { 397 PROC_UNLOCK(td->td_proc); 398 error = ENOMEM; 399 goto cleanup; 400 } 401 PROC_UNLOCK(td->td_proc); 402 403 /* 404 * Prevent more writers. 405 * XXX: Note that if any of the VM operations fail below we don't 406 * clear this flag. 407 */ 408 VOP_SET_TEXT(vp); 409 410 /* 411 * Lock no longer needed 412 */ 413 locked = 0; 414 VOP_UNLOCK(vp, 0); 415 416 /* 417 * Check if file_offset page aligned. Currently we cannot handle 418 * misalinged file offsets, and so we read in the entire image 419 * (what a waste). 420 */ 421 if (file_offset & PAGE_MASK) { 422 #ifdef DEBUG 423 printf("uselib: Non page aligned binary %lu\n", file_offset); 424 #endif 425 /* Map text+data read/write/execute */ 426 427 /* a_entry is the load address and is page aligned */ 428 vmaddr = trunc_page(a_out->a_entry); 429 430 /* get anon user mapping, read+write+execute */ 431 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 432 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 433 VM_PROT_ALL, VM_PROT_ALL, 0); 434 if (error) 435 goto cleanup; 436 437 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 438 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 439 td->td_ucred, NOCRED, &aresid, td); 440 if (error != 0) 441 goto cleanup; 442 if (aresid != 0) { 443 error = ENOEXEC; 444 goto cleanup; 445 } 446 } else { 447 #ifdef DEBUG 448 printf("uselib: Page aligned binary %lu\n", file_offset); 449 #endif 450 /* 451 * for QMAGIC, a_entry is 20 bytes beyond the load address 452 * to skip the executable header 453 */ 454 vmaddr = trunc_page(a_out->a_entry); 455 456 /* 457 * Map it all into the process's space as a single 458 * copy-on-write "data" segment. 459 */ 460 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 461 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 462 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 463 if (error) 464 goto cleanup; 465 } 466 #ifdef DEBUG 467 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 468 ((long *)vmaddr)[1]); 469 #endif 470 if (bss_size != 0) { 471 /* Calculate BSS start address */ 472 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 473 a_out->a_data; 474 475 /* allocate some 'anon' space */ 476 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 477 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 478 VM_PROT_ALL, 0); 479 if (error) 480 goto cleanup; 481 } 482 483 cleanup: 484 /* Unlock vnode if needed */ 485 if (locked) 486 VOP_UNLOCK(vp, 0); 487 488 /* Release the temporary mapping. */ 489 if (a_out) 490 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 491 492 return (error); 493 } 494 495 #endif /* __i386__ */ 496 497 int 498 linux_select(struct thread *td, struct linux_select_args *args) 499 { 500 l_timeval ltv; 501 struct timeval tv0, tv1, utv, *tvp; 502 int error; 503 504 #ifdef DEBUG 505 if (ldebug(select)) 506 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 507 (void *)args->readfds, (void *)args->writefds, 508 (void *)args->exceptfds, (void *)args->timeout); 509 #endif 510 511 /* 512 * Store current time for computation of the amount of 513 * time left. 514 */ 515 if (args->timeout) { 516 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 517 goto select_out; 518 utv.tv_sec = ltv.tv_sec; 519 utv.tv_usec = ltv.tv_usec; 520 #ifdef DEBUG 521 if (ldebug(select)) 522 printf(LMSG("incoming timeout (%jd/%ld)"), 523 (intmax_t)utv.tv_sec, utv.tv_usec); 524 #endif 525 526 if (itimerfix(&utv)) { 527 /* 528 * The timeval was invalid. Convert it to something 529 * valid that will act as it does under Linux. 530 */ 531 utv.tv_sec += utv.tv_usec / 1000000; 532 utv.tv_usec %= 1000000; 533 if (utv.tv_usec < 0) { 534 utv.tv_sec -= 1; 535 utv.tv_usec += 1000000; 536 } 537 if (utv.tv_sec < 0) 538 timevalclear(&utv); 539 } 540 microtime(&tv0); 541 tvp = &utv; 542 } else 543 tvp = NULL; 544 545 error = kern_select(td, args->nfds, args->readfds, args->writefds, 546 args->exceptfds, tvp, LINUX_NFDBITS); 547 548 #ifdef DEBUG 549 if (ldebug(select)) 550 printf(LMSG("real select returns %d"), error); 551 #endif 552 if (error) 553 goto select_out; 554 555 if (args->timeout) { 556 if (td->td_retval[0]) { 557 /* 558 * Compute how much time was left of the timeout, 559 * by subtracting the current time and the time 560 * before we started the call, and subtracting 561 * that result from the user-supplied value. 562 */ 563 microtime(&tv1); 564 timevalsub(&tv1, &tv0); 565 timevalsub(&utv, &tv1); 566 if (utv.tv_sec < 0) 567 timevalclear(&utv); 568 } else 569 timevalclear(&utv); 570 #ifdef DEBUG 571 if (ldebug(select)) 572 printf(LMSG("outgoing timeout (%jd/%ld)"), 573 (intmax_t)utv.tv_sec, utv.tv_usec); 574 #endif 575 ltv.tv_sec = utv.tv_sec; 576 ltv.tv_usec = utv.tv_usec; 577 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 578 goto select_out; 579 } 580 581 select_out: 582 #ifdef DEBUG 583 if (ldebug(select)) 584 printf(LMSG("select_out -> %d"), error); 585 #endif 586 return (error); 587 } 588 589 int 590 linux_mremap(struct thread *td, struct linux_mremap_args *args) 591 { 592 uintptr_t addr; 593 size_t len; 594 int error = 0; 595 596 #ifdef DEBUG 597 if (ldebug(mremap)) 598 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 599 (void *)(uintptr_t)args->addr, 600 (unsigned long)args->old_len, 601 (unsigned long)args->new_len, 602 (unsigned long)args->flags); 603 #endif 604 605 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 606 td->td_retval[0] = 0; 607 return (EINVAL); 608 } 609 610 /* 611 * Check for the page alignment. 612 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 613 */ 614 if (args->addr & PAGE_MASK) { 615 td->td_retval[0] = 0; 616 return (EINVAL); 617 } 618 619 args->new_len = round_page(args->new_len); 620 args->old_len = round_page(args->old_len); 621 622 if (args->new_len > args->old_len) { 623 td->td_retval[0] = 0; 624 return (ENOMEM); 625 } 626 627 if (args->new_len < args->old_len) { 628 addr = args->addr + args->new_len; 629 len = args->old_len - args->new_len; 630 error = kern_munmap(td, addr, len); 631 } 632 633 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 634 return (error); 635 } 636 637 #define LINUX_MS_ASYNC 0x0001 638 #define LINUX_MS_INVALIDATE 0x0002 639 #define LINUX_MS_SYNC 0x0004 640 641 int 642 linux_msync(struct thread *td, struct linux_msync_args *args) 643 { 644 645 return (kern_msync(td, args->addr, args->len, 646 args->fl & ~LINUX_MS_SYNC)); 647 } 648 649 int 650 linux_time(struct thread *td, struct linux_time_args *args) 651 { 652 struct timeval tv; 653 l_time_t tm; 654 int error; 655 656 #ifdef DEBUG 657 if (ldebug(time)) 658 printf(ARGS(time, "*")); 659 #endif 660 661 microtime(&tv); 662 tm = tv.tv_sec; 663 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 664 return (error); 665 td->td_retval[0] = tm; 666 return (0); 667 } 668 669 struct l_times_argv { 670 l_clock_t tms_utime; 671 l_clock_t tms_stime; 672 l_clock_t tms_cutime; 673 l_clock_t tms_cstime; 674 }; 675 676 677 /* 678 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 679 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 680 * auxiliary vector entry. 681 */ 682 #define CLK_TCK 100 683 684 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 685 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 686 687 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 688 CONVNTCK(r) : CONVOTCK(r)) 689 690 int 691 linux_times(struct thread *td, struct linux_times_args *args) 692 { 693 struct timeval tv, utime, stime, cutime, cstime; 694 struct l_times_argv tms; 695 struct proc *p; 696 int error; 697 698 #ifdef DEBUG 699 if (ldebug(times)) 700 printf(ARGS(times, "*")); 701 #endif 702 703 if (args->buf != NULL) { 704 p = td->td_proc; 705 PROC_LOCK(p); 706 PROC_STATLOCK(p); 707 calcru(p, &utime, &stime); 708 PROC_STATUNLOCK(p); 709 calccru(p, &cutime, &cstime); 710 PROC_UNLOCK(p); 711 712 tms.tms_utime = CONVTCK(utime); 713 tms.tms_stime = CONVTCK(stime); 714 715 tms.tms_cutime = CONVTCK(cutime); 716 tms.tms_cstime = CONVTCK(cstime); 717 718 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 719 return (error); 720 } 721 722 microuptime(&tv); 723 td->td_retval[0] = (int)CONVTCK(tv); 724 return (0); 725 } 726 727 int 728 linux_newuname(struct thread *td, struct linux_newuname_args *args) 729 { 730 struct l_new_utsname utsname; 731 char osname[LINUX_MAX_UTSNAME]; 732 char osrelease[LINUX_MAX_UTSNAME]; 733 char *p; 734 735 #ifdef DEBUG 736 if (ldebug(newuname)) 737 printf(ARGS(newuname, "*")); 738 #endif 739 740 linux_get_osname(td, osname); 741 linux_get_osrelease(td, osrelease); 742 743 bzero(&utsname, sizeof(utsname)); 744 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 745 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 746 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 747 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 748 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 749 for (p = utsname.version; *p != '\0'; ++p) 750 if (*p == '\n') { 751 *p = '\0'; 752 break; 753 } 754 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 755 756 return (copyout(&utsname, args->buf, sizeof(utsname))); 757 } 758 759 struct l_utimbuf { 760 l_time_t l_actime; 761 l_time_t l_modtime; 762 }; 763 764 int 765 linux_utime(struct thread *td, struct linux_utime_args *args) 766 { 767 struct timeval tv[2], *tvp; 768 struct l_utimbuf lut; 769 char *fname; 770 int error; 771 772 LCONVPATHEXIST(td, args->fname, &fname); 773 774 #ifdef DEBUG 775 if (ldebug(utime)) 776 printf(ARGS(utime, "%s, *"), fname); 777 #endif 778 779 if (args->times) { 780 if ((error = copyin(args->times, &lut, sizeof lut))) { 781 LFREEPATH(fname); 782 return (error); 783 } 784 tv[0].tv_sec = lut.l_actime; 785 tv[0].tv_usec = 0; 786 tv[1].tv_sec = lut.l_modtime; 787 tv[1].tv_usec = 0; 788 tvp = tv; 789 } else 790 tvp = NULL; 791 792 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 793 UIO_SYSSPACE); 794 LFREEPATH(fname); 795 return (error); 796 } 797 798 int 799 linux_utimes(struct thread *td, struct linux_utimes_args *args) 800 { 801 l_timeval ltv[2]; 802 struct timeval tv[2], *tvp = NULL; 803 char *fname; 804 int error; 805 806 LCONVPATHEXIST(td, args->fname, &fname); 807 808 #ifdef DEBUG 809 if (ldebug(utimes)) 810 printf(ARGS(utimes, "%s, *"), fname); 811 #endif 812 813 if (args->tptr != NULL) { 814 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 815 LFREEPATH(fname); 816 return (error); 817 } 818 tv[0].tv_sec = ltv[0].tv_sec; 819 tv[0].tv_usec = ltv[0].tv_usec; 820 tv[1].tv_sec = ltv[1].tv_sec; 821 tv[1].tv_usec = ltv[1].tv_usec; 822 tvp = tv; 823 } 824 825 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 826 tvp, UIO_SYSSPACE); 827 LFREEPATH(fname); 828 return (error); 829 } 830 831 static int 832 linux_utimensat_nsec_valid(l_long nsec) 833 { 834 835 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 836 return (0); 837 if (nsec >= 0 && nsec <= 999999999) 838 return (0); 839 return (1); 840 } 841 842 int 843 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 844 { 845 struct l_timespec l_times[2]; 846 struct timespec times[2], *timesp = NULL; 847 char *path = NULL; 848 int error, dfd, flags = 0; 849 850 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 851 852 #ifdef DEBUG 853 if (ldebug(utimensat)) 854 printf(ARGS(utimensat, "%d, *"), dfd); 855 #endif 856 857 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 858 return (EINVAL); 859 860 if (args->times != NULL) { 861 error = copyin(args->times, l_times, sizeof(l_times)); 862 if (error != 0) 863 return (error); 864 865 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 866 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 867 return (EINVAL); 868 869 times[0].tv_sec = l_times[0].tv_sec; 870 switch (l_times[0].tv_nsec) 871 { 872 case LINUX_UTIME_OMIT: 873 times[0].tv_nsec = UTIME_OMIT; 874 break; 875 case LINUX_UTIME_NOW: 876 times[0].tv_nsec = UTIME_NOW; 877 break; 878 default: 879 times[0].tv_nsec = l_times[0].tv_nsec; 880 } 881 882 times[1].tv_sec = l_times[1].tv_sec; 883 switch (l_times[1].tv_nsec) 884 { 885 case LINUX_UTIME_OMIT: 886 times[1].tv_nsec = UTIME_OMIT; 887 break; 888 case LINUX_UTIME_NOW: 889 times[1].tv_nsec = UTIME_NOW; 890 break; 891 default: 892 times[1].tv_nsec = l_times[1].tv_nsec; 893 break; 894 } 895 timesp = times; 896 897 /* This breaks POSIX, but is what the Linux kernel does 898 * _on purpose_ (documented in the man page for utimensat(2)), 899 * so we must follow that behaviour. */ 900 if (times[0].tv_nsec == UTIME_OMIT && 901 times[1].tv_nsec == UTIME_OMIT) 902 return (0); 903 } 904 905 if (args->pathname != NULL) 906 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 907 else if (args->flags != 0) 908 return (EINVAL); 909 910 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 911 flags |= AT_SYMLINK_NOFOLLOW; 912 913 if (path == NULL) 914 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 915 else { 916 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 917 UIO_SYSSPACE, flags); 918 LFREEPATH(path); 919 } 920 921 return (error); 922 } 923 924 int 925 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 926 { 927 l_timeval ltv[2]; 928 struct timeval tv[2], *tvp = NULL; 929 char *fname; 930 int error, dfd; 931 932 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 933 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 934 935 #ifdef DEBUG 936 if (ldebug(futimesat)) 937 printf(ARGS(futimesat, "%s, *"), fname); 938 #endif 939 940 if (args->utimes != NULL) { 941 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 942 LFREEPATH(fname); 943 return (error); 944 } 945 tv[0].tv_sec = ltv[0].tv_sec; 946 tv[0].tv_usec = ltv[0].tv_usec; 947 tv[1].tv_sec = ltv[1].tv_sec; 948 tv[1].tv_usec = ltv[1].tv_usec; 949 tvp = tv; 950 } 951 952 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 953 LFREEPATH(fname); 954 return (error); 955 } 956 957 int 958 linux_common_wait(struct thread *td, int pid, int *status, 959 int options, struct rusage *ru) 960 { 961 int error, tmpstat; 962 963 error = kern_wait(td, pid, &tmpstat, options, ru); 964 if (error) 965 return (error); 966 967 if (status) { 968 tmpstat &= 0xffff; 969 if (WIFSIGNALED(tmpstat)) 970 tmpstat = (tmpstat & 0xffffff80) | 971 bsd_to_linux_signal(WTERMSIG(tmpstat)); 972 else if (WIFSTOPPED(tmpstat)) 973 tmpstat = (tmpstat & 0xffff00ff) | 974 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 975 else if (WIFCONTINUED(tmpstat)) 976 tmpstat = 0xffff; 977 error = copyout(&tmpstat, status, sizeof(int)); 978 } 979 980 return (error); 981 } 982 983 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 984 int 985 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 986 { 987 struct linux_wait4_args wait4_args; 988 989 #ifdef DEBUG 990 if (ldebug(waitpid)) 991 printf(ARGS(waitpid, "%d, %p, %d"), 992 args->pid, (void *)args->status, args->options); 993 #endif 994 995 wait4_args.pid = args->pid; 996 wait4_args.status = args->status; 997 wait4_args.options = args->options; 998 wait4_args.rusage = NULL; 999 1000 return (linux_wait4(td, &wait4_args)); 1001 } 1002 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1003 1004 int 1005 linux_wait4(struct thread *td, struct linux_wait4_args *args) 1006 { 1007 int error, options; 1008 struct rusage ru, *rup; 1009 1010 #ifdef DEBUG 1011 if (ldebug(wait4)) 1012 printf(ARGS(wait4, "%d, %p, %d, %p"), 1013 args->pid, (void *)args->status, args->options, 1014 (void *)args->rusage); 1015 #endif 1016 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 1017 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1018 return (EINVAL); 1019 1020 options = WEXITED; 1021 linux_to_bsd_waitopts(args->options, &options); 1022 1023 if (args->rusage != NULL) 1024 rup = &ru; 1025 else 1026 rup = NULL; 1027 error = linux_common_wait(td, args->pid, args->status, options, rup); 1028 if (error != 0) 1029 return (error); 1030 if (args->rusage != NULL) 1031 error = linux_copyout_rusage(&ru, args->rusage); 1032 return (error); 1033 } 1034 1035 int 1036 linux_waitid(struct thread *td, struct linux_waitid_args *args) 1037 { 1038 int status, options, sig; 1039 struct __wrusage wru; 1040 siginfo_t siginfo; 1041 l_siginfo_t lsi; 1042 idtype_t idtype; 1043 struct proc *p; 1044 int error; 1045 1046 options = 0; 1047 linux_to_bsd_waitopts(args->options, &options); 1048 1049 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1050 return (EINVAL); 1051 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1052 return (EINVAL); 1053 1054 switch (args->idtype) { 1055 case LINUX_P_ALL: 1056 idtype = P_ALL; 1057 break; 1058 case LINUX_P_PID: 1059 if (args->id <= 0) 1060 return (EINVAL); 1061 idtype = P_PID; 1062 break; 1063 case LINUX_P_PGID: 1064 if (args->id <= 0) 1065 return (EINVAL); 1066 idtype = P_PGID; 1067 break; 1068 default: 1069 return (EINVAL); 1070 } 1071 1072 error = kern_wait6(td, idtype, args->id, &status, options, 1073 &wru, &siginfo); 1074 if (error != 0) 1075 return (error); 1076 if (args->rusage != NULL) { 1077 error = linux_copyout_rusage(&wru.wru_children, 1078 args->rusage); 1079 if (error != 0) 1080 return (error); 1081 } 1082 if (args->info != NULL) { 1083 p = td->td_proc; 1084 if (td->td_retval[0] == 0) 1085 bzero(&lsi, sizeof(lsi)); 1086 else { 1087 sig = bsd_to_linux_signal(siginfo.si_signo); 1088 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1089 } 1090 error = copyout(&lsi, args->info, sizeof(lsi)); 1091 } 1092 td->td_retval[0] = 0; 1093 1094 return (error); 1095 } 1096 1097 int 1098 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1099 { 1100 char *path; 1101 int error; 1102 1103 LCONVPATHCREAT(td, args->path, &path); 1104 1105 #ifdef DEBUG 1106 if (ldebug(mknod)) 1107 printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode, 1108 (uintmax_t)args->dev); 1109 #endif 1110 1111 switch (args->mode & S_IFMT) { 1112 case S_IFIFO: 1113 case S_IFSOCK: 1114 error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, 1115 args->mode); 1116 break; 1117 1118 case S_IFCHR: 1119 case S_IFBLK: 1120 error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, 1121 args->mode, args->dev); 1122 break; 1123 1124 case S_IFDIR: 1125 error = EPERM; 1126 break; 1127 1128 case 0: 1129 args->mode |= S_IFREG; 1130 /* FALLTHROUGH */ 1131 case S_IFREG: 1132 error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, 1133 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1134 if (error == 0) 1135 kern_close(td, td->td_retval[0]); 1136 break; 1137 1138 default: 1139 error = EINVAL; 1140 break; 1141 } 1142 LFREEPATH(path); 1143 return (error); 1144 } 1145 1146 int 1147 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1148 { 1149 char *path; 1150 int error, dfd; 1151 1152 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1153 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1154 1155 #ifdef DEBUG 1156 if (ldebug(mknodat)) 1157 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 1158 #endif 1159 1160 switch (args->mode & S_IFMT) { 1161 case S_IFIFO: 1162 case S_IFSOCK: 1163 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 1164 break; 1165 1166 case S_IFCHR: 1167 case S_IFBLK: 1168 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 1169 args->dev); 1170 break; 1171 1172 case S_IFDIR: 1173 error = EPERM; 1174 break; 1175 1176 case 0: 1177 args->mode |= S_IFREG; 1178 /* FALLTHROUGH */ 1179 case S_IFREG: 1180 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 1181 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1182 if (error == 0) 1183 kern_close(td, td->td_retval[0]); 1184 break; 1185 1186 default: 1187 error = EINVAL; 1188 break; 1189 } 1190 LFREEPATH(path); 1191 return (error); 1192 } 1193 1194 /* 1195 * UGH! This is just about the dumbest idea I've ever heard!! 1196 */ 1197 int 1198 linux_personality(struct thread *td, struct linux_personality_args *args) 1199 { 1200 struct linux_pemuldata *pem; 1201 struct proc *p = td->td_proc; 1202 uint32_t old; 1203 1204 #ifdef DEBUG 1205 if (ldebug(personality)) 1206 printf(ARGS(personality, "%u"), args->per); 1207 #endif 1208 1209 PROC_LOCK(p); 1210 pem = pem_find(p); 1211 old = pem->persona; 1212 if (args->per != 0xffffffff) 1213 pem->persona = args->per; 1214 PROC_UNLOCK(p); 1215 1216 td->td_retval[0] = old; 1217 return (0); 1218 } 1219 1220 struct l_itimerval { 1221 l_timeval it_interval; 1222 l_timeval it_value; 1223 }; 1224 1225 #define B2L_ITIMERVAL(bip, lip) \ 1226 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1227 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1228 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1229 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1230 1231 int 1232 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1233 { 1234 int error; 1235 struct l_itimerval ls; 1236 struct itimerval aitv, oitv; 1237 1238 #ifdef DEBUG 1239 if (ldebug(setitimer)) 1240 printf(ARGS(setitimer, "%p, %p"), 1241 (void *)uap->itv, (void *)uap->oitv); 1242 #endif 1243 1244 if (uap->itv == NULL) { 1245 uap->itv = uap->oitv; 1246 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1247 } 1248 1249 error = copyin(uap->itv, &ls, sizeof(ls)); 1250 if (error != 0) 1251 return (error); 1252 B2L_ITIMERVAL(&aitv, &ls); 1253 #ifdef DEBUG 1254 if (ldebug(setitimer)) { 1255 printf("setitimer: value: sec: %jd, usec: %ld\n", 1256 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1257 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1258 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1259 } 1260 #endif 1261 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1262 if (error != 0 || uap->oitv == NULL) 1263 return (error); 1264 B2L_ITIMERVAL(&ls, &oitv); 1265 1266 return (copyout(&ls, uap->oitv, sizeof(ls))); 1267 } 1268 1269 int 1270 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1271 { 1272 int error; 1273 struct l_itimerval ls; 1274 struct itimerval aitv; 1275 1276 #ifdef DEBUG 1277 if (ldebug(getitimer)) 1278 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1279 #endif 1280 error = kern_getitimer(td, uap->which, &aitv); 1281 if (error != 0) 1282 return (error); 1283 B2L_ITIMERVAL(&ls, &aitv); 1284 return (copyout(&ls, uap->itv, sizeof(ls))); 1285 } 1286 1287 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1288 int 1289 linux_nice(struct thread *td, struct linux_nice_args *args) 1290 { 1291 struct setpriority_args bsd_args; 1292 1293 bsd_args.which = PRIO_PROCESS; 1294 bsd_args.who = 0; /* current process */ 1295 bsd_args.prio = args->inc; 1296 return (sys_setpriority(td, &bsd_args)); 1297 } 1298 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1299 1300 int 1301 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1302 { 1303 struct ucred *newcred, *oldcred; 1304 l_gid_t *linux_gidset; 1305 gid_t *bsd_gidset; 1306 int ngrp, error; 1307 struct proc *p; 1308 1309 ngrp = args->gidsetsize; 1310 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1311 return (EINVAL); 1312 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1313 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1314 if (error) 1315 goto out; 1316 newcred = crget(); 1317 crextend(newcred, ngrp + 1); 1318 p = td->td_proc; 1319 PROC_LOCK(p); 1320 oldcred = p->p_ucred; 1321 crcopy(newcred, oldcred); 1322 1323 /* 1324 * cr_groups[0] holds egid. Setting the whole set from 1325 * the supplied set will cause egid to be changed too. 1326 * Keep cr_groups[0] unchanged to prevent that. 1327 */ 1328 1329 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1330 PROC_UNLOCK(p); 1331 crfree(newcred); 1332 goto out; 1333 } 1334 1335 if (ngrp > 0) { 1336 newcred->cr_ngroups = ngrp + 1; 1337 1338 bsd_gidset = newcred->cr_groups; 1339 ngrp--; 1340 while (ngrp >= 0) { 1341 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1342 ngrp--; 1343 } 1344 } else 1345 newcred->cr_ngroups = 1; 1346 1347 setsugid(p); 1348 proc_set_cred(p, newcred); 1349 PROC_UNLOCK(p); 1350 crfree(oldcred); 1351 error = 0; 1352 out: 1353 free(linux_gidset, M_LINUX); 1354 return (error); 1355 } 1356 1357 int 1358 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1359 { 1360 struct ucred *cred; 1361 l_gid_t *linux_gidset; 1362 gid_t *bsd_gidset; 1363 int bsd_gidsetsz, ngrp, error; 1364 1365 cred = td->td_ucred; 1366 bsd_gidset = cred->cr_groups; 1367 bsd_gidsetsz = cred->cr_ngroups - 1; 1368 1369 /* 1370 * cr_groups[0] holds egid. Returning the whole set 1371 * here will cause a duplicate. Exclude cr_groups[0] 1372 * to prevent that. 1373 */ 1374 1375 if ((ngrp = args->gidsetsize) == 0) { 1376 td->td_retval[0] = bsd_gidsetsz; 1377 return (0); 1378 } 1379 1380 if (ngrp < bsd_gidsetsz) 1381 return (EINVAL); 1382 1383 ngrp = 0; 1384 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1385 M_LINUX, M_WAITOK); 1386 while (ngrp < bsd_gidsetsz) { 1387 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1388 ngrp++; 1389 } 1390 1391 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1392 free(linux_gidset, M_LINUX); 1393 if (error) 1394 return (error); 1395 1396 td->td_retval[0] = ngrp; 1397 return (0); 1398 } 1399 1400 int 1401 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1402 { 1403 struct rlimit bsd_rlim; 1404 struct l_rlimit rlim; 1405 u_int which; 1406 int error; 1407 1408 #ifdef DEBUG 1409 if (ldebug(setrlimit)) 1410 printf(ARGS(setrlimit, "%d, %p"), 1411 args->resource, (void *)args->rlim); 1412 #endif 1413 1414 if (args->resource >= LINUX_RLIM_NLIMITS) 1415 return (EINVAL); 1416 1417 which = linux_to_bsd_resource[args->resource]; 1418 if (which == -1) 1419 return (EINVAL); 1420 1421 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1422 if (error) 1423 return (error); 1424 1425 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1426 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1427 return (kern_setrlimit(td, which, &bsd_rlim)); 1428 } 1429 1430 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1431 int 1432 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1433 { 1434 struct l_rlimit rlim; 1435 struct rlimit bsd_rlim; 1436 u_int which; 1437 1438 #ifdef DEBUG 1439 if (ldebug(old_getrlimit)) 1440 printf(ARGS(old_getrlimit, "%d, %p"), 1441 args->resource, (void *)args->rlim); 1442 #endif 1443 1444 if (args->resource >= LINUX_RLIM_NLIMITS) 1445 return (EINVAL); 1446 1447 which = linux_to_bsd_resource[args->resource]; 1448 if (which == -1) 1449 return (EINVAL); 1450 1451 lim_rlimit(td, which, &bsd_rlim); 1452 1453 #ifdef COMPAT_LINUX32 1454 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1455 if (rlim.rlim_cur == UINT_MAX) 1456 rlim.rlim_cur = INT_MAX; 1457 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1458 if (rlim.rlim_max == UINT_MAX) 1459 rlim.rlim_max = INT_MAX; 1460 #else 1461 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1462 if (rlim.rlim_cur == ULONG_MAX) 1463 rlim.rlim_cur = LONG_MAX; 1464 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1465 if (rlim.rlim_max == ULONG_MAX) 1466 rlim.rlim_max = LONG_MAX; 1467 #endif 1468 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1469 } 1470 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1471 1472 int 1473 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1474 { 1475 struct l_rlimit rlim; 1476 struct rlimit bsd_rlim; 1477 u_int which; 1478 1479 #ifdef DEBUG 1480 if (ldebug(getrlimit)) 1481 printf(ARGS(getrlimit, "%d, %p"), 1482 args->resource, (void *)args->rlim); 1483 #endif 1484 1485 if (args->resource >= LINUX_RLIM_NLIMITS) 1486 return (EINVAL); 1487 1488 which = linux_to_bsd_resource[args->resource]; 1489 if (which == -1) 1490 return (EINVAL); 1491 1492 lim_rlimit(td, which, &bsd_rlim); 1493 1494 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1495 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1496 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1497 } 1498 1499 int 1500 linux_sched_setscheduler(struct thread *td, 1501 struct linux_sched_setscheduler_args *args) 1502 { 1503 struct sched_param sched_param; 1504 struct thread *tdt; 1505 int error, policy; 1506 1507 #ifdef DEBUG 1508 if (ldebug(sched_setscheduler)) 1509 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1510 args->pid, args->policy, (const void *)args->param); 1511 #endif 1512 1513 switch (args->policy) { 1514 case LINUX_SCHED_OTHER: 1515 policy = SCHED_OTHER; 1516 break; 1517 case LINUX_SCHED_FIFO: 1518 policy = SCHED_FIFO; 1519 break; 1520 case LINUX_SCHED_RR: 1521 policy = SCHED_RR; 1522 break; 1523 default: 1524 return (EINVAL); 1525 } 1526 1527 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1528 if (error) 1529 return (error); 1530 1531 tdt = linux_tdfind(td, args->pid, -1); 1532 if (tdt == NULL) 1533 return (ESRCH); 1534 1535 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1536 PROC_UNLOCK(tdt->td_proc); 1537 return (error); 1538 } 1539 1540 int 1541 linux_sched_getscheduler(struct thread *td, 1542 struct linux_sched_getscheduler_args *args) 1543 { 1544 struct thread *tdt; 1545 int error, policy; 1546 1547 #ifdef DEBUG 1548 if (ldebug(sched_getscheduler)) 1549 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1550 #endif 1551 1552 tdt = linux_tdfind(td, args->pid, -1); 1553 if (tdt == NULL) 1554 return (ESRCH); 1555 1556 error = kern_sched_getscheduler(td, tdt, &policy); 1557 PROC_UNLOCK(tdt->td_proc); 1558 1559 switch (policy) { 1560 case SCHED_OTHER: 1561 td->td_retval[0] = LINUX_SCHED_OTHER; 1562 break; 1563 case SCHED_FIFO: 1564 td->td_retval[0] = LINUX_SCHED_FIFO; 1565 break; 1566 case SCHED_RR: 1567 td->td_retval[0] = LINUX_SCHED_RR; 1568 break; 1569 } 1570 return (error); 1571 } 1572 1573 int 1574 linux_sched_get_priority_max(struct thread *td, 1575 struct linux_sched_get_priority_max_args *args) 1576 { 1577 struct sched_get_priority_max_args bsd; 1578 1579 #ifdef DEBUG 1580 if (ldebug(sched_get_priority_max)) 1581 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1582 #endif 1583 1584 switch (args->policy) { 1585 case LINUX_SCHED_OTHER: 1586 bsd.policy = SCHED_OTHER; 1587 break; 1588 case LINUX_SCHED_FIFO: 1589 bsd.policy = SCHED_FIFO; 1590 break; 1591 case LINUX_SCHED_RR: 1592 bsd.policy = SCHED_RR; 1593 break; 1594 default: 1595 return (EINVAL); 1596 } 1597 return (sys_sched_get_priority_max(td, &bsd)); 1598 } 1599 1600 int 1601 linux_sched_get_priority_min(struct thread *td, 1602 struct linux_sched_get_priority_min_args *args) 1603 { 1604 struct sched_get_priority_min_args bsd; 1605 1606 #ifdef DEBUG 1607 if (ldebug(sched_get_priority_min)) 1608 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1609 #endif 1610 1611 switch (args->policy) { 1612 case LINUX_SCHED_OTHER: 1613 bsd.policy = SCHED_OTHER; 1614 break; 1615 case LINUX_SCHED_FIFO: 1616 bsd.policy = SCHED_FIFO; 1617 break; 1618 case LINUX_SCHED_RR: 1619 bsd.policy = SCHED_RR; 1620 break; 1621 default: 1622 return (EINVAL); 1623 } 1624 return (sys_sched_get_priority_min(td, &bsd)); 1625 } 1626 1627 #define REBOOT_CAD_ON 0x89abcdef 1628 #define REBOOT_CAD_OFF 0 1629 #define REBOOT_HALT 0xcdef0123 1630 #define REBOOT_RESTART 0x01234567 1631 #define REBOOT_RESTART2 0xA1B2C3D4 1632 #define REBOOT_POWEROFF 0x4321FEDC 1633 #define REBOOT_MAGIC1 0xfee1dead 1634 #define REBOOT_MAGIC2 0x28121969 1635 #define REBOOT_MAGIC2A 0x05121996 1636 #define REBOOT_MAGIC2B 0x16041998 1637 1638 int 1639 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1640 { 1641 struct reboot_args bsd_args; 1642 1643 #ifdef DEBUG 1644 if (ldebug(reboot)) 1645 printf(ARGS(reboot, "0x%x"), args->cmd); 1646 #endif 1647 1648 if (args->magic1 != REBOOT_MAGIC1) 1649 return (EINVAL); 1650 1651 switch (args->magic2) { 1652 case REBOOT_MAGIC2: 1653 case REBOOT_MAGIC2A: 1654 case REBOOT_MAGIC2B: 1655 break; 1656 default: 1657 return (EINVAL); 1658 } 1659 1660 switch (args->cmd) { 1661 case REBOOT_CAD_ON: 1662 case REBOOT_CAD_OFF: 1663 return (priv_check(td, PRIV_REBOOT)); 1664 case REBOOT_HALT: 1665 bsd_args.opt = RB_HALT; 1666 break; 1667 case REBOOT_RESTART: 1668 case REBOOT_RESTART2: 1669 bsd_args.opt = 0; 1670 break; 1671 case REBOOT_POWEROFF: 1672 bsd_args.opt = RB_POWEROFF; 1673 break; 1674 default: 1675 return (EINVAL); 1676 } 1677 return (sys_reboot(td, &bsd_args)); 1678 } 1679 1680 1681 /* 1682 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1683 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that 1684 * are assumed to be preserved. The following lightweight syscalls fixes 1685 * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1686 * 1687 * linux_getpid() - MP SAFE 1688 * linux_getgid() - MP SAFE 1689 * linux_getuid() - MP SAFE 1690 */ 1691 1692 int 1693 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1694 { 1695 1696 #ifdef DEBUG 1697 if (ldebug(getpid)) 1698 printf(ARGS(getpid, "")); 1699 #endif 1700 td->td_retval[0] = td->td_proc->p_pid; 1701 1702 return (0); 1703 } 1704 1705 int 1706 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1707 { 1708 struct linux_emuldata *em; 1709 1710 #ifdef DEBUG 1711 if (ldebug(gettid)) 1712 printf(ARGS(gettid, "")); 1713 #endif 1714 1715 em = em_find(td); 1716 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1717 1718 td->td_retval[0] = em->em_tid; 1719 1720 return (0); 1721 } 1722 1723 1724 int 1725 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1726 { 1727 1728 #ifdef DEBUG 1729 if (ldebug(getppid)) 1730 printf(ARGS(getppid, "")); 1731 #endif 1732 1733 td->td_retval[0] = kern_getppid(td); 1734 return (0); 1735 } 1736 1737 int 1738 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1739 { 1740 1741 #ifdef DEBUG 1742 if (ldebug(getgid)) 1743 printf(ARGS(getgid, "")); 1744 #endif 1745 1746 td->td_retval[0] = td->td_ucred->cr_rgid; 1747 return (0); 1748 } 1749 1750 int 1751 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1752 { 1753 1754 #ifdef DEBUG 1755 if (ldebug(getuid)) 1756 printf(ARGS(getuid, "")); 1757 #endif 1758 1759 td->td_retval[0] = td->td_ucred->cr_ruid; 1760 return (0); 1761 } 1762 1763 1764 int 1765 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1766 { 1767 struct getsid_args bsd; 1768 1769 #ifdef DEBUG 1770 if (ldebug(getsid)) 1771 printf(ARGS(getsid, "%i"), args->pid); 1772 #endif 1773 1774 bsd.pid = args->pid; 1775 return (sys_getsid(td, &bsd)); 1776 } 1777 1778 int 1779 linux_nosys(struct thread *td, struct nosys_args *ignore) 1780 { 1781 1782 return (ENOSYS); 1783 } 1784 1785 int 1786 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1787 { 1788 struct getpriority_args bsd_args; 1789 int error; 1790 1791 #ifdef DEBUG 1792 if (ldebug(getpriority)) 1793 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1794 #endif 1795 1796 bsd_args.which = args->which; 1797 bsd_args.who = args->who; 1798 error = sys_getpriority(td, &bsd_args); 1799 td->td_retval[0] = 20 - td->td_retval[0]; 1800 return (error); 1801 } 1802 1803 int 1804 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1805 { 1806 int name[2]; 1807 1808 #ifdef DEBUG 1809 if (ldebug(sethostname)) 1810 printf(ARGS(sethostname, "*, %i"), args->len); 1811 #endif 1812 1813 name[0] = CTL_KERN; 1814 name[1] = KERN_HOSTNAME; 1815 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1816 args->len, 0, 0)); 1817 } 1818 1819 int 1820 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1821 { 1822 int name[2]; 1823 1824 #ifdef DEBUG 1825 if (ldebug(setdomainname)) 1826 printf(ARGS(setdomainname, "*, %i"), args->len); 1827 #endif 1828 1829 name[0] = CTL_KERN; 1830 name[1] = KERN_NISDOMAINNAME; 1831 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1832 args->len, 0, 0)); 1833 } 1834 1835 int 1836 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1837 { 1838 1839 #ifdef DEBUG 1840 if (ldebug(exit_group)) 1841 printf(ARGS(exit_group, "%i"), args->error_code); 1842 #endif 1843 1844 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1845 args->error_code); 1846 1847 /* 1848 * XXX: we should send a signal to the parent if 1849 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1850 * as it doesnt occur often. 1851 */ 1852 exit1(td, args->error_code, 0); 1853 /* NOTREACHED */ 1854 } 1855 1856 #define _LINUX_CAPABILITY_VERSION 0x19980330 1857 1858 struct l_user_cap_header { 1859 l_int version; 1860 l_int pid; 1861 }; 1862 1863 struct l_user_cap_data { 1864 l_int effective; 1865 l_int permitted; 1866 l_int inheritable; 1867 }; 1868 1869 int 1870 linux_capget(struct thread *td, struct linux_capget_args *args) 1871 { 1872 struct l_user_cap_header luch; 1873 struct l_user_cap_data lucd; 1874 int error; 1875 1876 if (args->hdrp == NULL) 1877 return (EFAULT); 1878 1879 error = copyin(args->hdrp, &luch, sizeof(luch)); 1880 if (error != 0) 1881 return (error); 1882 1883 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1884 luch.version = _LINUX_CAPABILITY_VERSION; 1885 error = copyout(&luch, args->hdrp, sizeof(luch)); 1886 if (error) 1887 return (error); 1888 return (EINVAL); 1889 } 1890 1891 if (luch.pid) 1892 return (EPERM); 1893 1894 if (args->datap) { 1895 /* 1896 * The current implementation doesn't support setting 1897 * a capability (it's essentially a stub) so indicate 1898 * that no capabilities are currently set or available 1899 * to request. 1900 */ 1901 bzero (&lucd, sizeof(lucd)); 1902 error = copyout(&lucd, args->datap, sizeof(lucd)); 1903 } 1904 1905 return (error); 1906 } 1907 1908 int 1909 linux_capset(struct thread *td, struct linux_capset_args *args) 1910 { 1911 struct l_user_cap_header luch; 1912 struct l_user_cap_data lucd; 1913 int error; 1914 1915 if (args->hdrp == NULL || args->datap == NULL) 1916 return (EFAULT); 1917 1918 error = copyin(args->hdrp, &luch, sizeof(luch)); 1919 if (error != 0) 1920 return (error); 1921 1922 if (luch.version != _LINUX_CAPABILITY_VERSION) { 1923 luch.version = _LINUX_CAPABILITY_VERSION; 1924 error = copyout(&luch, args->hdrp, sizeof(luch)); 1925 if (error) 1926 return (error); 1927 return (EINVAL); 1928 } 1929 1930 if (luch.pid) 1931 return (EPERM); 1932 1933 error = copyin(args->datap, &lucd, sizeof(lucd)); 1934 if (error != 0) 1935 return (error); 1936 1937 /* We currently don't support setting any capabilities. */ 1938 if (lucd.effective || lucd.permitted || lucd.inheritable) { 1939 linux_msg(td, 1940 "capset effective=0x%x, permitted=0x%x, " 1941 "inheritable=0x%x is not implemented", 1942 (int)lucd.effective, (int)lucd.permitted, 1943 (int)lucd.inheritable); 1944 return (EPERM); 1945 } 1946 1947 return (0); 1948 } 1949 1950 int 1951 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1952 { 1953 int error = 0, max_size; 1954 struct proc *p = td->td_proc; 1955 char comm[LINUX_MAX_COMM_LEN]; 1956 struct linux_emuldata *em; 1957 int pdeath_signal; 1958 1959 #ifdef DEBUG 1960 if (ldebug(prctl)) 1961 printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, 1962 (uintmax_t)args->arg2, (uintmax_t)args->arg3, 1963 (uintmax_t)args->arg4, (uintmax_t)args->arg5); 1964 #endif 1965 1966 switch (args->option) { 1967 case LINUX_PR_SET_PDEATHSIG: 1968 if (!LINUX_SIG_VALID(args->arg2)) 1969 return (EINVAL); 1970 em = em_find(td); 1971 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1972 em->pdeath_signal = args->arg2; 1973 break; 1974 case LINUX_PR_GET_PDEATHSIG: 1975 em = em_find(td); 1976 KASSERT(em != NULL, ("prctl: emuldata not found.\n")); 1977 pdeath_signal = em->pdeath_signal; 1978 error = copyout(&pdeath_signal, 1979 (void *)(register_t)args->arg2, 1980 sizeof(pdeath_signal)); 1981 break; 1982 case LINUX_PR_GET_KEEPCAPS: 1983 /* 1984 * Indicate that we always clear the effective and 1985 * permitted capability sets when the user id becomes 1986 * non-zero (actually the capability sets are simply 1987 * always zero in the current implementation). 1988 */ 1989 td->td_retval[0] = 0; 1990 break; 1991 case LINUX_PR_SET_KEEPCAPS: 1992 /* 1993 * Ignore requests to keep the effective and permitted 1994 * capability sets when the user id becomes non-zero. 1995 */ 1996 break; 1997 case LINUX_PR_SET_NAME: 1998 /* 1999 * To be on the safe side we need to make sure to not 2000 * overflow the size a Linux program expects. We already 2001 * do this here in the copyin, so that we don't need to 2002 * check on copyout. 2003 */ 2004 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2005 error = copyinstr((void *)(register_t)args->arg2, comm, 2006 max_size, NULL); 2007 2008 /* Linux silently truncates the name if it is too long. */ 2009 if (error == ENAMETOOLONG) { 2010 /* 2011 * XXX: copyinstr() isn't documented to populate the 2012 * array completely, so do a copyin() to be on the 2013 * safe side. This should be changed in case 2014 * copyinstr() is changed to guarantee this. 2015 */ 2016 error = copyin((void *)(register_t)args->arg2, comm, 2017 max_size - 1); 2018 comm[max_size - 1] = '\0'; 2019 } 2020 if (error) 2021 return (error); 2022 2023 PROC_LOCK(p); 2024 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2025 PROC_UNLOCK(p); 2026 break; 2027 case LINUX_PR_GET_NAME: 2028 PROC_LOCK(p); 2029 strlcpy(comm, p->p_comm, sizeof(comm)); 2030 PROC_UNLOCK(p); 2031 error = copyout(comm, (void *)(register_t)args->arg2, 2032 strlen(comm) + 1); 2033 break; 2034 default: 2035 error = EINVAL; 2036 break; 2037 } 2038 2039 return (error); 2040 } 2041 2042 int 2043 linux_sched_setparam(struct thread *td, 2044 struct linux_sched_setparam_args *uap) 2045 { 2046 struct sched_param sched_param; 2047 struct thread *tdt; 2048 int error; 2049 2050 #ifdef DEBUG 2051 if (ldebug(sched_setparam)) 2052 printf(ARGS(sched_setparam, "%d, *"), uap->pid); 2053 #endif 2054 2055 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2056 if (error) 2057 return (error); 2058 2059 tdt = linux_tdfind(td, uap->pid, -1); 2060 if (tdt == NULL) 2061 return (ESRCH); 2062 2063 error = kern_sched_setparam(td, tdt, &sched_param); 2064 PROC_UNLOCK(tdt->td_proc); 2065 return (error); 2066 } 2067 2068 int 2069 linux_sched_getparam(struct thread *td, 2070 struct linux_sched_getparam_args *uap) 2071 { 2072 struct sched_param sched_param; 2073 struct thread *tdt; 2074 int error; 2075 2076 #ifdef DEBUG 2077 if (ldebug(sched_getparam)) 2078 printf(ARGS(sched_getparam, "%d, *"), uap->pid); 2079 #endif 2080 2081 tdt = linux_tdfind(td, uap->pid, -1); 2082 if (tdt == NULL) 2083 return (ESRCH); 2084 2085 error = kern_sched_getparam(td, tdt, &sched_param); 2086 PROC_UNLOCK(tdt->td_proc); 2087 if (error == 0) 2088 error = copyout(&sched_param, uap->param, 2089 sizeof(sched_param)); 2090 return (error); 2091 } 2092 2093 /* 2094 * Get affinity of a process. 2095 */ 2096 int 2097 linux_sched_getaffinity(struct thread *td, 2098 struct linux_sched_getaffinity_args *args) 2099 { 2100 int error; 2101 struct thread *tdt; 2102 2103 #ifdef DEBUG 2104 if (ldebug(sched_getaffinity)) 2105 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 2106 args->len); 2107 #endif 2108 if (args->len < sizeof(cpuset_t)) 2109 return (EINVAL); 2110 2111 tdt = linux_tdfind(td, args->pid, -1); 2112 if (tdt == NULL) 2113 return (ESRCH); 2114 2115 PROC_UNLOCK(tdt->td_proc); 2116 2117 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2118 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); 2119 if (error == 0) 2120 td->td_retval[0] = sizeof(cpuset_t); 2121 2122 return (error); 2123 } 2124 2125 /* 2126 * Set affinity of a process. 2127 */ 2128 int 2129 linux_sched_setaffinity(struct thread *td, 2130 struct linux_sched_setaffinity_args *args) 2131 { 2132 struct thread *tdt; 2133 2134 #ifdef DEBUG 2135 if (ldebug(sched_setaffinity)) 2136 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 2137 args->len); 2138 #endif 2139 if (args->len < sizeof(cpuset_t)) 2140 return (EINVAL); 2141 2142 tdt = linux_tdfind(td, args->pid, -1); 2143 if (tdt == NULL) 2144 return (ESRCH); 2145 2146 PROC_UNLOCK(tdt->td_proc); 2147 2148 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2149 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); 2150 } 2151 2152 struct linux_rlimit64 { 2153 uint64_t rlim_cur; 2154 uint64_t rlim_max; 2155 }; 2156 2157 int 2158 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2159 { 2160 struct rlimit rlim, nrlim; 2161 struct linux_rlimit64 lrlim; 2162 struct proc *p; 2163 u_int which; 2164 int flags; 2165 int error; 2166 2167 #ifdef DEBUG 2168 if (ldebug(prlimit64)) 2169 printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, 2170 args->resource, (void *)args->new, (void *)args->old); 2171 #endif 2172 2173 if (args->resource >= LINUX_RLIM_NLIMITS) 2174 return (EINVAL); 2175 2176 which = linux_to_bsd_resource[args->resource]; 2177 if (which == -1) 2178 return (EINVAL); 2179 2180 if (args->new != NULL) { 2181 /* 2182 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2183 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2184 * as INFINITY so we do not need a conversion even. 2185 */ 2186 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2187 if (error != 0) 2188 return (error); 2189 } 2190 2191 flags = PGET_HOLD | PGET_NOTWEXIT; 2192 if (args->new != NULL) 2193 flags |= PGET_CANDEBUG; 2194 else 2195 flags |= PGET_CANSEE; 2196 error = pget(args->pid, flags, &p); 2197 if (error != 0) 2198 return (error); 2199 2200 if (args->old != NULL) { 2201 PROC_LOCK(p); 2202 lim_rlimit_proc(p, which, &rlim); 2203 PROC_UNLOCK(p); 2204 if (rlim.rlim_cur == RLIM_INFINITY) 2205 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2206 else 2207 lrlim.rlim_cur = rlim.rlim_cur; 2208 if (rlim.rlim_max == RLIM_INFINITY) 2209 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2210 else 2211 lrlim.rlim_max = rlim.rlim_max; 2212 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2213 if (error != 0) 2214 goto out; 2215 } 2216 2217 if (args->new != NULL) 2218 error = kern_proc_setrlimit(td, p, which, &nrlim); 2219 2220 out: 2221 PRELE(p); 2222 return (error); 2223 } 2224 2225 int 2226 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2227 { 2228 struct timeval utv, tv0, tv1, *tvp; 2229 struct l_pselect6arg lpse6; 2230 struct l_timespec lts; 2231 struct timespec uts; 2232 l_sigset_t l_ss; 2233 sigset_t *ssp; 2234 sigset_t ss; 2235 int error; 2236 2237 ssp = NULL; 2238 if (args->sig != NULL) { 2239 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2240 if (error != 0) 2241 return (error); 2242 if (lpse6.ss_len != sizeof(l_ss)) 2243 return (EINVAL); 2244 if (lpse6.ss != 0) { 2245 error = copyin(PTRIN(lpse6.ss), &l_ss, 2246 sizeof(l_ss)); 2247 if (error != 0) 2248 return (error); 2249 linux_to_bsd_sigset(&l_ss, &ss); 2250 ssp = &ss; 2251 } 2252 } 2253 2254 /* 2255 * Currently glibc changes nanosecond number to microsecond. 2256 * This mean losing precision but for now it is hardly seen. 2257 */ 2258 if (args->tsp != NULL) { 2259 error = copyin(args->tsp, <s, sizeof(lts)); 2260 if (error != 0) 2261 return (error); 2262 error = linux_to_native_timespec(&uts, <s); 2263 if (error != 0) 2264 return (error); 2265 2266 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2267 if (itimerfix(&utv)) 2268 return (EINVAL); 2269 2270 microtime(&tv0); 2271 tvp = &utv; 2272 } else 2273 tvp = NULL; 2274 2275 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2276 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2277 2278 if (error == 0 && args->tsp != NULL) { 2279 if (td->td_retval[0] != 0) { 2280 /* 2281 * Compute how much time was left of the timeout, 2282 * by subtracting the current time and the time 2283 * before we started the call, and subtracting 2284 * that result from the user-supplied value. 2285 */ 2286 2287 microtime(&tv1); 2288 timevalsub(&tv1, &tv0); 2289 timevalsub(&utv, &tv1); 2290 if (utv.tv_sec < 0) 2291 timevalclear(&utv); 2292 } else 2293 timevalclear(&utv); 2294 2295 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2296 2297 error = native_to_linux_timespec(<s, &uts); 2298 if (error == 0) 2299 error = copyout(<s, args->tsp, sizeof(lts)); 2300 } 2301 2302 return (error); 2303 } 2304 2305 int 2306 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2307 { 2308 struct timespec ts0, ts1; 2309 struct l_timespec lts; 2310 struct timespec uts, *tsp; 2311 l_sigset_t l_ss; 2312 sigset_t *ssp; 2313 sigset_t ss; 2314 int error; 2315 2316 if (args->sset != NULL) { 2317 if (args->ssize != sizeof(l_ss)) 2318 return (EINVAL); 2319 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2320 if (error) 2321 return (error); 2322 linux_to_bsd_sigset(&l_ss, &ss); 2323 ssp = &ss; 2324 } else 2325 ssp = NULL; 2326 if (args->tsp != NULL) { 2327 error = copyin(args->tsp, <s, sizeof(lts)); 2328 if (error) 2329 return (error); 2330 error = linux_to_native_timespec(&uts, <s); 2331 if (error != 0) 2332 return (error); 2333 2334 nanotime(&ts0); 2335 tsp = &uts; 2336 } else 2337 tsp = NULL; 2338 2339 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2340 2341 if (error == 0 && args->tsp != NULL) { 2342 if (td->td_retval[0]) { 2343 nanotime(&ts1); 2344 timespecsub(&ts1, &ts0); 2345 timespecsub(&uts, &ts1); 2346 if (uts.tv_sec < 0) 2347 timespecclear(&uts); 2348 } else 2349 timespecclear(&uts); 2350 2351 error = native_to_linux_timespec(<s, &uts); 2352 if (error == 0) 2353 error = copyout(<s, args->tsp, sizeof(lts)); 2354 } 2355 2356 return (error); 2357 } 2358 2359 #if defined(DEBUG) || defined(KTR) 2360 /* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ 2361 2362 #ifdef COMPAT_LINUX32 2363 #define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL 2364 #else 2365 #define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL 2366 #endif 2367 2368 u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))]; 2369 2370 static int 2371 linux_debug(int syscall, int toggle, int global) 2372 { 2373 2374 if (global) { 2375 char c = toggle ? 0 : 0xff; 2376 2377 memset(linux_debug_map, c, sizeof(linux_debug_map)); 2378 return (0); 2379 } 2380 if (syscall < 0 || syscall >= L_MAXSYSCALL) 2381 return (EINVAL); 2382 if (toggle) 2383 clrbit(linux_debug_map, syscall); 2384 else 2385 setbit(linux_debug_map, syscall); 2386 return (0); 2387 } 2388 #undef L_MAXSYSCALL 2389 2390 /* 2391 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 2392 * 2393 * E.g.: sysctl linux.debug=21.0 2394 * 2395 * As a special case, syscall "all" will apply to all syscalls globally. 2396 */ 2397 #define LINUX_MAX_DEBUGSTR 16 2398 int 2399 linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 2400 { 2401 char value[LINUX_MAX_DEBUGSTR], *p; 2402 int error, sysc, toggle; 2403 int global = 0; 2404 2405 value[0] = '\0'; 2406 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 2407 if (error || req->newptr == NULL) 2408 return (error); 2409 for (p = value; *p != '\0' && *p != '.'; p++); 2410 if (*p == '\0') 2411 return (EINVAL); 2412 *p++ = '\0'; 2413 sysc = strtol(value, NULL, 0); 2414 toggle = strtol(p, NULL, 0); 2415 if (strcmp(value, "all") == 0) 2416 global = 1; 2417 error = linux_debug(sysc, toggle, global); 2418 return (error); 2419 } 2420 2421 #endif /* DEBUG || KTR */ 2422 2423 int 2424 linux_sched_rr_get_interval(struct thread *td, 2425 struct linux_sched_rr_get_interval_args *uap) 2426 { 2427 struct timespec ts; 2428 struct l_timespec lts; 2429 struct thread *tdt; 2430 int error; 2431 2432 /* 2433 * According to man in case the invalid pid specified 2434 * EINVAL should be returned. 2435 */ 2436 if (uap->pid < 0) 2437 return (EINVAL); 2438 2439 tdt = linux_tdfind(td, uap->pid, -1); 2440 if (tdt == NULL) 2441 return (ESRCH); 2442 2443 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2444 PROC_UNLOCK(tdt->td_proc); 2445 if (error != 0) 2446 return (error); 2447 error = native_to_linux_timespec(<s, &ts); 2448 if (error != 0) 2449 return (error); 2450 return (copyout(<s, uap->interval, sizeof(lts))); 2451 } 2452 2453 /* 2454 * In case when the Linux thread is the initial thread in 2455 * the thread group thread id is equal to the process id. 2456 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2457 */ 2458 struct thread * 2459 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2460 { 2461 struct linux_emuldata *em; 2462 struct thread *tdt; 2463 struct proc *p; 2464 2465 tdt = NULL; 2466 if (tid == 0 || tid == td->td_tid) { 2467 tdt = td; 2468 PROC_LOCK(tdt->td_proc); 2469 } else if (tid > PID_MAX) 2470 tdt = tdfind(tid, pid); 2471 else { 2472 /* 2473 * Initial thread where the tid equal to the pid. 2474 */ 2475 p = pfind(tid); 2476 if (p != NULL) { 2477 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2478 /* 2479 * p is not a Linuxulator process. 2480 */ 2481 PROC_UNLOCK(p); 2482 return (NULL); 2483 } 2484 FOREACH_THREAD_IN_PROC(p, tdt) { 2485 em = em_find(tdt); 2486 if (tid == em->em_tid) 2487 return (tdt); 2488 } 2489 PROC_UNLOCK(p); 2490 } 2491 return (NULL); 2492 } 2493 2494 return (tdt); 2495 } 2496 2497 void 2498 linux_to_bsd_waitopts(int options, int *bsdopts) 2499 { 2500 2501 if (options & LINUX_WNOHANG) 2502 *bsdopts |= WNOHANG; 2503 if (options & LINUX_WUNTRACED) 2504 *bsdopts |= WUNTRACED; 2505 if (options & LINUX_WEXITED) 2506 *bsdopts |= WEXITED; 2507 if (options & LINUX_WCONTINUED) 2508 *bsdopts |= WCONTINUED; 2509 if (options & LINUX_WNOWAIT) 2510 *bsdopts |= WNOWAIT; 2511 2512 if (options & __WCLONE) 2513 *bsdopts |= WLINUXCLONE; 2514 } 2515 2516 int 2517 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2518 { 2519 struct uio uio; 2520 struct iovec iov; 2521 int error; 2522 2523 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2524 return (EINVAL); 2525 if (args->count > INT_MAX) 2526 args->count = INT_MAX; 2527 2528 iov.iov_base = args->buf; 2529 iov.iov_len = args->count; 2530 2531 uio.uio_iov = &iov; 2532 uio.uio_iovcnt = 1; 2533 uio.uio_resid = iov.iov_len; 2534 uio.uio_segflg = UIO_USERSPACE; 2535 uio.uio_rw = UIO_READ; 2536 uio.uio_td = td; 2537 2538 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2539 if (error == 0) 2540 td->td_retval[0] = args->count - uio.uio_resid; 2541 return (error); 2542 } 2543 2544 int 2545 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2546 { 2547 2548 /* Needs to be page-aligned */ 2549 if (args->start & PAGE_MASK) 2550 return (EINVAL); 2551 return (kern_mincore(td, args->start, args->len, args->vec)); 2552 } 2553