1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/msgbuf.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/priv.h> 54 #include <sys/proc.h> 55 #include <sys/procctl.h> 56 #include <sys/reboot.h> 57 #include <sys/racct.h> 58 #include <sys/random.h> 59 #include <sys/resourcevar.h> 60 #include <sys/sched.h> 61 #include <sys/sdt.h> 62 #include <sys/signalvar.h> 63 #include <sys/stat.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/sysctl.h> 66 #include <sys/sysproto.h> 67 #include <sys/systm.h> 68 #include <sys/time.h> 69 #include <sys/vmmeter.h> 70 #include <sys/vnode.h> 71 #include <sys/wait.h> 72 #include <sys/cpuset.h> 73 #include <sys/uio.h> 74 75 #include <security/mac/mac_framework.h> 76 77 #include <vm/vm.h> 78 #include <vm/pmap.h> 79 #include <vm/vm_kern.h> 80 #include <vm/vm_map.h> 81 #include <vm/vm_extern.h> 82 #include <vm/swap_pager.h> 83 84 #ifdef COMPAT_LINUX32 85 #include <machine/../linux32/linux.h> 86 #include <machine/../linux32/linux32_proto.h> 87 #else 88 #include <machine/../linux/linux.h> 89 #include <machine/../linux/linux_proto.h> 90 #endif 91 92 #include <compat/linux/linux_dtrace.h> 93 #include <compat/linux/linux_file.h> 94 #include <compat/linux/linux_mib.h> 95 #include <compat/linux/linux_signal.h> 96 #include <compat/linux/linux_timer.h> 97 #include <compat/linux/linux_util.h> 98 #include <compat/linux/linux_sysproto.h> 99 #include <compat/linux/linux_emul.h> 100 #include <compat/linux/linux_misc.h> 101 102 /** 103 * Special DTrace provider for the linuxulator. 104 * 105 * In this file we define the provider for the entire linuxulator. All 106 * modules (= files of the linuxulator) use it. 107 * 108 * We define a different name depending on the emulated bitsize, see 109 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 110 * native bitsize = linuxulator 111 * amd64, 32bit emulation = linuxulator32 112 */ 113 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 114 115 int stclohz; /* Statistics clock frequency */ 116 117 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 118 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 119 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 120 RLIMIT_MEMLOCK, RLIMIT_AS 121 }; 122 123 struct l_sysinfo { 124 l_long uptime; /* Seconds since boot */ 125 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 126 #define LINUX_SYSINFO_LOADS_SCALE 65536 127 l_ulong totalram; /* Total usable main memory size */ 128 l_ulong freeram; /* Available memory size */ 129 l_ulong sharedram; /* Amount of shared memory */ 130 l_ulong bufferram; /* Memory used by buffers */ 131 l_ulong totalswap; /* Total swap space size */ 132 l_ulong freeswap; /* swap space still available */ 133 l_ushort procs; /* Number of current processes */ 134 l_ushort pads; 135 l_ulong totalhigh; 136 l_ulong freehigh; 137 l_uint mem_unit; 138 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 139 }; 140 141 struct l_pselect6arg { 142 l_uintptr_t ss; 143 l_size_t ss_len; 144 }; 145 146 static int linux_utimensat_nsec_valid(l_long); 147 148 149 int 150 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 151 { 152 struct l_sysinfo sysinfo; 153 int i, j; 154 struct timespec ts; 155 156 bzero(&sysinfo, sizeof(sysinfo)); 157 getnanouptime(&ts); 158 if (ts.tv_nsec != 0) 159 ts.tv_sec++; 160 sysinfo.uptime = ts.tv_sec; 161 162 /* Use the information from the mib to get our load averages */ 163 for (i = 0; i < 3; i++) 164 sysinfo.loads[i] = averunnable.ldavg[i] * 165 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 166 167 sysinfo.totalram = physmem * PAGE_SIZE; 168 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 169 170 /* 171 * sharedram counts pages allocated to named, swap-backed objects such 172 * as shared memory segments and tmpfs files. There is no cheap way to 173 * compute this, so just leave the field unpopulated. Linux itself only 174 * started setting this field in the 3.x timeframe. 175 */ 176 sysinfo.sharedram = 0; 177 sysinfo.bufferram = 0; 178 179 swap_pager_status(&i, &j); 180 sysinfo.totalswap = i * PAGE_SIZE; 181 sysinfo.freeswap = (i - j) * PAGE_SIZE; 182 183 sysinfo.procs = nprocs; 184 185 /* 186 * Platforms supported by the emulation layer do not have a notion of 187 * high memory. 188 */ 189 sysinfo.totalhigh = 0; 190 sysinfo.freehigh = 0; 191 192 sysinfo.mem_unit = 1; 193 194 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 195 } 196 197 #ifdef LINUX_LEGACY_SYSCALLS 198 int 199 linux_alarm(struct thread *td, struct linux_alarm_args *args) 200 { 201 struct itimerval it, old_it; 202 u_int secs; 203 int error; 204 205 secs = args->secs; 206 /* 207 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 208 * to match kern_setitimer()'s limit to avoid error from it. 209 * 210 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 211 * platforms. 212 */ 213 if (secs > INT32_MAX / 2) 214 secs = INT32_MAX / 2; 215 216 it.it_value.tv_sec = secs; 217 it.it_value.tv_usec = 0; 218 timevalclear(&it.it_interval); 219 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 220 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 221 222 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 223 old_it.it_value.tv_usec >= 500000) 224 old_it.it_value.tv_sec++; 225 td->td_retval[0] = old_it.it_value.tv_sec; 226 return (0); 227 } 228 #endif 229 230 int 231 linux_brk(struct thread *td, struct linux_brk_args *args) 232 { 233 struct vmspace *vm = td->td_proc->p_vmspace; 234 uintptr_t new, old; 235 236 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 237 new = (uintptr_t)args->dsend; 238 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 239 td->td_retval[0] = (register_t)new; 240 else 241 td->td_retval[0] = (register_t)old; 242 243 return (0); 244 } 245 246 #if defined(__i386__) 247 /* XXX: what about amd64/linux32? */ 248 249 int 250 linux_uselib(struct thread *td, struct linux_uselib_args *args) 251 { 252 struct nameidata ni; 253 struct vnode *vp; 254 struct exec *a_out; 255 vm_map_t map; 256 vm_map_entry_t entry; 257 struct vattr attr; 258 vm_offset_t vmaddr; 259 unsigned long file_offset; 260 unsigned long bss_size; 261 char *library; 262 ssize_t aresid; 263 int error; 264 bool locked, opened, textset; 265 266 LCONVPATHEXIST(td, args->library, &library); 267 268 a_out = NULL; 269 vp = NULL; 270 locked = false; 271 textset = false; 272 opened = false; 273 274 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 275 UIO_SYSSPACE, library, td); 276 error = namei(&ni); 277 LFREEPATH(library); 278 if (error) 279 goto cleanup; 280 281 vp = ni.ni_vp; 282 NDFREE(&ni, NDF_ONLY_PNBUF); 283 284 /* 285 * From here on down, we have a locked vnode that must be unlocked. 286 * XXX: The code below largely duplicates exec_check_permissions(). 287 */ 288 locked = true; 289 290 /* Executable? */ 291 error = VOP_GETATTR(vp, &attr, td->td_ucred); 292 if (error) 293 goto cleanup; 294 295 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 296 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 297 /* EACCESS is what exec(2) returns. */ 298 error = ENOEXEC; 299 goto cleanup; 300 } 301 302 /* Sensible size? */ 303 if (attr.va_size == 0) { 304 error = ENOEXEC; 305 goto cleanup; 306 } 307 308 /* Can we access it? */ 309 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 310 if (error) 311 goto cleanup; 312 313 /* 314 * XXX: This should use vn_open() so that it is properly authorized, 315 * and to reduce code redundancy all over the place here. 316 * XXX: Not really, it duplicates far more of exec_check_permissions() 317 * than vn_open(). 318 */ 319 #ifdef MAC 320 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 321 if (error) 322 goto cleanup; 323 #endif 324 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 325 if (error) 326 goto cleanup; 327 opened = true; 328 329 /* Pull in executable header into exec_map */ 330 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 331 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 332 if (error) 333 goto cleanup; 334 335 /* Is it a Linux binary ? */ 336 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 337 error = ENOEXEC; 338 goto cleanup; 339 } 340 341 /* 342 * While we are here, we should REALLY do some more checks 343 */ 344 345 /* Set file/virtual offset based on a.out variant. */ 346 switch ((int)(a_out->a_magic & 0xffff)) { 347 case 0413: /* ZMAGIC */ 348 file_offset = 1024; 349 break; 350 case 0314: /* QMAGIC */ 351 file_offset = 0; 352 break; 353 default: 354 error = ENOEXEC; 355 goto cleanup; 356 } 357 358 bss_size = round_page(a_out->a_bss); 359 360 /* Check various fields in header for validity/bounds. */ 361 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 362 error = ENOEXEC; 363 goto cleanup; 364 } 365 366 /* text + data can't exceed file size */ 367 if (a_out->a_data + a_out->a_text > attr.va_size) { 368 error = EFAULT; 369 goto cleanup; 370 } 371 372 /* 373 * text/data/bss must not exceed limits 374 * XXX - this is not complete. it should check current usage PLUS 375 * the resources needed by this library. 376 */ 377 PROC_LOCK(td->td_proc); 378 if (a_out->a_text > maxtsiz || 379 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 380 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 381 bss_size) != 0) { 382 PROC_UNLOCK(td->td_proc); 383 error = ENOMEM; 384 goto cleanup; 385 } 386 PROC_UNLOCK(td->td_proc); 387 388 /* 389 * Prevent more writers. 390 */ 391 error = VOP_SET_TEXT(vp); 392 if (error != 0) 393 goto cleanup; 394 textset = true; 395 396 /* 397 * Lock no longer needed 398 */ 399 locked = false; 400 VOP_UNLOCK(vp); 401 402 /* 403 * Check if file_offset page aligned. Currently we cannot handle 404 * misalinged file offsets, and so we read in the entire image 405 * (what a waste). 406 */ 407 if (file_offset & PAGE_MASK) { 408 /* Map text+data read/write/execute */ 409 410 /* a_entry is the load address and is page aligned */ 411 vmaddr = trunc_page(a_out->a_entry); 412 413 /* get anon user mapping, read+write+execute */ 414 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 415 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 416 VM_PROT_ALL, VM_PROT_ALL, 0); 417 if (error) 418 goto cleanup; 419 420 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 421 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 422 td->td_ucred, NOCRED, &aresid, td); 423 if (error != 0) 424 goto cleanup; 425 if (aresid != 0) { 426 error = ENOEXEC; 427 goto cleanup; 428 } 429 } else { 430 /* 431 * for QMAGIC, a_entry is 20 bytes beyond the load address 432 * to skip the executable header 433 */ 434 vmaddr = trunc_page(a_out->a_entry); 435 436 /* 437 * Map it all into the process's space as a single 438 * copy-on-write "data" segment. 439 */ 440 map = &td->td_proc->p_vmspace->vm_map; 441 error = vm_mmap(map, &vmaddr, 442 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 443 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 444 if (error) 445 goto cleanup; 446 vm_map_lock(map); 447 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 448 vm_map_unlock(map); 449 error = EDOOFUS; 450 goto cleanup; 451 } 452 entry->eflags |= MAP_ENTRY_VN_EXEC; 453 vm_map_unlock(map); 454 textset = false; 455 } 456 457 if (bss_size != 0) { 458 /* Calculate BSS start address */ 459 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 460 a_out->a_data; 461 462 /* allocate some 'anon' space */ 463 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 464 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 465 VM_PROT_ALL, 0); 466 if (error) 467 goto cleanup; 468 } 469 470 cleanup: 471 if (opened) { 472 if (locked) 473 VOP_UNLOCK(vp); 474 locked = false; 475 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 476 } 477 if (textset) { 478 if (!locked) { 479 locked = true; 480 VOP_LOCK(vp, LK_SHARED | LK_RETRY); 481 } 482 VOP_UNSET_TEXT_CHECKED(vp); 483 } 484 if (locked) 485 VOP_UNLOCK(vp); 486 487 /* Release the temporary mapping. */ 488 if (a_out) 489 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 490 491 return (error); 492 } 493 494 #endif /* __i386__ */ 495 496 #ifdef LINUX_LEGACY_SYSCALLS 497 int 498 linux_select(struct thread *td, struct linux_select_args *args) 499 { 500 l_timeval ltv; 501 struct timeval tv0, tv1, utv, *tvp; 502 int error; 503 504 /* 505 * Store current time for computation of the amount of 506 * time left. 507 */ 508 if (args->timeout) { 509 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 510 goto select_out; 511 utv.tv_sec = ltv.tv_sec; 512 utv.tv_usec = ltv.tv_usec; 513 514 if (itimerfix(&utv)) { 515 /* 516 * The timeval was invalid. Convert it to something 517 * valid that will act as it does under Linux. 518 */ 519 utv.tv_sec += utv.tv_usec / 1000000; 520 utv.tv_usec %= 1000000; 521 if (utv.tv_usec < 0) { 522 utv.tv_sec -= 1; 523 utv.tv_usec += 1000000; 524 } 525 if (utv.tv_sec < 0) 526 timevalclear(&utv); 527 } 528 microtime(&tv0); 529 tvp = &utv; 530 } else 531 tvp = NULL; 532 533 error = kern_select(td, args->nfds, args->readfds, args->writefds, 534 args->exceptfds, tvp, LINUX_NFDBITS); 535 if (error) 536 goto select_out; 537 538 if (args->timeout) { 539 if (td->td_retval[0]) { 540 /* 541 * Compute how much time was left of the timeout, 542 * by subtracting the current time and the time 543 * before we started the call, and subtracting 544 * that result from the user-supplied value. 545 */ 546 microtime(&tv1); 547 timevalsub(&tv1, &tv0); 548 timevalsub(&utv, &tv1); 549 if (utv.tv_sec < 0) 550 timevalclear(&utv); 551 } else 552 timevalclear(&utv); 553 ltv.tv_sec = utv.tv_sec; 554 ltv.tv_usec = utv.tv_usec; 555 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 556 goto select_out; 557 } 558 559 select_out: 560 return (error); 561 } 562 #endif 563 564 int 565 linux_mremap(struct thread *td, struct linux_mremap_args *args) 566 { 567 uintptr_t addr; 568 size_t len; 569 int error = 0; 570 571 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 572 td->td_retval[0] = 0; 573 return (EINVAL); 574 } 575 576 /* 577 * Check for the page alignment. 578 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 579 */ 580 if (args->addr & PAGE_MASK) { 581 td->td_retval[0] = 0; 582 return (EINVAL); 583 } 584 585 args->new_len = round_page(args->new_len); 586 args->old_len = round_page(args->old_len); 587 588 if (args->new_len > args->old_len) { 589 td->td_retval[0] = 0; 590 return (ENOMEM); 591 } 592 593 if (args->new_len < args->old_len) { 594 addr = args->addr + args->new_len; 595 len = args->old_len - args->new_len; 596 error = kern_munmap(td, addr, len); 597 } 598 599 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 600 return (error); 601 } 602 603 #define LINUX_MS_ASYNC 0x0001 604 #define LINUX_MS_INVALIDATE 0x0002 605 #define LINUX_MS_SYNC 0x0004 606 607 int 608 linux_msync(struct thread *td, struct linux_msync_args *args) 609 { 610 611 return (kern_msync(td, args->addr, args->len, 612 args->fl & ~LINUX_MS_SYNC)); 613 } 614 615 #ifdef LINUX_LEGACY_SYSCALLS 616 int 617 linux_time(struct thread *td, struct linux_time_args *args) 618 { 619 struct timeval tv; 620 l_time_t tm; 621 int error; 622 623 microtime(&tv); 624 tm = tv.tv_sec; 625 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 626 return (error); 627 td->td_retval[0] = tm; 628 return (0); 629 } 630 #endif 631 632 struct l_times_argv { 633 l_clock_t tms_utime; 634 l_clock_t tms_stime; 635 l_clock_t tms_cutime; 636 l_clock_t tms_cstime; 637 }; 638 639 640 /* 641 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 642 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 643 * auxiliary vector entry. 644 */ 645 #define CLK_TCK 100 646 647 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 648 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 649 650 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 651 CONVNTCK(r) : CONVOTCK(r)) 652 653 int 654 linux_times(struct thread *td, struct linux_times_args *args) 655 { 656 struct timeval tv, utime, stime, cutime, cstime; 657 struct l_times_argv tms; 658 struct proc *p; 659 int error; 660 661 if (args->buf != NULL) { 662 p = td->td_proc; 663 PROC_LOCK(p); 664 PROC_STATLOCK(p); 665 calcru(p, &utime, &stime); 666 PROC_STATUNLOCK(p); 667 calccru(p, &cutime, &cstime); 668 PROC_UNLOCK(p); 669 670 tms.tms_utime = CONVTCK(utime); 671 tms.tms_stime = CONVTCK(stime); 672 673 tms.tms_cutime = CONVTCK(cutime); 674 tms.tms_cstime = CONVTCK(cstime); 675 676 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 677 return (error); 678 } 679 680 microuptime(&tv); 681 td->td_retval[0] = (int)CONVTCK(tv); 682 return (0); 683 } 684 685 int 686 linux_newuname(struct thread *td, struct linux_newuname_args *args) 687 { 688 struct l_new_utsname utsname; 689 char osname[LINUX_MAX_UTSNAME]; 690 char osrelease[LINUX_MAX_UTSNAME]; 691 char *p; 692 693 linux_get_osname(td, osname); 694 linux_get_osrelease(td, osrelease); 695 696 bzero(&utsname, sizeof(utsname)); 697 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 698 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 699 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 700 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 701 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 702 for (p = utsname.version; *p != '\0'; ++p) 703 if (*p == '\n') { 704 *p = '\0'; 705 break; 706 } 707 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 708 709 return (copyout(&utsname, args->buf, sizeof(utsname))); 710 } 711 712 struct l_utimbuf { 713 l_time_t l_actime; 714 l_time_t l_modtime; 715 }; 716 717 #ifdef LINUX_LEGACY_SYSCALLS 718 int 719 linux_utime(struct thread *td, struct linux_utime_args *args) 720 { 721 struct timeval tv[2], *tvp; 722 struct l_utimbuf lut; 723 char *fname; 724 int error; 725 726 LCONVPATHEXIST(td, args->fname, &fname); 727 728 if (args->times) { 729 if ((error = copyin(args->times, &lut, sizeof lut))) { 730 LFREEPATH(fname); 731 return (error); 732 } 733 tv[0].tv_sec = lut.l_actime; 734 tv[0].tv_usec = 0; 735 tv[1].tv_sec = lut.l_modtime; 736 tv[1].tv_usec = 0; 737 tvp = tv; 738 } else 739 tvp = NULL; 740 741 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 742 UIO_SYSSPACE); 743 LFREEPATH(fname); 744 return (error); 745 } 746 #endif 747 748 #ifdef LINUX_LEGACY_SYSCALLS 749 int 750 linux_utimes(struct thread *td, struct linux_utimes_args *args) 751 { 752 l_timeval ltv[2]; 753 struct timeval tv[2], *tvp = NULL; 754 char *fname; 755 int error; 756 757 LCONVPATHEXIST(td, args->fname, &fname); 758 759 if (args->tptr != NULL) { 760 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 761 LFREEPATH(fname); 762 return (error); 763 } 764 tv[0].tv_sec = ltv[0].tv_sec; 765 tv[0].tv_usec = ltv[0].tv_usec; 766 tv[1].tv_sec = ltv[1].tv_sec; 767 tv[1].tv_usec = ltv[1].tv_usec; 768 tvp = tv; 769 } 770 771 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 772 tvp, UIO_SYSSPACE); 773 LFREEPATH(fname); 774 return (error); 775 } 776 #endif 777 778 static int 779 linux_utimensat_nsec_valid(l_long nsec) 780 { 781 782 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 783 return (0); 784 if (nsec >= 0 && nsec <= 999999999) 785 return (0); 786 return (1); 787 } 788 789 int 790 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 791 { 792 struct l_timespec l_times[2]; 793 struct timespec times[2], *timesp = NULL; 794 char *path = NULL; 795 int error, dfd, flags = 0; 796 797 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 798 799 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 800 return (EINVAL); 801 802 if (args->times != NULL) { 803 error = copyin(args->times, l_times, sizeof(l_times)); 804 if (error != 0) 805 return (error); 806 807 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 808 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 809 return (EINVAL); 810 811 times[0].tv_sec = l_times[0].tv_sec; 812 switch (l_times[0].tv_nsec) 813 { 814 case LINUX_UTIME_OMIT: 815 times[0].tv_nsec = UTIME_OMIT; 816 break; 817 case LINUX_UTIME_NOW: 818 times[0].tv_nsec = UTIME_NOW; 819 break; 820 default: 821 times[0].tv_nsec = l_times[0].tv_nsec; 822 } 823 824 times[1].tv_sec = l_times[1].tv_sec; 825 switch (l_times[1].tv_nsec) 826 { 827 case LINUX_UTIME_OMIT: 828 times[1].tv_nsec = UTIME_OMIT; 829 break; 830 case LINUX_UTIME_NOW: 831 times[1].tv_nsec = UTIME_NOW; 832 break; 833 default: 834 times[1].tv_nsec = l_times[1].tv_nsec; 835 break; 836 } 837 timesp = times; 838 839 /* This breaks POSIX, but is what the Linux kernel does 840 * _on purpose_ (documented in the man page for utimensat(2)), 841 * so we must follow that behaviour. */ 842 if (times[0].tv_nsec == UTIME_OMIT && 843 times[1].tv_nsec == UTIME_OMIT) 844 return (0); 845 } 846 847 if (args->pathname != NULL) 848 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 849 else if (args->flags != 0) 850 return (EINVAL); 851 852 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 853 flags |= AT_SYMLINK_NOFOLLOW; 854 855 if (path == NULL) 856 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 857 else { 858 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 859 UIO_SYSSPACE, flags); 860 LFREEPATH(path); 861 } 862 863 return (error); 864 } 865 866 #ifdef LINUX_LEGACY_SYSCALLS 867 int 868 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 869 { 870 l_timeval ltv[2]; 871 struct timeval tv[2], *tvp = NULL; 872 char *fname; 873 int error, dfd; 874 875 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 876 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 877 878 if (args->utimes != NULL) { 879 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 880 LFREEPATH(fname); 881 return (error); 882 } 883 tv[0].tv_sec = ltv[0].tv_sec; 884 tv[0].tv_usec = ltv[0].tv_usec; 885 tv[1].tv_sec = ltv[1].tv_sec; 886 tv[1].tv_usec = ltv[1].tv_usec; 887 tvp = tv; 888 } 889 890 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 891 LFREEPATH(fname); 892 return (error); 893 } 894 #endif 895 896 static int 897 linux_common_wait(struct thread *td, int pid, int *statusp, 898 int options, struct __wrusage *wrup) 899 { 900 siginfo_t siginfo; 901 idtype_t idtype; 902 id_t id; 903 int error, status, tmpstat; 904 905 if (pid == WAIT_ANY) { 906 idtype = P_ALL; 907 id = 0; 908 } else if (pid < 0) { 909 idtype = P_PGID; 910 id = (id_t)-pid; 911 } else { 912 idtype = P_PID; 913 id = (id_t)pid; 914 } 915 916 /* 917 * For backward compatibility we implicitly add flags WEXITED 918 * and WTRAPPED here. 919 */ 920 options |= WEXITED | WTRAPPED; 921 error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo); 922 if (error) 923 return (error); 924 925 if (statusp) { 926 tmpstat = status & 0xffff; 927 if (WIFSIGNALED(tmpstat)) { 928 tmpstat = (tmpstat & 0xffffff80) | 929 bsd_to_linux_signal(WTERMSIG(tmpstat)); 930 } else if (WIFSTOPPED(tmpstat)) { 931 tmpstat = (tmpstat & 0xffff00ff) | 932 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 933 #if defined(__amd64__) && !defined(COMPAT_LINUX32) 934 if (WSTOPSIG(status) == SIGTRAP) { 935 tmpstat = linux_ptrace_status(td, 936 siginfo.si_pid, tmpstat); 937 } 938 #endif 939 } else if (WIFCONTINUED(tmpstat)) { 940 tmpstat = 0xffff; 941 } 942 error = copyout(&tmpstat, statusp, sizeof(int)); 943 } 944 945 return (error); 946 } 947 948 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 949 int 950 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 951 { 952 struct linux_wait4_args wait4_args; 953 954 wait4_args.pid = args->pid; 955 wait4_args.status = args->status; 956 wait4_args.options = args->options; 957 wait4_args.rusage = NULL; 958 959 return (linux_wait4(td, &wait4_args)); 960 } 961 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 962 963 int 964 linux_wait4(struct thread *td, struct linux_wait4_args *args) 965 { 966 int error, options; 967 struct __wrusage wru, *wrup; 968 969 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 970 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 971 return (EINVAL); 972 973 options = WEXITED; 974 linux_to_bsd_waitopts(args->options, &options); 975 976 if (args->rusage != NULL) 977 wrup = &wru; 978 else 979 wrup = NULL; 980 error = linux_common_wait(td, args->pid, args->status, options, wrup); 981 if (error != 0) 982 return (error); 983 if (args->rusage != NULL) 984 error = linux_copyout_rusage(&wru.wru_self, args->rusage); 985 return (error); 986 } 987 988 int 989 linux_waitid(struct thread *td, struct linux_waitid_args *args) 990 { 991 int status, options, sig; 992 struct __wrusage wru; 993 siginfo_t siginfo; 994 l_siginfo_t lsi; 995 idtype_t idtype; 996 struct proc *p; 997 int error; 998 999 options = 0; 1000 linux_to_bsd_waitopts(args->options, &options); 1001 1002 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1003 return (EINVAL); 1004 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1005 return (EINVAL); 1006 1007 switch (args->idtype) { 1008 case LINUX_P_ALL: 1009 idtype = P_ALL; 1010 break; 1011 case LINUX_P_PID: 1012 if (args->id <= 0) 1013 return (EINVAL); 1014 idtype = P_PID; 1015 break; 1016 case LINUX_P_PGID: 1017 if (args->id <= 0) 1018 return (EINVAL); 1019 idtype = P_PGID; 1020 break; 1021 default: 1022 return (EINVAL); 1023 } 1024 1025 error = kern_wait6(td, idtype, args->id, &status, options, 1026 &wru, &siginfo); 1027 if (error != 0) 1028 return (error); 1029 if (args->rusage != NULL) { 1030 error = linux_copyout_rusage(&wru.wru_children, 1031 args->rusage); 1032 if (error != 0) 1033 return (error); 1034 } 1035 if (args->info != NULL) { 1036 p = td->td_proc; 1037 bzero(&lsi, sizeof(lsi)); 1038 if (td->td_retval[0] != 0) { 1039 sig = bsd_to_linux_signal(siginfo.si_signo); 1040 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1041 } 1042 error = copyout(&lsi, args->info, sizeof(lsi)); 1043 } 1044 td->td_retval[0] = 0; 1045 1046 return (error); 1047 } 1048 1049 #ifdef LINUX_LEGACY_SYSCALLS 1050 int 1051 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1052 { 1053 char *path; 1054 int error; 1055 1056 LCONVPATHCREAT(td, args->path, &path); 1057 1058 switch (args->mode & S_IFMT) { 1059 case S_IFIFO: 1060 case S_IFSOCK: 1061 error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, 1062 args->mode); 1063 break; 1064 1065 case S_IFCHR: 1066 case S_IFBLK: 1067 error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, 1068 args->mode, args->dev); 1069 break; 1070 1071 case S_IFDIR: 1072 error = EPERM; 1073 break; 1074 1075 case 0: 1076 args->mode |= S_IFREG; 1077 /* FALLTHROUGH */ 1078 case S_IFREG: 1079 error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, 1080 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1081 if (error == 0) 1082 kern_close(td, td->td_retval[0]); 1083 break; 1084 1085 default: 1086 error = EINVAL; 1087 break; 1088 } 1089 LFREEPATH(path); 1090 return (error); 1091 } 1092 #endif 1093 1094 int 1095 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1096 { 1097 char *path; 1098 int error, dfd; 1099 1100 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1101 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1102 1103 switch (args->mode & S_IFMT) { 1104 case S_IFIFO: 1105 case S_IFSOCK: 1106 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 1107 break; 1108 1109 case S_IFCHR: 1110 case S_IFBLK: 1111 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 1112 args->dev); 1113 break; 1114 1115 case S_IFDIR: 1116 error = EPERM; 1117 break; 1118 1119 case 0: 1120 args->mode |= S_IFREG; 1121 /* FALLTHROUGH */ 1122 case S_IFREG: 1123 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 1124 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1125 if (error == 0) 1126 kern_close(td, td->td_retval[0]); 1127 break; 1128 1129 default: 1130 error = EINVAL; 1131 break; 1132 } 1133 LFREEPATH(path); 1134 return (error); 1135 } 1136 1137 /* 1138 * UGH! This is just about the dumbest idea I've ever heard!! 1139 */ 1140 int 1141 linux_personality(struct thread *td, struct linux_personality_args *args) 1142 { 1143 struct linux_pemuldata *pem; 1144 struct proc *p = td->td_proc; 1145 uint32_t old; 1146 1147 PROC_LOCK(p); 1148 pem = pem_find(p); 1149 old = pem->persona; 1150 if (args->per != 0xffffffff) 1151 pem->persona = args->per; 1152 PROC_UNLOCK(p); 1153 1154 td->td_retval[0] = old; 1155 return (0); 1156 } 1157 1158 struct l_itimerval { 1159 l_timeval it_interval; 1160 l_timeval it_value; 1161 }; 1162 1163 #define B2L_ITIMERVAL(bip, lip) \ 1164 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1165 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1166 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1167 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1168 1169 int 1170 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1171 { 1172 int error; 1173 struct l_itimerval ls; 1174 struct itimerval aitv, oitv; 1175 1176 if (uap->itv == NULL) { 1177 uap->itv = uap->oitv; 1178 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1179 } 1180 1181 error = copyin(uap->itv, &ls, sizeof(ls)); 1182 if (error != 0) 1183 return (error); 1184 B2L_ITIMERVAL(&aitv, &ls); 1185 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1186 if (error != 0 || uap->oitv == NULL) 1187 return (error); 1188 B2L_ITIMERVAL(&ls, &oitv); 1189 1190 return (copyout(&ls, uap->oitv, sizeof(ls))); 1191 } 1192 1193 int 1194 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1195 { 1196 int error; 1197 struct l_itimerval ls; 1198 struct itimerval aitv; 1199 1200 error = kern_getitimer(td, uap->which, &aitv); 1201 if (error != 0) 1202 return (error); 1203 B2L_ITIMERVAL(&ls, &aitv); 1204 return (copyout(&ls, uap->itv, sizeof(ls))); 1205 } 1206 1207 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1208 int 1209 linux_nice(struct thread *td, struct linux_nice_args *args) 1210 { 1211 1212 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1213 } 1214 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1215 1216 int 1217 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1218 { 1219 struct ucred *newcred, *oldcred; 1220 l_gid_t *linux_gidset; 1221 gid_t *bsd_gidset; 1222 int ngrp, error; 1223 struct proc *p; 1224 1225 ngrp = args->gidsetsize; 1226 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1227 return (EINVAL); 1228 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1229 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1230 if (error) 1231 goto out; 1232 newcred = crget(); 1233 crextend(newcred, ngrp + 1); 1234 p = td->td_proc; 1235 PROC_LOCK(p); 1236 oldcred = p->p_ucred; 1237 crcopy(newcred, oldcred); 1238 1239 /* 1240 * cr_groups[0] holds egid. Setting the whole set from 1241 * the supplied set will cause egid to be changed too. 1242 * Keep cr_groups[0] unchanged to prevent that. 1243 */ 1244 1245 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1246 PROC_UNLOCK(p); 1247 crfree(newcred); 1248 goto out; 1249 } 1250 1251 if (ngrp > 0) { 1252 newcred->cr_ngroups = ngrp + 1; 1253 1254 bsd_gidset = newcred->cr_groups; 1255 ngrp--; 1256 while (ngrp >= 0) { 1257 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1258 ngrp--; 1259 } 1260 } else 1261 newcred->cr_ngroups = 1; 1262 1263 setsugid(p); 1264 proc_set_cred(p, newcred); 1265 PROC_UNLOCK(p); 1266 crfree(oldcred); 1267 error = 0; 1268 out: 1269 free(linux_gidset, M_LINUX); 1270 return (error); 1271 } 1272 1273 int 1274 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1275 { 1276 struct ucred *cred; 1277 l_gid_t *linux_gidset; 1278 gid_t *bsd_gidset; 1279 int bsd_gidsetsz, ngrp, error; 1280 1281 cred = td->td_ucred; 1282 bsd_gidset = cred->cr_groups; 1283 bsd_gidsetsz = cred->cr_ngroups - 1; 1284 1285 /* 1286 * cr_groups[0] holds egid. Returning the whole set 1287 * here will cause a duplicate. Exclude cr_groups[0] 1288 * to prevent that. 1289 */ 1290 1291 if ((ngrp = args->gidsetsize) == 0) { 1292 td->td_retval[0] = bsd_gidsetsz; 1293 return (0); 1294 } 1295 1296 if (ngrp < bsd_gidsetsz) 1297 return (EINVAL); 1298 1299 ngrp = 0; 1300 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1301 M_LINUX, M_WAITOK); 1302 while (ngrp < bsd_gidsetsz) { 1303 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1304 ngrp++; 1305 } 1306 1307 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1308 free(linux_gidset, M_LINUX); 1309 if (error) 1310 return (error); 1311 1312 td->td_retval[0] = ngrp; 1313 return (0); 1314 } 1315 1316 int 1317 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1318 { 1319 struct rlimit bsd_rlim; 1320 struct l_rlimit rlim; 1321 u_int which; 1322 int error; 1323 1324 if (args->resource >= LINUX_RLIM_NLIMITS) 1325 return (EINVAL); 1326 1327 which = linux_to_bsd_resource[args->resource]; 1328 if (which == -1) 1329 return (EINVAL); 1330 1331 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1332 if (error) 1333 return (error); 1334 1335 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1336 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1337 return (kern_setrlimit(td, which, &bsd_rlim)); 1338 } 1339 1340 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1341 int 1342 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1343 { 1344 struct l_rlimit rlim; 1345 struct rlimit bsd_rlim; 1346 u_int which; 1347 1348 if (args->resource >= LINUX_RLIM_NLIMITS) 1349 return (EINVAL); 1350 1351 which = linux_to_bsd_resource[args->resource]; 1352 if (which == -1) 1353 return (EINVAL); 1354 1355 lim_rlimit(td, which, &bsd_rlim); 1356 1357 #ifdef COMPAT_LINUX32 1358 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1359 if (rlim.rlim_cur == UINT_MAX) 1360 rlim.rlim_cur = INT_MAX; 1361 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1362 if (rlim.rlim_max == UINT_MAX) 1363 rlim.rlim_max = INT_MAX; 1364 #else 1365 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1366 if (rlim.rlim_cur == ULONG_MAX) 1367 rlim.rlim_cur = LONG_MAX; 1368 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1369 if (rlim.rlim_max == ULONG_MAX) 1370 rlim.rlim_max = LONG_MAX; 1371 #endif 1372 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1373 } 1374 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1375 1376 int 1377 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1378 { 1379 struct l_rlimit rlim; 1380 struct rlimit bsd_rlim; 1381 u_int which; 1382 1383 if (args->resource >= LINUX_RLIM_NLIMITS) 1384 return (EINVAL); 1385 1386 which = linux_to_bsd_resource[args->resource]; 1387 if (which == -1) 1388 return (EINVAL); 1389 1390 lim_rlimit(td, which, &bsd_rlim); 1391 1392 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1393 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1394 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1395 } 1396 1397 int 1398 linux_sched_setscheduler(struct thread *td, 1399 struct linux_sched_setscheduler_args *args) 1400 { 1401 struct sched_param sched_param; 1402 struct thread *tdt; 1403 int error, policy; 1404 1405 switch (args->policy) { 1406 case LINUX_SCHED_OTHER: 1407 policy = SCHED_OTHER; 1408 break; 1409 case LINUX_SCHED_FIFO: 1410 policy = SCHED_FIFO; 1411 break; 1412 case LINUX_SCHED_RR: 1413 policy = SCHED_RR; 1414 break; 1415 default: 1416 return (EINVAL); 1417 } 1418 1419 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1420 if (error) 1421 return (error); 1422 1423 if (linux_map_sched_prio) { 1424 switch (policy) { 1425 case SCHED_OTHER: 1426 if (sched_param.sched_priority != 0) 1427 return (EINVAL); 1428 1429 sched_param.sched_priority = 1430 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1431 break; 1432 case SCHED_FIFO: 1433 case SCHED_RR: 1434 if (sched_param.sched_priority < 1 || 1435 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1436 return (EINVAL); 1437 1438 /* 1439 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1440 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1441 */ 1442 sched_param.sched_priority = 1443 (sched_param.sched_priority - 1) * 1444 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1445 (LINUX_MAX_RT_PRIO - 1); 1446 break; 1447 } 1448 } 1449 1450 tdt = linux_tdfind(td, args->pid, -1); 1451 if (tdt == NULL) 1452 return (ESRCH); 1453 1454 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1455 PROC_UNLOCK(tdt->td_proc); 1456 return (error); 1457 } 1458 1459 int 1460 linux_sched_getscheduler(struct thread *td, 1461 struct linux_sched_getscheduler_args *args) 1462 { 1463 struct thread *tdt; 1464 int error, policy; 1465 1466 tdt = linux_tdfind(td, args->pid, -1); 1467 if (tdt == NULL) 1468 return (ESRCH); 1469 1470 error = kern_sched_getscheduler(td, tdt, &policy); 1471 PROC_UNLOCK(tdt->td_proc); 1472 1473 switch (policy) { 1474 case SCHED_OTHER: 1475 td->td_retval[0] = LINUX_SCHED_OTHER; 1476 break; 1477 case SCHED_FIFO: 1478 td->td_retval[0] = LINUX_SCHED_FIFO; 1479 break; 1480 case SCHED_RR: 1481 td->td_retval[0] = LINUX_SCHED_RR; 1482 break; 1483 } 1484 return (error); 1485 } 1486 1487 int 1488 linux_sched_get_priority_max(struct thread *td, 1489 struct linux_sched_get_priority_max_args *args) 1490 { 1491 struct sched_get_priority_max_args bsd; 1492 1493 if (linux_map_sched_prio) { 1494 switch (args->policy) { 1495 case LINUX_SCHED_OTHER: 1496 td->td_retval[0] = 0; 1497 return (0); 1498 case LINUX_SCHED_FIFO: 1499 case LINUX_SCHED_RR: 1500 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1501 return (0); 1502 default: 1503 return (EINVAL); 1504 } 1505 } 1506 1507 switch (args->policy) { 1508 case LINUX_SCHED_OTHER: 1509 bsd.policy = SCHED_OTHER; 1510 break; 1511 case LINUX_SCHED_FIFO: 1512 bsd.policy = SCHED_FIFO; 1513 break; 1514 case LINUX_SCHED_RR: 1515 bsd.policy = SCHED_RR; 1516 break; 1517 default: 1518 return (EINVAL); 1519 } 1520 return (sys_sched_get_priority_max(td, &bsd)); 1521 } 1522 1523 int 1524 linux_sched_get_priority_min(struct thread *td, 1525 struct linux_sched_get_priority_min_args *args) 1526 { 1527 struct sched_get_priority_min_args bsd; 1528 1529 if (linux_map_sched_prio) { 1530 switch (args->policy) { 1531 case LINUX_SCHED_OTHER: 1532 td->td_retval[0] = 0; 1533 return (0); 1534 case LINUX_SCHED_FIFO: 1535 case LINUX_SCHED_RR: 1536 td->td_retval[0] = 1; 1537 return (0); 1538 default: 1539 return (EINVAL); 1540 } 1541 } 1542 1543 switch (args->policy) { 1544 case LINUX_SCHED_OTHER: 1545 bsd.policy = SCHED_OTHER; 1546 break; 1547 case LINUX_SCHED_FIFO: 1548 bsd.policy = SCHED_FIFO; 1549 break; 1550 case LINUX_SCHED_RR: 1551 bsd.policy = SCHED_RR; 1552 break; 1553 default: 1554 return (EINVAL); 1555 } 1556 return (sys_sched_get_priority_min(td, &bsd)); 1557 } 1558 1559 #define REBOOT_CAD_ON 0x89abcdef 1560 #define REBOOT_CAD_OFF 0 1561 #define REBOOT_HALT 0xcdef0123 1562 #define REBOOT_RESTART 0x01234567 1563 #define REBOOT_RESTART2 0xA1B2C3D4 1564 #define REBOOT_POWEROFF 0x4321FEDC 1565 #define REBOOT_MAGIC1 0xfee1dead 1566 #define REBOOT_MAGIC2 0x28121969 1567 #define REBOOT_MAGIC2A 0x05121996 1568 #define REBOOT_MAGIC2B 0x16041998 1569 1570 int 1571 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1572 { 1573 struct reboot_args bsd_args; 1574 1575 if (args->magic1 != REBOOT_MAGIC1) 1576 return (EINVAL); 1577 1578 switch (args->magic2) { 1579 case REBOOT_MAGIC2: 1580 case REBOOT_MAGIC2A: 1581 case REBOOT_MAGIC2B: 1582 break; 1583 default: 1584 return (EINVAL); 1585 } 1586 1587 switch (args->cmd) { 1588 case REBOOT_CAD_ON: 1589 case REBOOT_CAD_OFF: 1590 return (priv_check(td, PRIV_REBOOT)); 1591 case REBOOT_HALT: 1592 bsd_args.opt = RB_HALT; 1593 break; 1594 case REBOOT_RESTART: 1595 case REBOOT_RESTART2: 1596 bsd_args.opt = 0; 1597 break; 1598 case REBOOT_POWEROFF: 1599 bsd_args.opt = RB_POWEROFF; 1600 break; 1601 default: 1602 return (EINVAL); 1603 } 1604 return (sys_reboot(td, &bsd_args)); 1605 } 1606 1607 1608 int 1609 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1610 { 1611 1612 td->td_retval[0] = td->td_proc->p_pid; 1613 1614 return (0); 1615 } 1616 1617 int 1618 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1619 { 1620 struct linux_emuldata *em; 1621 1622 em = em_find(td); 1623 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1624 1625 td->td_retval[0] = em->em_tid; 1626 1627 return (0); 1628 } 1629 1630 1631 int 1632 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1633 { 1634 1635 td->td_retval[0] = kern_getppid(td); 1636 return (0); 1637 } 1638 1639 int 1640 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1641 { 1642 1643 td->td_retval[0] = td->td_ucred->cr_rgid; 1644 return (0); 1645 } 1646 1647 int 1648 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1649 { 1650 1651 td->td_retval[0] = td->td_ucred->cr_ruid; 1652 return (0); 1653 } 1654 1655 int 1656 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1657 { 1658 1659 return (kern_getsid(td, args->pid)); 1660 } 1661 1662 int 1663 linux_nosys(struct thread *td, struct nosys_args *ignore) 1664 { 1665 1666 return (ENOSYS); 1667 } 1668 1669 int 1670 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1671 { 1672 int error; 1673 1674 error = kern_getpriority(td, args->which, args->who); 1675 td->td_retval[0] = 20 - td->td_retval[0]; 1676 return (error); 1677 } 1678 1679 int 1680 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1681 { 1682 int name[2]; 1683 1684 name[0] = CTL_KERN; 1685 name[1] = KERN_HOSTNAME; 1686 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1687 args->len, 0, 0)); 1688 } 1689 1690 int 1691 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1692 { 1693 int name[2]; 1694 1695 name[0] = CTL_KERN; 1696 name[1] = KERN_NISDOMAINNAME; 1697 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1698 args->len, 0, 0)); 1699 } 1700 1701 int 1702 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1703 { 1704 1705 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1706 args->error_code); 1707 1708 /* 1709 * XXX: we should send a signal to the parent if 1710 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1711 * as it doesnt occur often. 1712 */ 1713 exit1(td, args->error_code, 0); 1714 /* NOTREACHED */ 1715 } 1716 1717 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1718 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1719 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1720 1721 struct l_user_cap_header { 1722 l_int version; 1723 l_int pid; 1724 }; 1725 1726 struct l_user_cap_data { 1727 l_int effective; 1728 l_int permitted; 1729 l_int inheritable; 1730 }; 1731 1732 int 1733 linux_capget(struct thread *td, struct linux_capget_args *uap) 1734 { 1735 struct l_user_cap_header luch; 1736 struct l_user_cap_data lucd[2]; 1737 int error, u32s; 1738 1739 if (uap->hdrp == NULL) 1740 return (EFAULT); 1741 1742 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1743 if (error != 0) 1744 return (error); 1745 1746 switch (luch.version) { 1747 case _LINUX_CAPABILITY_VERSION_1: 1748 u32s = 1; 1749 break; 1750 case _LINUX_CAPABILITY_VERSION_2: 1751 case _LINUX_CAPABILITY_VERSION_3: 1752 u32s = 2; 1753 break; 1754 default: 1755 luch.version = _LINUX_CAPABILITY_VERSION_1; 1756 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1757 if (error) 1758 return (error); 1759 return (EINVAL); 1760 } 1761 1762 if (luch.pid) 1763 return (EPERM); 1764 1765 if (uap->datap) { 1766 /* 1767 * The current implementation doesn't support setting 1768 * a capability (it's essentially a stub) so indicate 1769 * that no capabilities are currently set or available 1770 * to request. 1771 */ 1772 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1773 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1774 } 1775 1776 return (error); 1777 } 1778 1779 int 1780 linux_capset(struct thread *td, struct linux_capset_args *uap) 1781 { 1782 struct l_user_cap_header luch; 1783 struct l_user_cap_data lucd[2]; 1784 int error, i, u32s; 1785 1786 if (uap->hdrp == NULL || uap->datap == NULL) 1787 return (EFAULT); 1788 1789 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1790 if (error != 0) 1791 return (error); 1792 1793 switch (luch.version) { 1794 case _LINUX_CAPABILITY_VERSION_1: 1795 u32s = 1; 1796 break; 1797 case _LINUX_CAPABILITY_VERSION_2: 1798 case _LINUX_CAPABILITY_VERSION_3: 1799 u32s = 2; 1800 break; 1801 default: 1802 luch.version = _LINUX_CAPABILITY_VERSION_1; 1803 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1804 if (error) 1805 return (error); 1806 return (EINVAL); 1807 } 1808 1809 if (luch.pid) 1810 return (EPERM); 1811 1812 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1813 if (error != 0) 1814 return (error); 1815 1816 /* We currently don't support setting any capabilities. */ 1817 for (i = 0; i < u32s; i++) { 1818 if (lucd[i].effective || lucd[i].permitted || 1819 lucd[i].inheritable) { 1820 linux_msg(td, 1821 "capset[%d] effective=0x%x, permitted=0x%x, " 1822 "inheritable=0x%x is not implemented", i, 1823 (int)lucd[i].effective, (int)lucd[i].permitted, 1824 (int)lucd[i].inheritable); 1825 return (EPERM); 1826 } 1827 } 1828 1829 return (0); 1830 } 1831 1832 int 1833 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1834 { 1835 int error = 0, max_size; 1836 struct proc *p = td->td_proc; 1837 char comm[LINUX_MAX_COMM_LEN]; 1838 int pdeath_signal; 1839 1840 switch (args->option) { 1841 case LINUX_PR_SET_PDEATHSIG: 1842 if (!LINUX_SIG_VALID(args->arg2)) 1843 return (EINVAL); 1844 pdeath_signal = linux_to_bsd_signal(args->arg2); 1845 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1846 &pdeath_signal)); 1847 case LINUX_PR_GET_PDEATHSIG: 1848 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1849 &pdeath_signal); 1850 if (error != 0) 1851 return (error); 1852 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1853 return (copyout(&pdeath_signal, 1854 (void *)(register_t)args->arg2, 1855 sizeof(pdeath_signal))); 1856 break; 1857 case LINUX_PR_GET_KEEPCAPS: 1858 /* 1859 * Indicate that we always clear the effective and 1860 * permitted capability sets when the user id becomes 1861 * non-zero (actually the capability sets are simply 1862 * always zero in the current implementation). 1863 */ 1864 td->td_retval[0] = 0; 1865 break; 1866 case LINUX_PR_SET_KEEPCAPS: 1867 /* 1868 * Ignore requests to keep the effective and permitted 1869 * capability sets when the user id becomes non-zero. 1870 */ 1871 break; 1872 case LINUX_PR_SET_NAME: 1873 /* 1874 * To be on the safe side we need to make sure to not 1875 * overflow the size a Linux program expects. We already 1876 * do this here in the copyin, so that we don't need to 1877 * check on copyout. 1878 */ 1879 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1880 error = copyinstr((void *)(register_t)args->arg2, comm, 1881 max_size, NULL); 1882 1883 /* Linux silently truncates the name if it is too long. */ 1884 if (error == ENAMETOOLONG) { 1885 /* 1886 * XXX: copyinstr() isn't documented to populate the 1887 * array completely, so do a copyin() to be on the 1888 * safe side. This should be changed in case 1889 * copyinstr() is changed to guarantee this. 1890 */ 1891 error = copyin((void *)(register_t)args->arg2, comm, 1892 max_size - 1); 1893 comm[max_size - 1] = '\0'; 1894 } 1895 if (error) 1896 return (error); 1897 1898 PROC_LOCK(p); 1899 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1900 PROC_UNLOCK(p); 1901 break; 1902 case LINUX_PR_GET_NAME: 1903 PROC_LOCK(p); 1904 strlcpy(comm, p->p_comm, sizeof(comm)); 1905 PROC_UNLOCK(p); 1906 error = copyout(comm, (void *)(register_t)args->arg2, 1907 strlen(comm) + 1); 1908 break; 1909 default: 1910 error = EINVAL; 1911 break; 1912 } 1913 1914 return (error); 1915 } 1916 1917 int 1918 linux_sched_setparam(struct thread *td, 1919 struct linux_sched_setparam_args *uap) 1920 { 1921 struct sched_param sched_param; 1922 struct thread *tdt; 1923 int error, policy; 1924 1925 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1926 if (error) 1927 return (error); 1928 1929 tdt = linux_tdfind(td, uap->pid, -1); 1930 if (tdt == NULL) 1931 return (ESRCH); 1932 1933 if (linux_map_sched_prio) { 1934 error = kern_sched_getscheduler(td, tdt, &policy); 1935 if (error) 1936 goto out; 1937 1938 switch (policy) { 1939 case SCHED_OTHER: 1940 if (sched_param.sched_priority != 0) { 1941 error = EINVAL; 1942 goto out; 1943 } 1944 sched_param.sched_priority = 1945 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1946 break; 1947 case SCHED_FIFO: 1948 case SCHED_RR: 1949 if (sched_param.sched_priority < 1 || 1950 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1951 error = EINVAL; 1952 goto out; 1953 } 1954 /* 1955 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1956 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1957 */ 1958 sched_param.sched_priority = 1959 (sched_param.sched_priority - 1) * 1960 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1961 (LINUX_MAX_RT_PRIO - 1); 1962 break; 1963 } 1964 } 1965 1966 error = kern_sched_setparam(td, tdt, &sched_param); 1967 out: PROC_UNLOCK(tdt->td_proc); 1968 return (error); 1969 } 1970 1971 int 1972 linux_sched_getparam(struct thread *td, 1973 struct linux_sched_getparam_args *uap) 1974 { 1975 struct sched_param sched_param; 1976 struct thread *tdt; 1977 int error, policy; 1978 1979 tdt = linux_tdfind(td, uap->pid, -1); 1980 if (tdt == NULL) 1981 return (ESRCH); 1982 1983 error = kern_sched_getparam(td, tdt, &sched_param); 1984 if (error) { 1985 PROC_UNLOCK(tdt->td_proc); 1986 return (error); 1987 } 1988 1989 if (linux_map_sched_prio) { 1990 error = kern_sched_getscheduler(td, tdt, &policy); 1991 PROC_UNLOCK(tdt->td_proc); 1992 if (error) 1993 return (error); 1994 1995 switch (policy) { 1996 case SCHED_OTHER: 1997 sched_param.sched_priority = 0; 1998 break; 1999 case SCHED_FIFO: 2000 case SCHED_RR: 2001 /* 2002 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 2003 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 2004 */ 2005 sched_param.sched_priority = 2006 (sched_param.sched_priority * 2007 (LINUX_MAX_RT_PRIO - 1) + 2008 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 2009 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 2010 break; 2011 } 2012 } else 2013 PROC_UNLOCK(tdt->td_proc); 2014 2015 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 2016 return (error); 2017 } 2018 2019 /* 2020 * Get affinity of a process. 2021 */ 2022 int 2023 linux_sched_getaffinity(struct thread *td, 2024 struct linux_sched_getaffinity_args *args) 2025 { 2026 int error; 2027 struct thread *tdt; 2028 2029 if (args->len < sizeof(cpuset_t)) 2030 return (EINVAL); 2031 2032 tdt = linux_tdfind(td, args->pid, -1); 2033 if (tdt == NULL) 2034 return (ESRCH); 2035 2036 PROC_UNLOCK(tdt->td_proc); 2037 2038 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2039 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); 2040 if (error == 0) 2041 td->td_retval[0] = sizeof(cpuset_t); 2042 2043 return (error); 2044 } 2045 2046 /* 2047 * Set affinity of a process. 2048 */ 2049 int 2050 linux_sched_setaffinity(struct thread *td, 2051 struct linux_sched_setaffinity_args *args) 2052 { 2053 struct thread *tdt; 2054 2055 if (args->len < sizeof(cpuset_t)) 2056 return (EINVAL); 2057 2058 tdt = linux_tdfind(td, args->pid, -1); 2059 if (tdt == NULL) 2060 return (ESRCH); 2061 2062 PROC_UNLOCK(tdt->td_proc); 2063 2064 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2065 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); 2066 } 2067 2068 struct linux_rlimit64 { 2069 uint64_t rlim_cur; 2070 uint64_t rlim_max; 2071 }; 2072 2073 int 2074 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2075 { 2076 struct rlimit rlim, nrlim; 2077 struct linux_rlimit64 lrlim; 2078 struct proc *p; 2079 u_int which; 2080 int flags; 2081 int error; 2082 2083 if (args->resource >= LINUX_RLIM_NLIMITS) 2084 return (EINVAL); 2085 2086 which = linux_to_bsd_resource[args->resource]; 2087 if (which == -1) 2088 return (EINVAL); 2089 2090 if (args->new != NULL) { 2091 /* 2092 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2093 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2094 * as INFINITY so we do not need a conversion even. 2095 */ 2096 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2097 if (error != 0) 2098 return (error); 2099 } 2100 2101 flags = PGET_HOLD | PGET_NOTWEXIT; 2102 if (args->new != NULL) 2103 flags |= PGET_CANDEBUG; 2104 else 2105 flags |= PGET_CANSEE; 2106 if (args->pid == 0) { 2107 p = td->td_proc; 2108 PHOLD(p); 2109 } else { 2110 error = pget(args->pid, flags, &p); 2111 if (error != 0) 2112 return (error); 2113 } 2114 if (args->old != NULL) { 2115 PROC_LOCK(p); 2116 lim_rlimit_proc(p, which, &rlim); 2117 PROC_UNLOCK(p); 2118 if (rlim.rlim_cur == RLIM_INFINITY) 2119 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2120 else 2121 lrlim.rlim_cur = rlim.rlim_cur; 2122 if (rlim.rlim_max == RLIM_INFINITY) 2123 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2124 else 2125 lrlim.rlim_max = rlim.rlim_max; 2126 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2127 if (error != 0) 2128 goto out; 2129 } 2130 2131 if (args->new != NULL) 2132 error = kern_proc_setrlimit(td, p, which, &nrlim); 2133 2134 out: 2135 PRELE(p); 2136 return (error); 2137 } 2138 2139 int 2140 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2141 { 2142 struct timeval utv, tv0, tv1, *tvp; 2143 struct l_pselect6arg lpse6; 2144 struct l_timespec lts; 2145 struct timespec uts; 2146 l_sigset_t l_ss; 2147 sigset_t *ssp; 2148 sigset_t ss; 2149 int error; 2150 2151 ssp = NULL; 2152 if (args->sig != NULL) { 2153 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2154 if (error != 0) 2155 return (error); 2156 if (lpse6.ss_len != sizeof(l_ss)) 2157 return (EINVAL); 2158 if (lpse6.ss != 0) { 2159 error = copyin(PTRIN(lpse6.ss), &l_ss, 2160 sizeof(l_ss)); 2161 if (error != 0) 2162 return (error); 2163 linux_to_bsd_sigset(&l_ss, &ss); 2164 ssp = &ss; 2165 } 2166 } 2167 2168 /* 2169 * Currently glibc changes nanosecond number to microsecond. 2170 * This mean losing precision but for now it is hardly seen. 2171 */ 2172 if (args->tsp != NULL) { 2173 error = copyin(args->tsp, <s, sizeof(lts)); 2174 if (error != 0) 2175 return (error); 2176 error = linux_to_native_timespec(&uts, <s); 2177 if (error != 0) 2178 return (error); 2179 2180 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2181 if (itimerfix(&utv)) 2182 return (EINVAL); 2183 2184 microtime(&tv0); 2185 tvp = &utv; 2186 } else 2187 tvp = NULL; 2188 2189 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2190 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2191 2192 if (error == 0 && args->tsp != NULL) { 2193 if (td->td_retval[0] != 0) { 2194 /* 2195 * Compute how much time was left of the timeout, 2196 * by subtracting the current time and the time 2197 * before we started the call, and subtracting 2198 * that result from the user-supplied value. 2199 */ 2200 2201 microtime(&tv1); 2202 timevalsub(&tv1, &tv0); 2203 timevalsub(&utv, &tv1); 2204 if (utv.tv_sec < 0) 2205 timevalclear(&utv); 2206 } else 2207 timevalclear(&utv); 2208 2209 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2210 2211 error = native_to_linux_timespec(<s, &uts); 2212 if (error == 0) 2213 error = copyout(<s, args->tsp, sizeof(lts)); 2214 } 2215 2216 return (error); 2217 } 2218 2219 int 2220 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2221 { 2222 struct timespec ts0, ts1; 2223 struct l_timespec lts; 2224 struct timespec uts, *tsp; 2225 l_sigset_t l_ss; 2226 sigset_t *ssp; 2227 sigset_t ss; 2228 int error; 2229 2230 if (args->sset != NULL) { 2231 if (args->ssize != sizeof(l_ss)) 2232 return (EINVAL); 2233 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2234 if (error) 2235 return (error); 2236 linux_to_bsd_sigset(&l_ss, &ss); 2237 ssp = &ss; 2238 } else 2239 ssp = NULL; 2240 if (args->tsp != NULL) { 2241 error = copyin(args->tsp, <s, sizeof(lts)); 2242 if (error) 2243 return (error); 2244 error = linux_to_native_timespec(&uts, <s); 2245 if (error != 0) 2246 return (error); 2247 2248 nanotime(&ts0); 2249 tsp = &uts; 2250 } else 2251 tsp = NULL; 2252 2253 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2254 2255 if (error == 0 && args->tsp != NULL) { 2256 if (td->td_retval[0]) { 2257 nanotime(&ts1); 2258 timespecsub(&ts1, &ts0, &ts1); 2259 timespecsub(&uts, &ts1, &uts); 2260 if (uts.tv_sec < 0) 2261 timespecclear(&uts); 2262 } else 2263 timespecclear(&uts); 2264 2265 error = native_to_linux_timespec(<s, &uts); 2266 if (error == 0) 2267 error = copyout(<s, args->tsp, sizeof(lts)); 2268 } 2269 2270 return (error); 2271 } 2272 2273 int 2274 linux_sched_rr_get_interval(struct thread *td, 2275 struct linux_sched_rr_get_interval_args *uap) 2276 { 2277 struct timespec ts; 2278 struct l_timespec lts; 2279 struct thread *tdt; 2280 int error; 2281 2282 /* 2283 * According to man in case the invalid pid specified 2284 * EINVAL should be returned. 2285 */ 2286 if (uap->pid < 0) 2287 return (EINVAL); 2288 2289 tdt = linux_tdfind(td, uap->pid, -1); 2290 if (tdt == NULL) 2291 return (ESRCH); 2292 2293 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2294 PROC_UNLOCK(tdt->td_proc); 2295 if (error != 0) 2296 return (error); 2297 error = native_to_linux_timespec(<s, &ts); 2298 if (error != 0) 2299 return (error); 2300 return (copyout(<s, uap->interval, sizeof(lts))); 2301 } 2302 2303 /* 2304 * In case when the Linux thread is the initial thread in 2305 * the thread group thread id is equal to the process id. 2306 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2307 */ 2308 struct thread * 2309 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2310 { 2311 struct linux_emuldata *em; 2312 struct thread *tdt; 2313 struct proc *p; 2314 2315 tdt = NULL; 2316 if (tid == 0 || tid == td->td_tid) { 2317 tdt = td; 2318 PROC_LOCK(tdt->td_proc); 2319 } else if (tid > PID_MAX) 2320 tdt = tdfind(tid, pid); 2321 else { 2322 /* 2323 * Initial thread where the tid equal to the pid. 2324 */ 2325 p = pfind(tid); 2326 if (p != NULL) { 2327 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2328 /* 2329 * p is not a Linuxulator process. 2330 */ 2331 PROC_UNLOCK(p); 2332 return (NULL); 2333 } 2334 FOREACH_THREAD_IN_PROC(p, tdt) { 2335 em = em_find(tdt); 2336 if (tid == em->em_tid) 2337 return (tdt); 2338 } 2339 PROC_UNLOCK(p); 2340 } 2341 return (NULL); 2342 } 2343 2344 return (tdt); 2345 } 2346 2347 void 2348 linux_to_bsd_waitopts(int options, int *bsdopts) 2349 { 2350 2351 if (options & LINUX_WNOHANG) 2352 *bsdopts |= WNOHANG; 2353 if (options & LINUX_WUNTRACED) 2354 *bsdopts |= WUNTRACED; 2355 if (options & LINUX_WEXITED) 2356 *bsdopts |= WEXITED; 2357 if (options & LINUX_WCONTINUED) 2358 *bsdopts |= WCONTINUED; 2359 if (options & LINUX_WNOWAIT) 2360 *bsdopts |= WNOWAIT; 2361 2362 if (options & __WCLONE) 2363 *bsdopts |= WLINUXCLONE; 2364 } 2365 2366 int 2367 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2368 { 2369 struct uio uio; 2370 struct iovec iov; 2371 int error; 2372 2373 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2374 return (EINVAL); 2375 if (args->count > INT_MAX) 2376 args->count = INT_MAX; 2377 2378 iov.iov_base = args->buf; 2379 iov.iov_len = args->count; 2380 2381 uio.uio_iov = &iov; 2382 uio.uio_iovcnt = 1; 2383 uio.uio_resid = iov.iov_len; 2384 uio.uio_segflg = UIO_USERSPACE; 2385 uio.uio_rw = UIO_READ; 2386 uio.uio_td = td; 2387 2388 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2389 if (error == 0) 2390 td->td_retval[0] = args->count - uio.uio_resid; 2391 return (error); 2392 } 2393 2394 int 2395 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2396 { 2397 2398 /* Needs to be page-aligned */ 2399 if (args->start & PAGE_MASK) 2400 return (EINVAL); 2401 return (kern_mincore(td, args->start, args->len, args->vec)); 2402 } 2403 2404 #define SYSLOG_TAG "<6>" 2405 2406 int 2407 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2408 { 2409 char buf[128], *src, *dst; 2410 u_int seq; 2411 int buflen, error; 2412 2413 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2414 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2415 return (EINVAL); 2416 } 2417 2418 if (args->len < 6) { 2419 td->td_retval[0] = 0; 2420 return (0); 2421 } 2422 2423 error = priv_check(td, PRIV_MSGBUF); 2424 if (error) 2425 return (error); 2426 2427 mtx_lock(&msgbuf_lock); 2428 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2429 mtx_unlock(&msgbuf_lock); 2430 2431 dst = args->buf; 2432 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2433 /* The -1 is to skip the trailing '\0'. */ 2434 dst += sizeof(SYSLOG_TAG) - 1; 2435 2436 while (error == 0) { 2437 mtx_lock(&msgbuf_lock); 2438 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2439 mtx_unlock(&msgbuf_lock); 2440 2441 if (buflen == 0) 2442 break; 2443 2444 for (src = buf; src < buf + buflen && error == 0; src++) { 2445 if (*src == '\0') 2446 continue; 2447 2448 if (dst >= args->buf + args->len) 2449 goto out; 2450 2451 error = copyout(src, dst, 1); 2452 dst++; 2453 2454 if (*src == '\n' && *(src + 1) != '<' && 2455 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2456 error = copyout(&SYSLOG_TAG, 2457 dst, sizeof(SYSLOG_TAG)); 2458 dst += sizeof(SYSLOG_TAG) - 1; 2459 } 2460 } 2461 } 2462 out: 2463 td->td_retval[0] = dst - args->buf; 2464 return (error); 2465 } 2466 2467 int 2468 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2469 { 2470 int cpu, error, node; 2471 2472 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2473 error = 0; 2474 node = cpuid_to_pcpu[cpu]->pc_domain; 2475 2476 if (args->cpu != NULL) 2477 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2478 if (args->node != NULL) 2479 error = copyout(&node, args->node, sizeof(l_int)); 2480 return (error); 2481 } 2482