/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/blist.h>
#include <sys/fcntl.h>
#if defined(__i386__)
#include <sys/imgact_aout.h>
#endif
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/racct.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <sys/cpuset.h>
#include <sys/uio.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

/**
 * Special DTrace provider for the linuxulator.
 *
 * In this file we define the provider for the entire linuxulator. All
 * modules (= files of the linuxulator) use it.
 *
 * We define a different name depending on the emulated bitsize, see
 * ../../<ARCH>/linux{,32}/linux.h, e.g.:
 *	native bitsize		= linuxulator
 *	amd64, 32bit emulation	= linuxulator32
 */
LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);

int stclohz;				/* Statistics clock frequency */

static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_nsec_valid(l_long);


int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files. There is no cheap way to
	 * compute this, so just leave the field unpopulated. Linux itself only
	 * started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}
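
/*
 * Illustrative note (not from the original source): sysinfo(2) load
 * averages are fixed-point numbers scaled by LINUX_SYSINFO_LOADS_SCALE
 * (1 << 16).  The loop above rescales FreeBSD's fixed-point ldavg (scaled
 * by averunnable.fscale, typically 2048) into that format.  For example,
 * a one-minute load of 0.50 is stored as ldavg[0] == 1024 when
 * fscale == 2048, and becomes 1024 * 65536 / 2048 == 32768 in
 * sysinfo.loads[0]; userland divides by 65536.0 to print "0.50".
 */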

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit and avoid an error from it.
	 *
	 * XXX: Linux limits secs to INT_MAX on 32-bit platforms and does
	 * not limit it at all on 64-bit platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}
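
/*
 * Illustrative note (not from the original source): unlike FreeBSD's
 * break(2), Linux brk(2) does not report failure through errno.  It
 * returns the new program break on success and the current (unchanged)
 * break on failure, which is why linux_brk() above always succeeds and
 * encodes the outcome in td_retval[0].  Glibc's brk() wrapper compares
 * the returned break against the requested address to decide whether to
 * report ENOMEM.
 */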

#if defined(__i386__)
/* XXX: what about amd64/linux32? */

int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	bool locked, opened, textset;

	LCONVPATHEXIST(td, args->library, &library);

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCES is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}

	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		if (!locked) {
			locked = true;
			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
		}
		VOP_UNSET_TEXT_CHECKED(vp);
	}
	if (locked)
		VOP_UNLOCK(vp);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid. Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}

#define LINUX_MS_ASYNC		0x0001
#define LINUX_MS_INVALIDATE	0x0002
#define LINUX_MS_SYNC		0x0004

int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};


/*
 * Glibc versions prior to 2.2.1 always use a hard-coded CLK_TCK value.
 * Since 2.2.1 glibc uses the value exported from the kernel via the
 * AT_CLKTCK auxiliary vector entry.
 */
#define CLK_TCK 100

#define CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

#define CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?	\
			    CONVNTCK(r) : CONVOTCK(r))
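
/*
 * Illustrative note (not from the original source): CONVOTCK() converts a
 * struct timeval into clock ticks at the fixed 100 Hz CLK_TCK rate, e.g.
 * { tv_sec = 2, tv_usec = 345678 } yields 2 * 100 + 345678 / 10000 == 234
 * ticks.  CONVNTCK() does the same arithmetic at the real statistics
 * clock frequency (stclohz), which is what Linux kernels >= 2.4 advertise
 * to glibc through AT_CLKTCK; CONVTCK() picks one or the other based on
 * the emulated kernel version.
 */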

int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications. On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#else
	strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
	    UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
	    tvp, UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

static int
linux_utimensat_nsec_valid(l_long nsec)
{

	if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW)
		return (0);
	if (nsec >= 0 && nsec <= 999999999)
		return (0);
	return (1);
}

int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp = NULL;
	char *path = NULL;
	int error, dfd, flags = 0;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 ||
		    linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0)
			return (EINVAL);

		times[0].tv_sec = l_times[0].tv_sec;
		switch (l_times[0].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[0].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[0].tv_nsec = UTIME_NOW;
			break;
		default:
			times[0].tv_nsec = l_times[0].tv_nsec;
		}

		times[1].tv_sec = l_times[1].tv_sec;
		switch (l_times[1].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[1].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[1].tv_nsec = UTIME_NOW;
			break;
		default:
			times[1].tv_nsec = l_times[1].tv_nsec;
			break;
		}
		timesp = times;

		/*
		 * This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (times[0].tv_nsec == UTIME_OMIT &&
		    times[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (args->pathname != NULL)
		LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
	else if (args->flags != 0)
		return (EINVAL);

	if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;

	if (path == NULL)
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
	LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif
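
/*
 * Illustrative note (not from the original source): linux_common_wait()
 * below rewrites the wait(2) status word in place, because both systems
 * share the classic encoding but disagree on signal numbers.  In that
 * encoding, "exited" is (code << 8), "killed by signal" keeps the signal
 * in the low 7 bits, "stopped" is (signal << 8) | 0x7f, and "continued"
 * is 0xffff.  For example, a child stopped by Linux SIGSTOP is reported
 * as (19 << 8) | 0x7f == 0x137f, even though the native FreeBSD SIGSTOP
 * is 17.
 */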

static int
linux_common_wait(struct thread *td, int pid, int *statusp,
    int options, struct __wrusage *wrup)
{
	siginfo_t siginfo;
	idtype_t idtype;
	id_t id;
	int error, status, tmpstat;

	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
	error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo);
	if (error)
		return (error);

	if (statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__amd64__) && !defined(COMPAT_LINUX32)
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	int error, options;
	struct __wrusage wru, *wrup;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = WEXITED;
	linux_to_bsd_waitopts(args->options, &options);

	if (args->rusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;
	error = linux_common_wait(td, args->pid, args->status, options, wrup);
	if (error != 0)
		return (error);
	if (args->rusage != NULL)
		error = linux_copyout_rusage(&wru.wru_self, args->rusage);
	return (error);
}

int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		bzero(&lsi, sizeof(lsi));
		if (td->td_retval[0] != 0) {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;

	LCONVPATHCREAT(td, args->path, &path);

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}
#endif

int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
	LCONVPATHCREAT_AT(td, args->filename, &path, dfd);

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}
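
/*
 * Illustrative note (not from the original source): Linux mknod(2) is a
 * multiplexer keyed on the S_IFMT bits of the mode argument, so the two
 * handlers above fan out to different native primitives.  For example,
 * mknod("fifo", S_IFIFO | 0644, 0) becomes kern_mkfifoat(), while a mode
 * of plain 0644 (no format bits) is treated as S_IFREG and the file is
 * simply created with kern_openat() and closed again.
 */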

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;

int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid. Returning the whole set
	 * here will cause a duplicate. Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}
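
/*
 * Illustrative note (not from the original source): FreeBSD stores the
 * effective gid as cr_groups[0] and the supplementary groups from index 1
 * on, while Linux keeps the egid separately and setgroups(2)/getgroups(2)
 * deal only with supplementary groups.  So for a credential with egid 0
 * and cr_groups == { 0, 5, 20 }, the two functions above present Linux
 * with the two-element set { 5, 20 } and, on setgroups(), write the new
 * set back starting at cr_groups[1].
 */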

int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}
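
/*
 * Illustrative note (not from the original source): with the stock
 * constants (LINUX_MAX_RT_PRIO == 100, RTP_PRIO_MIN == 0,
 * RTP_PRIO_MAX == 31), the mapping above compresses the Linux real-time
 * range 1..99 into the native range 0..31, rounding down:
 *
 *	prio  1 -> (1 - 1)  * 32 / 99 == 0
 *	prio 50 -> (50 - 1) * 32 / 99 == 15
 *	prio 99 -> (99 - 1) * 32 / 99 == 31
 *
 * linux_sched_getparam() below applies the inverse mapping, rounding up
 * instead.
 */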

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

#define REBOOT_CAD_ON	0x89abcdef
#define REBOOT_CAD_OFF	0
#define REBOOT_HALT	0xcdef0123
#define REBOOT_RESTART	0x01234567
#define REBOOT_RESTART2	0xA1B2C3D4
#define REBOOT_POWEROFF	0x4321FEDC
#define REBOOT_MAGIC1	0xfee1dead
#define REBOOT_MAGIC2	0x28121969
#define REBOOT_MAGIC2A	0x05121996
#define REBOOT_MAGIC2B	0x16041998

int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}


int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}


int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

#define _LINUX_CAPABILITY_VERSION_1	0x19980330
#define _LINUX_CAPABILITY_VERSION_2	0x20071026
#define _LINUX_CAPABILITY_VERSION_3	0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}
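
/*
 * Illustrative note (not from the original source): the version field in
 * struct l_user_cap_header doubles as a negotiation mechanism.  When a
 * caller passes an unknown version, capget()/capset() above mimic Linux
 * by writing _LINUX_CAPABILITY_VERSION_1 back into the header and failing
 * with EINVAL, so a caller can probe which ABI the kernel prefers before
 * issuing the real call.  Versions 2 and 3 carry two 32-bit words per
 * capability set (u32s == 2) to cover more than 32 capability bits.
 */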

int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
		break;
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure not to
		 * overflow the size a Linux program expects. We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side. This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}

int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}
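
/*
 * Illustrative note (not from the original source): linux_tdfind()
 * (defined later in this file) returns the target thread with its
 * process still locked, which is why every consumer above and below
 * pairs the lookup with a PROC_UNLOCK() on tdt->td_proc once the
 * scheduling operation is done.
 */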

/*
 * Get affinity of a process.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	int error;
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
	if (error == 0)
		td->td_retval[0] = sizeof(cpuset_t);

	return (error);
}

/*
 * Set affinity of a process.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr));
}

struct linux_rlimit64 {
	uint64_t	rlim_cur;
	uint64_t	rlim_max;
};

int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note: unlike FreeBSD, where rlim is signed 64-bit, Linux
		 * rlim is unsigned 64-bit. FreeBSD treats negative limits
		 * as INFINITY, so we do not even need a conversion.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

out:
	PRELE(p);
	return (error);
}
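
/*
 * Illustrative note (not from the original source): the two infinity
 * encodings differ because of the signedness noted above.  FreeBSD's
 * RLIM_INFINITY is the maximum signed 64-bit value (0x7fffffffffffffff)
 * while Linux's RLIM64_INFINITY is all ones (0xffffffffffffffff), so only
 * the old-limit path above needs an explicit translation; on the
 * new-limit path the Linux value is simply reinterpreted, since a
 * negative rlim_t already counts as infinity.
 */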
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);

		TIMEVAL_TO_TIMESPEC(&utv, &uts);

		error = native_to_linux_timespec(&lts, &uts);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}

	return (error);
}

int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec ts0, ts1;
	struct l_timespec lts;
	struct timespec uts, *tsp;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (args->sset != NULL) {
		if (args->ssize != sizeof(l_ss))
			return (EINVAL);
		error = copyin(args->sset, &l_ss, sizeof(l_ss));
		if (error != 0)
			return (error);
		linux_to_bsd_sigset(&l_ss, &ss);
		ssp = &ss;
	} else
		ssp = NULL;
	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error != 0)
			return (error);
		error = linux_to_native_timespec(&uts, &lts);
		if (error != 0)
			return (error);

		nanotime(&ts0);
		tsp = &uts;
	} else
		tsp = NULL;

	error = kern_poll(td, args->fds, args->nfds, tsp, ssp);

	if (error == 0 && args->tsp != NULL) {
		/* As in pselect6, write the remaining time back. */
		if (td->td_retval[0] != 0) {
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(&uts, &ts1, &uts);
			if (uts.tv_sec < 0)
				timespecclear(&uts);
		} else
			timespecclear(&uts);

		error = native_to_linux_timespec(&lts, &uts);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}

	return (error);
}

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	struct l_timespec lts;
	struct thread *tdt;
	int error;

	/*
	 * According to the manual page, EINVAL should be returned
	 * when an invalid pid is specified.
	 */
	if (uap->pid < 0)
		return (EINVAL);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, &ts);
	PROC_UNLOCK(tdt->td_proc);
	if (error != 0)
		return (error);
	error = native_to_linux_timespec(&lts, &ts);
	if (error != 0)
		return (error);
	return (copyout(&lts, uap->interval, sizeof(lts)));
}

/*
 * When a Linux thread is the initial thread in its thread group,
 * the thread id is equal to the process id.  Glibc depends on
 * this magic (assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		tdt = td;
		PROC_LOCK(tdt->td_proc);
	} else if (tid > PID_MAX)
		tdt = tdfind(tid, pid);
	else {
		/*
		 * The initial thread, whose tid is equal to the pid.
		 */
		p = pfind(tid);
		if (p != NULL) {
			if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
				/*
				 * p is not a Linuxulator process.
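				 * Returning NULL makes the callers report
				 * ESRCH rather than acting on a native
				 * FreeBSD process.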
				 */
				PROC_UNLOCK(p);
				return (NULL);
			}
			FOREACH_THREAD_IN_PROC(p, tdt) {
				em = em_find(tdt);
				if (tid == em->em_tid)
					return (tdt);
			}
			PROC_UNLOCK(p);
		}
		return (NULL);
	}

	return (tdt);
}

void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK | LINUX_GRND_RANDOM))
		return (EINVAL);
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* The start address must be page-aligned. */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}

#define	SYSLOG_TAG	"<6>"

int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error != 0)
		return (error);

	/* Start peeking from the beginning of the message buffer. */
	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/*
	 * The -1 is to skip the trailing '\0'.
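	 * sizeof(SYSLOG_TAG) counts the terminating NUL, which the copyout()
	 * above also wrote; advancing dst by one byte less lets the next
	 * character copied overwrite that NUL.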
	 */
	dst += sizeof(SYSLOG_TAG) - 1;

	while (error == 0) {
		mtx_lock(&msgbuf_lock);
		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
		mtx_unlock(&msgbuf_lock);

		if (buflen == 0)
			break;

		for (src = buf; src < buf + buflen && error == 0; src++) {
			if (*src == '\0')
				continue;

			if (dst >= args->buf + args->len)
				goto out;

			error = copyout(src, dst, 1);
			dst++;

			/*
			 * Prepend the priority tag to the next line if it
			 * does not already carry one.  Only look ahead
			 * within the bytes peeked into this chunk.
			 */
			if (*src == '\n' && src + 1 < buf + buflen &&
			    *(src + 1) != '<' &&
			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
				error = copyout(&SYSLOG_TAG,
				    dst, sizeof(SYSLOG_TAG));
				dst += sizeof(SYSLOG_TAG) - 1;
			}
		}
	}
out:
	td->td_retval[0] = dst - args->buf;
	return (error);
}

int
linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
{
	int cpu, error, node;

	cpu = td->td_oncpu;	/* Make sure it doesn't change during copyout(9). */
	error = 0;
	node = cpuid_to_pcpu[cpu]->pc_domain;

	if (args->cpu != NULL)
		error = copyout(&cpu, args->cpu, sizeof(l_int));
	if (error == 0 && args->node != NULL)
		error = copyout(&node, args->node, sizeof(l_int));
	return (error);
}
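
/*
 * The Linux getcpu(2) syscall also takes a third "tcache" argument, which
 * Linux itself has ignored since 2.6.24; it is therefore not examined in
 * linux_getcpu() above.
 */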