1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/fcntl.h> 34 #include <sys/jail.h> 35 #include <sys/imgact.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/msgbuf.h> 39 #include <sys/mutex.h> 40 #include <sys/poll.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/procctl.h> 44 #include <sys/reboot.h> 45 #include <sys/random.h> 46 #include <sys/resourcevar.h> 47 #include <sys/rtprio.h> 48 #include <sys/sched.h> 49 #include <sys/smp.h> 50 #include <sys/stat.h> 51 #include <sys/syscallsubr.h> 52 #include <sys/sysctl.h> 53 #include <sys/sysent.h> 54 #include <sys/sysproto.h> 55 #include <sys/time.h> 56 #include <sys/vmmeter.h> 57 #include <sys/vnode.h> 58 59 #include <security/audit/audit.h> 60 #include <security/mac/mac_framework.h> 61 62 #include <vm/pmap.h> 63 #include <vm/vm_map.h> 64 #include <vm/swap_pager.h> 65 66 #ifdef COMPAT_LINUX32 67 #include <machine/../linux32/linux.h> 68 #include <machine/../linux32/linux32_proto.h> 69 #else 70 #include <machine/../linux/linux.h> 71 #include <machine/../linux/linux_proto.h> 72 #endif 73 74 #include <compat/linux/linux_common.h> 75 #include <compat/linux/linux_dtrace.h> 76 #include <compat/linux/linux_file.h> 77 #include <compat/linux/linux_mib.h> 78 #include <compat/linux/linux_mmap.h> 79 #include <compat/linux/linux_signal.h> 80 #include <compat/linux/linux_time.h> 81 #include <compat/linux/linux_util.h> 82 #include <compat/linux/linux_emul.h> 83 #include <compat/linux/linux_misc.h> 84 85 int stclohz; /* Statistics clock frequency */ 86 87 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 88 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 89 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 90 RLIMIT_MEMLOCK, RLIMIT_AS 91 }; 92 93 struct l_sysinfo { 94 l_long uptime; /* Seconds since boot */ 95 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 96 #define LINUX_SYSINFO_LOADS_SCALE 65536 97 l_ulong totalram; /* Total usable main memory size */ 98 l_ulong freeram; /* Available memory size */ 99 l_ulong sharedram; /* Amount of shared memory */ 100 l_ulong bufferram; /* Memory used by buffers */ 101 l_ulong totalswap; /* Total swap space size */ 102 l_ulong freeswap; /* swap space still available */ 103 l_ushort procs; /* Number of current processes */ 104 l_ushort pads; 105 l_ulong totalhigh; 106 l_ulong freehigh; 107 l_uint mem_unit; 108 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 109 }; 110 111 struct l_pselect6arg { 112 l_uintptr_t ss; 113 l_size_t ss_len; 114 }; 115 116 static int linux_utimensat_lts_to_ts(struct l_timespec *, 117 struct timespec *); 118 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 119 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 120 struct timespec *); 121 #endif 122 static int linux_common_utimensat(struct thread *, int, 123 const char *, struct timespec *, int); 124 static int linux_common_pselect6(struct thread *, l_int, 125 l_fd_set *, l_fd_set *, l_fd_set *, 126 struct timespec *, l_uintptr_t *); 127 static int linux_common_ppoll(struct thread *, struct pollfd *, 128 uint32_t, struct timespec *, l_sigset_t *, 129 l_size_t); 130 static int linux_pollin(struct thread *, struct pollfd *, 131 struct pollfd *, u_int); 132 static int linux_pollout(struct thread *, struct pollfd *, 133 struct pollfd *, u_int); 134 135 int 136 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 137 { 138 struct l_sysinfo sysinfo; 139 int i, j; 140 struct timespec ts; 141 142 bzero(&sysinfo, sizeof(sysinfo)); 143 getnanouptime(&ts); 144 if (ts.tv_nsec != 0) 145 ts.tv_sec++; 146 sysinfo.uptime = ts.tv_sec; 147 148 /* Use the information from the mib to get our load averages */ 149 for (i = 0; i < 3; i++) 150 sysinfo.loads[i] = averunnable.ldavg[i] * 151 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 152 153 sysinfo.totalram = physmem * PAGE_SIZE; 154 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 155 156 /* 157 * sharedram counts pages allocated to named, swap-backed objects such 158 * as shared memory segments and tmpfs files. There is no cheap way to 159 * compute this, so just leave the field unpopulated. Linux itself only 160 * started setting this field in the 3.x timeframe. 161 */ 162 sysinfo.sharedram = 0; 163 sysinfo.bufferram = 0; 164 165 swap_pager_status(&i, &j); 166 sysinfo.totalswap = i * PAGE_SIZE; 167 sysinfo.freeswap = (i - j) * PAGE_SIZE; 168 169 sysinfo.procs = nprocs; 170 171 /* 172 * Platforms supported by the emulation layer do not have a notion of 173 * high memory. 174 */ 175 sysinfo.totalhigh = 0; 176 sysinfo.freehigh = 0; 177 178 sysinfo.mem_unit = 1; 179 180 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 181 } 182 183 #ifdef LINUX_LEGACY_SYSCALLS 184 int 185 linux_alarm(struct thread *td, struct linux_alarm_args *args) 186 { 187 struct itimerval it, old_it; 188 u_int secs; 189 int error __diagused; 190 191 secs = args->secs; 192 /* 193 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 194 * to match kern_setitimer()'s limit to avoid error from it. 195 * 196 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 197 * platforms. 198 */ 199 if (secs > INT32_MAX / 2) 200 secs = INT32_MAX / 2; 201 202 it.it_value.tv_sec = secs; 203 it.it_value.tv_usec = 0; 204 timevalclear(&it.it_interval); 205 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 206 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 207 208 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 209 old_it.it_value.tv_usec >= 500000) 210 old_it.it_value.tv_sec++; 211 td->td_retval[0] = old_it.it_value.tv_sec; 212 return (0); 213 } 214 #endif 215 216 int 217 linux_brk(struct thread *td, struct linux_brk_args *args) 218 { 219 struct vmspace *vm = td->td_proc->p_vmspace; 220 uintptr_t new, old; 221 222 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 223 new = (uintptr_t)args->dsend; 224 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 225 td->td_retval[0] = (register_t)new; 226 else 227 td->td_retval[0] = (register_t)old; 228 229 return (0); 230 } 231 232 #ifdef LINUX_LEGACY_SYSCALLS 233 int 234 linux_select(struct thread *td, struct linux_select_args *args) 235 { 236 l_timeval ltv; 237 struct timeval tv0, tv1, utv, *tvp; 238 int error; 239 240 /* 241 * Store current time for computation of the amount of 242 * time left. 243 */ 244 if (args->timeout) { 245 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 246 goto select_out; 247 utv.tv_sec = ltv.tv_sec; 248 utv.tv_usec = ltv.tv_usec; 249 250 if (itimerfix(&utv)) { 251 /* 252 * The timeval was invalid. Convert it to something 253 * valid that will act as it does under Linux. 254 */ 255 utv.tv_sec += utv.tv_usec / 1000000; 256 utv.tv_usec %= 1000000; 257 if (utv.tv_usec < 0) { 258 utv.tv_sec -= 1; 259 utv.tv_usec += 1000000; 260 } 261 if (utv.tv_sec < 0) 262 timevalclear(&utv); 263 } 264 microtime(&tv0); 265 tvp = &utv; 266 } else 267 tvp = NULL; 268 269 error = kern_select(td, args->nfds, args->readfds, args->writefds, 270 args->exceptfds, tvp, LINUX_NFDBITS); 271 if (error) 272 goto select_out; 273 274 if (args->timeout) { 275 if (td->td_retval[0]) { 276 /* 277 * Compute how much time was left of the timeout, 278 * by subtracting the current time and the time 279 * before we started the call, and subtracting 280 * that result from the user-supplied value. 281 */ 282 microtime(&tv1); 283 timevalsub(&tv1, &tv0); 284 timevalsub(&utv, &tv1); 285 if (utv.tv_sec < 0) 286 timevalclear(&utv); 287 } else 288 timevalclear(&utv); 289 ltv.tv_sec = utv.tv_sec; 290 ltv.tv_usec = utv.tv_usec; 291 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 292 goto select_out; 293 } 294 295 select_out: 296 return (error); 297 } 298 #endif 299 300 int 301 linux_mremap(struct thread *td, struct linux_mremap_args *args) 302 { 303 uintptr_t addr; 304 size_t len; 305 int error = 0; 306 307 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 308 td->td_retval[0] = 0; 309 return (EINVAL); 310 } 311 312 /* 313 * Check for the page alignment. 314 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 315 */ 316 if (args->addr & PAGE_MASK) { 317 td->td_retval[0] = 0; 318 return (EINVAL); 319 } 320 321 args->new_len = round_page(args->new_len); 322 args->old_len = round_page(args->old_len); 323 324 if (args->new_len > args->old_len) { 325 td->td_retval[0] = 0; 326 return (ENOMEM); 327 } 328 329 if (args->new_len < args->old_len) { 330 addr = args->addr + args->new_len; 331 len = args->old_len - args->new_len; 332 error = kern_munmap(td, addr, len); 333 } 334 335 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 336 return (error); 337 } 338 339 #define LINUX_MS_ASYNC 0x0001 340 #define LINUX_MS_INVALIDATE 0x0002 341 #define LINUX_MS_SYNC 0x0004 342 343 int 344 linux_msync(struct thread *td, struct linux_msync_args *args) 345 { 346 347 return (kern_msync(td, args->addr, args->len, 348 args->fl & ~LINUX_MS_SYNC)); 349 } 350 351 int 352 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 353 { 354 355 return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, 356 uap->prot)); 357 } 358 359 int 360 linux_madvise(struct thread *td, struct linux_madvise_args *uap) 361 { 362 363 return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, 364 uap->behav)); 365 } 366 367 int 368 linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) 369 { 370 #if defined(LINUX_ARCHWANT_MMAP2PGOFF) 371 /* 372 * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is 373 * implemented with mmap2 syscall and the offset is represented in 374 * multiples of page size. 375 */ 376 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 377 uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE)); 378 #else 379 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 380 uap->flags, uap->fd, uap->pgoff)); 381 #endif 382 } 383 384 #ifdef LINUX_LEGACY_SYSCALLS 385 int 386 linux_time(struct thread *td, struct linux_time_args *args) 387 { 388 struct timeval tv; 389 l_time_t tm; 390 int error; 391 392 microtime(&tv); 393 tm = tv.tv_sec; 394 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 395 return (error); 396 td->td_retval[0] = tm; 397 return (0); 398 } 399 #endif 400 401 struct l_times_argv { 402 l_clock_t tms_utime; 403 l_clock_t tms_stime; 404 l_clock_t tms_cutime; 405 l_clock_t tms_cstime; 406 }; 407 408 /* 409 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 410 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 411 * auxiliary vector entry. 412 */ 413 #define CLK_TCK 100 414 415 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 416 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 417 418 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ 419 CONVNTCK(r) : CONVOTCK(r)) 420 421 int 422 linux_times(struct thread *td, struct linux_times_args *args) 423 { 424 struct timeval tv, utime, stime, cutime, cstime; 425 struct l_times_argv tms; 426 struct proc *p; 427 int error; 428 429 if (args->buf != NULL) { 430 p = td->td_proc; 431 PROC_LOCK(p); 432 PROC_STATLOCK(p); 433 calcru(p, &utime, &stime); 434 PROC_STATUNLOCK(p); 435 calccru(p, &cutime, &cstime); 436 PROC_UNLOCK(p); 437 438 tms.tms_utime = CONVTCK(utime); 439 tms.tms_stime = CONVTCK(stime); 440 441 tms.tms_cutime = CONVTCK(cutime); 442 tms.tms_cstime = CONVTCK(cstime); 443 444 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 445 return (error); 446 } 447 448 microuptime(&tv); 449 td->td_retval[0] = (int)CONVTCK(tv); 450 return (0); 451 } 452 453 int 454 linux_newuname(struct thread *td, struct linux_newuname_args *args) 455 { 456 struct l_new_utsname utsname; 457 char osname[LINUX_MAX_UTSNAME]; 458 char osrelease[LINUX_MAX_UTSNAME]; 459 char *p; 460 461 linux_get_osname(td, osname); 462 linux_get_osrelease(td, osrelease); 463 464 bzero(&utsname, sizeof(utsname)); 465 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 466 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 467 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 468 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 469 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 470 for (p = utsname.version; *p != '\0'; ++p) 471 if (*p == '\n') { 472 *p = '\0'; 473 break; 474 } 475 #if defined(__amd64__) 476 /* 477 * On amd64, Linux uname(2) needs to return "x86_64" 478 * for both 64-bit and 32-bit applications. On 32-bit, 479 * the string returned by getauxval(AT_PLATFORM) needs 480 * to remain "i686", though. 481 */ 482 #if defined(COMPAT_LINUX32) 483 if (linux32_emulate_i386) 484 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 485 else 486 #endif 487 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 488 #elif defined(__aarch64__) 489 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 490 #elif defined(__i386__) 491 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 492 #endif 493 494 return (copyout(&utsname, args->buf, sizeof(utsname))); 495 } 496 497 struct l_utimbuf { 498 l_time_t l_actime; 499 l_time_t l_modtime; 500 }; 501 502 #ifdef LINUX_LEGACY_SYSCALLS 503 int 504 linux_utime(struct thread *td, struct linux_utime_args *args) 505 { 506 struct timeval tv[2], *tvp; 507 struct l_utimbuf lut; 508 int error; 509 510 if (args->times) { 511 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 512 return (error); 513 tv[0].tv_sec = lut.l_actime; 514 tv[0].tv_usec = 0; 515 tv[1].tv_sec = lut.l_modtime; 516 tv[1].tv_usec = 0; 517 tvp = tv; 518 } else 519 tvp = NULL; 520 521 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 522 tvp, UIO_SYSSPACE)); 523 } 524 #endif 525 526 #ifdef LINUX_LEGACY_SYSCALLS 527 int 528 linux_utimes(struct thread *td, struct linux_utimes_args *args) 529 { 530 l_timeval ltv[2]; 531 struct timeval tv[2], *tvp = NULL; 532 int error; 533 534 if (args->tptr != NULL) { 535 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 536 return (error); 537 tv[0].tv_sec = ltv[0].tv_sec; 538 tv[0].tv_usec = ltv[0].tv_usec; 539 tv[1].tv_sec = ltv[1].tv_sec; 540 tv[1].tv_usec = ltv[1].tv_usec; 541 tvp = tv; 542 } 543 544 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 545 tvp, UIO_SYSSPACE)); 546 } 547 #endif 548 549 static int 550 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 551 { 552 553 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 554 l_times->tv_nsec != LINUX_UTIME_NOW && 555 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 556 return (EINVAL); 557 558 times->tv_sec = l_times->tv_sec; 559 switch (l_times->tv_nsec) 560 { 561 case LINUX_UTIME_OMIT: 562 times->tv_nsec = UTIME_OMIT; 563 break; 564 case LINUX_UTIME_NOW: 565 times->tv_nsec = UTIME_NOW; 566 break; 567 default: 568 times->tv_nsec = l_times->tv_nsec; 569 } 570 571 return (0); 572 } 573 574 static int 575 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 576 struct timespec *timesp, int lflags) 577 { 578 int dfd, flags = 0; 579 580 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 581 582 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 583 return (EINVAL); 584 585 if (timesp != NULL) { 586 /* This breaks POSIX, but is what the Linux kernel does 587 * _on purpose_ (documented in the man page for utimensat(2)), 588 * so we must follow that behaviour. */ 589 if (timesp[0].tv_nsec == UTIME_OMIT && 590 timesp[1].tv_nsec == UTIME_OMIT) 591 return (0); 592 } 593 594 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 595 flags |= AT_SYMLINK_NOFOLLOW; 596 if (lflags & LINUX_AT_EMPTY_PATH) 597 flags |= AT_EMPTY_PATH; 598 599 if (pathname != NULL) 600 return (kern_utimensat(td, dfd, pathname, 601 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 602 603 if (lflags != 0) 604 return (EINVAL); 605 606 return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); 607 } 608 609 int 610 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 611 { 612 struct l_timespec l_times[2]; 613 struct timespec times[2], *timesp; 614 int error; 615 616 if (args->times != NULL) { 617 error = copyin(args->times, l_times, sizeof(l_times)); 618 if (error != 0) 619 return (error); 620 621 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 622 if (error != 0) 623 return (error); 624 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 625 if (error != 0) 626 return (error); 627 timesp = times; 628 } else 629 timesp = NULL; 630 631 return (linux_common_utimensat(td, args->dfd, args->pathname, 632 timesp, args->flags)); 633 } 634 635 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 636 static int 637 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 638 { 639 640 /* Zero out the padding in compat mode. */ 641 l_times->tv_nsec &= 0xFFFFFFFFUL; 642 643 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 644 l_times->tv_nsec != LINUX_UTIME_NOW && 645 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 646 return (EINVAL); 647 648 times->tv_sec = l_times->tv_sec; 649 switch (l_times->tv_nsec) 650 { 651 case LINUX_UTIME_OMIT: 652 times->tv_nsec = UTIME_OMIT; 653 break; 654 case LINUX_UTIME_NOW: 655 times->tv_nsec = UTIME_NOW; 656 break; 657 default: 658 times->tv_nsec = l_times->tv_nsec; 659 } 660 661 return (0); 662 } 663 664 int 665 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 666 { 667 struct l_timespec64 l_times[2]; 668 struct timespec times[2], *timesp; 669 int error; 670 671 if (args->times64 != NULL) { 672 error = copyin(args->times64, l_times, sizeof(l_times)); 673 if (error != 0) 674 return (error); 675 676 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 677 if (error != 0) 678 return (error); 679 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 680 if (error != 0) 681 return (error); 682 timesp = times; 683 } else 684 timesp = NULL; 685 686 return (linux_common_utimensat(td, args->dfd, args->pathname, 687 timesp, args->flags)); 688 } 689 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 690 691 #ifdef LINUX_LEGACY_SYSCALLS 692 int 693 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 694 { 695 l_timeval ltv[2]; 696 struct timeval tv[2], *tvp = NULL; 697 int error, dfd; 698 699 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 700 701 if (args->utimes != NULL) { 702 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 703 return (error); 704 tv[0].tv_sec = ltv[0].tv_sec; 705 tv[0].tv_usec = ltv[0].tv_usec; 706 tv[1].tv_sec = ltv[1].tv_sec; 707 tv[1].tv_usec = ltv[1].tv_usec; 708 tvp = tv; 709 } 710 711 return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 712 tvp, UIO_SYSSPACE)); 713 } 714 #endif 715 716 static int 717 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 718 int options, void *rup, l_siginfo_t *infop) 719 { 720 l_siginfo_t lsi; 721 siginfo_t siginfo; 722 struct __wrusage wru; 723 int error, status, tmpstat, sig; 724 725 error = kern_wait6(td, idtype, id, &status, options, 726 rup != NULL ? &wru : NULL, &siginfo); 727 728 if (error == 0 && statusp) { 729 tmpstat = status & 0xffff; 730 if (WIFSIGNALED(tmpstat)) { 731 tmpstat = (tmpstat & 0xffffff80) | 732 bsd_to_linux_signal(WTERMSIG(tmpstat)); 733 } else if (WIFSTOPPED(tmpstat)) { 734 tmpstat = (tmpstat & 0xffff00ff) | 735 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 736 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 737 if (WSTOPSIG(status) == SIGTRAP) { 738 tmpstat = linux_ptrace_status(td, 739 siginfo.si_pid, tmpstat); 740 } 741 #endif 742 } else if (WIFCONTINUED(tmpstat)) { 743 tmpstat = 0xffff; 744 } 745 error = copyout(&tmpstat, statusp, sizeof(int)); 746 } 747 if (error == 0 && rup != NULL) 748 error = linux_copyout_rusage(&wru.wru_self, rup); 749 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 750 sig = bsd_to_linux_signal(siginfo.si_signo); 751 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 752 error = copyout(&lsi, infop, sizeof(lsi)); 753 } 754 755 return (error); 756 } 757 758 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 759 int 760 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 761 { 762 struct linux_wait4_args wait4_args = { 763 .pid = args->pid, 764 .status = args->status, 765 .options = args->options, 766 .rusage = NULL, 767 }; 768 769 return (linux_wait4(td, &wait4_args)); 770 } 771 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 772 773 int 774 linux_wait4(struct thread *td, struct linux_wait4_args *args) 775 { 776 struct proc *p; 777 int options, id, idtype; 778 779 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 780 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 781 return (EINVAL); 782 783 /* -INT_MIN is not defined. */ 784 if (args->pid == INT_MIN) 785 return (ESRCH); 786 787 options = 0; 788 linux_to_bsd_waitopts(args->options, &options); 789 790 /* 791 * For backward compatibility we implicitly add flags WEXITED 792 * and WTRAPPED here. 793 */ 794 options |= WEXITED | WTRAPPED; 795 796 if (args->pid == WAIT_ANY) { 797 idtype = P_ALL; 798 id = 0; 799 } else if (args->pid < 0) { 800 idtype = P_PGID; 801 id = (id_t)-args->pid; 802 } else if (args->pid == 0) { 803 idtype = P_PGID; 804 p = td->td_proc; 805 PROC_LOCK(p); 806 id = p->p_pgid; 807 PROC_UNLOCK(p); 808 } else { 809 idtype = P_PID; 810 id = (id_t)args->pid; 811 } 812 813 return (linux_common_wait(td, idtype, id, args->status, options, 814 args->rusage, NULL)); 815 } 816 817 int 818 linux_waitid(struct thread *td, struct linux_waitid_args *args) 819 { 820 idtype_t idtype; 821 int error, options; 822 struct proc *p; 823 pid_t id; 824 825 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 826 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 827 return (EINVAL); 828 829 options = 0; 830 linux_to_bsd_waitopts(args->options, &options); 831 832 id = args->id; 833 switch (args->idtype) { 834 case LINUX_P_ALL: 835 idtype = P_ALL; 836 break; 837 case LINUX_P_PID: 838 if (args->id <= 0) 839 return (EINVAL); 840 idtype = P_PID; 841 break; 842 case LINUX_P_PGID: 843 if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { 844 p = td->td_proc; 845 PROC_LOCK(p); 846 id = p->p_pgid; 847 PROC_UNLOCK(p); 848 } else if (args->id <= 0) 849 return (EINVAL); 850 idtype = P_PGID; 851 break; 852 case LINUX_P_PIDFD: 853 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 854 return (ENOSYS); 855 default: 856 return (EINVAL); 857 } 858 859 error = linux_common_wait(td, idtype, id, NULL, options, 860 args->rusage, args->info); 861 td->td_retval[0] = 0; 862 863 return (error); 864 } 865 866 #ifdef LINUX_LEGACY_SYSCALLS 867 int 868 linux_mknod(struct thread *td, struct linux_mknod_args *args) 869 { 870 int error; 871 872 switch (args->mode & S_IFMT) { 873 case S_IFIFO: 874 case S_IFSOCK: 875 error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, 876 args->mode); 877 break; 878 879 case S_IFCHR: 880 case S_IFBLK: 881 error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, 882 args->mode, linux_decode_dev(args->dev)); 883 break; 884 885 case S_IFDIR: 886 error = EPERM; 887 break; 888 889 case 0: 890 args->mode |= S_IFREG; 891 /* FALLTHROUGH */ 892 case S_IFREG: 893 error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, 894 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 895 if (error == 0) 896 kern_close(td, td->td_retval[0]); 897 break; 898 899 default: 900 error = EINVAL; 901 break; 902 } 903 return (error); 904 } 905 #endif 906 907 int 908 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 909 { 910 int error, dfd; 911 912 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 913 914 switch (args->mode & S_IFMT) { 915 case S_IFIFO: 916 case S_IFSOCK: 917 error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, 918 args->mode); 919 break; 920 921 case S_IFCHR: 922 case S_IFBLK: 923 error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, 924 args->mode, linux_decode_dev(args->dev)); 925 break; 926 927 case S_IFDIR: 928 error = EPERM; 929 break; 930 931 case 0: 932 args->mode |= S_IFREG; 933 /* FALLTHROUGH */ 934 case S_IFREG: 935 error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, 936 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 937 if (error == 0) 938 kern_close(td, td->td_retval[0]); 939 break; 940 941 default: 942 error = EINVAL; 943 break; 944 } 945 return (error); 946 } 947 948 /* 949 * UGH! This is just about the dumbest idea I've ever heard!! 950 */ 951 int 952 linux_personality(struct thread *td, struct linux_personality_args *args) 953 { 954 struct linux_pemuldata *pem; 955 struct proc *p = td->td_proc; 956 uint32_t old; 957 958 PROC_LOCK(p); 959 pem = pem_find(p); 960 old = pem->persona; 961 if (args->per != 0xffffffff) 962 pem->persona = args->per; 963 PROC_UNLOCK(p); 964 965 td->td_retval[0] = old; 966 return (0); 967 } 968 969 struct l_itimerval { 970 l_timeval it_interval; 971 l_timeval it_value; 972 }; 973 974 #define B2L_ITIMERVAL(bip, lip) \ 975 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 976 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 977 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 978 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 979 980 int 981 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 982 { 983 int error; 984 struct l_itimerval ls; 985 struct itimerval aitv, oitv; 986 987 if (uap->itv == NULL) { 988 uap->itv = uap->oitv; 989 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 990 } 991 992 error = copyin(uap->itv, &ls, sizeof(ls)); 993 if (error != 0) 994 return (error); 995 B2L_ITIMERVAL(&aitv, &ls); 996 error = kern_setitimer(td, uap->which, &aitv, &oitv); 997 if (error != 0 || uap->oitv == NULL) 998 return (error); 999 B2L_ITIMERVAL(&ls, &oitv); 1000 1001 return (copyout(&ls, uap->oitv, sizeof(ls))); 1002 } 1003 1004 int 1005 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1006 { 1007 int error; 1008 struct l_itimerval ls; 1009 struct itimerval aitv; 1010 1011 error = kern_getitimer(td, uap->which, &aitv); 1012 if (error != 0) 1013 return (error); 1014 B2L_ITIMERVAL(&ls, &aitv); 1015 return (copyout(&ls, uap->itv, sizeof(ls))); 1016 } 1017 1018 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1019 int 1020 linux_nice(struct thread *td, struct linux_nice_args *args) 1021 { 1022 1023 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1024 } 1025 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1026 1027 int 1028 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1029 { 1030 struct ucred *newcred, *oldcred; 1031 l_gid_t *linux_gidset; 1032 gid_t *bsd_gidset; 1033 int ngrp, error; 1034 struct proc *p; 1035 1036 ngrp = args->gidsetsize; 1037 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1038 return (EINVAL); 1039 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1040 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1041 if (error) 1042 goto out; 1043 newcred = crget(); 1044 crextend(newcred, ngrp + 1); 1045 p = td->td_proc; 1046 PROC_LOCK(p); 1047 oldcred = p->p_ucred; 1048 crcopy(newcred, oldcred); 1049 1050 /* 1051 * cr_groups[0] holds egid. Setting the whole set from 1052 * the supplied set will cause egid to be changed too. 1053 * Keep cr_groups[0] unchanged to prevent that. 1054 */ 1055 1056 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1057 PROC_UNLOCK(p); 1058 crfree(newcred); 1059 goto out; 1060 } 1061 1062 if (ngrp > 0) { 1063 newcred->cr_ngroups = ngrp + 1; 1064 1065 bsd_gidset = newcred->cr_groups; 1066 ngrp--; 1067 while (ngrp >= 0) { 1068 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1069 ngrp--; 1070 } 1071 } else 1072 newcred->cr_ngroups = 1; 1073 1074 setsugid(p); 1075 proc_set_cred(p, newcred); 1076 PROC_UNLOCK(p); 1077 crfree(oldcred); 1078 error = 0; 1079 out: 1080 free(linux_gidset, M_LINUX); 1081 return (error); 1082 } 1083 1084 int 1085 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1086 { 1087 struct ucred *cred; 1088 l_gid_t *linux_gidset; 1089 gid_t *bsd_gidset; 1090 int bsd_gidsetsz, ngrp, error; 1091 1092 cred = td->td_ucred; 1093 bsd_gidset = cred->cr_groups; 1094 bsd_gidsetsz = cred->cr_ngroups - 1; 1095 1096 /* 1097 * cr_groups[0] holds egid. Returning the whole set 1098 * here will cause a duplicate. Exclude cr_groups[0] 1099 * to prevent that. 1100 */ 1101 1102 if ((ngrp = args->gidsetsize) == 0) { 1103 td->td_retval[0] = bsd_gidsetsz; 1104 return (0); 1105 } 1106 1107 if (ngrp < bsd_gidsetsz) 1108 return (EINVAL); 1109 1110 ngrp = 0; 1111 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1112 M_LINUX, M_WAITOK); 1113 while (ngrp < bsd_gidsetsz) { 1114 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1115 ngrp++; 1116 } 1117 1118 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1119 free(linux_gidset, M_LINUX); 1120 if (error) 1121 return (error); 1122 1123 td->td_retval[0] = ngrp; 1124 return (0); 1125 } 1126 1127 static bool 1128 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1129 { 1130 1131 if (linux_dummy_rlimits == 0) 1132 return (false); 1133 1134 switch (resource) { 1135 case LINUX_RLIMIT_LOCKS: 1136 case LINUX_RLIMIT_SIGPENDING: 1137 case LINUX_RLIMIT_MSGQUEUE: 1138 case LINUX_RLIMIT_RTTIME: 1139 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1140 rlim->rlim_max = LINUX_RLIM_INFINITY; 1141 return (true); 1142 case LINUX_RLIMIT_NICE: 1143 case LINUX_RLIMIT_RTPRIO: 1144 rlim->rlim_cur = 0; 1145 rlim->rlim_max = 0; 1146 return (true); 1147 default: 1148 return (false); 1149 } 1150 } 1151 1152 int 1153 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1154 { 1155 struct rlimit bsd_rlim; 1156 struct l_rlimit rlim; 1157 u_int which; 1158 int error; 1159 1160 if (args->resource >= LINUX_RLIM_NLIMITS) 1161 return (EINVAL); 1162 1163 which = linux_to_bsd_resource[args->resource]; 1164 if (which == -1) 1165 return (EINVAL); 1166 1167 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1168 if (error) 1169 return (error); 1170 1171 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1172 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1173 return (kern_setrlimit(td, which, &bsd_rlim)); 1174 } 1175 1176 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1177 int 1178 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1179 { 1180 struct l_rlimit rlim; 1181 struct rlimit bsd_rlim; 1182 u_int which; 1183 1184 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1185 rlim.rlim_cur = bsd_rlim.rlim_cur; 1186 rlim.rlim_max = bsd_rlim.rlim_max; 1187 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1188 } 1189 1190 if (args->resource >= LINUX_RLIM_NLIMITS) 1191 return (EINVAL); 1192 1193 which = linux_to_bsd_resource[args->resource]; 1194 if (which == -1) 1195 return (EINVAL); 1196 1197 lim_rlimit(td, which, &bsd_rlim); 1198 1199 #ifdef COMPAT_LINUX32 1200 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1201 if (rlim.rlim_cur == UINT_MAX) 1202 rlim.rlim_cur = INT_MAX; 1203 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1204 if (rlim.rlim_max == UINT_MAX) 1205 rlim.rlim_max = INT_MAX; 1206 #else 1207 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1208 if (rlim.rlim_cur == ULONG_MAX) 1209 rlim.rlim_cur = LONG_MAX; 1210 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1211 if (rlim.rlim_max == ULONG_MAX) 1212 rlim.rlim_max = LONG_MAX; 1213 #endif 1214 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1215 } 1216 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1217 1218 int 1219 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1220 { 1221 struct l_rlimit rlim; 1222 struct rlimit bsd_rlim; 1223 u_int which; 1224 1225 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1226 rlim.rlim_cur = bsd_rlim.rlim_cur; 1227 rlim.rlim_max = bsd_rlim.rlim_max; 1228 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1229 } 1230 1231 if (args->resource >= LINUX_RLIM_NLIMITS) 1232 return (EINVAL); 1233 1234 which = linux_to_bsd_resource[args->resource]; 1235 if (which == -1) 1236 return (EINVAL); 1237 1238 lim_rlimit(td, which, &bsd_rlim); 1239 1240 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1241 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1242 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1243 } 1244 1245 int 1246 linux_sched_setscheduler(struct thread *td, 1247 struct linux_sched_setscheduler_args *args) 1248 { 1249 struct sched_param sched_param; 1250 struct thread *tdt; 1251 int error, policy; 1252 1253 switch (args->policy) { 1254 case LINUX_SCHED_OTHER: 1255 policy = SCHED_OTHER; 1256 break; 1257 case LINUX_SCHED_FIFO: 1258 policy = SCHED_FIFO; 1259 break; 1260 case LINUX_SCHED_RR: 1261 policy = SCHED_RR; 1262 break; 1263 default: 1264 return (EINVAL); 1265 } 1266 1267 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1268 if (error) 1269 return (error); 1270 1271 if (linux_map_sched_prio) { 1272 switch (policy) { 1273 case SCHED_OTHER: 1274 if (sched_param.sched_priority != 0) 1275 return (EINVAL); 1276 1277 sched_param.sched_priority = 1278 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1279 break; 1280 case SCHED_FIFO: 1281 case SCHED_RR: 1282 if (sched_param.sched_priority < 1 || 1283 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1284 return (EINVAL); 1285 1286 /* 1287 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1288 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1289 */ 1290 sched_param.sched_priority = 1291 (sched_param.sched_priority - 1) * 1292 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1293 (LINUX_MAX_RT_PRIO - 1); 1294 break; 1295 } 1296 } 1297 1298 tdt = linux_tdfind(td, args->pid, -1); 1299 if (tdt == NULL) 1300 return (ESRCH); 1301 1302 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1303 PROC_UNLOCK(tdt->td_proc); 1304 return (error); 1305 } 1306 1307 int 1308 linux_sched_getscheduler(struct thread *td, 1309 struct linux_sched_getscheduler_args *args) 1310 { 1311 struct thread *tdt; 1312 int error, policy; 1313 1314 tdt = linux_tdfind(td, args->pid, -1); 1315 if (tdt == NULL) 1316 return (ESRCH); 1317 1318 error = kern_sched_getscheduler(td, tdt, &policy); 1319 PROC_UNLOCK(tdt->td_proc); 1320 1321 switch (policy) { 1322 case SCHED_OTHER: 1323 td->td_retval[0] = LINUX_SCHED_OTHER; 1324 break; 1325 case SCHED_FIFO: 1326 td->td_retval[0] = LINUX_SCHED_FIFO; 1327 break; 1328 case SCHED_RR: 1329 td->td_retval[0] = LINUX_SCHED_RR; 1330 break; 1331 } 1332 return (error); 1333 } 1334 1335 int 1336 linux_sched_get_priority_max(struct thread *td, 1337 struct linux_sched_get_priority_max_args *args) 1338 { 1339 struct sched_get_priority_max_args bsd; 1340 1341 if (linux_map_sched_prio) { 1342 switch (args->policy) { 1343 case LINUX_SCHED_OTHER: 1344 td->td_retval[0] = 0; 1345 return (0); 1346 case LINUX_SCHED_FIFO: 1347 case LINUX_SCHED_RR: 1348 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1349 return (0); 1350 default: 1351 return (EINVAL); 1352 } 1353 } 1354 1355 switch (args->policy) { 1356 case LINUX_SCHED_OTHER: 1357 bsd.policy = SCHED_OTHER; 1358 break; 1359 case LINUX_SCHED_FIFO: 1360 bsd.policy = SCHED_FIFO; 1361 break; 1362 case LINUX_SCHED_RR: 1363 bsd.policy = SCHED_RR; 1364 break; 1365 default: 1366 return (EINVAL); 1367 } 1368 return (sys_sched_get_priority_max(td, &bsd)); 1369 } 1370 1371 int 1372 linux_sched_get_priority_min(struct thread *td, 1373 struct linux_sched_get_priority_min_args *args) 1374 { 1375 struct sched_get_priority_min_args bsd; 1376 1377 if (linux_map_sched_prio) { 1378 switch (args->policy) { 1379 case LINUX_SCHED_OTHER: 1380 td->td_retval[0] = 0; 1381 return (0); 1382 case LINUX_SCHED_FIFO: 1383 case LINUX_SCHED_RR: 1384 td->td_retval[0] = 1; 1385 return (0); 1386 default: 1387 return (EINVAL); 1388 } 1389 } 1390 1391 switch (args->policy) { 1392 case LINUX_SCHED_OTHER: 1393 bsd.policy = SCHED_OTHER; 1394 break; 1395 case LINUX_SCHED_FIFO: 1396 bsd.policy = SCHED_FIFO; 1397 break; 1398 case LINUX_SCHED_RR: 1399 bsd.policy = SCHED_RR; 1400 break; 1401 default: 1402 return (EINVAL); 1403 } 1404 return (sys_sched_get_priority_min(td, &bsd)); 1405 } 1406 1407 #define REBOOT_CAD_ON 0x89abcdef 1408 #define REBOOT_CAD_OFF 0 1409 #define REBOOT_HALT 0xcdef0123 1410 #define REBOOT_RESTART 0x01234567 1411 #define REBOOT_RESTART2 0xA1B2C3D4 1412 #define REBOOT_POWEROFF 0x4321FEDC 1413 #define REBOOT_MAGIC1 0xfee1dead 1414 #define REBOOT_MAGIC2 0x28121969 1415 #define REBOOT_MAGIC2A 0x05121996 1416 #define REBOOT_MAGIC2B 0x16041998 1417 1418 int 1419 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1420 { 1421 struct reboot_args bsd_args; 1422 1423 if (args->magic1 != REBOOT_MAGIC1) 1424 return (EINVAL); 1425 1426 switch (args->magic2) { 1427 case REBOOT_MAGIC2: 1428 case REBOOT_MAGIC2A: 1429 case REBOOT_MAGIC2B: 1430 break; 1431 default: 1432 return (EINVAL); 1433 } 1434 1435 switch (args->cmd) { 1436 case REBOOT_CAD_ON: 1437 case REBOOT_CAD_OFF: 1438 return (priv_check(td, PRIV_REBOOT)); 1439 case REBOOT_HALT: 1440 bsd_args.opt = RB_HALT; 1441 break; 1442 case REBOOT_RESTART: 1443 case REBOOT_RESTART2: 1444 bsd_args.opt = 0; 1445 break; 1446 case REBOOT_POWEROFF: 1447 bsd_args.opt = RB_POWEROFF; 1448 break; 1449 default: 1450 return (EINVAL); 1451 } 1452 return (sys_reboot(td, &bsd_args)); 1453 } 1454 1455 int 1456 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1457 { 1458 1459 td->td_retval[0] = td->td_proc->p_pid; 1460 1461 return (0); 1462 } 1463 1464 int 1465 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1466 { 1467 struct linux_emuldata *em; 1468 1469 em = em_find(td); 1470 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1471 1472 td->td_retval[0] = em->em_tid; 1473 1474 return (0); 1475 } 1476 1477 int 1478 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1479 { 1480 1481 td->td_retval[0] = kern_getppid(td); 1482 return (0); 1483 } 1484 1485 int 1486 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1487 { 1488 1489 td->td_retval[0] = td->td_ucred->cr_rgid; 1490 return (0); 1491 } 1492 1493 int 1494 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1495 { 1496 1497 td->td_retval[0] = td->td_ucred->cr_ruid; 1498 return (0); 1499 } 1500 1501 int 1502 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1503 { 1504 1505 return (kern_getsid(td, args->pid)); 1506 } 1507 1508 int 1509 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1510 { 1511 int error; 1512 1513 error = kern_getpriority(td, args->which, args->who); 1514 td->td_retval[0] = 20 - td->td_retval[0]; 1515 return (error); 1516 } 1517 1518 int 1519 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1520 { 1521 int name[2]; 1522 1523 name[0] = CTL_KERN; 1524 name[1] = KERN_HOSTNAME; 1525 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1526 args->len, 0, 0)); 1527 } 1528 1529 int 1530 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1531 { 1532 int name[2]; 1533 1534 name[0] = CTL_KERN; 1535 name[1] = KERN_NISDOMAINNAME; 1536 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1537 args->len, 0, 0)); 1538 } 1539 1540 int 1541 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1542 { 1543 1544 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1545 args->error_code); 1546 1547 /* 1548 * XXX: we should send a signal to the parent if 1549 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1550 * as it doesnt occur often. 1551 */ 1552 exit1(td, args->error_code, 0); 1553 /* NOTREACHED */ 1554 } 1555 1556 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1557 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1558 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1559 1560 struct l_user_cap_header { 1561 l_int version; 1562 l_int pid; 1563 }; 1564 1565 struct l_user_cap_data { 1566 l_int effective; 1567 l_int permitted; 1568 l_int inheritable; 1569 }; 1570 1571 int 1572 linux_capget(struct thread *td, struct linux_capget_args *uap) 1573 { 1574 struct l_user_cap_header luch; 1575 struct l_user_cap_data lucd[2]; 1576 int error, u32s; 1577 1578 if (uap->hdrp == NULL) 1579 return (EFAULT); 1580 1581 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1582 if (error != 0) 1583 return (error); 1584 1585 switch (luch.version) { 1586 case _LINUX_CAPABILITY_VERSION_1: 1587 u32s = 1; 1588 break; 1589 case _LINUX_CAPABILITY_VERSION_2: 1590 case _LINUX_CAPABILITY_VERSION_3: 1591 u32s = 2; 1592 break; 1593 default: 1594 luch.version = _LINUX_CAPABILITY_VERSION_1; 1595 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1596 if (error) 1597 return (error); 1598 return (EINVAL); 1599 } 1600 1601 if (luch.pid) 1602 return (EPERM); 1603 1604 if (uap->datap) { 1605 /* 1606 * The current implementation doesn't support setting 1607 * a capability (it's essentially a stub) so indicate 1608 * that no capabilities are currently set or available 1609 * to request. 1610 */ 1611 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1612 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1613 } 1614 1615 return (error); 1616 } 1617 1618 int 1619 linux_capset(struct thread *td, struct linux_capset_args *uap) 1620 { 1621 struct l_user_cap_header luch; 1622 struct l_user_cap_data lucd[2]; 1623 int error, i, u32s; 1624 1625 if (uap->hdrp == NULL || uap->datap == NULL) 1626 return (EFAULT); 1627 1628 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1629 if (error != 0) 1630 return (error); 1631 1632 switch (luch.version) { 1633 case _LINUX_CAPABILITY_VERSION_1: 1634 u32s = 1; 1635 break; 1636 case _LINUX_CAPABILITY_VERSION_2: 1637 case _LINUX_CAPABILITY_VERSION_3: 1638 u32s = 2; 1639 break; 1640 default: 1641 luch.version = _LINUX_CAPABILITY_VERSION_1; 1642 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1643 if (error) 1644 return (error); 1645 return (EINVAL); 1646 } 1647 1648 if (luch.pid) 1649 return (EPERM); 1650 1651 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1652 if (error != 0) 1653 return (error); 1654 1655 /* We currently don't support setting any capabilities. */ 1656 for (i = 0; i < u32s; i++) { 1657 if (lucd[i].effective || lucd[i].permitted || 1658 lucd[i].inheritable) { 1659 linux_msg(td, 1660 "capset[%d] effective=0x%x, permitted=0x%x, " 1661 "inheritable=0x%x is not implemented", i, 1662 (int)lucd[i].effective, (int)lucd[i].permitted, 1663 (int)lucd[i].inheritable); 1664 return (EPERM); 1665 } 1666 } 1667 1668 return (0); 1669 } 1670 1671 int 1672 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1673 { 1674 int error = 0, max_size, arg; 1675 struct proc *p = td->td_proc; 1676 char comm[LINUX_MAX_COMM_LEN]; 1677 int pdeath_signal, trace_state; 1678 1679 switch (args->option) { 1680 case LINUX_PR_SET_PDEATHSIG: 1681 if (!LINUX_SIG_VALID(args->arg2)) 1682 return (EINVAL); 1683 pdeath_signal = linux_to_bsd_signal(args->arg2); 1684 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1685 &pdeath_signal)); 1686 case LINUX_PR_GET_PDEATHSIG: 1687 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1688 &pdeath_signal); 1689 if (error != 0) 1690 return (error); 1691 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1692 return (copyout(&pdeath_signal, 1693 (void *)(register_t)args->arg2, 1694 sizeof(pdeath_signal))); 1695 /* 1696 * In Linux, this flag controls if set[gu]id processes can coredump. 1697 * There are additional semantics imposed on processes that cannot 1698 * coredump: 1699 * - Such processes can not be ptraced. 1700 * - There are some semantics around ownership of process-related files 1701 * in the /proc namespace. 1702 * 1703 * In FreeBSD, we can (and by default, do) disable setuid coredump 1704 * system-wide with 'sugid_coredump.' We control tracability on a 1705 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1706 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1707 * procctl is roughly analogous to Linux's DUMPABLE. 1708 * 1709 * So, proxy these knobs to the corresponding PROC_TRACE setting. 1710 */ 1711 case LINUX_PR_GET_DUMPABLE: 1712 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1713 &trace_state); 1714 if (error != 0) 1715 return (error); 1716 td->td_retval[0] = (trace_state != -1); 1717 return (0); 1718 case LINUX_PR_SET_DUMPABLE: 1719 /* 1720 * It is only valid for userspace to set one of these two 1721 * flags, and only one at a time. 1722 */ 1723 switch (args->arg2) { 1724 case LINUX_SUID_DUMP_DISABLE: 1725 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1726 break; 1727 case LINUX_SUID_DUMP_USER: 1728 trace_state = PROC_TRACE_CTL_ENABLE; 1729 break; 1730 default: 1731 return (EINVAL); 1732 } 1733 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1734 &trace_state)); 1735 case LINUX_PR_GET_KEEPCAPS: 1736 /* 1737 * Indicate that we always clear the effective and 1738 * permitted capability sets when the user id becomes 1739 * non-zero (actually the capability sets are simply 1740 * always zero in the current implementation). 1741 */ 1742 td->td_retval[0] = 0; 1743 break; 1744 case LINUX_PR_SET_KEEPCAPS: 1745 /* 1746 * Ignore requests to keep the effective and permitted 1747 * capability sets when the user id becomes non-zero. 1748 */ 1749 break; 1750 case LINUX_PR_SET_NAME: 1751 /* 1752 * To be on the safe side we need to make sure to not 1753 * overflow the size a Linux program expects. We already 1754 * do this here in the copyin, so that we don't need to 1755 * check on copyout. 1756 */ 1757 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1758 error = copyinstr((void *)(register_t)args->arg2, comm, 1759 max_size, NULL); 1760 1761 /* Linux silently truncates the name if it is too long. */ 1762 if (error == ENAMETOOLONG) { 1763 /* 1764 * XXX: copyinstr() isn't documented to populate the 1765 * array completely, so do a copyin() to be on the 1766 * safe side. This should be changed in case 1767 * copyinstr() is changed to guarantee this. 1768 */ 1769 error = copyin((void *)(register_t)args->arg2, comm, 1770 max_size - 1); 1771 comm[max_size - 1] = '\0'; 1772 } 1773 if (error) 1774 return (error); 1775 1776 PROC_LOCK(p); 1777 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1778 PROC_UNLOCK(p); 1779 break; 1780 case LINUX_PR_GET_NAME: 1781 PROC_LOCK(p); 1782 strlcpy(comm, p->p_comm, sizeof(comm)); 1783 PROC_UNLOCK(p); 1784 error = copyout(comm, (void *)(register_t)args->arg2, 1785 strlen(comm) + 1); 1786 break; 1787 case LINUX_PR_GET_SECCOMP: 1788 case LINUX_PR_SET_SECCOMP: 1789 /* 1790 * Same as returned by Linux without CONFIG_SECCOMP enabled. 1791 */ 1792 error = EINVAL; 1793 break; 1794 case LINUX_PR_CAPBSET_READ: 1795 #if 0 1796 /* 1797 * This makes too much noise with Ubuntu Focal. 1798 */ 1799 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 1800 (int)args->arg2); 1801 #endif 1802 error = EINVAL; 1803 break; 1804 case LINUX_PR_SET_NO_NEW_PRIVS: 1805 arg = args->arg2 == 1 ? 1806 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 1807 error = kern_procctl(td, P_PID, p->p_pid, 1808 PROC_NO_NEW_PRIVS_CTL, &arg); 1809 break; 1810 case LINUX_PR_SET_PTRACER: 1811 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 1812 error = EINVAL; 1813 break; 1814 default: 1815 linux_msg(td, "unsupported prctl option %d", args->option); 1816 error = EINVAL; 1817 break; 1818 } 1819 1820 return (error); 1821 } 1822 1823 int 1824 linux_sched_setparam(struct thread *td, 1825 struct linux_sched_setparam_args *uap) 1826 { 1827 struct sched_param sched_param; 1828 struct thread *tdt; 1829 int error, policy; 1830 1831 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1832 if (error) 1833 return (error); 1834 1835 tdt = linux_tdfind(td, uap->pid, -1); 1836 if (tdt == NULL) 1837 return (ESRCH); 1838 1839 if (linux_map_sched_prio) { 1840 error = kern_sched_getscheduler(td, tdt, &policy); 1841 if (error) 1842 goto out; 1843 1844 switch (policy) { 1845 case SCHED_OTHER: 1846 if (sched_param.sched_priority != 0) { 1847 error = EINVAL; 1848 goto out; 1849 } 1850 sched_param.sched_priority = 1851 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1852 break; 1853 case SCHED_FIFO: 1854 case SCHED_RR: 1855 if (sched_param.sched_priority < 1 || 1856 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1857 error = EINVAL; 1858 goto out; 1859 } 1860 /* 1861 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1862 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1863 */ 1864 sched_param.sched_priority = 1865 (sched_param.sched_priority - 1) * 1866 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1867 (LINUX_MAX_RT_PRIO - 1); 1868 break; 1869 } 1870 } 1871 1872 error = kern_sched_setparam(td, tdt, &sched_param); 1873 out: PROC_UNLOCK(tdt->td_proc); 1874 return (error); 1875 } 1876 1877 int 1878 linux_sched_getparam(struct thread *td, 1879 struct linux_sched_getparam_args *uap) 1880 { 1881 struct sched_param sched_param; 1882 struct thread *tdt; 1883 int error, policy; 1884 1885 tdt = linux_tdfind(td, uap->pid, -1); 1886 if (tdt == NULL) 1887 return (ESRCH); 1888 1889 error = kern_sched_getparam(td, tdt, &sched_param); 1890 if (error) { 1891 PROC_UNLOCK(tdt->td_proc); 1892 return (error); 1893 } 1894 1895 if (linux_map_sched_prio) { 1896 error = kern_sched_getscheduler(td, tdt, &policy); 1897 PROC_UNLOCK(tdt->td_proc); 1898 if (error) 1899 return (error); 1900 1901 switch (policy) { 1902 case SCHED_OTHER: 1903 sched_param.sched_priority = 0; 1904 break; 1905 case SCHED_FIFO: 1906 case SCHED_RR: 1907 /* 1908 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 1909 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 1910 */ 1911 sched_param.sched_priority = 1912 (sched_param.sched_priority * 1913 (LINUX_MAX_RT_PRIO - 1) + 1914 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 1915 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 1916 break; 1917 } 1918 } else 1919 PROC_UNLOCK(tdt->td_proc); 1920 1921 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 1922 return (error); 1923 } 1924 1925 /* 1926 * Get affinity of a process. 1927 */ 1928 int 1929 linux_sched_getaffinity(struct thread *td, 1930 struct linux_sched_getaffinity_args *args) 1931 { 1932 struct thread *tdt; 1933 cpuset_t *mask; 1934 size_t size; 1935 int error; 1936 id_t tid; 1937 1938 tdt = linux_tdfind(td, args->pid, -1); 1939 if (tdt == NULL) 1940 return (ESRCH); 1941 tid = tdt->td_tid; 1942 PROC_UNLOCK(tdt->td_proc); 1943 1944 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1945 size = min(args->len, sizeof(cpuset_t)); 1946 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1947 tid, size, mask); 1948 if (error == ERANGE) 1949 error = EINVAL; 1950 if (error == 0) 1951 error = copyout(mask, args->user_mask_ptr, size); 1952 if (error == 0) 1953 td->td_retval[0] = size; 1954 free(mask, M_LINUX); 1955 return (error); 1956 } 1957 1958 /* 1959 * Set affinity of a process. 1960 */ 1961 int 1962 linux_sched_setaffinity(struct thread *td, 1963 struct linux_sched_setaffinity_args *args) 1964 { 1965 struct thread *tdt; 1966 cpuset_t *mask; 1967 int cpu, error; 1968 size_t len; 1969 id_t tid; 1970 1971 tdt = linux_tdfind(td, args->pid, -1); 1972 if (tdt == NULL) 1973 return (ESRCH); 1974 tid = tdt->td_tid; 1975 PROC_UNLOCK(tdt->td_proc); 1976 1977 len = min(args->len, sizeof(cpuset_t)); 1978 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); 1979 error = copyin(args->user_mask_ptr, mask, len); 1980 if (error != 0) 1981 goto out; 1982 /* Linux ignore high bits */ 1983 CPU_FOREACH_ISSET(cpu, mask) 1984 if (cpu > mp_maxid) 1985 CPU_CLR(cpu, mask); 1986 1987 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1988 tid, mask); 1989 if (error == EDEADLK) 1990 error = EINVAL; 1991 out: 1992 free(mask, M_TEMP); 1993 return (error); 1994 } 1995 1996 struct linux_rlimit64 { 1997 uint64_t rlim_cur; 1998 uint64_t rlim_max; 1999 }; 2000 2001 int 2002 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2003 { 2004 struct rlimit rlim, nrlim; 2005 struct linux_rlimit64 lrlim; 2006 struct proc *p; 2007 u_int which; 2008 int flags; 2009 int error; 2010 2011 if (args->new == NULL && args->old != NULL) { 2012 if (linux_get_dummy_limit(args->resource, &rlim)) { 2013 lrlim.rlim_cur = rlim.rlim_cur; 2014 lrlim.rlim_max = rlim.rlim_max; 2015 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2016 } 2017 } 2018 2019 if (args->resource >= LINUX_RLIM_NLIMITS) 2020 return (EINVAL); 2021 2022 which = linux_to_bsd_resource[args->resource]; 2023 if (which == -1) 2024 return (EINVAL); 2025 2026 if (args->new != NULL) { 2027 /* 2028 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2029 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2030 * as INFINITY so we do not need a conversion even. 2031 */ 2032 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2033 if (error != 0) 2034 return (error); 2035 } 2036 2037 flags = PGET_HOLD | PGET_NOTWEXIT; 2038 if (args->new != NULL) 2039 flags |= PGET_CANDEBUG; 2040 else 2041 flags |= PGET_CANSEE; 2042 if (args->pid == 0) { 2043 p = td->td_proc; 2044 PHOLD(p); 2045 } else { 2046 error = pget(args->pid, flags, &p); 2047 if (error != 0) 2048 return (error); 2049 } 2050 if (args->old != NULL) { 2051 PROC_LOCK(p); 2052 lim_rlimit_proc(p, which, &rlim); 2053 PROC_UNLOCK(p); 2054 if (rlim.rlim_cur == RLIM_INFINITY) 2055 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2056 else 2057 lrlim.rlim_cur = rlim.rlim_cur; 2058 if (rlim.rlim_max == RLIM_INFINITY) 2059 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2060 else 2061 lrlim.rlim_max = rlim.rlim_max; 2062 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2063 if (error != 0) 2064 goto out; 2065 } 2066 2067 if (args->new != NULL) 2068 error = kern_proc_setrlimit(td, p, which, &nrlim); 2069 2070 out: 2071 PRELE(p); 2072 return (error); 2073 } 2074 2075 int 2076 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2077 { 2078 struct timespec ts, *tsp; 2079 int error; 2080 2081 if (args->tsp != NULL) { 2082 error = linux_get_timespec(&ts, args->tsp); 2083 if (error != 0) 2084 return (error); 2085 tsp = &ts; 2086 } else 2087 tsp = NULL; 2088 2089 error = linux_common_pselect6(td, args->nfds, args->readfds, 2090 args->writefds, args->exceptfds, tsp, args->sig); 2091 2092 if (args->tsp != NULL) 2093 linux_put_timespec(&ts, args->tsp); 2094 return (error); 2095 } 2096 2097 static int 2098 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2099 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2100 l_uintptr_t *sig) 2101 { 2102 struct timeval utv, tv0, tv1, *tvp; 2103 struct l_pselect6arg lpse6; 2104 sigset_t *ssp; 2105 sigset_t ss; 2106 int error; 2107 2108 ssp = NULL; 2109 if (sig != NULL) { 2110 error = copyin(sig, &lpse6, sizeof(lpse6)); 2111 if (error != 0) 2112 return (error); 2113 error = linux_copyin_sigset(td, PTRIN(lpse6.ss), 2114 lpse6.ss_len, &ss, &ssp); 2115 if (error != 0) 2116 return (error); 2117 } else 2118 ssp = NULL; 2119 2120 /* 2121 * Currently glibc changes nanosecond number to microsecond. 2122 * This mean losing precision but for now it is hardly seen. 2123 */ 2124 if (tsp != NULL) { 2125 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2126 if (itimerfix(&utv)) 2127 return (EINVAL); 2128 2129 microtime(&tv0); 2130 tvp = &utv; 2131 } else 2132 tvp = NULL; 2133 2134 error = kern_pselect(td, nfds, readfds, writefds, 2135 exceptfds, tvp, ssp, LINUX_NFDBITS); 2136 2137 if (tsp != NULL) { 2138 /* 2139 * Compute how much time was left of the timeout, 2140 * by subtracting the current time and the time 2141 * before we started the call, and subtracting 2142 * that result from the user-supplied value. 2143 */ 2144 microtime(&tv1); 2145 timevalsub(&tv1, &tv0); 2146 timevalsub(&utv, &tv1); 2147 if (utv.tv_sec < 0) 2148 timevalclear(&utv); 2149 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2150 } 2151 return (error); 2152 } 2153 2154 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2155 int 2156 linux_pselect6_time64(struct thread *td, 2157 struct linux_pselect6_time64_args *args) 2158 { 2159 struct timespec ts, *tsp; 2160 int error; 2161 2162 if (args->tsp != NULL) { 2163 error = linux_get_timespec64(&ts, args->tsp); 2164 if (error != 0) 2165 return (error); 2166 tsp = &ts; 2167 } else 2168 tsp = NULL; 2169 2170 error = linux_common_pselect6(td, args->nfds, args->readfds, 2171 args->writefds, args->exceptfds, tsp, args->sig); 2172 2173 if (args->tsp != NULL) 2174 linux_put_timespec64(&ts, args->tsp); 2175 return (error); 2176 } 2177 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2178 2179 int 2180 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2181 { 2182 struct timespec uts, *tsp; 2183 int error; 2184 2185 if (args->tsp != NULL) { 2186 error = linux_get_timespec(&uts, args->tsp); 2187 if (error != 0) 2188 return (error); 2189 tsp = &uts; 2190 } else 2191 tsp = NULL; 2192 2193 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2194 args->sset, args->ssize); 2195 if (error == 0 && args->tsp != NULL) 2196 error = linux_put_timespec(&uts, args->tsp); 2197 return (error); 2198 } 2199 2200 static int 2201 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2202 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2203 { 2204 struct timespec ts0, ts1; 2205 struct pollfd stackfds[32]; 2206 struct pollfd *kfds; 2207 sigset_t *ssp; 2208 sigset_t ss; 2209 int error; 2210 2211 if (kern_poll_maxfds(nfds)) 2212 return (EINVAL); 2213 if (sset != NULL) { 2214 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); 2215 if (error != 0) 2216 return (error); 2217 } else 2218 ssp = NULL; 2219 if (tsp != NULL) 2220 nanotime(&ts0); 2221 2222 if (nfds > nitems(stackfds)) 2223 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2224 else 2225 kfds = stackfds; 2226 error = linux_pollin(td, kfds, fds, nfds); 2227 if (error != 0) 2228 goto out; 2229 2230 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2231 if (error == 0) 2232 error = linux_pollout(td, kfds, fds, nfds); 2233 2234 if (error == 0 && tsp != NULL) { 2235 if (td->td_retval[0]) { 2236 nanotime(&ts1); 2237 timespecsub(&ts1, &ts0, &ts1); 2238 timespecsub(tsp, &ts1, tsp); 2239 if (tsp->tv_sec < 0) 2240 timespecclear(tsp); 2241 } else 2242 timespecclear(tsp); 2243 } 2244 2245 out: 2246 if (nfds > nitems(stackfds)) 2247 free(kfds, M_TEMP); 2248 return (error); 2249 } 2250 2251 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2252 int 2253 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2254 { 2255 struct timespec uts, *tsp; 2256 int error; 2257 2258 if (args->tsp != NULL) { 2259 error = linux_get_timespec64(&uts, args->tsp); 2260 if (error != 0) 2261 return (error); 2262 tsp = &uts; 2263 } else 2264 tsp = NULL; 2265 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2266 args->sset, args->ssize); 2267 if (error == 0 && args->tsp != NULL) 2268 error = linux_put_timespec64(&uts, args->tsp); 2269 return (error); 2270 } 2271 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2272 2273 static int 2274 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2275 { 2276 int error; 2277 u_int i; 2278 2279 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2280 if (error != 0) 2281 return (error); 2282 2283 for (i = 0; i < nfd; i++) { 2284 if (fds->events != 0) 2285 linux_to_bsd_poll_events(td, fds->fd, 2286 fds->events, &fds->events); 2287 fds++; 2288 } 2289 return (0); 2290 } 2291 2292 static int 2293 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2294 { 2295 int error = 0; 2296 u_int i, n = 0; 2297 2298 for (i = 0; i < nfd; i++) { 2299 if (fds->revents != 0) { 2300 bsd_to_linux_poll_events(fds->revents, 2301 &fds->revents); 2302 n++; 2303 } 2304 error = copyout(&fds->revents, &ufds->revents, 2305 sizeof(ufds->revents)); 2306 if (error) 2307 return (error); 2308 fds++; 2309 ufds++; 2310 } 2311 td->td_retval[0] = n; 2312 return (0); 2313 } 2314 2315 static int 2316 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2317 struct timespec *ts) 2318 { 2319 struct thread *tdt; 2320 int error; 2321 2322 /* 2323 * According to man in case the invalid pid specified 2324 * EINVAL should be returned. 2325 */ 2326 if (pid < 0) 2327 return (EINVAL); 2328 2329 tdt = linux_tdfind(td, pid, -1); 2330 if (tdt == NULL) 2331 return (ESRCH); 2332 2333 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2334 PROC_UNLOCK(tdt->td_proc); 2335 return (error); 2336 } 2337 2338 int 2339 linux_sched_rr_get_interval(struct thread *td, 2340 struct linux_sched_rr_get_interval_args *uap) 2341 { 2342 struct timespec ts; 2343 int error; 2344 2345 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2346 if (error != 0) 2347 return (error); 2348 return (linux_put_timespec(&ts, uap->interval)); 2349 } 2350 2351 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2352 int 2353 linux_sched_rr_get_interval_time64(struct thread *td, 2354 struct linux_sched_rr_get_interval_time64_args *uap) 2355 { 2356 struct timespec ts; 2357 int error; 2358 2359 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2360 if (error != 0) 2361 return (error); 2362 return (linux_put_timespec64(&ts, uap->interval)); 2363 } 2364 #endif 2365 2366 /* 2367 * In case when the Linux thread is the initial thread in 2368 * the thread group thread id is equal to the process id. 2369 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2370 */ 2371 struct thread * 2372 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2373 { 2374 struct linux_emuldata *em; 2375 struct thread *tdt; 2376 struct proc *p; 2377 2378 tdt = NULL; 2379 if (tid == 0 || tid == td->td_tid) { 2380 if (pid != -1 && td->td_proc->p_pid != pid) 2381 return (NULL); 2382 PROC_LOCK(td->td_proc); 2383 return (td); 2384 } else if (tid > PID_MAX) 2385 return (tdfind(tid, pid)); 2386 2387 /* 2388 * Initial thread where the tid equal to the pid. 2389 */ 2390 p = pfind(tid); 2391 if (p != NULL) { 2392 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2393 (pid != -1 && tid != pid)) { 2394 /* 2395 * p is not a Linuxulator process. 2396 */ 2397 PROC_UNLOCK(p); 2398 return (NULL); 2399 } 2400 FOREACH_THREAD_IN_PROC(p, tdt) { 2401 em = em_find(tdt); 2402 if (tid == em->em_tid) 2403 return (tdt); 2404 } 2405 PROC_UNLOCK(p); 2406 } 2407 return (NULL); 2408 } 2409 2410 void 2411 linux_to_bsd_waitopts(int options, int *bsdopts) 2412 { 2413 2414 if (options & LINUX_WNOHANG) 2415 *bsdopts |= WNOHANG; 2416 if (options & LINUX_WUNTRACED) 2417 *bsdopts |= WUNTRACED; 2418 if (options & LINUX_WEXITED) 2419 *bsdopts |= WEXITED; 2420 if (options & LINUX_WCONTINUED) 2421 *bsdopts |= WCONTINUED; 2422 if (options & LINUX_WNOWAIT) 2423 *bsdopts |= WNOWAIT; 2424 2425 if (options & __WCLONE) 2426 *bsdopts |= WLINUXCLONE; 2427 } 2428 2429 int 2430 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2431 { 2432 struct uio uio; 2433 struct iovec iov; 2434 int error; 2435 2436 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2437 return (EINVAL); 2438 if (args->count > INT_MAX) 2439 args->count = INT_MAX; 2440 2441 iov.iov_base = args->buf; 2442 iov.iov_len = args->count; 2443 2444 uio.uio_iov = &iov; 2445 uio.uio_iovcnt = 1; 2446 uio.uio_resid = iov.iov_len; 2447 uio.uio_segflg = UIO_USERSPACE; 2448 uio.uio_rw = UIO_READ; 2449 uio.uio_td = td; 2450 2451 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2452 if (error == 0) 2453 td->td_retval[0] = args->count - uio.uio_resid; 2454 return (error); 2455 } 2456 2457 int 2458 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2459 { 2460 2461 /* Needs to be page-aligned */ 2462 if (args->start & PAGE_MASK) 2463 return (EINVAL); 2464 return (kern_mincore(td, args->start, args->len, args->vec)); 2465 } 2466 2467 #define SYSLOG_TAG "<6>" 2468 2469 int 2470 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2471 { 2472 char buf[128], *src, *dst; 2473 u_int seq; 2474 int buflen, error; 2475 2476 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2477 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2478 return (EINVAL); 2479 } 2480 2481 if (args->len < 6) { 2482 td->td_retval[0] = 0; 2483 return (0); 2484 } 2485 2486 error = priv_check(td, PRIV_MSGBUF); 2487 if (error) 2488 return (error); 2489 2490 mtx_lock(&msgbuf_lock); 2491 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2492 mtx_unlock(&msgbuf_lock); 2493 2494 dst = args->buf; 2495 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2496 /* The -1 is to skip the trailing '\0'. */ 2497 dst += sizeof(SYSLOG_TAG) - 1; 2498 2499 while (error == 0) { 2500 mtx_lock(&msgbuf_lock); 2501 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2502 mtx_unlock(&msgbuf_lock); 2503 2504 if (buflen == 0) 2505 break; 2506 2507 for (src = buf; src < buf + buflen && error == 0; src++) { 2508 if (*src == '\0') 2509 continue; 2510 2511 if (dst >= args->buf + args->len) 2512 goto out; 2513 2514 error = copyout(src, dst, 1); 2515 dst++; 2516 2517 if (*src == '\n' && *(src + 1) != '<' && 2518 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2519 error = copyout(&SYSLOG_TAG, 2520 dst, sizeof(SYSLOG_TAG)); 2521 dst += sizeof(SYSLOG_TAG) - 1; 2522 } 2523 } 2524 } 2525 out: 2526 td->td_retval[0] = dst - args->buf; 2527 return (error); 2528 } 2529 2530 int 2531 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2532 { 2533 int cpu, error, node; 2534 2535 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2536 error = 0; 2537 node = cpuid_to_pcpu[cpu]->pc_domain; 2538 2539 if (args->cpu != NULL) 2540 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2541 if (args->node != NULL) 2542 error = copyout(&node, args->node, sizeof(l_int)); 2543 return (error); 2544 } 2545 2546 #if defined(__i386__) || defined(__amd64__) 2547 int 2548 linux_poll(struct thread *td, struct linux_poll_args *args) 2549 { 2550 struct timespec ts, *tsp; 2551 2552 if (args->timeout != INFTIM) { 2553 if (args->timeout < 0) 2554 return (EINVAL); 2555 ts.tv_sec = args->timeout / 1000; 2556 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2557 tsp = &ts; 2558 } else 2559 tsp = NULL; 2560 2561 return (linux_common_ppoll(td, args->fds, args->nfds, 2562 tsp, NULL, 0)); 2563 } 2564 #endif /* __i386__ || __amd64__ */ 2565 2566 int 2567 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2568 { 2569 2570 switch (args->op) { 2571 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2572 return (EOPNOTSUPP); 2573 default: 2574 /* 2575 * Ignore unknown operations, just like Linux kernel built 2576 * without CONFIG_SECCOMP. 2577 */ 2578 return (EINVAL); 2579 } 2580 } 2581 2582 /* 2583 * Custom version of exec_copyin_args(), to copy out argument and environment 2584 * strings from the old process address space into the temporary string buffer. 2585 * Based on freebsd32_exec_copyin_args. 2586 */ 2587 static int 2588 linux_exec_copyin_args(struct image_args *args, const char *fname, 2589 enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv) 2590 { 2591 char *argp, *envp; 2592 l_uintptr_t *ptr, arg; 2593 int error; 2594 2595 bzero(args, sizeof(*args)); 2596 if (argv == NULL) 2597 return (EFAULT); 2598 2599 /* 2600 * Allocate demand-paged memory for the file name, argument, and 2601 * environment strings. 2602 */ 2603 error = exec_alloc_args(args); 2604 if (error != 0) 2605 return (error); 2606 2607 /* 2608 * Copy the file name. 2609 */ 2610 error = exec_args_add_fname(args, fname, segflg); 2611 if (error != 0) 2612 goto err_exit; 2613 2614 /* 2615 * extract arguments first 2616 */ 2617 ptr = argv; 2618 for (;;) { 2619 error = copyin(ptr++, &arg, sizeof(arg)); 2620 if (error) 2621 goto err_exit; 2622 if (arg == 0) 2623 break; 2624 argp = PTRIN(arg); 2625 error = exec_args_add_arg(args, argp, UIO_USERSPACE); 2626 if (error != 0) 2627 goto err_exit; 2628 } 2629 2630 /* 2631 * This comment is from Linux do_execveat_common: 2632 * When argv is empty, add an empty string ("") as argv[0] to 2633 * ensure confused userspace programs that start processing 2634 * from argv[1] won't end up walking envp. 2635 */ 2636 if (args->argc == 0 && 2637 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) 2638 goto err_exit; 2639 2640 /* 2641 * extract environment strings 2642 */ 2643 if (envv) { 2644 ptr = envv; 2645 for (;;) { 2646 error = copyin(ptr++, &arg, sizeof(arg)); 2647 if (error) 2648 goto err_exit; 2649 if (arg == 0) 2650 break; 2651 envp = PTRIN(arg); 2652 error = exec_args_add_env(args, envp, UIO_USERSPACE); 2653 if (error != 0) 2654 goto err_exit; 2655 } 2656 } 2657 2658 return (0); 2659 2660 err_exit: 2661 exec_free_args(args); 2662 return (error); 2663 } 2664 2665 int 2666 linux_execve(struct thread *td, struct linux_execve_args *args) 2667 { 2668 struct image_args eargs; 2669 int error; 2670 2671 LINUX_CTR(execve); 2672 2673 error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE, 2674 args->argp, args->envp); 2675 if (error == 0) 2676 error = linux_common_execve(td, &eargs); 2677 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2678 return (error); 2679 } 2680 2681 static void 2682 linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) 2683 { 2684 struct rtprio rtp2; 2685 2686 pri_to_rtp(td1, &rtp2); 2687 if (rtp2.type < rtp->type || 2688 (rtp2.type == rtp->type && 2689 rtp2.prio < rtp->prio)) { 2690 rtp->type = rtp2.type; 2691 rtp->prio = rtp2.prio; 2692 } 2693 } 2694 2695 #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX 2696 2697 static int 2698 linux_rtprio2ioprio(struct rtprio *rtp) 2699 { 2700 int ioprio, prio; 2701 2702 switch (rtp->type) { 2703 case RTP_PRIO_IDLE: 2704 prio = RTP_PRIO_MIN; 2705 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); 2706 break; 2707 case RTP_PRIO_NORMAL: 2708 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2709 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); 2710 break; 2711 case RTP_PRIO_REALTIME: 2712 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2713 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); 2714 break; 2715 default: 2716 prio = RTP_PRIO_MIN; 2717 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); 2718 break; 2719 } 2720 return (ioprio); 2721 } 2722 2723 static int 2724 linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) 2725 { 2726 2727 switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { 2728 case LINUX_IOPRIO_CLASS_IDLE: 2729 rtp->prio = RTP_PRIO_MIN; 2730 rtp->type = RTP_PRIO_IDLE; 2731 break; 2732 case LINUX_IOPRIO_CLASS_BE: 2733 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2734 rtp->type = RTP_PRIO_NORMAL; 2735 break; 2736 case LINUX_IOPRIO_CLASS_RT: 2737 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2738 rtp->type = RTP_PRIO_REALTIME; 2739 break; 2740 default: 2741 return (EINVAL); 2742 } 2743 return (0); 2744 } 2745 #undef LINUX_PRIO_DIVIDER 2746 2747 int 2748 linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) 2749 { 2750 struct thread *td1; 2751 struct rtprio rtp; 2752 struct pgrp *pg; 2753 struct proc *p; 2754 int error, found; 2755 2756 p = NULL; 2757 td1 = NULL; 2758 error = 0; 2759 found = 0; 2760 rtp.type = RTP_PRIO_IDLE; 2761 rtp.prio = RTP_PRIO_MAX; 2762 switch (args->which) { 2763 case LINUX_IOPRIO_WHO_PROCESS: 2764 if (args->who == 0) { 2765 td1 = td; 2766 p = td1->td_proc; 2767 PROC_LOCK(p); 2768 } else if (args->who > PID_MAX) { 2769 td1 = linux_tdfind(td, args->who, -1); 2770 if (td1 != NULL) 2771 p = td1->td_proc; 2772 } else 2773 p = pfind(args->who); 2774 if (p == NULL) 2775 return (ESRCH); 2776 if ((error = p_cansee(td, p))) { 2777 PROC_UNLOCK(p); 2778 break; 2779 } 2780 if (td1 != NULL) { 2781 pri_to_rtp(td1, &rtp); 2782 } else { 2783 FOREACH_THREAD_IN_PROC(p, td1) { 2784 linux_up_rtprio_if(td1, &rtp); 2785 } 2786 } 2787 found++; 2788 PROC_UNLOCK(p); 2789 break; 2790 case LINUX_IOPRIO_WHO_PGRP: 2791 sx_slock(&proctree_lock); 2792 if (args->who == 0) { 2793 pg = td->td_proc->p_pgrp; 2794 PGRP_LOCK(pg); 2795 } else { 2796 pg = pgfind(args->who); 2797 if (pg == NULL) { 2798 sx_sunlock(&proctree_lock); 2799 error = ESRCH; 2800 break; 2801 } 2802 } 2803 sx_sunlock(&proctree_lock); 2804 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2805 PROC_LOCK(p); 2806 if (p->p_state == PRS_NORMAL && 2807 p_cansee(td, p) == 0) { 2808 FOREACH_THREAD_IN_PROC(p, td1) { 2809 linux_up_rtprio_if(td1, &rtp); 2810 found++; 2811 } 2812 } 2813 PROC_UNLOCK(p); 2814 } 2815 PGRP_UNLOCK(pg); 2816 break; 2817 case LINUX_IOPRIO_WHO_USER: 2818 if (args->who == 0) 2819 args->who = td->td_ucred->cr_uid; 2820 sx_slock(&allproc_lock); 2821 FOREACH_PROC_IN_SYSTEM(p) { 2822 PROC_LOCK(p); 2823 if (p->p_state == PRS_NORMAL && 2824 p->p_ucred->cr_uid == args->who && 2825 p_cansee(td, p) == 0) { 2826 FOREACH_THREAD_IN_PROC(p, td1) { 2827 linux_up_rtprio_if(td1, &rtp); 2828 found++; 2829 } 2830 } 2831 PROC_UNLOCK(p); 2832 } 2833 sx_sunlock(&allproc_lock); 2834 break; 2835 default: 2836 error = EINVAL; 2837 break; 2838 } 2839 if (error == 0) { 2840 if (found != 0) 2841 td->td_retval[0] = linux_rtprio2ioprio(&rtp); 2842 else 2843 error = ESRCH; 2844 } 2845 return (error); 2846 } 2847 2848 int 2849 linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) 2850 { 2851 struct thread *td1; 2852 struct rtprio rtp; 2853 struct pgrp *pg; 2854 struct proc *p; 2855 int error; 2856 2857 if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) 2858 return (error); 2859 /* Attempts to set high priorities (REALTIME) require su privileges. */ 2860 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && 2861 (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) 2862 return (error); 2863 2864 p = NULL; 2865 td1 = NULL; 2866 switch (args->which) { 2867 case LINUX_IOPRIO_WHO_PROCESS: 2868 if (args->who == 0) { 2869 td1 = td; 2870 p = td1->td_proc; 2871 PROC_LOCK(p); 2872 } else if (args->who > PID_MAX) { 2873 td1 = linux_tdfind(td, args->who, -1); 2874 if (td1 != NULL) 2875 p = td1->td_proc; 2876 } else 2877 p = pfind(args->who); 2878 if (p == NULL) 2879 return (ESRCH); 2880 if ((error = p_cansched(td, p))) { 2881 PROC_UNLOCK(p); 2882 break; 2883 } 2884 if (td1 != NULL) { 2885 error = rtp_to_pri(&rtp, td1); 2886 } else { 2887 FOREACH_THREAD_IN_PROC(p, td1) { 2888 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2889 break; 2890 } 2891 } 2892 PROC_UNLOCK(p); 2893 break; 2894 case LINUX_IOPRIO_WHO_PGRP: 2895 sx_slock(&proctree_lock); 2896 if (args->who == 0) { 2897 pg = td->td_proc->p_pgrp; 2898 PGRP_LOCK(pg); 2899 } else { 2900 pg = pgfind(args->who); 2901 if (pg == NULL) { 2902 sx_sunlock(&proctree_lock); 2903 error = ESRCH; 2904 break; 2905 } 2906 } 2907 sx_sunlock(&proctree_lock); 2908 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2909 PROC_LOCK(p); 2910 if (p->p_state == PRS_NORMAL && 2911 p_cansched(td, p) == 0) { 2912 FOREACH_THREAD_IN_PROC(p, td1) { 2913 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2914 break; 2915 } 2916 } 2917 PROC_UNLOCK(p); 2918 if (error != 0) 2919 break; 2920 } 2921 PGRP_UNLOCK(pg); 2922 break; 2923 case LINUX_IOPRIO_WHO_USER: 2924 if (args->who == 0) 2925 args->who = td->td_ucred->cr_uid; 2926 sx_slock(&allproc_lock); 2927 FOREACH_PROC_IN_SYSTEM(p) { 2928 PROC_LOCK(p); 2929 if (p->p_state == PRS_NORMAL && 2930 p->p_ucred->cr_uid == args->who && 2931 p_cansched(td, p) == 0) { 2932 FOREACH_THREAD_IN_PROC(p, td1) { 2933 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2934 break; 2935 } 2936 } 2937 PROC_UNLOCK(p); 2938 if (error != 0) 2939 break; 2940 } 2941 sx_sunlock(&allproc_lock); 2942 break; 2943 default: 2944 error = EINVAL; 2945 break; 2946 } 2947 return (error); 2948 } 2949