/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/imgact.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/rtprio.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_common.h>
#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_time.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

int stclohz;				/* Statistics clock frequency */

static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define	LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* Swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);
static int	linux_pollin(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
static int	linux_pollout(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages. */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files.  There is no cheap way to
	 * compute this, so just leave the field unpopulated.  Linux itself only
	 * started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}
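/*
 * Like Linux, sysinfo(2) reports the load averages as 16.16 fixed-point
 * values (LINUX_SYSINFO_LOADS_SCALE above).  For illustration, a load
 * average of 1.50 is reported as 1.50 * 65536 = 98304.
 */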
#ifdef LINUX_LEGACY_SYSCALLS
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error __diagused;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful.  Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit and so avoid an error from it.
	 *
	 * XXX. Linux limits secs to INT_MAX on 32-bit platforms and does
	 * not limit it at all on 64-bit platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}
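/*
 * Note that the mremap() emulation above only supports shrinking a
 * mapping in place: growing requests fail with ENOMEM, and a smaller
 * new_len simply unmaps the tail of the region (e.g. shrinking a
 * three-page mapping to one page munmaps the trailing two pages).
 */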
#define	LINUX_MS_ASYNC		0x0001
#define	LINUX_MS_INVALIDATE	0x0002
#define	LINUX_MS_SYNC		0x0004

int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{

	return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len,
	    uap->prot));
}

int
linux_madvise(struct thread *td, struct linux_madvise_args *uap)
{

	return (linux_madvise_common(td, PTROUT(uap->addr), uap->len,
	    uap->behav));
}

int
linux_mmap2(struct thread *td, struct linux_mmap2_args *uap)
{
#if defined(LINUX_ARCHWANT_MMAP2PGOFF)
	/*
	 * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is
	 * implemented with the mmap2 syscall and the offset is represented in
	 * multiples of the page size.
	 */
	return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot,
	    uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE));
#else
	return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pgoff));
#endif
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use the hard-coded CLK_TCK value.
 * Since 2.2.1 glibc uses the value exported from the kernel via the
 * AT_CLKTCK auxiliary vector entry.
 */
#define	CLK_TCK		100

#define	CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER(2,4,0) ?	\
			    CONVNTCK(r) : CONVOTCK(r))
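/*
 * For illustration: a CPU time of 1.230000 s converts to
 * 1 * 100 + 230000 / (1000000 / 100) = 123 ticks under the old
 * hard-coded CLK_TCK of 100, while CONVNTCK() uses the actual
 * statistics clock frequency (stclohz) for emulated kernel versions
 * of 2.4.0 and newer.
 */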
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications.  On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
#if defined(COMPAT_LINUX32)
	if (linux32_emulate_i386)
		strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
	else
#endif
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#elif defined(__aarch64__)
	strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
#elif defined(__i386__)
	strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif
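/*
 * Linux utimensat() overloads the tv_nsec field of each timespec to
 * carry the special UTIME_NOW and UTIME_OMIT requests.  The helpers
 * below translate those to the corresponding FreeBSD constants and
 * reject any other tv_nsec value outside [0, 999999999].
 */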
static int
linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec) {
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	int dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/*
		 * This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (pathname != NULL)
		return (kern_utimensat(td, dfd, pathname,
		    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));

	if (lflags != 0)
		return (EINVAL);

	return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE));
}

int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int
linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
{

	/* Zero out the padding in compat mode. */
	l_times->tv_nsec &= 0xFFFFFFFFUL;

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec) {
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif
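/*
 * The helper below post-processes the status word returned by
 * kern_wait6() so that any embedded signal number is the Linux one.
 * The traditional encoding keeps the termination signal in the low
 * seven bits, the stop signal in bits 8-15 (with 0x7f in the low
 * byte), and 0xffff for "continued", which is why only those bit
 * ranges are rewritten.
 */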
static int
linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
    int options, void *rup, l_siginfo_t *infop)
{
	l_siginfo_t lsi;
	siginfo_t siginfo;
	struct __wrusage wru;
	int error, status, tmpstat, sig;

	error = kern_wait6(td, idtype, id, &status, options,
	    rup != NULL ? &wru : NULL, &siginfo);

	if (error == 0 && statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}
	if (error == 0 && rup != NULL)
		error = linux_copyout_rusage(&wru.wru_self, rup);
	if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
		sig = bsd_to_linux_signal(siginfo.si_signo);
		siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		error = copyout(&lsi, infop, sizeof(lsi));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args = {
		.pid = args->pid,
		.status = args->status,
		.options = args->options,
		.rusage = NULL,
	};

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	struct proc *p;
	int options, id, idtype;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	/* -INT_MIN is not defined. */
	if (args->pid == INT_MIN)
		return (ESRCH);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;

	if (args->pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (args->pid < 0) {
		idtype = P_PGID;
		id = (id_t)-args->pid;
	} else if (args->pid == 0) {
		idtype = P_PGID;
		p = td->td_proc;
		PROC_LOCK(p);
		id = p->p_pgid;
		PROC_UNLOCK(p);
	} else {
		idtype = P_PID;
		id = (id_t)args->pid;
	}

	return (linux_common_wait(td, idtype, id, args->status, options,
	    args->rusage, NULL));
}

int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	idtype_t idtype;
	int error, options;
	struct proc *p;
	pid_t id;

	if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
	    LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	id = args->id;
	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			id = p->p_pgid;
			PROC_UNLOCK(p);
		} else if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	case LINUX_P_PIDFD:
		LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
		return (ENOSYS);
	default:
		return (EINVAL);
	}

	error = linux_common_wait(td, idtype, id, NULL, options,
	    args->rusage, args->info);
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	int error;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}
#endif
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, args->filename, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;
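/*
 * Despite the "B2L" name, the macro above just copies the four fields
 * memberwise and works in either direction, since struct l_itimerval
 * and struct itimerval use the same field names; linux_setitimer()
 * below uses it both to convert the Linux value in and to convert the
 * previous value back out.
 */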
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid.  Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid.  Returning the whole set
	 * here will cause a duplicate.  Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}
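/*
 * Both conversions above offset the group array by one because FreeBSD
 * keeps the effective GID in cr_groups[0]: a process with three
 * supplementary groups has cr_ngroups == 4, and Linux sees only the
 * three supplementary entries.
 */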
static bool
linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim)
{
	size_t size;
	int res, error;

	if (linux_dummy_rlimits == 0)
		return (false);

	switch (resource) {
	case LINUX_RLIMIT_LOCKS:
	case LINUX_RLIMIT_RTTIME:
		rlim->rlim_cur = LINUX_RLIM_INFINITY;
		rlim->rlim_max = LINUX_RLIM_INFINITY;
		return (true);
	case LINUX_RLIMIT_NICE:
	case LINUX_RLIMIT_RTPRIO:
		rlim->rlim_cur = 0;
		rlim->rlim_max = 0;
		return (true);
	case LINUX_RLIMIT_SIGPENDING:
		size = sizeof(res);
		error = kernel_sysctlbyname(td,
		    "kern.sigqueue.max_pending_per_proc",
		    &res, &size, 0, 0, 0, 0);
		if (error != 0)
			return (false);
		rlim->rlim_cur = res;
		rlim->rlim_max = res;
		return (true);
	case LINUX_RLIMIT_MSGQUEUE:
		size = sizeof(res);
		error = kernel_sysctlbyname(td,
		    "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
		if (error != 0)
			return (false);
		rlim->rlim_cur = res;
		rlim->rlim_max = res;
		return (true);
	default:
		return (false);
	}
}

int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
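/*
 * When linux_map_sched_prio is set, the scheduler calls below translate
 * Linux real-time priorities in [1, LINUX_MAX_RT_PRIO - 1] to FreeBSD's
 * rtprio range [RTP_PRIO_MIN, RTP_PRIO_MAX] and back.  For illustration,
 * assuming the usual constants (LINUX_MAX_RT_PRIO 100 and an rtprio
 * range of 0..31): Linux priority 1 maps to 0, 50 maps to
 * (49 * 32) / 99 = 15, and 99 maps to 31.
 */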
int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}
#define	REBOOT_CAD_ON	0x89abcdef
#define	REBOOT_CAD_OFF	0
#define	REBOOT_HALT	0xcdef0123
#define	REBOOT_RESTART	0x01234567
#define	REBOOT_RESTART2	0xA1B2C3D4
#define	REBOOT_POWEROFF	0x4321FEDC
#define	REBOOT_MAGIC1	0xfee1dead
#define	REBOOT_MAGIC2	0x28121969
#define	REBOOT_MAGIC2A	0x05121996
#define	REBOOT_MAGIC2B	0x16041998

int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}
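/*
 * At the syscall level Linux getpriority(2) returns 20 - nice rather
 * than the nice value itself, so the result is always positive
 * (e.g. 20 for nice 0, 40 for nice -20, 1 for nice 19); glibc converts
 * it back.  The adjustment above mirrors that convention.
 */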
int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set.  We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

#define	_LINUX_CAPABILITY_VERSION_1	0x19980330
#define	_LINUX_CAPABILITY_VERSION_2	0x20071026
#define	_LINUX_CAPABILITY_VERSION_3	0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}
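/*
 * The header version determines how many 32-bit capability blocks
 * follow: _LINUX_CAPABILITY_VERSION_1 uses a single l_user_cap_data,
 * while versions 2 and 3 use two blocks to cover 64 capability bits;
 * hence the u32s counts above and below.
 */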
int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size, arg;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related files
	 *   in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control traceability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure not to
		 * overflow the size a Linux program expects.  We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side.  This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_CAPBSET_READ:
#if 0
		/*
		 * This makes too much noise with Ubuntu Focal.
		 */
		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
		    (int)args->arg2);
#endif
		error = EINVAL;
		break;
	case LINUX_PR_SET_NO_NEW_PRIVS:
		arg = args->arg2 == 1 ?
		    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
		error = kern_procctl(td, P_PID, p->p_pid,
		    PROC_NO_NEW_PRIVS_CTL, &arg);
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}

/*
 * Get affinity of a process.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	struct thread *tdt;
	cpuset_t *mask;
	size_t size;
	int error;
	id_t tid;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);
	tid = tdt->td_tid;
	PROC_UNLOCK(tdt->td_proc);

	mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO);
	size = min(args->len, sizeof(cpuset_t));
	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tid, size, mask);
	if (error == ERANGE)
		error = EINVAL;
	if (error == 0)
		error = copyout(mask, args->user_mask_ptr, size);
	if (error == 0)
		td->td_retval[0] = size;
	free(mask, M_LINUX);
	return (error);
}
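/*
 * As on Linux, sched_getaffinity(2) returns the number of bytes copied
 * into the user mask on success rather than 0, which is why
 * td_retval[0] is set to the copied size above.
 */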
/*
 * Set affinity of a process.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;
	cpuset_t *mask;
	int cpu, error;
	size_t len;
	id_t tid;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);
	tid = tdt->td_tid;
	PROC_UNLOCK(tdt->td_proc);

	len = min(args->len, sizeof(cpuset_t));
	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
	error = copyin(args->user_mask_ptr, mask, len);
	if (error != 0)
		goto out;
	/* Linux ignores high bits. */
	CPU_FOREACH_ISSET(cpu, mask)
		if (cpu > mp_maxid)
			CPU_CLR(cpu, mask);

	error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tid, mask);
	if (error == EDEADLK)
		error = EINVAL;
out:
	free(mask, M_TEMP);
	return (error);
}

struct linux_rlimit64 {
	uint64_t	rlim_cur;
	uint64_t	rlim_max;
};
int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error = 0;

	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(td, args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note: unlike FreeBSD, where rlim is a signed 64-bit
		 * quantity, the Linux rlim is unsigned 64-bit.  FreeBSD
		 * treats negative limits as INFINITY, so no conversion
		 * is needed here.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

out:
	PRELE(p);
	return (error);
}

int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec(&ts, args->tsp);
	return (error);
}
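/*
 * Unlike pselect(2), the sixth argument of Linux pselect6() is not the
 * sigset itself but a pointer to a { sigset pointer, size } pair (see
 * struct l_pselect6arg above); linux_common_pselect6() below copies in
 * that pair first and then fetches the actual signal set.
 */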
int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec(&ts, args->tsp);
	return (error);
}

static int
linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
    l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
    l_uintptr_t *sig)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (sig != NULL) {
		error = copyin(sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		error = linux_copyin_sigset(td, PTRIN(lpse6.ss),
		    lpse6.ss_len, &ss, &ssp);
		if (error != 0)
			return (error);
	}

	/*
	 * Currently glibc converts the nanosecond value to microseconds
	 * itself.  This means losing precision, but for now the loss is
	 * hardly noticeable.
	 */
	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&utv, tsp);
		if (itimerfix(&utv))
			return (EINVAL);

		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, nfds, readfds, writefds,
	    exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (tsp != NULL) {
		/*
		 * Compute how much of the timeout remains: take the
		 * difference between the current time and the time before
		 * the call, and subtract that elapsed time from the
		 * user-supplied value.
		 */
		microtime(&tv1);
		timevalsub(&tv1, &tv0);
		timevalsub(&utv, &tv1);
		if (utv.tv_sec < 0)
			timevalclear(&utv);
		TIMEVAL_TO_TIMESPEC(&utv, tsp);
	}
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec64(&ts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec(&uts, args->tsp);
	return (error);
}

static int
linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
    struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
	struct timespec ts0, ts1;
	struct pollfd stackfds[32];
	struct pollfd *kfds;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (kern_poll_maxfds(nfds))
		return (EINVAL);
	if (sset != NULL) {
		error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;
	if (tsp != NULL)
		nanotime(&ts0);

	if (nfds > nitems(stackfds))
		kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
	else
		kfds = stackfds;
	error = linux_pollin(td, kfds, fds, nfds);
	if (error != 0)
		goto out;

	error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
	if (error == 0)
		error = linux_pollout(td, kfds, fds, nfds);

	if (error == 0 && tsp != NULL) {
		if (td->td_retval[0]) {
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(tsp, &ts1, tsp);
			if (tsp->tv_sec < 0)
				timespecclear(tsp);
		} else
			timespecclear(tsp);
	}

out:
	if (nfds > nitems(stackfds))
		free(kfds, M_TEMP);
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;
	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec64(&uts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
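/*
 * Illustrative only: a hedged userspace sketch of the ppoll path handled
 * above.  Note that the raw Linux syscall writes the remaining time back
 * to the timespec (the handler updates *tsp); the glibc wrapper hides
 * that by passing a private copy.
 */
#if 0
#define _GNU_SOURCE
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <time.h>

int
main(void)
{
	struct pollfd pfd = { .fd = 0, .events = POLLIN };	/* stdin */
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	sigset_t mask;
	int n;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);	/* block SIGINT while waiting */
	n = ppoll(&pfd, 1, &ts, &mask);
	if (n == -1)
		perror("ppoll");
	else
		printf("%d descriptor(s) ready\n", n);
	return (0);
}
#endif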
static int
linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds,
    u_int nfd)
{
	int error;
	u_int i;

	error = copyin(ufds, fds, nfd * sizeof(*fds));
	if (error != 0)
		return (error);

	for (i = 0; i < nfd; i++) {
		if (fds->events != 0)
			linux_to_bsd_poll_events(td, fds->fd,
			    fds->events, &fds->events);
		fds++;
	}
	return (0);
}

static int
linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds,
    u_int nfd)
{
	int error = 0;
	u_int i, n = 0;

	for (i = 0; i < nfd; i++) {
		if (fds->revents != 0) {
			bsd_to_linux_poll_events(fds->revents,
			    &fds->revents);
			n++;
		}
		error = copyout(&fds->revents, &ufds->revents,
		    sizeof(ufds->revents));
		if (error)
			return (error);
		fds++;
		ufds++;
	}
	td->td_retval[0] = n;
	return (0);
}

static int
linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
    struct timespec *ts)
{
	struct thread *tdt;
	int error;

	/*
	 * According to the man page, EINVAL should be returned when an
	 * invalid pid is specified.
	 */
	if (pid < 0)
		return (EINVAL);

	tdt = linux_tdfind(td, pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, ts);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec(&ts, uap->interval));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_sched_rr_get_interval_time64(struct thread *td,
    struct linux_sched_rr_get_interval_time64_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec64(&ts, uap->interval));
}
#endif
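/*
 * Illustrative only: a hedged sketch of the corresponding userspace call.
 * pid 0 queries the calling process; a negative pid earns EINVAL from the
 * common routine above.
 */
#if 0
#include <sched.h>
#include <stdio.h>
#include <time.h>

int
main(void)
{
	struct timespec ts;

	if (sched_rr_get_interval(0, &ts) == -1) {
		perror("sched_rr_get_interval");
		return (1);
	}
	printf("RR quantum: %lld.%09ld s\n",
	    (long long)ts.tv_sec, ts.tv_nsec);
	return (0);
}
#endif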
/*
 * When a Linux thread is the initial thread in its thread group, the thread
 * id is equal to the process id.  Glibc depends on this magic (see the
 * assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		PROC_LOCK(td->td_proc);
		return (td);
	} else if (tid > PID_MAX)
		return (tdfind(tid, pid));

	/*
	 * The initial thread, whose tid is equal to the pid.
	 */
	p = pfind(tid);
	if (p != NULL) {
		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
		    (pid != -1 && tid != pid)) {
			/*
			 * p is not a Linuxulator process, or the tid does
			 * not match the requested pid.
			 */
			PROC_UNLOCK(p);
			return (NULL);
		}
		FOREACH_THREAD_IN_PROC(p, tdt) {
			em = em_find(tdt);
			if (tid == em->em_tid)
				return (tdt);
		}
		PROC_UNLOCK(p);
	}
	return (NULL);
}

void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK | LINUX_GRND_RANDOM))
		return (EINVAL);
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* The start address needs to be page-aligned. */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}
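/*
 * Illustrative only: a hedged userspace sketch of the mincore path above.
 * The start address must be page-aligned or the handler returns EINVAL;
 * mmap(2) results are always suitably aligned.
 */
#if 0
#include <sys/mman.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	unsigned char vec;
	void *p;

	p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return (1);
	}
	if (mincore(p, getpagesize(), &vec) == -1)
		perror("mincore");
	else
		printf("resident: %s\n", (vec & 1) ? "yes" : "no");
	return (0);
}
#endif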
#define	SYSLOG_TAG	"<6>"

int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error)
		return (error);

	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/* The -1 is to skip the trailing '\0'. */
	dst += sizeof(SYSLOG_TAG) - 1;

	while (error == 0) {
		mtx_lock(&msgbuf_lock);
		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
		mtx_unlock(&msgbuf_lock);

		if (buflen == 0)
			break;

		for (src = buf; src < buf + buflen && error == 0; src++) {
			if (*src == '\0')
				continue;

			if (dst >= args->buf + args->len)
				goto out;

			error = copyout(src, dst, 1);
			dst++;

			if (*src == '\n' && *(src + 1) != '<' &&
			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
				error = copyout(&SYSLOG_TAG,
				    dst, sizeof(SYSLOG_TAG));
				dst += sizeof(SYSLOG_TAG) - 1;
			}
		}
	}
out:
	td->td_retval[0] = dst - args->buf;
	return (error);
}

int
linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
{
	int cpu, error, node;

	cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9). */
	error = 0;
	node = cpuid_to_pcpu[cpu]->pc_domain;

	if (args->cpu != NULL)
		error = copyout(&cpu, args->cpu, sizeof(l_int));
	if (args->node != NULL)
		error = copyout(&node, args->node, sizeof(l_int));
	return (error);
}

#if defined(__i386__) || defined(__amd64__)
int
linux_poll(struct thread *td, struct linux_poll_args *args)
{
	struct timespec ts, *tsp;

	if (args->timeout != INFTIM) {
		if (args->timeout < 0)
			return (EINVAL);
		/* Convert the millisecond timeout to a timespec. */
		ts.tv_sec = args->timeout / 1000;
		ts.tv_nsec = (args->timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	return (linux_common_ppoll(td, args->fds, args->nfds,
	    tsp, NULL, 0));
}
#endif /* __i386__ || __amd64__ */

int
linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
{

	switch (args->op) {
	case LINUX_SECCOMP_GET_ACTION_AVAIL:
		return (EOPNOTSUPP);
	default:
		/*
		 * Reject unknown operations, just like a Linux kernel built
		 * without CONFIG_SECCOMP.
		 */
		return (EINVAL);
	}
}
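/*
 * Illustrative only: a hedged sketch probing the seccomp stub above from
 * userspace.  There is no glibc wrapper, so syscall(2) is used directly;
 * under the Linuxulator the probe fails with EOPNOTSUPP, and unknown
 * operations fail with EINVAL.
 */
#if 0
#include <linux/seccomp.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	uint32_t action = SECCOMP_RET_ALLOW;

	if (syscall(SYS_seccomp, SECCOMP_GET_ACTION_AVAIL, 0, &action) == -1)
		perror("seccomp");	/* expect EOPNOTSUPP here */
	return (0);
}
#endif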
/*
 * Custom version of exec_copyin_args(), to copy out argument and environment
 * strings from the old process address space into the temporary string
 * buffer.  Based on freebsd32_exec_copyin_args().
 */
static int
linux_exec_copyin_args(struct image_args *args, const char *fname,
    enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv)
{
	char *argp, *envp;
	l_uintptr_t *ptr, arg;
	int error;

	bzero(args, sizeof(*args));
	if (argv == NULL)
		return (EFAULT);

	/*
	 * Allocate demand-paged memory for the file name, argument, and
	 * environment strings.
	 */
	error = exec_alloc_args(args);
	if (error != 0)
		return (error);

	/*
	 * Copy the file name.
	 */
	error = exec_args_add_fname(args, fname, segflg);
	if (error != 0)
		goto err_exit;

	/*
	 * Extract the arguments first.
	 */
	ptr = argv;
	for (;;) {
		error = copyin(ptr++, &arg, sizeof(arg));
		if (error)
			goto err_exit;
		if (arg == 0)
			break;
		argp = PTRIN(arg);
		error = exec_args_add_arg(args, argp, UIO_USERSPACE);
		if (error != 0)
			goto err_exit;
	}

	/*
	 * This comment is from the Linux do_execveat_common():
	 * When argv is empty, add an empty string ("") as argv[0] to
	 * ensure confused userspace programs that start processing
	 * from argv[1] won't end up walking envp.
	 */
	if (args->argc == 0 &&
	    (error = exec_args_add_arg(args, "", UIO_SYSSPACE)) != 0)
		goto err_exit;

	/*
	 * Extract the environment strings.
	 */
	if (envv) {
		ptr = envv;
		for (;;) {
			error = copyin(ptr++, &arg, sizeof(arg));
			if (error)
				goto err_exit;
			if (arg == 0)
				break;
			envp = PTRIN(arg);
			error = exec_args_add_env(args, envp, UIO_USERSPACE);
			if (error != 0)
				goto err_exit;
		}
	}

	return (0);

err_exit:
	exec_free_args(args);
	return (error);
}

int
linux_execve(struct thread *td, struct linux_execve_args *args)
{
	struct image_args eargs;
	int error;

	LINUX_CTR(execve);

	error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE,
	    args->argp, args->envp);
	if (error == 0)
		error = linux_common_execve(td, &eargs);
	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
	return (error);
}

/*
 * Raise *rtp to td1's priority if td1 runs at a higher priority
 * (a lower type, or an equal type with a lower prio value).
 */
static void
linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp)
{
	struct rtprio rtp2;

	pri_to_rtp(td1, &rtp2);
	if (rtp2.type < rtp->type ||
	    (rtp2.type == rtp->type &&
	    rtp2.prio < rtp->prio)) {
		rtp->type = rtp2.type;
		rtp->prio = rtp2.prio;
	}
}

/* Parenthesized so that uses in division expand as intended. */
#define	LINUX_PRIO_DIVIDER	(RTP_PRIO_MAX / LINUX_IOPRIO_MAX)

static int
linux_rtprio2ioprio(struct rtprio *rtp)
{
	int ioprio, prio;

	switch (rtp->type) {
	case RTP_PRIO_IDLE:
		prio = RTP_PRIO_MIN;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio);
		break;
	case RTP_PRIO_NORMAL:
		prio = rtp->prio / LINUX_PRIO_DIVIDER;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio);
		break;
	case RTP_PRIO_REALTIME:
		prio = rtp->prio / LINUX_PRIO_DIVIDER;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio);
		break;
	default:
		prio = RTP_PRIO_MIN;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio);
		break;
	}
	return (ioprio);
}

static int
linux_ioprio2rtprio(int ioprio, struct rtprio *rtp)
{

	switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) {
	case LINUX_IOPRIO_CLASS_IDLE:
		rtp->prio = RTP_PRIO_MIN;
		rtp->type = RTP_PRIO_IDLE;
		break;
	case LINUX_IOPRIO_CLASS_BE:
		rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
		rtp->type = RTP_PRIO_NORMAL;
		break;
	case LINUX_IOPRIO_CLASS_RT:
		rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
		rtp->type = RTP_PRIO_REALTIME;
		break;
	default:
		return (EINVAL);
	}
	return (0);
}
#undef LINUX_PRIO_DIVIDER
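/*
 * A worked example of the mapping above, assuming RTP_PRIO_MAX is 31 and
 * LINUX_IOPRIO_MAX is 8 (check the current headers; these values are an
 * assumption here): LINUX_PRIO_DIVIDER evaluates to 31 / 8 == 3, so a
 * best-effort Linux ioprio level of 4 maps to an rtprio of 4 * 3 == 12 in
 * the RTP_PRIO_NORMAL class, and an rtprio of 12 maps back to level
 * 12 / 3 == 4.  The mapping is lossy: integer division collapses
 * neighboring rtprio values onto the same ioprio level.
 */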
int
linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error, found;

	p = NULL;
	td1 = NULL;
	error = 0;
	found = 0;
	rtp.type = RTP_PRIO_IDLE;
	rtp.prio = RTP_PRIO_MAX;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansee(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			pri_to_rtp(td1, &rtp);
		} else {
			FOREACH_THREAD_IN_PROC(p, td1) {
				linux_up_rtprio_if(td1, &rtp);
			}
		}
		found++;
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error == 0) {
		if (found != 0)
			td->td_retval[0] = linux_rtprio2ioprio(&rtp);
		else
			error = ESRCH;
	}
	return (error);
}
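/*
 * Illustrative only: a hedged userspace sketch of the query path above.
 * Linux provides no libc wrapper for ioprio_get, so syscall(2) is used;
 * the IOPRIO_* helpers are shown open-coded since <linux/ioprio.h> is not
 * installed everywhere (13 below is the standard class shift).
 */
#if 0
#include <sys/syscall.h>
#include <stdio.h>
#include <unistd.h>

#define IOPRIO_WHO_PROCESS	1
#define IOPRIO_PRIO_CLASS(v)	((v) >> 13)
#define IOPRIO_PRIO_DATA(v)	((v) & ((1 << 13) - 1))

int
main(void)
{
	int ioprio;

	/* who == 0: the calling process, as in the handler above. */
	ioprio = syscall(SYS_ioprio_get, IOPRIO_WHO_PROCESS, 0);
	if (ioprio == -1) {
		perror("ioprio_get");
		return (1);
	}
	printf("class %d, level %d\n",
	    IOPRIO_PRIO_CLASS(ioprio), IOPRIO_PRIO_DATA(ioprio));
	return (0);
}
#endif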
int
linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error;

	if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0)
		return (error);
	/*
	 * Attempts to set a high (REALTIME) priority require superuser
	 * privileges.
	 */
	if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME &&
	    (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0)
		return (error);

	p = NULL;
	td1 = NULL;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansched(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			error = rtp_to_pri(&rtp, td1);
		} else {
			FOREACH_THREAD_IN_PROC(p, td1) {
				if ((error = rtp_to_pri(&rtp, td1)) != 0)
					break;
			}
		}
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp, td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp, td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
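/*
 * Illustrative only: a hedged companion sketch for the set path above.
 * Setting the RT class requires privilege (PRIV_SCHED_RTPRIO in the
 * handler), so the unprivileged example below requests best-effort
 * level 7 instead.  The open-coded constants match the standard Linux
 * ioprio encoding (class in the top bits, shift 13).
 */
#if 0
#include <sys/syscall.h>
#include <stdio.h>
#include <unistd.h>

#define IOPRIO_WHO_PROCESS	1
#define IOPRIO_CLASS_BE		2
#define IOPRIO_PRIO(class, data)	(((class) << 13) | (data))

int
main(void)
{
	if (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0,
	    IOPRIO_PRIO(IOPRIO_CLASS_BE, 7)) == -1) {
		perror("ioprio_set");
		return (1);
	}
	return (0);
}
#endif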