1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 #include <sys/param.h> 33 #include <sys/fcntl.h> 34 #include <sys/jail.h> 35 #include <sys/imgact.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/msgbuf.h> 39 #include <sys/mutex.h> 40 #include <sys/poll.h> 41 #include <sys/priv.h> 42 #include <sys/proc.h> 43 #include <sys/procctl.h> 44 #include <sys/reboot.h> 45 #include <sys/random.h> 46 #include <sys/resourcevar.h> 47 #include <sys/rtprio.h> 48 #include <sys/sched.h> 49 #include <sys/smp.h> 50 #include <sys/stat.h> 51 #include <sys/syscallsubr.h> 52 #include <sys/sysctl.h> 53 #include <sys/sysent.h> 54 #include <sys/sysproto.h> 55 #include <sys/time.h> 56 #include <sys/vmmeter.h> 57 #include <sys/vnode.h> 58 59 #include <security/audit/audit.h> 60 #include <security/mac/mac_framework.h> 61 62 #include <vm/pmap.h> 63 #include <vm/vm_map.h> 64 #include <vm/swap_pager.h> 65 66 #ifdef COMPAT_LINUX32 67 #include <machine/../linux32/linux.h> 68 #include <machine/../linux32/linux32_proto.h> 69 #else 70 #include <machine/../linux/linux.h> 71 #include <machine/../linux/linux_proto.h> 72 #endif 73 74 #include <compat/linux/linux_common.h> 75 #include <compat/linux/linux_dtrace.h> 76 #include <compat/linux/linux_file.h> 77 #include <compat/linux/linux_mib.h> 78 #include <compat/linux/linux_signal.h> 79 #include <compat/linux/linux_time.h> 80 #include <compat/linux/linux_util.h> 81 #include <compat/linux/linux_sysproto.h> 82 #include <compat/linux/linux_emul.h> 83 #include <compat/linux/linux_misc.h> 84 85 int stclohz; /* Statistics clock frequency */ 86 87 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 88 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 89 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 90 RLIMIT_MEMLOCK, RLIMIT_AS 91 }; 92 93 struct l_sysinfo { 94 l_long uptime; /* Seconds since boot */ 95 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 96 #define LINUX_SYSINFO_LOADS_SCALE 65536 97 l_ulong totalram; /* Total usable main memory size 
*/ 98 l_ulong freeram; /* Available memory size */ 99 l_ulong sharedram; /* Amount of shared memory */ 100 l_ulong bufferram; /* Memory used by buffers */ 101 l_ulong totalswap; /* Total swap space size */ 102 l_ulong freeswap; /* swap space still available */ 103 l_ushort procs; /* Number of current processes */ 104 l_ushort pads; 105 l_ulong totalhigh; 106 l_ulong freehigh; 107 l_uint mem_unit; 108 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 109 }; 110 111 struct l_pselect6arg { 112 l_uintptr_t ss; 113 l_size_t ss_len; 114 }; 115 116 static int linux_utimensat_lts_to_ts(struct l_timespec *, 117 struct timespec *); 118 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 119 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 120 struct timespec *); 121 #endif 122 static int linux_common_utimensat(struct thread *, int, 123 const char *, struct timespec *, int); 124 static int linux_common_pselect6(struct thread *, l_int, 125 l_fd_set *, l_fd_set *, l_fd_set *, 126 struct timespec *, l_uintptr_t *); 127 static int linux_common_ppoll(struct thread *, struct pollfd *, 128 uint32_t, struct timespec *, l_sigset_t *, 129 l_size_t); 130 static int linux_pollin(struct thread *, struct pollfd *, 131 struct pollfd *, u_int); 132 static int linux_pollout(struct thread *, struct pollfd *, 133 struct pollfd *, u_int); 134 135 int 136 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 137 { 138 struct l_sysinfo sysinfo; 139 int i, j; 140 struct timespec ts; 141 142 bzero(&sysinfo, sizeof(sysinfo)); 143 getnanouptime(&ts); 144 if (ts.tv_nsec != 0) 145 ts.tv_sec++; 146 sysinfo.uptime = ts.tv_sec; 147 148 /* Use the information from the mib to get our load averages */ 149 for (i = 0; i < 3; i++) 150 sysinfo.loads[i] = averunnable.ldavg[i] * 151 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 152 153 sysinfo.totalram = physmem * PAGE_SIZE; 154 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 155 156 /* 157 * sharedram 
counts pages allocated to named, swap-backed objects such 158 * as shared memory segments and tmpfs files. There is no cheap way to 159 * compute this, so just leave the field unpopulated. Linux itself only 160 * started setting this field in the 3.x timeframe. 161 */ 162 sysinfo.sharedram = 0; 163 sysinfo.bufferram = 0; 164 165 swap_pager_status(&i, &j); 166 sysinfo.totalswap = i * PAGE_SIZE; 167 sysinfo.freeswap = (i - j) * PAGE_SIZE; 168 169 sysinfo.procs = nprocs; 170 171 /* 172 * Platforms supported by the emulation layer do not have a notion of 173 * high memory. 174 */ 175 sysinfo.totalhigh = 0; 176 sysinfo.freehigh = 0; 177 178 sysinfo.mem_unit = 1; 179 180 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 181 } 182 183 #ifdef LINUX_LEGACY_SYSCALLS 184 int 185 linux_alarm(struct thread *td, struct linux_alarm_args *args) 186 { 187 struct itimerval it, old_it; 188 u_int secs; 189 int error __diagused; 190 191 secs = args->secs; 192 /* 193 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 194 * to match kern_setitimer()'s limit to avoid error from it. 195 * 196 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 197 * platforms. 
198 */ 199 if (secs > INT32_MAX / 2) 200 secs = INT32_MAX / 2; 201 202 it.it_value.tv_sec = secs; 203 it.it_value.tv_usec = 0; 204 timevalclear(&it.it_interval); 205 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 206 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 207 208 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 209 old_it.it_value.tv_usec >= 500000) 210 old_it.it_value.tv_sec++; 211 td->td_retval[0] = old_it.it_value.tv_sec; 212 return (0); 213 } 214 #endif 215 216 int 217 linux_brk(struct thread *td, struct linux_brk_args *args) 218 { 219 struct vmspace *vm = td->td_proc->p_vmspace; 220 uintptr_t new, old; 221 222 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 223 new = (uintptr_t)args->dsend; 224 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 225 td->td_retval[0] = (register_t)new; 226 else 227 td->td_retval[0] = (register_t)old; 228 229 return (0); 230 } 231 232 #ifdef LINUX_LEGACY_SYSCALLS 233 int 234 linux_select(struct thread *td, struct linux_select_args *args) 235 { 236 l_timeval ltv; 237 struct timeval tv0, tv1, utv, *tvp; 238 int error; 239 240 /* 241 * Store current time for computation of the amount of 242 * time left. 243 */ 244 if (args->timeout) { 245 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 246 goto select_out; 247 utv.tv_sec = ltv.tv_sec; 248 utv.tv_usec = ltv.tv_usec; 249 250 if (itimerfix(&utv)) { 251 /* 252 * The timeval was invalid. Convert it to something 253 * valid that will act as it does under Linux. 
254 */ 255 utv.tv_sec += utv.tv_usec / 1000000; 256 utv.tv_usec %= 1000000; 257 if (utv.tv_usec < 0) { 258 utv.tv_sec -= 1; 259 utv.tv_usec += 1000000; 260 } 261 if (utv.tv_sec < 0) 262 timevalclear(&utv); 263 } 264 microtime(&tv0); 265 tvp = &utv; 266 } else 267 tvp = NULL; 268 269 error = kern_select(td, args->nfds, args->readfds, args->writefds, 270 args->exceptfds, tvp, LINUX_NFDBITS); 271 if (error) 272 goto select_out; 273 274 if (args->timeout) { 275 if (td->td_retval[0]) { 276 /* 277 * Compute how much time was left of the timeout, 278 * by subtracting the current time and the time 279 * before we started the call, and subtracting 280 * that result from the user-supplied value. 281 */ 282 microtime(&tv1); 283 timevalsub(&tv1, &tv0); 284 timevalsub(&utv, &tv1); 285 if (utv.tv_sec < 0) 286 timevalclear(&utv); 287 } else 288 timevalclear(&utv); 289 ltv.tv_sec = utv.tv_sec; 290 ltv.tv_usec = utv.tv_usec; 291 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 292 goto select_out; 293 } 294 295 select_out: 296 return (error); 297 } 298 #endif 299 300 int 301 linux_mremap(struct thread *td, struct linux_mremap_args *args) 302 { 303 uintptr_t addr; 304 size_t len; 305 int error = 0; 306 307 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 308 td->td_retval[0] = 0; 309 return (EINVAL); 310 } 311 312 /* 313 * Check for the page alignment. 314 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 315 */ 316 if (args->addr & PAGE_MASK) { 317 td->td_retval[0] = 0; 318 return (EINVAL); 319 } 320 321 args->new_len = round_page(args->new_len); 322 args->old_len = round_page(args->old_len); 323 324 if (args->new_len > args->old_len) { 325 td->td_retval[0] = 0; 326 return (ENOMEM); 327 } 328 329 if (args->new_len < args->old_len) { 330 addr = args->addr + args->new_len; 331 len = args->old_len - args->new_len; 332 error = kern_munmap(td, addr, len); 333 } 334 335 td->td_retval[0] = error ? 
0 : (uintptr_t)args->addr; 336 return (error); 337 } 338 339 #define LINUX_MS_ASYNC 0x0001 340 #define LINUX_MS_INVALIDATE 0x0002 341 #define LINUX_MS_SYNC 0x0004 342 343 int 344 linux_msync(struct thread *td, struct linux_msync_args *args) 345 { 346 347 return (kern_msync(td, args->addr, args->len, 348 args->fl & ~LINUX_MS_SYNC)); 349 } 350 351 #ifdef LINUX_LEGACY_SYSCALLS 352 int 353 linux_time(struct thread *td, struct linux_time_args *args) 354 { 355 struct timeval tv; 356 l_time_t tm; 357 int error; 358 359 microtime(&tv); 360 tm = tv.tv_sec; 361 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 362 return (error); 363 td->td_retval[0] = tm; 364 return (0); 365 } 366 #endif 367 368 struct l_times_argv { 369 l_clock_t tms_utime; 370 l_clock_t tms_stime; 371 l_clock_t tms_cutime; 372 l_clock_t tms_cstime; 373 }; 374 375 /* 376 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 377 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 378 * auxiliary vector entry. 379 */ 380 #define CLK_TCK 100 381 382 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 383 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 384 385 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? 
\ 386 CONVNTCK(r) : CONVOTCK(r)) 387 388 int 389 linux_times(struct thread *td, struct linux_times_args *args) 390 { 391 struct timeval tv, utime, stime, cutime, cstime; 392 struct l_times_argv tms; 393 struct proc *p; 394 int error; 395 396 if (args->buf != NULL) { 397 p = td->td_proc; 398 PROC_LOCK(p); 399 PROC_STATLOCK(p); 400 calcru(p, &utime, &stime); 401 PROC_STATUNLOCK(p); 402 calccru(p, &cutime, &cstime); 403 PROC_UNLOCK(p); 404 405 tms.tms_utime = CONVTCK(utime); 406 tms.tms_stime = CONVTCK(stime); 407 408 tms.tms_cutime = CONVTCK(cutime); 409 tms.tms_cstime = CONVTCK(cstime); 410 411 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 412 return (error); 413 } 414 415 microuptime(&tv); 416 td->td_retval[0] = (int)CONVTCK(tv); 417 return (0); 418 } 419 420 int 421 linux_newuname(struct thread *td, struct linux_newuname_args *args) 422 { 423 struct l_new_utsname utsname; 424 char osname[LINUX_MAX_UTSNAME]; 425 char osrelease[LINUX_MAX_UTSNAME]; 426 char *p; 427 428 linux_get_osname(td, osname); 429 linux_get_osrelease(td, osrelease); 430 431 bzero(&utsname, sizeof(utsname)); 432 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 433 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 434 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 435 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 436 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 437 for (p = utsname.version; *p != '\0'; ++p) 438 if (*p == '\n') { 439 *p = '\0'; 440 break; 441 } 442 #if defined(__amd64__) 443 /* 444 * On amd64, Linux uname(2) needs to return "x86_64" 445 * for both 64-bit and 32-bit applications. On 32-bit, 446 * the string returned by getauxval(AT_PLATFORM) needs 447 * to remain "i686", though. 
448 */ 449 #if defined(COMPAT_LINUX32) 450 if (linux32_emulate_i386) 451 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 452 else 453 #endif 454 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 455 #elif defined(__aarch64__) 456 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 457 #elif defined(__i386__) 458 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 459 #endif 460 461 return (copyout(&utsname, args->buf, sizeof(utsname))); 462 } 463 464 struct l_utimbuf { 465 l_time_t l_actime; 466 l_time_t l_modtime; 467 }; 468 469 #ifdef LINUX_LEGACY_SYSCALLS 470 int 471 linux_utime(struct thread *td, struct linux_utime_args *args) 472 { 473 struct timeval tv[2], *tvp; 474 struct l_utimbuf lut; 475 int error; 476 477 if (args->times) { 478 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 479 return (error); 480 tv[0].tv_sec = lut.l_actime; 481 tv[0].tv_usec = 0; 482 tv[1].tv_sec = lut.l_modtime; 483 tv[1].tv_usec = 0; 484 tvp = tv; 485 } else 486 tvp = NULL; 487 488 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 489 tvp, UIO_SYSSPACE)); 490 } 491 #endif 492 493 #ifdef LINUX_LEGACY_SYSCALLS 494 int 495 linux_utimes(struct thread *td, struct linux_utimes_args *args) 496 { 497 l_timeval ltv[2]; 498 struct timeval tv[2], *tvp = NULL; 499 int error; 500 501 if (args->tptr != NULL) { 502 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 503 return (error); 504 tv[0].tv_sec = ltv[0].tv_sec; 505 tv[0].tv_usec = ltv[0].tv_usec; 506 tv[1].tv_sec = ltv[1].tv_sec; 507 tv[1].tv_usec = ltv[1].tv_usec; 508 tvp = tv; 509 } 510 511 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 512 tvp, UIO_SYSSPACE)); 513 } 514 #endif 515 516 static int 517 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 518 { 519 520 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 521 l_times->tv_nsec != LINUX_UTIME_NOW && 522 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 523 return (EINVAL); 524 525 times->tv_sec = 
l_times->tv_sec; 526 switch (l_times->tv_nsec) 527 { 528 case LINUX_UTIME_OMIT: 529 times->tv_nsec = UTIME_OMIT; 530 break; 531 case LINUX_UTIME_NOW: 532 times->tv_nsec = UTIME_NOW; 533 break; 534 default: 535 times->tv_nsec = l_times->tv_nsec; 536 } 537 538 return (0); 539 } 540 541 static int 542 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 543 struct timespec *timesp, int lflags) 544 { 545 int dfd, flags = 0; 546 547 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 548 549 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 550 return (EINVAL); 551 552 if (timesp != NULL) { 553 /* This breaks POSIX, but is what the Linux kernel does 554 * _on purpose_ (documented in the man page for utimensat(2)), 555 * so we must follow that behaviour. */ 556 if (timesp[0].tv_nsec == UTIME_OMIT && 557 timesp[1].tv_nsec == UTIME_OMIT) 558 return (0); 559 } 560 561 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 562 flags |= AT_SYMLINK_NOFOLLOW; 563 if (lflags & LINUX_AT_EMPTY_PATH) 564 flags |= AT_EMPTY_PATH; 565 566 if (pathname != NULL) 567 return (kern_utimensat(td, dfd, pathname, 568 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 569 570 if (lflags != 0) 571 return (EINVAL); 572 573 return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); 574 } 575 576 int 577 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 578 { 579 struct l_timespec l_times[2]; 580 struct timespec times[2], *timesp; 581 int error; 582 583 if (args->times != NULL) { 584 error = copyin(args->times, l_times, sizeof(l_times)); 585 if (error != 0) 586 return (error); 587 588 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 589 if (error != 0) 590 return (error); 591 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 592 if (error != 0) 593 return (error); 594 timesp = times; 595 } else 596 timesp = NULL; 597 598 return (linux_common_utimensat(td, args->dfd, args->pathname, 599 timesp, args->flags)); 600 } 601 602 #if defined(__i386__) || 
(defined(__amd64__) && defined(COMPAT_LINUX32)) 603 static int 604 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 605 { 606 607 /* Zero out the padding in compat mode. */ 608 l_times->tv_nsec &= 0xFFFFFFFFUL; 609 610 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 611 l_times->tv_nsec != LINUX_UTIME_NOW && 612 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 613 return (EINVAL); 614 615 times->tv_sec = l_times->tv_sec; 616 switch (l_times->tv_nsec) 617 { 618 case LINUX_UTIME_OMIT: 619 times->tv_nsec = UTIME_OMIT; 620 break; 621 case LINUX_UTIME_NOW: 622 times->tv_nsec = UTIME_NOW; 623 break; 624 default: 625 times->tv_nsec = l_times->tv_nsec; 626 } 627 628 return (0); 629 } 630 631 int 632 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 633 { 634 struct l_timespec64 l_times[2]; 635 struct timespec times[2], *timesp; 636 int error; 637 638 if (args->times64 != NULL) { 639 error = copyin(args->times64, l_times, sizeof(l_times)); 640 if (error != 0) 641 return (error); 642 643 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 644 if (error != 0) 645 return (error); 646 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 647 if (error != 0) 648 return (error); 649 timesp = times; 650 } else 651 timesp = NULL; 652 653 return (linux_common_utimensat(td, args->dfd, args->pathname, 654 timesp, args->flags)); 655 } 656 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 657 658 #ifdef LINUX_LEGACY_SYSCALLS 659 int 660 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 661 { 662 l_timeval ltv[2]; 663 struct timeval tv[2], *tvp = NULL; 664 int error, dfd; 665 666 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 667 668 if (args->utimes != NULL) { 669 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 670 return (error); 671 tv[0].tv_sec = ltv[0].tv_sec; 672 tv[0].tv_usec = ltv[0].tv_usec; 673 tv[1].tv_sec = ltv[1].tv_sec; 674 tv[1].tv_usec = ltv[1].tv_usec; 675 tvp = tv; 676 } 677 678 return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 679 tvp, UIO_SYSSPACE)); 680 } 681 #endif 682 683 static int 684 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 685 int options, void *rup, l_siginfo_t *infop) 686 { 687 l_siginfo_t lsi; 688 siginfo_t siginfo; 689 struct __wrusage wru; 690 int error, status, tmpstat, sig; 691 692 error = kern_wait6(td, idtype, id, &status, options, 693 rup != NULL ? &wru : NULL, &siginfo); 694 695 if (error == 0 && statusp) { 696 tmpstat = status & 0xffff; 697 if (WIFSIGNALED(tmpstat)) { 698 tmpstat = (tmpstat & 0xffffff80) | 699 bsd_to_linux_signal(WTERMSIG(tmpstat)); 700 } else if (WIFSTOPPED(tmpstat)) { 701 tmpstat = (tmpstat & 0xffff00ff) | 702 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 703 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 704 if (WSTOPSIG(status) == SIGTRAP) { 705 tmpstat = linux_ptrace_status(td, 706 siginfo.si_pid, tmpstat); 707 } 708 #endif 709 } else if (WIFCONTINUED(tmpstat)) { 710 tmpstat = 0xffff; 711 } 712 error = copyout(&tmpstat, statusp, sizeof(int)); 713 } 714 if (error == 0 && rup != NULL) 715 error = linux_copyout_rusage(&wru.wru_self, rup); 716 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 717 sig = bsd_to_linux_signal(siginfo.si_signo); 718 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 719 error = copyout(&lsi, infop, sizeof(lsi)); 720 } 721 722 return (error); 723 } 724 725 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 726 int 727 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 728 { 729 struct linux_wait4_args wait4_args = { 730 .pid = args->pid, 731 .status = 
args->status, 732 .options = args->options, 733 .rusage = NULL, 734 }; 735 736 return (linux_wait4(td, &wait4_args)); 737 } 738 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 739 740 int 741 linux_wait4(struct thread *td, struct linux_wait4_args *args) 742 { 743 struct proc *p; 744 int options, id, idtype; 745 746 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 747 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 748 return (EINVAL); 749 750 /* -INT_MIN is not defined. */ 751 if (args->pid == INT_MIN) 752 return (ESRCH); 753 754 options = 0; 755 linux_to_bsd_waitopts(args->options, &options); 756 757 /* 758 * For backward compatibility we implicitly add flags WEXITED 759 * and WTRAPPED here. 760 */ 761 options |= WEXITED | WTRAPPED; 762 763 if (args->pid == WAIT_ANY) { 764 idtype = P_ALL; 765 id = 0; 766 } else if (args->pid < 0) { 767 idtype = P_PGID; 768 id = (id_t)-args->pid; 769 } else if (args->pid == 0) { 770 idtype = P_PGID; 771 p = td->td_proc; 772 PROC_LOCK(p); 773 id = p->p_pgid; 774 PROC_UNLOCK(p); 775 } else { 776 idtype = P_PID; 777 id = (id_t)args->pid; 778 } 779 780 return (linux_common_wait(td, idtype, id, args->status, options, 781 args->rusage, NULL)); 782 } 783 784 int 785 linux_waitid(struct thread *td, struct linux_waitid_args *args) 786 { 787 idtype_t idtype; 788 int error, options; 789 struct proc *p; 790 pid_t id; 791 792 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 793 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 794 return (EINVAL); 795 796 options = 0; 797 linux_to_bsd_waitopts(args->options, &options); 798 799 id = args->id; 800 switch (args->idtype) { 801 case LINUX_P_ALL: 802 idtype = P_ALL; 803 break; 804 case LINUX_P_PID: 805 if (args->id <= 0) 806 return (EINVAL); 807 idtype = P_PID; 808 break; 809 case LINUX_P_PGID: 810 if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { 811 p = td->td_proc; 812 PROC_LOCK(p); 813 id = p->p_pgid; 814 PROC_UNLOCK(p); 
815 } else if (args->id <= 0) 816 return (EINVAL); 817 idtype = P_PGID; 818 break; 819 case LINUX_P_PIDFD: 820 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 821 return (ENOSYS); 822 default: 823 return (EINVAL); 824 } 825 826 error = linux_common_wait(td, idtype, id, NULL, options, 827 args->rusage, args->info); 828 td->td_retval[0] = 0; 829 830 return (error); 831 } 832 833 #ifdef LINUX_LEGACY_SYSCALLS 834 int 835 linux_mknod(struct thread *td, struct linux_mknod_args *args) 836 { 837 int error; 838 839 switch (args->mode & S_IFMT) { 840 case S_IFIFO: 841 case S_IFSOCK: 842 error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, 843 args->mode); 844 break; 845 846 case S_IFCHR: 847 case S_IFBLK: 848 error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, 849 args->mode, linux_decode_dev(args->dev)); 850 break; 851 852 case S_IFDIR: 853 error = EPERM; 854 break; 855 856 case 0: 857 args->mode |= S_IFREG; 858 /* FALLTHROUGH */ 859 case S_IFREG: 860 error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, 861 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 862 if (error == 0) 863 kern_close(td, td->td_retval[0]); 864 break; 865 866 default: 867 error = EINVAL; 868 break; 869 } 870 return (error); 871 } 872 #endif 873 874 int 875 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 876 { 877 int error, dfd; 878 879 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 880 881 switch (args->mode & S_IFMT) { 882 case S_IFIFO: 883 case S_IFSOCK: 884 error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, 885 args->mode); 886 break; 887 888 case S_IFCHR: 889 case S_IFBLK: 890 error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, 891 args->mode, linux_decode_dev(args->dev)); 892 break; 893 894 case S_IFDIR: 895 error = EPERM; 896 break; 897 898 case 0: 899 args->mode |= S_IFREG; 900 /* FALLTHROUGH */ 901 case S_IFREG: 902 error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, 903 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 904 if (error == 0) 905 kern_close(td, td->td_retval[0]); 906 break; 907 908 default: 909 error = EINVAL; 910 break; 911 } 912 return (error); 913 } 914 915 /* 916 * UGH! This is just about the dumbest idea I've ever heard!! 917 */ 918 int 919 linux_personality(struct thread *td, struct linux_personality_args *args) 920 { 921 struct linux_pemuldata *pem; 922 struct proc *p = td->td_proc; 923 uint32_t old; 924 925 PROC_LOCK(p); 926 pem = pem_find(p); 927 old = pem->persona; 928 if (args->per != 0xffffffff) 929 pem->persona = args->per; 930 PROC_UNLOCK(p); 931 932 td->td_retval[0] = old; 933 return (0); 934 } 935 936 struct l_itimerval { 937 l_timeval it_interval; 938 l_timeval it_value; 939 }; 940 941 #define B2L_ITIMERVAL(bip, lip) \ 942 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 943 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 944 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 945 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 946 947 int 948 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 949 { 950 int error; 951 struct l_itimerval ls; 952 struct itimerval aitv, oitv; 953 954 if (uap->itv == NULL) { 955 uap->itv = uap->oitv; 956 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 957 } 958 959 error = copyin(uap->itv, &ls, sizeof(ls)); 960 if (error != 0) 961 return (error); 962 B2L_ITIMERVAL(&aitv, &ls); 
963 error = kern_setitimer(td, uap->which, &aitv, &oitv); 964 if (error != 0 || uap->oitv == NULL) 965 return (error); 966 B2L_ITIMERVAL(&ls, &oitv); 967 968 return (copyout(&ls, uap->oitv, sizeof(ls))); 969 } 970 971 int 972 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 973 { 974 int error; 975 struct l_itimerval ls; 976 struct itimerval aitv; 977 978 error = kern_getitimer(td, uap->which, &aitv); 979 if (error != 0) 980 return (error); 981 B2L_ITIMERVAL(&ls, &aitv); 982 return (copyout(&ls, uap->itv, sizeof(ls))); 983 } 984 985 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 986 int 987 linux_nice(struct thread *td, struct linux_nice_args *args) 988 { 989 990 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 991 } 992 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 993 994 int 995 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 996 { 997 struct ucred *newcred, *oldcred; 998 l_gid_t *linux_gidset; 999 gid_t *bsd_gidset; 1000 int ngrp, error; 1001 struct proc *p; 1002 1003 ngrp = args->gidsetsize; 1004 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1005 return (EINVAL); 1006 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1007 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1008 if (error) 1009 goto out; 1010 newcred = crget(); 1011 crextend(newcred, ngrp + 1); 1012 p = td->td_proc; 1013 PROC_LOCK(p); 1014 oldcred = p->p_ucred; 1015 crcopy(newcred, oldcred); 1016 1017 /* 1018 * cr_groups[0] holds egid. Setting the whole set from 1019 * the supplied set will cause egid to be changed too. 1020 * Keep cr_groups[0] unchanged to prevent that. 
1021 */ 1022 1023 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1024 PROC_UNLOCK(p); 1025 crfree(newcred); 1026 goto out; 1027 } 1028 1029 if (ngrp > 0) { 1030 newcred->cr_ngroups = ngrp + 1; 1031 1032 bsd_gidset = newcred->cr_groups; 1033 ngrp--; 1034 while (ngrp >= 0) { 1035 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1036 ngrp--; 1037 } 1038 } else 1039 newcred->cr_ngroups = 1; 1040 1041 setsugid(p); 1042 proc_set_cred(p, newcred); 1043 PROC_UNLOCK(p); 1044 crfree(oldcred); 1045 error = 0; 1046 out: 1047 free(linux_gidset, M_LINUX); 1048 return (error); 1049 } 1050 1051 int 1052 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1053 { 1054 struct ucred *cred; 1055 l_gid_t *linux_gidset; 1056 gid_t *bsd_gidset; 1057 int bsd_gidsetsz, ngrp, error; 1058 1059 cred = td->td_ucred; 1060 bsd_gidset = cred->cr_groups; 1061 bsd_gidsetsz = cred->cr_ngroups - 1; 1062 1063 /* 1064 * cr_groups[0] holds egid. Returning the whole set 1065 * here will cause a duplicate. Exclude cr_groups[0] 1066 * to prevent that. 
1067 */ 1068 1069 if ((ngrp = args->gidsetsize) == 0) { 1070 td->td_retval[0] = bsd_gidsetsz; 1071 return (0); 1072 } 1073 1074 if (ngrp < bsd_gidsetsz) 1075 return (EINVAL); 1076 1077 ngrp = 0; 1078 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1079 M_LINUX, M_WAITOK); 1080 while (ngrp < bsd_gidsetsz) { 1081 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1082 ngrp++; 1083 } 1084 1085 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1086 free(linux_gidset, M_LINUX); 1087 if (error) 1088 return (error); 1089 1090 td->td_retval[0] = ngrp; 1091 return (0); 1092 } 1093 1094 static bool 1095 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1096 { 1097 1098 if (linux_dummy_rlimits == 0) 1099 return (false); 1100 1101 switch (resource) { 1102 case LINUX_RLIMIT_LOCKS: 1103 case LINUX_RLIMIT_SIGPENDING: 1104 case LINUX_RLIMIT_MSGQUEUE: 1105 case LINUX_RLIMIT_RTTIME: 1106 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1107 rlim->rlim_max = LINUX_RLIM_INFINITY; 1108 return (true); 1109 case LINUX_RLIMIT_NICE: 1110 case LINUX_RLIMIT_RTPRIO: 1111 rlim->rlim_cur = 0; 1112 rlim->rlim_max = 0; 1113 return (true); 1114 default: 1115 return (false); 1116 } 1117 } 1118 1119 int 1120 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1121 { 1122 struct rlimit bsd_rlim; 1123 struct l_rlimit rlim; 1124 u_int which; 1125 int error; 1126 1127 if (args->resource >= LINUX_RLIM_NLIMITS) 1128 return (EINVAL); 1129 1130 which = linux_to_bsd_resource[args->resource]; 1131 if (which == -1) 1132 return (EINVAL); 1133 1134 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1135 if (error) 1136 return (error); 1137 1138 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1139 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1140 return (kern_setrlimit(td, which, &bsd_rlim)); 1141 } 1142 1143 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1144 int 1145 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1146 { 
1147 struct l_rlimit rlim; 1148 struct rlimit bsd_rlim; 1149 u_int which; 1150 1151 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1152 rlim.rlim_cur = bsd_rlim.rlim_cur; 1153 rlim.rlim_max = bsd_rlim.rlim_max; 1154 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1155 } 1156 1157 if (args->resource >= LINUX_RLIM_NLIMITS) 1158 return (EINVAL); 1159 1160 which = linux_to_bsd_resource[args->resource]; 1161 if (which == -1) 1162 return (EINVAL); 1163 1164 lim_rlimit(td, which, &bsd_rlim); 1165 1166 #ifdef COMPAT_LINUX32 1167 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1168 if (rlim.rlim_cur == UINT_MAX) 1169 rlim.rlim_cur = INT_MAX; 1170 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1171 if (rlim.rlim_max == UINT_MAX) 1172 rlim.rlim_max = INT_MAX; 1173 #else 1174 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1175 if (rlim.rlim_cur == ULONG_MAX) 1176 rlim.rlim_cur = LONG_MAX; 1177 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1178 if (rlim.rlim_max == ULONG_MAX) 1179 rlim.rlim_max = LONG_MAX; 1180 #endif 1181 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1182 } 1183 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1184 1185 int 1186 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1187 { 1188 struct l_rlimit rlim; 1189 struct rlimit bsd_rlim; 1190 u_int which; 1191 1192 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1193 rlim.rlim_cur = bsd_rlim.rlim_cur; 1194 rlim.rlim_max = bsd_rlim.rlim_max; 1195 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1196 } 1197 1198 if (args->resource >= LINUX_RLIM_NLIMITS) 1199 return (EINVAL); 1200 1201 which = linux_to_bsd_resource[args->resource]; 1202 if (which == -1) 1203 return (EINVAL); 1204 1205 lim_rlimit(td, which, &bsd_rlim); 1206 1207 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1208 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1209 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1210 } 1211 1212 int 1213 linux_sched_setscheduler(struct thread *td, 1214 
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	/* Translate the Linux scheduling policy to the native one. */
	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	/* linux_tdfind() returns with the target's proc lock held. */
	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	/* Translate the native policy back to the Linux constant. */
	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	/* With priority mapping enabled, report the Linux ranges. */
	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	/* With priority mapping enabled, report the Linux ranges. */
	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

/* Linux reboot(2) command and magic-number constants. */
#define REBOOT_CAD_ON	0x89abcdef
#define REBOOT_CAD_OFF	0
#define REBOOT_HALT	0xcdef0123
#define REBOOT_RESTART	0x01234567
#define REBOOT_RESTART2	0xA1B2C3D4
#define REBOOT_POWEROFF	0x4321FEDC
#define REBOOT_MAGIC1	0xfee1dead
#define REBOOT_MAGIC2	0x28121969
#define REBOOT_MAGIC2A	0x05121996
#define REBOOT_MAGIC2B	0x16041998

/*
 * reboot(2): validate the Linux magic numbers, then map the command
 * to the native reboot options.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		/* Ctrl-Alt-Del toggling: only the privilege check is done. */
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;
	return (0);
}

/* Return the Linux thread id recorded in the per-thread emuldata. */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

/* Linux getgid(2): returns the real group ID. */
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

/* Linux getuid(2): returns the real user ID. */
int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

/* Catch-all handler for unimplemented system calls. */
int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	/* Convert the native nice value to Linux's biased (20 - nice) form. */
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

/* sethostname(2) is proxied through the kern.hostname sysctl. */
int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

/* setdomainname(2) is proxied through the kern.domainname sysctl. */
int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

/* Linux capability ABI versions (see capget(2)/capset(2)). */
#define _LINUX_CAPABILITY_VERSION_1  0x19980330
#define _LINUX_CAPABILITY_VERSION_2  0x20071026
#define _LINUX_CAPABILITY_VERSION_3  0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

/*
 * capget(2) stub: always reports empty capability sets.  An unknown
 * header version is answered by writing VERSION_1 back into the header
 * and failing with EINVAL.
 */
int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	/* Version selects how many 32-bit capability words are in play. */
	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	/* Only the calling process (pid 0) is supported. */
	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

/*
 * capset(2) stub: succeeds only when the request would set no
 * capability bits at all; any non-empty request fails with EPERM.
 */
int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	/* Only the calling process (pid 0) is supported. */
	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}

int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size, arg;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related files
	 *   in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control tracability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure to not
		 * overflow the size a Linux program expects. We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side. This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_CAPBSET_READ:
#if 0
		/*
		 * This makes too much noise with Ubuntu Focal.
		 */
		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
		    (int)args->arg2);
#endif
		error = EINVAL;
		break;
	case LINUX_PR_SET_NO_NEW_PRIVS:
		arg = args->arg2 == 1 ?
		    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
		error = kern_procctl(td, P_PID, p->p_pid,
		    PROC_NO_NEW_PRIVS_CTL, &arg);
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}

/*
 * sched_setparam(2): optionally translate the Linux static priority
 * into the native range before handing off to kern_sched_setparam().
 */
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	/* linux_tdfind() returns with the target's proc lock held. */
	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

/*
 * sched_getparam(2): fetch the native parameters and optionally map
 * the priority back into the Linux range.
 */
int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	/* linux_tdfind() returns with the target's proc lock held. */
	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}

/*
 * Get affinity of a process.
1901 */ 1902 int 1903 linux_sched_getaffinity(struct thread *td, 1904 struct linux_sched_getaffinity_args *args) 1905 { 1906 struct thread *tdt; 1907 cpuset_t *mask; 1908 size_t size; 1909 int error; 1910 id_t tid; 1911 1912 tdt = linux_tdfind(td, args->pid, -1); 1913 if (tdt == NULL) 1914 return (ESRCH); 1915 tid = tdt->td_tid; 1916 PROC_UNLOCK(tdt->td_proc); 1917 1918 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1919 size = min(args->len, sizeof(cpuset_t)); 1920 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1921 tid, size, mask); 1922 if (error == ERANGE) 1923 error = EINVAL; 1924 if (error == 0) 1925 error = copyout(mask, args->user_mask_ptr, size); 1926 if (error == 0) 1927 td->td_retval[0] = size; 1928 free(mask, M_LINUX); 1929 return (error); 1930 } 1931 1932 /* 1933 * Set affinity of a process. 1934 */ 1935 int 1936 linux_sched_setaffinity(struct thread *td, 1937 struct linux_sched_setaffinity_args *args) 1938 { 1939 struct thread *tdt; 1940 cpuset_t *mask; 1941 int cpu, error; 1942 size_t len; 1943 id_t tid; 1944 1945 tdt = linux_tdfind(td, args->pid, -1); 1946 if (tdt == NULL) 1947 return (ESRCH); 1948 tid = tdt->td_tid; 1949 PROC_UNLOCK(tdt->td_proc); 1950 1951 len = min(args->len, sizeof(cpuset_t)); 1952 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);; 1953 error = copyin(args->user_mask_ptr, mask, len); 1954 if (error != 0) 1955 goto out; 1956 /* Linux ignore high bits */ 1957 CPU_FOREACH_ISSET(cpu, mask) 1958 if (cpu > mp_maxid) 1959 CPU_CLR(cpu, mask); 1960 1961 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1962 tid, mask); 1963 if (error == EDEADLK) 1964 error = EINVAL; 1965 out: 1966 free(mask, M_TEMP); 1967 return (error); 1968 } 1969 1970 struct linux_rlimit64 { 1971 uint64_t rlim_cur; 1972 uint64_t rlim_max; 1973 }; 1974 1975 int 1976 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 1977 { 1978 struct rlimit rlim, nrlim; 1979 struct linux_rlimit64 lrlim; 
	struct proc *p;
	u_int which;
	int flags;
	int error;

	/* A pure query of a dummy limit needs no target-process lookup. */
	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
		 * rlim is unsigned 64-bit. FreeBSD treats negative limits
		 * as INFINITY so we do not need a conversion even.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	/* Setting a limit requires debug rights; reading only visibility. */
	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

out:
	PRELE(p);
	return (error);
}

int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	/* The remaining time is written back regardless of the result. */
	if (args->tsp != NULL)
		linux_put_timespec(&ts, args->tsp);
	return (error);
}

static int
linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
    l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
    l_uintptr_t *sig)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	/* The optional sig argument carries a (sigset, size) pair. */
	ssp = NULL;
	if (sig != NULL) {
		error = copyin(sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		error = linux_copyin_sigset(td, PTRIN(lpse6.ss),
		    lpse6.ss_len, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;

	/*
	 * Currently glibc changes nanosecond number to microsecond.
	 * This mean losing precision but for now it is hardly seen.
	 */
	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&utv, tsp);
		if (itimerfix(&utv))
			return (EINVAL);

		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, nfds, readfds, writefds,
	    exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (tsp != NULL) {
		/*
		 * Compute how much time was left of the timeout,
		 * by subtracting the current time and the time
		 * before we started the call, and subtracting
		 * that result from the user-supplied value.
		 */
		microtime(&tv1);
		timevalsub(&tv1, &tv0);
		timevalsub(&utv, &tv1);
		if (utv.tv_sec < 0)
			timevalclear(&utv);
		TIMEVAL_TO_TIMESPEC(&utv, tsp);
	}
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 64-bit-time variant of pselect6 for 32-bit ABIs. */
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec64(&ts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	/* Unlike pselect6, the timeout is only updated on success. */
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec(&uts, args->tsp);
	return (error);
}

static int
linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
    struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
	struct timespec ts0, ts1;
	struct pollfd stackfds[32];
	struct pollfd *kfds;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (kern_poll_maxfds(nfds))
		return (EINVAL);
	if (sset != NULL) {
		error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;
	if (tsp != NULL)
		nanotime(&ts0);

	/* Small fd sets use the on-stack buffer, large ones the heap. */
	if (nfds > nitems(stackfds))
		kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
	else
		kfds = stackfds;
	error = linux_pollin(td, kfds, fds, nfds);
	if (error != 0)
		goto out;

	error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
	if (error == 0)
		error = linux_pollout(td, kfds, fds, nfds);

	if (error == 0 && tsp != NULL) {
		if (td->td_retval[0]) {
			/* Report the time remaining before the timeout. */
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(tsp, &ts1, tsp);
			if (tsp->tv_sec < 0)
				timespecclear(tsp);
		} else
			timespecclear(tsp);
	}

out:
	if (nfds > nitems(stackfds))
		free(kfds, M_TEMP);
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 64-bit-time variant of ppoll for 32-bit ABIs. */
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;
	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec64(&uts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/* Copy in user pollfds and translate Linux events to native events. */
static int
linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	int error;
	u_int i;

	error = copyin(ufds, fds, nfd * sizeof(*fds));
	if (error != 0)
		return (error);

	for (i = 0; i < nfd; i++) {
		if (fds->events != 0)
			linux_to_bsd_poll_events(td, fds->fd,
			    fds->events, &fds->events);
		fds++;
	}
	return (0);
}

/* Translate revents back to Linux, copy them out, count ready fds. */
static int
linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	int error = 0;
	u_int i, n = 0;

	for (i = 0; i < nfd; i++) {
		if (fds->revents != 0) {
			bsd_to_linux_poll_events(fds->revents,
			    &fds->revents);
			n++;
		}
		/* Only revents is written back; events stays untouched. */
		error = copyout(&fds->revents, &ufds->revents,
		    sizeof(ufds->revents));
		if (error)
			return (error);
		fds++;
		ufds++;
	}
	td->td_retval[0] = n;
	return (0);
}

static int
linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
    struct timespec *ts)
{
	struct thread *tdt;
	int error;

	/*
	 * According to the man page, EINVAL should be returned when an
	 * invalid pid is specified.
	 */
	if (pid < 0)
		return (EINVAL);

	/* linux_tdfind() returns with the target's proc lock held. */
	tdt = linux_tdfind(td, pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, ts);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec(&ts, uap->interval));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 64-bit-time variant for 32-bit ABIs. */
int
linux_sched_rr_get_interval_time64(struct thread *td,
    struct linux_sched_rr_get_interval_time64_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec64(&ts, uap->interval));
}
#endif

/*
 * In case when the Linux thread is the initial thread in
 * the thread group thread id is equal to the process id.
 * Glibc depends on this magic (assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		/* On success the caller is left holding the proc lock. */
		PROC_LOCK(td->td_proc);
		return (td);
	} else if (tid > PID_MAX)
		return (tdfind(tid, pid));

	/*
	 * Initial thread where the tid equal to the pid.
	 */
	p = pfind(tid);
	if (p != NULL) {
		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
		    (pid != -1 && tid != pid)) {
			/*
			 * p is not a Linuxulator process.
			 */
			PROC_UNLOCK(p);
			return (NULL);
		}
		FOREACH_THREAD_IN_PROC(p, tdt) {
			em = em_find(tdt);
			if (tid == em->em_tid)
				return (tdt);
		}
		PROC_UNLOCK(p);
	}
	return (NULL);
}

/* Translate Linux wait(2) option flags to their FreeBSD counterparts. */
void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

/* getrandom(2): feed the user buffer from the kernel random device. */
int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
		return (EINVAL);
	/* Clamp oversized requests so the count fits the int return. */
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
2427 td->td_retval[0] = args->count - uio.uio_resid; 2428 return (error); 2429 } 2430 2431 int 2432 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2433 { 2434 2435 /* Needs to be page-aligned */ 2436 if (args->start & PAGE_MASK) 2437 return (EINVAL); 2438 return (kern_mincore(td, args->start, args->len, args->vec)); 2439 } 2440 2441 #define SYSLOG_TAG "<6>" 2442 2443 int 2444 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2445 { 2446 char buf[128], *src, *dst; 2447 u_int seq; 2448 int buflen, error; 2449 2450 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2451 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2452 return (EINVAL); 2453 } 2454 2455 if (args->len < 6) { 2456 td->td_retval[0] = 0; 2457 return (0); 2458 } 2459 2460 error = priv_check(td, PRIV_MSGBUF); 2461 if (error) 2462 return (error); 2463 2464 mtx_lock(&msgbuf_lock); 2465 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2466 mtx_unlock(&msgbuf_lock); 2467 2468 dst = args->buf; 2469 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2470 /* The -1 is to skip the trailing '\0'. 
*/ 2471 dst += sizeof(SYSLOG_TAG) - 1; 2472 2473 while (error == 0) { 2474 mtx_lock(&msgbuf_lock); 2475 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2476 mtx_unlock(&msgbuf_lock); 2477 2478 if (buflen == 0) 2479 break; 2480 2481 for (src = buf; src < buf + buflen && error == 0; src++) { 2482 if (*src == '\0') 2483 continue; 2484 2485 if (dst >= args->buf + args->len) 2486 goto out; 2487 2488 error = copyout(src, dst, 1); 2489 dst++; 2490 2491 if (*src == '\n' && *(src + 1) != '<' && 2492 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2493 error = copyout(&SYSLOG_TAG, 2494 dst, sizeof(SYSLOG_TAG)); 2495 dst += sizeof(SYSLOG_TAG) - 1; 2496 } 2497 } 2498 } 2499 out: 2500 td->td_retval[0] = dst - args->buf; 2501 return (error); 2502 } 2503 2504 int 2505 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2506 { 2507 int cpu, error, node; 2508 2509 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2510 error = 0; 2511 node = cpuid_to_pcpu[cpu]->pc_domain; 2512 2513 if (args->cpu != NULL) 2514 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2515 if (args->node != NULL) 2516 error = copyout(&node, args->node, sizeof(l_int)); 2517 return (error); 2518 } 2519 2520 #if defined(__i386__) || defined(__amd64__) 2521 int 2522 linux_poll(struct thread *td, struct linux_poll_args *args) 2523 { 2524 struct timespec ts, *tsp; 2525 2526 if (args->timeout != INFTIM) { 2527 if (args->timeout < 0) 2528 return (EINVAL); 2529 ts.tv_sec = args->timeout / 1000; 2530 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2531 tsp = &ts; 2532 } else 2533 tsp = NULL; 2534 2535 return (linux_common_ppoll(td, args->fds, args->nfds, 2536 tsp, NULL, 0)); 2537 } 2538 #endif /* __i386__ || __amd64__ */ 2539 2540 int 2541 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2542 { 2543 2544 switch (args->op) { 2545 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2546 return (EOPNOTSUPP); 2547 default: 2548 /* 2549 * Ignore unknown operations, just like 
Linux kernel built 2550 * without CONFIG_SECCOMP. 2551 */ 2552 return (EINVAL); 2553 } 2554 } 2555 2556 /* 2557 * Custom version of exec_copyin_args(), to copy out argument and environment 2558 * strings from the old process address space into the temporary string buffer. 2559 * Based on freebsd32_exec_copyin_args. 2560 */ 2561 static int 2562 linux_exec_copyin_args(struct image_args *args, const char *fname, 2563 enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv) 2564 { 2565 char *argp, *envp; 2566 l_uintptr_t *ptr, arg; 2567 int error; 2568 2569 bzero(args, sizeof(*args)); 2570 if (argv == NULL) 2571 return (EFAULT); 2572 2573 /* 2574 * Allocate demand-paged memory for the file name, argument, and 2575 * environment strings. 2576 */ 2577 error = exec_alloc_args(args); 2578 if (error != 0) 2579 return (error); 2580 2581 /* 2582 * Copy the file name. 2583 */ 2584 error = exec_args_add_fname(args, fname, segflg); 2585 if (error != 0) 2586 goto err_exit; 2587 2588 /* 2589 * extract arguments first 2590 */ 2591 ptr = argv; 2592 for (;;) { 2593 error = copyin(ptr++, &arg, sizeof(arg)); 2594 if (error) 2595 goto err_exit; 2596 if (arg == 0) 2597 break; 2598 argp = PTRIN(arg); 2599 error = exec_args_add_arg(args, argp, UIO_USERSPACE); 2600 if (error != 0) 2601 goto err_exit; 2602 } 2603 2604 /* 2605 * This comment is from Linux do_execveat_common: 2606 * When argv is empty, add an empty string ("") as argv[0] to 2607 * ensure confused userspace programs that start processing 2608 * from argv[1] won't end up walking envp. 
2609 */ 2610 if (args->argc == 0 && 2611 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) 2612 goto err_exit; 2613 2614 /* 2615 * extract environment strings 2616 */ 2617 if (envv) { 2618 ptr = envv; 2619 for (;;) { 2620 error = copyin(ptr++, &arg, sizeof(arg)); 2621 if (error) 2622 goto err_exit; 2623 if (arg == 0) 2624 break; 2625 envp = PTRIN(arg); 2626 error = exec_args_add_env(args, envp, UIO_USERSPACE); 2627 if (error != 0) 2628 goto err_exit; 2629 } 2630 } 2631 2632 return (0); 2633 2634 err_exit: 2635 exec_free_args(args); 2636 return (error); 2637 } 2638 2639 int 2640 linux_execve(struct thread *td, struct linux_execve_args *args) 2641 { 2642 struct image_args eargs; 2643 int error; 2644 2645 LINUX_CTR(execve); 2646 2647 error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE, 2648 args->argp, args->envp); 2649 if (error == 0) 2650 error = linux_common_execve(td, &eargs); 2651 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2652 return (error); 2653 } 2654 2655 static void 2656 linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) 2657 { 2658 struct rtprio rtp2; 2659 2660 pri_to_rtp(td1, &rtp2); 2661 if (rtp2.type < rtp->type || 2662 (rtp2.type == rtp->type && 2663 rtp2.prio < rtp->prio)) { 2664 rtp->type = rtp2.type; 2665 rtp->prio = rtp2.prio; 2666 } 2667 } 2668 2669 #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX 2670 2671 static int 2672 linux_rtprio2ioprio(struct rtprio *rtp) 2673 { 2674 int ioprio, prio; 2675 2676 switch (rtp->type) { 2677 case RTP_PRIO_IDLE: 2678 prio = RTP_PRIO_MIN; 2679 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); 2680 break; 2681 case RTP_PRIO_NORMAL: 2682 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2683 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); 2684 break; 2685 case RTP_PRIO_REALTIME: 2686 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2687 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); 2688 break; 2689 default: 2690 prio = RTP_PRIO_MIN; 2691 ioprio = 
LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); 2692 break; 2693 } 2694 return (ioprio); 2695 } 2696 2697 static int 2698 linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) 2699 { 2700 2701 switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { 2702 case LINUX_IOPRIO_CLASS_IDLE: 2703 rtp->prio = RTP_PRIO_MIN; 2704 rtp->type = RTP_PRIO_IDLE; 2705 break; 2706 case LINUX_IOPRIO_CLASS_BE: 2707 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2708 rtp->type = RTP_PRIO_NORMAL; 2709 break; 2710 case LINUX_IOPRIO_CLASS_RT: 2711 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2712 rtp->type = RTP_PRIO_REALTIME; 2713 break; 2714 default: 2715 return (EINVAL); 2716 } 2717 return (0); 2718 } 2719 #undef LINUX_PRIO_DIVIDER 2720 2721 int 2722 linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) 2723 { 2724 struct thread *td1; 2725 struct rtprio rtp; 2726 struct pgrp *pg; 2727 struct proc *p; 2728 int error, found; 2729 2730 p = NULL; 2731 td1 = NULL; 2732 error = 0; 2733 found = 0; 2734 rtp.type = RTP_PRIO_IDLE; 2735 rtp.prio = RTP_PRIO_MAX; 2736 switch (args->which) { 2737 case LINUX_IOPRIO_WHO_PROCESS: 2738 if (args->who == 0) { 2739 td1 = td; 2740 p = td1->td_proc; 2741 PROC_LOCK(p); 2742 } else if (args->who > PID_MAX) { 2743 td1 = linux_tdfind(td, args->who, -1); 2744 if (td1 != NULL) 2745 p = td1->td_proc; 2746 } else 2747 p = pfind(args->who); 2748 if (p == NULL) 2749 return (ESRCH); 2750 if ((error = p_cansee(td, p))) { 2751 PROC_UNLOCK(p); 2752 break; 2753 } 2754 if (td1 != NULL) { 2755 pri_to_rtp(td1, &rtp); 2756 } else { 2757 FOREACH_THREAD_IN_PROC(p, td1) { 2758 linux_up_rtprio_if(td1, &rtp); 2759 } 2760 } 2761 found++; 2762 PROC_UNLOCK(p); 2763 break; 2764 case LINUX_IOPRIO_WHO_PGRP: 2765 sx_slock(&proctree_lock); 2766 if (args->who == 0) { 2767 pg = td->td_proc->p_pgrp; 2768 PGRP_LOCK(pg); 2769 } else { 2770 pg = pgfind(args->who); 2771 if (pg == NULL) { 2772 sx_sunlock(&proctree_lock); 2773 error = ESRCH; 2774 break; 2775 
} 2776 } 2777 sx_sunlock(&proctree_lock); 2778 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2779 PROC_LOCK(p); 2780 if (p->p_state == PRS_NORMAL && 2781 p_cansee(td, p) == 0) { 2782 FOREACH_THREAD_IN_PROC(p, td1) { 2783 linux_up_rtprio_if(td1, &rtp); 2784 found++; 2785 } 2786 } 2787 PROC_UNLOCK(p); 2788 } 2789 PGRP_UNLOCK(pg); 2790 break; 2791 case LINUX_IOPRIO_WHO_USER: 2792 if (args->who == 0) 2793 args->who = td->td_ucred->cr_uid; 2794 sx_slock(&allproc_lock); 2795 FOREACH_PROC_IN_SYSTEM(p) { 2796 PROC_LOCK(p); 2797 if (p->p_state == PRS_NORMAL && 2798 p->p_ucred->cr_uid == args->who && 2799 p_cansee(td, p) == 0) { 2800 FOREACH_THREAD_IN_PROC(p, td1) { 2801 linux_up_rtprio_if(td1, &rtp); 2802 found++; 2803 } 2804 } 2805 PROC_UNLOCK(p); 2806 } 2807 sx_sunlock(&allproc_lock); 2808 break; 2809 default: 2810 error = EINVAL; 2811 break; 2812 } 2813 if (error == 0) { 2814 if (found != 0) 2815 td->td_retval[0] = linux_rtprio2ioprio(&rtp); 2816 else 2817 error = ESRCH; 2818 } 2819 return (error); 2820 } 2821 2822 int 2823 linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) 2824 { 2825 struct thread *td1; 2826 struct rtprio rtp; 2827 struct pgrp *pg; 2828 struct proc *p; 2829 int error; 2830 2831 if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) 2832 return (error); 2833 /* Attempts to set high priorities (REALTIME) require su privileges. 
*/ 2834 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && 2835 (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) 2836 return (error); 2837 2838 p = NULL; 2839 td1 = NULL; 2840 switch (args->which) { 2841 case LINUX_IOPRIO_WHO_PROCESS: 2842 if (args->who == 0) { 2843 td1 = td; 2844 p = td1->td_proc; 2845 PROC_LOCK(p); 2846 } else if (args->who > PID_MAX) { 2847 td1 = linux_tdfind(td, args->who, -1); 2848 if (td1 != NULL) 2849 p = td1->td_proc; 2850 } else 2851 p = pfind(args->who); 2852 if (p == NULL) 2853 return (ESRCH); 2854 if ((error = p_cansched(td, p))) { 2855 PROC_UNLOCK(p); 2856 break; 2857 } 2858 if (td1 != NULL) { 2859 error = rtp_to_pri(&rtp, td1); 2860 } else { 2861 FOREACH_THREAD_IN_PROC(p, td1) { 2862 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2863 break; 2864 } 2865 } 2866 PROC_UNLOCK(p); 2867 break; 2868 case LINUX_IOPRIO_WHO_PGRP: 2869 sx_slock(&proctree_lock); 2870 if (args->who == 0) { 2871 pg = td->td_proc->p_pgrp; 2872 PGRP_LOCK(pg); 2873 } else { 2874 pg = pgfind(args->who); 2875 if (pg == NULL) { 2876 sx_sunlock(&proctree_lock); 2877 error = ESRCH; 2878 break; 2879 } 2880 } 2881 sx_sunlock(&proctree_lock); 2882 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2883 PROC_LOCK(p); 2884 if (p->p_state == PRS_NORMAL && 2885 p_cansched(td, p) == 0) { 2886 FOREACH_THREAD_IN_PROC(p, td1) { 2887 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2888 break; 2889 } 2890 } 2891 PROC_UNLOCK(p); 2892 if (error != 0) 2893 break; 2894 } 2895 PGRP_UNLOCK(pg); 2896 break; 2897 case LINUX_IOPRIO_WHO_USER: 2898 if (args->who == 0) 2899 args->who = td->td_ucred->cr_uid; 2900 sx_slock(&allproc_lock); 2901 FOREACH_PROC_IN_SYSTEM(p) { 2902 PROC_LOCK(p); 2903 if (p->p_state == PRS_NORMAL && 2904 p->p_ucred->cr_uid == args->who && 2905 p_cansched(td, p) == 0) { 2906 FOREACH_THREAD_IN_PROC(p, td1) { 2907 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2908 break; 2909 } 2910 } 2911 PROC_UNLOCK(p); 2912 if (error != 0) 2913 break; 2914 } 2915 sx_sunlock(&allproc_lock); 2916 break; 2917 
default: 2918 error = EINVAL; 2919 break; 2920 } 2921 return (error); 2922 } 2923