1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/fcntl.h> 37 #include <sys/jail.h> 38 #include <sys/imgact.h> 39 #include <sys/limits.h> 40 #include <sys/lock.h> 41 #include <sys/msgbuf.h> 42 #include <sys/mutex.h> 43 #include <sys/poll.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/procctl.h> 47 #include <sys/reboot.h> 48 #include <sys/random.h> 49 #include <sys/resourcevar.h> 50 #include <sys/rtprio.h> 51 #include <sys/sched.h> 52 #include <sys/smp.h> 53 #include <sys/stat.h> 54 #include <sys/syscallsubr.h> 55 #include <sys/sysctl.h> 56 #include <sys/sysent.h> 57 #include <sys/sysproto.h> 58 #include <sys/time.h> 59 #include <sys/vmmeter.h> 60 #include <sys/vnode.h> 61 62 #include <security/audit/audit.h> 63 #include <security/mac/mac_framework.h> 64 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 #include <vm/swap_pager.h> 68 69 #ifdef COMPAT_LINUX32 70 #include <machine/../linux32/linux.h> 71 #include <machine/../linux32/linux32_proto.h> 72 #else 73 #include <machine/../linux/linux.h> 74 #include <machine/../linux/linux_proto.h> 75 #endif 76 77 #include <compat/linux/linux_common.h> 78 #include <compat/linux/linux_dtrace.h> 79 #include <compat/linux/linux_file.h> 80 #include <compat/linux/linux_mib.h> 81 #include <compat/linux/linux_signal.h> 82 #include <compat/linux/linux_time.h> 83 #include <compat/linux/linux_util.h> 84 #include <compat/linux/linux_sysproto.h> 85 #include <compat/linux/linux_emul.h> 86 #include <compat/linux/linux_misc.h> 87 88 int stclohz; /* Statistics clock frequency */ 89 90 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 91 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 92 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 93 RLIMIT_MEMLOCK, RLIMIT_AS 94 }; 95 96 struct l_sysinfo { 97 l_long uptime; /* Seconds since boot */ 98 l_ulong loads[3]; /* 1, 5, and 15 
minute load averages */ 99 #define LINUX_SYSINFO_LOADS_SCALE 65536 100 l_ulong totalram; /* Total usable main memory size */ 101 l_ulong freeram; /* Available memory size */ 102 l_ulong sharedram; /* Amount of shared memory */ 103 l_ulong bufferram; /* Memory used by buffers */ 104 l_ulong totalswap; /* Total swap space size */ 105 l_ulong freeswap; /* swap space still available */ 106 l_ushort procs; /* Number of current processes */ 107 l_ushort pads; 108 l_ulong totalhigh; 109 l_ulong freehigh; 110 l_uint mem_unit; 111 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 112 }; 113 114 struct l_pselect6arg { 115 l_uintptr_t ss; 116 l_size_t ss_len; 117 }; 118 119 static int linux_utimensat_lts_to_ts(struct l_timespec *, 120 struct timespec *); 121 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 122 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 123 struct timespec *); 124 #endif 125 static int linux_common_utimensat(struct thread *, int, 126 const char *, struct timespec *, int); 127 static int linux_common_pselect6(struct thread *, l_int, 128 l_fd_set *, l_fd_set *, l_fd_set *, 129 struct timespec *, l_uintptr_t *); 130 static int linux_common_ppoll(struct thread *, struct pollfd *, 131 uint32_t, struct timespec *, l_sigset_t *, 132 l_size_t); 133 static int linux_pollin(struct thread *, struct pollfd *, 134 struct pollfd *, u_int); 135 static int linux_pollout(struct thread *, struct pollfd *, 136 struct pollfd *, u_int); 137 138 int 139 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 140 { 141 struct l_sysinfo sysinfo; 142 int i, j; 143 struct timespec ts; 144 145 bzero(&sysinfo, sizeof(sysinfo)); 146 getnanouptime(&ts); 147 if (ts.tv_nsec != 0) 148 ts.tv_sec++; 149 sysinfo.uptime = ts.tv_sec; 150 151 /* Use the information from the mib to get our load averages */ 152 for (i = 0; i < 3; i++) 153 sysinfo.loads[i] = averunnable.ldavg[i] * 154 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 155 156 sysinfo.totalram = physmem * PAGE_SIZE; 157 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 158 159 /* 160 * sharedram counts pages allocated to named, swap-backed objects such 161 * as shared memory segments and tmpfs files. There is no cheap way to 162 * compute this, so just leave the field unpopulated. Linux itself only 163 * started setting this field in the 3.x timeframe. 164 */ 165 sysinfo.sharedram = 0; 166 sysinfo.bufferram = 0; 167 168 swap_pager_status(&i, &j); 169 sysinfo.totalswap = i * PAGE_SIZE; 170 sysinfo.freeswap = (i - j) * PAGE_SIZE; 171 172 sysinfo.procs = nprocs; 173 174 /* 175 * Platforms supported by the emulation layer do not have a notion of 176 * high memory. 177 */ 178 sysinfo.totalhigh = 0; 179 sysinfo.freehigh = 0; 180 181 sysinfo.mem_unit = 1; 182 183 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 184 } 185 186 #ifdef LINUX_LEGACY_SYSCALLS 187 int 188 linux_alarm(struct thread *td, struct linux_alarm_args *args) 189 { 190 struct itimerval it, old_it; 191 u_int secs; 192 int error __diagused; 193 194 secs = args->secs; 195 /* 196 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 197 * to match kern_setitimer()'s limit to avoid error from it. 198 * 199 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 200 * platforms. 
201 */ 202 if (secs > INT32_MAX / 2) 203 secs = INT32_MAX / 2; 204 205 it.it_value.tv_sec = secs; 206 it.it_value.tv_usec = 0; 207 timevalclear(&it.it_interval); 208 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 209 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 210 211 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 212 old_it.it_value.tv_usec >= 500000) 213 old_it.it_value.tv_sec++; 214 td->td_retval[0] = old_it.it_value.tv_sec; 215 return (0); 216 } 217 #endif 218 219 int 220 linux_brk(struct thread *td, struct linux_brk_args *args) 221 { 222 struct vmspace *vm = td->td_proc->p_vmspace; 223 uintptr_t new, old; 224 225 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 226 new = (uintptr_t)args->dsend; 227 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 228 td->td_retval[0] = (register_t)new; 229 else 230 td->td_retval[0] = (register_t)old; 231 232 return (0); 233 } 234 235 #ifdef LINUX_LEGACY_SYSCALLS 236 int 237 linux_select(struct thread *td, struct linux_select_args *args) 238 { 239 l_timeval ltv; 240 struct timeval tv0, tv1, utv, *tvp; 241 int error; 242 243 /* 244 * Store current time for computation of the amount of 245 * time left. 246 */ 247 if (args->timeout) { 248 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 249 goto select_out; 250 utv.tv_sec = ltv.tv_sec; 251 utv.tv_usec = ltv.tv_usec; 252 253 if (itimerfix(&utv)) { 254 /* 255 * The timeval was invalid. Convert it to something 256 * valid that will act as it does under Linux. 257 */ 258 utv.tv_sec += utv.tv_usec / 1000000; 259 utv.tv_usec %= 1000000; 260 if (utv.tv_usec < 0) { 261 utv.tv_sec -= 1; 262 utv.tv_usec += 1000000; 263 } 264 if (utv.tv_sec < 0) 265 timevalclear(&utv); 266 } 267 microtime(&tv0); 268 tvp = &utv; 269 } else 270 tvp = NULL; 271 272 error = kern_select(td, args->nfds, args->readfds, args->writefds, 273 args->exceptfds, tvp, LINUX_NFDBITS); 274 if (error) 275 goto select_out; 276 277 if (args->timeout) { 278 if (td->td_retval[0]) { 279 /* 280 * Compute how much time was left of the timeout, 281 * by subtracting the current time and the time 282 * before we started the call, and subtracting 283 * that result from the user-supplied value. 284 */ 285 microtime(&tv1); 286 timevalsub(&tv1, &tv0); 287 timevalsub(&utv, &tv1); 288 if (utv.tv_sec < 0) 289 timevalclear(&utv); 290 } else 291 timevalclear(&utv); 292 ltv.tv_sec = utv.tv_sec; 293 ltv.tv_usec = utv.tv_usec; 294 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 295 goto select_out; 296 } 297 298 select_out: 299 return (error); 300 } 301 #endif 302 303 int 304 linux_mremap(struct thread *td, struct linux_mremap_args *args) 305 { 306 uintptr_t addr; 307 size_t len; 308 int error = 0; 309 310 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 311 td->td_retval[0] = 0; 312 return (EINVAL); 313 } 314 315 /* 316 * Check for the page alignment. 317 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 318 */ 319 if (args->addr & PAGE_MASK) { 320 td->td_retval[0] = 0; 321 return (EINVAL); 322 } 323 324 args->new_len = round_page(args->new_len); 325 args->old_len = round_page(args->old_len); 326 327 if (args->new_len > args->old_len) { 328 td->td_retval[0] = 0; 329 return (ENOMEM); 330 } 331 332 if (args->new_len < args->old_len) { 333 addr = args->addr + args->new_len; 334 len = args->old_len - args->new_len; 335 error = kern_munmap(td, addr, len); 336 } 337 338 td->td_retval[0] = error ? 
0 : (uintptr_t)args->addr; 339 return (error); 340 } 341 342 #define LINUX_MS_ASYNC 0x0001 343 #define LINUX_MS_INVALIDATE 0x0002 344 #define LINUX_MS_SYNC 0x0004 345 346 int 347 linux_msync(struct thread *td, struct linux_msync_args *args) 348 { 349 350 return (kern_msync(td, args->addr, args->len, 351 args->fl & ~LINUX_MS_SYNC)); 352 } 353 354 #ifdef LINUX_LEGACY_SYSCALLS 355 int 356 linux_time(struct thread *td, struct linux_time_args *args) 357 { 358 struct timeval tv; 359 l_time_t tm; 360 int error; 361 362 microtime(&tv); 363 tm = tv.tv_sec; 364 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 365 return (error); 366 td->td_retval[0] = tm; 367 return (0); 368 } 369 #endif 370 371 struct l_times_argv { 372 l_clock_t tms_utime; 373 l_clock_t tms_stime; 374 l_clock_t tms_cutime; 375 l_clock_t tms_cstime; 376 }; 377 378 /* 379 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 380 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 381 * auxiliary vector entry. 382 */ 383 #define CLK_TCK 100 384 385 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 386 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 387 388 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ 389 CONVNTCK(r) : CONVOTCK(r)) 390 391 int 392 linux_times(struct thread *td, struct linux_times_args *args) 393 { 394 struct timeval tv, utime, stime, cutime, cstime; 395 struct l_times_argv tms; 396 struct proc *p; 397 int error; 398 399 if (args->buf != NULL) { 400 p = td->td_proc; 401 PROC_LOCK(p); 402 PROC_STATLOCK(p); 403 calcru(p, &utime, &stime); 404 PROC_STATUNLOCK(p); 405 calccru(p, &cutime, &cstime); 406 PROC_UNLOCK(p); 407 408 tms.tms_utime = CONVTCK(utime); 409 tms.tms_stime = CONVTCK(stime); 410 411 tms.tms_cutime = CONVTCK(cutime); 412 tms.tms_cstime = CONVTCK(cstime); 413 414 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 415 return (error); 416 } 417 418 microuptime(&tv); 419 td->td_retval[0] = (int)CONVTCK(tv); 420 return (0); 421 } 422 423 int 424 linux_newuname(struct thread *td, struct linux_newuname_args *args) 425 { 426 struct l_new_utsname utsname; 427 char osname[LINUX_MAX_UTSNAME]; 428 char osrelease[LINUX_MAX_UTSNAME]; 429 char *p; 430 431 linux_get_osname(td, osname); 432 linux_get_osrelease(td, osrelease); 433 434 bzero(&utsname, sizeof(utsname)); 435 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 436 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 437 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 438 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 439 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 440 for (p = utsname.version; *p != '\0'; ++p) 441 if (*p == '\n') { 442 *p = '\0'; 443 break; 444 } 445 #if defined(__amd64__) 446 /* 447 * On amd64, Linux uname(2) needs to return "x86_64" 448 * for both 64-bit and 32-bit applications. On 32-bit, 449 * the string returned by getauxval(AT_PLATFORM) needs 450 * to remain "i686", though. 
451 */ 452 #if defined(COMPAT_LINUX32) 453 if (linux32_emulate_i386) 454 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 455 else 456 #endif 457 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 458 #elif defined(__aarch64__) 459 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 460 #elif defined(__i386__) 461 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 462 #endif 463 464 return (copyout(&utsname, args->buf, sizeof(utsname))); 465 } 466 467 struct l_utimbuf { 468 l_time_t l_actime; 469 l_time_t l_modtime; 470 }; 471 472 #ifdef LINUX_LEGACY_SYSCALLS 473 int 474 linux_utime(struct thread *td, struct linux_utime_args *args) 475 { 476 struct timeval tv[2], *tvp; 477 struct l_utimbuf lut; 478 int error; 479 480 if (args->times) { 481 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 482 return (error); 483 tv[0].tv_sec = lut.l_actime; 484 tv[0].tv_usec = 0; 485 tv[1].tv_sec = lut.l_modtime; 486 tv[1].tv_usec = 0; 487 tvp = tv; 488 } else 489 tvp = NULL; 490 491 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 492 tvp, UIO_SYSSPACE)); 493 } 494 #endif 495 496 #ifdef LINUX_LEGACY_SYSCALLS 497 int 498 linux_utimes(struct thread *td, struct linux_utimes_args *args) 499 { 500 l_timeval ltv[2]; 501 struct timeval tv[2], *tvp = NULL; 502 int error; 503 504 if (args->tptr != NULL) { 505 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 506 return (error); 507 tv[0].tv_sec = ltv[0].tv_sec; 508 tv[0].tv_usec = ltv[0].tv_usec; 509 tv[1].tv_sec = ltv[1].tv_sec; 510 tv[1].tv_usec = ltv[1].tv_usec; 511 tvp = tv; 512 } 513 514 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 515 tvp, UIO_SYSSPACE)); 516 } 517 #endif 518 519 static int 520 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 521 { 522 523 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 524 l_times->tv_nsec != LINUX_UTIME_NOW && 525 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 526 return (EINVAL); 527 528 times->tv_sec = l_times->tv_sec; 529 switch (l_times->tv_nsec) 530 { 531 case LINUX_UTIME_OMIT: 532 times->tv_nsec = UTIME_OMIT; 533 break; 534 case LINUX_UTIME_NOW: 535 times->tv_nsec = UTIME_NOW; 536 break; 537 default: 538 times->tv_nsec = l_times->tv_nsec; 539 } 540 541 return (0); 542 } 543 544 static int 545 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 546 struct timespec *timesp, int lflags) 547 { 548 int dfd, flags = 0; 549 550 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 551 552 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 553 return (EINVAL); 554 555 if (timesp != NULL) { 556 /* This breaks POSIX, but is what the Linux kernel does 557 * _on purpose_ (documented in the man page for utimensat(2)), 558 * so we must follow that behaviour. 
*/ 559 if (timesp[0].tv_nsec == UTIME_OMIT && 560 timesp[1].tv_nsec == UTIME_OMIT) 561 return (0); 562 } 563 564 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 565 flags |= AT_SYMLINK_NOFOLLOW; 566 if (lflags & LINUX_AT_EMPTY_PATH) 567 flags |= AT_EMPTY_PATH; 568 569 if (pathname != NULL) 570 return (kern_utimensat(td, dfd, pathname, 571 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 572 573 if (lflags != 0) 574 return (EINVAL); 575 576 return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); 577 } 578 579 int 580 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 581 { 582 struct l_timespec l_times[2]; 583 struct timespec times[2], *timesp; 584 int error; 585 586 if (args->times != NULL) { 587 error = copyin(args->times, l_times, sizeof(l_times)); 588 if (error != 0) 589 return (error); 590 591 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 592 if (error != 0) 593 return (error); 594 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 595 if (error != 0) 596 return (error); 597 timesp = times; 598 } else 599 timesp = NULL; 600 601 return (linux_common_utimensat(td, args->dfd, args->pathname, 602 timesp, args->flags)); 603 } 604 605 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 606 static int 607 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 608 { 609 610 /* Zero out the padding in compat mode. */ 611 l_times->tv_nsec &= 0xFFFFFFFFUL; 612 613 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 614 l_times->tv_nsec != LINUX_UTIME_NOW && 615 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 616 return (EINVAL); 617 618 times->tv_sec = l_times->tv_sec; 619 switch (l_times->tv_nsec) 620 { 621 case LINUX_UTIME_OMIT: 622 times->tv_nsec = UTIME_OMIT; 623 break; 624 case LINUX_UTIME_NOW: 625 times->tv_nsec = UTIME_NOW; 626 break; 627 default: 628 times->tv_nsec = l_times->tv_nsec; 629 } 630 631 return (0); 632 } 633 634 int 635 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 636 { 637 struct l_timespec64 l_times[2]; 638 struct timespec times[2], *timesp; 639 int error; 640 641 if (args->times64 != NULL) { 642 error = copyin(args->times64, l_times, sizeof(l_times)); 643 if (error != 0) 644 return (error); 645 646 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 647 if (error != 0) 648 return (error); 649 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 650 if (error != 0) 651 return (error); 652 timesp = times; 653 } else 654 timesp = NULL; 655 656 return (linux_common_utimensat(td, args->dfd, args->pathname, 657 timesp, args->flags)); 658 } 659 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 660 661 #ifdef LINUX_LEGACY_SYSCALLS 662 int 663 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 664 { 665 l_timeval ltv[2]; 666 struct timeval tv[2], *tvp = NULL; 667 int error, dfd; 668 669 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 670 671 if (args->utimes != NULL) { 672 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 673 return (error); 674 tv[0].tv_sec = ltv[0].tv_sec; 675 tv[0].tv_usec = ltv[0].tv_usec; 676 tv[1].tv_sec = ltv[1].tv_sec; 677 tv[1].tv_usec = ltv[1].tv_usec; 678 tvp = tv; 679 } 680 681 return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 682 tvp, UIO_SYSSPACE)); 683 } 684 #endif 685 686 static int 687 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 688 int options, void *rup, l_siginfo_t *infop) 689 { 690 l_siginfo_t lsi; 691 siginfo_t siginfo; 692 struct __wrusage wru; 693 int error, status, tmpstat, sig; 694 695 error = kern_wait6(td, idtype, id, &status, options, 696 rup != NULL ? &wru : NULL, &siginfo); 697 698 if (error == 0 && statusp) { 699 tmpstat = status & 0xffff; 700 if (WIFSIGNALED(tmpstat)) { 701 tmpstat = (tmpstat & 0xffffff80) | 702 bsd_to_linux_signal(WTERMSIG(tmpstat)); 703 } else if (WIFSTOPPED(tmpstat)) { 704 tmpstat = (tmpstat & 0xffff00ff) | 705 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 706 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 707 if (WSTOPSIG(status) == SIGTRAP) { 708 tmpstat = linux_ptrace_status(td, 709 siginfo.si_pid, tmpstat); 710 } 711 #endif 712 } else if (WIFCONTINUED(tmpstat)) { 713 tmpstat = 0xffff; 714 } 715 error = copyout(&tmpstat, statusp, sizeof(int)); 716 } 717 if (error == 0 && rup != NULL) 718 error = linux_copyout_rusage(&wru.wru_self, rup); 719 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 720 sig = bsd_to_linux_signal(siginfo.si_signo); 721 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 722 error = copyout(&lsi, infop, sizeof(lsi)); 723 } 724 725 return (error); 726 } 727 728 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 729 int 730 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 731 { 732 struct linux_wait4_args wait4_args = { 733 .pid = args->pid, 734 .status = args->status, 735 .options = args->options, 736 .rusage = NULL, 737 }; 738 739 return (linux_wait4(td, &wait4_args)); 740 } 741 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 742 743 int 744 linux_wait4(struct thread *td, struct linux_wait4_args *args) 745 { 746 struct proc *p; 747 int options, id, idtype; 748 749 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 750 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 751 return (EINVAL); 752 753 /* -INT_MIN is not defined. */ 754 if (args->pid == INT_MIN) 755 return (ESRCH); 756 757 options = 0; 758 linux_to_bsd_waitopts(args->options, &options); 759 760 /* 761 * For backward compatibility we implicitly add flags WEXITED 762 * and WTRAPPED here. 
763 */ 764 options |= WEXITED | WTRAPPED; 765 766 if (args->pid == WAIT_ANY) { 767 idtype = P_ALL; 768 id = 0; 769 } else if (args->pid < 0) { 770 idtype = P_PGID; 771 id = (id_t)-args->pid; 772 } else if (args->pid == 0) { 773 idtype = P_PGID; 774 p = td->td_proc; 775 PROC_LOCK(p); 776 id = p->p_pgid; 777 PROC_UNLOCK(p); 778 } else { 779 idtype = P_PID; 780 id = (id_t)args->pid; 781 } 782 783 return (linux_common_wait(td, idtype, id, args->status, options, 784 args->rusage, NULL)); 785 } 786 787 int 788 linux_waitid(struct thread *td, struct linux_waitid_args *args) 789 { 790 idtype_t idtype; 791 int error, options; 792 struct proc *p; 793 pid_t id; 794 795 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 796 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 797 return (EINVAL); 798 799 options = 0; 800 linux_to_bsd_waitopts(args->options, &options); 801 802 id = args->id; 803 switch (args->idtype) { 804 case LINUX_P_ALL: 805 idtype = P_ALL; 806 break; 807 case LINUX_P_PID: 808 if (args->id <= 0) 809 return (EINVAL); 810 idtype = P_PID; 811 break; 812 case LINUX_P_PGID: 813 if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { 814 p = td->td_proc; 815 PROC_LOCK(p); 816 id = p->p_pgid; 817 PROC_UNLOCK(p); 818 } else if (args->id <= 0) 819 return (EINVAL); 820 idtype = P_PGID; 821 break; 822 case LINUX_P_PIDFD: 823 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 824 return (ENOSYS); 825 default: 826 return (EINVAL); 827 } 828 829 error = linux_common_wait(td, idtype, id, NULL, options, 830 args->rusage, args->info); 831 td->td_retval[0] = 0; 832 833 return (error); 834 } 835 836 #ifdef LINUX_LEGACY_SYSCALLS 837 int 838 linux_mknod(struct thread *td, struct linux_mknod_args *args) 839 { 840 int error; 841 842 switch (args->mode & S_IFMT) { 843 case S_IFIFO: 844 case S_IFSOCK: 845 error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, 846 args->mode); 847 break; 848 849 case S_IFCHR: 850 case S_IFBLK: 851 error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, 852 args->mode, linux_decode_dev(args->dev)); 853 break; 854 855 case S_IFDIR: 856 error = EPERM; 857 break; 858 859 case 0: 860 args->mode |= S_IFREG; 861 /* FALLTHROUGH */ 862 case S_IFREG: 863 error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, 864 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 865 if (error == 0) 866 kern_close(td, td->td_retval[0]); 867 break; 868 869 default: 870 error = EINVAL; 871 break; 872 } 873 return (error); 874 } 875 #endif 876 877 int 878 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 879 { 880 int error, dfd; 881 882 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 883 884 switch (args->mode & S_IFMT) { 885 case S_IFIFO: 886 case S_IFSOCK: 887 error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, 888 args->mode); 889 break; 890 891 case S_IFCHR: 892 case S_IFBLK: 893 error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, 894 args->mode, linux_decode_dev(args->dev)); 895 break; 896 897 case S_IFDIR: 898 error = EPERM; 899 break; 900 901 case 0: 902 args->mode |= S_IFREG; 903 /* FALLTHROUGH */ 904 case S_IFREG: 905 error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, 906 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 907 if (error == 0) 908 kern_close(td, td->td_retval[0]); 909 break; 910 911 default: 912 error = EINVAL; 913 break; 914 } 915 return (error); 916 } 917 918 /* 919 * UGH! This is just about the dumbest idea I've ever heard!! 
920 */ 921 int 922 linux_personality(struct thread *td, struct linux_personality_args *args) 923 { 924 struct linux_pemuldata *pem; 925 struct proc *p = td->td_proc; 926 uint32_t old; 927 928 PROC_LOCK(p); 929 pem = pem_find(p); 930 old = pem->persona; 931 if (args->per != 0xffffffff) 932 pem->persona = args->per; 933 PROC_UNLOCK(p); 934 935 td->td_retval[0] = old; 936 return (0); 937 } 938 939 struct l_itimerval { 940 l_timeval it_interval; 941 l_timeval it_value; 942 }; 943 944 #define B2L_ITIMERVAL(bip, lip) \ 945 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 946 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 947 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 948 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 949 950 int 951 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 952 { 953 int error; 954 struct l_itimerval ls; 955 struct itimerval aitv, oitv; 956 957 if (uap->itv == NULL) { 958 uap->itv = uap->oitv; 959 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 960 } 961 962 error = copyin(uap->itv, &ls, sizeof(ls)); 963 if (error != 0) 964 return (error); 965 B2L_ITIMERVAL(&aitv, &ls); 966 error = kern_setitimer(td, uap->which, &aitv, &oitv); 967 if (error != 0 || uap->oitv == NULL) 968 return (error); 969 B2L_ITIMERVAL(&ls, &oitv); 970 971 return (copyout(&ls, uap->oitv, sizeof(ls))); 972 } 973 974 int 975 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 976 { 977 int error; 978 struct l_itimerval ls; 979 struct itimerval aitv; 980 981 error = kern_getitimer(td, uap->which, &aitv); 982 if (error != 0) 983 return (error); 984 B2L_ITIMERVAL(&ls, &aitv); 985 return (copyout(&ls, uap->itv, sizeof(ls))); 986 } 987 988 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 989 int 990 linux_nice(struct thread *td, struct linux_nice_args *args) 991 { 992 993 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 994 } 995 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 996 997 int 998 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 999 { 1000 struct ucred *newcred, *oldcred; 1001 l_gid_t *linux_gidset; 1002 gid_t *bsd_gidset; 1003 int ngrp, error; 1004 struct proc *p; 1005 1006 ngrp = args->gidsetsize; 1007 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1008 return (EINVAL); 1009 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1010 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1011 if (error) 1012 goto out; 1013 newcred = crget(); 1014 crextend(newcred, ngrp + 1); 1015 p = td->td_proc; 1016 PROC_LOCK(p); 1017 oldcred = p->p_ucred; 1018 crcopy(newcred, oldcred); 1019 1020 /* 1021 * cr_groups[0] holds egid. Setting the whole set from 1022 * the supplied set will cause egid to be changed too. 1023 * Keep cr_groups[0] unchanged to prevent that. 
1024 */ 1025 1026 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1027 PROC_UNLOCK(p); 1028 crfree(newcred); 1029 goto out; 1030 } 1031 1032 if (ngrp > 0) { 1033 newcred->cr_ngroups = ngrp + 1; 1034 1035 bsd_gidset = newcred->cr_groups; 1036 ngrp--; 1037 while (ngrp >= 0) { 1038 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1039 ngrp--; 1040 } 1041 } else 1042 newcred->cr_ngroups = 1; 1043 1044 setsugid(p); 1045 proc_set_cred(p, newcred); 1046 PROC_UNLOCK(p); 1047 crfree(oldcred); 1048 error = 0; 1049 out: 1050 free(linux_gidset, M_LINUX); 1051 return (error); 1052 } 1053 1054 int 1055 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1056 { 1057 struct ucred *cred; 1058 l_gid_t *linux_gidset; 1059 gid_t *bsd_gidset; 1060 int bsd_gidsetsz, ngrp, error; 1061 1062 cred = td->td_ucred; 1063 bsd_gidset = cred->cr_groups; 1064 bsd_gidsetsz = cred->cr_ngroups - 1; 1065 1066 /* 1067 * cr_groups[0] holds egid. Returning the whole set 1068 * here will cause a duplicate. Exclude cr_groups[0] 1069 * to prevent that. 1070 */ 1071 1072 if ((ngrp = args->gidsetsize) == 0) { 1073 td->td_retval[0] = bsd_gidsetsz; 1074 return (0); 1075 } 1076 1077 if (ngrp < bsd_gidsetsz) 1078 return (EINVAL); 1079 1080 ngrp = 0; 1081 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1082 M_LINUX, M_WAITOK); 1083 while (ngrp < bsd_gidsetsz) { 1084 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1085 ngrp++; 1086 } 1087 1088 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1089 free(linux_gidset, M_LINUX); 1090 if (error) 1091 return (error); 1092 1093 td->td_retval[0] = ngrp; 1094 return (0); 1095 } 1096 1097 static bool 1098 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1099 { 1100 1101 if (linux_dummy_rlimits == 0) 1102 return (false); 1103 1104 switch (resource) { 1105 case LINUX_RLIMIT_LOCKS: 1106 case LINUX_RLIMIT_SIGPENDING: 1107 case LINUX_RLIMIT_MSGQUEUE: 1108 case LINUX_RLIMIT_RTTIME: 1109 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1110 rlim->rlim_max = LINUX_RLIM_INFINITY; 1111 return (true); 1112 case LINUX_RLIMIT_NICE: 1113 case LINUX_RLIMIT_RTPRIO: 1114 rlim->rlim_cur = 0; 1115 rlim->rlim_max = 0; 1116 return (true); 1117 default: 1118 return (false); 1119 } 1120 } 1121 1122 int 1123 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1124 { 1125 struct rlimit bsd_rlim; 1126 struct l_rlimit rlim; 1127 u_int which; 1128 int error; 1129 1130 if (args->resource >= LINUX_RLIM_NLIMITS) 1131 return (EINVAL); 1132 1133 which = linux_to_bsd_resource[args->resource]; 1134 if (which == -1) 1135 return (EINVAL); 1136 1137 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1138 if (error) 1139 return (error); 1140 1141 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1142 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1143 return (kern_setrlimit(td, which, &bsd_rlim)); 1144 } 1145 1146 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1147 int 1148 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1149 { 1150 struct l_rlimit rlim; 1151 struct rlimit bsd_rlim; 1152 u_int which; 1153 1154 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1155 rlim.rlim_cur = bsd_rlim.rlim_cur; 1156 rlim.rlim_max = bsd_rlim.rlim_max; 1157 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1158 } 1159 1160 if (args->resource >= LINUX_RLIM_NLIMITS) 1161 return (EINVAL); 1162 1163 which = linux_to_bsd_resource[args->resource]; 1164 if (which == -1) 1165 return (EINVAL); 1166 1167 lim_rlimit(td, which, 
&bsd_rlim); 1168 1169 #ifdef COMPAT_LINUX32 1170 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1171 if (rlim.rlim_cur == UINT_MAX) 1172 rlim.rlim_cur = INT_MAX; 1173 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1174 if (rlim.rlim_max == UINT_MAX) 1175 rlim.rlim_max = INT_MAX; 1176 #else 1177 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1178 if (rlim.rlim_cur == ULONG_MAX) 1179 rlim.rlim_cur = LONG_MAX; 1180 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1181 if (rlim.rlim_max == ULONG_MAX) 1182 rlim.rlim_max = LONG_MAX; 1183 #endif 1184 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1185 } 1186 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1187 1188 int 1189 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1190 { 1191 struct l_rlimit rlim; 1192 struct rlimit bsd_rlim; 1193 u_int which; 1194 1195 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1196 rlim.rlim_cur = bsd_rlim.rlim_cur; 1197 rlim.rlim_max = bsd_rlim.rlim_max; 1198 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1199 } 1200 1201 if (args->resource >= LINUX_RLIM_NLIMITS) 1202 return (EINVAL); 1203 1204 which = linux_to_bsd_resource[args->resource]; 1205 if (which == -1) 1206 return (EINVAL); 1207 1208 lim_rlimit(td, which, &bsd_rlim); 1209 1210 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1211 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1212 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1213 } 1214 1215 int 1216 linux_sched_setscheduler(struct thread *td, 1217 struct linux_sched_setscheduler_args *args) 1218 { 1219 struct sched_param sched_param; 1220 struct thread *tdt; 1221 int error, policy; 1222 1223 switch (args->policy) { 1224 case LINUX_SCHED_OTHER: 1225 policy = SCHED_OTHER; 1226 break; 1227 case LINUX_SCHED_FIFO: 1228 policy = SCHED_FIFO; 1229 break; 1230 case LINUX_SCHED_RR: 1231 policy = SCHED_RR; 1232 break; 1233 default: 1234 return (EINVAL); 1235 } 1236 1237 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1238 if (error) 1239 return (error); 1240 1241 if (linux_map_sched_prio) { 1242 switch (policy) { 1243 case SCHED_OTHER: 1244 if (sched_param.sched_priority != 0) 1245 return (EINVAL); 1246 1247 sched_param.sched_priority = 1248 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1249 break; 1250 case SCHED_FIFO: 1251 case SCHED_RR: 1252 if (sched_param.sched_priority < 1 || 1253 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1254 return (EINVAL); 1255 1256 /* 1257 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1258 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 
1259 */ 1260 sched_param.sched_priority = 1261 (sched_param.sched_priority - 1) * 1262 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1263 (LINUX_MAX_RT_PRIO - 1); 1264 break; 1265 } 1266 } 1267 1268 tdt = linux_tdfind(td, args->pid, -1); 1269 if (tdt == NULL) 1270 return (ESRCH); 1271 1272 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1273 PROC_UNLOCK(tdt->td_proc); 1274 return (error); 1275 } 1276 1277 int 1278 linux_sched_getscheduler(struct thread *td, 1279 struct linux_sched_getscheduler_args *args) 1280 { 1281 struct thread *tdt; 1282 int error, policy; 1283 1284 tdt = linux_tdfind(td, args->pid, -1); 1285 if (tdt == NULL) 1286 return (ESRCH); 1287 1288 error = kern_sched_getscheduler(td, tdt, &policy); 1289 PROC_UNLOCK(tdt->td_proc); 1290 1291 switch (policy) { 1292 case SCHED_OTHER: 1293 td->td_retval[0] = LINUX_SCHED_OTHER; 1294 break; 1295 case SCHED_FIFO: 1296 td->td_retval[0] = LINUX_SCHED_FIFO; 1297 break; 1298 case SCHED_RR: 1299 td->td_retval[0] = LINUX_SCHED_RR; 1300 break; 1301 } 1302 return (error); 1303 } 1304 1305 int 1306 linux_sched_get_priority_max(struct thread *td, 1307 struct linux_sched_get_priority_max_args *args) 1308 { 1309 struct sched_get_priority_max_args bsd; 1310 1311 if (linux_map_sched_prio) { 1312 switch (args->policy) { 1313 case LINUX_SCHED_OTHER: 1314 td->td_retval[0] = 0; 1315 return (0); 1316 case LINUX_SCHED_FIFO: 1317 case LINUX_SCHED_RR: 1318 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1319 return (0); 1320 default: 1321 return (EINVAL); 1322 } 1323 } 1324 1325 switch (args->policy) { 1326 case LINUX_SCHED_OTHER: 1327 bsd.policy = SCHED_OTHER; 1328 break; 1329 case LINUX_SCHED_FIFO: 1330 bsd.policy = SCHED_FIFO; 1331 break; 1332 case LINUX_SCHED_RR: 1333 bsd.policy = SCHED_RR; 1334 break; 1335 default: 1336 return (EINVAL); 1337 } 1338 return (sys_sched_get_priority_max(td, &bsd)); 1339 } 1340 1341 int 1342 linux_sched_get_priority_min(struct thread *td, 1343 struct linux_sched_get_priority_min_args *args) 1344 { 1345 struct sched_get_priority_min_args bsd; 1346 1347 if (linux_map_sched_prio) { 1348 switch (args->policy) { 1349 case LINUX_SCHED_OTHER: 1350 td->td_retval[0] = 0; 1351 return (0); 1352 case LINUX_SCHED_FIFO: 1353 case LINUX_SCHED_RR: 1354 td->td_retval[0] = 1; 1355 return (0); 1356 default: 1357 return (EINVAL); 1358 } 1359 } 1360 1361 switch (args->policy) { 1362 case LINUX_SCHED_OTHER: 1363 bsd.policy = SCHED_OTHER; 1364 break; 1365 case LINUX_SCHED_FIFO: 1366 bsd.policy = SCHED_FIFO; 1367 break; 1368 case LINUX_SCHED_RR: 1369 bsd.policy = SCHED_RR; 1370 break; 1371 default: 1372 return (EINVAL); 1373 } 1374 return (sys_sched_get_priority_min(td, &bsd)); 1375 } 1376 1377 #define REBOOT_CAD_ON 0x89abcdef 1378 #define REBOOT_CAD_OFF 0 1379 #define REBOOT_HALT 0xcdef0123 1380 #define REBOOT_RESTART 0x01234567 1381 #define REBOOT_RESTART2 0xA1B2C3D4 1382 #define REBOOT_POWEROFF 0x4321FEDC 1383 #define REBOOT_MAGIC1 0xfee1dead 1384 #define REBOOT_MAGIC2 0x28121969 1385 #define REBOOT_MAGIC2A 0x05121996 1386 #define REBOOT_MAGIC2B 0x16041998 1387 1388 int 1389 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1390 { 1391 struct reboot_args bsd_args; 1392 1393 if (args->magic1 != REBOOT_MAGIC1) 1394 return (EINVAL); 1395 1396 switch (args->magic2) { 1397 case REBOOT_MAGIC2: 1398 case REBOOT_MAGIC2A: 1399 case REBOOT_MAGIC2B: 1400 break; 1401 default: 1402 return (EINVAL); 1403 } 1404 1405 switch (args->cmd) { 1406 case REBOOT_CAD_ON: 1407 case REBOOT_CAD_OFF: 1408 return (priv_check(td, PRIV_REBOOT)); 1409 case 
REBOOT_HALT: 1410 bsd_args.opt = RB_HALT; 1411 break; 1412 case REBOOT_RESTART: 1413 case REBOOT_RESTART2: 1414 bsd_args.opt = 0; 1415 break; 1416 case REBOOT_POWEROFF: 1417 bsd_args.opt = RB_POWEROFF; 1418 break; 1419 default: 1420 return (EINVAL); 1421 } 1422 return (sys_reboot(td, &bsd_args)); 1423 } 1424 1425 int 1426 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1427 { 1428 1429 td->td_retval[0] = td->td_proc->p_pid; 1430 1431 return (0); 1432 } 1433 1434 int 1435 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1436 { 1437 struct linux_emuldata *em; 1438 1439 em = em_find(td); 1440 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1441 1442 td->td_retval[0] = em->em_tid; 1443 1444 return (0); 1445 } 1446 1447 int 1448 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1449 { 1450 1451 td->td_retval[0] = kern_getppid(td); 1452 return (0); 1453 } 1454 1455 int 1456 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1457 { 1458 1459 td->td_retval[0] = td->td_ucred->cr_rgid; 1460 return (0); 1461 } 1462 1463 int 1464 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1465 { 1466 1467 td->td_retval[0] = td->td_ucred->cr_ruid; 1468 return (0); 1469 } 1470 1471 int 1472 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1473 { 1474 1475 return (kern_getsid(td, args->pid)); 1476 } 1477 1478 int 1479 linux_nosys(struct thread *td, struct nosys_args *ignore) 1480 { 1481 1482 return (ENOSYS); 1483 } 1484 1485 int 1486 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1487 { 1488 int error; 1489 1490 error = kern_getpriority(td, args->which, args->who); 1491 td->td_retval[0] = 20 - td->td_retval[0]; 1492 return (error); 1493 } 1494 1495 int 1496 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1497 { 1498 int name[2]; 1499 1500 name[0] = CTL_KERN; 1501 name[1] = KERN_HOSTNAME; 1502 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1503 args->len, 0, 0)); 1504 } 1505 1506 int 1507 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1508 { 1509 int name[2]; 1510 1511 name[0] = CTL_KERN; 1512 name[1] = KERN_NISDOMAINNAME; 1513 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1514 args->len, 0, 0)); 1515 } 1516 1517 int 1518 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1519 { 1520 1521 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1522 args->error_code); 1523 1524 /* 1525 * XXX: we should send a signal to the parent if 1526 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1527 * as it doesnt occur often. 
1528 */ 1529 exit1(td, args->error_code, 0); 1530 /* NOTREACHED */ 1531 } 1532 1533 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1534 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1535 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1536 1537 struct l_user_cap_header { 1538 l_int version; 1539 l_int pid; 1540 }; 1541 1542 struct l_user_cap_data { 1543 l_int effective; 1544 l_int permitted; 1545 l_int inheritable; 1546 }; 1547 1548 int 1549 linux_capget(struct thread *td, struct linux_capget_args *uap) 1550 { 1551 struct l_user_cap_header luch; 1552 struct l_user_cap_data lucd[2]; 1553 int error, u32s; 1554 1555 if (uap->hdrp == NULL) 1556 return (EFAULT); 1557 1558 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1559 if (error != 0) 1560 return (error); 1561 1562 switch (luch.version) { 1563 case _LINUX_CAPABILITY_VERSION_1: 1564 u32s = 1; 1565 break; 1566 case _LINUX_CAPABILITY_VERSION_2: 1567 case _LINUX_CAPABILITY_VERSION_3: 1568 u32s = 2; 1569 break; 1570 default: 1571 luch.version = _LINUX_CAPABILITY_VERSION_1; 1572 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1573 if (error) 1574 return (error); 1575 return (EINVAL); 1576 } 1577 1578 if (luch.pid) 1579 return (EPERM); 1580 1581 if (uap->datap) { 1582 /* 1583 * The current implementation doesn't support setting 1584 * a capability (it's essentially a stub) so indicate 1585 * that no capabilities are currently set or available 1586 * to request. 1587 */ 1588 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1589 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1590 } 1591 1592 return (error); 1593 } 1594 1595 int 1596 linux_capset(struct thread *td, struct linux_capset_args *uap) 1597 { 1598 struct l_user_cap_header luch; 1599 struct l_user_cap_data lucd[2]; 1600 int error, i, u32s; 1601 1602 if (uap->hdrp == NULL || uap->datap == NULL) 1603 return (EFAULT); 1604 1605 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1606 if (error != 0) 1607 return (error); 1608 1609 switch (luch.version) { 1610 case _LINUX_CAPABILITY_VERSION_1: 1611 u32s = 1; 1612 break; 1613 case _LINUX_CAPABILITY_VERSION_2: 1614 case _LINUX_CAPABILITY_VERSION_3: 1615 u32s = 2; 1616 break; 1617 default: 1618 luch.version = _LINUX_CAPABILITY_VERSION_1; 1619 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1620 if (error) 1621 return (error); 1622 return (EINVAL); 1623 } 1624 1625 if (luch.pid) 1626 return (EPERM); 1627 1628 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1629 if (error != 0) 1630 return (error); 1631 1632 /* We currently don't support setting any capabilities. 
*/ 1633 for (i = 0; i < u32s; i++) { 1634 if (lucd[i].effective || lucd[i].permitted || 1635 lucd[i].inheritable) { 1636 linux_msg(td, 1637 "capset[%d] effective=0x%x, permitted=0x%x, " 1638 "inheritable=0x%x is not implemented", i, 1639 (int)lucd[i].effective, (int)lucd[i].permitted, 1640 (int)lucd[i].inheritable); 1641 return (EPERM); 1642 } 1643 } 1644 1645 return (0); 1646 } 1647 1648 int 1649 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1650 { 1651 int error = 0, max_size, arg; 1652 struct proc *p = td->td_proc; 1653 char comm[LINUX_MAX_COMM_LEN]; 1654 int pdeath_signal, trace_state; 1655 1656 switch (args->option) { 1657 case LINUX_PR_SET_PDEATHSIG: 1658 if (!LINUX_SIG_VALID(args->arg2)) 1659 return (EINVAL); 1660 pdeath_signal = linux_to_bsd_signal(args->arg2); 1661 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1662 &pdeath_signal)); 1663 case LINUX_PR_GET_PDEATHSIG: 1664 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1665 &pdeath_signal); 1666 if (error != 0) 1667 return (error); 1668 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1669 return (copyout(&pdeath_signal, 1670 (void *)(register_t)args->arg2, 1671 sizeof(pdeath_signal))); 1672 /* 1673 * In Linux, this flag controls if set[gu]id processes can coredump. 1674 * There are additional semantics imposed on processes that cannot 1675 * coredump: 1676 * - Such processes can not be ptraced. 1677 * - There are some semantics around ownership of process-related files 1678 * in the /proc namespace. 1679 * 1680 * In FreeBSD, we can (and by default, do) disable setuid coredump 1681 * system-wide with 'sugid_coredump.' We control tracability on a 1682 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1683 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1684 * procctl is roughly analogous to Linux's DUMPABLE. 1685 * 1686 * So, proxy these knobs to the corresponding PROC_TRACE setting. 1687 */ 1688 case LINUX_PR_GET_DUMPABLE: 1689 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1690 &trace_state); 1691 if (error != 0) 1692 return (error); 1693 td->td_retval[0] = (trace_state != -1); 1694 return (0); 1695 case LINUX_PR_SET_DUMPABLE: 1696 /* 1697 * It is only valid for userspace to set one of these two 1698 * flags, and only one at a time. 1699 */ 1700 switch (args->arg2) { 1701 case LINUX_SUID_DUMP_DISABLE: 1702 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1703 break; 1704 case LINUX_SUID_DUMP_USER: 1705 trace_state = PROC_TRACE_CTL_ENABLE; 1706 break; 1707 default: 1708 return (EINVAL); 1709 } 1710 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1711 &trace_state)); 1712 case LINUX_PR_GET_KEEPCAPS: 1713 /* 1714 * Indicate that we always clear the effective and 1715 * permitted capability sets when the user id becomes 1716 * non-zero (actually the capability sets are simply 1717 * always zero in the current implementation). 1718 */ 1719 td->td_retval[0] = 0; 1720 break; 1721 case LINUX_PR_SET_KEEPCAPS: 1722 /* 1723 * Ignore requests to keep the effective and permitted 1724 * capability sets when the user id becomes non-zero. 1725 */ 1726 break; 1727 case LINUX_PR_SET_NAME: 1728 /* 1729 * To be on the safe side we need to make sure to not 1730 * overflow the size a Linux program expects. We already 1731 * do this here in the copyin, so that we don't need to 1732 * check on copyout. 
1733 */ 1734 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1735 error = copyinstr((void *)(register_t)args->arg2, comm, 1736 max_size, NULL); 1737 1738 /* Linux silently truncates the name if it is too long. */ 1739 if (error == ENAMETOOLONG) { 1740 /* 1741 * XXX: copyinstr() isn't documented to populate the 1742 * array completely, so do a copyin() to be on the 1743 * safe side. This should be changed in case 1744 * copyinstr() is changed to guarantee this. 1745 */ 1746 error = copyin((void *)(register_t)args->arg2, comm, 1747 max_size - 1); 1748 comm[max_size - 1] = '\0'; 1749 } 1750 if (error) 1751 return (error); 1752 1753 PROC_LOCK(p); 1754 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1755 PROC_UNLOCK(p); 1756 break; 1757 case LINUX_PR_GET_NAME: 1758 PROC_LOCK(p); 1759 strlcpy(comm, p->p_comm, sizeof(comm)); 1760 PROC_UNLOCK(p); 1761 error = copyout(comm, (void *)(register_t)args->arg2, 1762 strlen(comm) + 1); 1763 break; 1764 case LINUX_PR_GET_SECCOMP: 1765 case LINUX_PR_SET_SECCOMP: 1766 /* 1767 * Same as returned by Linux without CONFIG_SECCOMP enabled. 1768 */ 1769 error = EINVAL; 1770 break; 1771 case LINUX_PR_CAPBSET_READ: 1772 #if 0 1773 /* 1774 * This makes too much noise with Ubuntu Focal. 1775 */ 1776 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 1777 (int)args->arg2); 1778 #endif 1779 error = EINVAL; 1780 break; 1781 case LINUX_PR_SET_NO_NEW_PRIVS: 1782 arg = args->arg2 == 1 ? 1783 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 1784 error = kern_procctl(td, P_PID, p->p_pid, 1785 PROC_NO_NEW_PRIVS_CTL, &arg); 1786 break; 1787 case LINUX_PR_SET_PTRACER: 1788 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 1789 error = EINVAL; 1790 break; 1791 default: 1792 linux_msg(td, "unsupported prctl option %d", args->option); 1793 error = EINVAL; 1794 break; 1795 } 1796 1797 return (error); 1798 } 1799 1800 int 1801 linux_sched_setparam(struct thread *td, 1802 struct linux_sched_setparam_args *uap) 1803 { 1804 struct sched_param sched_param; 1805 struct thread *tdt; 1806 int error, policy; 1807 1808 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1809 if (error) 1810 return (error); 1811 1812 tdt = linux_tdfind(td, uap->pid, -1); 1813 if (tdt == NULL) 1814 return (ESRCH); 1815 1816 if (linux_map_sched_prio) { 1817 error = kern_sched_getscheduler(td, tdt, &policy); 1818 if (error) 1819 goto out; 1820 1821 switch (policy) { 1822 case SCHED_OTHER: 1823 if (sched_param.sched_priority != 0) { 1824 error = EINVAL; 1825 goto out; 1826 } 1827 sched_param.sched_priority = 1828 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1829 break; 1830 case SCHED_FIFO: 1831 case SCHED_RR: 1832 if (sched_param.sched_priority < 1 || 1833 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1834 error = EINVAL; 1835 goto out; 1836 } 1837 /* 1838 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1839 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 
1840 */ 1841 sched_param.sched_priority = 1842 (sched_param.sched_priority - 1) * 1843 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1844 (LINUX_MAX_RT_PRIO - 1); 1845 break; 1846 } 1847 } 1848 1849 error = kern_sched_setparam(td, tdt, &sched_param); 1850 out: PROC_UNLOCK(tdt->td_proc); 1851 return (error); 1852 } 1853 1854 int 1855 linux_sched_getparam(struct thread *td, 1856 struct linux_sched_getparam_args *uap) 1857 { 1858 struct sched_param sched_param; 1859 struct thread *tdt; 1860 int error, policy; 1861 1862 tdt = linux_tdfind(td, uap->pid, -1); 1863 if (tdt == NULL) 1864 return (ESRCH); 1865 1866 error = kern_sched_getparam(td, tdt, &sched_param); 1867 if (error) { 1868 PROC_UNLOCK(tdt->td_proc); 1869 return (error); 1870 } 1871 1872 if (linux_map_sched_prio) { 1873 error = kern_sched_getscheduler(td, tdt, &policy); 1874 PROC_UNLOCK(tdt->td_proc); 1875 if (error) 1876 return (error); 1877 1878 switch (policy) { 1879 case SCHED_OTHER: 1880 sched_param.sched_priority = 0; 1881 break; 1882 case SCHED_FIFO: 1883 case SCHED_RR: 1884 /* 1885 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 1886 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 1887 */ 1888 sched_param.sched_priority = 1889 (sched_param.sched_priority * 1890 (LINUX_MAX_RT_PRIO - 1) + 1891 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 1892 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 1893 break; 1894 } 1895 } else 1896 PROC_UNLOCK(tdt->td_proc); 1897 1898 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 1899 return (error); 1900 } 1901 1902 /* 1903 * Get affinity of a process. 1904 */ 1905 int 1906 linux_sched_getaffinity(struct thread *td, 1907 struct linux_sched_getaffinity_args *args) 1908 { 1909 struct thread *tdt; 1910 cpuset_t *mask; 1911 size_t size; 1912 int error; 1913 id_t tid; 1914 1915 tdt = linux_tdfind(td, args->pid, -1); 1916 if (tdt == NULL) 1917 return (ESRCH); 1918 tid = tdt->td_tid; 1919 PROC_UNLOCK(tdt->td_proc); 1920 1921 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1922 size = min(args->len, sizeof(cpuset_t)); 1923 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1924 tid, size, mask); 1925 if (error == ERANGE) 1926 error = EINVAL; 1927 if (error == 0) 1928 error = copyout(mask, args->user_mask_ptr, size); 1929 if (error == 0) 1930 td->td_retval[0] = size; 1931 free(mask, M_LINUX); 1932 return (error); 1933 } 1934 1935 /* 1936 * Set affinity of a process. 
1937 */ 1938 int 1939 linux_sched_setaffinity(struct thread *td, 1940 struct linux_sched_setaffinity_args *args) 1941 { 1942 struct thread *tdt; 1943 cpuset_t *mask; 1944 int cpu, error; 1945 size_t len; 1946 id_t tid; 1947 1948 tdt = linux_tdfind(td, args->pid, -1); 1949 if (tdt == NULL) 1950 return (ESRCH); 1951 tid = tdt->td_tid; 1952 PROC_UNLOCK(tdt->td_proc); 1953 1954 len = min(args->len, sizeof(cpuset_t)); 1955 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);; 1956 error = copyin(args->user_mask_ptr, mask, len); 1957 if (error != 0) 1958 goto out; 1959 /* Linux ignore high bits */ 1960 CPU_FOREACH_ISSET(cpu, mask) 1961 if (cpu > mp_maxid) 1962 CPU_CLR(cpu, mask); 1963 1964 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1965 tid, mask); 1966 if (error == EDEADLK) 1967 error = EINVAL; 1968 out: 1969 free(mask, M_TEMP); 1970 return (error); 1971 } 1972 1973 struct linux_rlimit64 { 1974 uint64_t rlim_cur; 1975 uint64_t rlim_max; 1976 }; 1977 1978 int 1979 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 1980 { 1981 struct rlimit rlim, nrlim; 1982 struct linux_rlimit64 lrlim; 1983 struct proc *p; 1984 u_int which; 1985 int flags; 1986 int error; 1987 1988 if (args->new == NULL && args->old != NULL) { 1989 if (linux_get_dummy_limit(args->resource, &rlim)) { 1990 lrlim.rlim_cur = rlim.rlim_cur; 1991 lrlim.rlim_max = rlim.rlim_max; 1992 return (copyout(&lrlim, args->old, sizeof(lrlim))); 1993 } 1994 } 1995 1996 if (args->resource >= LINUX_RLIM_NLIMITS) 1997 return (EINVAL); 1998 1999 which = linux_to_bsd_resource[args->resource]; 2000 if (which == -1) 2001 return (EINVAL); 2002 2003 if (args->new != NULL) { 2004 /* 2005 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2006 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2007 * as INFINITY so we do not need a conversion even. 
2008 */ 2009 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2010 if (error != 0) 2011 return (error); 2012 } 2013 2014 flags = PGET_HOLD | PGET_NOTWEXIT; 2015 if (args->new != NULL) 2016 flags |= PGET_CANDEBUG; 2017 else 2018 flags |= PGET_CANSEE; 2019 if (args->pid == 0) { 2020 p = td->td_proc; 2021 PHOLD(p); 2022 } else { 2023 error = pget(args->pid, flags, &p); 2024 if (error != 0) 2025 return (error); 2026 } 2027 if (args->old != NULL) { 2028 PROC_LOCK(p); 2029 lim_rlimit_proc(p, which, &rlim); 2030 PROC_UNLOCK(p); 2031 if (rlim.rlim_cur == RLIM_INFINITY) 2032 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2033 else 2034 lrlim.rlim_cur = rlim.rlim_cur; 2035 if (rlim.rlim_max == RLIM_INFINITY) 2036 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2037 else 2038 lrlim.rlim_max = rlim.rlim_max; 2039 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2040 if (error != 0) 2041 goto out; 2042 } 2043 2044 if (args->new != NULL) 2045 error = kern_proc_setrlimit(td, p, which, &nrlim); 2046 2047 out: 2048 PRELE(p); 2049 return (error); 2050 } 2051 2052 int 2053 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2054 { 2055 struct timespec ts, *tsp; 2056 int error; 2057 2058 if (args->tsp != NULL) { 2059 error = linux_get_timespec(&ts, args->tsp); 2060 if (error != 0) 2061 return (error); 2062 tsp = &ts; 2063 } else 2064 tsp = NULL; 2065 2066 error = linux_common_pselect6(td, args->nfds, args->readfds, 2067 args->writefds, args->exceptfds, tsp, args->sig); 2068 2069 if (args->tsp != NULL) 2070 linux_put_timespec(&ts, args->tsp); 2071 return (error); 2072 } 2073 2074 static int 2075 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2076 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2077 l_uintptr_t *sig) 2078 { 2079 struct timeval utv, tv0, tv1, *tvp; 2080 struct l_pselect6arg lpse6; 2081 sigset_t *ssp; 2082 sigset_t ss; 2083 int error; 2084 2085 ssp = NULL; 2086 if (sig != NULL) { 2087 error = copyin(sig, &lpse6, sizeof(lpse6)); 2088 if (error != 0) 2089 return (error); 2090 error = linux_copyin_sigset(td, PTRIN(lpse6.ss), 2091 lpse6.ss_len, &ss, &ssp); 2092 if (error != 0) 2093 return (error); 2094 } else 2095 ssp = NULL; 2096 2097 /* 2098 * Currently glibc changes nanosecond number to microsecond. 2099 * This mean losing precision but for now it is hardly seen. 2100 */ 2101 if (tsp != NULL) { 2102 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2103 if (itimerfix(&utv)) 2104 return (EINVAL); 2105 2106 microtime(&tv0); 2107 tvp = &utv; 2108 } else 2109 tvp = NULL; 2110 2111 error = kern_pselect(td, nfds, readfds, writefds, 2112 exceptfds, tvp, ssp, LINUX_NFDBITS); 2113 2114 if (tsp != NULL) { 2115 /* 2116 * Compute how much time was left of the timeout, 2117 * by subtracting the current time and the time 2118 * before we started the call, and subtracting 2119 * that result from the user-supplied value. 
2120 */ 2121 microtime(&tv1); 2122 timevalsub(&tv1, &tv0); 2123 timevalsub(&utv, &tv1); 2124 if (utv.tv_sec < 0) 2125 timevalclear(&utv); 2126 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2127 } 2128 return (error); 2129 } 2130 2131 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2132 int 2133 linux_pselect6_time64(struct thread *td, 2134 struct linux_pselect6_time64_args *args) 2135 { 2136 struct timespec ts, *tsp; 2137 int error; 2138 2139 if (args->tsp != NULL) { 2140 error = linux_get_timespec64(&ts, args->tsp); 2141 if (error != 0) 2142 return (error); 2143 tsp = &ts; 2144 } else 2145 tsp = NULL; 2146 2147 error = linux_common_pselect6(td, args->nfds, args->readfds, 2148 args->writefds, args->exceptfds, tsp, args->sig); 2149 2150 if (args->tsp != NULL) 2151 linux_put_timespec64(&ts, args->tsp); 2152 return (error); 2153 } 2154 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2155 2156 int 2157 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2158 { 2159 struct timespec uts, *tsp; 2160 int error; 2161 2162 if (args->tsp != NULL) { 2163 error = linux_get_timespec(&uts, args->tsp); 2164 if (error != 0) 2165 return (error); 2166 tsp = &uts; 2167 } else 2168 tsp = NULL; 2169 2170 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2171 args->sset, args->ssize); 2172 if (error == 0 && args->tsp != NULL) 2173 error = linux_put_timespec(&uts, args->tsp); 2174 return (error); 2175 } 2176 2177 static int 2178 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2179 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2180 { 2181 struct timespec ts0, ts1; 2182 struct pollfd stackfds[32]; 2183 struct pollfd *kfds; 2184 sigset_t *ssp; 2185 sigset_t ss; 2186 int error; 2187 2188 if (kern_poll_maxfds(nfds)) 2189 return (EINVAL); 2190 if (sset != NULL) { 2191 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); 2192 if (error != 0) 2193 return (error); 2194 } else 2195 ssp = NULL; 2196 if (tsp != NULL) 2197 nanotime(&ts0); 2198 2199 if (nfds > nitems(stackfds)) 2200 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2201 else 2202 kfds = stackfds; 2203 error = linux_pollin(td, kfds, fds, nfds); 2204 if (error != 0) 2205 goto out; 2206 2207 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2208 if (error == 0) 2209 error = linux_pollout(td, kfds, fds, nfds); 2210 2211 if (error == 0 && tsp != NULL) { 2212 if (td->td_retval[0]) { 2213 nanotime(&ts1); 2214 timespecsub(&ts1, &ts0, &ts1); 2215 timespecsub(tsp, &ts1, tsp); 2216 if (tsp->tv_sec < 0) 2217 timespecclear(tsp); 2218 } else 2219 timespecclear(tsp); 2220 } 2221 2222 out: 2223 if (nfds > nitems(stackfds)) 2224 free(kfds, M_TEMP); 2225 return (error); 2226 } 2227 2228 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2229 int 2230 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2231 { 2232 struct timespec uts, *tsp; 2233 int error; 2234 2235 if (args->tsp != NULL) { 2236 error = linux_get_timespec64(&uts, args->tsp); 2237 if (error != 0) 2238 return (error); 2239 tsp = &uts; 2240 } else 2241 tsp = NULL; 2242 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2243 args->sset, args->ssize); 2244 if (error == 0 && args->tsp != NULL) 2245 error = linux_put_timespec64(&uts, args->tsp); 2246 return (error); 2247 } 2248 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2249 2250 static int 2251 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2252 { 2253 int error; 2254 u_int i; 2255 
static int
linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	int error;
	u_int i;

	error = copyin(ufds, fds, nfd * sizeof(*fds));
	if (error != 0)
		return (error);

	for (i = 0; i < nfd; i++) {
		if (fds->events != 0)
			linux_to_bsd_poll_events(td, fds->fd,
			    fds->events, &fds->events);
		fds++;
	}
	return (0);
}

static int
linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	int error = 0;
	u_int i, n = 0;

	for (i = 0; i < nfd; i++) {
		if (fds->revents != 0) {
			bsd_to_linux_poll_events(fds->revents,
			    &fds->revents);
			n++;
		}
		error = copyout(&fds->revents, &ufds->revents,
		    sizeof(ufds->revents));
		if (error)
			return (error);
		fds++;
		ufds++;
	}
	td->td_retval[0] = n;
	return (0);
}

static int
linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
    struct timespec *ts)
{
	struct thread *tdt;
	int error;

	/*
	 * According to the man page, EINVAL should be returned
	 * when an invalid pid is specified.
	 */
	if (pid < 0)
		return (EINVAL);

	tdt = linux_tdfind(td, pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, ts);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec(&ts, uap->interval));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_sched_rr_get_interval_time64(struct thread *td,
    struct linux_sched_rr_get_interval_time64_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec64(&ts, uap->interval));
}
#endif
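/*
 * Editor's note: linux_pollin() and linux_pollout() above form a round
 * trip around the native poll code -- requested events are translated
 * from the Linux encoding to the native one on the way in, and revents
 * are translated back on the way out, with the count of descriptors
 * that reported anything becoming the syscall's return value:
 *
 *	events  -> linux_to_bsd_poll_events()	(linux_pollin)
 *	kern_poll_kfds()			(native poll)
 *	revents -> bsd_to_linux_poll_events()	(linux_pollout)
 *	td->td_retval[0] = number of pollfds with revents != 0
 */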
/*
 * When the Linux thread is the initial thread in the thread group,
 * the thread id is equal to the process id.  Glibc depends on this
 * magic (assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		PROC_LOCK(td->td_proc);
		return (td);
	} else if (tid > PID_MAX)
		return (tdfind(tid, pid));

	/*
	 * Initial thread, where the tid is equal to the pid.
	 */
	p = pfind(tid);
	if (p != NULL) {
		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
		    (pid != -1 && tid != pid)) {
			/*
			 * p is not a Linuxulator process.
			 */
			PROC_UNLOCK(p);
			return (NULL);
		}
		FOREACH_THREAD_IN_PROC(p, tdt) {
			em = em_find(tdt);
			if (tid == em->em_tid)
				return (tdt);
		}
		PROC_UNLOCK(p);
	}
	return (NULL);
}

void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK | LINUX_GRND_RANDOM))
		return (EINVAL);
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* Needs to be page-aligned. */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}
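/*
 * Editor's illustration (comment only): linux_getrandom() above emulates
 * Linux getrandom(2), returning the number of bytes actually produced in
 * td_retval[0].  A minimal Linux-side consumer, assuming the glibc
 * wrapper from <sys/random.h>:
 *
 *	unsigned char key[32];
 *	ssize_t n = getrandom(key, sizeof(key), GRND_NONBLOCK);
 *	if (n < 0)
 *		err(1, "getrandom");	// e.g. EAGAIN under GRND_NONBLOCK
 */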
#define	SYSLOG_TAG	"<6>"

int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error)
		return (error);

	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/* The -1 is to skip the trailing '\0'. */
	dst += sizeof(SYSLOG_TAG) - 1;

	while (error == 0) {
		mtx_lock(&msgbuf_lock);
		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
		mtx_unlock(&msgbuf_lock);

		if (buflen == 0)
			break;

		for (src = buf; src < buf + buflen && error == 0; src++) {
			if (*src == '\0')
				continue;

			if (dst >= args->buf + args->len)
				goto out;

			error = copyout(src, dst, 1);
			dst++;

			if (*src == '\n' && *(src + 1) != '<' &&
			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
				error = copyout(&SYSLOG_TAG,
				    dst, sizeof(SYSLOG_TAG));
				dst += sizeof(SYSLOG_TAG) - 1;
			}
		}
	}
out:
	td->td_retval[0] = dst - args->buf;
	return (error);
}
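/*
 * Editor's note: "<6>" is the Linux log-level prefix for KERN_INFO.
 * linux_syslog() above prepends it to the first line and re-inserts it
 * after every newline that is not already followed by '<', so a native
 * message buffer holding
 *
 *	foo\nbar\n
 *
 * is copied out to the Linux reader roughly as
 *
 *	<6>foo\n<6>bar\n
 *
 * which is the format Linux dmesg-style consumers expect to parse.
 */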
int
linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
{
	int cpu, error, node;

	cpu = td->td_oncpu;	/* Make sure it doesn't change during copyout(9). */
	error = 0;
	node = cpuid_to_pcpu[cpu]->pc_domain;

	if (args->cpu != NULL)
		error = copyout(&cpu, args->cpu, sizeof(l_int));
	if (args->node != NULL)
		error = copyout(&node, args->node, sizeof(l_int));
	return (error);
}

#if defined(__i386__) || defined(__amd64__)
int
linux_poll(struct thread *td, struct linux_poll_args *args)
{
	struct timespec ts, *tsp;

	if (args->timeout != INFTIM) {
		if (args->timeout < 0)
			return (EINVAL);
		ts.tv_sec = args->timeout / 1000;
		ts.tv_nsec = (args->timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	return (linux_common_ppoll(td, args->fds, args->nfds,
	    tsp, NULL, 0));
}
#endif /* __i386__ || __amd64__ */

int
linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
{

	switch (args->op) {
	case LINUX_SECCOMP_GET_ACTION_AVAIL:
		return (EOPNOTSUPP);
	default:
		/*
		 * Ignore unknown operations, just like a Linux kernel built
		 * without CONFIG_SECCOMP.
		 */
		return (EINVAL);
	}
}

/*
 * Custom version of exec_copyin_args(), to copy out argument and environment
 * strings from the old process address space into the temporary string buffer.
 * Based on freebsd32_exec_copyin_args().
 */
static int
linux_exec_copyin_args(struct image_args *args, const char *fname,
    enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv)
{
	char *argp, *envp;
	l_uintptr_t *ptr, arg;
	int error;

	bzero(args, sizeof(*args));
	if (argv == NULL)
		return (EFAULT);

	/*
	 * Allocate demand-paged memory for the file name, argument, and
	 * environment strings.
	 */
	error = exec_alloc_args(args);
	if (error != 0)
		return (error);

	/*
	 * Copy the file name.
	 */
	error = exec_args_add_fname(args, fname, segflg);
	if (error != 0)
		goto err_exit;

	/*
	 * Extract the arguments first.
	 */
	ptr = argv;
	for (;;) {
		error = copyin(ptr++, &arg, sizeof(arg));
		if (error)
			goto err_exit;
		if (arg == 0)
			break;
		argp = PTRIN(arg);
		error = exec_args_add_arg(args, argp, UIO_USERSPACE);
		if (error != 0)
			goto err_exit;
	}

	/*
	 * This comment is from Linux do_execveat_common:
	 * When argv is empty, add an empty string ("") as argv[0] to
	 * ensure confused userspace programs that start processing
	 * from argv[1] won't end up walking envp.
	 */
	if (args->argc == 0 &&
	    (error = exec_args_add_arg(args, "", UIO_SYSSPACE)) != 0)
		goto err_exit;

	/*
	 * Extract the environment strings.
	 */
	if (envv != NULL) {
		ptr = envv;
		for (;;) {
			error = copyin(ptr++, &arg, sizeof(arg));
			if (error)
				goto err_exit;
			if (arg == 0)
				break;
			envp = PTRIN(arg);
			error = exec_args_add_env(args, envp, UIO_USERSPACE);
			if (error != 0)
				goto err_exit;
		}
	}

	return (0);

err_exit:
	exec_free_args(args);
	return (error);
}

int
linux_execve(struct thread *td, struct linux_execve_args *args)
{
	struct image_args eargs;
	int error;

	LINUX_CTR(execve);

	error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE,
	    args->argp, args->envp);
	if (error == 0)
		error = linux_common_execve(td, &eargs);
	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
	return (error);
}

static void
linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp)
{
	struct rtprio rtp2;

	pri_to_rtp(td1, &rtp2);
	if (rtp2.type < rtp->type ||
	    (rtp2.type == rtp->type &&
	    rtp2.prio < rtp->prio)) {
		rtp->type = rtp2.type;
		rtp->prio = rtp2.prio;
	}
}

#define	LINUX_PRIO_DIVIDER	(RTP_PRIO_MAX / LINUX_IOPRIO_MAX)

static int
linux_rtprio2ioprio(struct rtprio *rtp)
{
	int ioprio, prio;

	switch (rtp->type) {
	case RTP_PRIO_IDLE:
		prio = RTP_PRIO_MIN;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio);
		break;
	case RTP_PRIO_NORMAL:
		prio = rtp->prio / LINUX_PRIO_DIVIDER;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio);
		break;
	case RTP_PRIO_REALTIME:
		prio = rtp->prio / LINUX_PRIO_DIVIDER;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio);
		break;
	default:
		prio = RTP_PRIO_MIN;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio);
		break;
	}
	return (ioprio);
}

static int
linux_ioprio2rtprio(int ioprio, struct rtprio *rtp)
{

	switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) {
	case LINUX_IOPRIO_CLASS_IDLE:
		rtp->prio = RTP_PRIO_MIN;
		rtp->type = RTP_PRIO_IDLE;
		break;
	case LINUX_IOPRIO_CLASS_BE:
		rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
		rtp->type = RTP_PRIO_NORMAL;
		break;
	case LINUX_IOPRIO_CLASS_RT:
		rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
		rtp->type = RTP_PRIO_REALTIME;
		break;
	default:
		return (EINVAL);
	}
	return (0);
}
#undef LINUX_PRIO_DIVIDER
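/*
 * Editor's worked example (the constants here are assumptions for
 * illustration, not values taken from this file): Linux encodes an I/O
 * priority as (class << shift) | data.  If RTP_PRIO_MAX were 31 and
 * LINUX_IOPRIO_MAX were 8, LINUX_PRIO_DIVIDER would be 3, so a native
 * RTP_PRIO_REALTIME priority of 12 maps to RT-class data 12 / 3 == 4,
 * and converting back gives 4 * 3 == 12.  The mapping is lossy in
 * general: priority 13 also yields data 4 and comes back as 12.
 */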
int
linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error, found;

	p = NULL;
	td1 = NULL;
	error = 0;
	found = 0;
	rtp.type = RTP_PRIO_IDLE;
	rtp.prio = RTP_PRIO_MAX;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansee(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			pri_to_rtp(td1, &rtp);
		} else {
			FOREACH_THREAD_IN_PROC(p, td1) {
				linux_up_rtprio_if(td1, &rtp);
			}
		}
		found++;
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error == 0) {
		if (found != 0)
			td->td_retval[0] = linux_rtprio2ioprio(&rtp);
		else
			error = ESRCH;
	}
	return (error);
}
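/*
 * Editor's illustration (comment only): Linux exposes ioprio_get(2) and
 * ioprio_set(2) as raw syscalls; glibc traditionally ships no wrappers.
 * A minimal Linux-side sketch exercising the handlers above and below,
 * using the macros from <linux/ioprio.h>:
 *
 *	int prio = syscall(SYS_ioprio_get, IOPRIO_WHO_PROCESS, 0);
 *	syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0,
 *	    IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 0));
 */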
int
linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error;

	if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0)
		return (error);
	/*
	 * Attempts to set high (REALTIME) priorities require
	 * superuser privileges.
	 */
	if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME &&
	    (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0)
		return (error);

	p = NULL;
	td1 = NULL;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansched(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			error = rtp_to_pri(&rtp, td1);
		} else {
			FOREACH_THREAD_IN_PROC(p, td1) {
				if ((error = rtp_to_pri(&rtp, td1)) != 0)
					break;
			}
		}
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp, td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp, td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}