/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/imgact.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/msgbuf.h>
#include <sys/mqueue.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/rtprio.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_common.h>
#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_time.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

int stclohz;				/* Statistics clock frequency */

static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};
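/*
 * Layout of the sysinfo(2) result; the field sizes and trailing padding
 * must match the struct sysinfo of the Linux ABI being emulated.
 */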
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define	LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* Swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);
static int	linux_pollin(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
static int	linux_pollout(struct thread *, struct pollfd *,
			struct pollfd *, u_int);

int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages. */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files.  There is no cheap way
	 * to compute this, so just leave the field unpopulated.  Linux itself
	 * only started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error __diagused;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful.  Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit and avoid an error from it.
	 *
	 * XXX. Linux limits secs to INT_MAX on 32-bit platforms and does not
	 * limit them at all on 64-bit platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD's ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}

#define	LINUX_MS_ASYNC		0x0001
#define	LINUX_MS_INVALIDATE	0x0002
#define	LINUX_MS_SYNC		0x0004

int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

int
linux_mprotect(struct thread *td, struct linux_mprotect_args *uap)
{

	return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len,
	    uap->prot));
}

int
linux_madvise(struct thread *td, struct linux_madvise_args *uap)
{

	return (linux_madvise_common(td, PTROUT(uap->addr), uap->len,
	    uap->behav));
}

int
linux_mmap2(struct thread *td, struct linux_mmap2_args *uap)
{
#if defined(LINUX_ARCHWANT_MMAP2PGOFF)
	/*
	 * On architectures where sizeof(off_t) < sizeof(loff_t), mmap is
	 * implemented via the mmap2 syscall and the offset is expressed in
	 * multiples of the page size.
	 */
	return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot,
	    uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE));
#else
	return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot,
	    uap->flags, uap->fd, uap->pgoff));
#endif
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use the hard-coded CLK_TCK value.
 * Since 2.2.1, glibc uses the value exported by the kernel via the
 * AT_CLKTCK auxiliary vector entry.
 */
#define	CLK_TCK		100

#define	CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))
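/*
 * Pick the tick conversion matching the emulated kernel version: 2.4.0 and
 * newer export the real statistics clock frequency (stclohz) to userland
 * via AT_CLKTCK, while older kernels are assumed to tick at CLK_TCK.
 */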
#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \
			CONVNTCK(r) : CONVOTCK(r))

int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications.  On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
#if defined(COMPAT_LINUX32)
	if (linux32_emulate_i386)
		strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
	else
#endif
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#elif defined(__aarch64__)
	strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
#elif defined(__i386__)
	strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

struct l_utimbuf {
	l_time_t	l_actime;
	l_time_t	l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif

static int
linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	int dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/* This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour. */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (pathname != NULL)
		return (kern_utimensat(td, dfd, pathname,
		    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));

	if (lflags != 0)
		return (EINVAL);

	return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE));
}

int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int
linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
{

	/* Zero out the padding in compat mode. */
	l_times->tv_nsec &= 0xFFFFFFFFUL;

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif
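/*
 * Common back-end for the wait-family syscalls: wraps kern_wait6() and
 * converts the returned status, rusage, and siginfo to their Linux
 * representations.
 */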
static int
linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
    int options, void *rup, l_siginfo_t *infop)
{
	l_siginfo_t lsi;
	siginfo_t siginfo;
	struct __wrusage wru;
	int error, status, tmpstat, sig;

	error = kern_wait6(td, idtype, id, &status, options,
	    rup != NULL ? &wru : NULL, &siginfo);

	if (error == 0 && statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}
	if (error == 0 && rup != NULL)
		error = linux_copyout_rusage(&wru.wru_self, rup);
	if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
		sig = bsd_to_linux_signal(siginfo.si_signo);
		siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		error = copyout(&lsi, infop, sizeof(lsi));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args = {
		.pid = args->pid,
		.status = args->status,
		.options = args->options,
		.rusage = NULL,
	};

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	struct proc *p;
	int options, id, idtype;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	/* -INT_MIN is not defined. */
	if (args->pid == INT_MIN)
		return (ESRCH);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	/*
	 * For backward compatibility, we implicitly add the WEXITED and
	 * WTRAPPED flags here.
	 */
	options |= WEXITED | WTRAPPED;

	if (args->pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (args->pid < 0) {
		idtype = P_PGID;
		id = (id_t)-args->pid;
	} else if (args->pid == 0) {
		idtype = P_PGID;
		p = td->td_proc;
		PROC_LOCK(p);
		id = p->p_pgid;
		PROC_UNLOCK(p);
	} else {
		idtype = P_PID;
		id = (id_t)args->pid;
	}

	return (linux_common_wait(td, idtype, id, args->status, options,
	    args->rusage, NULL));
}

int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	idtype_t idtype;
	int error, options;
	struct proc *p;
	pid_t id;

	if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
	    LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	id = args->id;
	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			id = p->p_pgid;
			PROC_UNLOCK(p);
		} else if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	case LINUX_P_PIDFD:
		LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
		return (ENOSYS);
	default:
		return (EINVAL);
	}

	error = linux_common_wait(td, idtype, id, NULL, options,
	    args->rusage, args->info);
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	int error;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}
#endif

int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, args->filename, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;

int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
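/*
 * In the BSD credential cr_groups[0] holds the effective GID, so the Linux
 * supplementary group set maps to cr_groups[1] and upwards; both
 * linux_setgroups() and linux_getgroups() below shift by one to account
 * for this.
 */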
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid.  Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid.  Returning the whole set
	 * here will cause a duplicate.  Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}

static bool
linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim)
{
	ssize_t size;
	int res, error;

	if (linux_dummy_rlimits == 0)
		return (false);

	switch (resource) {
	case LINUX_RLIMIT_LOCKS:
	case LINUX_RLIMIT_RTTIME:
		rlim->rlim_cur = LINUX_RLIM_INFINITY;
		rlim->rlim_max = LINUX_RLIM_INFINITY;
		return (true);
	case LINUX_RLIMIT_NICE:
	case LINUX_RLIMIT_RTPRIO:
		rlim->rlim_cur = 0;
		rlim->rlim_max = 0;
		return (true);
	case LINUX_RLIMIT_SIGPENDING:
		error = kernel_sysctlbyname(td,
		    "kern.sigqueue.max_pending_per_proc",
		    &res, &size, 0, 0, 0, 0);
		if (error != 0)
			return (false);
		rlim->rlim_cur = res;
		rlim->rlim_max = res;
		return (true);
	case LINUX_RLIMIT_MSGQUEUE:
		error = kernel_sysctlbyname(td,
		    "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
		if (error != 0)
			return (false);
		rlim->rlim_cur = res;
		rlim->rlim_max = res;
		return (true);
	default:
		return (false);
	}
}

int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

#define	REBOOT_CAD_ON	0x89abcdef
#define	REBOOT_CAD_OFF	0
#define	REBOOT_HALT	0xcdef0123
#define	REBOOT_RESTART	0x01234567
#define	REBOOT_RESTART2	0xA1B2C3D4
#define	REBOOT_POWEROFF	0x4321FEDC
#define	REBOOT_MAGIC1	0xfee1dead
#define	REBOOT_MAGIC2	0x28121969
#define	REBOOT_MAGIC2A	0x05121996
#define	REBOOT_MAGIC2B	0x16041998
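/*
 * The constants above correspond to the reboot(2) command and magic values
 * used by Linux; magic1/magic2 guard against accidental invocations.
 */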
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	/* Linux returns 20 - nice, i.e. a value in the range 1..40. */
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set.  We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

#define	_LINUX_CAPABILITY_VERSION_1	0x19980330
#define	_LINUX_CAPABILITY_VERSION_2	0x20071026
#define	_LINUX_CAPABILITY_VERSION_3	0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}

int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size, arg;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes cannot be ptraced.
	 * - There are some semantics around ownership of process-related files
	 *   in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control traceability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure to not
		 * overflow the size a Linux program expects.  We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side.  This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_CAPBSET_READ:
#if 0
		/*
		 * This makes too much noise with Ubuntu Focal.
		 */
		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
		    (int)args->arg2);
#endif
		error = EINVAL;
		break;
	case LINUX_PR_SET_CHILD_SUBREAPER:
		if (args->arg2 == 0) {
			return (kern_procctl(td, P_PID, 0, PROC_REAP_RELEASE,
			    NULL));
		}

		return (kern_procctl(td, P_PID, 0, PROC_REAP_ACQUIRE,
		    NULL));
	case LINUX_PR_SET_NO_NEW_PRIVS:
		arg = args->arg2 == 1 ?
		    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
		error = kern_procctl(td, P_PID, p->p_pid,
		    PROC_NO_NEW_PRIVS_CTL, &arg);
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}

int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}

/*
 * Get affinity of a process.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	struct thread *tdt;
	cpuset_t *mask;
	size_t size;
	int error;
	id_t tid;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);
	tid = tdt->td_tid;
	PROC_UNLOCK(tdt->td_proc);

	mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO);
	size = min(args->len, sizeof(cpuset_t));
	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tid, size, mask);
	if (error == ERANGE)
		error = EINVAL;
	if (error == 0)
		error = copyout(mask, args->user_mask_ptr, size);
	if (error == 0)
		td->td_retval[0] = size;
	free(mask, M_LINUX);
	return (error);
}

/*
 * Set affinity of a process.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;
	cpuset_t *mask;
	int cpu, error;
	size_t len;
	id_t tid;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);
	tid = tdt->td_tid;
	PROC_UNLOCK(tdt->td_proc);

	len = min(args->len, sizeof(cpuset_t));
	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
	error = copyin(args->user_mask_ptr, mask, len);
	if (error != 0)
		goto out;
	/* Linux ignores the high bits. */
	CPU_FOREACH_ISSET(cpu, mask)
		if (cpu > mp_maxid)
			CPU_CLR(cpu, mask);

	error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tid, mask);
	if (error == EDEADLK)
		error = EINVAL;
out:
	free(mask, M_TEMP);
	return (error);
}

struct linux_rlimit64 {
	uint64_t	rlim_cur;
	uint64_t	rlim_max;
};

int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error;

	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(td, args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note: unlike FreeBSD, where rlim is a signed 64-bit value,
		 * the Linux rlim is unsigned 64-bit.  FreeBSD treats negative
		 * limits as INFINITY, so no conversion is needed here.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

 out:
	PRELE(p);
	return (error);
}

int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec(&ts, args->tsp);
	return (error);
}

static int
linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
    l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
    l_uintptr_t *sig)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (sig != NULL) {
		error = copyin(sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		error = linux_copyin_sigset(td, PTRIN(lpse6.ss),
		    lpse6.ss_len, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;

	/*
	 * Currently glibc converts the nanosecond value to microseconds.
	 * This loses precision, but for now it is hardly noticeable.
	 */
	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&utv, tsp);
		if (itimerfix(&utv))
			return (EINVAL);

		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, nfds, readfds, writefds,
	    exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (tsp != NULL) {
		/*
		 * Compute how much time was left of the timeout,
		 * by subtracting the current time and the time
		 * before we started the call, and subtracting
		 * that result from the user-supplied value.
		 */
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec64(&ts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec(&uts, args->tsp);
	return (error);
}

static int
linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
    struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
	struct timespec ts0, ts1;
	struct pollfd stackfds[32];
	struct pollfd *kfds;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (kern_poll_maxfds(nfds))
		return (EINVAL);
	if (sset != NULL) {
		error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;
	if (tsp != NULL)
		nanotime(&ts0);

	if (nfds > nitems(stackfds))
		kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
	else
		kfds = stackfds;
	error = linux_pollin(td, kfds, fds, nfds);
	if (error != 0)
		goto out;

	error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
	if (error == 0)
		error = linux_pollout(td, kfds, fds, nfds);

	if (error == 0 && tsp != NULL) {
		if (td->td_retval[0]) {
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(tsp, &ts1, tsp);
			if (tsp->tv_sec < 0)
				timespecclear(tsp);
		} else
			timespecclear(tsp);
	}

out:
	if (nfds > nitems(stackfds))
		free(kfds, M_TEMP);
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;
	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec64(&uts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

static int
linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds,
    u_int nfd)
{
	int error;
	u_int i;

	error = copyin(ufds, fds, nfd * sizeof(*fds));
	if (error != 0)
		return (error);

	for (i = 0; i < nfd; i++) {
		if (fds->events != 0)
			linux_to_bsd_poll_events(td, fds->fd,
			    fds->events, &fds->events);
		fds++;
	}
	return (0);
}

static int
linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds,
    u_int nfd)
{
	int error = 0;
	u_int i, n = 0;

	for (i = 0; i < nfd; i++) {
		if (fds->revents != 0) {
			bsd_to_linux_poll_events(fds->revents,
			    &fds->revents);
			n++;
		}
		error = copyout(&fds->revents, &ufds->revents,
		    sizeof(ufds->revents));
		if (error)
			return (error);
		fds++;
		ufds++;
	}
	td->td_retval[0] = n;
	return (0);
}
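/*
 * Illustrative only: as on Linux, the raw ppoll() syscall writes the
 * unslept time back through its timespec argument (the timespecsub()
 * arithmetic above); the glibc ppoll() wrapper passes a local copy and
 * hides the update.  Hypothetical sketch using the raw syscall on
 * x86-64, where the sigsetsize argument is 8:
 *
 *	#define _GNU_SOURCE
 *	#include <poll.h>
 *	#include <stdio.h>
 *	#include <sys/syscall.h>
 *	#include <time.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct pollfd pfd = { .fd = 0, .events = POLLIN };
 *		struct timespec ts = { .tv_sec = 2, .tv_nsec = 0 };
 *		long n;
 *
 *		n = syscall(SYS_ppoll, &pfd, 1, &ts, NULL, (size_t)8);
 *		printf("ready: %ld, time left: %lld.%09ld\n",
 *		    n, (long long)ts.tv_sec, ts.tv_nsec);
 *		return (0);
 *	}
 */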
static int
linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
    struct timespec *ts)
{
	struct thread *tdt;
	int error;

	/*
	 * According to the man page, EINVAL should be returned when an
	 * invalid pid is specified.
	 */
	if (pid < 0)
		return (EINVAL);

	tdt = linux_tdfind(td, pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, ts);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec(&ts, uap->interval));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_sched_rr_get_interval_time64(struct thread *td,
    struct linux_sched_rr_get_interval_time64_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec64(&ts, uap->interval));
}
#endif

/*
 * When the Linux thread is the initial thread in the thread group,
 * the thread id is equal to the process id.  Glibc depends on this
 * magic (assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		PROC_LOCK(td->td_proc);
		return (td);
	} else if (tid > PID_MAX)
		return (tdfind(tid, pid));

	/*
	 * The initial thread, where the tid is equal to the pid.
	 */
	p = pfind(tid);
	if (p != NULL) {
		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
		    (pid != -1 && tid != pid)) {
			/*
			 * p is not a Linuxulator process.
			 */
			PROC_UNLOCK(p);
			return (NULL);
		}
		FOREACH_THREAD_IN_PROC(p, tdt) {
			em = em_find(tdt);
			if (tid == em->em_tid)
				return (tdt);
		}
		PROC_UNLOCK(p);
	}
	return (NULL);
}
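/*
 * Illustrative only: the tid == pid invariant for the initial thread
 * is directly observable from userspace (glibc >= 2.30 exposes
 * gettid()); glibc itself asserts on it in pthread_getattr_np.c.
 *
 *	#define _GNU_SOURCE
 *	#include <assert.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		// In the initial thread of a process, the kernel
 *		// thread id equals the process id.
 *		assert(gettid() == getpid());
 *		return (0);
 *	}
 */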
2422 */ 2423 PROC_UNLOCK(p); 2424 return (NULL); 2425 } 2426 FOREACH_THREAD_IN_PROC(p, tdt) { 2427 em = em_find(tdt); 2428 if (tid == em->em_tid) 2429 return (tdt); 2430 } 2431 PROC_UNLOCK(p); 2432 } 2433 return (NULL); 2434 } 2435 2436 void 2437 linux_to_bsd_waitopts(int options, int *bsdopts) 2438 { 2439 2440 if (options & LINUX_WNOHANG) 2441 *bsdopts |= WNOHANG; 2442 if (options & LINUX_WUNTRACED) 2443 *bsdopts |= WUNTRACED; 2444 if (options & LINUX_WEXITED) 2445 *bsdopts |= WEXITED; 2446 if (options & LINUX_WCONTINUED) 2447 *bsdopts |= WCONTINUED; 2448 if (options & LINUX_WNOWAIT) 2449 *bsdopts |= WNOWAIT; 2450 2451 if (options & __WCLONE) 2452 *bsdopts |= WLINUXCLONE; 2453 } 2454 2455 int 2456 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2457 { 2458 struct uio uio; 2459 struct iovec iov; 2460 int error; 2461 2462 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2463 return (EINVAL); 2464 if (args->count > INT_MAX) 2465 args->count = INT_MAX; 2466 2467 iov.iov_base = args->buf; 2468 iov.iov_len = args->count; 2469 2470 uio.uio_iov = &iov; 2471 uio.uio_iovcnt = 1; 2472 uio.uio_resid = iov.iov_len; 2473 uio.uio_segflg = UIO_USERSPACE; 2474 uio.uio_rw = UIO_READ; 2475 uio.uio_td = td; 2476 2477 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2478 if (error == 0) 2479 td->td_retval[0] = args->count - uio.uio_resid; 2480 return (error); 2481 } 2482 2483 int 2484 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2485 { 2486 2487 /* Needs to be page-aligned */ 2488 if (args->start & PAGE_MASK) 2489 return (EINVAL); 2490 return (kern_mincore(td, args->start, args->len, args->vec)); 2491 } 2492 2493 #define SYSLOG_TAG "<6>" 2494 2495 int 2496 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2497 { 2498 char buf[128], *src, *dst; 2499 u_int seq; 2500 int buflen, error; 2501 2502 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2503 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2504 return (EINVAL); 2505 } 2506 2507 if (args->len < 6) { 2508 td->td_retval[0] = 0; 2509 return (0); 2510 } 2511 2512 error = priv_check(td, PRIV_MSGBUF); 2513 if (error) 2514 return (error); 2515 2516 mtx_lock(&msgbuf_lock); 2517 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2518 mtx_unlock(&msgbuf_lock); 2519 2520 dst = args->buf; 2521 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2522 /* The -1 is to skip the trailing '\0'. 
#define	SYSLOG_TAG	"<6>"

int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error)
		return (error);

	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/* The -1 is to skip the trailing '\0'. */
	dst += sizeof(SYSLOG_TAG) - 1;

	while (error == 0) {
		mtx_lock(&msgbuf_lock);
		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
		mtx_unlock(&msgbuf_lock);

		if (buflen == 0)
			break;

		for (src = buf; src < buf + buflen && error == 0; src++) {
			if (*src == '\0')
				continue;

			if (dst >= args->buf + args->len)
				goto out;

			error = copyout(src, dst, 1);
			dst++;

			if (*src == '\n' && *(src + 1) != '<' &&
			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
				error = copyout(&SYSLOG_TAG,
				    dst, sizeof(SYSLOG_TAG));
				dst += sizeof(SYSLOG_TAG) - 1;
			}
		}
	}
out:
	td->td_retval[0] = dst - args->buf;
	return (error);
}

int
linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
{
	int cpu, error, node;

	cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9). */
	error = 0;
	node = cpuid_to_pcpu[cpu]->pc_domain;

	if (args->cpu != NULL)
		error = copyout(&cpu, args->cpu, sizeof(l_int));
	if (args->node != NULL)
		error = copyout(&node, args->node, sizeof(l_int));
	return (error);
}

#if defined(__i386__) || defined(__amd64__)
int
linux_poll(struct thread *td, struct linux_poll_args *args)
{
	struct timespec ts, *tsp;

	if (args->timeout != INFTIM) {
		if (args->timeout < 0)
			return (EINVAL);
		ts.tv_sec = args->timeout / 1000;
		ts.tv_nsec = (args->timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	return (linux_common_ppoll(td, args->fds, args->nfds,
	    tsp, NULL, 0));
}
#endif /* __i386__ || __amd64__ */

int
linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
{

	switch (args->op) {
	case LINUX_SECCOMP_GET_ACTION_AVAIL:
		return (EOPNOTSUPP);
	default:
		/*
		 * Reject unknown operations, just like a Linux kernel
		 * built without CONFIG_SECCOMP.
		 */
		return (EINVAL);
	}
}
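/*
 * Illustrative only: Linux userland reads the kernel message buffer
 * with klogctl(3) (the syslog(2) syscall).  Only
 * LINUX_SYSLOG_ACTION_READ_ALL is handled above, and each line is
 * framed with the "<6>" (KERN_INFO) priority tag.  Hypothetical
 * sketch:
 *
 *	#include <stdio.h>
 *	#include <sys/klog.h>
 *
 *	int
 *	main(void)
 *	{
 *		char buf[8192];
 *		int n;
 *
 *		n = klogctl(3, buf, sizeof(buf));	// READ_ALL
 *		if (n > 0)
 *			fwrite(buf, 1, n, stdout);
 *		return (0);
 *	}
 */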
/*
 * Custom version of exec_copyin_args(), to copy out argument and environment
 * strings from the old process address space into the temporary string buffer.
 * Based on freebsd32_exec_copyin_args().
 */
static int
linux_exec_copyin_args(struct image_args *args, const char *fname,
    enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv)
{
	char *argp, *envp;
	l_uintptr_t *ptr, arg;
	int error;

	bzero(args, sizeof(*args));
	if (argv == NULL)
		return (EFAULT);

	/*
	 * Allocate demand-paged memory for the file name, argument, and
	 * environment strings.
	 */
	error = exec_alloc_args(args);
	if (error != 0)
		return (error);

	/*
	 * Copy the file name.
	 */
	error = exec_args_add_fname(args, fname, segflg);
	if (error != 0)
		goto err_exit;

	/*
	 * Extract the arguments first.
	 */
	ptr = argv;
	for (;;) {
		error = copyin(ptr++, &arg, sizeof(arg));
		if (error)
			goto err_exit;
		if (arg == 0)
			break;
		argp = PTRIN(arg);
		error = exec_args_add_arg(args, argp, UIO_USERSPACE);
		if (error != 0)
			goto err_exit;
	}

	/*
	 * This comment is from Linux do_execveat_common:
	 * When argv is empty, add an empty string ("") as argv[0] to
	 * ensure confused userspace programs that start processing
	 * from argv[1] won't end up walking envp.
	 */
	if (args->argc == 0 &&
	    (error = exec_args_add_arg(args, "", UIO_SYSSPACE)) != 0)
		goto err_exit;

	/*
	 * Extract the environment strings.
	 */
	if (envv) {
		ptr = envv;
		for (;;) {
			error = copyin(ptr++, &arg, sizeof(arg));
			if (error)
				goto err_exit;
			if (arg == 0)
				break;
			envp = PTRIN(arg);
			error = exec_args_add_env(args, envp, UIO_USERSPACE);
			if (error != 0)
				goto err_exit;
		}
	}

	return (0);

err_exit:
	exec_free_args(args);
	return (error);
}

int
linux_execve(struct thread *td, struct linux_execve_args *args)
{
	struct image_args eargs;
	int error;

	LINUX_CTR(execve);

	error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE,
	    args->argp, args->envp);
	if (error == 0)
		error = linux_common_execve(td, &eargs);
	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
	return (error);
}
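/*
 * Illustrative only: the empty-argv fallback above can be exercised by
 * passing an argv whose first element is the NULL terminator; the
 * executed image should then see argc == 1 with argv[0] == "".
 * Hypothetical sketch:
 *
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		char *argv[] = { NULL };
 *		char *envp[] = { NULL };
 *
 *		execve("/bin/true", argv, envp);
 *		return (1);		// only reached on failure
 *	}
 */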
static void
linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp)
{
	struct rtprio rtp2;

	pri_to_rtp(td1, &rtp2);
	if (rtp2.type < rtp->type ||
	    (rtp2.type == rtp->type &&
	    rtp2.prio < rtp->prio)) {
		rtp->type = rtp2.type;
		rtp->prio = rtp2.prio;
	}
}

/* Parenthesized so that "x / LINUX_PRIO_DIVIDER" groups as intended. */
#define	LINUX_PRIO_DIVIDER	(RTP_PRIO_MAX / LINUX_IOPRIO_MAX)

static int
linux_rtprio2ioprio(struct rtprio *rtp)
{
	int ioprio, prio;

	switch (rtp->type) {
	case RTP_PRIO_IDLE:
		prio = RTP_PRIO_MIN;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio);
		break;
	case RTP_PRIO_NORMAL:
		prio = rtp->prio / LINUX_PRIO_DIVIDER;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio);
		break;
	case RTP_PRIO_REALTIME:
		prio = rtp->prio / LINUX_PRIO_DIVIDER;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio);
		break;
	default:
		prio = RTP_PRIO_MIN;
		ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio);
		break;
	}
	return (ioprio);
}

static int
linux_ioprio2rtprio(int ioprio, struct rtprio *rtp)
{

	switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) {
	case LINUX_IOPRIO_CLASS_IDLE:
		rtp->prio = RTP_PRIO_MIN;
		rtp->type = RTP_PRIO_IDLE;
		break;
	case LINUX_IOPRIO_CLASS_BE:
		rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
		rtp->type = RTP_PRIO_NORMAL;
		break;
	case LINUX_IOPRIO_CLASS_RT:
		rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER;
		rtp->type = RTP_PRIO_REALTIME;
		break;
	default:
		return (EINVAL);
	}
	return (0);
}
#undef LINUX_PRIO_DIVIDER
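/*
 * Worked example (assumed values: RTP_PRIO_MAX == 31 and
 * LINUX_IOPRIO_MAX == 8, giving a divider of 3); the two mappings are
 * inverses of each other up to integer-division rounding:
 *
 *	struct rtprio rtp = { .type = RTP_PRIO_REALTIME, .prio = 21 };
 *	int ioprio;
 *
 *	ioprio = linux_rtprio2ioprio(&rtp);	// RT class, data 21 / 3 == 7
 *	linux_ioprio2rtprio(ioprio, &rtp);	// prio 7 * 3 == 21 again
 */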
int
linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error, found;

	p = NULL;
	td1 = NULL;
	error = 0;
	found = 0;
	rtp.type = RTP_PRIO_IDLE;
	rtp.prio = RTP_PRIO_MAX;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansee(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			pri_to_rtp(td1, &rtp);
		} else {
			FOREACH_THREAD_IN_PROC(p, td1) {
				linux_up_rtprio_if(td1, &rtp);
			}
		}
		found++;
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansee(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					linux_up_rtprio_if(td1, &rtp);
					found++;
				}
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error == 0) {
		if (found != 0)
			td->td_retval[0] = linux_rtprio2ioprio(&rtp);
		else
			error = ESRCH;
	}
	return (error);
}

int
linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args)
{
	struct thread *td1;
	struct rtprio rtp;
	struct pgrp *pg;
	struct proc *p;
	int error;

	if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0)
		return (error);
	/*
	 * Setting a high (REALTIME) priority requires superuser
	 * privileges.
	 */
	if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME &&
	    (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0)
		return (error);

	p = NULL;
	td1 = NULL;
	switch (args->which) {
	case LINUX_IOPRIO_WHO_PROCESS:
		if (args->who == 0) {
			td1 = td;
			p = td1->td_proc;
			PROC_LOCK(p);
		} else if (args->who > PID_MAX) {
			td1 = linux_tdfind(td, args->who, -1);
			if (td1 != NULL)
				p = td1->td_proc;
		} else
			p = pfind(args->who);
		if (p == NULL)
			return (ESRCH);
		if ((error = p_cansched(td, p))) {
			PROC_UNLOCK(p);
			break;
		}
		if (td1 != NULL) {
			error = rtp_to_pri(&rtp, td1);
		} else {
			FOREACH_THREAD_IN_PROC(p, td1) {
				if ((error = rtp_to_pri(&rtp, td1)) != 0)
					break;
			}
		}
		PROC_UNLOCK(p);
		break;
	case LINUX_IOPRIO_WHO_PGRP:
		sx_slock(&proctree_lock);
		if (args->who == 0) {
			pg = td->td_proc->p_pgrp;
			PGRP_LOCK(pg);
		} else {
			pg = pgfind(args->who);
			if (pg == NULL) {
				sx_sunlock(&proctree_lock);
				error = ESRCH;
				break;
			}
		}
		sx_sunlock(&proctree_lock);
		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp,
					    td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		PGRP_UNLOCK(pg);
		break;
	case LINUX_IOPRIO_WHO_USER:
		if (args->who == 0)
			args->who = td->td_ucred->cr_uid;
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NORMAL &&
			    p->p_ucred->cr_uid == args->who &&
			    p_cansched(td, p) == 0) {
				FOREACH_THREAD_IN_PROC(p, td1) {
					if ((error = rtp_to_pri(&rtp,
					    td1)) != 0)
						break;
				}
			}
			PROC_UNLOCK(p);
			if (error != 0)
				break;
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		error = EINVAL;
		break;
	}
	return (error);
}
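/*
 * Illustrative only: glibc ships no wrapper for ioprio_get() and
 * ioprio_set(), so Linux programs invoke them through syscall(2).
 * Hypothetical sketch putting the current process into the
 * best-effort class at priority 4 (constants as in Linux's
 * include/uapi/linux/ioprio.h, with a class shift of 13):
 *
 *	#include <sys/syscall.h>
 *	#include <unistd.h>
 *
 *	#define IOPRIO_CLASS_BE			2
 *	#define IOPRIO_WHO_PROCESS		1
 *	#define IOPRIO_PRIO_VALUE(cl, data)	(((cl) << 13) | (data))
 *
 *	int
 *	main(void)
 *	{
 *		int prio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 4);
 *
 *		return (syscall(SYS_ioprio_set, IOPRIO_WHO_PROCESS, 0,
 *		    prio) != 0);
 *	}
 */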
/* The only flag is O_NONBLOCK. */
#define	B2L_MQ_FLAGS(bflags)	((bflags) != 0 ? LINUX_O_NONBLOCK : 0)
#define	L2B_MQ_FLAGS(lflags)	((lflags) != 0 ? O_NONBLOCK : 0)

int
linux_mq_open(struct thread *td, struct linux_mq_open_args *args)
{
	struct mq_attr attr;
	int error, flags;

	flags = linux_common_openflags(args->oflag);
	if ((flags & O_ACCMODE) == O_ACCMODE || (flags & O_EXEC) != 0)
		return (EINVAL);
	flags = FFLAGS(flags);
	if ((flags & O_CREAT) != 0 && args->attr != NULL) {
		error = copyin(args->attr, &attr, sizeof(attr));
		if (error != 0)
			return (error);
		attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags);
	}

	return (kern_kmq_open(td, args->name, flags, args->mode,
	    args->attr != NULL ? &attr : NULL));
}

int
linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args)
{
	struct kmq_unlink_args bsd_args = {
		.path = PTRIN(args->name)
	};

	return (sys_kmq_unlink(td, &bsd_args));
}

int
linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args)
{
	struct timespec ts, *abs_timeout;
	int error;

	if (args->abs_timeout == NULL)
		abs_timeout = NULL;
	else {
		error = linux_get_timespec(&ts, args->abs_timeout);
		if (error != 0)
			return (error);
		abs_timeout = &ts;
	}

	return (kern_kmq_timedsend(td, args->mqd, PTRIN(args->msg_ptr),
	    args->msg_len, args->msg_prio, abs_timeout));
}

int
linux_mq_timedreceive(struct thread *td,
    struct linux_mq_timedreceive_args *args)
{
	struct timespec ts, *abs_timeout;
	int error;

	if (args->abs_timeout == NULL)
		abs_timeout = NULL;
	else {
		error = linux_get_timespec(&ts, args->abs_timeout);
		if (error != 0)
			return (error);
		abs_timeout = &ts;
	}

	return (kern_kmq_timedreceive(td, args->mqd, PTRIN(args->msg_ptr),
	    args->msg_len, args->msg_prio, abs_timeout));
}

int
linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args)
{
	struct sigevent ev, *evp;
	struct l_sigevent l_ev;
	int error;

	if (args->sevp == NULL)
		evp = NULL;
	else {
		error = copyin(args->sevp, &l_ev, sizeof(l_ev));
		if (error != 0)
			return (error);
		error = linux_convert_l_sigevent(&l_ev, &ev);
		if (error != 0)
			return (error);
		evp = &ev;
	}

	return (kern_kmq_notify(td, args->mqd, evp));
}

int
linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args)
{
	struct mq_attr attr, oattr;
	int error;

	if (args->attr != NULL) {
		error = copyin(args->attr, &attr, sizeof(attr));
		if (error != 0)
			return (error);
		attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags);
	}

	error = kern_kmq_setattr(td, args->mqd,
	    args->attr != NULL ? &attr : NULL, &oattr);
	if (error == 0 && args->oattr != NULL) {
		oattr.mq_flags = B2L_MQ_FLAGS(oattr.mq_flags);
		bzero(oattr.__reserved, sizeof(oattr.__reserved));
		error = copyout(&oattr, args->oattr, sizeof(oattr));
	}

	return (error);
}

MODULE_DEPEND(linux, mqueuefs, 1, 1, 1);
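/*
 * Illustrative only: the mq_*() wrappers in Linux's librt map onto the
 * kern_kmq_*() handlers above; the MODULE_DEPEND on mqueuefs ensures
 * the message-queue filesystem is available.  Hypothetical sender:
 *
 *	#include <fcntl.h>
 *	#include <mqueue.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		struct mq_attr attr = { .mq_maxmsg = 8, .mq_msgsize = 64 };
 *		mqd_t mq;
 *
 *		mq = mq_open("/demo", O_CREAT | O_WRONLY, 0600, &attr);
 *		if (mq == (mqd_t)-1) {
 *			perror("mq_open");
 *			return (1);
 *		}
 *		mq_send(mq, "hello", 5, 0);	// priority 0
 *		mq_close(mq);
 *		return (0);
 *	}
 */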