1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include <sys/param.h> 36 #include <sys/fcntl.h> 37 #include <sys/jail.h> 38 #include <sys/imgact.h> 39 #include <sys/limits.h> 40 #include <sys/lock.h> 41 #include <sys/msgbuf.h> 42 #include <sys/mutex.h> 43 #include <sys/poll.h> 44 #include <sys/priv.h> 45 #include <sys/proc.h> 46 #include <sys/procctl.h> 47 #include <sys/reboot.h> 48 #include <sys/random.h> 49 #include <sys/resourcevar.h> 50 #include <sys/sched.h> 51 #include <sys/smp.h> 52 #include <sys/stat.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysctl.h> 55 #include <sys/sysent.h> 56 #include <sys/sysproto.h> 57 #include <sys/time.h> 58 #include <sys/vmmeter.h> 59 #include <sys/vnode.h> 60 61 #include <security/audit/audit.h> 62 #include <security/mac/mac_framework.h> 63 64 #include <vm/pmap.h> 65 #include <vm/vm_map.h> 66 #include <vm/swap_pager.h> 67 68 #ifdef COMPAT_LINUX32 69 #include <machine/../linux32/linux.h> 70 #include <machine/../linux32/linux32_proto.h> 71 #else 72 #include <machine/../linux/linux.h> 73 #include <machine/../linux/linux_proto.h> 74 #endif 75 76 #include <compat/linux/linux_common.h> 77 #include <compat/linux/linux_dtrace.h> 78 #include <compat/linux/linux_file.h> 79 #include <compat/linux/linux_mib.h> 80 #include <compat/linux/linux_signal.h> 81 #include <compat/linux/linux_time.h> 82 #include <compat/linux/linux_util.h> 83 #include <compat/linux/linux_sysproto.h> 84 #include <compat/linux/linux_emul.h> 85 #include <compat/linux/linux_misc.h> 86 87 int stclohz; /* Statistics clock frequency */ 88 89 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 90 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 91 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 92 RLIMIT_MEMLOCK, RLIMIT_AS 93 }; 94 95 struct l_sysinfo { 96 l_long uptime; /* Seconds since boot */ 97 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 98 #define LINUX_SYSINFO_LOADS_SCALE 65536 99 l_ulong totalram; /* 
Total usable main memory size */ 100 l_ulong freeram; /* Available memory size */ 101 l_ulong sharedram; /* Amount of shared memory */ 102 l_ulong bufferram; /* Memory used by buffers */ 103 l_ulong totalswap; /* Total swap space size */ 104 l_ulong freeswap; /* swap space still available */ 105 l_ushort procs; /* Number of current processes */ 106 l_ushort pads; 107 l_ulong totalhigh; 108 l_ulong freehigh; 109 l_uint mem_unit; 110 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 111 }; 112 113 struct l_pselect6arg { 114 l_uintptr_t ss; 115 l_size_t ss_len; 116 }; 117 118 static int linux_utimensat_lts_to_ts(struct l_timespec *, 119 struct timespec *); 120 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 121 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 122 struct timespec *); 123 #endif 124 static int linux_common_utimensat(struct thread *, int, 125 const char *, struct timespec *, int); 126 static int linux_common_pselect6(struct thread *, l_int, 127 l_fd_set *, l_fd_set *, l_fd_set *, 128 struct timespec *, l_uintptr_t *); 129 static int linux_common_ppoll(struct thread *, struct pollfd *, 130 uint32_t, struct timespec *, l_sigset_t *, 131 l_size_t); 132 static int linux_pollin(struct thread *, struct pollfd *, 133 struct pollfd *, u_int); 134 static int linux_pollout(struct thread *, struct pollfd *, 135 struct pollfd *, u_int); 136 137 int 138 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 139 { 140 struct l_sysinfo sysinfo; 141 int i, j; 142 struct timespec ts; 143 144 bzero(&sysinfo, sizeof(sysinfo)); 145 getnanouptime(&ts); 146 if (ts.tv_nsec != 0) 147 ts.tv_sec++; 148 sysinfo.uptime = ts.tv_sec; 149 150 /* Use the information from the mib to get our load averages */ 151 for (i = 0; i < 3; i++) 152 sysinfo.loads[i] = averunnable.ldavg[i] * 153 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 154 155 sysinfo.totalram = physmem * PAGE_SIZE; 156 sysinfo.freeram = (u_long)vm_free_count() * 
PAGE_SIZE; 157 158 /* 159 * sharedram counts pages allocated to named, swap-backed objects such 160 * as shared memory segments and tmpfs files. There is no cheap way to 161 * compute this, so just leave the field unpopulated. Linux itself only 162 * started setting this field in the 3.x timeframe. 163 */ 164 sysinfo.sharedram = 0; 165 sysinfo.bufferram = 0; 166 167 swap_pager_status(&i, &j); 168 sysinfo.totalswap = i * PAGE_SIZE; 169 sysinfo.freeswap = (i - j) * PAGE_SIZE; 170 171 sysinfo.procs = nprocs; 172 173 /* 174 * Platforms supported by the emulation layer do not have a notion of 175 * high memory. 176 */ 177 sysinfo.totalhigh = 0; 178 sysinfo.freehigh = 0; 179 180 sysinfo.mem_unit = 1; 181 182 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 183 } 184 185 #ifdef LINUX_LEGACY_SYSCALLS 186 int 187 linux_alarm(struct thread *td, struct linux_alarm_args *args) 188 { 189 struct itimerval it, old_it; 190 u_int secs; 191 int error __diagused; 192 193 secs = args->secs; 194 /* 195 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 196 * to match kern_setitimer()'s limit to avoid error from it. 197 * 198 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 199 * platforms. 
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux select(2).
 *
 * Copies in the optional l_timeval timeout, sanitizes it, calls
 * kern_select() with LINUX_NFDBITS and, when a timeout was supplied, writes
 * the remaining time back to args->timeout: the unused part of the timeout
 * if descriptors were reported ready (td_retval[0] != 0), zero otherwise.
 *
 * Returns 0 on success or the error from copyin(), kern_select() or
 * copyout().
 */
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	tvp = NULL;
	if (args->timeout) {
		error = copyin(args->timeout, &ltv, sizeof(ltv));
		if (error != 0)
			return (error);
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}

		/*
		 * Store current time for computation of the amount of
		 * time left.
		 */
		microtime(&tv0);
		tvp = &utv;
	}

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error != 0)
		return (error);

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		error = copyout(&ltv, args->timeout, sizeof(ltv));
	}

	return (error);
}
#endif
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux time(2): return the current wall-clock time in seconds, and also
 * store it through args->tm when that pointer is non-zero.
 */
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval now;
	l_time_t secs;
	int error;

	microtime(&now);
	secs = now.tv_sec;

	if (args->tm) {
		error = copyout(&secs, args->tm, sizeof(secs));
		if (error)
			return (error);
	}

	td->td_retval[0] = secs;
	return (0);
}
#endif
\ 388 CONVNTCK(r) : CONVOTCK(r)) 389 390 int 391 linux_times(struct thread *td, struct linux_times_args *args) 392 { 393 struct timeval tv, utime, stime, cutime, cstime; 394 struct l_times_argv tms; 395 struct proc *p; 396 int error; 397 398 if (args->buf != NULL) { 399 p = td->td_proc; 400 PROC_LOCK(p); 401 PROC_STATLOCK(p); 402 calcru(p, &utime, &stime); 403 PROC_STATUNLOCK(p); 404 calccru(p, &cutime, &cstime); 405 PROC_UNLOCK(p); 406 407 tms.tms_utime = CONVTCK(utime); 408 tms.tms_stime = CONVTCK(stime); 409 410 tms.tms_cutime = CONVTCK(cutime); 411 tms.tms_cstime = CONVTCK(cstime); 412 413 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 414 return (error); 415 } 416 417 microuptime(&tv); 418 td->td_retval[0] = (int)CONVTCK(tv); 419 return (0); 420 } 421 422 int 423 linux_newuname(struct thread *td, struct linux_newuname_args *args) 424 { 425 struct l_new_utsname utsname; 426 char osname[LINUX_MAX_UTSNAME]; 427 char osrelease[LINUX_MAX_UTSNAME]; 428 char *p; 429 430 linux_get_osname(td, osname); 431 linux_get_osrelease(td, osrelease); 432 433 bzero(&utsname, sizeof(utsname)); 434 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 435 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 436 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 437 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 438 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 439 for (p = utsname.version; *p != '\0'; ++p) 440 if (*p == '\n') { 441 *p = '\0'; 442 break; 443 } 444 #if defined(__amd64__) 445 /* 446 * On amd64, Linux uname(2) needs to return "x86_64" 447 * for both 64-bit and 32-bit applications. On 32-bit, 448 * the string returned by getauxval(AT_PLATFORM) needs 449 * to remain "i686", though. 
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utime(2): set access and modification times of args->fname with
 * one-second resolution.  A zero args->times passes a NULL time vector to
 * kern_utimesat().
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct l_utimbuf lut;
	struct timeval tv[2], *tvp;
	char *fname;
	int error;

	tvp = NULL;
	if (args->times) {
		error = copyin(args->times, &lut, sizeof(lut));
		if (error != 0)
			return (error);
		/* utimbuf carries whole seconds only. */
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	}

	if (LUSECONVPATH(td)) {
		/* Path goes through Linux path translation first. */
		LCONVPATHEXIST(args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
		    UIO_SYSSPACE);
		LFREEPATH(fname);
		return (error);
	}

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif
&fname); 528 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 529 tvp, UIO_SYSSPACE); 530 LFREEPATH(fname); 531 } 532 return (error); 533 } 534 #endif 535 536 static int 537 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 538 { 539 540 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 541 l_times->tv_nsec != LINUX_UTIME_NOW && 542 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 543 return (EINVAL); 544 545 times->tv_sec = l_times->tv_sec; 546 switch (l_times->tv_nsec) 547 { 548 case LINUX_UTIME_OMIT: 549 times->tv_nsec = UTIME_OMIT; 550 break; 551 case LINUX_UTIME_NOW: 552 times->tv_nsec = UTIME_NOW; 553 break; 554 default: 555 times->tv_nsec = l_times->tv_nsec; 556 } 557 558 return (0); 559 } 560 561 static int 562 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 563 struct timespec *timesp, int lflags) 564 { 565 char *path = NULL; 566 int error, dfd, flags = 0; 567 568 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 569 570 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 571 return (EINVAL); 572 573 if (timesp != NULL) { 574 /* This breaks POSIX, but is what the Linux kernel does 575 * _on purpose_ (documented in the man page for utimensat(2)), 576 * so we must follow that behaviour. 
*/ 577 if (timesp[0].tv_nsec == UTIME_OMIT && 578 timesp[1].tv_nsec == UTIME_OMIT) 579 return (0); 580 } 581 582 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 583 flags |= AT_SYMLINK_NOFOLLOW; 584 if (lflags & LINUX_AT_EMPTY_PATH) 585 flags |= AT_EMPTY_PATH; 586 587 if (!LUSECONVPATH(td)) { 588 if (pathname != NULL) { 589 return (kern_utimensat(td, dfd, pathname, 590 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 591 } 592 } 593 594 if (pathname != NULL) 595 LCONVPATHEXIST_AT(pathname, &path, dfd); 596 else if (lflags != 0) 597 return (EINVAL); 598 599 if (path == NULL) 600 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 601 else { 602 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 603 UIO_SYSSPACE, flags); 604 LFREEPATH(path); 605 } 606 607 return (error); 608 } 609 610 int 611 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 612 { 613 struct l_timespec l_times[2]; 614 struct timespec times[2], *timesp; 615 int error; 616 617 if (args->times != NULL) { 618 error = copyin(args->times, l_times, sizeof(l_times)); 619 if (error != 0) 620 return (error); 621 622 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 623 if (error != 0) 624 return (error); 625 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 626 if (error != 0) 627 return (error); 628 timesp = times; 629 } else 630 timesp = NULL; 631 632 return (linux_common_utimensat(td, args->dfd, args->pathname, 633 timesp, args->flags)); 634 } 635 636 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 637 static int 638 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 639 { 640 641 /* Zero out the padding in compat mode. 
*/ 642 l_times->tv_nsec &= 0xFFFFFFFFUL; 643 644 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 645 l_times->tv_nsec != LINUX_UTIME_NOW && 646 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 647 return (EINVAL); 648 649 times->tv_sec = l_times->tv_sec; 650 switch (l_times->tv_nsec) 651 { 652 case LINUX_UTIME_OMIT: 653 times->tv_nsec = UTIME_OMIT; 654 break; 655 case LINUX_UTIME_NOW: 656 times->tv_nsec = UTIME_NOW; 657 break; 658 default: 659 times->tv_nsec = l_times->tv_nsec; 660 } 661 662 return (0); 663 } 664 665 int 666 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 667 { 668 struct l_timespec64 l_times[2]; 669 struct timespec times[2], *timesp; 670 int error; 671 672 if (args->times64 != NULL) { 673 error = copyin(args->times64, l_times, sizeof(l_times)); 674 if (error != 0) 675 return (error); 676 677 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 678 if (error != 0) 679 return (error); 680 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 681 if (error != 0) 682 return (error); 683 timesp = times; 684 } else 685 timesp = NULL; 686 687 return (linux_common_utimensat(td, args->dfd, args->pathname, 688 timesp, args->flags)); 689 } 690 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 691 692 #ifdef LINUX_LEGACY_SYSCALLS 693 int 694 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 695 { 696 l_timeval ltv[2]; 697 struct timeval tv[2], *tvp = NULL; 698 char *fname; 699 int error, dfd; 700 701 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 702 703 if (args->utimes != NULL) { 704 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 705 return (error); 706 tv[0].tv_sec = ltv[0].tv_sec; 707 tv[0].tv_usec = ltv[0].tv_usec; 708 tv[1].tv_sec = ltv[1].tv_sec; 709 tv[1].tv_usec = ltv[1].tv_usec; 710 tvp = tv; 711 } 712 713 if (!LUSECONVPATH(td)) { 714 error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 715 tvp, UIO_SYSSPACE); 716 } else { 717 LCONVPATHEXIST_AT(args->filename, &fname, dfd); 718 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, 719 tvp, UIO_SYSSPACE); 720 LFREEPATH(fname); 721 } 722 return (error); 723 } 724 #endif 725 726 static int 727 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 728 int options, void *rup, l_siginfo_t *infop) 729 { 730 l_siginfo_t lsi; 731 siginfo_t siginfo; 732 struct __wrusage wru; 733 int error, status, tmpstat, sig; 734 735 error = kern_wait6(td, idtype, id, &status, options, 736 rup != NULL ? &wru : NULL, &siginfo); 737 738 if (error == 0 && statusp) { 739 tmpstat = status & 0xffff; 740 if (WIFSIGNALED(tmpstat)) { 741 tmpstat = (tmpstat & 0xffffff80) | 742 bsd_to_linux_signal(WTERMSIG(tmpstat)); 743 } else if (WIFSTOPPED(tmpstat)) { 744 tmpstat = (tmpstat & 0xffff00ff) | 745 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 746 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 747 if (WSTOPSIG(status) == SIGTRAP) { 748 tmpstat = linux_ptrace_status(td, 749 siginfo.si_pid, tmpstat); 750 } 751 #endif 752 } else if (WIFCONTINUED(tmpstat)) { 753 tmpstat = 0xffff; 754 } 755 error = copyout(&tmpstat, statusp, sizeof(int)); 756 } 757 if (error == 0 && rup != NULL) 758 error = linux_copyout_rusage(&wru.wru_self, rup); 759 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 760 sig = bsd_to_linux_signal(siginfo.si_signo); 761 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 762 error = copyout(&lsi, infop, sizeof(lsi)); 763 } 764 765 return (error); 766 } 767 768 #if defined(__i386__) 
|| (defined(__amd64__) && defined(COMPAT_LINUX32)) 769 int 770 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 771 { 772 struct linux_wait4_args wait4_args = { 773 .pid = args->pid, 774 .status = args->status, 775 .options = args->options, 776 .rusage = NULL, 777 }; 778 779 return (linux_wait4(td, &wait4_args)); 780 } 781 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 782 783 int 784 linux_wait4(struct thread *td, struct linux_wait4_args *args) 785 { 786 struct proc *p; 787 int options, id, idtype; 788 789 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 790 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 791 return (EINVAL); 792 793 /* -INT_MIN is not defined. */ 794 if (args->pid == INT_MIN) 795 return (ESRCH); 796 797 options = 0; 798 linux_to_bsd_waitopts(args->options, &options); 799 800 /* 801 * For backward compatibility we implicitly add flags WEXITED 802 * and WTRAPPED here. 803 */ 804 options |= WEXITED | WTRAPPED; 805 806 if (args->pid == WAIT_ANY) { 807 idtype = P_ALL; 808 id = 0; 809 } else if (args->pid < 0) { 810 idtype = P_PGID; 811 id = (id_t)-args->pid; 812 } else if (args->pid == 0) { 813 idtype = P_PGID; 814 p = td->td_proc; 815 PROC_LOCK(p); 816 id = p->p_pgid; 817 PROC_UNLOCK(p); 818 } else { 819 idtype = P_PID; 820 id = (id_t)args->pid; 821 } 822 823 return (linux_common_wait(td, idtype, id, args->status, options, 824 args->rusage, NULL)); 825 } 826 827 int 828 linux_waitid(struct thread *td, struct linux_waitid_args *args) 829 { 830 idtype_t idtype; 831 int error, options; 832 struct proc *p; 833 pid_t id; 834 835 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 836 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 837 return (EINVAL); 838 839 options = 0; 840 linux_to_bsd_waitopts(args->options, &options); 841 842 id = args->id; 843 switch (args->idtype) { 844 case LINUX_P_ALL: 845 idtype = P_ALL; 846 break; 847 case LINUX_P_PID: 848 if (args->id <= 0) 849 return 
#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux mknod(2), relative to the current working directory.
 *
 * The file type bits of args->mode select the operation:
 *  - FIFO or socket:       kern_mkfifoat()
 *  - char or block device: kern_mknodat()
 *  - directory:            EPERM
 *  - regular file or none: create (and truncate) via kern_openat(), then
 *                          close the descriptor again
 *  - anything else:        EINVAL
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	enum uio_seg seg;
	bool translated;
	int error;

	translated = LUSECONVPATH(td);
	if (translated) {
		/* Work on a kernel copy of the translated path. */
		LCONVPATHCREAT(args->path, &path);
		seg = UIO_SYSSPACE;
	} else {
		path = args->path;
		seg = UIO_USERSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, seg, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, seg, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		/* No type given means a regular file. */
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}

	if (translated)
		LFREEPATH(path);
	return (error);
}
#endif
AT_FDCWD : args->dfd; 940 941 convpath = LUSECONVPATH(td); 942 if (!convpath) { 943 path = __DECONST(char *, args->filename); 944 seg = UIO_USERSPACE; 945 } else { 946 LCONVPATHCREAT_AT(args->filename, &path, dfd); 947 seg = UIO_SYSSPACE; 948 } 949 950 switch (args->mode & S_IFMT) { 951 case S_IFIFO: 952 case S_IFSOCK: 953 error = kern_mkfifoat(td, dfd, path, seg, args->mode); 954 break; 955 956 case S_IFCHR: 957 case S_IFBLK: 958 error = kern_mknodat(td, dfd, path, seg, args->mode, 959 args->dev); 960 break; 961 962 case S_IFDIR: 963 error = EPERM; 964 break; 965 966 case 0: 967 args->mode |= S_IFREG; 968 /* FALLTHROUGH */ 969 case S_IFREG: 970 error = kern_openat(td, dfd, path, seg, 971 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 972 if (error == 0) 973 kern_close(td, td->td_retval[0]); 974 break; 975 976 default: 977 error = EINVAL; 978 break; 979 } 980 if (convpath) 981 LFREEPATH(path); 982 return (error); 983 } 984 985 /* 986 * UGH! This is just about the dumbest idea I've ever heard!! 
987 */ 988 int 989 linux_personality(struct thread *td, struct linux_personality_args *args) 990 { 991 struct linux_pemuldata *pem; 992 struct proc *p = td->td_proc; 993 uint32_t old; 994 995 PROC_LOCK(p); 996 pem = pem_find(p); 997 old = pem->persona; 998 if (args->per != 0xffffffff) 999 pem->persona = args->per; 1000 PROC_UNLOCK(p); 1001 1002 td->td_retval[0] = old; 1003 return (0); 1004 } 1005 1006 struct l_itimerval { 1007 l_timeval it_interval; 1008 l_timeval it_value; 1009 }; 1010 1011 #define B2L_ITIMERVAL(bip, lip) \ 1012 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1013 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1014 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1015 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1016 1017 int 1018 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1019 { 1020 int error; 1021 struct l_itimerval ls; 1022 struct itimerval aitv, oitv; 1023 1024 if (uap->itv == NULL) { 1025 uap->itv = uap->oitv; 1026 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1027 } 1028 1029 error = copyin(uap->itv, &ls, sizeof(ls)); 1030 if (error != 0) 1031 return (error); 1032 B2L_ITIMERVAL(&aitv, &ls); 1033 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1034 if (error != 0 || uap->oitv == NULL) 1035 return (error); 1036 B2L_ITIMERVAL(&ls, &oitv); 1037 1038 return (copyout(&ls, uap->oitv, sizeof(ls))); 1039 } 1040 1041 int 1042 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1043 { 1044 int error; 1045 struct l_itimerval ls; 1046 struct itimerval aitv; 1047 1048 error = kern_getitimer(td, uap->which, &aitv); 1049 if (error != 0) 1050 return (error); 1051 B2L_ITIMERVAL(&ls, &aitv); 1052 return (copyout(&ls, uap->itv, sizeof(ls))); 1053 } 1054 1055 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1056 int 1057 linux_nice(struct thread *td, struct linux_nice_args *args) 1058 { 1059 1060 return (kern_setpriority(td, PRIO_PROCESS, 0, 
args->inc)); 1061 } 1062 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1063 1064 int 1065 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1066 { 1067 struct ucred *newcred, *oldcred; 1068 l_gid_t *linux_gidset; 1069 gid_t *bsd_gidset; 1070 int ngrp, error; 1071 struct proc *p; 1072 1073 ngrp = args->gidsetsize; 1074 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1075 return (EINVAL); 1076 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1077 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1078 if (error) 1079 goto out; 1080 newcred = crget(); 1081 crextend(newcred, ngrp + 1); 1082 p = td->td_proc; 1083 PROC_LOCK(p); 1084 oldcred = p->p_ucred; 1085 crcopy(newcred, oldcred); 1086 1087 /* 1088 * cr_groups[0] holds egid. Setting the whole set from 1089 * the supplied set will cause egid to be changed too. 1090 * Keep cr_groups[0] unchanged to prevent that. 1091 */ 1092 1093 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1094 PROC_UNLOCK(p); 1095 crfree(newcred); 1096 goto out; 1097 } 1098 1099 if (ngrp > 0) { 1100 newcred->cr_ngroups = ngrp + 1; 1101 1102 bsd_gidset = newcred->cr_groups; 1103 ngrp--; 1104 while (ngrp >= 0) { 1105 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1106 ngrp--; 1107 } 1108 } else 1109 newcred->cr_ngroups = 1; 1110 1111 setsugid(p); 1112 proc_set_cred(p, newcred); 1113 PROC_UNLOCK(p); 1114 crfree(oldcred); 1115 error = 0; 1116 out: 1117 free(linux_gidset, M_LINUX); 1118 return (error); 1119 } 1120 1121 int 1122 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1123 { 1124 struct ucred *cred; 1125 l_gid_t *linux_gidset; 1126 gid_t *bsd_gidset; 1127 int bsd_gidsetsz, ngrp, error; 1128 1129 cred = td->td_ucred; 1130 bsd_gidset = cred->cr_groups; 1131 bsd_gidsetsz = cred->cr_ngroups - 1; 1132 1133 /* 1134 * cr_groups[0] holds egid. Returning the whole set 1135 * here will cause a duplicate. Exclude cr_groups[0] 1136 * to prevent that. 
1137 */ 1138 1139 if ((ngrp = args->gidsetsize) == 0) { 1140 td->td_retval[0] = bsd_gidsetsz; 1141 return (0); 1142 } 1143 1144 if (ngrp < bsd_gidsetsz) 1145 return (EINVAL); 1146 1147 ngrp = 0; 1148 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1149 M_LINUX, M_WAITOK); 1150 while (ngrp < bsd_gidsetsz) { 1151 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1152 ngrp++; 1153 } 1154 1155 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1156 free(linux_gidset, M_LINUX); 1157 if (error) 1158 return (error); 1159 1160 td->td_retval[0] = ngrp; 1161 return (0); 1162 } 1163 1164 static bool 1165 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1166 { 1167 1168 if (linux_dummy_rlimits == 0) 1169 return (false); 1170 1171 switch (resource) { 1172 case LINUX_RLIMIT_LOCKS: 1173 case LINUX_RLIMIT_SIGPENDING: 1174 case LINUX_RLIMIT_MSGQUEUE: 1175 case LINUX_RLIMIT_RTTIME: 1176 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1177 rlim->rlim_max = LINUX_RLIM_INFINITY; 1178 return (true); 1179 case LINUX_RLIMIT_NICE: 1180 case LINUX_RLIMIT_RTPRIO: 1181 rlim->rlim_cur = 0; 1182 rlim->rlim_max = 0; 1183 return (true); 1184 default: 1185 return (false); 1186 } 1187 } 1188 1189 int 1190 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1191 { 1192 struct rlimit bsd_rlim; 1193 struct l_rlimit rlim; 1194 u_int which; 1195 int error; 1196 1197 if (args->resource >= LINUX_RLIM_NLIMITS) 1198 return (EINVAL); 1199 1200 which = linux_to_bsd_resource[args->resource]; 1201 if (which == -1) 1202 return (EINVAL); 1203 1204 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1205 if (error) 1206 return (error); 1207 1208 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1209 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1210 return (kern_setrlimit(td, which, &bsd_rlim)); 1211 } 1212 1213 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1214 int 1215 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1216 { 
1217 struct l_rlimit rlim; 1218 struct rlimit bsd_rlim; 1219 u_int which; 1220 1221 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1222 rlim.rlim_cur = bsd_rlim.rlim_cur; 1223 rlim.rlim_max = bsd_rlim.rlim_max; 1224 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1225 } 1226 1227 if (args->resource >= LINUX_RLIM_NLIMITS) 1228 return (EINVAL); 1229 1230 which = linux_to_bsd_resource[args->resource]; 1231 if (which == -1) 1232 return (EINVAL); 1233 1234 lim_rlimit(td, which, &bsd_rlim); 1235 1236 #ifdef COMPAT_LINUX32 1237 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1238 if (rlim.rlim_cur == UINT_MAX) 1239 rlim.rlim_cur = INT_MAX; 1240 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1241 if (rlim.rlim_max == UINT_MAX) 1242 rlim.rlim_max = INT_MAX; 1243 #else 1244 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1245 if (rlim.rlim_cur == ULONG_MAX) 1246 rlim.rlim_cur = LONG_MAX; 1247 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1248 if (rlim.rlim_max == ULONG_MAX) 1249 rlim.rlim_max = LONG_MAX; 1250 #endif 1251 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1252 } 1253 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1254 1255 int 1256 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1257 { 1258 struct l_rlimit rlim; 1259 struct rlimit bsd_rlim; 1260 u_int which; 1261 1262 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1263 rlim.rlim_cur = bsd_rlim.rlim_cur; 1264 rlim.rlim_max = bsd_rlim.rlim_max; 1265 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1266 } 1267 1268 if (args->resource >= LINUX_RLIM_NLIMITS) 1269 return (EINVAL); 1270 1271 which = linux_to_bsd_resource[args->resource]; 1272 if (which == -1) 1273 return (EINVAL); 1274 1275 lim_rlimit(td, which, &bsd_rlim); 1276 1277 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1278 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1279 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1280 } 1281 1282 int 1283 linux_sched_setscheduler(struct thread *td, 1284 
struct linux_sched_setscheduler_args *args) 1285 { 1286 struct sched_param sched_param; 1287 struct thread *tdt; 1288 int error, policy; 1289 1290 switch (args->policy) { 1291 case LINUX_SCHED_OTHER: 1292 policy = SCHED_OTHER; 1293 break; 1294 case LINUX_SCHED_FIFO: 1295 policy = SCHED_FIFO; 1296 break; 1297 case LINUX_SCHED_RR: 1298 policy = SCHED_RR; 1299 break; 1300 default: 1301 return (EINVAL); 1302 } 1303 1304 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1305 if (error) 1306 return (error); 1307 1308 if (linux_map_sched_prio) { 1309 switch (policy) { 1310 case SCHED_OTHER: 1311 if (sched_param.sched_priority != 0) 1312 return (EINVAL); 1313 1314 sched_param.sched_priority = 1315 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1316 break; 1317 case SCHED_FIFO: 1318 case SCHED_RR: 1319 if (sched_param.sched_priority < 1 || 1320 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1321 return (EINVAL); 1322 1323 /* 1324 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1325 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 
1326 */ 1327 sched_param.sched_priority = 1328 (sched_param.sched_priority - 1) * 1329 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1330 (LINUX_MAX_RT_PRIO - 1); 1331 break; 1332 } 1333 } 1334 1335 tdt = linux_tdfind(td, args->pid, -1); 1336 if (tdt == NULL) 1337 return (ESRCH); 1338 1339 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1340 PROC_UNLOCK(tdt->td_proc); 1341 return (error); 1342 } 1343 1344 int 1345 linux_sched_getscheduler(struct thread *td, 1346 struct linux_sched_getscheduler_args *args) 1347 { 1348 struct thread *tdt; 1349 int error, policy; 1350 1351 tdt = linux_tdfind(td, args->pid, -1); 1352 if (tdt == NULL) 1353 return (ESRCH); 1354 1355 error = kern_sched_getscheduler(td, tdt, &policy); 1356 PROC_UNLOCK(tdt->td_proc); 1357 1358 switch (policy) { 1359 case SCHED_OTHER: 1360 td->td_retval[0] = LINUX_SCHED_OTHER; 1361 break; 1362 case SCHED_FIFO: 1363 td->td_retval[0] = LINUX_SCHED_FIFO; 1364 break; 1365 case SCHED_RR: 1366 td->td_retval[0] = LINUX_SCHED_RR; 1367 break; 1368 } 1369 return (error); 1370 } 1371 1372 int 1373 linux_sched_get_priority_max(struct thread *td, 1374 struct linux_sched_get_priority_max_args *args) 1375 { 1376 struct sched_get_priority_max_args bsd; 1377 1378 if (linux_map_sched_prio) { 1379 switch (args->policy) { 1380 case LINUX_SCHED_OTHER: 1381 td->td_retval[0] = 0; 1382 return (0); 1383 case LINUX_SCHED_FIFO: 1384 case LINUX_SCHED_RR: 1385 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1386 return (0); 1387 default: 1388 return (EINVAL); 1389 } 1390 } 1391 1392 switch (args->policy) { 1393 case LINUX_SCHED_OTHER: 1394 bsd.policy = SCHED_OTHER; 1395 break; 1396 case LINUX_SCHED_FIFO: 1397 bsd.policy = SCHED_FIFO; 1398 break; 1399 case LINUX_SCHED_RR: 1400 bsd.policy = SCHED_RR; 1401 break; 1402 default: 1403 return (EINVAL); 1404 } 1405 return (sys_sched_get_priority_max(td, &bsd)); 1406 } 1407 1408 int 1409 linux_sched_get_priority_min(struct thread *td, 1410 struct linux_sched_get_priority_min_args *args) 1411 { 
1412 struct sched_get_priority_min_args bsd; 1413 1414 if (linux_map_sched_prio) { 1415 switch (args->policy) { 1416 case LINUX_SCHED_OTHER: 1417 td->td_retval[0] = 0; 1418 return (0); 1419 case LINUX_SCHED_FIFO: 1420 case LINUX_SCHED_RR: 1421 td->td_retval[0] = 1; 1422 return (0); 1423 default: 1424 return (EINVAL); 1425 } 1426 } 1427 1428 switch (args->policy) { 1429 case LINUX_SCHED_OTHER: 1430 bsd.policy = SCHED_OTHER; 1431 break; 1432 case LINUX_SCHED_FIFO: 1433 bsd.policy = SCHED_FIFO; 1434 break; 1435 case LINUX_SCHED_RR: 1436 bsd.policy = SCHED_RR; 1437 break; 1438 default: 1439 return (EINVAL); 1440 } 1441 return (sys_sched_get_priority_min(td, &bsd)); 1442 } 1443 1444 #define REBOOT_CAD_ON 0x89abcdef 1445 #define REBOOT_CAD_OFF 0 1446 #define REBOOT_HALT 0xcdef0123 1447 #define REBOOT_RESTART 0x01234567 1448 #define REBOOT_RESTART2 0xA1B2C3D4 1449 #define REBOOT_POWEROFF 0x4321FEDC 1450 #define REBOOT_MAGIC1 0xfee1dead 1451 #define REBOOT_MAGIC2 0x28121969 1452 #define REBOOT_MAGIC2A 0x05121996 1453 #define REBOOT_MAGIC2B 0x16041998 1454 1455 int 1456 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1457 { 1458 struct reboot_args bsd_args; 1459 1460 if (args->magic1 != REBOOT_MAGIC1) 1461 return (EINVAL); 1462 1463 switch (args->magic2) { 1464 case REBOOT_MAGIC2: 1465 case REBOOT_MAGIC2A: 1466 case REBOOT_MAGIC2B: 1467 break; 1468 default: 1469 return (EINVAL); 1470 } 1471 1472 switch (args->cmd) { 1473 case REBOOT_CAD_ON: 1474 case REBOOT_CAD_OFF: 1475 return (priv_check(td, PRIV_REBOOT)); 1476 case REBOOT_HALT: 1477 bsd_args.opt = RB_HALT; 1478 break; 1479 case REBOOT_RESTART: 1480 case REBOOT_RESTART2: 1481 bsd_args.opt = 0; 1482 break; 1483 case REBOOT_POWEROFF: 1484 bsd_args.opt = RB_POWEROFF; 1485 break; 1486 default: 1487 return (EINVAL); 1488 } 1489 return (sys_reboot(td, &bsd_args)); 1490 } 1491 1492 int 1493 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1494 { 1495 1496 td->td_retval[0] = td->td_proc->p_pid; 1497 
1498 return (0); 1499 } 1500 1501 int 1502 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1503 { 1504 struct linux_emuldata *em; 1505 1506 em = em_find(td); 1507 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1508 1509 td->td_retval[0] = em->em_tid; 1510 1511 return (0); 1512 } 1513 1514 int 1515 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1516 { 1517 1518 td->td_retval[0] = kern_getppid(td); 1519 return (0); 1520 } 1521 1522 int 1523 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1524 { 1525 1526 td->td_retval[0] = td->td_ucred->cr_rgid; 1527 return (0); 1528 } 1529 1530 int 1531 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1532 { 1533 1534 td->td_retval[0] = td->td_ucred->cr_ruid; 1535 return (0); 1536 } 1537 1538 int 1539 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1540 { 1541 1542 return (kern_getsid(td, args->pid)); 1543 } 1544 1545 int 1546 linux_nosys(struct thread *td, struct nosys_args *ignore) 1547 { 1548 1549 return (ENOSYS); 1550 } 1551 1552 int 1553 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1554 { 1555 int error; 1556 1557 error = kern_getpriority(td, args->which, args->who); 1558 td->td_retval[0] = 20 - td->td_retval[0]; 1559 return (error); 1560 } 1561 1562 int 1563 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1564 { 1565 int name[2]; 1566 1567 name[0] = CTL_KERN; 1568 name[1] = KERN_HOSTNAME; 1569 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1570 args->len, 0, 0)); 1571 } 1572 1573 int 1574 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1575 { 1576 int name[2]; 1577 1578 name[0] = CTL_KERN; 1579 name[1] = KERN_NISDOMAINNAME; 1580 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1581 args->len, 0, 0)); 1582 } 1583 1584 int 1585 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1586 { 1587 1588 
LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1589 args->error_code); 1590 1591 /* 1592 * XXX: we should send a signal to the parent if 1593 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1594 * as it doesnt occur often. 1595 */ 1596 exit1(td, args->error_code, 0); 1597 /* NOTREACHED */ 1598 } 1599 1600 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1601 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1602 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1603 1604 struct l_user_cap_header { 1605 l_int version; 1606 l_int pid; 1607 }; 1608 1609 struct l_user_cap_data { 1610 l_int effective; 1611 l_int permitted; 1612 l_int inheritable; 1613 }; 1614 1615 int 1616 linux_capget(struct thread *td, struct linux_capget_args *uap) 1617 { 1618 struct l_user_cap_header luch; 1619 struct l_user_cap_data lucd[2]; 1620 int error, u32s; 1621 1622 if (uap->hdrp == NULL) 1623 return (EFAULT); 1624 1625 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1626 if (error != 0) 1627 return (error); 1628 1629 switch (luch.version) { 1630 case _LINUX_CAPABILITY_VERSION_1: 1631 u32s = 1; 1632 break; 1633 case _LINUX_CAPABILITY_VERSION_2: 1634 case _LINUX_CAPABILITY_VERSION_3: 1635 u32s = 2; 1636 break; 1637 default: 1638 luch.version = _LINUX_CAPABILITY_VERSION_1; 1639 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1640 if (error) 1641 return (error); 1642 return (EINVAL); 1643 } 1644 1645 if (luch.pid) 1646 return (EPERM); 1647 1648 if (uap->datap) { 1649 /* 1650 * The current implementation doesn't support setting 1651 * a capability (it's essentially a stub) so indicate 1652 * that no capabilities are currently set or available 1653 * to request. 
1654 */ 1655 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1656 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1657 } 1658 1659 return (error); 1660 } 1661 1662 int 1663 linux_capset(struct thread *td, struct linux_capset_args *uap) 1664 { 1665 struct l_user_cap_header luch; 1666 struct l_user_cap_data lucd[2]; 1667 int error, i, u32s; 1668 1669 if (uap->hdrp == NULL || uap->datap == NULL) 1670 return (EFAULT); 1671 1672 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1673 if (error != 0) 1674 return (error); 1675 1676 switch (luch.version) { 1677 case _LINUX_CAPABILITY_VERSION_1: 1678 u32s = 1; 1679 break; 1680 case _LINUX_CAPABILITY_VERSION_2: 1681 case _LINUX_CAPABILITY_VERSION_3: 1682 u32s = 2; 1683 break; 1684 default: 1685 luch.version = _LINUX_CAPABILITY_VERSION_1; 1686 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1687 if (error) 1688 return (error); 1689 return (EINVAL); 1690 } 1691 1692 if (luch.pid) 1693 return (EPERM); 1694 1695 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1696 if (error != 0) 1697 return (error); 1698 1699 /* We currently don't support setting any capabilities. 
*/ 1700 for (i = 0; i < u32s; i++) { 1701 if (lucd[i].effective || lucd[i].permitted || 1702 lucd[i].inheritable) { 1703 linux_msg(td, 1704 "capset[%d] effective=0x%x, permitted=0x%x, " 1705 "inheritable=0x%x is not implemented", i, 1706 (int)lucd[i].effective, (int)lucd[i].permitted, 1707 (int)lucd[i].inheritable); 1708 return (EPERM); 1709 } 1710 } 1711 1712 return (0); 1713 } 1714 1715 int 1716 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1717 { 1718 int error = 0, max_size, arg; 1719 struct proc *p = td->td_proc; 1720 char comm[LINUX_MAX_COMM_LEN]; 1721 int pdeath_signal, trace_state; 1722 1723 switch (args->option) { 1724 case LINUX_PR_SET_PDEATHSIG: 1725 if (!LINUX_SIG_VALID(args->arg2)) 1726 return (EINVAL); 1727 pdeath_signal = linux_to_bsd_signal(args->arg2); 1728 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1729 &pdeath_signal)); 1730 case LINUX_PR_GET_PDEATHSIG: 1731 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1732 &pdeath_signal); 1733 if (error != 0) 1734 return (error); 1735 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1736 return (copyout(&pdeath_signal, 1737 (void *)(register_t)args->arg2, 1738 sizeof(pdeath_signal))); 1739 /* 1740 * In Linux, this flag controls if set[gu]id processes can coredump. 1741 * There are additional semantics imposed on processes that cannot 1742 * coredump: 1743 * - Such processes can not be ptraced. 1744 * - There are some semantics around ownership of process-related files 1745 * in the /proc namespace. 1746 * 1747 * In FreeBSD, we can (and by default, do) disable setuid coredump 1748 * system-wide with 'sugid_coredump.' We control tracability on a 1749 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1750 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1751 * procctl is roughly analogous to Linux's DUMPABLE. 1752 * 1753 * So, proxy these knobs to the corresponding PROC_TRACE setting. 
1754 */ 1755 case LINUX_PR_GET_DUMPABLE: 1756 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1757 &trace_state); 1758 if (error != 0) 1759 return (error); 1760 td->td_retval[0] = (trace_state != -1); 1761 return (0); 1762 case LINUX_PR_SET_DUMPABLE: 1763 /* 1764 * It is only valid for userspace to set one of these two 1765 * flags, and only one at a time. 1766 */ 1767 switch (args->arg2) { 1768 case LINUX_SUID_DUMP_DISABLE: 1769 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1770 break; 1771 case LINUX_SUID_DUMP_USER: 1772 trace_state = PROC_TRACE_CTL_ENABLE; 1773 break; 1774 default: 1775 return (EINVAL); 1776 } 1777 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1778 &trace_state)); 1779 case LINUX_PR_GET_KEEPCAPS: 1780 /* 1781 * Indicate that we always clear the effective and 1782 * permitted capability sets when the user id becomes 1783 * non-zero (actually the capability sets are simply 1784 * always zero in the current implementation). 1785 */ 1786 td->td_retval[0] = 0; 1787 break; 1788 case LINUX_PR_SET_KEEPCAPS: 1789 /* 1790 * Ignore requests to keep the effective and permitted 1791 * capability sets when the user id becomes non-zero. 1792 */ 1793 break; 1794 case LINUX_PR_SET_NAME: 1795 /* 1796 * To be on the safe side we need to make sure to not 1797 * overflow the size a Linux program expects. We already 1798 * do this here in the copyin, so that we don't need to 1799 * check on copyout. 1800 */ 1801 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1802 error = copyinstr((void *)(register_t)args->arg2, comm, 1803 max_size, NULL); 1804 1805 /* Linux silently truncates the name if it is too long. */ 1806 if (error == ENAMETOOLONG) { 1807 /* 1808 * XXX: copyinstr() isn't documented to populate the 1809 * array completely, so do a copyin() to be on the 1810 * safe side. This should be changed in case 1811 * copyinstr() is changed to guarantee this. 
1812 */ 1813 error = copyin((void *)(register_t)args->arg2, comm, 1814 max_size - 1); 1815 comm[max_size - 1] = '\0'; 1816 } 1817 if (error) 1818 return (error); 1819 1820 PROC_LOCK(p); 1821 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1822 PROC_UNLOCK(p); 1823 break; 1824 case LINUX_PR_GET_NAME: 1825 PROC_LOCK(p); 1826 strlcpy(comm, p->p_comm, sizeof(comm)); 1827 PROC_UNLOCK(p); 1828 error = copyout(comm, (void *)(register_t)args->arg2, 1829 strlen(comm) + 1); 1830 break; 1831 case LINUX_PR_GET_SECCOMP: 1832 case LINUX_PR_SET_SECCOMP: 1833 /* 1834 * Same as returned by Linux without CONFIG_SECCOMP enabled. 1835 */ 1836 error = EINVAL; 1837 break; 1838 case LINUX_PR_CAPBSET_READ: 1839 #if 0 1840 /* 1841 * This makes too much noise with Ubuntu Focal. 1842 */ 1843 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 1844 (int)args->arg2); 1845 #endif 1846 error = EINVAL; 1847 break; 1848 case LINUX_PR_SET_NO_NEW_PRIVS: 1849 arg = args->arg2 == 1 ? 1850 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 1851 error = kern_procctl(td, P_PID, p->p_pid, 1852 PROC_NO_NEW_PRIVS_CTL, &arg); 1853 break; 1854 case LINUX_PR_SET_PTRACER: 1855 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 1856 error = EINVAL; 1857 break; 1858 default: 1859 linux_msg(td, "unsupported prctl option %d", args->option); 1860 error = EINVAL; 1861 break; 1862 } 1863 1864 return (error); 1865 } 1866 1867 int 1868 linux_sched_setparam(struct thread *td, 1869 struct linux_sched_setparam_args *uap) 1870 { 1871 struct sched_param sched_param; 1872 struct thread *tdt; 1873 int error, policy; 1874 1875 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1876 if (error) 1877 return (error); 1878 1879 tdt = linux_tdfind(td, uap->pid, -1); 1880 if (tdt == NULL) 1881 return (ESRCH); 1882 1883 if (linux_map_sched_prio) { 1884 error = kern_sched_getscheduler(td, tdt, &policy); 1885 if (error) 1886 goto out; 1887 1888 switch (policy) { 1889 case SCHED_OTHER: 1890 if (sched_param.sched_priority != 
0) { 1891 error = EINVAL; 1892 goto out; 1893 } 1894 sched_param.sched_priority = 1895 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1896 break; 1897 case SCHED_FIFO: 1898 case SCHED_RR: 1899 if (sched_param.sched_priority < 1 || 1900 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1901 error = EINVAL; 1902 goto out; 1903 } 1904 /* 1905 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1906 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1907 */ 1908 sched_param.sched_priority = 1909 (sched_param.sched_priority - 1) * 1910 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1911 (LINUX_MAX_RT_PRIO - 1); 1912 break; 1913 } 1914 } 1915 1916 error = kern_sched_setparam(td, tdt, &sched_param); 1917 out: PROC_UNLOCK(tdt->td_proc); 1918 return (error); 1919 } 1920 1921 int 1922 linux_sched_getparam(struct thread *td, 1923 struct linux_sched_getparam_args *uap) 1924 { 1925 struct sched_param sched_param; 1926 struct thread *tdt; 1927 int error, policy; 1928 1929 tdt = linux_tdfind(td, uap->pid, -1); 1930 if (tdt == NULL) 1931 return (ESRCH); 1932 1933 error = kern_sched_getparam(td, tdt, &sched_param); 1934 if (error) { 1935 PROC_UNLOCK(tdt->td_proc); 1936 return (error); 1937 } 1938 1939 if (linux_map_sched_prio) { 1940 error = kern_sched_getscheduler(td, tdt, &policy); 1941 PROC_UNLOCK(tdt->td_proc); 1942 if (error) 1943 return (error); 1944 1945 switch (policy) { 1946 case SCHED_OTHER: 1947 sched_param.sched_priority = 0; 1948 break; 1949 case SCHED_FIFO: 1950 case SCHED_RR: 1951 /* 1952 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 1953 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 1954 */ 1955 sched_param.sched_priority = 1956 (sched_param.sched_priority * 1957 (LINUX_MAX_RT_PRIO - 1) + 1958 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 1959 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 1960 break; 1961 } 1962 } else 1963 PROC_UNLOCK(tdt->td_proc); 1964 1965 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 1966 return (error); 1967 } 1968 1969 /* 1970 * Get affinity of a process. 
1971 */ 1972 int 1973 linux_sched_getaffinity(struct thread *td, 1974 struct linux_sched_getaffinity_args *args) 1975 { 1976 struct thread *tdt; 1977 cpuset_t *mask; 1978 size_t size; 1979 int error; 1980 id_t tid; 1981 1982 tdt = linux_tdfind(td, args->pid, -1); 1983 if (tdt == NULL) 1984 return (ESRCH); 1985 tid = tdt->td_tid; 1986 PROC_UNLOCK(tdt->td_proc); 1987 1988 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1989 size = min(args->len, sizeof(cpuset_t)); 1990 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1991 tid, size, mask); 1992 if (error == ERANGE) 1993 error = EINVAL; 1994 if (error == 0) 1995 error = copyout(mask, args->user_mask_ptr, size); 1996 if (error == 0) 1997 td->td_retval[0] = size; 1998 free(mask, M_LINUX); 1999 return (error); 2000 } 2001 2002 /* 2003 * Set affinity of a process. 2004 */ 2005 int 2006 linux_sched_setaffinity(struct thread *td, 2007 struct linux_sched_setaffinity_args *args) 2008 { 2009 struct thread *tdt; 2010 cpuset_t *mask; 2011 int cpu, error; 2012 size_t len; 2013 id_t tid; 2014 2015 tdt = linux_tdfind(td, args->pid, -1); 2016 if (tdt == NULL) 2017 return (ESRCH); 2018 tid = tdt->td_tid; 2019 PROC_UNLOCK(tdt->td_proc); 2020 2021 len = min(args->len, sizeof(cpuset_t)); 2022 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);; 2023 error = copyin(args->user_mask_ptr, mask, len); 2024 if (error != 0) 2025 goto out; 2026 /* Linux ignore high bits */ 2027 CPU_FOREACH_ISSET(cpu, mask) 2028 if (cpu > mp_maxid) 2029 CPU_CLR(cpu, mask); 2030 2031 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2032 tid, mask); 2033 if (error == EDEADLK) 2034 error = EINVAL; 2035 out: 2036 free(mask, M_TEMP); 2037 return (error); 2038 } 2039 2040 struct linux_rlimit64 { 2041 uint64_t rlim_cur; 2042 uint64_t rlim_max; 2043 }; 2044 2045 int 2046 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2047 { 2048 struct rlimit rlim, nrlim; 2049 struct linux_rlimit64 lrlim; 
2050 struct proc *p; 2051 u_int which; 2052 int flags; 2053 int error; 2054 2055 if (args->new == NULL && args->old != NULL) { 2056 if (linux_get_dummy_limit(args->resource, &rlim)) { 2057 lrlim.rlim_cur = rlim.rlim_cur; 2058 lrlim.rlim_max = rlim.rlim_max; 2059 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2060 } 2061 } 2062 2063 if (args->resource >= LINUX_RLIM_NLIMITS) 2064 return (EINVAL); 2065 2066 which = linux_to_bsd_resource[args->resource]; 2067 if (which == -1) 2068 return (EINVAL); 2069 2070 if (args->new != NULL) { 2071 /* 2072 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2073 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2074 * as INFINITY so we do not need a conversion even. 2075 */ 2076 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2077 if (error != 0) 2078 return (error); 2079 } 2080 2081 flags = PGET_HOLD | PGET_NOTWEXIT; 2082 if (args->new != NULL) 2083 flags |= PGET_CANDEBUG; 2084 else 2085 flags |= PGET_CANSEE; 2086 if (args->pid == 0) { 2087 p = td->td_proc; 2088 PHOLD(p); 2089 } else { 2090 error = pget(args->pid, flags, &p); 2091 if (error != 0) 2092 return (error); 2093 } 2094 if (args->old != NULL) { 2095 PROC_LOCK(p); 2096 lim_rlimit_proc(p, which, &rlim); 2097 PROC_UNLOCK(p); 2098 if (rlim.rlim_cur == RLIM_INFINITY) 2099 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2100 else 2101 lrlim.rlim_cur = rlim.rlim_cur; 2102 if (rlim.rlim_max == RLIM_INFINITY) 2103 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2104 else 2105 lrlim.rlim_max = rlim.rlim_max; 2106 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2107 if (error != 0) 2108 goto out; 2109 } 2110 2111 if (args->new != NULL) 2112 error = kern_proc_setrlimit(td, p, which, &nrlim); 2113 2114 out: 2115 PRELE(p); 2116 return (error); 2117 } 2118 2119 int 2120 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2121 { 2122 struct timespec ts, *tsp; 2123 int error; 2124 2125 if (args->tsp != NULL) { 2126 error = linux_get_timespec(&ts, args->tsp); 2127 if 
(error != 0) 2128 return (error); 2129 tsp = &ts; 2130 } else 2131 tsp = NULL; 2132 2133 error = linux_common_pselect6(td, args->nfds, args->readfds, 2134 args->writefds, args->exceptfds, tsp, args->sig); 2135 2136 if (args->tsp != NULL) 2137 linux_put_timespec(&ts, args->tsp); 2138 return (error); 2139 } 2140 2141 static int 2142 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2143 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2144 l_uintptr_t *sig) 2145 { 2146 struct timeval utv, tv0, tv1, *tvp; 2147 struct l_pselect6arg lpse6; 2148 sigset_t *ssp; 2149 sigset_t ss; 2150 int error; 2151 2152 ssp = NULL; 2153 if (sig != NULL) { 2154 error = copyin(sig, &lpse6, sizeof(lpse6)); 2155 if (error != 0) 2156 return (error); 2157 error = linux_copyin_sigset(td, PTRIN(lpse6.ss), 2158 lpse6.ss_len, &ss, &ssp); 2159 if (error != 0) 2160 return (error); 2161 } else 2162 ssp = NULL; 2163 2164 /* 2165 * Currently glibc changes nanosecond number to microsecond. 2166 * This mean losing precision but for now it is hardly seen. 2167 */ 2168 if (tsp != NULL) { 2169 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2170 if (itimerfix(&utv)) 2171 return (EINVAL); 2172 2173 microtime(&tv0); 2174 tvp = &utv; 2175 } else 2176 tvp = NULL; 2177 2178 error = kern_pselect(td, nfds, readfds, writefds, 2179 exceptfds, tvp, ssp, LINUX_NFDBITS); 2180 2181 if (tsp != NULL) { 2182 /* 2183 * Compute how much time was left of the timeout, 2184 * by subtracting the current time and the time 2185 * before we started the call, and subtracting 2186 * that result from the user-supplied value. 
2187 */ 2188 microtime(&tv1); 2189 timevalsub(&tv1, &tv0); 2190 timevalsub(&utv, &tv1); 2191 if (utv.tv_sec < 0) 2192 timevalclear(&utv); 2193 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2194 } 2195 return (error); 2196 } 2197 2198 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2199 int 2200 linux_pselect6_time64(struct thread *td, 2201 struct linux_pselect6_time64_args *args) 2202 { 2203 struct timespec ts, *tsp; 2204 int error; 2205 2206 if (args->tsp != NULL) { 2207 error = linux_get_timespec64(&ts, args->tsp); 2208 if (error != 0) 2209 return (error); 2210 tsp = &ts; 2211 } else 2212 tsp = NULL; 2213 2214 error = linux_common_pselect6(td, args->nfds, args->readfds, 2215 args->writefds, args->exceptfds, tsp, args->sig); 2216 2217 if (args->tsp != NULL) 2218 linux_put_timespec64(&ts, args->tsp); 2219 return (error); 2220 } 2221 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2222 2223 int 2224 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2225 { 2226 struct timespec uts, *tsp; 2227 int error; 2228 2229 if (args->tsp != NULL) { 2230 error = linux_get_timespec(&uts, args->tsp); 2231 if (error != 0) 2232 return (error); 2233 tsp = &uts; 2234 } else 2235 tsp = NULL; 2236 2237 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2238 args->sset, args->ssize); 2239 if (error == 0 && args->tsp != NULL) 2240 error = linux_put_timespec(&uts, args->tsp); 2241 return (error); 2242 } 2243 2244 static int 2245 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2246 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2247 { 2248 struct timespec ts0, ts1; 2249 struct pollfd stackfds[32]; 2250 struct pollfd *kfds; 2251 sigset_t *ssp; 2252 sigset_t ss; 2253 int error; 2254 2255 if (kern_poll_maxfds(nfds)) 2256 return (EINVAL); 2257 if (sset != NULL) { 2258 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); 2259 if (error != 0) 2260 return (error); 2261 } else 2262 ssp = NULL; 2263 if (tsp != NULL) 2264 
nanotime(&ts0); 2265 2266 if (nfds > nitems(stackfds)) 2267 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2268 else 2269 kfds = stackfds; 2270 error = linux_pollin(td, kfds, fds, nfds); 2271 if (error != 0) 2272 goto out; 2273 2274 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2275 if (error == 0) 2276 error = linux_pollout(td, kfds, fds, nfds); 2277 2278 if (error == 0 && tsp != NULL) { 2279 if (td->td_retval[0]) { 2280 nanotime(&ts1); 2281 timespecsub(&ts1, &ts0, &ts1); 2282 timespecsub(tsp, &ts1, tsp); 2283 if (tsp->tv_sec < 0) 2284 timespecclear(tsp); 2285 } else 2286 timespecclear(tsp); 2287 } 2288 2289 out: 2290 if (nfds > nitems(stackfds)) 2291 free(kfds, M_TEMP); 2292 return (error); 2293 } 2294 2295 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2296 int 2297 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2298 { 2299 struct timespec uts, *tsp; 2300 int error; 2301 2302 if (args->tsp != NULL) { 2303 error = linux_get_timespec64(&uts, args->tsp); 2304 if (error != 0) 2305 return (error); 2306 tsp = &uts; 2307 } else 2308 tsp = NULL; 2309 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2310 args->sset, args->ssize); 2311 if (error == 0 && args->tsp != NULL) 2312 error = linux_put_timespec64(&uts, args->tsp); 2313 return (error); 2314 } 2315 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2316 2317 static int 2318 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2319 { 2320 int error; 2321 u_int i; 2322 2323 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2324 if (error != 0) 2325 return (error); 2326 2327 for (i = 0; i < nfd; i++) { 2328 if (fds->events != 0) 2329 linux_to_bsd_poll_events(td, fds->fd, 2330 fds->events, &fds->events); 2331 fds++; 2332 } 2333 return (0); 2334 } 2335 2336 static int 2337 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2338 { 2339 int error = 0; 2340 u_int i, n = 0; 2341 
2342 for (i = 0; i < nfd; i++) { 2343 if (fds->revents != 0) { 2344 bsd_to_linux_poll_events(fds->revents, 2345 &fds->revents); 2346 n++; 2347 } 2348 error = copyout(&fds->revents, &ufds->revents, 2349 sizeof(ufds->revents)); 2350 if (error) 2351 return (error); 2352 fds++; 2353 ufds++; 2354 } 2355 td->td_retval[0] = n; 2356 return (0); 2357 } 2358 2359 static int 2360 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2361 struct timespec *ts) 2362 { 2363 struct thread *tdt; 2364 int error; 2365 2366 /* 2367 * According to man in case the invalid pid specified 2368 * EINVAL should be returned. 2369 */ 2370 if (pid < 0) 2371 return (EINVAL); 2372 2373 tdt = linux_tdfind(td, pid, -1); 2374 if (tdt == NULL) 2375 return (ESRCH); 2376 2377 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2378 PROC_UNLOCK(tdt->td_proc); 2379 return (error); 2380 } 2381 2382 int 2383 linux_sched_rr_get_interval(struct thread *td, 2384 struct linux_sched_rr_get_interval_args *uap) 2385 { 2386 struct timespec ts; 2387 int error; 2388 2389 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2390 if (error != 0) 2391 return (error); 2392 return (linux_put_timespec(&ts, uap->interval)); 2393 } 2394 2395 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2396 int 2397 linux_sched_rr_get_interval_time64(struct thread *td, 2398 struct linux_sched_rr_get_interval_time64_args *uap) 2399 { 2400 struct timespec ts; 2401 int error; 2402 2403 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2404 if (error != 0) 2405 return (error); 2406 return (linux_put_timespec64(&ts, uap->interval)); 2407 } 2408 #endif 2409 2410 /* 2411 * In case when the Linux thread is the initial thread in 2412 * the thread group thread id is equal to the process id. 2413 * Glibc depends on this magic (assert in pthread_getattr_np.c). 
2414 */ 2415 struct thread * 2416 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2417 { 2418 struct linux_emuldata *em; 2419 struct thread *tdt; 2420 struct proc *p; 2421 2422 tdt = NULL; 2423 if (tid == 0 || tid == td->td_tid) { 2424 if (pid != -1 && td->td_proc->p_pid != pid) 2425 return (NULL); 2426 PROC_LOCK(td->td_proc); 2427 return (td); 2428 } else if (tid > PID_MAX) 2429 return (tdfind(tid, pid)); 2430 2431 /* 2432 * Initial thread where the tid equal to the pid. 2433 */ 2434 p = pfind(tid); 2435 if (p != NULL) { 2436 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2437 (pid != -1 && tid != pid)) { 2438 /* 2439 * p is not a Linuxulator process. 2440 */ 2441 PROC_UNLOCK(p); 2442 return (NULL); 2443 } 2444 FOREACH_THREAD_IN_PROC(p, tdt) { 2445 em = em_find(tdt); 2446 if (tid == em->em_tid) 2447 return (tdt); 2448 } 2449 PROC_UNLOCK(p); 2450 } 2451 return (NULL); 2452 } 2453 2454 void 2455 linux_to_bsd_waitopts(int options, int *bsdopts) 2456 { 2457 2458 if (options & LINUX_WNOHANG) 2459 *bsdopts |= WNOHANG; 2460 if (options & LINUX_WUNTRACED) 2461 *bsdopts |= WUNTRACED; 2462 if (options & LINUX_WEXITED) 2463 *bsdopts |= WEXITED; 2464 if (options & LINUX_WCONTINUED) 2465 *bsdopts |= WCONTINUED; 2466 if (options & LINUX_WNOWAIT) 2467 *bsdopts |= WNOWAIT; 2468 2469 if (options & __WCLONE) 2470 *bsdopts |= WLINUXCLONE; 2471 } 2472 2473 int 2474 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2475 { 2476 struct uio uio; 2477 struct iovec iov; 2478 int error; 2479 2480 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2481 return (EINVAL); 2482 if (args->count > INT_MAX) 2483 args->count = INT_MAX; 2484 2485 iov.iov_base = args->buf; 2486 iov.iov_len = args->count; 2487 2488 uio.uio_iov = &iov; 2489 uio.uio_iovcnt = 1; 2490 uio.uio_resid = iov.iov_len; 2491 uio.uio_segflg = UIO_USERSPACE; 2492 uio.uio_rw = UIO_READ; 2493 uio.uio_td = td; 2494 2495 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2496 if (error == 0) 
2497 td->td_retval[0] = args->count - uio.uio_resid; 2498 return (error); 2499 } 2500 2501 int 2502 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2503 { 2504 2505 /* Needs to be page-aligned */ 2506 if (args->start & PAGE_MASK) 2507 return (EINVAL); 2508 return (kern_mincore(td, args->start, args->len, args->vec)); 2509 } 2510 2511 #define SYSLOG_TAG "<6>" 2512 2513 int 2514 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2515 { 2516 char buf[128], *src, *dst; 2517 u_int seq; 2518 int buflen, error; 2519 2520 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2521 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2522 return (EINVAL); 2523 } 2524 2525 if (args->len < 6) { 2526 td->td_retval[0] = 0; 2527 return (0); 2528 } 2529 2530 error = priv_check(td, PRIV_MSGBUF); 2531 if (error) 2532 return (error); 2533 2534 mtx_lock(&msgbuf_lock); 2535 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2536 mtx_unlock(&msgbuf_lock); 2537 2538 dst = args->buf; 2539 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2540 /* The -1 is to skip the trailing '\0'. 
*/ 2541 dst += sizeof(SYSLOG_TAG) - 1; 2542 2543 while (error == 0) { 2544 mtx_lock(&msgbuf_lock); 2545 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2546 mtx_unlock(&msgbuf_lock); 2547 2548 if (buflen == 0) 2549 break; 2550 2551 for (src = buf; src < buf + buflen && error == 0; src++) { 2552 if (*src == '\0') 2553 continue; 2554 2555 if (dst >= args->buf + args->len) 2556 goto out; 2557 2558 error = copyout(src, dst, 1); 2559 dst++; 2560 2561 if (*src == '\n' && *(src + 1) != '<' && 2562 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2563 error = copyout(&SYSLOG_TAG, 2564 dst, sizeof(SYSLOG_TAG)); 2565 dst += sizeof(SYSLOG_TAG) - 1; 2566 } 2567 } 2568 } 2569 out: 2570 td->td_retval[0] = dst - args->buf; 2571 return (error); 2572 } 2573 2574 int 2575 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2576 { 2577 int cpu, error, node; 2578 2579 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2580 error = 0; 2581 node = cpuid_to_pcpu[cpu]->pc_domain; 2582 2583 if (args->cpu != NULL) 2584 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2585 if (args->node != NULL) 2586 error = copyout(&node, args->node, sizeof(l_int)); 2587 return (error); 2588 } 2589 2590 #if defined(__i386__) || defined(__amd64__) 2591 int 2592 linux_poll(struct thread *td, struct linux_poll_args *args) 2593 { 2594 struct timespec ts, *tsp; 2595 2596 if (args->timeout != INFTIM) { 2597 if (args->timeout < 0) 2598 return (EINVAL); 2599 ts.tv_sec = args->timeout / 1000; 2600 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2601 tsp = &ts; 2602 } else 2603 tsp = NULL; 2604 2605 return (linux_common_ppoll(td, args->fds, args->nfds, 2606 tsp, NULL, 0)); 2607 } 2608 #endif /* __i386__ || __amd64__ */ 2609 2610 int 2611 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2612 { 2613 2614 switch (args->op) { 2615 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2616 return (EOPNOTSUPP); 2617 default: 2618 /* 2619 * Ignore unknown operations, just like 
Linux kernel built 2620 * without CONFIG_SECCOMP. 2621 */ 2622 return (EINVAL); 2623 } 2624 } 2625 2626 #ifndef COMPAT_LINUX32 2627 int 2628 linux_execve(struct thread *td, struct linux_execve_args *args) 2629 { 2630 struct image_args eargs; 2631 char *path; 2632 int error; 2633 2634 LINUX_CTR(execve); 2635 2636 if (!LUSECONVPATH(td)) { 2637 error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, 2638 args->argp, args->envp); 2639 } else { 2640 LCONVPATHEXIST(args->path, &path); 2641 error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, 2642 args->envp); 2643 LFREEPATH(path); 2644 } 2645 if (error == 0) 2646 error = linux_common_execve(td, &eargs); 2647 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2648 return (error); 2649 } 2650 #endif 2651