/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/imgact.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_common.h>
#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_time.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

int stclohz;				/* Statistics clock frequency */

/*
 * Map Linux resource-limit indices (the LINUX_RLIMIT_* numbering) onto
 * the corresponding FreeBSD RLIMIT_* values; indexed by the Linux number.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

/*
 * Layout of the Linux struct sysinfo as copied out to userland by
 * linux_sysinfo() below.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/*
Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

/* Argument block for the sigmask parameter of pselect6(2). */
struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);
static int	linux_pollin(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
static int	linux_pollout(struct thread *, struct pollfd *,
			struct pollfd *, u_int);

/*
 * sysinfo(2): fill a struct l_sysinfo from FreeBSD's uptime, load
 * average, memory and swap counters and copy it out to args->info.
 */
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	/* Round a partial second of uptime up, as Linux reports whole seconds. */
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files. There is no cheap way to
	 * compute this, so just leave the field unpopulated. Linux itself only
	 * started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	/* i = total swap pages, j = pages in use. */
	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * alarm(2): arm ITIMER_REAL and return the number of seconds that were
 * remaining on any previously scheduled alarm (rounded to the nearest
 * second).  Never fails, per Linux semantics.
 */
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error __diagused;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit to avoid error from it.
	 *
	 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
	 * platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	/* Round the remaining time of the previous alarm to whole seconds. */
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

/*
 * brk(2): move the data-segment break to args->dsend if it is above the
 * current data start and the kernel accepts it; on any failure Linux brk
 * returns the (unchanged) current break rather than an error.
 */
int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * select(2): like BSD select, but on return Linux updates the timeout
 * argument with the amount of time that was left, so measure elapsed
 * time around kern_select() and write the remainder back.
 */
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

/*
 * mremap(2): only shrinking in place is supported; growing a mapping
 * (new_len > old_len) fails with ENOMEM and moving is not implemented.
 */
int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		/* Shrink: unmap the tail of the region. */
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ?
	    0 : (uintptr_t)args->addr;
	return (error);
}

#define	LINUX_MS_ASYNC		0x0001
#define	LINUX_MS_INVALIDATE	0x0002
#define	LINUX_MS_SYNC		0x0004

/*
 * msync(2): Linux MS_ASYNC/MS_INVALIDATE match the BSD values; MS_SYNC
 * is masked off because Linux's MS_SYNC bit differs from FreeBSD's.
 */
int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * time(2): return seconds since the Epoch, optionally also storing the
 * value at args->tm.
 */
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

/* Linux struct tms, as copied out by linux_times(). */
struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 */
#define	CLK_TCK		100

#define	CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

/* Pick the tick conversion matching the emulated kernel version. */
#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER(2,4,0) ?	\
			    CONVNTCK(r) : CONVOTCK(r))

/*
 * times(2): report user/system CPU time of the process and its reaped
 * children in clock ticks, and return the uptime in ticks.
 */
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

/*
 * uname(2): fill a Linux struct new_utsname from the emulated OS name,
 * release, hostname/domainname and the hardware machine string.
 */
int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	/* Truncate the version string at the first newline. */
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications.  On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
#if defined(COMPAT_LINUX32)
	if (linux32_emulate_i386)
		strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
	else
#endif
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#elif defined(__aarch64__)
	strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
#elif defined(__i386__)
	strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

/* Linux struct utimbuf for utime(2). */
struct l_utimbuf {
	l_time_t	l_actime;
	l_time_t	l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * utime(2): set access/modification times from a struct utimbuf, or to
 * the current time when args->times is NULL.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
/* utimes(2): like utime(2) but with microsecond-resolution timevals. */
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif

/*
 * Convert a Linux timespec to a native one, translating the special
 * UTIME_OMIT/UTIME_NOW nsec values and rejecting out-of-range nsec.
 */
static int
linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

/*
 * Shared backend for utimensat(2) and utimensat_time64(2): validate the
 * flags, translate them to AT_* and dispatch to kern_utimensat() (path
 * given) or kern_futimens() (fd only).
 */
static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	int dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/* This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour. */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (pathname != NULL)
		return (kern_utimensat(td, dfd, pathname,
		    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));

	/* NULL path: only the plain futimens form is valid, with no flags. */
	if (lflags != 0)
		return (EINVAL);

	return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE));
}

/* utimensat(2): copy in the timespec pair and defer to the common code. */
int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 64-bit-time variant of linux_utimensat_lts_to_ts() for 32-bit ABIs. */
static int
linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
{

	/* Zero out the padding in compat mode. */
	l_times->tv_nsec &= 0xFFFFFFFFUL;

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

/* utimensat_time64(2): 64-bit-time entry point for 32-bit ABIs. */
int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

#ifdef LINUX_LEGACY_SYSCALLS
/* futimesat(2): utimes(2) relative to a directory file descriptor. */
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ?
	    AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
	    tvp, UIO_SYSSPACE));
}
#endif

/*
 * Shared backend for the wait-family syscalls: call kern_wait6() and
 * translate the status word, rusage and siginfo into Linux form.
 */
static int
linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
    int options, void *rup, l_siginfo_t *infop)
{
	l_siginfo_t lsi;
	siginfo_t siginfo;
	struct __wrusage wru;
	int error, status, tmpstat, sig;

	error = kern_wait6(td, idtype, id, &status, options,
	    rup != NULL ? &wru : NULL, &siginfo);

	if (error == 0 && statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			/* Replace the BSD termination signal number. */
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			/* Replace the BSD stop signal number. */
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;	/* Linux's continued status. */
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}
	if (error == 0 && rup != NULL)
		error = linux_copyout_rusage(&wru.wru_self, rup);
	if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
		sig = bsd_to_linux_signal(siginfo.si_signo);
		siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		error = copyout(&lsi, infop, sizeof(lsi));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* waitpid(2): thin wrapper around wait4(2) with a NULL rusage. */
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args = {
		.pid = args->pid,
		.status = args->status,
		.options = args->options,
		.rusage = NULL,
	};

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * wait4(2): translate the Linux pid encoding (<-1, -1, 0, >0) and wait
 * options into kern_wait6() idtype/options.
 */
int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	struct proc *p;
	int options, id, idtype;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	/* -INT_MIN is not defined. */
	if (args->pid == INT_MIN)
		return (ESRCH);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;

	if (args->pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (args->pid < 0) {
		idtype = P_PGID;
		id = (id_t)-args->pid;
	} else if (args->pid == 0) {
		/* pid 0 means the caller's own process group. */
		idtype = P_PGID;
		p = td->td_proc;
		PROC_LOCK(p);
		id = p->p_pgid;
		PROC_UNLOCK(p);
	} else {
		idtype = P_PID;
		id = (id_t)args->pid;
	}

	return (linux_common_wait(td, idtype, id, args->status, options,
	    args->rusage, NULL));
}

/*
 * waitid(2): validate the Linux idtype/options and defer to
 * linux_common_wait(); P_PIDFD is not supported.
 */
int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	idtype_t idtype;
	int error, options;
	struct proc *p;
	pid_t id;

	if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
	    LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	id = args->id;
	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		/* Since 5.4 Linux lets id 0 mean the caller's process group. */
		if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			id = p->p_pgid;
			PROC_UNLOCK(p);
		} else if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	case LINUX_P_PIDFD:
		LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
		return (ENOSYS);
	default:
		return (EINVAL);
	}

	error = linux_common_wait(td, idtype, id, NULL, options,
	    args->rusage, args->info);
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * mknod(2): dispatch on the file type bits — fifos/sockets, devices,
 * and regular files each map to a different kernel primitive.
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	int error;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		/* Create the regular file, then immediately close it. */
		error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}
#endif

/* mknodat(2): like mknod(2) but relative to a directory descriptor. */
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ?
	    AT_FDCWD : args->dfd;

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE,
		    args->mode, linux_decode_dev(args->dev));
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		/* Create the regular file, then immediately close it. */
		error = kern_openat(td, dfd, args->filename, UIO_USERSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
/*
 * personality(2): store the requested persona in the per-process Linux
 * emulation data and return the previous one; 0xffffffff only queries.
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

/* Linux struct itimerval (same field layout, Linux-sized time types). */
struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

/* Copy an itimerval field-by-field in either direction. */
#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;

/*
 * setitimer(2): a NULL new-value pointer degenerates to getitimer(2),
 * matching Linux behaviour.
 */
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

/* getitimer(2): fetch the timer and convert to the Linux layout. */
int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* nice(2): adjust the calling process's priority by args->inc. */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * setgroups(2): install the supplied supplementary group list, keeping
 * cr_groups[0] (the effective gid) untouched.
 */
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		/* Copy the Linux set into cr_groups[1..ngrp]. */
		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

/*
 * getgroups(2): return the supplementary groups; a gidsetsize of 0 only
 * reports the count.
 */
int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid. Returning the whole set
	 * here will cause a duplicate. Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}

/*
 * For Linux-only resource limits that FreeBSD does not track, report a
 * fixed dummy value when the linux_dummy_rlimits knob is enabled.
 * Returns true when *rlim was filled in.
 */
static bool
linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
{

	if (linux_dummy_rlimits == 0)
		return (false);

	switch (resource) {
	case LINUX_RLIMIT_LOCKS:
	case LINUX_RLIMIT_SIGPENDING:
	case LINUX_RLIMIT_MSGQUEUE:
	case LINUX_RLIMIT_RTTIME:
		rlim->rlim_cur = LINUX_RLIM_INFINITY;
		rlim->rlim_max = LINUX_RLIM_INFINITY;
		return (true);
	case LINUX_RLIMIT_NICE:
	case LINUX_RLIMIT_RTPRIO:
		rlim->rlim_cur = 0;
		rlim->rlim_max = 0;
		return (true);
	default:
		return (false);
	}
}

/* setrlimit(2): translate the resource index and set the native limit. */
int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * old_getrlimit(2): legacy getrlimit that clamps RLIM_INFINITY to the
 * maximum representable value of the old ABI's narrower rlim_t.
 */
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
1149 struct l_rlimit rlim; 1150 struct rlimit bsd_rlim; 1151 u_int which; 1152 1153 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1154 rlim.rlim_cur = bsd_rlim.rlim_cur; 1155 rlim.rlim_max = bsd_rlim.rlim_max; 1156 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1157 } 1158 1159 if (args->resource >= LINUX_RLIM_NLIMITS) 1160 return (EINVAL); 1161 1162 which = linux_to_bsd_resource[args->resource]; 1163 if (which == -1) 1164 return (EINVAL); 1165 1166 lim_rlimit(td, which, &bsd_rlim); 1167 1168 #ifdef COMPAT_LINUX32 1169 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1170 if (rlim.rlim_cur == UINT_MAX) 1171 rlim.rlim_cur = INT_MAX; 1172 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1173 if (rlim.rlim_max == UINT_MAX) 1174 rlim.rlim_max = INT_MAX; 1175 #else 1176 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1177 if (rlim.rlim_cur == ULONG_MAX) 1178 rlim.rlim_cur = LONG_MAX; 1179 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1180 if (rlim.rlim_max == ULONG_MAX) 1181 rlim.rlim_max = LONG_MAX; 1182 #endif 1183 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1184 } 1185 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1186 1187 int 1188 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1189 { 1190 struct l_rlimit rlim; 1191 struct rlimit bsd_rlim; 1192 u_int which; 1193 1194 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1195 rlim.rlim_cur = bsd_rlim.rlim_cur; 1196 rlim.rlim_max = bsd_rlim.rlim_max; 1197 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1198 } 1199 1200 if (args->resource >= LINUX_RLIM_NLIMITS) 1201 return (EINVAL); 1202 1203 which = linux_to_bsd_resource[args->resource]; 1204 if (which == -1) 1205 return (EINVAL); 1206 1207 lim_rlimit(td, which, &bsd_rlim); 1208 1209 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1210 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1211 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1212 } 1213 1214 int 1215 linux_sched_setscheduler(struct thread *td, 1216 
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	/* Translate the Linux scheduling policy to the native one. */
	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			/* Linux requires priority 0 for SCHED_OTHER. */
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	/* linux_tdfind() returns with the target proc locked. */
	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	/* Translate the native policy back to the Linux value. */
	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		/* Report the Linux priority ranges, not the native ones. */
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		/* Report the Linux priority ranges, not the native ones. */
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

/* Magic numbers from the Linux reboot(2) ABI. */
#define REBOOT_CAD_ON	0x89abcdef
#define REBOOT_CAD_OFF	0
#define REBOOT_HALT	0xcdef0123
#define REBOOT_RESTART	0x01234567
#define REBOOT_RESTART2	0xA1B2C3D4
#define REBOOT_POWEROFF	0x4321FEDC
#define REBOOT_MAGIC1	0xfee1dead
#define REBOOT_MAGIC2	0x28121969
#define REBOOT_MAGIC2A	0x05121996
#define REBOOT_MAGIC2B	0x16041998

/*
 * Linux reboot(2): validate the magic cookies and map the command to
 * the native sys_reboot() options.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		/* Ctrl-Alt-Del toggling is a no-op; only check privilege. */
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

/*
 * Linux gettid(2): the per-thread id is kept in the emuldata, set up
 * when the thread enters the Linuxulator.
 */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

/* Linux getgid(2): returns the real group id. */
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

/* Linux getuid(2): returns the real user id. */
int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	/* Linux reports 20 - nice so the result is always positive. */
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

/* Linux capability ABI version cookies, see capget(2). */
#define _LINUX_CAPABILITY_VERSION_1  0x19980330
#define _LINUX_CAPABILITY_VERSION_2  0x20071026
#define _LINUX_CAPABILITY_VERSION_3  0x20080522

struct l_user_cap_header {
	l_int	version;	/* one of the version cookies above */
	l_int	pid;		/* target process; only 0 (self) supported */
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

/*
 * Linux capget(2) stub: always reports empty capability sets.
 */
int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	/* u32s is the number of 32-bit capability words for the version. */
	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		/*
		 * Unknown version: report a version we do support back
		 * to the caller, per the capget(2) convention.
		 */
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

/*
 * Linux capset(2) stub: accepts only requests that set no capability
 * bits at all; anything else fails with EPERM.
 */
int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	/* u32s is the number of 32-bit capability words for the version. */
	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities.
	 */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}

/*
 * Linux prctl(2): per-process control operations.  Options are either
 * proxied to the corresponding native procctl(2)/proc facilities or
 * stubbed to the values Linux would return without the feature.
 */
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size, arg;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related files
	 *   in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control tracability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		/* PROC_TRACE_STATUS reports -1 when tracing is disabled. */
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure to not
		 * overflow the size a Linux program expects. We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side. This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_CAPBSET_READ:
#if 0
		/*
		 * This makes too much noise with Ubuntu Focal.
		 */
		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
		    (int)args->arg2);
#endif
		error = EINVAL;
		break;
	case LINUX_PR_SET_NO_NEW_PRIVS:
		arg = args->arg2 == 1 ?
		    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
		error = kern_procctl(td, P_PID, p->p_pid,
		    PROC_NO_NEW_PRIVS_CTL, &arg);
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}

/*
 * Linux sched_setparam(2): set the scheduling parameters of a thread,
 * translating the Linux priority range when linux_map_sched_prio is on.
 */
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	/* linux_tdfind() returns with the target proc locked. */
	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

/*
 * Linux sched_getparam(2): report the scheduling parameters of a
 * thread, converting back to the Linux priority range when mapping
 * is enabled.
 */
int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}

/*
 * Get affinity of a process.
1903 */ 1904 int 1905 linux_sched_getaffinity(struct thread *td, 1906 struct linux_sched_getaffinity_args *args) 1907 { 1908 struct thread *tdt; 1909 cpuset_t *mask; 1910 size_t size; 1911 int error; 1912 id_t tid; 1913 1914 tdt = linux_tdfind(td, args->pid, -1); 1915 if (tdt == NULL) 1916 return (ESRCH); 1917 tid = tdt->td_tid; 1918 PROC_UNLOCK(tdt->td_proc); 1919 1920 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1921 size = min(args->len, sizeof(cpuset_t)); 1922 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1923 tid, size, mask); 1924 if (error == ERANGE) 1925 error = EINVAL; 1926 if (error == 0) 1927 error = copyout(mask, args->user_mask_ptr, size); 1928 if (error == 0) 1929 td->td_retval[0] = size; 1930 free(mask, M_LINUX); 1931 return (error); 1932 } 1933 1934 /* 1935 * Set affinity of a process. 1936 */ 1937 int 1938 linux_sched_setaffinity(struct thread *td, 1939 struct linux_sched_setaffinity_args *args) 1940 { 1941 struct thread *tdt; 1942 cpuset_t *mask; 1943 int cpu, error; 1944 size_t len; 1945 id_t tid; 1946 1947 tdt = linux_tdfind(td, args->pid, -1); 1948 if (tdt == NULL) 1949 return (ESRCH); 1950 tid = tdt->td_tid; 1951 PROC_UNLOCK(tdt->td_proc); 1952 1953 len = min(args->len, sizeof(cpuset_t)); 1954 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);; 1955 error = copyin(args->user_mask_ptr, mask, len); 1956 if (error != 0) 1957 goto out; 1958 /* Linux ignore high bits */ 1959 CPU_FOREACH_ISSET(cpu, mask) 1960 if (cpu > mp_maxid) 1961 CPU_CLR(cpu, mask); 1962 1963 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1964 tid, mask); 1965 if (error == EDEADLK) 1966 error = EINVAL; 1967 out: 1968 free(mask, M_TEMP); 1969 return (error); 1970 } 1971 1972 struct linux_rlimit64 { 1973 uint64_t rlim_cur; 1974 uint64_t rlim_max; 1975 }; 1976 1977 int 1978 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 1979 { 1980 struct rlimit rlim, nrlim; 1981 struct linux_rlimit64 lrlim; 
	struct proc *p;
	u_int which;
	int flags;
	int error;

	/* Pure query of a dummy limit: answer without touching the proc. */
	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
		 * rlim is unsigned 64-bit. FreeBSD treats negative limits
		 * as INFINITY so we do not need a conversion even.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	/* Setting a limit requires debug rights; reading only visibility. */
	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

out:
	PRELE(p);
	return (error);
}

/*
 * Linux pselect6(2): copy in the timeout, run the common code, and
 * write back the remaining time.
 */
int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	/* Report the unslept time back even if the call failed. */
	if (args->tsp != NULL)
		linux_put_timespec(&ts, args->tsp);
	return (error);
}

/*
 * Common code for pselect6 and pselect6_time64: handles the optional
 * {sigset pointer, size} argument block and the timeout bookkeeping.
 */
static int
linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
    l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
    l_uintptr_t *sig)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (sig != NULL) {
		error = copyin(sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		error = linux_copyin_sigset(td, PTRIN(lpse6.ss),
		    lpse6.ss_len, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;

	/*
	 * Currently glibc changes nanosecond number to microsecond.
	 * This mean losing precision but for now it is hardly seen.
	 */
	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&utv, tsp);
		if (itimerfix(&utv))
			return (EINVAL);

		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, nfds, readfds, writefds,
	    exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (tsp != NULL) {
		/*
		 * Compute how much time was left of the timeout,
		 * by subtracting the current time and the time
		 * before we started the call, and subtracting
		 * that result from the user-supplied value.
		 */
		microtime(&tv1);
		timevalsub(&tv1, &tv0);
		timevalsub(&utv, &tv1);
		if (utv.tv_sec < 0)
			timevalclear(&utv);
		TIMEVAL_TO_TIMESPEC(&utv, tsp);
	}
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 64-bit-time variant of pselect6 for 32-bit ABIs. */
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec64(&ts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * Linux ppoll(2): copy in the timeout, run the common code, and write
 * back the remaining time on success.
 */
int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec(&uts, args->tsp);
	return (error);
}

/*
 * Common code for ppoll, ppoll_time64 and poll: converts events in
 * both directions and updates the remaining timeout.
 */
static int
linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
    struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
	struct timespec ts0, ts1;
	struct pollfd stackfds[32];
	struct pollfd *kfds;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (kern_poll_maxfds(nfds))
		return (EINVAL);
	if (sset != NULL) {
		error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;
	if (tsp != NULL)
		nanotime(&ts0);

	/* Small descriptor sets are handled on the stack. */
	if (nfds > nitems(stackfds))
		kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
	else
		kfds = stackfds;
	error = linux_pollin(td, kfds, fds, nfds);
	if (error != 0)
		goto out;

	error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
	if (error == 0)
		error = linux_pollout(td, kfds, fds, nfds);

	if (error == 0 && tsp != NULL) {
		if (td->td_retval[0]) {
			/* Subtract the elapsed time from the timeout. */
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(tsp, &ts1, tsp);
			if (tsp->tv_sec < 0)
				timespecclear(tsp);
		} else
			timespecclear(tsp);
	}

out:
	if (nfds > nitems(stackfds))
		free(kfds, M_TEMP);
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 64-bit-time variant of ppoll for 32-bit ABIs. */
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;
	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec64(&uts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * Copy in the user pollfd array and translate the requested events
 * from the Linux encoding to the native one.
 */
static int
linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	int error;
	u_int i;

	error = copyin(ufds, fds, nfd * sizeof(*fds));
	if (error != 0)
		return (error);

	for (i = 0; i < nfd; i++) {
		if (fds->events != 0)
			linux_to_bsd_poll_events(td, fds->fd,
			    fds->events, &fds->events);
		fds++;
	}
	return (0);
}

/*
 * Translate returned events back to the Linux encoding and copy the
 * revents fields out to the user array; counts ready descriptors.
 */
static int
linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd)
{
	int error = 0;
	u_int i, n = 0;

	for (i = 0; i < nfd; i++) {
		if (fds->revents != 0) {
			bsd_to_linux_poll_events(fds->revents,
			    &fds->revents);
			n++;
		}
		error = copyout(&fds->revents, &ufds->revents,
		    sizeof(ufds->revents));
		if (error)
			return (error);
		fds++;
		ufds++;
	}
	/* poll(2) returns the number of descriptors with events. */
	td->td_retval[0] = n;
	return (0);
}

/*
 * Common code for sched_rr_get_interval and its time64 variant:
 * resolve the target thread and query its round-robin quantum.
 */
static int
linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
    struct timespec *ts)
{
	struct thread *tdt;
	int error;

	/*
	 * According to the man page, EINVAL should be returned in case
	 * an invalid pid is specified.
	 */
	if (pid < 0)
		return (EINVAL);

	/* linux_tdfind() returns with the target proc locked. */
	tdt = linux_tdfind(td, pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, ts);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec(&ts, uap->interval));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 64-bit-time variant for 32-bit ABIs. */
int
linux_sched_rr_get_interval_time64(struct thread *td,
    struct linux_sched_rr_get_interval_time64_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec64(&ts, uap->interval));
}
#endif

/*
 * In case when the Linux thread is the initial thread in
 * the thread group thread id is equal to the process id.
 * Glibc depends on this magic (assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		/* Fast path: the caller is asking about itself. */
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		PROC_LOCK(td->td_proc);
		return (td);
	} else if (tid > PID_MAX)
		/* Tids above PID_MAX cannot be initial threads. */
		return (tdfind(tid, pid));

	/*
	 * Initial thread where the tid equal to the pid.
	 */
	p = pfind(tid);
	if (p != NULL) {
		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
		    (pid != -1 && tid != pid)) {
			/*
			 * p is not a Linuxulator process.
			 */
			PROC_UNLOCK(p);
			return (NULL);
		}
		FOREACH_THREAD_IN_PROC(p, tdt) {
			em = em_find(tdt);
			if (tid == em->em_tid)
				/* Returned with the proc still locked. */
				return (tdt);
		}
		PROC_UNLOCK(p);
	}
	return (NULL);
}

/*
 * Translate Linux wait(2) option flags into the native equivalents;
 * bits are OR-ed into *bsdopts.
 */
void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

/*
 * Linux getrandom(2): fill the user buffer from the kernel random
 * source via a uio read.
 */
int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
		return (EINVAL);
	/* The return value is an int, so clamp oversized requests. */
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* Needs to be page-aligned */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}

/* Priority prefix prepended to each message, Linux printk-style. */
#define SYSLOG_TAG	"<6>"

/*
 * Linux syslog(2): only the READ_ALL action is supported.  The kernel
 * message buffer is copied out with a "<6>" tag inserted at the start
 * of each line that does not already carry one.
 */
int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error)
		return (error);

	/* Prime the sequence number for incremental peeks below. */
	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/* The -1 is to skip the trailing '\0'.
*/ 2473 dst += sizeof(SYSLOG_TAG) - 1; 2474 2475 while (error == 0) { 2476 mtx_lock(&msgbuf_lock); 2477 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2478 mtx_unlock(&msgbuf_lock); 2479 2480 if (buflen == 0) 2481 break; 2482 2483 for (src = buf; src < buf + buflen && error == 0; src++) { 2484 if (*src == '\0') 2485 continue; 2486 2487 if (dst >= args->buf + args->len) 2488 goto out; 2489 2490 error = copyout(src, dst, 1); 2491 dst++; 2492 2493 if (*src == '\n' && *(src + 1) != '<' && 2494 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2495 error = copyout(&SYSLOG_TAG, 2496 dst, sizeof(SYSLOG_TAG)); 2497 dst += sizeof(SYSLOG_TAG) - 1; 2498 } 2499 } 2500 } 2501 out: 2502 td->td_retval[0] = dst - args->buf; 2503 return (error); 2504 } 2505 2506 int 2507 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2508 { 2509 int cpu, error, node; 2510 2511 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2512 error = 0; 2513 node = cpuid_to_pcpu[cpu]->pc_domain; 2514 2515 if (args->cpu != NULL) 2516 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2517 if (args->node != NULL) 2518 error = copyout(&node, args->node, sizeof(l_int)); 2519 return (error); 2520 } 2521 2522 #if defined(__i386__) || defined(__amd64__) 2523 int 2524 linux_poll(struct thread *td, struct linux_poll_args *args) 2525 { 2526 struct timespec ts, *tsp; 2527 2528 if (args->timeout != INFTIM) { 2529 if (args->timeout < 0) 2530 return (EINVAL); 2531 ts.tv_sec = args->timeout / 1000; 2532 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2533 tsp = &ts; 2534 } else 2535 tsp = NULL; 2536 2537 return (linux_common_ppoll(td, args->fds, args->nfds, 2538 tsp, NULL, 0)); 2539 } 2540 #endif /* __i386__ || __amd64__ */ 2541 2542 int 2543 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2544 { 2545 2546 switch (args->op) { 2547 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2548 return (EOPNOTSUPP); 2549 default: 2550 /* 2551 * Ignore unknown operations, just like 
Linux kernel built 2552 * without CONFIG_SECCOMP. 2553 */ 2554 return (EINVAL); 2555 } 2556 } 2557 2558 /* 2559 * Custom version of exec_copyin_args(), to copy out argument and environment 2560 * strings from the old process address space into the temporary string buffer. 2561 * Based on freebsd32_exec_copyin_args. 2562 */ 2563 static int 2564 linux_exec_copyin_args(struct image_args *args, const char *fname, 2565 enum uio_seg segflg, l_uintptr_t *argv, l_uintptr_t *envv) 2566 { 2567 char *argp, *envp; 2568 l_uintptr_t *ptr, arg; 2569 int error; 2570 2571 bzero(args, sizeof(*args)); 2572 if (argv == NULL) 2573 return (EFAULT); 2574 2575 /* 2576 * Allocate demand-paged memory for the file name, argument, and 2577 * environment strings. 2578 */ 2579 error = exec_alloc_args(args); 2580 if (error != 0) 2581 return (error); 2582 2583 /* 2584 * Copy the file name. 2585 */ 2586 error = exec_args_add_fname(args, fname, segflg); 2587 if (error != 0) 2588 goto err_exit; 2589 2590 /* 2591 * extract arguments first 2592 */ 2593 ptr = argv; 2594 for (;;) { 2595 error = copyin(ptr++, &arg, sizeof(arg)); 2596 if (error) 2597 goto err_exit; 2598 if (arg == 0) 2599 break; 2600 argp = PTRIN(arg); 2601 error = exec_args_add_arg(args, argp, UIO_USERSPACE); 2602 if (error != 0) 2603 goto err_exit; 2604 } 2605 2606 /* 2607 * This comment is from Linux do_execveat_common: 2608 * When argv is empty, add an empty string ("") as argv[0] to 2609 * ensure confused userspace programs that start processing 2610 * from argv[1] won't end up walking envp. 
2611 */ 2612 if (args->argc == 0 && 2613 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) 2614 goto err_exit; 2615 2616 /* 2617 * extract environment strings 2618 */ 2619 if (envv) { 2620 ptr = envv; 2621 for (;;) { 2622 error = copyin(ptr++, &arg, sizeof(arg)); 2623 if (error) 2624 goto err_exit; 2625 if (arg == 0) 2626 break; 2627 envp = PTRIN(arg); 2628 error = exec_args_add_env(args, envp, UIO_USERSPACE); 2629 if (error != 0) 2630 goto err_exit; 2631 } 2632 } 2633 2634 return (0); 2635 2636 err_exit: 2637 exec_free_args(args); 2638 return (error); 2639 } 2640 2641 int 2642 linux_execve(struct thread *td, struct linux_execve_args *args) 2643 { 2644 struct image_args eargs; 2645 int error; 2646 2647 LINUX_CTR(execve); 2648 2649 error = linux_exec_copyin_args(&eargs, args->path, UIO_USERSPACE, 2650 args->argp, args->envp); 2651 if (error == 0) 2652 error = linux_common_execve(td, &eargs); 2653 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2654 return (error); 2655 } 2656