1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/msgbuf.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/priv.h> 54 #include <sys/proc.h> 55 #include <sys/procctl.h> 56 #include <sys/reboot.h> 57 #include <sys/racct.h> 58 #include <sys/random.h> 59 #include <sys/resourcevar.h> 60 #include <sys/sched.h> 61 #include <sys/sdt.h> 62 #include <sys/signalvar.h> 63 #include <sys/stat.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/sysctl.h> 66 #include <sys/sysproto.h> 67 #include <sys/systm.h> 68 #include <sys/time.h> 69 #include <sys/vmmeter.h> 70 #include <sys/vnode.h> 71 #include <sys/wait.h> 72 #include <sys/cpuset.h> 73 #include <sys/uio.h> 74 75 #include <security/mac/mac_framework.h> 76 77 #include <vm/vm.h> 78 #include <vm/pmap.h> 79 #include <vm/vm_kern.h> 80 #include <vm/vm_map.h> 81 #include <vm/vm_extern.h> 82 #include <vm/swap_pager.h> 83 84 #ifdef COMPAT_LINUX32 85 #include <machine/../linux32/linux.h> 86 #include <machine/../linux32/linux32_proto.h> 87 #else 88 #include <machine/../linux/linux.h> 89 #include <machine/../linux/linux_proto.h> 90 #endif 91 92 #include <compat/linux/linux_dtrace.h> 93 #include <compat/linux/linux_file.h> 94 #include <compat/linux/linux_mib.h> 95 #include <compat/linux/linux_signal.h> 96 #include <compat/linux/linux_timer.h> 97 #include <compat/linux/linux_util.h> 98 #include <compat/linux/linux_sysproto.h> 99 #include <compat/linux/linux_emul.h> 100 #include <compat/linux/linux_misc.h> 101 102 int stclohz; /* Statistics clock frequency */ 103 104 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 105 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 106 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 107 RLIMIT_MEMLOCK, RLIMIT_AS 108 }; 109 110 struct l_sysinfo { 111 l_long uptime; /* Seconds since boot */ 112 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 113 #define LINUX_SYSINFO_LOADS_SCALE 65536 114 l_ulong totalram; /* Total usable main memory size */ 115 l_ulong freeram; /* Available memory size */ 116 l_ulong sharedram; /* Amount of shared memory */ 117 l_ulong bufferram; /* Memory used by buffers */ 118 l_ulong totalswap; /* Total swap space size */ 119 l_ulong freeswap; /* swap space still available */ 120 l_ushort procs; /* Number of current processes */ 121 l_ushort pads; 122 l_ulong totalhigh; 123 l_ulong freehigh; 124 l_uint mem_unit; 125 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 126 }; 127 128 struct l_pselect6arg { 129 l_uintptr_t ss; 130 l_size_t ss_len; 131 }; 132 133 static int linux_utimensat_nsec_valid(l_long); 134 135 int 136 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 137 { 138 struct l_sysinfo sysinfo; 139 int i, j; 140 struct timespec ts; 141 142 bzero(&sysinfo, sizeof(sysinfo)); 143 getnanouptime(&ts); 144 if (ts.tv_nsec != 0) 145 ts.tv_sec++; 146 sysinfo.uptime = ts.tv_sec; 147 148 /* Use the information from the mib to get our load averages */ 149 for (i = 0; i < 3; i++) 150 sysinfo.loads[i] = averunnable.ldavg[i] * 151 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 152 153 sysinfo.totalram = physmem * PAGE_SIZE; 154 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 155 156 /* 157 * sharedram counts pages allocated to named, swap-backed objects such 158 * as shared memory segments and tmpfs files. There is no cheap way to 159 * compute this, so just leave the field unpopulated. Linux itself only 160 * started setting this field in the 3.x timeframe. 161 */ 162 sysinfo.sharedram = 0; 163 sysinfo.bufferram = 0; 164 165 swap_pager_status(&i, &j); 166 sysinfo.totalswap = i * PAGE_SIZE; 167 sysinfo.freeswap = (i - j) * PAGE_SIZE; 168 169 sysinfo.procs = nprocs; 170 171 /* 172 * Platforms supported by the emulation layer do not have a notion of 173 * high memory. 174 */ 175 sysinfo.totalhigh = 0; 176 sysinfo.freehigh = 0; 177 178 sysinfo.mem_unit = 1; 179 180 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 181 } 182 183 #ifdef LINUX_LEGACY_SYSCALLS 184 int 185 linux_alarm(struct thread *td, struct linux_alarm_args *args) 186 { 187 struct itimerval it, old_it; 188 u_int secs; 189 int error; 190 191 secs = args->secs; 192 /* 193 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 194 * to match kern_setitimer()'s limit to avoid error from it. 195 * 196 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 197 * platforms. 198 */ 199 if (secs > INT32_MAX / 2) 200 secs = INT32_MAX / 2; 201 202 it.it_value.tv_sec = secs; 203 it.it_value.tv_usec = 0; 204 timevalclear(&it.it_interval); 205 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 206 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 207 208 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 209 old_it.it_value.tv_usec >= 500000) 210 old_it.it_value.tv_sec++; 211 td->td_retval[0] = old_it.it_value.tv_sec; 212 return (0); 213 } 214 #endif 215 216 int 217 linux_brk(struct thread *td, struct linux_brk_args *args) 218 { 219 struct vmspace *vm = td->td_proc->p_vmspace; 220 uintptr_t new, old; 221 222 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 223 new = (uintptr_t)args->dsend; 224 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 225 td->td_retval[0] = (register_t)new; 226 else 227 td->td_retval[0] = (register_t)old; 228 229 return (0); 230 } 231 232 #if defined(__i386__) 233 /* XXX: what about amd64/linux32? */ 234 235 int 236 linux_uselib(struct thread *td, struct linux_uselib_args *args) 237 { 238 struct nameidata ni; 239 struct vnode *vp; 240 struct exec *a_out; 241 vm_map_t map; 242 vm_map_entry_t entry; 243 struct vattr attr; 244 vm_offset_t vmaddr; 245 unsigned long file_offset; 246 unsigned long bss_size; 247 char *library; 248 ssize_t aresid; 249 int error; 250 bool locked, opened, textset; 251 252 a_out = NULL; 253 vp = NULL; 254 locked = false; 255 textset = false; 256 opened = false; 257 258 if (!LUSECONVPATH(td)) { 259 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 260 UIO_USERSPACE, args->library, td); 261 error = namei(&ni); 262 } else { 263 LCONVPATHEXIST(td, args->library, &library); 264 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 265 UIO_SYSSPACE, library, td); 266 error = namei(&ni); 267 LFREEPATH(library); 268 } 269 if (error) 270 goto cleanup; 271 272 vp = ni.ni_vp; 273 NDFREE(&ni, NDF_ONLY_PNBUF); 274 275 /* 276 * From here on down, we have a locked vnode that must be unlocked. 277 * XXX: The code below largely duplicates exec_check_permissions(). 278 */ 279 locked = true; 280 281 /* Executable? */ 282 error = VOP_GETATTR(vp, &attr, td->td_ucred); 283 if (error) 284 goto cleanup; 285 286 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 287 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 288 /* EACCESS is what exec(2) returns. */ 289 error = ENOEXEC; 290 goto cleanup; 291 } 292 293 /* Sensible size? */ 294 if (attr.va_size == 0) { 295 error = ENOEXEC; 296 goto cleanup; 297 } 298 299 /* Can we access it? */ 300 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 301 if (error) 302 goto cleanup; 303 304 /* 305 * XXX: This should use vn_open() so that it is properly authorized, 306 * and to reduce code redundancy all over the place here. 307 * XXX: Not really, it duplicates far more of exec_check_permissions() 308 * than vn_open(). 309 */ 310 #ifdef MAC 311 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 312 if (error) 313 goto cleanup; 314 #endif 315 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 316 if (error) 317 goto cleanup; 318 opened = true; 319 320 /* Pull in executable header into exec_map */ 321 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 322 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 323 if (error) 324 goto cleanup; 325 326 /* Is it a Linux binary ? */ 327 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 328 error = ENOEXEC; 329 goto cleanup; 330 } 331 332 /* 333 * While we are here, we should REALLY do some more checks 334 */ 335 336 /* Set file/virtual offset based on a.out variant. */ 337 switch ((int)(a_out->a_magic & 0xffff)) { 338 case 0413: /* ZMAGIC */ 339 file_offset = 1024; 340 break; 341 case 0314: /* QMAGIC */ 342 file_offset = 0; 343 break; 344 default: 345 error = ENOEXEC; 346 goto cleanup; 347 } 348 349 bss_size = round_page(a_out->a_bss); 350 351 /* Check various fields in header for validity/bounds. */ 352 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 353 error = ENOEXEC; 354 goto cleanup; 355 } 356 357 /* text + data can't exceed file size */ 358 if (a_out->a_data + a_out->a_text > attr.va_size) { 359 error = EFAULT; 360 goto cleanup; 361 } 362 363 /* 364 * text/data/bss must not exceed limits 365 * XXX - this is not complete. it should check current usage PLUS 366 * the resources needed by this library. 367 */ 368 PROC_LOCK(td->td_proc); 369 if (a_out->a_text > maxtsiz || 370 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 371 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 372 bss_size) != 0) { 373 PROC_UNLOCK(td->td_proc); 374 error = ENOMEM; 375 goto cleanup; 376 } 377 PROC_UNLOCK(td->td_proc); 378 379 /* 380 * Prevent more writers. 381 */ 382 error = VOP_SET_TEXT(vp); 383 if (error != 0) 384 goto cleanup; 385 textset = true; 386 387 /* 388 * Lock no longer needed 389 */ 390 locked = false; 391 VOP_UNLOCK(vp); 392 393 /* 394 * Check if file_offset page aligned. Currently we cannot handle 395 * misalinged file offsets, and so we read in the entire image 396 * (what a waste). 397 */ 398 if (file_offset & PAGE_MASK) { 399 /* Map text+data read/write/execute */ 400 401 /* a_entry is the load address and is page aligned */ 402 vmaddr = trunc_page(a_out->a_entry); 403 404 /* get anon user mapping, read+write+execute */ 405 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 406 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 407 VM_PROT_ALL, VM_PROT_ALL, 0); 408 if (error) 409 goto cleanup; 410 411 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 412 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 413 td->td_ucred, NOCRED, &aresid, td); 414 if (error != 0) 415 goto cleanup; 416 if (aresid != 0) { 417 error = ENOEXEC; 418 goto cleanup; 419 } 420 } else { 421 /* 422 * for QMAGIC, a_entry is 20 bytes beyond the load address 423 * to skip the executable header 424 */ 425 vmaddr = trunc_page(a_out->a_entry); 426 427 /* 428 * Map it all into the process's space as a single 429 * copy-on-write "data" segment. 430 */ 431 map = &td->td_proc->p_vmspace->vm_map; 432 error = vm_mmap(map, &vmaddr, 433 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 434 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 435 if (error) 436 goto cleanup; 437 vm_map_lock(map); 438 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 439 vm_map_unlock(map); 440 error = EDOOFUS; 441 goto cleanup; 442 } 443 entry->eflags |= MAP_ENTRY_VN_EXEC; 444 vm_map_unlock(map); 445 textset = false; 446 } 447 448 if (bss_size != 0) { 449 /* Calculate BSS start address */ 450 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 451 a_out->a_data; 452 453 /* allocate some 'anon' space */ 454 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 455 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 456 VM_PROT_ALL, 0); 457 if (error) 458 goto cleanup; 459 } 460 461 cleanup: 462 if (opened) { 463 if (locked) 464 VOP_UNLOCK(vp); 465 locked = false; 466 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 467 } 468 if (textset) { 469 if (!locked) { 470 locked = true; 471 VOP_LOCK(vp, LK_SHARED | LK_RETRY); 472 } 473 VOP_UNSET_TEXT_CHECKED(vp); 474 } 475 if (locked) 476 VOP_UNLOCK(vp); 477 478 /* Release the temporary mapping. */ 479 if (a_out) 480 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 481 482 return (error); 483 } 484 485 #endif /* __i386__ */ 486 487 #ifdef LINUX_LEGACY_SYSCALLS 488 int 489 linux_select(struct thread *td, struct linux_select_args *args) 490 { 491 l_timeval ltv; 492 struct timeval tv0, tv1, utv, *tvp; 493 int error; 494 495 /* 496 * Store current time for computation of the amount of 497 * time left. 498 */ 499 if (args->timeout) { 500 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 501 goto select_out; 502 utv.tv_sec = ltv.tv_sec; 503 utv.tv_usec = ltv.tv_usec; 504 505 if (itimerfix(&utv)) { 506 /* 507 * The timeval was invalid. Convert it to something 508 * valid that will act as it does under Linux. 509 */ 510 utv.tv_sec += utv.tv_usec / 1000000; 511 utv.tv_usec %= 1000000; 512 if (utv.tv_usec < 0) { 513 utv.tv_sec -= 1; 514 utv.tv_usec += 1000000; 515 } 516 if (utv.tv_sec < 0) 517 timevalclear(&utv); 518 } 519 microtime(&tv0); 520 tvp = &utv; 521 } else 522 tvp = NULL; 523 524 error = kern_select(td, args->nfds, args->readfds, args->writefds, 525 args->exceptfds, tvp, LINUX_NFDBITS); 526 if (error) 527 goto select_out; 528 529 if (args->timeout) { 530 if (td->td_retval[0]) { 531 /* 532 * Compute how much time was left of the timeout, 533 * by subtracting the current time and the time 534 * before we started the call, and subtracting 535 * that result from the user-supplied value. 536 */ 537 microtime(&tv1); 538 timevalsub(&tv1, &tv0); 539 timevalsub(&utv, &tv1); 540 if (utv.tv_sec < 0) 541 timevalclear(&utv); 542 } else 543 timevalclear(&utv); 544 ltv.tv_sec = utv.tv_sec; 545 ltv.tv_usec = utv.tv_usec; 546 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 547 goto select_out; 548 } 549 550 select_out: 551 return (error); 552 } 553 #endif 554 555 int 556 linux_mremap(struct thread *td, struct linux_mremap_args *args) 557 { 558 uintptr_t addr; 559 size_t len; 560 int error = 0; 561 562 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 563 td->td_retval[0] = 0; 564 return (EINVAL); 565 } 566 567 /* 568 * Check for the page alignment. 569 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 570 */ 571 if (args->addr & PAGE_MASK) { 572 td->td_retval[0] = 0; 573 return (EINVAL); 574 } 575 576 args->new_len = round_page(args->new_len); 577 args->old_len = round_page(args->old_len); 578 579 if (args->new_len > args->old_len) { 580 td->td_retval[0] = 0; 581 return (ENOMEM); 582 } 583 584 if (args->new_len < args->old_len) { 585 addr = args->addr + args->new_len; 586 len = args->old_len - args->new_len; 587 error = kern_munmap(td, addr, len); 588 } 589 590 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 591 return (error); 592 } 593 594 #define LINUX_MS_ASYNC 0x0001 595 #define LINUX_MS_INVALIDATE 0x0002 596 #define LINUX_MS_SYNC 0x0004 597 598 int 599 linux_msync(struct thread *td, struct linux_msync_args *args) 600 { 601 602 return (kern_msync(td, args->addr, args->len, 603 args->fl & ~LINUX_MS_SYNC)); 604 } 605 606 #ifdef LINUX_LEGACY_SYSCALLS 607 int 608 linux_time(struct thread *td, struct linux_time_args *args) 609 { 610 struct timeval tv; 611 l_time_t tm; 612 int error; 613 614 microtime(&tv); 615 tm = tv.tv_sec; 616 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 617 return (error); 618 td->td_retval[0] = tm; 619 return (0); 620 } 621 #endif 622 623 struct l_times_argv { 624 l_clock_t tms_utime; 625 l_clock_t tms_stime; 626 l_clock_t tms_cutime; 627 l_clock_t tms_cstime; 628 }; 629 630 /* 631 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 632 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 633 * auxiliary vector entry. 634 */ 635 #define CLK_TCK 100 636 637 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 638 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 639 640 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 641 CONVNTCK(r) : CONVOTCK(r)) 642 643 int 644 linux_times(struct thread *td, struct linux_times_args *args) 645 { 646 struct timeval tv, utime, stime, cutime, cstime; 647 struct l_times_argv tms; 648 struct proc *p; 649 int error; 650 651 if (args->buf != NULL) { 652 p = td->td_proc; 653 PROC_LOCK(p); 654 PROC_STATLOCK(p); 655 calcru(p, &utime, &stime); 656 PROC_STATUNLOCK(p); 657 calccru(p, &cutime, &cstime); 658 PROC_UNLOCK(p); 659 660 tms.tms_utime = CONVTCK(utime); 661 tms.tms_stime = CONVTCK(stime); 662 663 tms.tms_cutime = CONVTCK(cutime); 664 tms.tms_cstime = CONVTCK(cstime); 665 666 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 667 return (error); 668 } 669 670 microuptime(&tv); 671 td->td_retval[0] = (int)CONVTCK(tv); 672 return (0); 673 } 674 675 int 676 linux_newuname(struct thread *td, struct linux_newuname_args *args) 677 { 678 struct l_new_utsname utsname; 679 char osname[LINUX_MAX_UTSNAME]; 680 char osrelease[LINUX_MAX_UTSNAME]; 681 char *p; 682 683 linux_get_osname(td, osname); 684 linux_get_osrelease(td, osrelease); 685 686 bzero(&utsname, sizeof(utsname)); 687 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 688 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 689 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 690 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 691 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 692 for (p = utsname.version; *p != '\0'; ++p) 693 if (*p == '\n') { 694 *p = '\0'; 695 break; 696 } 697 #if defined(__amd64__) 698 /* 699 * On amd64, Linux uname(2) needs to return "x86_64" 700 * for both 64-bit and 32-bit applications. On 32-bit, 701 * the string returned by getauxval(AT_PLATFORM) needs 702 * to remain "i686", though. 703 */ 704 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 705 #else 706 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 707 #endif 708 709 return (copyout(&utsname, args->buf, sizeof(utsname))); 710 } 711 712 struct l_utimbuf { 713 l_time_t l_actime; 714 l_time_t l_modtime; 715 }; 716 717 #ifdef LINUX_LEGACY_SYSCALLS 718 int 719 linux_utime(struct thread *td, struct linux_utime_args *args) 720 { 721 struct timeval tv[2], *tvp; 722 struct l_utimbuf lut; 723 char *fname; 724 int error; 725 726 if (args->times) { 727 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 728 return (error); 729 tv[0].tv_sec = lut.l_actime; 730 tv[0].tv_usec = 0; 731 tv[1].tv_sec = lut.l_modtime; 732 tv[1].tv_usec = 0; 733 tvp = tv; 734 } else 735 tvp = NULL; 736 737 if (!LUSECONVPATH(td)) { 738 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 739 tvp, UIO_SYSSPACE); 740 } else { 741 LCONVPATHEXIST(td, args->fname, &fname); 742 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 743 UIO_SYSSPACE); 744 LFREEPATH(fname); 745 } 746 return (error); 747 } 748 #endif 749 750 #ifdef LINUX_LEGACY_SYSCALLS 751 int 752 linux_utimes(struct thread *td, struct linux_utimes_args *args) 753 { 754 l_timeval ltv[2]; 755 struct timeval tv[2], *tvp = NULL; 756 char *fname; 757 int error; 758 759 if (args->tptr != NULL) { 760 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 761 return (error); 762 tv[0].tv_sec = ltv[0].tv_sec; 763 tv[0].tv_usec = ltv[0].tv_usec; 764 tv[1].tv_sec = ltv[1].tv_sec; 765 tv[1].tv_usec = ltv[1].tv_usec; 766 tvp = tv; 767 } 768 769 if (!LUSECONVPATH(td)) { 770 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 771 tvp, UIO_SYSSPACE); 772 } else { 773 LCONVPATHEXIST(td, args->fname, &fname); 774 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 775 tvp, UIO_SYSSPACE); 776 LFREEPATH(fname); 777 } 778 return (error); 779 } 780 #endif 781 782 static int 783 linux_utimensat_nsec_valid(l_long nsec) 784 { 785 786 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 787 return (0); 788 if (nsec >= 0 && nsec <= 999999999) 789 return (0); 790 return (1); 791 } 792 793 int 794 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 795 { 796 struct l_timespec l_times[2]; 797 struct timespec times[2], *timesp = NULL; 798 char *path = NULL; 799 int error, dfd, flags = 0; 800 801 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 802 803 if (args->flags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 804 return (EINVAL); 805 806 if (args->times != NULL) { 807 error = copyin(args->times, l_times, sizeof(l_times)); 808 if (error != 0) 809 return (error); 810 811 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 812 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 813 return (EINVAL); 814 815 times[0].tv_sec = l_times[0].tv_sec; 816 switch (l_times[0].tv_nsec) 817 { 818 case LINUX_UTIME_OMIT: 819 times[0].tv_nsec = UTIME_OMIT; 820 break; 821 case LINUX_UTIME_NOW: 822 times[0].tv_nsec = UTIME_NOW; 823 break; 824 default: 825 times[0].tv_nsec = l_times[0].tv_nsec; 826 } 827 828 times[1].tv_sec = l_times[1].tv_sec; 829 switch (l_times[1].tv_nsec) 830 { 831 case LINUX_UTIME_OMIT: 832 times[1].tv_nsec = UTIME_OMIT; 833 break; 834 case LINUX_UTIME_NOW: 835 times[1].tv_nsec = UTIME_NOW; 836 break; 837 default: 838 times[1].tv_nsec = l_times[1].tv_nsec; 839 break; 840 } 841 timesp = times; 842 843 /* This breaks POSIX, but is what the Linux kernel does 844 * _on purpose_ (documented in the man page for utimensat(2)), 845 * so we must follow that behaviour. */ 846 if (times[0].tv_nsec == UTIME_OMIT && 847 times[1].tv_nsec == UTIME_OMIT) 848 return (0); 849 } 850 851 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 852 flags |= AT_SYMLINK_NOFOLLOW; 853 if (args->flags & LINUX_AT_EMPTY_PATH) 854 flags |= AT_EMPTY_PATH; 855 856 if (!LUSECONVPATH(td)) { 857 if (args->pathname != NULL) { 858 return (kern_utimensat(td, dfd, args->pathname, 859 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 860 } 861 } 862 863 if (args->pathname != NULL) 864 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 865 else if (args->flags != 0) 866 return (EINVAL); 867 868 if (path == NULL) 869 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 870 else { 871 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 872 UIO_SYSSPACE, flags); 873 LFREEPATH(path); 874 } 875 876 return (error); 877 } 878 879 #ifdef LINUX_LEGACY_SYSCALLS 880 int 881 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 882 { 883 l_timeval ltv[2]; 884 struct timeval tv[2], *tvp = NULL; 885 char *fname; 886 int error, dfd; 887 888 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 889 890 if (args->utimes != NULL) { 891 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 892 return (error); 893 tv[0].tv_sec = ltv[0].tv_sec; 894 tv[0].tv_usec = ltv[0].tv_usec; 895 tv[1].tv_sec = ltv[1].tv_sec; 896 tv[1].tv_usec = ltv[1].tv_usec; 897 tvp = tv; 898 } 899 900 if (!LUSECONVPATH(td)) { 901 error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 902 tvp, UIO_SYSSPACE); 903 } else { 904 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 905 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, 906 tvp, UIO_SYSSPACE); 907 LFREEPATH(fname); 908 } 909 return (error); 910 } 911 #endif 912 913 static int 914 linux_common_wait(struct thread *td, int pid, int *statusp, 915 int options, struct __wrusage *wrup) 916 { 917 siginfo_t siginfo; 918 idtype_t idtype; 919 id_t id; 920 int error, status, tmpstat; 921 922 if (pid == WAIT_ANY) { 923 idtype = P_ALL; 924 id = 0; 925 } else if (pid < 0) { 926 idtype = P_PGID; 927 id = (id_t)-pid; 928 } else { 929 idtype = P_PID; 930 id = (id_t)pid; 931 } 932 933 /* 934 * For backward compatibility we implicitly add flags WEXITED 935 * and WTRAPPED here. 936 */ 937 options |= WEXITED | WTRAPPED; 938 error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo); 939 if (error) 940 return (error); 941 942 if (statusp) { 943 tmpstat = status & 0xffff; 944 if (WIFSIGNALED(tmpstat)) { 945 tmpstat = (tmpstat & 0xffffff80) | 946 bsd_to_linux_signal(WTERMSIG(tmpstat)); 947 } else if (WIFSTOPPED(tmpstat)) { 948 tmpstat = (tmpstat & 0xffff00ff) | 949 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 950 #if defined(__amd64__) && !defined(COMPAT_LINUX32) 951 if (WSTOPSIG(status) == SIGTRAP) { 952 tmpstat = linux_ptrace_status(td, 953 siginfo.si_pid, tmpstat); 954 } 955 #endif 956 } else if (WIFCONTINUED(tmpstat)) { 957 tmpstat = 0xffff; 958 } 959 error = copyout(&tmpstat, statusp, sizeof(int)); 960 } 961 962 return (error); 963 } 964 965 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 966 int 967 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 968 { 969 struct linux_wait4_args wait4_args; 970 971 wait4_args.pid = args->pid; 972 wait4_args.status = args->status; 973 wait4_args.options = args->options; 974 wait4_args.rusage = NULL; 975 976 return (linux_wait4(td, &wait4_args)); 977 } 978 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 979 980 int 981 linux_wait4(struct thread *td, struct linux_wait4_args *args) 982 { 983 int error, options; 984 struct __wrusage wru, *wrup; 985 986 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 987 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 988 return (EINVAL); 989 990 options = WEXITED; 991 linux_to_bsd_waitopts(args->options, &options); 992 993 if (args->rusage != NULL) 994 wrup = &wru; 995 else 996 wrup = NULL; 997 error = linux_common_wait(td, args->pid, args->status, options, wrup); 998 if (error != 0) 999 return (error); 1000 if (args->rusage != NULL) 1001 error = linux_copyout_rusage(&wru.wru_self, args->rusage); 1002 return (error); 1003 } 1004 1005 int 1006 linux_waitid(struct thread *td, struct linux_waitid_args *args) 1007 { 1008 int status, options, sig; 1009 struct __wrusage wru; 1010 siginfo_t siginfo; 1011 l_siginfo_t lsi; 1012 idtype_t idtype; 1013 struct proc *p; 1014 int error; 1015 1016 options = 0; 1017 linux_to_bsd_waitopts(args->options, &options); 1018 1019 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1020 return (EINVAL); 1021 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1022 return (EINVAL); 1023 1024 switch (args->idtype) { 1025 case LINUX_P_ALL: 1026 idtype = P_ALL; 1027 break; 1028 case LINUX_P_PID: 1029 if (args->id <= 0) 1030 return (EINVAL); 1031 idtype = P_PID; 1032 break; 1033 case LINUX_P_PGID: 1034 if (args->id <= 0) 1035 return (EINVAL); 1036 idtype = P_PGID; 1037 break; 1038 default: 1039 return (EINVAL); 1040 } 1041 1042 error = kern_wait6(td, idtype, args->id, &status, options, 1043 &wru, &siginfo); 1044 if (error != 0) 1045 return (error); 1046 if (args->rusage != NULL) { 1047 error = linux_copyout_rusage(&wru.wru_children, 1048 args->rusage); 1049 if (error != 0) 1050 return (error); 1051 } 1052 if (args->info != NULL) { 1053 p = td->td_proc; 1054 bzero(&lsi, sizeof(lsi)); 1055 if (td->td_retval[0] != 0) { 1056 sig = bsd_to_linux_signal(siginfo.si_signo); 1057 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1058 } 1059 error = copyout(&lsi, args->info, sizeof(lsi)); 1060 } 1061 td->td_retval[0] = 0; 1062 1063 return (error); 1064 } 1065 1066 #ifdef LINUX_LEGACY_SYSCALLS 1067 int 1068 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1069 { 1070 char *path; 1071 int error; 1072 enum uio_seg seg; 1073 bool convpath; 1074 1075 convpath = LUSECONVPATH(td); 1076 if (!convpath) { 1077 path = args->path; 1078 seg = UIO_USERSPACE; 1079 } else { 1080 LCONVPATHCREAT(td, args->path, &path); 1081 seg = UIO_SYSSPACE; 1082 } 1083 1084 switch (args->mode & S_IFMT) { 1085 case S_IFIFO: 1086 case S_IFSOCK: 1087 error = kern_mkfifoat(td, AT_FDCWD, path, seg, 1088 args->mode); 1089 break; 1090 1091 case S_IFCHR: 1092 case S_IFBLK: 1093 error = kern_mknodat(td, AT_FDCWD, path, seg, 1094 args->mode, args->dev); 1095 break; 1096 1097 case S_IFDIR: 1098 error = EPERM; 1099 break; 1100 1101 case 0: 1102 args->mode |= S_IFREG; 1103 /* FALLTHROUGH */ 1104 case S_IFREG: 1105 error = kern_openat(td, AT_FDCWD, path, seg, 1106 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1107 if (error == 0) 1108 kern_close(td, td->td_retval[0]); 1109 break; 1110 1111 default: 1112 error = EINVAL; 1113 break; 1114 } 1115 if (convpath) 1116 LFREEPATH(path); 1117 return (error); 1118 } 1119 #endif 1120 1121 int 1122 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1123 { 1124 char *path; 1125 int error, dfd; 1126 enum uio_seg seg; 1127 bool convpath; 1128 1129 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1130 1131 convpath = LUSECONVPATH(td); 1132 if (!convpath) { 1133 path = __DECONST(char *, args->filename); 1134 seg = UIO_USERSPACE; 1135 } else { 1136 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1137 seg = UIO_SYSSPACE; 1138 } 1139 1140 switch (args->mode & S_IFMT) { 1141 case S_IFIFO: 1142 case S_IFSOCK: 1143 error = kern_mkfifoat(td, dfd, path, seg, args->mode); 1144 break; 1145 1146 case S_IFCHR: 1147 case S_IFBLK: 1148 error = kern_mknodat(td, dfd, path, seg, args->mode, 1149 args->dev); 1150 break; 1151 1152 case S_IFDIR: 1153 error = EPERM; 1154 break; 1155 1156 case 0: 1157 args->mode |= S_IFREG; 1158 /* FALLTHROUGH */ 1159 case S_IFREG: 1160 error = kern_openat(td, dfd, path, seg, 1161 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1162 if (error == 0) 1163 kern_close(td, td->td_retval[0]); 1164 break; 1165 1166 default: 1167 error = EINVAL; 1168 break; 1169 } 1170 if (convpath) 1171 LFREEPATH(path); 1172 return (error); 1173 } 1174 1175 /* 1176 * UGH! This is just about the dumbest idea I've ever heard!! 1177 */ 1178 int 1179 linux_personality(struct thread *td, struct linux_personality_args *args) 1180 { 1181 struct linux_pemuldata *pem; 1182 struct proc *p = td->td_proc; 1183 uint32_t old; 1184 1185 PROC_LOCK(p); 1186 pem = pem_find(p); 1187 old = pem->persona; 1188 if (args->per != 0xffffffff) 1189 pem->persona = args->per; 1190 PROC_UNLOCK(p); 1191 1192 td->td_retval[0] = old; 1193 return (0); 1194 } 1195 1196 struct l_itimerval { 1197 l_timeval it_interval; 1198 l_timeval it_value; 1199 }; 1200 1201 #define B2L_ITIMERVAL(bip, lip) \ 1202 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1203 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1204 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1205 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1206 1207 int 1208 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1209 { 1210 int error; 1211 struct l_itimerval ls; 1212 struct itimerval aitv, oitv; 1213 1214 if (uap->itv == NULL) { 1215 uap->itv = uap->oitv; 1216 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1217 } 1218 1219 error = copyin(uap->itv, &ls, sizeof(ls)); 1220 if (error != 0) 1221 return (error); 1222 B2L_ITIMERVAL(&aitv, &ls); 1223 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1224 if (error != 0 || uap->oitv == NULL) 1225 return (error); 1226 B2L_ITIMERVAL(&ls, &oitv); 1227 1228 return (copyout(&ls, uap->oitv, sizeof(ls))); 1229 } 1230 1231 int 1232 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1233 { 1234 int error; 1235 struct l_itimerval ls; 1236 struct itimerval aitv; 1237 1238 error = kern_getitimer(td, uap->which, &aitv); 1239 if (error != 0) 1240 return (error); 1241 B2L_ITIMERVAL(&ls, &aitv); 1242 return (copyout(&ls, uap->itv, sizeof(ls))); 1243 } 1244 1245 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1246 int 1247 linux_nice(struct thread *td, struct linux_nice_args *args) 1248 { 1249 1250 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1251 } 1252 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1253 1254 int 1255 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1256 { 1257 struct ucred *newcred, *oldcred; 1258 l_gid_t *linux_gidset; 1259 gid_t *bsd_gidset; 1260 int ngrp, error; 1261 struct proc *p; 1262 1263 ngrp = args->gidsetsize; 1264 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1265 return (EINVAL); 1266 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1267 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1268 if (error) 1269 goto out; 1270 newcred = crget(); 1271 crextend(newcred, ngrp + 1); 1272 p = td->td_proc; 1273 PROC_LOCK(p); 1274 oldcred = p->p_ucred; 1275 crcopy(newcred, oldcred); 1276 1277 /* 1278 * cr_groups[0] holds egid. Setting the whole set from 1279 * the supplied set will cause egid to be changed too. 1280 * Keep cr_groups[0] unchanged to prevent that. 1281 */ 1282 1283 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1284 PROC_UNLOCK(p); 1285 crfree(newcred); 1286 goto out; 1287 } 1288 1289 if (ngrp > 0) { 1290 newcred->cr_ngroups = ngrp + 1; 1291 1292 bsd_gidset = newcred->cr_groups; 1293 ngrp--; 1294 while (ngrp >= 0) { 1295 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1296 ngrp--; 1297 } 1298 } else 1299 newcred->cr_ngroups = 1; 1300 1301 setsugid(p); 1302 proc_set_cred(p, newcred); 1303 PROC_UNLOCK(p); 1304 crfree(oldcred); 1305 error = 0; 1306 out: 1307 free(linux_gidset, M_LINUX); 1308 return (error); 1309 } 1310 1311 int 1312 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1313 { 1314 struct ucred *cred; 1315 l_gid_t *linux_gidset; 1316 gid_t *bsd_gidset; 1317 int bsd_gidsetsz, ngrp, error; 1318 1319 cred = td->td_ucred; 1320 bsd_gidset = cred->cr_groups; 1321 bsd_gidsetsz = cred->cr_ngroups - 1; 1322 1323 /* 1324 * cr_groups[0] holds egid. Returning the whole set 1325 * here will cause a duplicate. Exclude cr_groups[0] 1326 * to prevent that. 1327 */ 1328 1329 if ((ngrp = args->gidsetsize) == 0) { 1330 td->td_retval[0] = bsd_gidsetsz; 1331 return (0); 1332 } 1333 1334 if (ngrp < bsd_gidsetsz) 1335 return (EINVAL); 1336 1337 ngrp = 0; 1338 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1339 M_LINUX, M_WAITOK); 1340 while (ngrp < bsd_gidsetsz) { 1341 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1342 ngrp++; 1343 } 1344 1345 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1346 free(linux_gidset, M_LINUX); 1347 if (error) 1348 return (error); 1349 1350 td->td_retval[0] = ngrp; 1351 return (0); 1352 } 1353 1354 static bool 1355 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1356 { 1357 1358 if (linux_dummy_rlimits == 0) 1359 return (false); 1360 1361 switch (resource) { 1362 case LINUX_RLIMIT_LOCKS: 1363 case LINUX_RLIMIT_SIGPENDING: 1364 case LINUX_RLIMIT_MSGQUEUE: 1365 case LINUX_RLIMIT_RTTIME: 1366 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1367 rlim->rlim_max = LINUX_RLIM_INFINITY; 1368 return (true); 1369 case LINUX_RLIMIT_NICE: 1370 case LINUX_RLIMIT_RTPRIO: 1371 rlim->rlim_cur = 0; 1372 rlim->rlim_max = 0; 1373 return (true); 1374 default: 1375 return (false); 1376 } 1377 } 1378 1379 int 1380 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1381 { 1382 struct rlimit bsd_rlim; 1383 struct l_rlimit rlim; 1384 u_int which; 1385 int error; 1386 1387 if (args->resource >= LINUX_RLIM_NLIMITS) 1388 return (EINVAL); 1389 1390 which = linux_to_bsd_resource[args->resource]; 1391 if (which == -1) 1392 return (EINVAL); 1393 1394 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1395 if (error) 1396 return (error); 1397 1398 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1399 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1400 return (kern_setrlimit(td, which, &bsd_rlim)); 1401 } 1402 1403 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1404 int 1405 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1406 { 1407 struct l_rlimit rlim; 1408 struct rlimit bsd_rlim; 1409 u_int which; 1410 1411 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1412 rlim.rlim_cur = bsd_rlim.rlim_cur; 1413 rlim.rlim_max = bsd_rlim.rlim_max; 1414 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1415 } 1416 1417 if (args->resource >= LINUX_RLIM_NLIMITS) 1418 return (EINVAL); 1419 1420 which = linux_to_bsd_resource[args->resource]; 1421 if (which == -1) 1422 return (EINVAL); 1423 1424 lim_rlimit(td, which, &bsd_rlim); 1425 1426 #ifdef COMPAT_LINUX32 1427 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1428 if (rlim.rlim_cur == UINT_MAX) 1429 rlim.rlim_cur = INT_MAX; 1430 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1431 if (rlim.rlim_max == UINT_MAX) 1432 rlim.rlim_max = INT_MAX; 1433 #else 1434 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1435 if (rlim.rlim_cur == ULONG_MAX) 1436 rlim.rlim_cur = LONG_MAX; 1437 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1438 if (rlim.rlim_max == ULONG_MAX) 1439 rlim.rlim_max = LONG_MAX; 1440 #endif 1441 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1442 } 1443 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1444 1445 int 1446 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1447 { 1448 struct l_rlimit rlim; 1449 struct rlimit bsd_rlim; 1450 u_int which; 1451 1452 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1453 rlim.rlim_cur = bsd_rlim.rlim_cur; 1454 rlim.rlim_max = bsd_rlim.rlim_max; 1455 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1456 } 1457 1458 if (args->resource >= LINUX_RLIM_NLIMITS) 1459 return (EINVAL); 1460 1461 which = linux_to_bsd_resource[args->resource]; 1462 if (which == -1) 1463 return (EINVAL); 1464 1465 lim_rlimit(td, which, &bsd_rlim); 1466 1467 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1468 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1469 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1470 } 1471 1472 int 1473 linux_sched_setscheduler(struct thread *td, 1474 struct linux_sched_setscheduler_args *args) 1475 { 1476 struct sched_param sched_param; 1477 struct thread *tdt; 1478 int error, policy; 1479 1480 switch (args->policy) { 1481 case LINUX_SCHED_OTHER: 1482 policy = SCHED_OTHER; 1483 break; 1484 case LINUX_SCHED_FIFO: 1485 policy = SCHED_FIFO; 1486 break; 1487 case LINUX_SCHED_RR: 1488 policy = SCHED_RR; 1489 break; 1490 default: 1491 return (EINVAL); 1492 } 1493 1494 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1495 if (error) 1496 return (error); 1497 1498 if (linux_map_sched_prio) { 1499 switch (policy) { 1500 case SCHED_OTHER: 1501 if (sched_param.sched_priority != 0) 1502 return (EINVAL); 1503 1504 sched_param.sched_priority = 1505 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1506 break; 1507 case SCHED_FIFO: 1508 case SCHED_RR: 1509 if (sched_param.sched_priority < 1 || 1510 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1511 return (EINVAL); 1512 1513 /* 1514 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1515 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1516 */ 1517 sched_param.sched_priority = 1518 (sched_param.sched_priority - 1) * 1519 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1520 (LINUX_MAX_RT_PRIO - 1); 1521 break; 1522 } 1523 } 1524 1525 tdt = linux_tdfind(td, args->pid, -1); 1526 if (tdt == NULL) 1527 return (ESRCH); 1528 1529 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1530 PROC_UNLOCK(tdt->td_proc); 1531 return (error); 1532 } 1533 1534 int 1535 linux_sched_getscheduler(struct thread *td, 1536 struct linux_sched_getscheduler_args *args) 1537 { 1538 struct thread *tdt; 1539 int error, policy; 1540 1541 tdt = linux_tdfind(td, args->pid, -1); 1542 if (tdt == NULL) 1543 return (ESRCH); 1544 1545 error = kern_sched_getscheduler(td, tdt, &policy); 1546 PROC_UNLOCK(tdt->td_proc); 1547 1548 switch (policy) { 1549 case SCHED_OTHER: 1550 td->td_retval[0] = LINUX_SCHED_OTHER; 1551 break; 1552 case SCHED_FIFO: 1553 td->td_retval[0] = LINUX_SCHED_FIFO; 1554 break; 1555 case SCHED_RR: 1556 td->td_retval[0] = LINUX_SCHED_RR; 1557 break; 1558 } 1559 return (error); 1560 } 1561 1562 int 1563 linux_sched_get_priority_max(struct thread *td, 1564 struct linux_sched_get_priority_max_args *args) 1565 { 1566 struct sched_get_priority_max_args bsd; 1567 1568 if (linux_map_sched_prio) { 1569 switch (args->policy) { 1570 case LINUX_SCHED_OTHER: 1571 td->td_retval[0] = 0; 1572 return (0); 1573 case LINUX_SCHED_FIFO: 1574 case LINUX_SCHED_RR: 1575 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1576 return (0); 1577 default: 1578 return (EINVAL); 1579 } 1580 } 1581 1582 switch (args->policy) { 1583 case LINUX_SCHED_OTHER: 1584 bsd.policy = SCHED_OTHER; 1585 break; 1586 case LINUX_SCHED_FIFO: 1587 bsd.policy = SCHED_FIFO; 1588 break; 1589 case LINUX_SCHED_RR: 1590 bsd.policy = SCHED_RR; 1591 break; 1592 default: 1593 return (EINVAL); 1594 } 1595 return (sys_sched_get_priority_max(td, &bsd)); 1596 } 1597 1598 int 1599 linux_sched_get_priority_min(struct thread *td, 1600 struct linux_sched_get_priority_min_args *args) 1601 { 1602 struct sched_get_priority_min_args bsd; 1603 1604 if (linux_map_sched_prio) { 1605 switch (args->policy) { 1606 case LINUX_SCHED_OTHER: 1607 td->td_retval[0] = 0; 1608 return (0); 1609 case LINUX_SCHED_FIFO: 1610 case LINUX_SCHED_RR: 1611 td->td_retval[0] = 1; 1612 return (0); 1613 default: 1614 return (EINVAL); 1615 } 1616 } 1617 1618 switch (args->policy) { 1619 case LINUX_SCHED_OTHER: 1620 bsd.policy = SCHED_OTHER; 1621 break; 1622 case LINUX_SCHED_FIFO: 1623 bsd.policy = SCHED_FIFO; 1624 break; 1625 case LINUX_SCHED_RR: 1626 bsd.policy = SCHED_RR; 1627 break; 1628 default: 1629 return (EINVAL); 1630 } 1631 return (sys_sched_get_priority_min(td, &bsd)); 1632 } 1633 1634 #define REBOOT_CAD_ON 0x89abcdef 1635 #define REBOOT_CAD_OFF 0 1636 #define REBOOT_HALT 0xcdef0123 1637 #define REBOOT_RESTART 0x01234567 1638 #define REBOOT_RESTART2 0xA1B2C3D4 1639 #define REBOOT_POWEROFF 0x4321FEDC 1640 #define REBOOT_MAGIC1 0xfee1dead 1641 #define REBOOT_MAGIC2 0x28121969 1642 #define REBOOT_MAGIC2A 0x05121996 1643 #define REBOOT_MAGIC2B 0x16041998 1644 1645 int 1646 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1647 { 1648 struct reboot_args bsd_args; 1649 1650 if (args->magic1 != REBOOT_MAGIC1) 1651 return (EINVAL); 1652 1653 switch (args->magic2) { 1654 case REBOOT_MAGIC2: 1655 case REBOOT_MAGIC2A: 1656 case REBOOT_MAGIC2B: 1657 break; 1658 default: 1659 return (EINVAL); 1660 } 1661 1662 switch (args->cmd) { 1663 case REBOOT_CAD_ON: 1664 case REBOOT_CAD_OFF: 1665 return (priv_check(td, PRIV_REBOOT)); 1666 case REBOOT_HALT: 1667 bsd_args.opt = RB_HALT; 1668 break; 1669 case REBOOT_RESTART: 1670 case REBOOT_RESTART2: 1671 bsd_args.opt = 0; 1672 break; 1673 case REBOOT_POWEROFF: 1674 bsd_args.opt = RB_POWEROFF; 1675 break; 1676 default: 1677 return (EINVAL); 1678 } 1679 return (sys_reboot(td, &bsd_args)); 1680 } 1681 1682 int 1683 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1684 { 1685 1686 td->td_retval[0] = td->td_proc->p_pid; 1687 1688 return (0); 1689 } 1690 1691 int 1692 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1693 { 1694 struct linux_emuldata *em; 1695 1696 em = em_find(td); 1697 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1698 1699 td->td_retval[0] = em->em_tid; 1700 1701 return (0); 1702 } 1703 1704 int 1705 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1706 { 1707 1708 td->td_retval[0] = kern_getppid(td); 1709 return (0); 1710 } 1711 1712 int 1713 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1714 { 1715 1716 td->td_retval[0] = td->td_ucred->cr_rgid; 1717 return (0); 1718 } 1719 1720 int 1721 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1722 { 1723 1724 td->td_retval[0] = td->td_ucred->cr_ruid; 1725 return (0); 1726 } 1727 1728 int 1729 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1730 { 1731 1732 return (kern_getsid(td, args->pid)); 1733 } 1734 1735 int 1736 linux_nosys(struct thread *td, struct nosys_args *ignore) 1737 { 1738 1739 return (ENOSYS); 1740 } 1741 1742 int 1743 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1744 { 1745 int error; 1746 1747 error = kern_getpriority(td, args->which, args->who); 1748 td->td_retval[0] = 20 - td->td_retval[0]; 1749 return (error); 1750 } 1751 1752 int 1753 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1754 { 1755 int name[2]; 1756 1757 name[0] = CTL_KERN; 1758 name[1] = KERN_HOSTNAME; 1759 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1760 args->len, 0, 0)); 1761 } 1762 1763 int 1764 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1765 { 1766 int name[2]; 1767 1768 name[0] = CTL_KERN; 1769 name[1] = KERN_NISDOMAINNAME; 1770 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1771 args->len, 0, 0)); 1772 } 1773 1774 int 1775 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1776 { 1777 1778 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1779 args->error_code); 1780 1781 /* 1782 * XXX: we should send a signal to the parent if 1783 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1784 * as it doesnt occur often. 1785 */ 1786 exit1(td, args->error_code, 0); 1787 /* NOTREACHED */ 1788 } 1789 1790 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1791 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1792 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1793 1794 struct l_user_cap_header { 1795 l_int version; 1796 l_int pid; 1797 }; 1798 1799 struct l_user_cap_data { 1800 l_int effective; 1801 l_int permitted; 1802 l_int inheritable; 1803 }; 1804 1805 int 1806 linux_capget(struct thread *td, struct linux_capget_args *uap) 1807 { 1808 struct l_user_cap_header luch; 1809 struct l_user_cap_data lucd[2]; 1810 int error, u32s; 1811 1812 if (uap->hdrp == NULL) 1813 return (EFAULT); 1814 1815 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1816 if (error != 0) 1817 return (error); 1818 1819 switch (luch.version) { 1820 case _LINUX_CAPABILITY_VERSION_1: 1821 u32s = 1; 1822 break; 1823 case _LINUX_CAPABILITY_VERSION_2: 1824 case _LINUX_CAPABILITY_VERSION_3: 1825 u32s = 2; 1826 break; 1827 default: 1828 luch.version = _LINUX_CAPABILITY_VERSION_1; 1829 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1830 if (error) 1831 return (error); 1832 return (EINVAL); 1833 } 1834 1835 if (luch.pid) 1836 return (EPERM); 1837 1838 if (uap->datap) { 1839 /* 1840 * The current implementation doesn't support setting 1841 * a capability (it's essentially a stub) so indicate 1842 * that no capabilities are currently set or available 1843 * to request. 1844 */ 1845 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1846 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1847 } 1848 1849 return (error); 1850 } 1851 1852 int 1853 linux_capset(struct thread *td, struct linux_capset_args *uap) 1854 { 1855 struct l_user_cap_header luch; 1856 struct l_user_cap_data lucd[2]; 1857 int error, i, u32s; 1858 1859 if (uap->hdrp == NULL || uap->datap == NULL) 1860 return (EFAULT); 1861 1862 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1863 if (error != 0) 1864 return (error); 1865 1866 switch (luch.version) { 1867 case _LINUX_CAPABILITY_VERSION_1: 1868 u32s = 1; 1869 break; 1870 case _LINUX_CAPABILITY_VERSION_2: 1871 case _LINUX_CAPABILITY_VERSION_3: 1872 u32s = 2; 1873 break; 1874 default: 1875 luch.version = _LINUX_CAPABILITY_VERSION_1; 1876 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1877 if (error) 1878 return (error); 1879 return (EINVAL); 1880 } 1881 1882 if (luch.pid) 1883 return (EPERM); 1884 1885 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1886 if (error != 0) 1887 return (error); 1888 1889 /* We currently don't support setting any capabilities. */ 1890 for (i = 0; i < u32s; i++) { 1891 if (lucd[i].effective || lucd[i].permitted || 1892 lucd[i].inheritable) { 1893 linux_msg(td, 1894 "capset[%d] effective=0x%x, permitted=0x%x, " 1895 "inheritable=0x%x is not implemented", i, 1896 (int)lucd[i].effective, (int)lucd[i].permitted, 1897 (int)lucd[i].inheritable); 1898 return (EPERM); 1899 } 1900 } 1901 1902 return (0); 1903 } 1904 1905 int 1906 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1907 { 1908 int error = 0, max_size; 1909 struct proc *p = td->td_proc; 1910 char comm[LINUX_MAX_COMM_LEN]; 1911 int pdeath_signal, trace_state; 1912 1913 switch (args->option) { 1914 case LINUX_PR_SET_PDEATHSIG: 1915 if (!LINUX_SIG_VALID(args->arg2)) 1916 return (EINVAL); 1917 pdeath_signal = linux_to_bsd_signal(args->arg2); 1918 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1919 &pdeath_signal)); 1920 case LINUX_PR_GET_PDEATHSIG: 1921 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1922 &pdeath_signal); 1923 if (error != 0) 1924 return (error); 1925 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1926 return (copyout(&pdeath_signal, 1927 (void *)(register_t)args->arg2, 1928 sizeof(pdeath_signal))); 1929 /* 1930 * In Linux, this flag controls if set[gu]id processes can coredump. 1931 * There are additional semantics imposed on processes that cannot 1932 * coredump: 1933 * - Such processes can not be ptraced. 1934 * - There are some semantics around ownership of process-related files 1935 * in the /proc namespace. 1936 * 1937 * In FreeBSD, we can (and by default, do) disable setuid coredump 1938 * system-wide with 'sugid_coredump.' We control tracability on a 1939 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1940 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1941 * procctl is roughly analogous to Linux's DUMPABLE. 1942 * 1943 * So, proxy these knobs to the corresponding PROC_TRACE setting. 1944 */ 1945 case LINUX_PR_GET_DUMPABLE: 1946 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1947 &trace_state); 1948 if (error != 0) 1949 return (error); 1950 td->td_retval[0] = (trace_state != -1); 1951 return (0); 1952 case LINUX_PR_SET_DUMPABLE: 1953 /* 1954 * It is only valid for userspace to set one of these two 1955 * flags, and only one at a time. 1956 */ 1957 switch (args->arg2) { 1958 case LINUX_SUID_DUMP_DISABLE: 1959 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1960 break; 1961 case LINUX_SUID_DUMP_USER: 1962 trace_state = PROC_TRACE_CTL_ENABLE; 1963 break; 1964 default: 1965 return (EINVAL); 1966 } 1967 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1968 &trace_state)); 1969 case LINUX_PR_GET_KEEPCAPS: 1970 /* 1971 * Indicate that we always clear the effective and 1972 * permitted capability sets when the user id becomes 1973 * non-zero (actually the capability sets are simply 1974 * always zero in the current implementation). 1975 */ 1976 td->td_retval[0] = 0; 1977 break; 1978 case LINUX_PR_SET_KEEPCAPS: 1979 /* 1980 * Ignore requests to keep the effective and permitted 1981 * capability sets when the user id becomes non-zero. 1982 */ 1983 break; 1984 case LINUX_PR_SET_NAME: 1985 /* 1986 * To be on the safe side we need to make sure to not 1987 * overflow the size a Linux program expects. We already 1988 * do this here in the copyin, so that we don't need to 1989 * check on copyout. 1990 */ 1991 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1992 error = copyinstr((void *)(register_t)args->arg2, comm, 1993 max_size, NULL); 1994 1995 /* Linux silently truncates the name if it is too long. */ 1996 if (error == ENAMETOOLONG) { 1997 /* 1998 * XXX: copyinstr() isn't documented to populate the 1999 * array completely, so do a copyin() to be on the 2000 * safe side. This should be changed in case 2001 * copyinstr() is changed to guarantee this. 2002 */ 2003 error = copyin((void *)(register_t)args->arg2, comm, 2004 max_size - 1); 2005 comm[max_size - 1] = '\0'; 2006 } 2007 if (error) 2008 return (error); 2009 2010 PROC_LOCK(p); 2011 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2012 PROC_UNLOCK(p); 2013 break; 2014 case LINUX_PR_GET_NAME: 2015 PROC_LOCK(p); 2016 strlcpy(comm, p->p_comm, sizeof(comm)); 2017 PROC_UNLOCK(p); 2018 error = copyout(comm, (void *)(register_t)args->arg2, 2019 strlen(comm) + 1); 2020 break; 2021 case LINUX_PR_GET_SECCOMP: 2022 case LINUX_PR_SET_SECCOMP: 2023 /* 2024 * Same as returned by Linux without CONFIG_SECCOMP enabled. 2025 */ 2026 error = EINVAL; 2027 break; 2028 case LINUX_PR_CAPBSET_READ: 2029 #if 0 2030 /* 2031 * This makes too much noise with Ubuntu Focal. 2032 */ 2033 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 2034 (int)args->arg2); 2035 #endif 2036 error = EINVAL; 2037 break; 2038 case LINUX_PR_SET_NO_NEW_PRIVS: 2039 linux_msg(td, "unsupported prctl PR_SET_NO_NEW_PRIVS"); 2040 error = EINVAL; 2041 break; 2042 case LINUX_PR_SET_PTRACER: 2043 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 2044 error = EINVAL; 2045 break; 2046 default: 2047 linux_msg(td, "unsupported prctl option %d", args->option); 2048 error = EINVAL; 2049 break; 2050 } 2051 2052 return (error); 2053 } 2054 2055 int 2056 linux_sched_setparam(struct thread *td, 2057 struct linux_sched_setparam_args *uap) 2058 { 2059 struct sched_param sched_param; 2060 struct thread *tdt; 2061 int error, policy; 2062 2063 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2064 if (error) 2065 return (error); 2066 2067 tdt = linux_tdfind(td, uap->pid, -1); 2068 if (tdt == NULL) 2069 return (ESRCH); 2070 2071 if (linux_map_sched_prio) { 2072 error = kern_sched_getscheduler(td, tdt, &policy); 2073 if (error) 2074 goto out; 2075 2076 switch (policy) { 2077 case SCHED_OTHER: 2078 if (sched_param.sched_priority != 0) { 2079 error = EINVAL; 2080 goto out; 2081 } 2082 sched_param.sched_priority = 2083 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 2084 break; 2085 case SCHED_FIFO: 2086 case SCHED_RR: 2087 if (sched_param.sched_priority < 1 || 2088 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 2089 error = EINVAL; 2090 goto out; 2091 } 2092 /* 2093 * Map [1, LINUX_MAX_RT_PRIO - 1] to 2094 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 2095 */ 2096 sched_param.sched_priority = 2097 (sched_param.sched_priority - 1) * 2098 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 2099 (LINUX_MAX_RT_PRIO - 1); 2100 break; 2101 } 2102 } 2103 2104 error = kern_sched_setparam(td, tdt, &sched_param); 2105 out: PROC_UNLOCK(tdt->td_proc); 2106 return (error); 2107 } 2108 2109 int 2110 linux_sched_getparam(struct thread *td, 2111 struct linux_sched_getparam_args *uap) 2112 { 2113 struct sched_param sched_param; 2114 struct thread *tdt; 2115 int error, policy; 2116 2117 tdt = linux_tdfind(td, uap->pid, -1); 2118 if (tdt == NULL) 2119 return (ESRCH); 2120 2121 error = kern_sched_getparam(td, tdt, &sched_param); 2122 if (error) { 2123 PROC_UNLOCK(tdt->td_proc); 2124 return (error); 2125 } 2126 2127 if (linux_map_sched_prio) { 2128 error = kern_sched_getscheduler(td, tdt, &policy); 2129 PROC_UNLOCK(tdt->td_proc); 2130 if (error) 2131 return (error); 2132 2133 switch (policy) { 2134 case SCHED_OTHER: 2135 sched_param.sched_priority = 0; 2136 break; 2137 case SCHED_FIFO: 2138 case SCHED_RR: 2139 /* 2140 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 2141 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 2142 */ 2143 sched_param.sched_priority = 2144 (sched_param.sched_priority * 2145 (LINUX_MAX_RT_PRIO - 1) + 2146 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 2147 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 2148 break; 2149 } 2150 } else 2151 PROC_UNLOCK(tdt->td_proc); 2152 2153 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 2154 return (error); 2155 } 2156 2157 /* 2158 * Get affinity of a process. 2159 */ 2160 int 2161 linux_sched_getaffinity(struct thread *td, 2162 struct linux_sched_getaffinity_args *args) 2163 { 2164 int error; 2165 struct thread *tdt; 2166 2167 if (args->len < sizeof(cpuset_t)) 2168 return (EINVAL); 2169 2170 tdt = linux_tdfind(td, args->pid, -1); 2171 if (tdt == NULL) 2172 return (ESRCH); 2173 2174 PROC_UNLOCK(tdt->td_proc); 2175 2176 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2177 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); 2178 if (error == 0) 2179 td->td_retval[0] = sizeof(cpuset_t); 2180 2181 return (error); 2182 } 2183 2184 /* 2185 * Set affinity of a process. 2186 */ 2187 int 2188 linux_sched_setaffinity(struct thread *td, 2189 struct linux_sched_setaffinity_args *args) 2190 { 2191 struct thread *tdt; 2192 2193 if (args->len < sizeof(cpuset_t)) 2194 return (EINVAL); 2195 2196 tdt = linux_tdfind(td, args->pid, -1); 2197 if (tdt == NULL) 2198 return (ESRCH); 2199 2200 PROC_UNLOCK(tdt->td_proc); 2201 2202 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2203 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); 2204 } 2205 2206 struct linux_rlimit64 { 2207 uint64_t rlim_cur; 2208 uint64_t rlim_max; 2209 }; 2210 2211 int 2212 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2213 { 2214 struct rlimit rlim, nrlim; 2215 struct linux_rlimit64 lrlim; 2216 struct proc *p; 2217 u_int which; 2218 int flags; 2219 int error; 2220 2221 if (args->new == NULL && args->old != NULL) { 2222 if (linux_get_dummy_limit(args->resource, &rlim)) { 2223 lrlim.rlim_cur = rlim.rlim_cur; 2224 lrlim.rlim_max = rlim.rlim_max; 2225 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2226 } 2227 } 2228 2229 if (args->resource >= LINUX_RLIM_NLIMITS) 2230 return (EINVAL); 2231 2232 which = linux_to_bsd_resource[args->resource]; 2233 if (which == -1) 2234 return (EINVAL); 2235 2236 if (args->new != NULL) { 2237 /* 2238 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2239 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2240 * as INFINITY so we do not need a conversion even. 2241 */ 2242 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2243 if (error != 0) 2244 return (error); 2245 } 2246 2247 flags = PGET_HOLD | PGET_NOTWEXIT; 2248 if (args->new != NULL) 2249 flags |= PGET_CANDEBUG; 2250 else 2251 flags |= PGET_CANSEE; 2252 if (args->pid == 0) { 2253 p = td->td_proc; 2254 PHOLD(p); 2255 } else { 2256 error = pget(args->pid, flags, &p); 2257 if (error != 0) 2258 return (error); 2259 } 2260 if (args->old != NULL) { 2261 PROC_LOCK(p); 2262 lim_rlimit_proc(p, which, &rlim); 2263 PROC_UNLOCK(p); 2264 if (rlim.rlim_cur == RLIM_INFINITY) 2265 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2266 else 2267 lrlim.rlim_cur = rlim.rlim_cur; 2268 if (rlim.rlim_max == RLIM_INFINITY) 2269 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2270 else 2271 lrlim.rlim_max = rlim.rlim_max; 2272 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2273 if (error != 0) 2274 goto out; 2275 } 2276 2277 if (args->new != NULL) 2278 error = kern_proc_setrlimit(td, p, which, &nrlim); 2279 2280 out: 2281 PRELE(p); 2282 return (error); 2283 } 2284 2285 int 2286 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2287 { 2288 struct timeval utv, tv0, tv1, *tvp; 2289 struct l_pselect6arg lpse6; 2290 struct l_timespec lts; 2291 struct timespec uts; 2292 l_sigset_t l_ss; 2293 sigset_t *ssp; 2294 sigset_t ss; 2295 int error; 2296 2297 ssp = NULL; 2298 if (args->sig != NULL) { 2299 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2300 if (error != 0) 2301 return (error); 2302 if (lpse6.ss_len != sizeof(l_ss)) 2303 return (EINVAL); 2304 if (lpse6.ss != 0) { 2305 error = copyin(PTRIN(lpse6.ss), &l_ss, 2306 sizeof(l_ss)); 2307 if (error != 0) 2308 return (error); 2309 linux_to_bsd_sigset(&l_ss, &ss); 2310 ssp = &ss; 2311 } 2312 } 2313 2314 /* 2315 * Currently glibc changes nanosecond number to microsecond. 2316 * This mean losing precision but for now it is hardly seen. 2317 */ 2318 if (args->tsp != NULL) { 2319 error = copyin(args->tsp, <s, sizeof(lts)); 2320 if (error != 0) 2321 return (error); 2322 error = linux_to_native_timespec(&uts, <s); 2323 if (error != 0) 2324 return (error); 2325 2326 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2327 if (itimerfix(&utv)) 2328 return (EINVAL); 2329 2330 microtime(&tv0); 2331 tvp = &utv; 2332 } else 2333 tvp = NULL; 2334 2335 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2336 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2337 2338 if (error == 0 && args->tsp != NULL) { 2339 if (td->td_retval[0] != 0) { 2340 /* 2341 * Compute how much time was left of the timeout, 2342 * by subtracting the current time and the time 2343 * before we started the call, and subtracting 2344 * that result from the user-supplied value. 2345 */ 2346 2347 microtime(&tv1); 2348 timevalsub(&tv1, &tv0); 2349 timevalsub(&utv, &tv1); 2350 if (utv.tv_sec < 0) 2351 timevalclear(&utv); 2352 } else 2353 timevalclear(&utv); 2354 2355 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2356 2357 error = native_to_linux_timespec(<s, &uts); 2358 if (error == 0) 2359 error = copyout(<s, args->tsp, sizeof(lts)); 2360 } 2361 2362 return (error); 2363 } 2364 2365 int 2366 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2367 { 2368 struct timespec ts0, ts1; 2369 struct l_timespec lts; 2370 struct timespec uts, *tsp; 2371 l_sigset_t l_ss; 2372 sigset_t *ssp; 2373 sigset_t ss; 2374 int error; 2375 2376 if (args->sset != NULL) { 2377 if (args->ssize != sizeof(l_ss)) 2378 return (EINVAL); 2379 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2380 if (error) 2381 return (error); 2382 linux_to_bsd_sigset(&l_ss, &ss); 2383 ssp = &ss; 2384 } else 2385 ssp = NULL; 2386 if (args->tsp != NULL) { 2387 error = copyin(args->tsp, <s, sizeof(lts)); 2388 if (error) 2389 return (error); 2390 error = linux_to_native_timespec(&uts, <s); 2391 if (error != 0) 2392 return (error); 2393 2394 nanotime(&ts0); 2395 tsp = &uts; 2396 } else 2397 tsp = NULL; 2398 2399 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2400 2401 if (error == 0 && args->tsp != NULL) { 2402 if (td->td_retval[0]) { 2403 nanotime(&ts1); 2404 timespecsub(&ts1, &ts0, &ts1); 2405 timespecsub(&uts, &ts1, &uts); 2406 if (uts.tv_sec < 0) 2407 timespecclear(&uts); 2408 } else 2409 timespecclear(&uts); 2410 2411 error = native_to_linux_timespec(<s, &uts); 2412 if (error == 0) 2413 error = copyout(<s, args->tsp, sizeof(lts)); 2414 } 2415 2416 return (error); 2417 } 2418 2419 int 2420 linux_sched_rr_get_interval(struct thread *td, 2421 struct linux_sched_rr_get_interval_args *uap) 2422 { 2423 struct timespec ts; 2424 struct l_timespec lts; 2425 struct thread *tdt; 2426 int error; 2427 2428 /* 2429 * According to man in case the invalid pid specified 2430 * EINVAL should be returned. 2431 */ 2432 if (uap->pid < 0) 2433 return (EINVAL); 2434 2435 tdt = linux_tdfind(td, uap->pid, -1); 2436 if (tdt == NULL) 2437 return (ESRCH); 2438 2439 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2440 PROC_UNLOCK(tdt->td_proc); 2441 if (error != 0) 2442 return (error); 2443 error = native_to_linux_timespec(<s, &ts); 2444 if (error != 0) 2445 return (error); 2446 return (copyout(<s, uap->interval, sizeof(lts))); 2447 } 2448 2449 /* 2450 * In case when the Linux thread is the initial thread in 2451 * the thread group thread id is equal to the process id. 2452 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2453 */ 2454 struct thread * 2455 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2456 { 2457 struct linux_emuldata *em; 2458 struct thread *tdt; 2459 struct proc *p; 2460 2461 tdt = NULL; 2462 if (tid == 0 || tid == td->td_tid) { 2463 tdt = td; 2464 PROC_LOCK(tdt->td_proc); 2465 } else if (tid > PID_MAX) 2466 tdt = tdfind(tid, pid); 2467 else { 2468 /* 2469 * Initial thread where the tid equal to the pid. 2470 */ 2471 p = pfind(tid); 2472 if (p != NULL) { 2473 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2474 /* 2475 * p is not a Linuxulator process. 2476 */ 2477 PROC_UNLOCK(p); 2478 return (NULL); 2479 } 2480 FOREACH_THREAD_IN_PROC(p, tdt) { 2481 em = em_find(tdt); 2482 if (tid == em->em_tid) 2483 return (tdt); 2484 } 2485 PROC_UNLOCK(p); 2486 } 2487 return (NULL); 2488 } 2489 2490 return (tdt); 2491 } 2492 2493 void 2494 linux_to_bsd_waitopts(int options, int *bsdopts) 2495 { 2496 2497 if (options & LINUX_WNOHANG) 2498 *bsdopts |= WNOHANG; 2499 if (options & LINUX_WUNTRACED) 2500 *bsdopts |= WUNTRACED; 2501 if (options & LINUX_WEXITED) 2502 *bsdopts |= WEXITED; 2503 if (options & LINUX_WCONTINUED) 2504 *bsdopts |= WCONTINUED; 2505 if (options & LINUX_WNOWAIT) 2506 *bsdopts |= WNOWAIT; 2507 2508 if (options & __WCLONE) 2509 *bsdopts |= WLINUXCLONE; 2510 } 2511 2512 int 2513 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2514 { 2515 struct uio uio; 2516 struct iovec iov; 2517 int error; 2518 2519 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2520 return (EINVAL); 2521 if (args->count > INT_MAX) 2522 args->count = INT_MAX; 2523 2524 iov.iov_base = args->buf; 2525 iov.iov_len = args->count; 2526 2527 uio.uio_iov = &iov; 2528 uio.uio_iovcnt = 1; 2529 uio.uio_resid = iov.iov_len; 2530 uio.uio_segflg = UIO_USERSPACE; 2531 uio.uio_rw = UIO_READ; 2532 uio.uio_td = td; 2533 2534 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2535 if (error == 0) 2536 td->td_retval[0] = args->count - uio.uio_resid; 2537 return (error); 2538 } 2539 2540 int 2541 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2542 { 2543 2544 /* Needs to be page-aligned */ 2545 if (args->start & PAGE_MASK) 2546 return (EINVAL); 2547 return (kern_mincore(td, args->start, args->len, args->vec)); 2548 } 2549 2550 #define SYSLOG_TAG "<6>" 2551 2552 int 2553 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2554 { 2555 char buf[128], *src, *dst; 2556 u_int seq; 2557 int buflen, error; 2558 2559 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2560 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2561 return (EINVAL); 2562 } 2563 2564 if (args->len < 6) { 2565 td->td_retval[0] = 0; 2566 return (0); 2567 } 2568 2569 error = priv_check(td, PRIV_MSGBUF); 2570 if (error) 2571 return (error); 2572 2573 mtx_lock(&msgbuf_lock); 2574 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2575 mtx_unlock(&msgbuf_lock); 2576 2577 dst = args->buf; 2578 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2579 /* The -1 is to skip the trailing '\0'. */ 2580 dst += sizeof(SYSLOG_TAG) - 1; 2581 2582 while (error == 0) { 2583 mtx_lock(&msgbuf_lock); 2584 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2585 mtx_unlock(&msgbuf_lock); 2586 2587 if (buflen == 0) 2588 break; 2589 2590 for (src = buf; src < buf + buflen && error == 0; src++) { 2591 if (*src == '\0') 2592 continue; 2593 2594 if (dst >= args->buf + args->len) 2595 goto out; 2596 2597 error = copyout(src, dst, 1); 2598 dst++; 2599 2600 if (*src == '\n' && *(src + 1) != '<' && 2601 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2602 error = copyout(&SYSLOG_TAG, 2603 dst, sizeof(SYSLOG_TAG)); 2604 dst += sizeof(SYSLOG_TAG) - 1; 2605 } 2606 } 2607 } 2608 out: 2609 td->td_retval[0] = dst - args->buf; 2610 return (error); 2611 } 2612 2613 int 2614 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2615 { 2616 int cpu, error, node; 2617 2618 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2619 error = 0; 2620 node = cpuid_to_pcpu[cpu]->pc_domain; 2621 2622 if (args->cpu != NULL) 2623 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2624 if (args->node != NULL) 2625 error = copyout(&node, args->node, sizeof(l_int)); 2626 return (error); 2627 } 2628