1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include <sys/cdefs.h> 38 #include "opt_capsicum.h" 39 #include "opt_ktrace.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #ifdef COMPAT_FREEBSD11 44 #include <sys/abi_compat.h> 45 #endif 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/malloc.h> 51 #include <sys/mount.h> 52 #include <sys/mutex.h> 53 #include <sys/sysproto.h> 54 #include <sys/namei.h> 55 #include <sys/filedesc.h> 56 #include <sys/kernel.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/rwlock.h> 63 #include <sys/sdt.h> 64 #include <sys/stat.h> 65 #include <sys/sx.h> 66 #include <sys/unistd.h> 67 #include <sys/vnode.h> 68 #include <sys/priv.h> 69 #include <sys/proc.h> 70 #include <sys/dirent.h> 71 #include <sys/jail.h> 72 #include <sys/syscallsubr.h> 73 #include <sys/sysctl.h> 74 #ifdef KTRACE 75 #include <sys/ktrace.h> 76 #endif 77 78 #include <machine/stdarg.h> 79 80 #include <security/audit/audit.h> 81 #include <security/mac/mac_framework.h> 82 83 #include <vm/vm.h> 84 #include <vm/vm_object.h> 85 #include <vm/vm_page.h> 86 #include <vm/vnode_pager.h> 87 #include <vm/uma.h> 88 89 #include <fs/devfs/devfs.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 static int kern_chflagsat(struct thread *td, int fd, const char *path, 94 enum uio_seg pathseg, u_long flags, int atflag); 95 static int setfflags(struct thread *td, struct vnode *, u_long); 96 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 97 static int getutimens(const struct timespec *, enum uio_seg, 98 struct timespec *, int *); 99 static int setutimes(struct thread *td, struct vnode *, 100 const struct timespec *, int, int); 101 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 102 struct thread *td); 103 static int kern_fhlinkat(struct thread *td, int fd, 
const char *path, 104 enum uio_seg pathseg, fhandle_t *fhp); 105 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 106 size_t count, struct thread *td); 107 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 108 const char *path, enum uio_seg segflag); 109 110 uint64_t 111 at2cnpflags(u_int at_flags, u_int mask) 112 { 113 uint64_t res; 114 115 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 116 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 117 118 res = 0; 119 at_flags &= mask; 120 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 121 res |= RBENEATH; 122 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 123 res |= FOLLOW; 124 /* NOFOLLOW is pseudo flag */ 125 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 126 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 127 FOLLOW; 128 } 129 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 130 res |= EMPTYPATH; 131 return (res); 132 } 133 134 int 135 kern_sync(struct thread *td) 136 { 137 struct mount *mp, *nmp; 138 int save; 139 140 mtx_lock(&mountlist_mtx); 141 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 142 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 143 nmp = TAILQ_NEXT(mp, mnt_list); 144 continue; 145 } 146 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 147 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 148 save = curthread_pflags_set(TDP_SYNCIO); 149 vfs_periodic(mp, MNT_NOWAIT); 150 VFS_SYNC(mp, MNT_NOWAIT); 151 curthread_pflags_restore(save); 152 vn_finished_write(mp); 153 } 154 mtx_lock(&mountlist_mtx); 155 nmp = TAILQ_NEXT(mp, mnt_list); 156 vfs_unbusy(mp); 157 } 158 mtx_unlock(&mountlist_mtx); 159 return (0); 160 } 161 162 /* 163 * Sync each mounted filesystem. 164 */ 165 #ifndef _SYS_SYSPROTO_H_ 166 struct sync_args { 167 int dummy; 168 }; 169 #endif 170 /* ARGSUSED */ 171 int 172 sys_sync(struct thread *td, struct sync_args *uap) 173 { 174 175 return (kern_sync(td)); 176 } 177 178 /* 179 * Change filesystem quotas. 
180 */ 181 #ifndef _SYS_SYSPROTO_H_ 182 struct quotactl_args { 183 char *path; 184 int cmd; 185 int uid; 186 caddr_t arg; 187 }; 188 #endif 189 int 190 sys_quotactl(struct thread *td, struct quotactl_args *uap) 191 { 192 struct mount *mp; 193 struct nameidata nd; 194 int error; 195 bool mp_busy; 196 197 AUDIT_ARG_CMD(uap->cmd); 198 AUDIT_ARG_UID(uap->uid); 199 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 200 return (EPERM); 201 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 202 uap->path); 203 if ((error = namei(&nd)) != 0) 204 return (error); 205 NDFREE_PNBUF(&nd); 206 mp = nd.ni_vp->v_mount; 207 vfs_ref(mp); 208 vput(nd.ni_vp); 209 error = vfs_busy(mp, 0); 210 if (error != 0) { 211 vfs_rel(mp); 212 return (error); 213 } 214 mp_busy = true; 215 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 216 217 /* 218 * Since quota on/off operations typically need to open quota 219 * files, the implementation may need to unbusy the mount point 220 * before calling into namei. Otherwise, unmount might be 221 * started between two vfs_busy() invocations (first is ours, 222 * second is from mount point cross-walk code in lookup()), 223 * causing deadlock. 224 * 225 * Avoid unbusying mp if the implementation indicates it has 226 * already done so. 227 */ 228 if (mp_busy) 229 vfs_unbusy(mp); 230 vfs_rel(mp); 231 return (error); 232 } 233 234 /* 235 * Used by statfs conversion routines to scale the block size up if 236 * necessary so that all of the block counts are <= 'max_size'. Note 237 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 238 * value of 'n'. 239 */ 240 void 241 statfs_scale_blocks(struct statfs *sf, long max_size) 242 { 243 uint64_t count; 244 int shift; 245 246 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 247 248 /* 249 * Attempt to scale the block counts to give a more accurate 250 * overview to userland of the ratio of free space to used 251 * space. 
To do this, find the largest block count and compute 252 * a divisor that lets it fit into a signed integer <= max_size. 253 */ 254 if (sf->f_bavail < 0) 255 count = -sf->f_bavail; 256 else 257 count = sf->f_bavail; 258 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 259 if (count <= max_size) 260 return; 261 262 count >>= flsl(max_size); 263 shift = 0; 264 while (count > 0) { 265 shift++; 266 count >>=1; 267 } 268 269 sf->f_bsize <<= shift; 270 sf->f_blocks >>= shift; 271 sf->f_bfree >>= shift; 272 sf->f_bavail >>= shift; 273 } 274 275 static int 276 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 277 { 278 int error; 279 280 if (mp == NULL) 281 return (EBADF); 282 error = vfs_busy(mp, 0); 283 vfs_rel(mp); 284 if (error != 0) 285 return (error); 286 #ifdef MAC 287 error = mac_mount_check_stat(td->td_ucred, mp); 288 if (error != 0) 289 goto out; 290 #endif 291 error = VFS_STATFS(mp, buf); 292 if (error != 0) 293 goto out; 294 if (priv_check_cred_vfs_generation(td->td_ucred)) { 295 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 296 prison_enforce_statfs(td->td_ucred, mp, buf); 297 } 298 out: 299 vfs_unbusy(mp); 300 return (error); 301 } 302 303 /* 304 * Get filesystem statistics. 
305 */ 306 #ifndef _SYS_SYSPROTO_H_ 307 struct statfs_args { 308 char *path; 309 struct statfs *buf; 310 }; 311 #endif 312 int 313 sys_statfs(struct thread *td, struct statfs_args *uap) 314 { 315 struct statfs *sfp; 316 int error; 317 318 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 319 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 320 if (error == 0) 321 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 322 free(sfp, M_STATFS); 323 return (error); 324 } 325 326 int 327 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 328 struct statfs *buf) 329 { 330 struct mount *mp; 331 struct nameidata nd; 332 int error; 333 334 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 335 error = namei(&nd); 336 if (error != 0) 337 return (error); 338 NDFREE_PNBUF(&nd); 339 mp = vfs_ref_from_vp(nd.ni_vp); 340 vrele(nd.ni_vp); 341 return (kern_do_statfs(td, mp, buf)); 342 } 343 344 /* 345 * Get filesystem statistics. 346 */ 347 #ifndef _SYS_SYSPROTO_H_ 348 struct fstatfs_args { 349 int fd; 350 struct statfs *buf; 351 }; 352 #endif 353 int 354 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 355 { 356 struct statfs *sfp; 357 int error; 358 359 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 360 error = kern_fstatfs(td, uap->fd, sfp); 361 if (error == 0) 362 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 363 free(sfp, M_STATFS); 364 return (error); 365 } 366 367 int 368 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 369 { 370 struct file *fp; 371 struct mount *mp; 372 struct vnode *vp; 373 int error; 374 375 AUDIT_ARG_FD(fd); 376 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 377 if (error != 0) 378 return (error); 379 vp = fp->f_vnode; 380 #ifdef AUDIT 381 if (AUDITING_TD(td)) { 382 vn_lock(vp, LK_SHARED | LK_RETRY); 383 AUDIT_ARG_VNODE1(vp); 384 VOP_UNLOCK(vp); 385 } 386 #endif 387 mp = vfs_ref_from_vp(vp); 388 fdrop(fp, td); 389 return (kern_do_statfs(td, mp, buf)); 390 } 391 392 
/* 393 * Get statistics on all filesystems. 394 */ 395 #ifndef _SYS_SYSPROTO_H_ 396 struct getfsstat_args { 397 struct statfs *buf; 398 long bufsize; 399 int mode; 400 }; 401 #endif 402 int 403 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 404 { 405 size_t count; 406 int error; 407 408 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 409 return (EINVAL); 410 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 411 UIO_USERSPACE, uap->mode); 412 if (error == 0) 413 td->td_retval[0] = count; 414 return (error); 415 } 416 417 /* 418 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 419 * The caller is responsible for freeing memory which will be allocated 420 * in '*buf'. 421 */ 422 int 423 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 424 size_t *countp, enum uio_seg bufseg, int mode) 425 { 426 struct mount *mp, *nmp; 427 struct statfs *sfsp, *sp, *sptmp, *tofree; 428 size_t count, maxcount; 429 int error; 430 431 switch (mode) { 432 case MNT_WAIT: 433 case MNT_NOWAIT: 434 break; 435 default: 436 if (bufseg == UIO_SYSSPACE) 437 *buf = NULL; 438 return (EINVAL); 439 } 440 restart: 441 maxcount = bufsize / sizeof(struct statfs); 442 if (bufsize == 0) { 443 sfsp = NULL; 444 tofree = NULL; 445 } else if (bufseg == UIO_USERSPACE) { 446 sfsp = *buf; 447 tofree = NULL; 448 } else /* if (bufseg == UIO_SYSSPACE) */ { 449 count = 0; 450 mtx_lock(&mountlist_mtx); 451 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 452 count++; 453 } 454 mtx_unlock(&mountlist_mtx); 455 if (maxcount > count) 456 maxcount = count; 457 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 458 M_STATFS, M_WAITOK); 459 } 460 461 count = 0; 462 463 /* 464 * If there is no target buffer they only want the count. 465 * 466 * This could be TAILQ_FOREACH but it is open-coded to match the original 467 * code below. 
468 */ 469 if (sfsp == NULL) { 470 mtx_lock(&mountlist_mtx); 471 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 472 if (prison_canseemount(td->td_ucred, mp) != 0) { 473 nmp = TAILQ_NEXT(mp, mnt_list); 474 continue; 475 } 476 #ifdef MAC 477 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 478 nmp = TAILQ_NEXT(mp, mnt_list); 479 continue; 480 } 481 #endif 482 count++; 483 nmp = TAILQ_NEXT(mp, mnt_list); 484 } 485 mtx_unlock(&mountlist_mtx); 486 *countp = count; 487 return (0); 488 } 489 490 /* 491 * They want the entire thing. 492 * 493 * Short-circuit the corner case of no room for anything, avoids 494 * relocking below. 495 */ 496 if (maxcount < 1) { 497 goto out; 498 } 499 500 mtx_lock(&mountlist_mtx); 501 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 502 if (prison_canseemount(td->td_ucred, mp) != 0) { 503 nmp = TAILQ_NEXT(mp, mnt_list); 504 continue; 505 } 506 #ifdef MAC 507 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 508 nmp = TAILQ_NEXT(mp, mnt_list); 509 continue; 510 } 511 #endif 512 if (mode == MNT_WAIT) { 513 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 514 /* 515 * If vfs_busy() failed, and MBF_NOWAIT 516 * wasn't passed, then the mp is gone. 517 * Furthermore, because of MBF_MNTLSTLOCK, 518 * the mountlist_mtx was dropped. We have 519 * no other choice than to start over. 520 */ 521 mtx_unlock(&mountlist_mtx); 522 free(tofree, M_STATFS); 523 goto restart; 524 } 525 } else { 526 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 527 nmp = TAILQ_NEXT(mp, mnt_list); 528 continue; 529 } 530 } 531 sp = &mp->mnt_stat; 532 /* 533 * If MNT_NOWAIT is specified, do not refresh 534 * the fsstat cache. 
535 */ 536 if (mode != MNT_NOWAIT) { 537 error = VFS_STATFS(mp, sp); 538 if (error != 0) { 539 mtx_lock(&mountlist_mtx); 540 nmp = TAILQ_NEXT(mp, mnt_list); 541 vfs_unbusy(mp); 542 continue; 543 } 544 } 545 if (priv_check_cred_vfs_generation(td->td_ucred)) { 546 sptmp = malloc(sizeof(struct statfs), M_STATFS, 547 M_WAITOK); 548 *sptmp = *sp; 549 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 550 prison_enforce_statfs(td->td_ucred, mp, sptmp); 551 sp = sptmp; 552 } else 553 sptmp = NULL; 554 if (bufseg == UIO_SYSSPACE) { 555 bcopy(sp, sfsp, sizeof(*sp)); 556 free(sptmp, M_STATFS); 557 } else /* if (bufseg == UIO_USERSPACE) */ { 558 error = copyout(sp, sfsp, sizeof(*sp)); 559 free(sptmp, M_STATFS); 560 if (error != 0) { 561 vfs_unbusy(mp); 562 return (error); 563 } 564 } 565 sfsp++; 566 count++; 567 568 if (count == maxcount) { 569 vfs_unbusy(mp); 570 goto out; 571 } 572 573 mtx_lock(&mountlist_mtx); 574 nmp = TAILQ_NEXT(mp, mnt_list); 575 vfs_unbusy(mp); 576 } 577 mtx_unlock(&mountlist_mtx); 578 out: 579 *countp = count; 580 return (0); 581 } 582 583 #ifdef COMPAT_FREEBSD4 584 /* 585 * Get old format filesystem statistics. 586 */ 587 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 588 589 #ifndef _SYS_SYSPROTO_H_ 590 struct freebsd4_statfs_args { 591 char *path; 592 struct ostatfs *buf; 593 }; 594 #endif 595 int 596 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 597 { 598 struct ostatfs osb; 599 struct statfs *sfp; 600 int error; 601 602 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 603 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 604 if (error == 0) { 605 freebsd4_cvtstatfs(sfp, &osb); 606 error = copyout(&osb, uap->buf, sizeof(osb)); 607 } 608 free(sfp, M_STATFS); 609 return (error); 610 } 611 612 /* 613 * Get filesystem statistics. 
614 */ 615 #ifndef _SYS_SYSPROTO_H_ 616 struct freebsd4_fstatfs_args { 617 int fd; 618 struct ostatfs *buf; 619 }; 620 #endif 621 int 622 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 623 { 624 struct ostatfs osb; 625 struct statfs *sfp; 626 int error; 627 628 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 629 error = kern_fstatfs(td, uap->fd, sfp); 630 if (error == 0) { 631 freebsd4_cvtstatfs(sfp, &osb); 632 error = copyout(&osb, uap->buf, sizeof(osb)); 633 } 634 free(sfp, M_STATFS); 635 return (error); 636 } 637 638 /* 639 * Get statistics on all filesystems. 640 */ 641 #ifndef _SYS_SYSPROTO_H_ 642 struct freebsd4_getfsstat_args { 643 struct ostatfs *buf; 644 long bufsize; 645 int mode; 646 }; 647 #endif 648 int 649 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 650 { 651 struct statfs *buf, *sp; 652 struct ostatfs osb; 653 size_t count, size; 654 int error; 655 656 if (uap->bufsize < 0) 657 return (EINVAL); 658 count = uap->bufsize / sizeof(struct ostatfs); 659 if (count > SIZE_MAX / sizeof(struct statfs)) 660 return (EINVAL); 661 size = count * sizeof(struct statfs); 662 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 663 uap->mode); 664 if (error == 0) 665 td->td_retval[0] = count; 666 if (size != 0) { 667 sp = buf; 668 while (count != 0 && error == 0) { 669 freebsd4_cvtstatfs(sp, &osb); 670 error = copyout(&osb, uap->buf, sizeof(osb)); 671 sp++; 672 uap->buf++; 673 count--; 674 } 675 free(buf, M_STATFS); 676 } 677 return (error); 678 } 679 680 /* 681 * Implement fstatfs() for (NFS) file handles. 
682 */ 683 #ifndef _SYS_SYSPROTO_H_ 684 struct freebsd4_fhstatfs_args { 685 struct fhandle *u_fhp; 686 struct ostatfs *buf; 687 }; 688 #endif 689 int 690 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 691 { 692 struct ostatfs osb; 693 struct statfs *sfp; 694 fhandle_t fh; 695 int error; 696 697 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 698 if (error != 0) 699 return (error); 700 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 701 error = kern_fhstatfs(td, fh, sfp); 702 if (error == 0) { 703 freebsd4_cvtstatfs(sfp, &osb); 704 error = copyout(&osb, uap->buf, sizeof(osb)); 705 } 706 free(sfp, M_STATFS); 707 return (error); 708 } 709 710 /* 711 * Convert a new format statfs structure to an old format statfs structure. 712 */ 713 static void 714 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 715 { 716 717 statfs_scale_blocks(nsp, LONG_MAX); 718 bzero(osp, sizeof(*osp)); 719 osp->f_bsize = nsp->f_bsize; 720 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 721 osp->f_blocks = nsp->f_blocks; 722 osp->f_bfree = nsp->f_bfree; 723 osp->f_bavail = nsp->f_bavail; 724 osp->f_files = MIN(nsp->f_files, LONG_MAX); 725 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 726 osp->f_owner = nsp->f_owner; 727 osp->f_type = nsp->f_type; 728 osp->f_flags = nsp->f_flags; 729 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 730 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 731 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 732 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 733 strlcpy(osp->f_fstypename, nsp->f_fstypename, 734 MIN(MFSNAMELEN, OMFSNAMELEN)); 735 strlcpy(osp->f_mntonname, nsp->f_mntonname, 736 MIN(MNAMELEN, OMNAMELEN)); 737 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 738 MIN(MNAMELEN, OMNAMELEN)); 739 osp->f_fsid = nsp->f_fsid; 740 } 741 #endif /* COMPAT_FREEBSD4 */ 742 743 #if defined(COMPAT_FREEBSD11) 744 /* 745 * Get old format filesystem statistics. 
746 */ 747 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 748 749 int 750 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 751 { 752 struct freebsd11_statfs osb; 753 struct statfs *sfp; 754 int error; 755 756 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 757 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 758 if (error == 0) { 759 freebsd11_cvtstatfs(sfp, &osb); 760 error = copyout(&osb, uap->buf, sizeof(osb)); 761 } 762 free(sfp, M_STATFS); 763 return (error); 764 } 765 766 /* 767 * Get filesystem statistics. 768 */ 769 int 770 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 771 { 772 struct freebsd11_statfs osb; 773 struct statfs *sfp; 774 int error; 775 776 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 777 error = kern_fstatfs(td, uap->fd, sfp); 778 if (error == 0) { 779 freebsd11_cvtstatfs(sfp, &osb); 780 error = copyout(&osb, uap->buf, sizeof(osb)); 781 } 782 free(sfp, M_STATFS); 783 return (error); 784 } 785 786 /* 787 * Get statistics on all filesystems. 
788 */ 789 int 790 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 791 { 792 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 793 } 794 795 int 796 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 797 long bufsize, int mode) 798 { 799 struct freebsd11_statfs osb; 800 struct statfs *buf, *sp; 801 size_t count, size; 802 int error; 803 804 if (bufsize < 0) 805 return (EINVAL); 806 807 count = bufsize / sizeof(struct ostatfs); 808 size = count * sizeof(struct statfs); 809 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 810 if (error == 0) 811 td->td_retval[0] = count; 812 if (size > 0) { 813 sp = buf; 814 while (count > 0 && error == 0) { 815 freebsd11_cvtstatfs(sp, &osb); 816 error = copyout(&osb, ubuf, sizeof(osb)); 817 sp++; 818 ubuf++; 819 count--; 820 } 821 free(buf, M_STATFS); 822 } 823 return (error); 824 } 825 826 /* 827 * Implement fstatfs() for (NFS) file handles. 828 */ 829 int 830 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 831 { 832 struct freebsd11_statfs osb; 833 struct statfs *sfp; 834 fhandle_t fh; 835 int error; 836 837 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 838 if (error) 839 return (error); 840 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 841 error = kern_fhstatfs(td, fh, sfp); 842 if (error == 0) { 843 freebsd11_cvtstatfs(sfp, &osb); 844 error = copyout(&osb, uap->buf, sizeof(osb)); 845 } 846 free(sfp, M_STATFS); 847 return (error); 848 } 849 850 /* 851 * Convert a new format statfs structure to an old format statfs structure. 
852 */ 853 static void 854 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 855 { 856 857 bzero(osp, sizeof(*osp)); 858 osp->f_version = FREEBSD11_STATFS_VERSION; 859 osp->f_type = nsp->f_type; 860 osp->f_flags = nsp->f_flags; 861 osp->f_bsize = nsp->f_bsize; 862 osp->f_iosize = nsp->f_iosize; 863 osp->f_blocks = nsp->f_blocks; 864 osp->f_bfree = nsp->f_bfree; 865 osp->f_bavail = nsp->f_bavail; 866 osp->f_files = nsp->f_files; 867 osp->f_ffree = nsp->f_ffree; 868 osp->f_syncwrites = nsp->f_syncwrites; 869 osp->f_asyncwrites = nsp->f_asyncwrites; 870 osp->f_syncreads = nsp->f_syncreads; 871 osp->f_asyncreads = nsp->f_asyncreads; 872 osp->f_namemax = nsp->f_namemax; 873 osp->f_owner = nsp->f_owner; 874 osp->f_fsid = nsp->f_fsid; 875 strlcpy(osp->f_fstypename, nsp->f_fstypename, 876 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 877 strlcpy(osp->f_mntonname, nsp->f_mntonname, 878 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 879 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 880 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 881 } 882 #endif /* COMPAT_FREEBSD11 */ 883 884 /* 885 * Change current working directory to a given file descriptor. 
886 */ 887 #ifndef _SYS_SYSPROTO_H_ 888 struct fchdir_args { 889 int fd; 890 }; 891 #endif 892 int 893 sys_fchdir(struct thread *td, struct fchdir_args *uap) 894 { 895 struct vnode *vp, *tdp; 896 struct mount *mp; 897 struct file *fp; 898 int error; 899 900 AUDIT_ARG_FD(uap->fd); 901 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 902 &fp); 903 if (error != 0) 904 return (error); 905 vp = fp->f_vnode; 906 vrefact(vp); 907 fdrop(fp, td); 908 vn_lock(vp, LK_SHARED | LK_RETRY); 909 AUDIT_ARG_VNODE1(vp); 910 error = change_dir(vp, td); 911 while (!error && (mp = vp->v_mountedhere) != NULL) { 912 if (vfs_busy(mp, 0)) 913 continue; 914 error = VFS_ROOT(mp, LK_SHARED, &tdp); 915 vfs_unbusy(mp); 916 if (error != 0) 917 break; 918 vput(vp); 919 vp = tdp; 920 } 921 if (error != 0) { 922 vput(vp); 923 return (error); 924 } 925 VOP_UNLOCK(vp); 926 pwd_chdir(td, vp); 927 return (0); 928 } 929 930 /* 931 * Change current working directory (``.''). 932 */ 933 #ifndef _SYS_SYSPROTO_H_ 934 struct chdir_args { 935 char *path; 936 }; 937 #endif 938 int 939 sys_chdir(struct thread *td, struct chdir_args *uap) 940 { 941 942 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 943 } 944 945 int 946 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 947 { 948 struct nameidata nd; 949 int error; 950 951 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 952 pathseg, path); 953 if ((error = namei(&nd)) != 0) 954 return (error); 955 if ((error = change_dir(nd.ni_vp, td)) != 0) { 956 vput(nd.ni_vp); 957 NDFREE_PNBUF(&nd); 958 return (error); 959 } 960 VOP_UNLOCK(nd.ni_vp); 961 NDFREE_PNBUF(&nd); 962 pwd_chdir(td, nd.ni_vp); 963 return (0); 964 } 965 966 static int unprivileged_chroot = 0; 967 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 968 &unprivileged_chroot, 0, 969 "Unprivileged processes can use chroot(2)"); 970 971 /* 972 * Takes locked vnode, unlocks it before returning. 
973 */ 974 static int 975 kern_chroot(struct thread *td, struct vnode *vp) 976 { 977 struct proc *p; 978 int error; 979 980 error = priv_check(td, PRIV_VFS_CHROOT); 981 if (error != 0) { 982 p = td->td_proc; 983 PROC_LOCK(p); 984 if (unprivileged_chroot == 0 || 985 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 986 PROC_UNLOCK(p); 987 goto e_vunlock; 988 } 989 PROC_UNLOCK(p); 990 } 991 992 error = change_dir(vp, td); 993 if (error != 0) 994 goto e_vunlock; 995 #ifdef MAC 996 error = mac_vnode_check_chroot(td->td_ucred, vp); 997 if (error != 0) 998 goto e_vunlock; 999 #endif 1000 VOP_UNLOCK(vp); 1001 error = pwd_chroot(td, vp); 1002 vrele(vp); 1003 return (error); 1004 e_vunlock: 1005 vput(vp); 1006 return (error); 1007 } 1008 1009 /* 1010 * Change notion of root (``/'') directory. 1011 */ 1012 #ifndef _SYS_SYSPROTO_H_ 1013 struct chroot_args { 1014 char *path; 1015 }; 1016 #endif 1017 int 1018 sys_chroot(struct thread *td, struct chroot_args *uap) 1019 { 1020 struct nameidata nd; 1021 int error; 1022 1023 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1024 UIO_USERSPACE, uap->path); 1025 error = namei(&nd); 1026 if (error != 0) 1027 return (error); 1028 NDFREE_PNBUF(&nd); 1029 error = kern_chroot(td, nd.ni_vp); 1030 return (error); 1031 } 1032 1033 /* 1034 * Change notion of root directory to a given file descriptor. 1035 */ 1036 #ifndef _SYS_SYSPROTO_H_ 1037 struct fchroot_args { 1038 int fd; 1039 }; 1040 #endif 1041 int 1042 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1043 { 1044 struct vnode *vp; 1045 struct file *fp; 1046 int error; 1047 1048 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fp); 1049 if (error != 0) 1050 return (error); 1051 vp = fp->f_vnode; 1052 vrefact(vp); 1053 fdrop(fp, td); 1054 vn_lock(vp, LK_SHARED | LK_RETRY); 1055 error = kern_chroot(td, vp); 1056 return (error); 1057 } 1058 1059 /* 1060 * Common routine for chroot and chdir. Callers must provide a locked vnode 1061 * instance. 
1062 */ 1063 int 1064 change_dir(struct vnode *vp, struct thread *td) 1065 { 1066 #ifdef MAC 1067 int error; 1068 #endif 1069 1070 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1071 if (vp->v_type != VDIR) 1072 return (ENOTDIR); 1073 #ifdef MAC 1074 error = mac_vnode_check_chdir(td->td_ucred, vp); 1075 if (error != 0) 1076 return (error); 1077 #endif 1078 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1079 } 1080 1081 static __inline void 1082 flags_to_rights(int flags, cap_rights_t *rightsp) 1083 { 1084 if (flags & O_EXEC) { 1085 cap_rights_set_one(rightsp, CAP_FEXECVE); 1086 if (flags & O_PATH) 1087 return; 1088 } else { 1089 switch ((flags & O_ACCMODE)) { 1090 case O_RDONLY: 1091 cap_rights_set_one(rightsp, CAP_READ); 1092 break; 1093 case O_RDWR: 1094 cap_rights_set_one(rightsp, CAP_READ); 1095 /* FALLTHROUGH */ 1096 case O_WRONLY: 1097 cap_rights_set_one(rightsp, CAP_WRITE); 1098 if (!(flags & (O_APPEND | O_TRUNC))) 1099 cap_rights_set_one(rightsp, CAP_SEEK); 1100 break; 1101 } 1102 } 1103 1104 if (flags & O_CREAT) 1105 cap_rights_set_one(rightsp, CAP_CREATE); 1106 1107 if (flags & O_TRUNC) 1108 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1109 1110 if (flags & (O_SYNC | O_FSYNC)) 1111 cap_rights_set_one(rightsp, CAP_FSYNC); 1112 1113 if (flags & (O_EXLOCK | O_SHLOCK)) 1114 cap_rights_set_one(rightsp, CAP_FLOCK); 1115 } 1116 1117 /* 1118 * Check permissions, allocate an open file structure, and call the device 1119 * open routine if any. 
1120 */ 1121 #ifndef _SYS_SYSPROTO_H_ 1122 struct open_args { 1123 char *path; 1124 int flags; 1125 int mode; 1126 }; 1127 #endif 1128 int 1129 sys_open(struct thread *td, struct open_args *uap) 1130 { 1131 1132 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1133 uap->flags, uap->mode)); 1134 } 1135 1136 #ifndef _SYS_SYSPROTO_H_ 1137 struct openat_args { 1138 int fd; 1139 char *path; 1140 int flag; 1141 int mode; 1142 }; 1143 #endif 1144 int 1145 sys_openat(struct thread *td, struct openat_args *uap) 1146 { 1147 1148 AUDIT_ARG_FD(uap->fd); 1149 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1150 uap->mode)); 1151 } 1152 1153 /* 1154 * If fpp != NULL, opened file is not installed into the file 1155 * descriptor table, instead it is returned in *fpp. This is 1156 * incompatible with fdopen(), in which case we return EINVAL. 1157 */ 1158 static int 1159 openatfp(struct thread *td, int dirfd, const char *path, 1160 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1161 { 1162 struct proc *p; 1163 struct filedesc *fdp; 1164 struct pwddesc *pdp; 1165 struct file *fp; 1166 struct vnode *vp; 1167 struct filecaps *fcaps; 1168 struct nameidata nd; 1169 cap_rights_t rights; 1170 int cmode, error, indx; 1171 1172 indx = -1; 1173 p = td->td_proc; 1174 fdp = p->p_fd; 1175 pdp = p->p_pd; 1176 1177 AUDIT_ARG_FFLAGS(flags); 1178 AUDIT_ARG_MODE(mode); 1179 cap_rights_init_one(&rights, CAP_LOOKUP); 1180 flags_to_rights(flags, &rights); 1181 1182 /* 1183 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1184 * may be specified. On the other hand, for O_PATH any mode 1185 * except O_EXEC is ignored. 1186 */ 1187 if ((flags & O_PATH) != 0) { 1188 flags &= ~O_ACCMODE; 1189 } else if ((flags & O_EXEC) != 0) { 1190 if (flags & O_ACCMODE) 1191 return (EINVAL); 1192 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1193 return (EINVAL); 1194 } else { 1195 flags = FFLAGS(flags); 1196 } 1197 1198 /* 1199 * Allocate a file structure. 
The descriptor to reference it 1200 * is allocated and used by finstall_refed() below. 1201 */ 1202 error = falloc_noinstall(td, &fp); 1203 if (error != 0) 1204 return (error); 1205 /* Set the flags early so the finit in devfs can pick them up. */ 1206 fp->f_flag = flags & FMASK; 1207 cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; 1208 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS, 1209 pathseg, path, dirfd, &rights); 1210 td->td_dupfd = -1; /* XXX check for fdopen */ 1211 error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS, 1212 td->td_ucred, fp); 1213 if (error != 0) { 1214 /* 1215 * If the vn_open replaced the method vector, something 1216 * wonderous happened deep below and we just pass it up 1217 * pretending we know what we do. 1218 */ 1219 if (error == ENXIO && fp->f_ops != &badfileops) { 1220 MPASS((flags & O_PATH) == 0); 1221 goto success; 1222 } 1223 1224 /* 1225 * Handle special fdopen() case. bleh. 1226 * 1227 * Don't do this for relative (capability) lookups; we don't 1228 * understand exactly what would happen, and we don't think 1229 * that it ever should. 1230 */ 1231 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && 1232 (error == ENODEV || error == ENXIO) && 1233 td->td_dupfd >= 0) { 1234 MPASS(fpp == NULL); 1235 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1236 &indx); 1237 if (error == 0) 1238 goto success; 1239 } 1240 1241 goto bad; 1242 } 1243 td->td_dupfd = 0; 1244 NDFREE_PNBUF(&nd); 1245 vp = nd.ni_vp; 1246 1247 /* 1248 * Store the vnode, for any f_type. Typically, the vnode use 1249 * count is decremented by direct call to vn_closefile() for 1250 * files that switched type in the cdevsw fdopen() method. 1251 */ 1252 fp->f_vnode = vp; 1253 1254 /* 1255 * If the file wasn't claimed by devfs bind it to the normal 1256 * vnode operations here. 
1257 */ 1258 if (fp->f_ops == &badfileops) { 1259 KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, 1260 ("Unexpected fifo fp %p vp %p", fp, vp)); 1261 if ((flags & O_PATH) != 0) { 1262 finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED), 1263 DTYPE_VNODE, NULL, &path_fileops); 1264 } else { 1265 finit_vnode(fp, flags, NULL, &vnops); 1266 } 1267 } 1268 1269 VOP_UNLOCK(vp); 1270 if (flags & O_TRUNC) { 1271 error = fo_truncate(fp, 0, td->td_ucred, td); 1272 if (error != 0) 1273 goto bad; 1274 } 1275 success: 1276 if (fpp != NULL) { 1277 MPASS(error == 0); 1278 NDFREE_IOCTLCAPS(&nd); 1279 *fpp = fp; 1280 return (0); 1281 } 1282 1283 /* 1284 * If we haven't already installed the FD (for dupfdopen), do so now. 1285 */ 1286 if (indx == -1) { 1287 #ifdef CAPABILITIES 1288 if ((nd.ni_resflags & NIRES_STRICTREL) != 0) 1289 fcaps = &nd.ni_filecaps; 1290 else 1291 #endif 1292 fcaps = NULL; 1293 error = finstall_refed(td, fp, &indx, flags, fcaps); 1294 /* On success finstall_refed() consumes fcaps. */ 1295 if (error != 0) { 1296 goto bad; 1297 } 1298 } else { 1299 NDFREE_IOCTLCAPS(&nd); 1300 falloc_abort(td, fp); 1301 } 1302 1303 td->td_retval[0] = indx; 1304 return (0); 1305 bad: 1306 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1307 NDFREE_IOCTLCAPS(&nd); 1308 falloc_abort(td, fp); 1309 return (error); 1310 } 1311 1312 int 1313 kern_openat(struct thread *td, int dirfd, const char *path, 1314 enum uio_seg pathseg, int flags, int mode) 1315 { 1316 return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL)); 1317 } 1318 1319 int 1320 kern_openatfp(struct thread *td, int dirfd, const char *path, 1321 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1322 { 1323 int error, old_dupfd; 1324 1325 old_dupfd = td->td_dupfd; 1326 td->td_dupfd = -1; 1327 error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp); 1328 td->td_dupfd = old_dupfd; 1329 return (error); 1330 } 1331 1332 #ifdef COMPAT_43 1333 /* 1334 * Create a file. 
1335 */ 1336 #ifndef _SYS_SYSPROTO_H_ 1337 struct ocreat_args { 1338 char *path; 1339 int mode; 1340 }; 1341 #endif 1342 int 1343 ocreat(struct thread *td, struct ocreat_args *uap) 1344 { 1345 1346 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1347 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1348 } 1349 #endif /* COMPAT_43 */ 1350 1351 /* 1352 * Create a special file. 1353 */ 1354 #ifndef _SYS_SYSPROTO_H_ 1355 struct mknodat_args { 1356 int fd; 1357 char *path; 1358 mode_t mode; 1359 dev_t dev; 1360 }; 1361 #endif 1362 int 1363 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1364 { 1365 1366 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1367 uap->dev)); 1368 } 1369 1370 #if defined(COMPAT_FREEBSD11) 1371 int 1372 freebsd11_mknod(struct thread *td, 1373 struct freebsd11_mknod_args *uap) 1374 { 1375 1376 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1377 uap->mode, uap->dev)); 1378 } 1379 1380 int 1381 freebsd11_mknodat(struct thread *td, 1382 struct freebsd11_mknodat_args *uap) 1383 { 1384 1385 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1386 uap->dev)); 1387 } 1388 #endif /* COMPAT_FREEBSD11 */ 1389 1390 int 1391 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1392 int mode, dev_t dev) 1393 { 1394 struct vnode *vp; 1395 struct mount *mp; 1396 struct vattr vattr; 1397 struct nameidata nd; 1398 int error, whiteout = 0; 1399 1400 AUDIT_ARG_MODE(mode); 1401 AUDIT_ARG_DEV(dev); 1402 switch (mode & S_IFMT) { 1403 case S_IFCHR: 1404 case S_IFBLK: 1405 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1406 if (error == 0 && dev == VNOVAL) 1407 error = EINVAL; 1408 break; 1409 case S_IFWHT: 1410 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1411 break; 1412 case S_IFIFO: 1413 if (dev == 0) 1414 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1415 /* FALLTHROUGH */ 1416 default: 1417 error = EINVAL; 1418 break; 1419 } 1420 if (error != 0) 1421 return (error); 
1422 NDPREINIT(&nd); 1423 restart: 1424 bwillwrite(); 1425 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1426 pathseg, path, fd, &cap_mknodat_rights); 1427 if ((error = namei(&nd)) != 0) 1428 return (error); 1429 vp = nd.ni_vp; 1430 if (vp != NULL) { 1431 NDFREE_PNBUF(&nd); 1432 if (vp == nd.ni_dvp) 1433 vrele(nd.ni_dvp); 1434 else 1435 vput(nd.ni_dvp); 1436 vrele(vp); 1437 return (EEXIST); 1438 } else { 1439 VATTR_NULL(&vattr); 1440 vattr.va_mode = (mode & ALLPERMS) & 1441 ~td->td_proc->p_pd->pd_cmask; 1442 vattr.va_rdev = dev; 1443 whiteout = 0; 1444 1445 switch (mode & S_IFMT) { 1446 case S_IFCHR: 1447 vattr.va_type = VCHR; 1448 break; 1449 case S_IFBLK: 1450 vattr.va_type = VBLK; 1451 break; 1452 case S_IFWHT: 1453 whiteout = 1; 1454 break; 1455 default: 1456 panic("kern_mknod: invalid mode"); 1457 } 1458 } 1459 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1460 NDFREE_PNBUF(&nd); 1461 vput(nd.ni_dvp); 1462 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1463 return (error); 1464 goto restart; 1465 } 1466 #ifdef MAC 1467 if (error == 0 && !whiteout) 1468 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1469 &nd.ni_cnd, &vattr); 1470 #endif 1471 if (error == 0) { 1472 if (whiteout) 1473 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1474 else { 1475 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1476 &nd.ni_cnd, &vattr); 1477 } 1478 } 1479 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1480 true); 1481 vn_finished_write(mp); 1482 NDFREE_PNBUF(&nd); 1483 if (error == ERELOOKUP) 1484 goto restart; 1485 return (error); 1486 } 1487 1488 /* 1489 * Create a named pipe. 
1490 */ 1491 #ifndef _SYS_SYSPROTO_H_ 1492 struct mkfifo_args { 1493 char *path; 1494 int mode; 1495 }; 1496 #endif 1497 int 1498 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1499 { 1500 1501 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1502 uap->mode)); 1503 } 1504 1505 #ifndef _SYS_SYSPROTO_H_ 1506 struct mkfifoat_args { 1507 int fd; 1508 char *path; 1509 mode_t mode; 1510 }; 1511 #endif 1512 int 1513 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1514 { 1515 1516 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1517 uap->mode)); 1518 } 1519 1520 int 1521 kern_mkfifoat(struct thread *td, int fd, const char *path, 1522 enum uio_seg pathseg, int mode) 1523 { 1524 struct mount *mp; 1525 struct vattr vattr; 1526 struct nameidata nd; 1527 int error; 1528 1529 AUDIT_ARG_MODE(mode); 1530 NDPREINIT(&nd); 1531 restart: 1532 bwillwrite(); 1533 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1534 pathseg, path, fd, &cap_mkfifoat_rights); 1535 if ((error = namei(&nd)) != 0) 1536 return (error); 1537 if (nd.ni_vp != NULL) { 1538 NDFREE_PNBUF(&nd); 1539 if (nd.ni_vp == nd.ni_dvp) 1540 vrele(nd.ni_dvp); 1541 else 1542 vput(nd.ni_dvp); 1543 vrele(nd.ni_vp); 1544 return (EEXIST); 1545 } 1546 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1547 NDFREE_PNBUF(&nd); 1548 vput(nd.ni_dvp); 1549 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1550 return (error); 1551 goto restart; 1552 } 1553 VATTR_NULL(&vattr); 1554 vattr.va_type = VFIFO; 1555 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1556 #ifdef MAC 1557 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1558 &vattr); 1559 if (error != 0) 1560 goto out; 1561 #endif 1562 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1563 #ifdef MAC 1564 out: 1565 #endif 1566 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? 
&nd.ni_vp : NULL, true); 1567 vn_finished_write(mp); 1568 NDFREE_PNBUF(&nd); 1569 if (error == ERELOOKUP) 1570 goto restart; 1571 return (error); 1572 } 1573 1574 /* 1575 * Make a hard file link. 1576 */ 1577 #ifndef _SYS_SYSPROTO_H_ 1578 struct link_args { 1579 char *path; 1580 char *link; 1581 }; 1582 #endif 1583 int 1584 sys_link(struct thread *td, struct link_args *uap) 1585 { 1586 1587 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1588 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1589 } 1590 1591 #ifndef _SYS_SYSPROTO_H_ 1592 struct linkat_args { 1593 int fd1; 1594 char *path1; 1595 int fd2; 1596 char *path2; 1597 int flag; 1598 }; 1599 #endif 1600 int 1601 sys_linkat(struct thread *td, struct linkat_args *uap) 1602 { 1603 1604 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1605 UIO_USERSPACE, uap->flag)); 1606 } 1607 1608 int hardlink_check_uid = 0; 1609 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1610 &hardlink_check_uid, 0, 1611 "Unprivileged processes cannot create hard links to files owned by other " 1612 "users"); 1613 static int hardlink_check_gid = 0; 1614 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1615 &hardlink_check_gid, 0, 1616 "Unprivileged processes cannot create hard links to files owned by other " 1617 "groups"); 1618 1619 static int 1620 can_hardlink(struct vnode *vp, struct ucred *cred) 1621 { 1622 struct vattr va; 1623 int error; 1624 1625 if (!hardlink_check_uid && !hardlink_check_gid) 1626 return (0); 1627 1628 error = VOP_GETATTR(vp, &va, cred); 1629 if (error != 0) 1630 return (error); 1631 1632 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1633 error = priv_check_cred(cred, PRIV_VFS_LINK); 1634 if (error != 0) 1635 return (error); 1636 } 1637 1638 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1639 error = priv_check_cred(cred, PRIV_VFS_LINK); 1640 if (error != 0) 1641 return (error); 1642 } 1643 1644 return (0); 1645 } 1646 1647 int 
1648 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1649 const char *path2, enum uio_seg segflag, int flag) 1650 { 1651 struct nameidata nd; 1652 int error; 1653 1654 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1655 AT_EMPTY_PATH)) != 0) 1656 return (EINVAL); 1657 1658 NDPREINIT(&nd); 1659 do { 1660 bwillwrite(); 1661 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1662 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1663 segflag, path1, fd1, &cap_linkat_source_rights); 1664 if ((error = namei(&nd)) != 0) 1665 return (error); 1666 NDFREE_PNBUF(&nd); 1667 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1668 error = priv_check(td, PRIV_VFS_FHOPEN); 1669 if (error != 0) { 1670 vrele(nd.ni_vp); 1671 return (error); 1672 } 1673 } 1674 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1675 } while (error == EAGAIN || error == ERELOOKUP); 1676 return (error); 1677 } 1678 1679 static int 1680 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1681 enum uio_seg segflag) 1682 { 1683 struct nameidata nd; 1684 struct mount *mp; 1685 int error; 1686 1687 if (vp->v_type == VDIR) { 1688 vrele(vp); 1689 return (EPERM); /* POSIX */ 1690 } 1691 NDINIT_ATRIGHTS(&nd, CREATE, 1692 LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1693 &cap_linkat_target_rights); 1694 if ((error = namei(&nd)) == 0) { 1695 if (nd.ni_vp != NULL) { 1696 NDFREE_PNBUF(&nd); 1697 if (nd.ni_dvp == nd.ni_vp) 1698 vrele(nd.ni_dvp); 1699 else 1700 vput(nd.ni_dvp); 1701 vrele(nd.ni_vp); 1702 vrele(vp); 1703 return (EEXIST); 1704 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1705 /* 1706 * Cross-device link. No need to recheck 1707 * vp->v_type, since it cannot change, except 1708 * to VBAD. 
1709 */ 1710 NDFREE_PNBUF(&nd); 1711 vput(nd.ni_dvp); 1712 vrele(vp); 1713 return (EXDEV); 1714 } else if (vn_lock(vp, LK_EXCLUSIVE) == 0) { 1715 error = can_hardlink(vp, td->td_ucred); 1716 #ifdef MAC 1717 if (error == 0) 1718 error = mac_vnode_check_link(td->td_ucred, 1719 nd.ni_dvp, vp, &nd.ni_cnd); 1720 #endif 1721 if (error != 0) { 1722 vput(vp); 1723 vput(nd.ni_dvp); 1724 NDFREE_PNBUF(&nd); 1725 return (error); 1726 } 1727 error = vn_start_write(vp, &mp, V_NOWAIT); 1728 if (error != 0) { 1729 vput(vp); 1730 vput(nd.ni_dvp); 1731 NDFREE_PNBUF(&nd); 1732 error = vn_start_write(NULL, &mp, 1733 V_XSLEEP | V_PCATCH); 1734 if (error != 0) 1735 return (error); 1736 return (EAGAIN); 1737 } 1738 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1739 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1740 vn_finished_write(mp); 1741 NDFREE_PNBUF(&nd); 1742 vp = NULL; 1743 } else { 1744 vput(nd.ni_dvp); 1745 NDFREE_PNBUF(&nd); 1746 vrele(vp); 1747 return (EAGAIN); 1748 } 1749 } 1750 if (vp != NULL) 1751 vrele(vp); 1752 return (error); 1753 } 1754 1755 /* 1756 * Make a symbolic link. 
1757 */ 1758 #ifndef _SYS_SYSPROTO_H_ 1759 struct symlink_args { 1760 char *path; 1761 char *link; 1762 }; 1763 #endif 1764 int 1765 sys_symlink(struct thread *td, struct symlink_args *uap) 1766 { 1767 1768 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1769 UIO_USERSPACE)); 1770 } 1771 1772 #ifndef _SYS_SYSPROTO_H_ 1773 struct symlinkat_args { 1774 char *path; 1775 int fd; 1776 char *path2; 1777 }; 1778 #endif 1779 int 1780 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1781 { 1782 1783 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1784 UIO_USERSPACE)); 1785 } 1786 1787 int 1788 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1789 enum uio_seg segflg) 1790 { 1791 struct mount *mp; 1792 struct vattr vattr; 1793 const char *syspath; 1794 char *tmppath; 1795 struct nameidata nd; 1796 int error; 1797 1798 if (segflg == UIO_SYSSPACE) { 1799 syspath = path1; 1800 } else { 1801 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1802 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1803 goto out; 1804 syspath = tmppath; 1805 } 1806 AUDIT_ARG_TEXT(syspath); 1807 NDPREINIT(&nd); 1808 restart: 1809 bwillwrite(); 1810 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1811 path2, fd, &cap_symlinkat_rights); 1812 if ((error = namei(&nd)) != 0) 1813 goto out; 1814 if (nd.ni_vp) { 1815 NDFREE_PNBUF(&nd); 1816 if (nd.ni_vp == nd.ni_dvp) 1817 vrele(nd.ni_dvp); 1818 else 1819 vput(nd.ni_dvp); 1820 vrele(nd.ni_vp); 1821 nd.ni_vp = NULL; 1822 error = EEXIST; 1823 goto out; 1824 } 1825 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1826 NDFREE_PNBUF(&nd); 1827 vput(nd.ni_dvp); 1828 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1829 goto out; 1830 goto restart; 1831 } 1832 VATTR_NULL(&vattr); 1833 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1834 #ifdef MAC 1835 vattr.va_type = VLNK; 1836 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 
&nd.ni_cnd, 1837 &vattr); 1838 if (error != 0) 1839 goto out2; 1840 #endif 1841 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1842 #ifdef MAC 1843 out2: 1844 #endif 1845 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1846 vn_finished_write(mp); 1847 NDFREE_PNBUF(&nd); 1848 if (error == ERELOOKUP) 1849 goto restart; 1850 out: 1851 if (segflg != UIO_SYSSPACE) 1852 uma_zfree(namei_zone, tmppath); 1853 return (error); 1854 } 1855 1856 /* 1857 * Delete a whiteout from the filesystem. 1858 */ 1859 #ifndef _SYS_SYSPROTO_H_ 1860 struct undelete_args { 1861 char *path; 1862 }; 1863 #endif 1864 int 1865 sys_undelete(struct thread *td, struct undelete_args *uap) 1866 { 1867 struct mount *mp; 1868 struct nameidata nd; 1869 int error; 1870 1871 NDPREINIT(&nd); 1872 restart: 1873 bwillwrite(); 1874 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1875 UIO_USERSPACE, uap->path); 1876 error = namei(&nd); 1877 if (error != 0) 1878 return (error); 1879 1880 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1881 NDFREE_PNBUF(&nd); 1882 if (nd.ni_vp == nd.ni_dvp) 1883 vrele(nd.ni_dvp); 1884 else 1885 vput(nd.ni_dvp); 1886 if (nd.ni_vp) 1887 vrele(nd.ni_vp); 1888 return (EEXIST); 1889 } 1890 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1891 NDFREE_PNBUF(&nd); 1892 vput(nd.ni_dvp); 1893 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1894 return (error); 1895 goto restart; 1896 } 1897 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1898 NDFREE_PNBUF(&nd); 1899 vput(nd.ni_dvp); 1900 vn_finished_write(mp); 1901 if (error == ERELOOKUP) 1902 goto restart; 1903 return (error); 1904 } 1905 1906 /* 1907 * Delete a name from the filesystem. 
1908 */ 1909 #ifndef _SYS_SYSPROTO_H_ 1910 struct unlink_args { 1911 char *path; 1912 }; 1913 #endif 1914 int 1915 sys_unlink(struct thread *td, struct unlink_args *uap) 1916 { 1917 1918 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1919 0, 0)); 1920 } 1921 1922 static int 1923 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1924 int flag, enum uio_seg pathseg, ino_t oldinum) 1925 { 1926 1927 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1928 return (EINVAL); 1929 1930 if ((flag & AT_REMOVEDIR) != 0) 1931 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1932 1933 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1934 } 1935 1936 #ifndef _SYS_SYSPROTO_H_ 1937 struct unlinkat_args { 1938 int fd; 1939 char *path; 1940 int flag; 1941 }; 1942 #endif 1943 int 1944 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1945 { 1946 1947 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1948 UIO_USERSPACE, 0)); 1949 } 1950 1951 #ifndef _SYS_SYSPROTO_H_ 1952 struct funlinkat_args { 1953 int dfd; 1954 const char *path; 1955 int fd; 1956 int flag; 1957 }; 1958 #endif 1959 int 1960 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1961 { 1962 1963 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1964 UIO_USERSPACE, 0)); 1965 } 1966 1967 int 1968 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1969 enum uio_seg pathseg, int flag, ino_t oldinum) 1970 { 1971 struct mount *mp; 1972 struct file *fp; 1973 struct vnode *vp; 1974 struct nameidata nd; 1975 struct stat sb; 1976 int error; 1977 1978 fp = NULL; 1979 if (fd != FD_NONE) { 1980 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1981 if (error != 0) 1982 return (error); 1983 } 1984 1985 NDPREINIT(&nd); 1986 restart: 1987 bwillwrite(); 1988 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1989 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1990 pathseg, 
path, dfd, &cap_unlinkat_rights); 1991 if ((error = namei(&nd)) != 0) { 1992 if (error == EINVAL) 1993 error = EPERM; 1994 goto fdout; 1995 } 1996 vp = nd.ni_vp; 1997 if (vp->v_type == VDIR && oldinum == 0) { 1998 error = EPERM; /* POSIX */ 1999 } else if (oldinum != 0 && 2000 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2001 sb.st_ino != oldinum) { 2002 error = EIDRM; /* Identifier removed */ 2003 } else if (fp != NULL && fp->f_vnode != vp) { 2004 if (VN_IS_DOOMED(fp->f_vnode)) 2005 error = EBADF; 2006 else 2007 error = EDEADLK; 2008 } else { 2009 /* 2010 * The root of a mounted filesystem cannot be deleted. 2011 * 2012 * XXX: can this only be a VDIR case? 2013 */ 2014 if (vp->v_vflag & VV_ROOT) 2015 error = EBUSY; 2016 } 2017 if (error == 0) { 2018 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2019 NDFREE_PNBUF(&nd); 2020 vput(nd.ni_dvp); 2021 if (vp == nd.ni_dvp) 2022 vrele(vp); 2023 else 2024 vput(vp); 2025 if ((error = vn_start_write(NULL, &mp, 2026 V_XSLEEP | V_PCATCH)) != 0) { 2027 goto fdout; 2028 } 2029 goto restart; 2030 } 2031 #ifdef MAC 2032 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2033 &nd.ni_cnd); 2034 if (error != 0) 2035 goto out; 2036 #endif 2037 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2038 #ifdef MAC 2039 out: 2040 #endif 2041 vn_finished_write(mp); 2042 } 2043 NDFREE_PNBUF(&nd); 2044 vput(nd.ni_dvp); 2045 if (vp == nd.ni_dvp) 2046 vrele(vp); 2047 else 2048 vput(vp); 2049 if (error == ERELOOKUP) 2050 goto restart; 2051 fdout: 2052 if (fp != NULL) 2053 fdrop(fp, td); 2054 return (error); 2055 } 2056 2057 /* 2058 * Reposition read/write file offset. 
2059 */ 2060 #ifndef _SYS_SYSPROTO_H_ 2061 struct lseek_args { 2062 int fd; 2063 int pad; 2064 off_t offset; 2065 int whence; 2066 }; 2067 #endif 2068 int 2069 sys_lseek(struct thread *td, struct lseek_args *uap) 2070 { 2071 2072 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2073 } 2074 2075 int 2076 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2077 { 2078 struct file *fp; 2079 int error; 2080 2081 AUDIT_ARG_FD(fd); 2082 error = fget(td, fd, &cap_seek_rights, &fp); 2083 if (error != 0) 2084 return (error); 2085 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2086 fo_seek(fp, offset, whence, td) : ESPIPE; 2087 fdrop(fp, td); 2088 return (error); 2089 } 2090 2091 #if defined(COMPAT_43) 2092 /* 2093 * Reposition read/write file offset. 2094 */ 2095 #ifndef _SYS_SYSPROTO_H_ 2096 struct olseek_args { 2097 int fd; 2098 long offset; 2099 int whence; 2100 }; 2101 #endif 2102 int 2103 olseek(struct thread *td, struct olseek_args *uap) 2104 { 2105 2106 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2107 } 2108 #endif /* COMPAT_43 */ 2109 2110 #if defined(COMPAT_FREEBSD6) 2111 /* Version with the 'pad' argument */ 2112 int 2113 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2114 { 2115 2116 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2117 } 2118 #endif 2119 2120 /* 2121 * Check access permissions using passed credentials. 2122 */ 2123 static int 2124 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2125 struct thread *td) 2126 { 2127 accmode_t accmode; 2128 int error; 2129 2130 /* Flags == 0 means only check for existence. 
*/ 2131 if (user_flags == 0) 2132 return (0); 2133 2134 accmode = 0; 2135 if (user_flags & R_OK) 2136 accmode |= VREAD; 2137 if (user_flags & W_OK) 2138 accmode |= VWRITE; 2139 if (user_flags & X_OK) 2140 accmode |= VEXEC; 2141 #ifdef MAC 2142 error = mac_vnode_check_access(cred, vp, accmode); 2143 if (error != 0) 2144 return (error); 2145 #endif 2146 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2147 error = VOP_ACCESS(vp, accmode, cred, td); 2148 return (error); 2149 } 2150 2151 /* 2152 * Check access permissions using "real" credentials. 2153 */ 2154 #ifndef _SYS_SYSPROTO_H_ 2155 struct access_args { 2156 char *path; 2157 int amode; 2158 }; 2159 #endif 2160 int 2161 sys_access(struct thread *td, struct access_args *uap) 2162 { 2163 2164 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2165 0, uap->amode)); 2166 } 2167 2168 #ifndef _SYS_SYSPROTO_H_ 2169 struct faccessat_args { 2170 int dirfd; 2171 char *path; 2172 int amode; 2173 int flag; 2174 } 2175 #endif 2176 int 2177 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2178 { 2179 2180 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2181 uap->amode)); 2182 } 2183 2184 int 2185 kern_accessat(struct thread *td, int fd, const char *path, 2186 enum uio_seg pathseg, int flag, int amode) 2187 { 2188 struct ucred *cred, *usecred; 2189 struct vnode *vp; 2190 struct nameidata nd; 2191 int error; 2192 2193 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2194 AT_SYMLINK_NOFOLLOW)) != 0) 2195 return (EINVAL); 2196 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2197 return (EINVAL); 2198 2199 /* 2200 * Create and modify a temporary credential instead of one that 2201 * is potentially shared (if we need one). 
2202 */ 2203 cred = td->td_ucred; 2204 if ((flag & AT_EACCESS) == 0 && 2205 ((cred->cr_uid != cred->cr_ruid || 2206 cred->cr_rgid != cred->cr_groups[0]))) { 2207 usecred = crdup(cred); 2208 usecred->cr_uid = cred->cr_ruid; 2209 usecred->cr_groups[0] = cred->cr_rgid; 2210 td->td_ucred = usecred; 2211 } else 2212 usecred = cred; 2213 AUDIT_ARG_VALUE(amode); 2214 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2215 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2216 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2217 if ((error = namei(&nd)) != 0) 2218 goto out; 2219 vp = nd.ni_vp; 2220 2221 error = vn_access(vp, amode, usecred, td); 2222 NDFREE_PNBUF(&nd); 2223 vput(vp); 2224 out: 2225 if (usecred != cred) { 2226 td->td_ucred = cred; 2227 crfree(usecred); 2228 } 2229 return (error); 2230 } 2231 2232 /* 2233 * Check access permissions using "effective" credentials. 2234 */ 2235 #ifndef _SYS_SYSPROTO_H_ 2236 struct eaccess_args { 2237 char *path; 2238 int amode; 2239 }; 2240 #endif 2241 int 2242 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2243 { 2244 2245 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2246 AT_EACCESS, uap->amode)); 2247 } 2248 2249 #if defined(COMPAT_43) 2250 /* 2251 * Get file status; this version follows links. 2252 */ 2253 #ifndef _SYS_SYSPROTO_H_ 2254 struct ostat_args { 2255 char *path; 2256 struct ostat *ub; 2257 }; 2258 #endif 2259 int 2260 ostat(struct thread *td, struct ostat_args *uap) 2261 { 2262 struct stat sb; 2263 struct ostat osb; 2264 int error; 2265 2266 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2267 if (error != 0) 2268 return (error); 2269 cvtstat(&sb, &osb); 2270 return (copyout(&osb, uap->ub, sizeof (osb))); 2271 } 2272 2273 /* 2274 * Get file status; this version does not follow links. 
2275 */ 2276 #ifndef _SYS_SYSPROTO_H_ 2277 struct olstat_args { 2278 char *path; 2279 struct ostat *ub; 2280 }; 2281 #endif 2282 int 2283 olstat(struct thread *td, struct olstat_args *uap) 2284 { 2285 struct stat sb; 2286 struct ostat osb; 2287 int error; 2288 2289 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2290 UIO_USERSPACE, &sb); 2291 if (error != 0) 2292 return (error); 2293 cvtstat(&sb, &osb); 2294 return (copyout(&osb, uap->ub, sizeof (osb))); 2295 } 2296 2297 /* 2298 * Convert from an old to a new stat structure. 2299 * XXX: many values are blindly truncated. 2300 */ 2301 void 2302 cvtstat(struct stat *st, struct ostat *ost) 2303 { 2304 2305 bzero(ost, sizeof(*ost)); 2306 ost->st_dev = st->st_dev; 2307 ost->st_ino = st->st_ino; 2308 ost->st_mode = st->st_mode; 2309 ost->st_nlink = st->st_nlink; 2310 ost->st_uid = st->st_uid; 2311 ost->st_gid = st->st_gid; 2312 ost->st_rdev = st->st_rdev; 2313 ost->st_size = MIN(st->st_size, INT32_MAX); 2314 ost->st_atim = st->st_atim; 2315 ost->st_mtim = st->st_mtim; 2316 ost->st_ctim = st->st_ctim; 2317 ost->st_blksize = st->st_blksize; 2318 ost->st_blocks = st->st_blocks; 2319 ost->st_flags = st->st_flags; 2320 ost->st_gen = st->st_gen; 2321 } 2322 #endif /* COMPAT_43 */ 2323 2324 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2325 int ino64_trunc_error; 2326 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2327 &ino64_trunc_error, 0, 2328 "Error on truncation of device, file or inode number, or link count"); 2329 2330 int 2331 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2332 { 2333 2334 ost->st_dev = st->st_dev; 2335 if (ost->st_dev != st->st_dev) { 2336 switch (ino64_trunc_error) { 2337 default: 2338 /* 2339 * Since dev_t is almost raw, don't clamp to the 2340 * maximum for case 2, but ignore the error. 
2341 */ 2342 break; 2343 case 1: 2344 return (EOVERFLOW); 2345 } 2346 } 2347 ost->st_ino = st->st_ino; 2348 if (ost->st_ino != st->st_ino) { 2349 switch (ino64_trunc_error) { 2350 default: 2351 case 0: 2352 break; 2353 case 1: 2354 return (EOVERFLOW); 2355 case 2: 2356 ost->st_ino = UINT32_MAX; 2357 break; 2358 } 2359 } 2360 ost->st_mode = st->st_mode; 2361 ost->st_nlink = st->st_nlink; 2362 if (ost->st_nlink != st->st_nlink) { 2363 switch (ino64_trunc_error) { 2364 default: 2365 case 0: 2366 break; 2367 case 1: 2368 return (EOVERFLOW); 2369 case 2: 2370 ost->st_nlink = UINT16_MAX; 2371 break; 2372 } 2373 } 2374 ost->st_uid = st->st_uid; 2375 ost->st_gid = st->st_gid; 2376 ost->st_rdev = st->st_rdev; 2377 if (ost->st_rdev != st->st_rdev) { 2378 switch (ino64_trunc_error) { 2379 default: 2380 break; 2381 case 1: 2382 return (EOVERFLOW); 2383 } 2384 } 2385 ost->st_atim = st->st_atim; 2386 ost->st_mtim = st->st_mtim; 2387 ost->st_ctim = st->st_ctim; 2388 ost->st_size = st->st_size; 2389 ost->st_blocks = st->st_blocks; 2390 ost->st_blksize = st->st_blksize; 2391 ost->st_flags = st->st_flags; 2392 ost->st_gen = st->st_gen; 2393 ost->st_lspare = 0; 2394 ost->st_birthtim = st->st_birthtim; 2395 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2396 sizeof(*ost) - offsetof(struct freebsd11_stat, 2397 st_birthtim) - sizeof(ost->st_birthtim)); 2398 return (0); 2399 } 2400 2401 int 2402 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2403 { 2404 struct stat sb; 2405 struct freebsd11_stat osb; 2406 int error; 2407 2408 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2409 if (error != 0) 2410 return (error); 2411 error = freebsd11_cvtstat(&sb, &osb); 2412 if (error == 0) 2413 error = copyout(&osb, uap->ub, sizeof(osb)); 2414 return (error); 2415 } 2416 2417 int 2418 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2419 { 2420 struct stat sb; 2421 struct freebsd11_stat osb; 2422 int error; 2423 2424 error = 
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2425 UIO_USERSPACE, &sb); 2426 if (error != 0) 2427 return (error); 2428 error = freebsd11_cvtstat(&sb, &osb); 2429 if (error == 0) 2430 error = copyout(&osb, uap->ub, sizeof(osb)); 2431 return (error); 2432 } 2433 2434 int 2435 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2436 { 2437 struct fhandle fh; 2438 struct stat sb; 2439 struct freebsd11_stat osb; 2440 int error; 2441 2442 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2443 if (error != 0) 2444 return (error); 2445 error = kern_fhstat(td, fh, &sb); 2446 if (error != 0) 2447 return (error); 2448 error = freebsd11_cvtstat(&sb, &osb); 2449 if (error == 0) 2450 error = copyout(&osb, uap->sb, sizeof(osb)); 2451 return (error); 2452 } 2453 2454 int 2455 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2456 { 2457 struct stat sb; 2458 struct freebsd11_stat osb; 2459 int error; 2460 2461 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2462 UIO_USERSPACE, &sb); 2463 if (error != 0) 2464 return (error); 2465 error = freebsd11_cvtstat(&sb, &osb); 2466 if (error == 0) 2467 error = copyout(&osb, uap->buf, sizeof(osb)); 2468 return (error); 2469 } 2470 #endif /* COMPAT_FREEBSD11 */ 2471 2472 /* 2473 * Get file status 2474 */ 2475 #ifndef _SYS_SYSPROTO_H_ 2476 struct fstatat_args { 2477 int fd; 2478 char *path; 2479 struct stat *buf; 2480 int flag; 2481 } 2482 #endif 2483 int 2484 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2485 { 2486 struct stat sb; 2487 int error; 2488 2489 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2490 UIO_USERSPACE, &sb); 2491 if (error == 0) 2492 error = copyout(&sb, uap->buf, sizeof (sb)); 2493 return (error); 2494 } 2495 2496 int 2497 kern_statat(struct thread *td, int flag, int fd, const char *path, 2498 enum uio_seg pathseg, struct stat *sbp) 2499 { 2500 struct nameidata nd; 2501 int error; 2502 2503 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2504 AT_EMPTY_PATH)) != 0) 2505 return (EINVAL); 2506 2507 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2508 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2509 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2510 2511 if ((error = namei(&nd)) != 0) { 2512 if (error == ENOTDIR && 2513 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2514 error = kern_fstat(td, fd, sbp); 2515 return (error); 2516 } 2517 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2518 NDFREE_PNBUF(&nd); 2519 vput(nd.ni_vp); 2520 #ifdef __STAT_TIME_T_EXT 2521 sbp->st_atim_ext = 0; 2522 sbp->st_mtim_ext = 0; 2523 sbp->st_ctim_ext = 0; 2524 sbp->st_btim_ext = 0; 2525 #endif 2526 #ifdef KTRACE 2527 if (KTRPOINT(td, KTR_STRUCT)) 2528 ktrstat_error(sbp, error); 2529 #endif 2530 return (error); 2531 } 2532 2533 #if defined(COMPAT_FREEBSD11) 2534 /* 2535 * Implementation of the NetBSD [l]stat() functions. 2536 */ 2537 int 2538 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2539 { 2540 struct freebsd11_stat sb11; 2541 int error; 2542 2543 error = freebsd11_cvtstat(sb, &sb11); 2544 if (error != 0) 2545 return (error); 2546 2547 bzero(nsb, sizeof(*nsb)); 2548 CP(sb11, *nsb, st_dev); 2549 CP(sb11, *nsb, st_ino); 2550 CP(sb11, *nsb, st_mode); 2551 CP(sb11, *nsb, st_nlink); 2552 CP(sb11, *nsb, st_uid); 2553 CP(sb11, *nsb, st_gid); 2554 CP(sb11, *nsb, st_rdev); 2555 CP(sb11, *nsb, st_atim); 2556 CP(sb11, *nsb, st_mtim); 2557 CP(sb11, *nsb, st_ctim); 2558 CP(sb11, *nsb, st_size); 2559 CP(sb11, *nsb, st_blocks); 2560 CP(sb11, *nsb, st_blksize); 2561 CP(sb11, *nsb, st_flags); 2562 CP(sb11, *nsb, st_gen); 2563 CP(sb11, *nsb, st_birthtim); 2564 return (0); 2565 } 2566 2567 #ifndef _SYS_SYSPROTO_H_ 2568 struct freebsd11_nstat_args { 2569 char *path; 2570 struct nstat *ub; 2571 }; 2572 #endif 2573 int 2574 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2575 { 2576 struct stat sb; 2577 struct nstat nsb; 2578 int error; 2579 
2580 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2581 if (error != 0) 2582 return (error); 2583 error = freebsd11_cvtnstat(&sb, &nsb); 2584 if (error == 0) 2585 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2586 return (error); 2587 } 2588 2589 /* 2590 * NetBSD lstat. Get file status; this version does not follow links. 2591 */ 2592 #ifndef _SYS_SYSPROTO_H_ 2593 struct freebsd11_nlstat_args { 2594 char *path; 2595 struct nstat *ub; 2596 }; 2597 #endif 2598 int 2599 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2600 { 2601 struct stat sb; 2602 struct nstat nsb; 2603 int error; 2604 2605 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2606 UIO_USERSPACE, &sb); 2607 if (error != 0) 2608 return (error); 2609 error = freebsd11_cvtnstat(&sb, &nsb); 2610 if (error == 0) 2611 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2612 return (error); 2613 } 2614 #endif /* COMPAT_FREEBSD11 */ 2615 2616 /* 2617 * Get configurable pathname variables. 
2618 */ 2619 #ifndef _SYS_SYSPROTO_H_ 2620 struct pathconf_args { 2621 char *path; 2622 int name; 2623 }; 2624 #endif 2625 int 2626 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2627 { 2628 long value; 2629 int error; 2630 2631 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2632 &value); 2633 if (error == 0) 2634 td->td_retval[0] = value; 2635 return (error); 2636 } 2637 2638 #ifndef _SYS_SYSPROTO_H_ 2639 struct lpathconf_args { 2640 char *path; 2641 int name; 2642 }; 2643 #endif 2644 int 2645 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2646 { 2647 long value; 2648 int error; 2649 2650 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2651 NOFOLLOW, &value); 2652 if (error == 0) 2653 td->td_retval[0] = value; 2654 return (error); 2655 } 2656 2657 int 2658 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2659 int name, u_long flags, long *valuep) 2660 { 2661 struct nameidata nd; 2662 int error; 2663 2664 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2665 pathseg, path); 2666 if ((error = namei(&nd)) != 0) 2667 return (error); 2668 NDFREE_PNBUF(&nd); 2669 2670 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2671 vput(nd.ni_vp); 2672 return (error); 2673 } 2674 2675 /* 2676 * Return target name of a symbolic link. 
2677 */ 2678 #ifndef _SYS_SYSPROTO_H_ 2679 struct readlink_args { 2680 char *path; 2681 char *buf; 2682 size_t count; 2683 }; 2684 #endif 2685 int 2686 sys_readlink(struct thread *td, struct readlink_args *uap) 2687 { 2688 2689 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2690 uap->buf, UIO_USERSPACE, uap->count)); 2691 } 2692 #ifndef _SYS_SYSPROTO_H_ 2693 struct readlinkat_args { 2694 int fd; 2695 char *path; 2696 char *buf; 2697 size_t bufsize; 2698 }; 2699 #endif 2700 int 2701 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2702 { 2703 2704 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2705 uap->buf, UIO_USERSPACE, uap->bufsize)); 2706 } 2707 2708 int 2709 kern_readlinkat(struct thread *td, int fd, const char *path, 2710 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2711 { 2712 struct vnode *vp; 2713 struct nameidata nd; 2714 int error; 2715 2716 if (count > IOSIZE_MAX) 2717 return (EINVAL); 2718 2719 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2720 EMPTYPATH, pathseg, path, fd); 2721 2722 if ((error = namei(&nd)) != 0) 2723 return (error); 2724 NDFREE_PNBUF(&nd); 2725 vp = nd.ni_vp; 2726 2727 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2728 vput(vp); 2729 2730 return (error); 2731 } 2732 2733 /* 2734 * Helper function to readlink from a vnode 2735 */ 2736 static int 2737 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2738 struct thread *td) 2739 { 2740 struct iovec aiov; 2741 struct uio auio; 2742 int error; 2743 2744 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2745 #ifdef MAC 2746 error = mac_vnode_check_readlink(td->td_ucred, vp); 2747 if (error != 0) 2748 return (error); 2749 #endif 2750 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2751 return (EINVAL); 2752 2753 aiov.iov_base = buf; 2754 aiov.iov_len = count; 2755 auio.uio_iov = &aiov; 2756 auio.uio_iovcnt = 1; 2757 auio.uio_offset = 
0; 2758 auio.uio_rw = UIO_READ; 2759 auio.uio_segflg = bufseg; 2760 auio.uio_td = td; 2761 auio.uio_resid = count; 2762 error = VOP_READLINK(vp, &auio, td->td_ucred); 2763 td->td_retval[0] = count - auio.uio_resid; 2764 return (error); 2765 } 2766 2767 /* 2768 * Common implementation code for chflags() and fchflags(). 2769 */ 2770 static int 2771 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2772 { 2773 struct mount *mp; 2774 struct vattr vattr; 2775 int error; 2776 2777 /* We can't support the value matching VNOVAL. */ 2778 if (flags == VNOVAL) 2779 return (EOPNOTSUPP); 2780 2781 /* 2782 * Prevent non-root users from setting flags on devices. When 2783 * a device is reused, users can retain ownership of the device 2784 * if they are allowed to set flags and programs assume that 2785 * chown can't fail when done as root. 2786 */ 2787 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2788 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2789 if (error != 0) 2790 return (error); 2791 } 2792 2793 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2794 return (error); 2795 VATTR_NULL(&vattr); 2796 vattr.va_flags = flags; 2797 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2798 #ifdef MAC 2799 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2800 if (error == 0) 2801 #endif 2802 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2803 VOP_UNLOCK(vp); 2804 vn_finished_write(mp); 2805 return (error); 2806 } 2807 2808 /* 2809 * Change flags of a file given a path name. 
2810 */ 2811 #ifndef _SYS_SYSPROTO_H_ 2812 struct chflags_args { 2813 const char *path; 2814 u_long flags; 2815 }; 2816 #endif 2817 int 2818 sys_chflags(struct thread *td, struct chflags_args *uap) 2819 { 2820 2821 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2822 uap->flags, 0)); 2823 } 2824 2825 #ifndef _SYS_SYSPROTO_H_ 2826 struct chflagsat_args { 2827 int fd; 2828 const char *path; 2829 u_long flags; 2830 int atflag; 2831 } 2832 #endif 2833 int 2834 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2835 { 2836 2837 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2838 uap->flags, uap->atflag)); 2839 } 2840 2841 /* 2842 * Same as chflags() but doesn't follow symlinks. 2843 */ 2844 #ifndef _SYS_SYSPROTO_H_ 2845 struct lchflags_args { 2846 const char *path; 2847 u_long flags; 2848 }; 2849 #endif 2850 int 2851 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2852 { 2853 2854 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2855 uap->flags, AT_SYMLINK_NOFOLLOW)); 2856 } 2857 2858 static int 2859 kern_chflagsat(struct thread *td, int fd, const char *path, 2860 enum uio_seg pathseg, u_long flags, int atflag) 2861 { 2862 struct nameidata nd; 2863 int error; 2864 2865 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2866 AT_EMPTY_PATH)) != 0) 2867 return (EINVAL); 2868 2869 AUDIT_ARG_FFLAGS(flags); 2870 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2871 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2872 fd, &cap_fchflags_rights); 2873 if ((error = namei(&nd)) != 0) 2874 return (error); 2875 NDFREE_PNBUF(&nd); 2876 error = setfflags(td, nd.ni_vp, flags); 2877 vrele(nd.ni_vp); 2878 return (error); 2879 } 2880 2881 /* 2882 * Change flags of a file given a file descriptor. 
2883 */ 2884 #ifndef _SYS_SYSPROTO_H_ 2885 struct fchflags_args { 2886 int fd; 2887 u_long flags; 2888 }; 2889 #endif 2890 int 2891 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2892 { 2893 struct file *fp; 2894 int error; 2895 2896 AUDIT_ARG_FD(uap->fd); 2897 AUDIT_ARG_FFLAGS(uap->flags); 2898 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2899 &fp); 2900 if (error != 0) 2901 return (error); 2902 #ifdef AUDIT 2903 if (AUDITING_TD(td)) { 2904 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2905 AUDIT_ARG_VNODE1(fp->f_vnode); 2906 VOP_UNLOCK(fp->f_vnode); 2907 } 2908 #endif 2909 error = setfflags(td, fp->f_vnode, uap->flags); 2910 fdrop(fp, td); 2911 return (error); 2912 } 2913 2914 /* 2915 * Common implementation code for chmod(), lchmod() and fchmod(). 2916 */ 2917 int 2918 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2919 { 2920 struct mount *mp; 2921 struct vattr vattr; 2922 int error; 2923 2924 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2925 return (error); 2926 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2927 VATTR_NULL(&vattr); 2928 vattr.va_mode = mode & ALLPERMS; 2929 #ifdef MAC 2930 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2931 if (error == 0) 2932 #endif 2933 error = VOP_SETATTR(vp, &vattr, cred); 2934 VOP_UNLOCK(vp); 2935 vn_finished_write(mp); 2936 return (error); 2937 } 2938 2939 /* 2940 * Change mode of a file given path name. 
2941 */ 2942 #ifndef _SYS_SYSPROTO_H_ 2943 struct chmod_args { 2944 char *path; 2945 int mode; 2946 }; 2947 #endif 2948 int 2949 sys_chmod(struct thread *td, struct chmod_args *uap) 2950 { 2951 2952 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2953 uap->mode, 0)); 2954 } 2955 2956 #ifndef _SYS_SYSPROTO_H_ 2957 struct fchmodat_args { 2958 int dirfd; 2959 char *path; 2960 mode_t mode; 2961 int flag; 2962 } 2963 #endif 2964 int 2965 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2966 { 2967 2968 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2969 uap->mode, uap->flag)); 2970 } 2971 2972 /* 2973 * Change mode of a file given path name (don't follow links.) 2974 */ 2975 #ifndef _SYS_SYSPROTO_H_ 2976 struct lchmod_args { 2977 char *path; 2978 int mode; 2979 }; 2980 #endif 2981 int 2982 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2983 { 2984 2985 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2986 uap->mode, AT_SYMLINK_NOFOLLOW)); 2987 } 2988 2989 int 2990 kern_fchmodat(struct thread *td, int fd, const char *path, 2991 enum uio_seg pathseg, mode_t mode, int flag) 2992 { 2993 struct nameidata nd; 2994 int error; 2995 2996 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2997 AT_EMPTY_PATH)) != 0) 2998 return (EINVAL); 2999 3000 AUDIT_ARG_MODE(mode); 3001 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3002 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3003 fd, &cap_fchmod_rights); 3004 if ((error = namei(&nd)) != 0) 3005 return (error); 3006 NDFREE_PNBUF(&nd); 3007 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3008 vrele(nd.ni_vp); 3009 return (error); 3010 } 3011 3012 /* 3013 * Change mode of a file given a file descriptor. 
3014 */ 3015 #ifndef _SYS_SYSPROTO_H_ 3016 struct fchmod_args { 3017 int fd; 3018 int mode; 3019 }; 3020 #endif 3021 int 3022 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3023 { 3024 struct file *fp; 3025 int error; 3026 3027 AUDIT_ARG_FD(uap->fd); 3028 AUDIT_ARG_MODE(uap->mode); 3029 3030 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3031 if (error != 0) 3032 return (error); 3033 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3034 fdrop(fp, td); 3035 return (error); 3036 } 3037 3038 /* 3039 * Common implementation for chown(), lchown(), and fchown() 3040 */ 3041 int 3042 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3043 gid_t gid) 3044 { 3045 struct mount *mp; 3046 struct vattr vattr; 3047 int error; 3048 3049 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3050 return (error); 3051 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3052 VATTR_NULL(&vattr); 3053 vattr.va_uid = uid; 3054 vattr.va_gid = gid; 3055 #ifdef MAC 3056 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3057 vattr.va_gid); 3058 if (error == 0) 3059 #endif 3060 error = VOP_SETATTR(vp, &vattr, cred); 3061 VOP_UNLOCK(vp); 3062 vn_finished_write(mp); 3063 return (error); 3064 } 3065 3066 /* 3067 * Set ownership given a path name. 
3068 */ 3069 #ifndef _SYS_SYSPROTO_H_ 3070 struct chown_args { 3071 char *path; 3072 int uid; 3073 int gid; 3074 }; 3075 #endif 3076 int 3077 sys_chown(struct thread *td, struct chown_args *uap) 3078 { 3079 3080 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3081 uap->gid, 0)); 3082 } 3083 3084 #ifndef _SYS_SYSPROTO_H_ 3085 struct fchownat_args { 3086 int fd; 3087 const char * path; 3088 uid_t uid; 3089 gid_t gid; 3090 int flag; 3091 }; 3092 #endif 3093 int 3094 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3095 { 3096 3097 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3098 uap->gid, uap->flag)); 3099 } 3100 3101 int 3102 kern_fchownat(struct thread *td, int fd, const char *path, 3103 enum uio_seg pathseg, int uid, int gid, int flag) 3104 { 3105 struct nameidata nd; 3106 int error; 3107 3108 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3109 AT_EMPTY_PATH)) != 0) 3110 return (EINVAL); 3111 3112 AUDIT_ARG_OWNER(uid, gid); 3113 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3114 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3115 fd, &cap_fchown_rights); 3116 3117 if ((error = namei(&nd)) != 0) 3118 return (error); 3119 NDFREE_PNBUF(&nd); 3120 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3121 vrele(nd.ni_vp); 3122 return (error); 3123 } 3124 3125 /* 3126 * Set ownership given a path name, do not cross symlinks. 3127 */ 3128 #ifndef _SYS_SYSPROTO_H_ 3129 struct lchown_args { 3130 char *path; 3131 int uid; 3132 int gid; 3133 }; 3134 #endif 3135 int 3136 sys_lchown(struct thread *td, struct lchown_args *uap) 3137 { 3138 3139 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3140 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3141 } 3142 3143 /* 3144 * Set ownership given a file descriptor. 
3145 */ 3146 #ifndef _SYS_SYSPROTO_H_ 3147 struct fchown_args { 3148 int fd; 3149 int uid; 3150 int gid; 3151 }; 3152 #endif 3153 int 3154 sys_fchown(struct thread *td, struct fchown_args *uap) 3155 { 3156 struct file *fp; 3157 int error; 3158 3159 AUDIT_ARG_FD(uap->fd); 3160 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3161 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3162 if (error != 0) 3163 return (error); 3164 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3165 fdrop(fp, td); 3166 return (error); 3167 } 3168 3169 /* 3170 * Common implementation code for utimes(), lutimes(), and futimes(). 3171 */ 3172 static int 3173 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3174 struct timespec *tsp) 3175 { 3176 struct timeval tv[2]; 3177 const struct timeval *tvp; 3178 int error; 3179 3180 if (usrtvp == NULL) { 3181 vfs_timestamp(&tsp[0]); 3182 tsp[1] = tsp[0]; 3183 } else { 3184 if (tvpseg == UIO_SYSSPACE) { 3185 tvp = usrtvp; 3186 } else { 3187 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3188 return (error); 3189 tvp = tv; 3190 } 3191 3192 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3193 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3194 return (EINVAL); 3195 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3196 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3197 } 3198 return (0); 3199 } 3200 3201 /* 3202 * Common implementation code for futimens(), utimensat(). 
3203 */ 3204 #define UTIMENS_NULL 0x1 3205 #define UTIMENS_EXIT 0x2 3206 static int 3207 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3208 struct timespec *tsp, int *retflags) 3209 { 3210 struct timespec tsnow; 3211 int error; 3212 3213 vfs_timestamp(&tsnow); 3214 *retflags = 0; 3215 if (usrtsp == NULL) { 3216 tsp[0] = tsnow; 3217 tsp[1] = tsnow; 3218 *retflags |= UTIMENS_NULL; 3219 return (0); 3220 } 3221 if (tspseg == UIO_SYSSPACE) { 3222 tsp[0] = usrtsp[0]; 3223 tsp[1] = usrtsp[1]; 3224 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3225 return (error); 3226 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3227 *retflags |= UTIMENS_EXIT; 3228 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3229 *retflags |= UTIMENS_NULL; 3230 if (tsp[0].tv_nsec == UTIME_OMIT) 3231 tsp[0].tv_sec = VNOVAL; 3232 else if (tsp[0].tv_nsec == UTIME_NOW) 3233 tsp[0] = tsnow; 3234 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3235 return (EINVAL); 3236 if (tsp[1].tv_nsec == UTIME_OMIT) 3237 tsp[1].tv_sec = VNOVAL; 3238 else if (tsp[1].tv_nsec == UTIME_NOW) 3239 tsp[1] = tsnow; 3240 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3241 return (EINVAL); 3242 3243 return (0); 3244 } 3245 3246 /* 3247 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3248 * and utimensat(). 
3249 */ 3250 static int 3251 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3252 int numtimes, int nullflag) 3253 { 3254 struct mount *mp; 3255 struct vattr vattr; 3256 int error; 3257 bool setbirthtime; 3258 3259 setbirthtime = false; 3260 vattr.va_birthtime.tv_sec = VNOVAL; 3261 vattr.va_birthtime.tv_nsec = 0; 3262 3263 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3264 return (error); 3265 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3266 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3267 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3268 setbirthtime = true; 3269 VATTR_NULL(&vattr); 3270 vattr.va_atime = ts[0]; 3271 vattr.va_mtime = ts[1]; 3272 if (setbirthtime) 3273 vattr.va_birthtime = ts[1]; 3274 if (numtimes > 2) 3275 vattr.va_birthtime = ts[2]; 3276 if (nullflag) 3277 vattr.va_vaflags |= VA_UTIMES_NULL; 3278 #ifdef MAC 3279 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3280 vattr.va_mtime); 3281 #endif 3282 if (error == 0) 3283 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3284 VOP_UNLOCK(vp); 3285 vn_finished_write(mp); 3286 return (error); 3287 } 3288 3289 /* 3290 * Set the access and modification times of a file. 
3291 */ 3292 #ifndef _SYS_SYSPROTO_H_ 3293 struct utimes_args { 3294 char *path; 3295 struct timeval *tptr; 3296 }; 3297 #endif 3298 int 3299 sys_utimes(struct thread *td, struct utimes_args *uap) 3300 { 3301 3302 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3303 uap->tptr, UIO_USERSPACE)); 3304 } 3305 3306 #ifndef _SYS_SYSPROTO_H_ 3307 struct futimesat_args { 3308 int fd; 3309 const char * path; 3310 const struct timeval * times; 3311 }; 3312 #endif 3313 int 3314 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3315 { 3316 3317 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3318 uap->times, UIO_USERSPACE)); 3319 } 3320 3321 int 3322 kern_utimesat(struct thread *td, int fd, const char *path, 3323 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3324 { 3325 struct nameidata nd; 3326 struct timespec ts[2]; 3327 int error; 3328 3329 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3330 return (error); 3331 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3332 &cap_futimes_rights); 3333 3334 if ((error = namei(&nd)) != 0) 3335 return (error); 3336 NDFREE_PNBUF(&nd); 3337 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3338 vrele(nd.ni_vp); 3339 return (error); 3340 } 3341 3342 /* 3343 * Set the access and modification times of a file. 
3344 */ 3345 #ifndef _SYS_SYSPROTO_H_ 3346 struct lutimes_args { 3347 char *path; 3348 struct timeval *tptr; 3349 }; 3350 #endif 3351 int 3352 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3353 { 3354 3355 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3356 UIO_USERSPACE)); 3357 } 3358 3359 int 3360 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3361 const struct timeval *tptr, enum uio_seg tptrseg) 3362 { 3363 struct timespec ts[2]; 3364 struct nameidata nd; 3365 int error; 3366 3367 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3368 return (error); 3369 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3370 if ((error = namei(&nd)) != 0) 3371 return (error); 3372 NDFREE_PNBUF(&nd); 3373 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3374 vrele(nd.ni_vp); 3375 return (error); 3376 } 3377 3378 /* 3379 * Set the access and modification times of a file. 3380 */ 3381 #ifndef _SYS_SYSPROTO_H_ 3382 struct futimes_args { 3383 int fd; 3384 struct timeval *tptr; 3385 }; 3386 #endif 3387 int 3388 sys_futimes(struct thread *td, struct futimes_args *uap) 3389 { 3390 3391 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3392 } 3393 3394 int 3395 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3396 enum uio_seg tptrseg) 3397 { 3398 struct timespec ts[2]; 3399 struct file *fp; 3400 int error; 3401 3402 AUDIT_ARG_FD(fd); 3403 error = getutimes(tptr, tptrseg, ts); 3404 if (error != 0) 3405 return (error); 3406 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3407 if (error != 0) 3408 return (error); 3409 #ifdef AUDIT 3410 if (AUDITING_TD(td)) { 3411 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3412 AUDIT_ARG_VNODE1(fp->f_vnode); 3413 VOP_UNLOCK(fp->f_vnode); 3414 } 3415 #endif 3416 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3417 fdrop(fp, td); 3418 return (error); 3419 } 3420 3421 int 3422 sys_futimens(struct thread *td, struct futimens_args *uap) 3423 { 3424 
3425 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3426 } 3427 3428 int 3429 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3430 enum uio_seg tptrseg) 3431 { 3432 struct timespec ts[2]; 3433 struct file *fp; 3434 int error, flags; 3435 3436 AUDIT_ARG_FD(fd); 3437 error = getutimens(tptr, tptrseg, ts, &flags); 3438 if (error != 0) 3439 return (error); 3440 if (flags & UTIMENS_EXIT) 3441 return (0); 3442 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3443 if (error != 0) 3444 return (error); 3445 #ifdef AUDIT 3446 if (AUDITING_TD(td)) { 3447 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3448 AUDIT_ARG_VNODE1(fp->f_vnode); 3449 VOP_UNLOCK(fp->f_vnode); 3450 } 3451 #endif 3452 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3453 fdrop(fp, td); 3454 return (error); 3455 } 3456 3457 int 3458 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3459 { 3460 3461 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3462 uap->times, UIO_USERSPACE, uap->flag)); 3463 } 3464 3465 int 3466 kern_utimensat(struct thread *td, int fd, const char *path, 3467 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3468 int flag) 3469 { 3470 struct nameidata nd; 3471 struct timespec ts[2]; 3472 int error, flags; 3473 3474 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3475 AT_EMPTY_PATH)) != 0) 3476 return (EINVAL); 3477 3478 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3479 return (error); 3480 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3481 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3482 pathseg, path, fd, &cap_futimes_rights); 3483 if ((error = namei(&nd)) != 0) 3484 return (error); 3485 /* 3486 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3487 * POSIX states: 3488 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3489 * "Search permission is denied by a component of the path prefix." 
3490 */ 3491 NDFREE_PNBUF(&nd); 3492 if ((flags & UTIMENS_EXIT) == 0) 3493 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3494 vrele(nd.ni_vp); 3495 return (error); 3496 } 3497 3498 /* 3499 * Truncate a file given its path name. 3500 */ 3501 #ifndef _SYS_SYSPROTO_H_ 3502 struct truncate_args { 3503 char *path; 3504 int pad; 3505 off_t length; 3506 }; 3507 #endif 3508 int 3509 sys_truncate(struct thread *td, struct truncate_args *uap) 3510 { 3511 3512 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3513 } 3514 3515 int 3516 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3517 off_t length) 3518 { 3519 struct mount *mp; 3520 struct vnode *vp; 3521 void *rl_cookie; 3522 struct nameidata nd; 3523 int error; 3524 3525 if (length < 0) 3526 return (EINVAL); 3527 NDPREINIT(&nd); 3528 retry: 3529 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3530 if ((error = namei(&nd)) != 0) 3531 return (error); 3532 vp = nd.ni_vp; 3533 NDFREE_PNBUF(&nd); 3534 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3535 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3536 vn_rangelock_unlock(vp, rl_cookie); 3537 vrele(vp); 3538 return (error); 3539 } 3540 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3541 if (vp->v_type == VDIR) { 3542 error = EISDIR; 3543 goto out; 3544 } 3545 #ifdef MAC 3546 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3547 if (error != 0) 3548 goto out; 3549 #endif 3550 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3551 if (error != 0) 3552 goto out; 3553 3554 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3555 out: 3556 VOP_UNLOCK(vp); 3557 vn_finished_write(mp); 3558 vn_rangelock_unlock(vp, rl_cookie); 3559 vrele(vp); 3560 if (error == ERELOOKUP) 3561 goto retry; 3562 return (error); 3563 } 3564 3565 #if defined(COMPAT_43) 3566 /* 3567 * Truncate a file given its path name. 
3568 */ 3569 #ifndef _SYS_SYSPROTO_H_ 3570 struct otruncate_args { 3571 char *path; 3572 long length; 3573 }; 3574 #endif 3575 int 3576 otruncate(struct thread *td, struct otruncate_args *uap) 3577 { 3578 3579 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3580 } 3581 #endif /* COMPAT_43 */ 3582 3583 #if defined(COMPAT_FREEBSD6) 3584 /* Versions with the pad argument */ 3585 int 3586 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3587 { 3588 3589 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3590 } 3591 3592 int 3593 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3594 { 3595 3596 return (kern_ftruncate(td, uap->fd, uap->length)); 3597 } 3598 #endif 3599 3600 int 3601 kern_fsync(struct thread *td, int fd, bool fullsync) 3602 { 3603 struct vnode *vp; 3604 struct mount *mp; 3605 struct file *fp; 3606 int error; 3607 3608 AUDIT_ARG_FD(fd); 3609 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3610 if (error != 0) 3611 return (error); 3612 vp = fp->f_vnode; 3613 #if 0 3614 if (!fullsync) 3615 /* XXXKIB: compete outstanding aio writes */; 3616 #endif 3617 retry: 3618 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3619 if (error != 0) 3620 goto drop; 3621 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3622 AUDIT_ARG_VNODE1(vp); 3623 vnode_pager_clean_async(vp); 3624 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3625 VOP_UNLOCK(vp); 3626 vn_finished_write(mp); 3627 if (error == ERELOOKUP) 3628 goto retry; 3629 drop: 3630 fdrop(fp, td); 3631 return (error); 3632 } 3633 3634 /* 3635 * Sync an open file. 
3636 */ 3637 #ifndef _SYS_SYSPROTO_H_ 3638 struct fsync_args { 3639 int fd; 3640 }; 3641 #endif 3642 int 3643 sys_fsync(struct thread *td, struct fsync_args *uap) 3644 { 3645 3646 return (kern_fsync(td, uap->fd, true)); 3647 } 3648 3649 int 3650 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3651 { 3652 3653 return (kern_fsync(td, uap->fd, false)); 3654 } 3655 3656 /* 3657 * Rename files. Source and destination must either both be directories, or 3658 * both not be directories. If target is a directory, it must be empty. 3659 */ 3660 #ifndef _SYS_SYSPROTO_H_ 3661 struct rename_args { 3662 char *from; 3663 char *to; 3664 }; 3665 #endif 3666 int 3667 sys_rename(struct thread *td, struct rename_args *uap) 3668 { 3669 3670 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3671 uap->to, UIO_USERSPACE)); 3672 } 3673 3674 #ifndef _SYS_SYSPROTO_H_ 3675 struct renameat_args { 3676 int oldfd; 3677 char *old; 3678 int newfd; 3679 char *new; 3680 }; 3681 #endif 3682 int 3683 sys_renameat(struct thread *td, struct renameat_args *uap) 3684 { 3685 3686 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3687 UIO_USERSPACE)); 3688 } 3689 3690 #ifdef MAC 3691 static int 3692 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3693 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3694 { 3695 int error; 3696 3697 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3698 pathseg, old, oldfd, &cap_renameat_source_rights); 3699 if ((error = namei(fromnd)) != 0) 3700 return (error); 3701 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3702 fromnd->ni_vp, &fromnd->ni_cnd); 3703 VOP_UNLOCK(fromnd->ni_dvp); 3704 if (fromnd->ni_dvp != fromnd->ni_vp) 3705 VOP_UNLOCK(fromnd->ni_vp); 3706 if (error != 0) { 3707 NDFREE_PNBUF(fromnd); 3708 vrele(fromnd->ni_dvp); 3709 vrele(fromnd->ni_vp); 3710 } 3711 return (error); 3712 } 3713 #endif 3714 3715 int 3716 kern_renameat(struct thread *td, 
int oldfd, const char *old, int newfd, 3717 const char *new, enum uio_seg pathseg) 3718 { 3719 struct mount *mp = NULL; 3720 struct vnode *tvp, *fvp, *tdvp; 3721 struct nameidata fromnd, tond; 3722 uint64_t tondflags; 3723 int error; 3724 3725 again: 3726 bwillwrite(); 3727 #ifdef MAC 3728 if (mac_vnode_check_rename_from_enabled()) { 3729 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3730 &fromnd); 3731 if (error != 0) 3732 return (error); 3733 } else { 3734 #endif 3735 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3736 pathseg, old, oldfd, &cap_renameat_source_rights); 3737 if ((error = namei(&fromnd)) != 0) 3738 return (error); 3739 #ifdef MAC 3740 } 3741 #endif 3742 fvp = fromnd.ni_vp; 3743 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3744 if (fromnd.ni_vp->v_type == VDIR) 3745 tondflags |= WILLBEDIR; 3746 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3747 &cap_renameat_target_rights); 3748 if ((error = namei(&tond)) != 0) { 3749 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3750 if (error == EISDIR && fvp->v_type == VDIR) 3751 error = EINVAL; 3752 NDFREE_PNBUF(&fromnd); 3753 vrele(fromnd.ni_dvp); 3754 vrele(fvp); 3755 goto out1; 3756 } 3757 tdvp = tond.ni_dvp; 3758 tvp = tond.ni_vp; 3759 error = vn_start_write(fvp, &mp, V_NOWAIT); 3760 if (error != 0) { 3761 NDFREE_PNBUF(&fromnd); 3762 NDFREE_PNBUF(&tond); 3763 if (tvp != NULL) 3764 vput(tvp); 3765 if (tdvp == tvp) 3766 vrele(tdvp); 3767 else 3768 vput(tdvp); 3769 vrele(fromnd.ni_dvp); 3770 vrele(fvp); 3771 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3772 if (error != 0) 3773 return (error); 3774 goto again; 3775 } 3776 if (tvp != NULL) { 3777 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3778 error = ENOTDIR; 3779 goto out; 3780 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3781 error = EISDIR; 3782 goto out; 3783 } 3784 #ifdef CAPABILITIES 3785 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3786 /* 3787 * If the target already exists we require CAP_UNLINKAT 3788 * from 'newfd', when newfd was used for the lookup. 3789 */ 3790 error = cap_check(&tond.ni_filecaps.fc_rights, 3791 &cap_unlinkat_rights); 3792 if (error != 0) 3793 goto out; 3794 } 3795 #endif 3796 } 3797 if (fvp == tdvp) { 3798 error = EINVAL; 3799 goto out; 3800 } 3801 /* 3802 * If the source is the same as the destination (that is, if they 3803 * are links to the same vnode), then there is nothing to do. 
3804 */ 3805 if (fvp == tvp) 3806 error = ERESTART; 3807 #ifdef MAC 3808 else 3809 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3810 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3811 #endif 3812 out: 3813 if (error == 0) { 3814 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3815 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3816 NDFREE_PNBUF(&fromnd); 3817 NDFREE_PNBUF(&tond); 3818 } else { 3819 NDFREE_PNBUF(&fromnd); 3820 NDFREE_PNBUF(&tond); 3821 if (tvp != NULL) 3822 vput(tvp); 3823 if (tdvp == tvp) 3824 vrele(tdvp); 3825 else 3826 vput(tdvp); 3827 vrele(fromnd.ni_dvp); 3828 vrele(fvp); 3829 } 3830 vn_finished_write(mp); 3831 out1: 3832 if (error == ERESTART) 3833 return (0); 3834 if (error == ERELOOKUP) 3835 goto again; 3836 return (error); 3837 } 3838 3839 /* 3840 * Make a directory file. 3841 */ 3842 #ifndef _SYS_SYSPROTO_H_ 3843 struct mkdir_args { 3844 char *path; 3845 int mode; 3846 }; 3847 #endif 3848 int 3849 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3850 { 3851 3852 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3853 uap->mode)); 3854 } 3855 3856 #ifndef _SYS_SYSPROTO_H_ 3857 struct mkdirat_args { 3858 int fd; 3859 char *path; 3860 mode_t mode; 3861 }; 3862 #endif 3863 int 3864 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3865 { 3866 3867 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3868 } 3869 3870 int 3871 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3872 int mode) 3873 { 3874 struct mount *mp; 3875 struct vattr vattr; 3876 struct nameidata nd; 3877 int error; 3878 3879 AUDIT_ARG_MODE(mode); 3880 NDPREINIT(&nd); 3881 restart: 3882 bwillwrite(); 3883 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3884 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3885 segflg, path, fd, &cap_mkdirat_rights); 3886 if ((error = namei(&nd)) != 0) 3887 return (error); 3888 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 
3889 NDFREE_PNBUF(&nd); 3890 vput(nd.ni_dvp); 3891 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3892 return (error); 3893 goto restart; 3894 } 3895 VATTR_NULL(&vattr); 3896 vattr.va_type = VDIR; 3897 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3898 #ifdef MAC 3899 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3900 &vattr); 3901 if (error != 0) 3902 goto out; 3903 #endif 3904 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3905 #ifdef MAC 3906 out: 3907 #endif 3908 NDFREE_PNBUF(&nd); 3909 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3910 vn_finished_write(mp); 3911 if (error == ERELOOKUP) 3912 goto restart; 3913 return (error); 3914 } 3915 3916 /* 3917 * Remove a directory file. 3918 */ 3919 #ifndef _SYS_SYSPROTO_H_ 3920 struct rmdir_args { 3921 char *path; 3922 }; 3923 #endif 3924 int 3925 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3926 { 3927 3928 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3929 0)); 3930 } 3931 3932 int 3933 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3934 enum uio_seg pathseg, int flag) 3935 { 3936 struct mount *mp; 3937 struct vnode *vp; 3938 struct file *fp; 3939 struct nameidata nd; 3940 cap_rights_t rights; 3941 int error; 3942 3943 fp = NULL; 3944 if (fd != FD_NONE) { 3945 error = getvnode(td, fd, cap_rights_init_one(&rights, 3946 CAP_LOOKUP), &fp); 3947 if (error != 0) 3948 return (error); 3949 } 3950 3951 NDPREINIT(&nd); 3952 restart: 3953 bwillwrite(); 3954 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3955 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3956 pathseg, path, dfd, &cap_unlinkat_rights); 3957 if ((error = namei(&nd)) != 0) 3958 goto fdout; 3959 vp = nd.ni_vp; 3960 if (vp->v_type != VDIR) { 3961 error = ENOTDIR; 3962 goto out; 3963 } 3964 /* 3965 * No rmdir "." please. 
3966 */ 3967 if (nd.ni_dvp == vp) { 3968 error = EINVAL; 3969 goto out; 3970 } 3971 /* 3972 * The root of a mounted filesystem cannot be deleted. 3973 */ 3974 if (vp->v_vflag & VV_ROOT) { 3975 error = EBUSY; 3976 goto out; 3977 } 3978 3979 if (fp != NULL && fp->f_vnode != vp) { 3980 if (VN_IS_DOOMED(fp->f_vnode)) 3981 error = EBADF; 3982 else 3983 error = EDEADLK; 3984 goto out; 3985 } 3986 3987 #ifdef MAC 3988 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3989 &nd.ni_cnd); 3990 if (error != 0) 3991 goto out; 3992 #endif 3993 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3994 NDFREE_PNBUF(&nd); 3995 vput(vp); 3996 if (nd.ni_dvp == vp) 3997 vrele(nd.ni_dvp); 3998 else 3999 vput(nd.ni_dvp); 4000 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4001 goto fdout; 4002 goto restart; 4003 } 4004 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4005 vn_finished_write(mp); 4006 out: 4007 NDFREE_PNBUF(&nd); 4008 vput(vp); 4009 if (nd.ni_dvp == vp) 4010 vrele(nd.ni_dvp); 4011 else 4012 vput(nd.ni_dvp); 4013 if (error == ERELOOKUP) 4014 goto restart; 4015 fdout: 4016 if (fp != NULL) 4017 fdrop(fp, td); 4018 return (error); 4019 } 4020 4021 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4022 int 4023 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4024 long *basep, void (*func)(struct freebsd11_dirent *)) 4025 { 4026 struct freebsd11_dirent dstdp; 4027 struct dirent *dp, *edp; 4028 char *dirbuf; 4029 off_t base; 4030 ssize_t resid, ucount; 4031 int error; 4032 4033 /* XXX arbitrary sanity limit on `count'. 
*/ 4034 count = min(count, 64 * 1024); 4035 4036 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4037 4038 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4039 UIO_SYSSPACE); 4040 if (error != 0) 4041 goto done; 4042 if (basep != NULL) 4043 *basep = base; 4044 4045 ucount = 0; 4046 for (dp = (struct dirent *)dirbuf, 4047 edp = (struct dirent *)&dirbuf[count - resid]; 4048 ucount < count && dp < edp; ) { 4049 if (dp->d_reclen == 0) 4050 break; 4051 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4052 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4053 continue; 4054 dstdp.d_type = dp->d_type; 4055 dstdp.d_namlen = dp->d_namlen; 4056 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4057 if (dstdp.d_fileno != dp->d_fileno) { 4058 switch (ino64_trunc_error) { 4059 default: 4060 case 0: 4061 break; 4062 case 1: 4063 error = EOVERFLOW; 4064 goto done; 4065 case 2: 4066 dstdp.d_fileno = UINT32_MAX; 4067 break; 4068 } 4069 } 4070 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4071 ((dp->d_namlen + 1 + 3) &~ 3); 4072 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4073 bzero(dstdp.d_name + dstdp.d_namlen, 4074 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4075 dstdp.d_namlen); 4076 MPASS(dstdp.d_reclen <= dp->d_reclen); 4077 MPASS(ucount + dstdp.d_reclen <= count); 4078 if (func != NULL) 4079 func(&dstdp); 4080 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4081 if (error != 0) 4082 break; 4083 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4084 ucount += dstdp.d_reclen; 4085 } 4086 4087 done: 4088 free(dirbuf, M_TEMP); 4089 if (error == 0) 4090 td->td_retval[0] = ucount; 4091 return (error); 4092 } 4093 #endif /* COMPAT */ 4094 4095 #ifdef COMPAT_43 4096 static void 4097 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4098 { 4099 #if (BYTE_ORDER == LITTLE_ENDIAN) 4100 /* 4101 * The expected low byte of dp->d_namlen is our dp->d_type. 4102 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4103 */ 4104 dp->d_type = dp->d_namlen; 4105 dp->d_namlen = 0; 4106 #else 4107 /* 4108 * The dp->d_type is the high byte of the expected dp->d_namlen, 4109 * so must be zero'ed. 4110 */ 4111 dp->d_type = 0; 4112 #endif 4113 } 4114 4115 /* 4116 * Read a block of directory entries in a filesystem independent format. 4117 */ 4118 #ifndef _SYS_SYSPROTO_H_ 4119 struct ogetdirentries_args { 4120 int fd; 4121 char *buf; 4122 u_int count; 4123 long *basep; 4124 }; 4125 #endif 4126 int 4127 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4128 { 4129 long loff; 4130 int error; 4131 4132 error = kern_ogetdirentries(td, uap, &loff); 4133 if (error == 0) 4134 error = copyout(&loff, uap->basep, sizeof(long)); 4135 return (error); 4136 } 4137 4138 int 4139 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4140 long *ploff) 4141 { 4142 long base; 4143 int error; 4144 4145 /* XXX arbitrary sanity limit on `count'. */ 4146 if (uap->count > 64 * 1024) 4147 return (EINVAL); 4148 4149 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4150 &base, ogetdirentries_cvt); 4151 4152 if (error == 0 && uap->basep != NULL) 4153 error = copyout(&base, uap->basep, sizeof(long)); 4154 4155 return (error); 4156 } 4157 #endif /* COMPAT_43 */ 4158 4159 #if defined(COMPAT_FREEBSD11) 4160 #ifndef _SYS_SYSPROTO_H_ 4161 struct freebsd11_getdirentries_args { 4162 int fd; 4163 char *buf; 4164 u_int count; 4165 long *basep; 4166 }; 4167 #endif 4168 int 4169 freebsd11_getdirentries(struct thread *td, 4170 struct freebsd11_getdirentries_args *uap) 4171 { 4172 long base; 4173 int error; 4174 4175 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4176 &base, NULL); 4177 4178 if (error == 0 && uap->basep != NULL) 4179 error = copyout(&base, uap->basep, sizeof(long)); 4180 return (error); 4181 } 4182 4183 int 4184 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4185 { 4186 struct 
freebsd11_getdirentries_args ap; 4187 4188 ap.fd = uap->fd; 4189 ap.buf = uap->buf; 4190 ap.count = uap->count; 4191 ap.basep = NULL; 4192 return (freebsd11_getdirentries(td, &ap)); 4193 } 4194 #endif /* COMPAT_FREEBSD11 */ 4195 4196 /* 4197 * Read a block of directory entries in a filesystem independent format. 4198 */ 4199 int 4200 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4201 { 4202 off_t base; 4203 int error; 4204 4205 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4206 NULL, UIO_USERSPACE); 4207 if (error != 0) 4208 return (error); 4209 if (uap->basep != NULL) 4210 error = copyout(&base, uap->basep, sizeof(off_t)); 4211 return (error); 4212 } 4213 4214 int 4215 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4216 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4217 { 4218 struct vnode *vp; 4219 struct file *fp; 4220 struct uio auio; 4221 struct iovec aiov; 4222 off_t loff; 4223 int error, eofflag; 4224 off_t foffset; 4225 4226 AUDIT_ARG_FD(fd); 4227 if (count > IOSIZE_MAX) 4228 return (EINVAL); 4229 auio.uio_resid = count; 4230 error = getvnode(td, fd, &cap_read_rights, &fp); 4231 if (error != 0) 4232 return (error); 4233 if ((fp->f_flag & FREAD) == 0) { 4234 fdrop(fp, td); 4235 return (EBADF); 4236 } 4237 vp = fp->f_vnode; 4238 foffset = foffset_lock(fp, 0); 4239 unionread: 4240 if (vp->v_type != VDIR) { 4241 error = EINVAL; 4242 goto fail; 4243 } 4244 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4245 error = ENOENT; 4246 goto fail; 4247 } 4248 aiov.iov_base = buf; 4249 aiov.iov_len = count; 4250 auio.uio_iov = &aiov; 4251 auio.uio_iovcnt = 1; 4252 auio.uio_rw = UIO_READ; 4253 auio.uio_segflg = bufseg; 4254 auio.uio_td = td; 4255 vn_lock(vp, LK_SHARED | LK_RETRY); 4256 AUDIT_ARG_VNODE1(vp); 4257 loff = auio.uio_offset = foffset; 4258 #ifdef MAC 4259 error = mac_vnode_check_readdir(td->td_ucred, vp); 4260 if (error == 0) 4261 #endif 4262 error = VOP_READDIR(vp, &auio, 
fp->f_cred, &eofflag, NULL, 4263 NULL); 4264 foffset = auio.uio_offset; 4265 if (error != 0) { 4266 VOP_UNLOCK(vp); 4267 goto fail; 4268 } 4269 if (count == auio.uio_resid && 4270 (vp->v_vflag & VV_ROOT) && 4271 (vp->v_mount->mnt_flag & MNT_UNION)) { 4272 struct vnode *tvp = vp; 4273 4274 vp = vp->v_mount->mnt_vnodecovered; 4275 VREF(vp); 4276 fp->f_vnode = vp; 4277 foffset = 0; 4278 vput(tvp); 4279 goto unionread; 4280 } 4281 VOP_UNLOCK(vp); 4282 *basep = loff; 4283 if (residp != NULL) 4284 *residp = auio.uio_resid; 4285 td->td_retval[0] = count - auio.uio_resid; 4286 fail: 4287 foffset_unlock(fp, foffset, 0); 4288 fdrop(fp, td); 4289 return (error); 4290 } 4291 4292 /* 4293 * Set the mode mask for creation of filesystem nodes. 4294 */ 4295 #ifndef _SYS_SYSPROTO_H_ 4296 struct umask_args { 4297 int newmask; 4298 }; 4299 #endif 4300 int 4301 sys_umask(struct thread *td, struct umask_args *uap) 4302 { 4303 struct pwddesc *pdp; 4304 4305 pdp = td->td_proc->p_pd; 4306 PWDDESC_XLOCK(pdp); 4307 td->td_retval[0] = pdp->pd_cmask; 4308 pdp->pd_cmask = uap->newmask & ALLPERMS; 4309 PWDDESC_XUNLOCK(pdp); 4310 return (0); 4311 } 4312 4313 /* 4314 * Void all references to file by ripping underlying filesystem away from 4315 * vnode. 
4316 */ 4317 #ifndef _SYS_SYSPROTO_H_ 4318 struct revoke_args { 4319 char *path; 4320 }; 4321 #endif 4322 int 4323 sys_revoke(struct thread *td, struct revoke_args *uap) 4324 { 4325 struct vnode *vp; 4326 struct vattr vattr; 4327 struct nameidata nd; 4328 int error; 4329 4330 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4331 uap->path); 4332 if ((error = namei(&nd)) != 0) 4333 return (error); 4334 vp = nd.ni_vp; 4335 NDFREE_PNBUF(&nd); 4336 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4337 error = EINVAL; 4338 goto out; 4339 } 4340 #ifdef MAC 4341 error = mac_vnode_check_revoke(td->td_ucred, vp); 4342 if (error != 0) 4343 goto out; 4344 #endif 4345 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4346 if (error != 0) 4347 goto out; 4348 if (td->td_ucred->cr_uid != vattr.va_uid) { 4349 error = priv_check(td, PRIV_VFS_ADMIN); 4350 if (error != 0) 4351 goto out; 4352 } 4353 if (devfs_usecount(vp) > 0) 4354 VOP_REVOKE(vp, REVOKEALL); 4355 out: 4356 vput(vp); 4357 return (error); 4358 } 4359 4360 /* 4361 * This variant of getvnode() allows O_PATH files. Caller should 4362 * ensure that returned file and vnode are only used for compatible 4363 * semantics. 4364 */ 4365 int 4366 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4367 struct file **fpp) 4368 { 4369 struct file *fp; 4370 int error; 4371 4372 error = fget_unlocked(td, fd, rightsp, &fp); 4373 if (error != 0) 4374 return (error); 4375 4376 /* 4377 * The file could be not of the vnode type, or it may be not 4378 * yet fully initialized, in which case the f_vnode pointer 4379 * may be set, but f_ops is still badfileops. E.g., 4380 * devfs_open() transiently create such situation to 4381 * facilitate csw d_fdopen(). 4382 * 4383 * Dupfdopen() handling in kern_openat() installs the 4384 * half-baked file into the process descriptor table, allowing 4385 * other thread to dereference it. Guard against the race by 4386 * checking f_ops. 
4387 */ 4388 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4389 fdrop(fp, td); 4390 *fpp = NULL; 4391 return (EINVAL); 4392 } 4393 4394 *fpp = fp; 4395 return (0); 4396 } 4397 4398 /* 4399 * Convert a user file descriptor to a kernel file entry and check 4400 * that, if it is a capability, the correct rights are present. 4401 * A reference on the file entry is held upon returning. 4402 */ 4403 int 4404 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4405 { 4406 int error; 4407 4408 error = getvnode_path(td, fd, rightsp, fpp); 4409 if (__predict_false(error != 0)) 4410 return (error); 4411 4412 /* 4413 * Filter out O_PATH file descriptors, most getvnode() callers 4414 * do not call fo_ methods. 4415 */ 4416 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4417 fdrop(*fpp, td); 4418 *fpp = NULL; 4419 error = EBADF; 4420 } 4421 4422 return (error); 4423 } 4424 4425 /* 4426 * Get an (NFS) file handle. 4427 */ 4428 #ifndef _SYS_SYSPROTO_H_ 4429 struct lgetfh_args { 4430 char *fname; 4431 fhandle_t *fhp; 4432 }; 4433 #endif 4434 int 4435 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4436 { 4437 4438 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4439 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4440 } 4441 4442 #ifndef _SYS_SYSPROTO_H_ 4443 struct getfh_args { 4444 char *fname; 4445 fhandle_t *fhp; 4446 }; 4447 #endif 4448 int 4449 sys_getfh(struct thread *td, struct getfh_args *uap) 4450 { 4451 4452 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4453 uap->fhp, UIO_USERSPACE)); 4454 } 4455 4456 /* 4457 * syscall for the rpc.lockd to use to translate an open descriptor into 4458 * a NFS file handle. 4459 * 4460 * warning: do not remove the priv_check() call or this becomes one giant 4461 * security hole. 
4462 */ 4463 #ifndef _SYS_SYSPROTO_H_ 4464 struct getfhat_args { 4465 int fd; 4466 char *path; 4467 fhandle_t *fhp; 4468 int flags; 4469 }; 4470 #endif 4471 int 4472 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4473 { 4474 4475 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4476 uap->fhp, UIO_USERSPACE)); 4477 } 4478 4479 int 4480 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4481 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4482 { 4483 struct nameidata nd; 4484 fhandle_t fh; 4485 struct vnode *vp; 4486 int error; 4487 4488 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4489 return (EINVAL); 4490 error = priv_check(td, PRIV_VFS_GETFH); 4491 if (error != 0) 4492 return (error); 4493 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4494 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4495 fd); 4496 error = namei(&nd); 4497 if (error != 0) 4498 return (error); 4499 NDFREE_PNBUF(&nd); 4500 vp = nd.ni_vp; 4501 bzero(&fh, sizeof(fh)); 4502 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4503 error = VOP_VPTOFH(vp, &fh.fh_fid); 4504 vput(vp); 4505 if (error == 0) { 4506 if (fhseg == UIO_USERSPACE) 4507 error = copyout(&fh, fhp, sizeof (fh)); 4508 else 4509 memcpy(fhp, &fh, sizeof(fh)); 4510 } 4511 return (error); 4512 } 4513 4514 #ifndef _SYS_SYSPROTO_H_ 4515 struct fhlink_args { 4516 fhandle_t *fhp; 4517 const char *to; 4518 }; 4519 #endif 4520 int 4521 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4522 { 4523 4524 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4525 } 4526 4527 #ifndef _SYS_SYSPROTO_H_ 4528 struct fhlinkat_args { 4529 fhandle_t *fhp; 4530 int tofd; 4531 const char *to; 4532 }; 4533 #endif 4534 int 4535 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4536 { 4537 4538 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4539 } 4540 4541 static int 4542 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4543 enum uio_seg pathseg, fhandle_t *fhp) 4544 { 4545 fhandle_t fh; 4546 struct mount *mp; 4547 struct vnode *vp; 4548 int error; 4549 4550 error = priv_check(td, PRIV_VFS_GETFH); 4551 if (error != 0) 4552 return (error); 4553 error = copyin(fhp, &fh, sizeof(fh)); 4554 if (error != 0) 4555 return (error); 4556 do { 4557 bwillwrite(); 4558 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4559 return (ESTALE); 4560 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4561 vfs_unbusy(mp); 4562 if (error != 0) 4563 return (error); 4564 VOP_UNLOCK(vp); 4565 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4566 } while (error == EAGAIN || error == ERELOOKUP); 4567 return (error); 4568 } 4569 4570 #ifndef _SYS_SYSPROTO_H_ 4571 struct fhreadlink_args { 4572 fhandle_t *fhp; 4573 char *buf; 4574 size_t bufsize; 4575 }; 4576 #endif 4577 int 4578 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4579 { 4580 fhandle_t fh; 4581 struct mount *mp; 4582 struct vnode *vp; 4583 int error; 4584 4585 error = priv_check(td, PRIV_VFS_GETFH); 4586 if (error != 0) 4587 return (error); 4588 if (uap->bufsize > IOSIZE_MAX) 4589 return (EINVAL); 4590 error = copyin(uap->fhp, &fh, sizeof(fh)); 4591 if (error != 0) 4592 return (error); 4593 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4594 return (ESTALE); 4595 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4596 vfs_unbusy(mp); 4597 if (error != 0) 4598 return (error); 4599 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4600 vput(vp); 4601 return (error); 4602 } 4603 4604 /* 4605 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4606 * open descriptor. 4607 * 4608 * warning: do not remove the priv_check() call or this becomes one giant 4609 * security hole. 
 */
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
/*
 * fhopen(2) entry point; thin wrapper around kern_fhopen().
 */
int
sys_fhopen(struct thread *td, struct fhopen_args *uap)
{
	return (kern_fhopen(td, uap->u_fhp, uap->flags));
}

/*
 * Open the file identified by the user-space handle 'u_fhp' and install a
 * descriptor for it.
 *
 * Requires PRIV_VFS_FHOPEN.  'flags' must request read and/or write access
 * and must not include O_CREAT (EINVAL otherwise).  ESTALE is returned
 * when the filesystem named in the handle cannot be found.
 *
 * td->td_retval[0] receives the new descriptor index.  On the failure
 * paths that reach the 'bad' label it is set to -1, the initial value of
 * 'indx'.
 */
int
kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags)
{
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct file *fp;
	int fmode, error;
	int indx;

	error = priv_check(td, PRIV_VFS_FHOPEN);
	if (error != 0)
		return (error);
	indx = -1;
	fmode = FFLAGS(flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(u_fhp, &fhp, sizeof(fhp));
	if (error != 0)
		return(error);
	/* find the mount point */
	mp = vfs_busyfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
	vfs_unbusy(mp);
	if (error != 0)
		return (error);

	error = falloc_noinstall(td, &fp);
	if (error != 0) {
		vput(vp);
		return (error);
	}
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */

#ifdef INVARIANTS
	/* Sentinel so the assertion below can detect an fdopen() redirect. */
	td->td_dupfd = -1;
#endif
	error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
	if (error != 0) {
		KASSERT(fp->f_ops == &badfileops,
		    ("VOP_OPEN in fhopen() set f_ops"));
		KASSERT(td->td_dupfd < 0,
		    ("fhopen() encountered fdopen()"));

		vput(vp);
		goto bad;
	}
#ifdef INVARIANTS
	td->td_dupfd = 0;
#endif
	fp->f_vnode = vp;
	finit_vnode(fp, fmode, NULL, &vnops);
	VOP_UNLOCK(vp);
	if ((fmode & O_TRUNC) != 0) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}

	error = finstall(td, fp, &indx, fmode, NULL);
bad:
	/* Drop the reference obtained from falloc_noinstall(). */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (error);
}

/*
 * Stat an (NFS) file handle.
 */
#ifndef _SYS_SYSPROTO_H_
struct fhstat_args {
	struct fhandle *u_fhp;
	struct stat *sb;
};
#endif
/*
 * fhstat(2) entry point: copies the handle in, calls kern_fhstat() and
 * copies the resulting struct stat out on success.
 */
int
sys_fhstat(struct thread *td, struct fhstat_args *uap)
{
	struct stat sb;
	struct fhandle fh;
	int error;

	error = copyin(uap->u_fhp, &fh, sizeof(fh));
	if (error != 0)
		return (error);
	error = kern_fhstat(td, fh, &sb);
	if (error == 0)
		error = copyout(&sb, uap->sb, sizeof(sb));
	return (error);
}

/*
 * Fill the kernel buffer 'sb' with the attributes of the file identified
 * by the handle 'fh'.  Requires PRIV_VFS_FHSTAT.  Returns ESTALE when the
 * filesystem named in the handle cannot be found.
 */
int
kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
{
	struct mount *mp;
	struct vnode *vp;
	int error;

	error = priv_check(td, PRIV_VFS_FHSTAT);
	if (error != 0)
		return (error);
	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);
	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
	vfs_unbusy(mp);
	if (error != 0)
		return (error);
	error = VOP_STAT(vp, sb, td->td_ucred, NOCRED);
	vput(vp);
	return (error);
}

/*
 * Implement fstatfs() for (NFS) file handles.
4744 */ 4745 #ifndef _SYS_SYSPROTO_H_ 4746 struct fhstatfs_args { 4747 struct fhandle *u_fhp; 4748 struct statfs *buf; 4749 }; 4750 #endif 4751 int 4752 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4753 { 4754 struct statfs *sfp; 4755 fhandle_t fh; 4756 int error; 4757 4758 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4759 if (error != 0) 4760 return (error); 4761 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4762 error = kern_fhstatfs(td, fh, sfp); 4763 if (error == 0) 4764 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4765 free(sfp, M_STATFS); 4766 return (error); 4767 } 4768 4769 int 4770 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4771 { 4772 struct mount *mp; 4773 struct vnode *vp; 4774 int error; 4775 4776 error = priv_check(td, PRIV_VFS_FHSTATFS); 4777 if (error != 0) 4778 return (error); 4779 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4780 return (ESTALE); 4781 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4782 if (error != 0) { 4783 vfs_unbusy(mp); 4784 return (error); 4785 } 4786 vput(vp); 4787 error = prison_canseemount(td->td_ucred, mp); 4788 if (error != 0) 4789 goto out; 4790 #ifdef MAC 4791 error = mac_mount_check_stat(td->td_ucred, mp); 4792 if (error != 0) 4793 goto out; 4794 #endif 4795 error = VFS_STATFS(mp, buf); 4796 out: 4797 vfs_unbusy(mp); 4798 return (error); 4799 } 4800 4801 /* 4802 * Unlike madvise(2), we do not make a best effort to remember every 4803 * possible caching hint. Instead, we remember the last setting with 4804 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4805 * region of any current setting. 
 */
/*
 * Implementation of posix_fadvise(2) for the byte range starting at
 * 'offset' and 'len' bytes long; a 'len' of 0 means "through OFF_MAX".
 *
 * SEQUENTIAL, RANDOM and NOREUSE are recorded in fp->f_advice (a single
 * region per file).  NORMAL trims or removes that region.  WILLNEED and
 * DONTNEED are passed to VOP_ADVISE().
 *
 * Returns EINVAL for a negative or overflowing range or an unknown advice
 * value, ESPIPE for a non-seekable file, and ENODEV when the file is not a
 * regular-file vnode.
 */
int
kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
    int advice)
{
	struct fadvise_info *fa, *new;
	struct file *fp;
	struct vnode *vp;
	off_t end;
	int error;

	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
		return (EINVAL);
	AUDIT_ARG_VALUE(advice);
	/*
	 * Allocate a possible new region up front, before any mutex is
	 * taken.  'new' doubles as "the structure to free on exit".
	 */
	switch (advice) {
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_NOREUSE:
		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
		break;
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		new = NULL;
		break;
	default:
		return (EINVAL);
	}
	/* XXX: CAP_POSIX_FADVISE? */
	AUDIT_ARG_FD(fd);
	/*
	 * NOTE(review): the 'out' path tests fp against NULL, so this relies
	 * on fget() storing NULL in fp when it fails -- confirm.
	 */
	error = fget(td, fd, &cap_no_rights, &fp);
	if (error != 0)
		goto out;
	AUDIT_ARG_FILE(td->td_proc, fp);
	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
		error = ESPIPE;
		goto out;
	}
	if (fp->f_type != DTYPE_VNODE) {
		error = ENODEV;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type != VREG) {
		error = ENODEV;
		goto out;
	}
	/* 'end' is the inclusive last byte of the range. */
	if (len == 0)
		end = OFF_MAX;
	else
		end = offset + len - 1;
	switch (advice) {
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_NOREUSE:
		/*
		 * Try to merge any existing non-standard region with
		 * this new region if possible, otherwise create a new
		 * non-standard region for this request.
		 */
		mtx_pool_lock(mtxpool_sleep, fp);
		fa = fp->f_advice;
		/* Merge when the advice matches and the ranges overlap or abut. */
		if (fa != NULL && fa->fa_advice == advice &&
		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
		    (end != OFF_MAX && fa->fa_start == end + 1) ||
		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
			if (offset < fa->fa_start)
				fa->fa_start = offset;
			if (end > fa->fa_end)
				fa->fa_end = end;
		} else {
			new->fa_advice = advice;
			new->fa_start = offset;
			new->fa_end = end;
			fp->f_advice = new;
			/* The replaced region (if any) is freed at 'out'. */
			new = fa;
		}
		mtx_pool_unlock(mtxpool_sleep, fp);
		break;
	case POSIX_FADV_NORMAL:
		/*
		 * If the "normal" region overlaps with an existing
		 * non-standard region, trim or remove the
		 * non-standard region.
		 */
		mtx_pool_lock(mtxpool_sleep, fp);
		fa = fp->f_advice;
		if (fa != NULL) {
			if (offset <= fa->fa_start && end >= fa->fa_end) {
				new = fa;
				fp->f_advice = NULL;
			} else if (offset <= fa->fa_start &&
			    end >= fa->fa_start)
				fa->fa_start = end + 1;
			else if (offset <= fa->fa_end && end >= fa->fa_end)
				fa->fa_end = offset - 1;
			else if (offset >= fa->fa_start && end <= fa->fa_end) {
				/*
				 * If the "normal" region is a middle
				 * portion of the existing
				 * non-standard region, just remove
				 * the whole thing rather than picking
				 * one side or the other to
				 * preserve.
				 */
				new = fa;
				fp->f_advice = NULL;
			}
		}
		mtx_pool_unlock(mtxpool_sleep, fp);
		break;
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		error = VOP_ADVISE(vp, offset, end, advice);
		break;
	}
out:
	if (fp != NULL)
		fdrop(fp, td);
	/* Frees an unused allocation, a replaced region, or nothing. */
	free(new, M_FADVISE);
	return (error);
}

/*
 * posix_fadvise(2) entry point: the result of kern_posix_fadvise() is
 * passed through kern_posix_error().
 */
int
sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
{
	int error;

	error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
	    uap->advice);
	return (kern_posix_error(td, error));
}

/*
 * Implementation of copy_file_range(2): copy up to 'len' bytes from 'infd'
 * to 'outfd'.
 *
 * 'inoffp' / 'outoffp' point to kernel copies of the offsets to use and
 * update; a NULL pointer selects the corresponding file's own f_offset.
 * 'flags' must be 0.  'len' is clamped to SSIZE_MAX.
 *
 * td->td_retval[0] receives the number of bytes copied.  When the result
 * is EINTR or ERESTART both offsets are restored to their initial values.
 *
 * Returns EINVAL for non-zero flags, a descriptor without a vnode, or
 * overlapping ranges within the same vnode; EBADF when the files are not
 * open appropriately (the output must be writable and not in append mode).
 */
int
kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
    off_t *outoffp, size_t len, unsigned int flags)
{
	struct file *infp, *outfp;
	struct vnode *invp, *outvp;
	int error;
	size_t retlen;
	void *rl_rcookie, *rl_wcookie;
	off_t savinoff, savoutoff;

	infp = outfp = NULL;
	rl_rcookie = rl_wcookie = NULL;
	/* -1 marks "offsets not captured yet" for the restore at 'out'. */
	savinoff = -1;
	error = 0;
	retlen = 0;

	if (flags != 0) {
		error = EINVAL;
		goto out;
	}
	if (len > SSIZE_MAX)
		/*
		 * Although the len argument is size_t, the return argument
		 * is ssize_t (which is signed).  Therefore a size that won't
		 * fit in ssize_t can't be returned.
		 */
		len = SSIZE_MAX;

	/* Get the file structures for the file descriptors. */
	error = fget_read(td, infd,
	    inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp);
	if (error != 0)
		goto out;
	if (infp->f_ops == &badfileops) {
		error = EBADF;
		goto out;
	}
	if (infp->f_vnode == NULL) {
		error = EINVAL;
		goto out;
	}
	error = fget_write(td, outfd,
	    outoffp != NULL ? &cap_pwrite_rights : &cap_write_rights, &outfp);
	if (error != 0)
		goto out;
	if (outfp->f_ops == &badfileops) {
		error = EBADF;
		goto out;
	}
	if (outfp->f_vnode == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Set the offset pointers to the correct place. */
	if (inoffp == NULL)
		inoffp = &infp->f_offset;
	if (outoffp == NULL)
		outoffp = &outfp->f_offset;
	savinoff = *inoffp;
	savoutoff = *outoffp;

	invp = infp->f_vnode;
	outvp = outfp->f_vnode;
	/* Sanity check the f_flag bits. */
	if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
	    (infp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto out;
	}

	/* If len == 0, just return 0. */
	if (len == 0)
		goto out;

	/*
	 * If infp and outfp refer to the same file, the byte ranges cannot
	 * overlap.
	 */
	if (invp == outvp) {
		if ((savinoff <= savoutoff && savinoff + len > savoutoff) ||
		    (savinoff > savoutoff && savoutoff + len > savinoff)) {
			error = EINVAL;
			goto out;
		}
		rangelock_may_recurse(&invp->v_rl);
	}

	/* Range lock the byte ranges for both invp and outvp. */
	for (;;) {
		rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
		    len);
		rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp +
		    len);
		if (rl_rcookie != NULL)
			break;
		/*
		 * The read lock was not available without blocking.  Drop
		 * the write lock, block until the read range can be
		 * locked, release it again and retry both from the top.
		 */
		vn_rangelock_unlock(outvp, rl_wcookie);
		rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
		vn_rangelock_unlock(invp, rl_rcookie);
	}

	retlen = len;
	error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
	    flags, infp->f_cred, outfp->f_cred, td);
out:
	/* The cookies are only non-NULL once invp and outvp have been set. */
	if (rl_rcookie != NULL)
		vn_rangelock_unlock(invp, rl_rcookie);
	if (rl_wcookie != NULL)
		vn_rangelock_unlock(outvp, rl_wcookie);
	if (savinoff != -1 && (error == EINTR || error == ERESTART)) {
		*inoffp = savinoff;
		*outoffp = savoutoff;
	}
	if (outfp != NULL)
		fdrop(outfp, td);
	if (infp != NULL)
		fdrop(infp, td);
	td->td_retval[0] = retlen;
	return (error);
}

/*
 * copy_file_range(2) entry point: copies the optional user-space offsets
 * in, runs kern_copy_file_range() on the kernel copies, and on success
 * copies the updated offsets back out.
 */
int
sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
{
	off_t inoff, outoff, *inoffp, *outoffp;
	int error;

	inoffp = outoffp = NULL;
	if (uap->inoffp != NULL) {
		error = copyin(uap->inoffp, &inoff, sizeof(off_t));
		if (error != 0)
			return (error);
		inoffp = &inoff;
	}
	if (uap->outoffp != NULL) {
		error = copyin(uap->outoffp, &outoff, sizeof(off_t));
		if (error != 0)
			return (error);
		outoffp = &outoff;
	}
	error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
	    outoffp, uap->len, uap->flags);
	if (error == 0 && uap->inoffp != NULL)
		error = copyout(inoffp, uap->inoffp, sizeof(off_t));
	if (error == 0 && uap->outoffp != NULL)
		error = copyout(outoffp, uap->outoffp, sizeof(off_t));
	return (error);
}