1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include "opt_capsicum.h" 38 #include "opt_ktrace.h" 39 40 #define EXTERR_CATEGORY EXTERR_CAT_VFSSYSCALL 41 #include <sys/systm.h> 42 #ifdef COMPAT_FREEBSD11 43 #include <sys/abi_compat.h> 44 #endif 45 #include <sys/bio.h> 46 #include <sys/buf.h> 47 #include <sys/capsicum.h> 48 #include <sys/disk.h> 49 #include <sys/dirent.h> 50 #include <sys/exterrvar.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/filio.h> 55 #include <sys/jail.h> 56 #include <sys/kernel.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/malloc.h> 63 #include <sys/mount.h> 64 #include <sys/mutex.h> 65 #include <sys/namei.h> 66 #include <sys/priv.h> 67 #include <sys/proc.h> 68 #include <sys/rwlock.h> 69 #include <sys/sdt.h> 70 #include <sys/stat.h> 71 #include <sys/stdarg.h> 72 #include <sys/sx.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #include <sys/sysproto.h> 76 #include <sys/unistd.h> 77 #include <sys/vnode.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vnode_pager.h> 86 #include <vm/uma.h> 87 88 #include <fs/devfs/devfs.h> 89 90 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 91 92 static int kern_chflagsat(struct thread *td, int fd, const char *path, 93 enum uio_seg pathseg, u_long flags, int atflag); 94 static int setfflags(struct thread *td, struct vnode *, u_long); 95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 96 static int getutimens(const struct timespec *, enum uio_seg, 97 struct timespec *, int *); 98 static int setutimes(struct thread *td, struct vnode *, 99 const struct timespec *, int, int); 100 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 101 struct thread *td); 102 static int kern_fhlinkat(struct 
thread *td, int fd, const char *path, 103 enum uio_seg pathseg, fhandle_t *fhp); 104 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 105 size_t count, struct thread *td); 106 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 107 const char *path, enum uio_seg segflag); 108 109 uint64_t 110 at2cnpflags(u_int at_flags, u_int mask) 111 { 112 uint64_t res; 113 114 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 115 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 116 117 res = 0; 118 at_flags &= mask; 119 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 120 res |= RBENEATH; 121 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 122 res |= FOLLOW; 123 /* NOFOLLOW is pseudo flag */ 124 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 125 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 126 FOLLOW; 127 } 128 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 129 res |= EMPTYPATH; 130 return (res); 131 } 132 133 int 134 kern_sync(struct thread *td) 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_periodic(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Sync each mounted filesystem. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct sync_args { 166 int dummy; 167 }; 168 #endif 169 /* ARGSUSED */ 170 int 171 sys_sync(struct thread *td, struct sync_args *uap) 172 { 173 174 return (kern_sync(td)); 175 } 176 177 /* 178 * Change filesystem quotas. 
179 */ 180 #ifndef _SYS_SYSPROTO_H_ 181 struct quotactl_args { 182 char *path; 183 int cmd; 184 int uid; 185 caddr_t arg; 186 }; 187 #endif 188 int 189 sys_quotactl(struct thread *td, struct quotactl_args *uap) 190 { 191 struct mount *mp; 192 struct nameidata nd; 193 int error; 194 bool mp_busy; 195 196 AUDIT_ARG_CMD(uap->cmd); 197 AUDIT_ARG_UID(uap->uid); 198 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 199 return (EPERM); 200 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 201 uap->path); 202 if ((error = namei(&nd)) != 0) 203 return (error); 204 NDFREE_PNBUF(&nd); 205 mp = nd.ni_vp->v_mount; 206 vfs_ref(mp); 207 vput(nd.ni_vp); 208 error = vfs_busy(mp, 0); 209 if (error != 0) { 210 vfs_rel(mp); 211 return (error); 212 } 213 mp_busy = true; 214 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 215 216 /* 217 * Since quota on/off operations typically need to open quota 218 * files, the implementation may need to unbusy the mount point 219 * before calling into namei. Otherwise, unmount might be 220 * started between two vfs_busy() invocations (first is ours, 221 * second is from mount point cross-walk code in lookup()), 222 * causing deadlock. 223 * 224 * Avoid unbusying mp if the implementation indicates it has 225 * already done so. 226 */ 227 if (mp_busy) 228 vfs_unbusy(mp); 229 vfs_rel(mp); 230 return (error); 231 } 232 233 /* 234 * Used by statfs conversion routines to scale the block size up if 235 * necessary so that all of the block counts are <= 'max_size'. Note 236 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 237 * value of 'n'. 238 */ 239 void 240 statfs_scale_blocks(struct statfs *sf, long max_size) 241 { 242 uint64_t count; 243 int shift; 244 245 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 246 247 /* 248 * Attempt to scale the block counts to give a more accurate 249 * overview to userland of the ratio of free space to used 250 * space. 
To do this, find the largest block count and compute 251 * a divisor that lets it fit into a signed integer <= max_size. 252 */ 253 if (sf->f_bavail < 0) 254 count = -sf->f_bavail; 255 else 256 count = sf->f_bavail; 257 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 258 if (count <= max_size) 259 return; 260 261 count >>= flsl(max_size); 262 shift = 0; 263 while (count > 0) { 264 shift++; 265 count >>=1; 266 } 267 268 sf->f_bsize <<= shift; 269 sf->f_blocks >>= shift; 270 sf->f_bfree >>= shift; 271 sf->f_bavail >>= shift; 272 } 273 274 static int 275 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 276 { 277 int error; 278 279 if (mp == NULL) 280 return (EBADF); 281 error = vfs_busy(mp, 0); 282 vfs_rel(mp); 283 if (error != 0) 284 return (error); 285 #ifdef MAC 286 error = mac_mount_check_stat(td->td_ucred, mp); 287 if (error != 0) 288 goto out; 289 #endif 290 error = VFS_STATFS(mp, buf); 291 if (error != 0) 292 goto out; 293 if (priv_check_cred_vfs_generation(td->td_ucred)) 294 prison_enforce_statfs(td->td_ucred, mp, buf); 295 out: 296 vfs_unbusy(mp); 297 return (error); 298 } 299 300 /* 301 * Get filesystem statistics. 
302 */ 303 #ifndef _SYS_SYSPROTO_H_ 304 struct statfs_args { 305 char *path; 306 struct statfs *buf; 307 }; 308 #endif 309 int 310 sys_statfs(struct thread *td, struct statfs_args *uap) 311 { 312 struct statfs *sfp; 313 int error; 314 315 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 316 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 317 if (error == 0) 318 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 319 free(sfp, M_STATFS); 320 return (error); 321 } 322 323 int 324 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 325 struct statfs *buf) 326 { 327 struct mount *mp; 328 struct nameidata nd; 329 int error; 330 331 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 332 error = namei(&nd); 333 if (error != 0) 334 return (error); 335 NDFREE_PNBUF(&nd); 336 mp = vfs_ref_from_vp(nd.ni_vp); 337 vrele(nd.ni_vp); 338 return (kern_do_statfs(td, mp, buf)); 339 } 340 341 /* 342 * Get filesystem statistics. 343 */ 344 #ifndef _SYS_SYSPROTO_H_ 345 struct fstatfs_args { 346 int fd; 347 struct statfs *buf; 348 }; 349 #endif 350 int 351 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 352 { 353 struct statfs *sfp; 354 int error; 355 356 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 357 error = kern_fstatfs(td, uap->fd, sfp); 358 if (error == 0) 359 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 360 free(sfp, M_STATFS); 361 return (error); 362 } 363 364 int 365 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 366 { 367 struct file *fp; 368 struct mount *mp; 369 struct vnode *vp; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp); 374 if (error != 0) 375 return (error); 376 vp = fp->f_vnode; 377 #ifdef AUDIT 378 if (AUDITING_TD(td)) { 379 vn_lock(vp, LK_SHARED | LK_RETRY); 380 AUDIT_ARG_VNODE1(vp); 381 VOP_UNLOCK(vp); 382 } 383 #endif 384 mp = vfs_ref_from_vp(vp); 385 fdrop(fp, td); 386 return (kern_do_statfs(td, mp, buf)); 387 } 388 
389 /* 390 * Get statistics on all filesystems. 391 */ 392 #ifndef _SYS_SYSPROTO_H_ 393 struct getfsstat_args { 394 struct statfs *buf; 395 long bufsize; 396 int mode; 397 }; 398 #endif 399 int 400 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 401 { 402 size_t count; 403 int error; 404 405 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 406 return (EINVAL); 407 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 408 UIO_USERSPACE, uap->mode); 409 if (error == 0) 410 td->td_retval[0] = count; 411 return (error); 412 } 413 414 /* 415 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 416 * The caller is responsible for freeing memory which will be allocated 417 * in '*buf'. 418 */ 419 int 420 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 421 size_t *countp, enum uio_seg bufseg, int mode) 422 { 423 struct mount *mp, *nmp; 424 struct statfs *sfsp, *sp, *sptmp, *tofree; 425 size_t count, maxcount; 426 int error; 427 428 switch (mode) { 429 case MNT_WAIT: 430 case MNT_NOWAIT: 431 break; 432 default: 433 if (bufseg == UIO_SYSSPACE) 434 *buf = NULL; 435 return (EINVAL); 436 } 437 restart: 438 maxcount = bufsize / sizeof(struct statfs); 439 if (bufsize == 0) { 440 sfsp = NULL; 441 tofree = NULL; 442 } else if (bufseg == UIO_USERSPACE) { 443 sfsp = *buf; 444 tofree = NULL; 445 } else /* if (bufseg == UIO_SYSSPACE) */ { 446 count = 0; 447 mtx_lock(&mountlist_mtx); 448 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 449 count++; 450 } 451 mtx_unlock(&mountlist_mtx); 452 if (maxcount > count) 453 maxcount = count; 454 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 455 M_STATFS, M_WAITOK); 456 } 457 458 count = 0; 459 460 /* 461 * If there is no target buffer they only want the count. 462 * 463 * This could be TAILQ_FOREACH but it is open-coded to match the original 464 * code below. 
465 */ 466 if (sfsp == NULL) { 467 mtx_lock(&mountlist_mtx); 468 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 469 if (prison_canseemount(td->td_ucred, mp) != 0) { 470 nmp = TAILQ_NEXT(mp, mnt_list); 471 continue; 472 } 473 #ifdef MAC 474 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 475 nmp = TAILQ_NEXT(mp, mnt_list); 476 continue; 477 } 478 #endif 479 count++; 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 } 482 mtx_unlock(&mountlist_mtx); 483 *countp = count; 484 return (0); 485 } 486 487 /* 488 * They want the entire thing. 489 * 490 * Short-circuit the corner case of no room for anything, avoids 491 * relocking below. 492 */ 493 if (maxcount < 1) { 494 goto out; 495 } 496 497 mtx_lock(&mountlist_mtx); 498 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 499 if (prison_canseemount(td->td_ucred, mp) != 0) { 500 nmp = TAILQ_NEXT(mp, mnt_list); 501 continue; 502 } 503 #ifdef MAC 504 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 continue; 507 } 508 #endif 509 if (mode == MNT_WAIT) { 510 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 511 /* 512 * If vfs_busy() failed, and MBF_NOWAIT 513 * wasn't passed, then the mp is gone. 514 * Furthermore, because of MBF_MNTLSTLOCK, 515 * the mountlist_mtx was dropped. We have 516 * no other choice than to start over. 517 */ 518 mtx_unlock(&mountlist_mtx); 519 free(tofree, M_STATFS); 520 goto restart; 521 } 522 } else { 523 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 524 nmp = TAILQ_NEXT(mp, mnt_list); 525 continue; 526 } 527 } 528 sp = &mp->mnt_stat; 529 /* 530 * If MNT_NOWAIT is specified, do not refresh 531 * the fsstat cache. 
532 */ 533 if (mode != MNT_NOWAIT) { 534 error = VFS_STATFS(mp, sp); 535 if (error != 0) { 536 mtx_lock(&mountlist_mtx); 537 nmp = TAILQ_NEXT(mp, mnt_list); 538 vfs_unbusy(mp); 539 continue; 540 } 541 } 542 if (priv_check_cred_vfs_generation(td->td_ucred)) { 543 sptmp = malloc(sizeof(struct statfs), M_STATFS, 544 M_WAITOK); 545 *sptmp = *sp; 546 prison_enforce_statfs(td->td_ucred, mp, sptmp); 547 sp = sptmp; 548 } else 549 sptmp = NULL; 550 if (bufseg == UIO_SYSSPACE) { 551 bcopy(sp, sfsp, sizeof(*sp)); 552 free(sptmp, M_STATFS); 553 } else /* if (bufseg == UIO_USERSPACE) */ { 554 error = copyout(sp, sfsp, sizeof(*sp)); 555 free(sptmp, M_STATFS); 556 if (error != 0) { 557 vfs_unbusy(mp); 558 return (error); 559 } 560 } 561 sfsp++; 562 count++; 563 564 if (count == maxcount) { 565 vfs_unbusy(mp); 566 goto out; 567 } 568 569 mtx_lock(&mountlist_mtx); 570 nmp = TAILQ_NEXT(mp, mnt_list); 571 vfs_unbusy(mp); 572 } 573 mtx_unlock(&mountlist_mtx); 574 out: 575 *countp = count; 576 return (0); 577 } 578 579 #ifdef COMPAT_FREEBSD4 580 /* 581 * Get old format filesystem statistics. 582 */ 583 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 584 585 #ifndef _SYS_SYSPROTO_H_ 586 struct freebsd4_statfs_args { 587 char *path; 588 struct ostatfs *buf; 589 }; 590 #endif 591 int 592 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 593 { 594 struct ostatfs osb; 595 struct statfs *sfp; 596 int error; 597 598 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 599 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 600 if (error == 0) { 601 freebsd4_cvtstatfs(sfp, &osb); 602 error = copyout(&osb, uap->buf, sizeof(osb)); 603 } 604 free(sfp, M_STATFS); 605 return (error); 606 } 607 608 /* 609 * Get filesystem statistics. 
610 */ 611 #ifndef _SYS_SYSPROTO_H_ 612 struct freebsd4_fstatfs_args { 613 int fd; 614 struct ostatfs *buf; 615 }; 616 #endif 617 int 618 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 619 { 620 struct ostatfs osb; 621 struct statfs *sfp; 622 int error; 623 624 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 625 error = kern_fstatfs(td, uap->fd, sfp); 626 if (error == 0) { 627 freebsd4_cvtstatfs(sfp, &osb); 628 error = copyout(&osb, uap->buf, sizeof(osb)); 629 } 630 free(sfp, M_STATFS); 631 return (error); 632 } 633 634 /* 635 * Get statistics on all filesystems. 636 */ 637 #ifndef _SYS_SYSPROTO_H_ 638 struct freebsd4_getfsstat_args { 639 struct ostatfs *buf; 640 long bufsize; 641 int mode; 642 }; 643 #endif 644 int 645 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 646 { 647 struct statfs *buf, *sp; 648 struct ostatfs osb; 649 size_t count, size; 650 int error; 651 652 if (uap->bufsize < 0) 653 return (EINVAL); 654 count = uap->bufsize / sizeof(struct ostatfs); 655 if (count > SIZE_MAX / sizeof(struct statfs)) 656 return (EINVAL); 657 size = count * sizeof(struct statfs); 658 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 659 uap->mode); 660 if (error == 0) 661 td->td_retval[0] = count; 662 if (size != 0) { 663 sp = buf; 664 while (count != 0 && error == 0) { 665 freebsd4_cvtstatfs(sp, &osb); 666 error = copyout(&osb, uap->buf, sizeof(osb)); 667 sp++; 668 uap->buf++; 669 count--; 670 } 671 free(buf, M_STATFS); 672 } 673 return (error); 674 } 675 676 /* 677 * Implement fstatfs() for (NFS) file handles. 
678 */ 679 #ifndef _SYS_SYSPROTO_H_ 680 struct freebsd4_fhstatfs_args { 681 struct fhandle *u_fhp; 682 struct ostatfs *buf; 683 }; 684 #endif 685 int 686 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 687 { 688 struct ostatfs osb; 689 struct statfs *sfp; 690 fhandle_t fh; 691 int error; 692 693 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 694 if (error != 0) 695 return (error); 696 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 697 error = kern_fhstatfs(td, fh, sfp); 698 if (error == 0) { 699 freebsd4_cvtstatfs(sfp, &osb); 700 error = copyout(&osb, uap->buf, sizeof(osb)); 701 } 702 free(sfp, M_STATFS); 703 return (error); 704 } 705 706 /* 707 * Convert a new format statfs structure to an old format statfs structure. 708 */ 709 static void 710 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 711 { 712 713 statfs_scale_blocks(nsp, LONG_MAX); 714 bzero(osp, sizeof(*osp)); 715 osp->f_bsize = nsp->f_bsize; 716 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 717 osp->f_blocks = nsp->f_blocks; 718 osp->f_bfree = nsp->f_bfree; 719 osp->f_bavail = nsp->f_bavail; 720 osp->f_files = MIN(nsp->f_files, LONG_MAX); 721 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 722 osp->f_owner = nsp->f_owner; 723 osp->f_type = nsp->f_type; 724 osp->f_flags = nsp->f_flags; 725 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 726 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 727 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 728 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 729 strlcpy(osp->f_fstypename, nsp->f_fstypename, 730 MIN(MFSNAMELEN, OMFSNAMELEN)); 731 strlcpy(osp->f_mntonname, nsp->f_mntonname, 732 MIN(MNAMELEN, OMNAMELEN)); 733 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 734 MIN(MNAMELEN, OMNAMELEN)); 735 osp->f_fsid = nsp->f_fsid; 736 } 737 #endif /* COMPAT_FREEBSD4 */ 738 739 #if defined(COMPAT_FREEBSD11) 740 /* 741 * Get old format filesystem statistics. 
742 */ 743 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 744 745 int 746 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 747 { 748 struct freebsd11_statfs osb; 749 struct statfs *sfp; 750 int error; 751 752 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 753 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 754 if (error == 0) { 755 freebsd11_cvtstatfs(sfp, &osb); 756 error = copyout(&osb, uap->buf, sizeof(osb)); 757 } 758 free(sfp, M_STATFS); 759 return (error); 760 } 761 762 /* 763 * Get filesystem statistics. 764 */ 765 int 766 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 767 { 768 struct freebsd11_statfs osb; 769 struct statfs *sfp; 770 int error; 771 772 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 773 error = kern_fstatfs(td, uap->fd, sfp); 774 if (error == 0) { 775 freebsd11_cvtstatfs(sfp, &osb); 776 error = copyout(&osb, uap->buf, sizeof(osb)); 777 } 778 free(sfp, M_STATFS); 779 return (error); 780 } 781 782 /* 783 * Get statistics on all filesystems. 
784 */ 785 int 786 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 787 { 788 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 789 } 790 791 int 792 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 793 long bufsize, int mode) 794 { 795 struct freebsd11_statfs osb; 796 struct statfs *buf, *sp; 797 size_t count, size; 798 int error; 799 800 if (bufsize < 0) 801 return (EINVAL); 802 803 count = bufsize / sizeof(struct ostatfs); 804 size = count * sizeof(struct statfs); 805 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 806 if (error == 0) 807 td->td_retval[0] = count; 808 if (size > 0) { 809 sp = buf; 810 while (count > 0 && error == 0) { 811 freebsd11_cvtstatfs(sp, &osb); 812 error = copyout(&osb, ubuf, sizeof(osb)); 813 sp++; 814 ubuf++; 815 count--; 816 } 817 free(buf, M_STATFS); 818 } 819 return (error); 820 } 821 822 /* 823 * Implement fstatfs() for (NFS) file handles. 824 */ 825 int 826 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 827 { 828 struct freebsd11_statfs osb; 829 struct statfs *sfp; 830 fhandle_t fh; 831 int error; 832 833 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 834 if (error) 835 return (error); 836 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 837 error = kern_fhstatfs(td, fh, sfp); 838 if (error == 0) { 839 freebsd11_cvtstatfs(sfp, &osb); 840 error = copyout(&osb, uap->buf, sizeof(osb)); 841 } 842 free(sfp, M_STATFS); 843 return (error); 844 } 845 846 /* 847 * Convert a new format statfs structure to an old format statfs structure. 
848 */ 849 static void 850 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 851 { 852 853 bzero(osp, sizeof(*osp)); 854 osp->f_version = FREEBSD11_STATFS_VERSION; 855 osp->f_type = nsp->f_type; 856 osp->f_flags = nsp->f_flags; 857 osp->f_bsize = nsp->f_bsize; 858 osp->f_iosize = nsp->f_iosize; 859 osp->f_blocks = nsp->f_blocks; 860 osp->f_bfree = nsp->f_bfree; 861 osp->f_bavail = nsp->f_bavail; 862 osp->f_files = nsp->f_files; 863 osp->f_ffree = nsp->f_ffree; 864 osp->f_syncwrites = nsp->f_syncwrites; 865 osp->f_asyncwrites = nsp->f_asyncwrites; 866 osp->f_syncreads = nsp->f_syncreads; 867 osp->f_asyncreads = nsp->f_asyncreads; 868 osp->f_namemax = nsp->f_namemax; 869 osp->f_owner = nsp->f_owner; 870 osp->f_fsid = nsp->f_fsid; 871 strlcpy(osp->f_fstypename, nsp->f_fstypename, 872 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 873 strlcpy(osp->f_mntonname, nsp->f_mntonname, 874 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 875 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 876 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 877 } 878 #endif /* COMPAT_FREEBSD11 */ 879 880 /* 881 * Change current working directory to a given file descriptor. 
882 */ 883 #ifndef _SYS_SYSPROTO_H_ 884 struct fchdir_args { 885 int fd; 886 }; 887 #endif 888 int 889 sys_fchdir(struct thread *td, struct fchdir_args *uap) 890 { 891 struct vnode *vp, *tdp; 892 struct mount *mp; 893 struct file *fp; 894 int error; 895 uint8_t fdflags; 896 897 AUDIT_ARG_FD(uap->fd); 898 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags, 899 &fp); 900 if (error != 0) 901 return (error); 902 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 903 fdrop(fp, td); 904 return (ENOTCAPABLE); 905 } 906 vp = fp->f_vnode; 907 vrefact(vp); 908 fdrop(fp, td); 909 vn_lock(vp, LK_SHARED | LK_RETRY); 910 AUDIT_ARG_VNODE1(vp); 911 error = change_dir(vp, td); 912 while (!error && (mp = vp->v_mountedhere) != NULL) { 913 if (vfs_busy(mp, 0)) 914 continue; 915 error = VFS_ROOT(mp, LK_SHARED, &tdp); 916 vfs_unbusy(mp); 917 if (error != 0) 918 break; 919 vput(vp); 920 vp = tdp; 921 } 922 if (error != 0) { 923 vput(vp); 924 return (error); 925 } 926 VOP_UNLOCK(vp); 927 pwd_chdir(td, vp); 928 return (0); 929 } 930 931 /* 932 * Change current working directory (``.''). 
933 */ 934 #ifndef _SYS_SYSPROTO_H_ 935 struct chdir_args { 936 char *path; 937 }; 938 #endif 939 int 940 sys_chdir(struct thread *td, struct chdir_args *uap) 941 { 942 943 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 944 } 945 946 int 947 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 948 { 949 struct nameidata nd; 950 int error; 951 952 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 953 pathseg, path); 954 if ((error = namei(&nd)) != 0) 955 return (error); 956 if ((error = change_dir(nd.ni_vp, td)) != 0) { 957 vput(nd.ni_vp); 958 NDFREE_PNBUF(&nd); 959 return (error); 960 } 961 VOP_UNLOCK(nd.ni_vp); 962 NDFREE_PNBUF(&nd); 963 pwd_chdir(td, nd.ni_vp); 964 return (0); 965 } 966 967 static int unprivileged_chroot = 0; 968 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 969 &unprivileged_chroot, 0, 970 "Unprivileged processes can use chroot(2)"); 971 972 /* 973 * Takes locked vnode, unlocks it before returning. 974 */ 975 static int 976 kern_chroot(struct thread *td, struct vnode *vp) 977 { 978 struct proc *p; 979 int error; 980 981 error = priv_check(td, PRIV_VFS_CHROOT); 982 if (error != 0) { 983 p = td->td_proc; 984 if (unprivileged_chroot == 0) { 985 error = EXTERROR(EPERM, 986 "security.bsd.unprivileged_chroot sysctl not enabled"); 987 goto e_vunlock; 988 } 989 if ((p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 990 error = EXTERROR(EPERM, 991 "PROC_NO_NEW_PRIVS not enabled"); 992 goto e_vunlock; 993 } 994 } 995 996 error = change_dir(vp, td); 997 if (error != 0) 998 goto e_vunlock; 999 #ifdef MAC 1000 error = mac_vnode_check_chroot(td->td_ucred, vp); 1001 if (error != 0) 1002 goto e_vunlock; 1003 #endif 1004 VOP_UNLOCK(vp); 1005 error = pwd_chroot(td, vp); 1006 vrele(vp); 1007 return (error); 1008 e_vunlock: 1009 vput(vp); 1010 return (error); 1011 } 1012 1013 /* 1014 * Change notion of root (``/'') directory. 
1015 */ 1016 #ifndef _SYS_SYSPROTO_H_ 1017 struct chroot_args { 1018 char *path; 1019 }; 1020 #endif 1021 int 1022 sys_chroot(struct thread *td, struct chroot_args *uap) 1023 { 1024 struct nameidata nd; 1025 int error; 1026 1027 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1028 UIO_USERSPACE, uap->path); 1029 error = namei(&nd); 1030 if (error != 0) 1031 return (error); 1032 NDFREE_PNBUF(&nd); 1033 error = kern_chroot(td, nd.ni_vp); 1034 return (error); 1035 } 1036 1037 /* 1038 * Change notion of root directory to a given file descriptor. 1039 */ 1040 #ifndef _SYS_SYSPROTO_H_ 1041 struct fchroot_args { 1042 int fd; 1043 }; 1044 #endif 1045 int 1046 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1047 { 1048 struct vnode *vp; 1049 struct file *fp; 1050 int error; 1051 uint8_t fdflags; 1052 1053 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fdflags, &fp); 1054 if (error != 0) 1055 return (error); 1056 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 1057 fdrop(fp, td); 1058 return (ENOTCAPABLE); 1059 } 1060 vp = fp->f_vnode; 1061 vrefact(vp); 1062 fdrop(fp, td); 1063 vn_lock(vp, LK_SHARED | LK_RETRY); 1064 error = kern_chroot(td, vp); 1065 return (error); 1066 } 1067 1068 /* 1069 * Common routine for chroot and chdir. Callers must provide a locked vnode 1070 * instance. 
1071 */ 1072 int 1073 change_dir(struct vnode *vp, struct thread *td) 1074 { 1075 #ifdef MAC 1076 int error; 1077 #endif 1078 1079 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1080 if (vp->v_type != VDIR) 1081 return (ENOTDIR); 1082 #ifdef MAC 1083 error = mac_vnode_check_chdir(td->td_ucred, vp); 1084 if (error != 0) 1085 return (error); 1086 #endif 1087 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1088 } 1089 1090 static __inline void 1091 flags_to_rights(int flags, cap_rights_t *rightsp) 1092 { 1093 if (flags & O_EXEC) { 1094 cap_rights_set_one(rightsp, CAP_FEXECVE); 1095 if (flags & O_PATH) 1096 return; 1097 } else { 1098 switch ((flags & O_ACCMODE)) { 1099 case O_RDONLY: 1100 cap_rights_set_one(rightsp, CAP_READ); 1101 break; 1102 case O_RDWR: 1103 cap_rights_set_one(rightsp, CAP_READ); 1104 /* FALLTHROUGH */ 1105 case O_WRONLY: 1106 cap_rights_set_one(rightsp, CAP_WRITE); 1107 if (!(flags & (O_APPEND | O_TRUNC))) 1108 cap_rights_set_one(rightsp, CAP_SEEK); 1109 break; 1110 } 1111 } 1112 1113 if (flags & O_CREAT) 1114 cap_rights_set_one(rightsp, CAP_CREATE); 1115 1116 if (flags & O_TRUNC) 1117 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1118 1119 if (flags & (O_SYNC | O_FSYNC | O_DSYNC)) 1120 cap_rights_set_one(rightsp, CAP_FSYNC); 1121 1122 if (flags & (O_EXLOCK | O_SHLOCK)) 1123 cap_rights_set_one(rightsp, CAP_FLOCK); 1124 } 1125 1126 /* 1127 * Check permissions, allocate an open file structure, and call the device 1128 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
/* Reference layout only; compiled when _SYS_SYSPROTO_H_ is not defined. */
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/*
 * open(2) entry point.  Forwards to kern_openat() with AT_FDCWD as the
 * directory descriptor and a user-space path.
 */
int
sys_open(struct thread *td, struct open_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
/* Reference layout only; compiled when _SYS_SYSPROTO_H_ is not defined. */
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/*
 * openat(2) entry point.  Records the directory descriptor for audit and
 * forwards to kern_openat() with a user-space path.
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Validate open(2) flags and convert access mode flags (O_RDONLY etc.) to their
 * in-kernel representations (FREAD etc.).
 *
 * *flagsp is read on entry and rewritten only on success.  Returns 0, or
 * EINVAL when a bit outside FUSERALLOWED is set or the access mode
 * combination is rejected below.
 */
static int
openflags(int *flagsp)
{
	int flags;

	flags = *flagsp;
	if ((flags & ~FUSERALLOWED) != 0)
		return (EINVAL);

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	if ((flags & O_PATH) != 0) {
		flags &= ~O_ACCMODE;
	} else if ((flags & O_EXEC) != 0) {
		if ((flags & O_ACCMODE) != 0)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}
	*flagsp = flags;
	return (0);
}

/*
 * Attach the opened vnode to the file and, when no other file operations
 * vector has been installed yet, initialize the file either as an O_PATH
 * file or as a regular vnode-backed file.
 */
static void
finit_open(struct file *fp, struct vnode *vp, int flags)
{
	/*
	 * Store the vnode, for any f_type.  Typically, the vnode use count is
	 * decremented by a direct call to vnops.fo_close() for files that
	 * switched type.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs or fifofs, bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}
}

/*
 * If fpp != NULL, opened file is not installed into the file
 * descriptor table, instead it is returned in *fpp.  This is
 * incompatible with fdopen(), in which case we return EINVAL.
 *
 * With fpp == NULL the new descriptor index is stored in
 * td->td_retval[0] on success.  Every failure after the file structure has
 * been allocated leaves through the "bad" label, which releases the ioctl
 * capability data and aborts the allocated file.
 */
static int
openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	struct proc *p;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct filecaps *fcaps;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* indx stays -1 until a descriptor has actually been installed. */
	indx = -1;
	p = td->td_proc;
	fdp = p->p_fd;
	pdp = p->p_pd;

	/* Audit and derive rights from the caller's flags before conversion. */
	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	error = openflags(&flags);
	if (error != 0)
		return (error);

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Apply the process creation mask and strip the sticky bit. */
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, dirfd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			MPASS(fpp == NULL);
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	/* Bind the file to the vnode before dropping the vnode lock. */
	finit_open(fp, vp, flags);
	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/* Caller wants the file itself rather than a descriptor. */
	if (fpp != NULL) {
		MPASS(error == 0);
		NDFREE_IOCTLCAPS(&nd);
		*fpp = fp;
		return (0);
	}

	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		/* Make O_RESOLVE_BENEATH reflect what the lookup reported. */
		if ((nd.ni_resflags & NIRES_BENEATH) != 0)
			flags |= O_RESOLVE_BENEATH;
		else
			flags &= ~O_RESOLVE_BENEATH;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/* dupfdopen() installed a descriptor; fp is not needed. */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}

/*
 * Open a file and install it in the descriptor table; the descriptor is
 * returned through td->td_retval[0] by openatfp().
 */
int
kern_openat(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode)
{
	return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL));
}

/*
 * Open a file and hand it back in *fpp without installing a descriptor.
 * td->td_dupfd is saved around the call and restored afterwards, so the
 * value openatfp() leaves there is not visible to the caller.
 */
int
kern_openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	int error, old_dupfd;

	old_dupfd = td->td_dupfd;
	td->td_dupfd = -1;
	error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp);
	td->td_dupfd = old_dupfd;
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/* Old creat(): an open for writing with O_CREAT | O_TRUNC. */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
 */
#ifndef _SYS_SYSPROTO_H_
/* Reference layout only; compiled when _SYS_SYSPROTO_H_ is not defined. */
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/* mknodat(2) entry point: forwards to kern_mknodat() with a user-space path. */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

#if defined(COMPAT_FREEBSD11)
/* FreeBSD 11 mknod(): same as kern_mknodat() relative to AT_FDCWD. */
int
freebsd11_mknod(struct thread *td,
    struct freebsd11_mknod_args *uap)
{

	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode, uap->dev));
}

/* FreeBSD 11 mknodat(): forwards its arguments to kern_mknodat(). */
int
freebsd11_mknodat(struct thread *td,
    struct freebsd11_mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}
#endif /* COMPAT_FREEBSD11 */

/*
 * Create a character/block device node or a whiteout at path, resolved
 * relative to fd.
 *
 * S_IFIFO with dev == 0 is handed to kern_mkfifoat(); any other file type
 * outside S_IFCHR, S_IFBLK and S_IFWHT yields EINVAL.  Returns EEXIST when
 * the name already exists and EINVAL when the parent carries VIRF_NAMEDDIR.
 * The lookup is redone from "restart" after waiting out a write suspension
 * or when the filesystem operation returns ERELOOKUP.
 */
int
kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int mode, dev_t dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Privilege and argument checks, by requested file type. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mknodat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* Target already exists: drop everything namei() returned. */
		NDFREE_PNBUF(&nd);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	} else {
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_pd->pd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Other types were rejected by the first switch. */
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/* Release the parent, wait without it, then look up again. */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
		}
	}
	/* Only a successful VOP_MKNOD() hands back a new vnode to release. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL,
	    true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Create a named pipe.
 */
#ifndef _SYS_SYSPROTO_H_
/* Reference layout only; compiled when _SYS_SYSPROTO_H_ is not defined. */
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/* mkfifo(2) entry point: kern_mkfifoat() relative to AT_FDCWD. */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
/* Reference layout only; compiled when _SYS_SYSPROTO_H_ is not defined. */
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/* mkfifoat(2) entry point: forwards to kern_mkfifoat() with a user path. */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO at path, resolved relative to fd.
 *
 * Returns EEXIST when the name already exists and EINVAL when the parent
 * carries VIRF_NAMEDDIR.  The lookup is redone from "restart" after waiting
 * out a write suspension or when VOP_MKNOD() returns ERELOOKUP.
 */
int
kern_mkfifoat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mkfifoat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* Target already exists: drop everything namei() returned. */
		NDFREE_PNBUF(&nd);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/* Release the parent, wait without it, then look up again. */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	/* The new vnode is released only when VOP_MKNOD() produced one. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/* link(2) entry point: both paths relative to AT_FDCWD, following symlinks. */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, AT_SYMLINK_FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/* linkat(2) entry point: forwards to kern_linkat() with user-space paths. */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, uap->flag));
}

/* Tunables consulted by can_hardlink(); both default to 0 (off). */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether cred may hard-link vp under the hardlink_check_uid and
 * hardlink_check_gid policies.  Returns 0 when allowed (or when both
 * policies are off), otherwise the error from VOP_GETATTR() or
 * priv_check_cred().
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	/* Skip the attribute fetch entirely when no policy is enabled. */
	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create a hard link named path2 (relative to fd2) to the file found at
 * path1 (relative to fd1).
 *
 * Accepts only AT_SYMLINK_FOLLOW, AT_RESOLVE_BENEATH and AT_EMPTY_PATH in
 * flag.  A source resolved through an empty path additionally requires
 * PRIV_VFS_FHOPEN.  The whole source lookup is repeated while
 * kern_linkat_vp() reports EAGAIN or ERELOOKUP.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
    const char *path2, enum uio_seg segflag, int flag)
{
	struct nameidata nd;
	int error;

	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH |
	    AT_EMPTY_PATH)) != 0)
		return (EINVAL);

	NDPREINIT(&nd);
	do {
		bwillwrite();
		NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag,
		    AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH),
		    segflag, path1, fd1, &cap_linkat_source_rights);
		if ((error = namei(&nd)) != 0)
			return (error);
		NDFREE_PNBUF(&nd);
		if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) {
			error = priv_check(td, PRIV_VFS_FHOPEN);
			if (error != 0) {
				vrele(nd.ni_vp);
				return (error);
			}
		}
		/* kern_linkat_vp() releases nd.ni_vp on every path. */
		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
	} while (error == EAGAIN || error == ERELOOKUP);
	return (error);
}

/*
 * Link the referenced vnode vp under the name path, resolved relative to
 * fd.  The reference on vp is released on every return path.
 *
 * Returns EPERM for directories, EINVAL for vnodes carrying VIRF_NAMEDDIR
 * or VIRF_NAMEDATTR, EEXIST when the target name exists, EXDEV when the
 * target directory is on a different mount, and EAGAIN when the caller
 * should redo the source lookup and call again.
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) {
		vrele(vp);
		return (EINVAL);
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			NDFREE_PNBUF(&nd);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE_PNBUF(&nd);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Drop both vnodes, wait without them, then
				 * ask the caller to start over.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | V_PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
			vn_finished_write(mp);
			NDFREE_PNBUF(&nd);
			/* Already released above; skip the vrele() below. */
			vp = NULL;
		} else {
			/* Could not lock vp: have the caller retry. */
			vput(nd.ni_dvp);
			NDFREE_PNBUF(&nd);
			vrele(vp);
			return (EAGAIN);
		}
	}
	if (vp != NULL)
		vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1808 */ 1809 #ifndef _SYS_SYSPROTO_H_ 1810 struct symlink_args { 1811 char *path; 1812 char *link; 1813 }; 1814 #endif 1815 int 1816 sys_symlink(struct thread *td, struct symlink_args *uap) 1817 { 1818 1819 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1820 UIO_USERSPACE)); 1821 } 1822 1823 #ifndef _SYS_SYSPROTO_H_ 1824 struct symlinkat_args { 1825 char *path; 1826 int fd; 1827 char *path2; 1828 }; 1829 #endif 1830 int 1831 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1832 { 1833 1834 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1835 UIO_USERSPACE)); 1836 } 1837 1838 int 1839 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1840 enum uio_seg segflg) 1841 { 1842 struct mount *mp; 1843 struct vattr vattr; 1844 const char *syspath; 1845 char *tmppath; 1846 struct nameidata nd; 1847 int error; 1848 1849 if (segflg == UIO_SYSSPACE) { 1850 syspath = path1; 1851 } else { 1852 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1853 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1854 goto out; 1855 syspath = tmppath; 1856 } 1857 AUDIT_ARG_TEXT(syspath); 1858 NDPREINIT(&nd); 1859 restart: 1860 bwillwrite(); 1861 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1862 path2, fd, &cap_symlinkat_rights); 1863 if ((error = namei(&nd)) != 0) 1864 goto out; 1865 if (nd.ni_vp) { 1866 NDFREE_PNBUF(&nd); 1867 if (nd.ni_vp == nd.ni_dvp) 1868 vrele(nd.ni_dvp); 1869 else 1870 vput(nd.ni_dvp); 1871 vrele(nd.ni_vp); 1872 nd.ni_vp = NULL; 1873 error = EEXIST; 1874 goto out; 1875 } 1876 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1877 NDFREE_PNBUF(&nd); 1878 vput(nd.ni_dvp); 1879 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1880 goto out; 1881 goto restart; 1882 } 1883 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1884 error = EINVAL; 1885 goto out; 1886 } 1887 VATTR_NULL(&vattr); 1888 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 
1889 #ifdef MAC 1890 vattr.va_type = VLNK; 1891 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1892 &vattr); 1893 if (error != 0) 1894 goto out2; 1895 #endif 1896 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1897 #ifdef MAC 1898 out2: 1899 #endif 1900 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1901 vn_finished_write(mp); 1902 NDFREE_PNBUF(&nd); 1903 if (error == ERELOOKUP) 1904 goto restart; 1905 out: 1906 if (segflg != UIO_SYSSPACE) 1907 uma_zfree(namei_zone, tmppath); 1908 return (error); 1909 } 1910 1911 /* 1912 * Delete a whiteout from the filesystem. 1913 */ 1914 #ifndef _SYS_SYSPROTO_H_ 1915 struct undelete_args { 1916 char *path; 1917 }; 1918 #endif 1919 int 1920 sys_undelete(struct thread *td, struct undelete_args *uap) 1921 { 1922 struct mount *mp; 1923 struct nameidata nd; 1924 int error; 1925 1926 NDPREINIT(&nd); 1927 restart: 1928 bwillwrite(); 1929 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1930 UIO_USERSPACE, uap->path); 1931 error = namei(&nd); 1932 if (error != 0) 1933 return (error); 1934 1935 if (nd.ni_vp != NULL || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1936 NDFREE_PNBUF(&nd); 1937 if (nd.ni_vp == nd.ni_dvp) 1938 vrele(nd.ni_dvp); 1939 else 1940 vput(nd.ni_dvp); 1941 if (nd.ni_vp) 1942 vrele(nd.ni_vp); 1943 return (EEXIST); 1944 } 1945 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1946 NDFREE_PNBUF(&nd); 1947 vput(nd.ni_dvp); 1948 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1949 return (error); 1950 goto restart; 1951 } 1952 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1953 NDFREE_PNBUF(&nd); 1954 vput(nd.ni_dvp); 1955 vn_finished_write(mp); 1956 if (error == ERELOOKUP) 1957 goto restart; 1958 return (error); 1959 } 1960 1961 /* 1962 * Delete a name from the filesystem. 
1963 */ 1964 #ifndef _SYS_SYSPROTO_H_ 1965 struct unlink_args { 1966 char *path; 1967 }; 1968 #endif 1969 int 1970 sys_unlink(struct thread *td, struct unlink_args *uap) 1971 { 1972 1973 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1974 0, 0)); 1975 } 1976 1977 static int 1978 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1979 int flag, enum uio_seg pathseg, ino_t oldinum) 1980 { 1981 1982 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1983 return (EINVAL); 1984 1985 if ((flag & AT_REMOVEDIR) != 0) 1986 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1987 1988 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1989 } 1990 1991 #ifndef _SYS_SYSPROTO_H_ 1992 struct unlinkat_args { 1993 int fd; 1994 char *path; 1995 int flag; 1996 }; 1997 #endif 1998 int 1999 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 2000 { 2001 2002 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 2003 UIO_USERSPACE, 0)); 2004 } 2005 2006 #ifndef _SYS_SYSPROTO_H_ 2007 struct funlinkat_args { 2008 int dfd; 2009 const char *path; 2010 int fd; 2011 int flag; 2012 }; 2013 #endif 2014 int 2015 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 2016 { 2017 2018 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 2019 UIO_USERSPACE, 0)); 2020 } 2021 2022 int 2023 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 2024 enum uio_seg pathseg, int flag, ino_t oldinum) 2025 { 2026 struct mount *mp; 2027 struct file *fp; 2028 struct vnode *vp; 2029 struct nameidata nd; 2030 struct stat sb; 2031 int error; 2032 2033 fp = NULL; 2034 if (fd != FD_NONE) { 2035 error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp); 2036 if (error != 0) 2037 return (error); 2038 } 2039 2040 NDPREINIT(&nd); 2041 restart: 2042 bwillwrite(); 2043 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2044 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2045 
pathseg, path, dfd, &cap_unlinkat_rights); 2046 if ((error = namei(&nd)) != 0) { 2047 if (error == EINVAL) 2048 error = EPERM; 2049 goto fdout; 2050 } 2051 vp = nd.ni_vp; 2052 if (vp->v_type == VDIR && oldinum == 0) { 2053 error = EPERM; /* POSIX */ 2054 } else if (oldinum != 0 && 2055 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2056 sb.st_ino != oldinum) { 2057 error = EIDRM; /* Identifier removed */ 2058 } else if (fp != NULL && fp->f_vnode != vp) { 2059 if (VN_IS_DOOMED(fp->f_vnode)) 2060 error = EBADF; 2061 else 2062 error = EDEADLK; 2063 } else { 2064 /* 2065 * The root of a mounted filesystem cannot be deleted. 2066 * 2067 * XXX: can this only be a VDIR case? 2068 */ 2069 if (vp->v_vflag & VV_ROOT) 2070 error = EBUSY; 2071 } 2072 if (error == 0) { 2073 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2074 NDFREE_PNBUF(&nd); 2075 vput(nd.ni_dvp); 2076 if (vp == nd.ni_dvp) 2077 vrele(vp); 2078 else 2079 vput(vp); 2080 if ((error = vn_start_write(NULL, &mp, 2081 V_XSLEEP | V_PCATCH)) != 0) { 2082 goto fdout; 2083 } 2084 goto restart; 2085 } 2086 #ifdef MAC 2087 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2088 &nd.ni_cnd); 2089 if (error != 0) 2090 goto out; 2091 #endif 2092 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2093 #ifdef MAC 2094 out: 2095 #endif 2096 vn_finished_write(mp); 2097 } 2098 NDFREE_PNBUF(&nd); 2099 vput(nd.ni_dvp); 2100 if (vp == nd.ni_dvp) 2101 vrele(vp); 2102 else 2103 vput(vp); 2104 if (error == ERELOOKUP) 2105 goto restart; 2106 fdout: 2107 if (fp != NULL) 2108 fdrop(fp, td); 2109 return (error); 2110 } 2111 2112 /* 2113 * Reposition read/write file offset. 
2114 */ 2115 #ifndef _SYS_SYSPROTO_H_ 2116 struct lseek_args { 2117 int fd; 2118 int pad; 2119 off_t offset; 2120 int whence; 2121 }; 2122 #endif 2123 int 2124 sys_lseek(struct thread *td, struct lseek_args *uap) 2125 { 2126 2127 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2128 } 2129 2130 int 2131 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2132 { 2133 struct file *fp; 2134 int error; 2135 2136 AUDIT_ARG_FD(fd); 2137 error = fget(td, fd, &cap_seek_rights, &fp); 2138 if (error != 0) 2139 return (error); 2140 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2141 fo_seek(fp, offset, whence, td) : ESPIPE; 2142 fdrop(fp, td); 2143 return (error); 2144 } 2145 2146 #if defined(COMPAT_43) 2147 /* 2148 * Reposition read/write file offset. 2149 */ 2150 #ifndef _SYS_SYSPROTO_H_ 2151 struct olseek_args { 2152 int fd; 2153 long offset; 2154 int whence; 2155 }; 2156 #endif 2157 int 2158 olseek(struct thread *td, struct olseek_args *uap) 2159 { 2160 2161 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2162 } 2163 #endif /* COMPAT_43 */ 2164 2165 #if defined(COMPAT_FREEBSD6) 2166 /* Version with the 'pad' argument */ 2167 int 2168 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2169 { 2170 2171 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2172 } 2173 #endif 2174 2175 /* 2176 * Check access permissions using passed credentials. 2177 */ 2178 static int 2179 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2180 struct thread *td) 2181 { 2182 accmode_t accmode; 2183 int error; 2184 2185 /* Flags == 0 means only check for existence. 
*/ 2186 if (user_flags == 0) 2187 return (0); 2188 2189 accmode = 0; 2190 if (user_flags & R_OK) 2191 accmode |= VREAD; 2192 if (user_flags & W_OK) 2193 accmode |= VWRITE; 2194 if (user_flags & X_OK) 2195 accmode |= VEXEC; 2196 #ifdef MAC 2197 error = mac_vnode_check_access(cred, vp, accmode); 2198 if (error != 0) 2199 return (error); 2200 #endif 2201 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2202 error = VOP_ACCESS(vp, accmode, cred, td); 2203 return (error); 2204 } 2205 2206 /* 2207 * Check access permissions using "real" credentials. 2208 */ 2209 #ifndef _SYS_SYSPROTO_H_ 2210 struct access_args { 2211 char *path; 2212 int amode; 2213 }; 2214 #endif 2215 int 2216 sys_access(struct thread *td, struct access_args *uap) 2217 { 2218 2219 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2220 0, uap->amode)); 2221 } 2222 2223 #ifndef _SYS_SYSPROTO_H_ 2224 struct faccessat_args { 2225 int dirfd; 2226 char *path; 2227 int amode; 2228 int flag; 2229 } 2230 #endif 2231 int 2232 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2233 { 2234 2235 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2236 uap->amode)); 2237 } 2238 2239 int 2240 kern_accessat(struct thread *td, int fd, const char *path, 2241 enum uio_seg pathseg, int flag, int amode) 2242 { 2243 struct ucred *cred, *usecred; 2244 struct vnode *vp; 2245 struct nameidata nd; 2246 int error; 2247 2248 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2249 AT_SYMLINK_NOFOLLOW)) != 0) 2250 return (EINVAL); 2251 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2252 return (EINVAL); 2253 2254 /* 2255 * Create and modify a temporary credential instead of one that 2256 * is potentially shared (if we need one). 
2257 */ 2258 cred = td->td_ucred; 2259 if ((flag & AT_EACCESS) == 0 && 2260 ((cred->cr_uid != cred->cr_ruid || 2261 cred->cr_rgid != cred->cr_gid))) { 2262 usecred = crdup(cred); 2263 usecred->cr_uid = cred->cr_ruid; 2264 usecred->cr_gid = cred->cr_rgid; 2265 td->td_ucred = usecred; 2266 } else 2267 usecred = cred; 2268 AUDIT_ARG_VALUE(amode); 2269 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2270 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2271 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2272 if ((error = namei(&nd)) != 0) 2273 goto out; 2274 vp = nd.ni_vp; 2275 2276 error = vn_access(vp, amode, usecred, td); 2277 NDFREE_PNBUF(&nd); 2278 vput(vp); 2279 out: 2280 if (usecred != cred) { 2281 td->td_ucred = cred; 2282 crfree(usecred); 2283 } 2284 return (error); 2285 } 2286 2287 /* 2288 * Check access permissions using "effective" credentials. 2289 */ 2290 #ifndef _SYS_SYSPROTO_H_ 2291 struct eaccess_args { 2292 char *path; 2293 int amode; 2294 }; 2295 #endif 2296 int 2297 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2298 { 2299 2300 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2301 AT_EACCESS, uap->amode)); 2302 } 2303 2304 #if defined(COMPAT_43) 2305 /* 2306 * Get file status; this version follows links. 2307 */ 2308 #ifndef _SYS_SYSPROTO_H_ 2309 struct ostat_args { 2310 char *path; 2311 struct ostat *ub; 2312 }; 2313 #endif 2314 int 2315 ostat(struct thread *td, struct ostat_args *uap) 2316 { 2317 struct stat sb; 2318 struct ostat osb; 2319 int error; 2320 2321 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2322 if (error != 0) 2323 return (error); 2324 cvtstat(&sb, &osb); 2325 return (copyout(&osb, uap->ub, sizeof (osb))); 2326 } 2327 2328 /* 2329 * Get file status; this version does not follow links. 
2330 */ 2331 #ifndef _SYS_SYSPROTO_H_ 2332 struct olstat_args { 2333 char *path; 2334 struct ostat *ub; 2335 }; 2336 #endif 2337 int 2338 olstat(struct thread *td, struct olstat_args *uap) 2339 { 2340 struct stat sb; 2341 struct ostat osb; 2342 int error; 2343 2344 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2345 UIO_USERSPACE, &sb); 2346 if (error != 0) 2347 return (error); 2348 cvtstat(&sb, &osb); 2349 return (copyout(&osb, uap->ub, sizeof (osb))); 2350 } 2351 2352 /* 2353 * Convert from an old to a new stat structure. 2354 * XXX: many values are blindly truncated. 2355 */ 2356 void 2357 cvtstat(struct stat *st, struct ostat *ost) 2358 { 2359 2360 bzero(ost, sizeof(*ost)); 2361 ost->st_dev = st->st_dev; 2362 ost->st_ino = st->st_ino; 2363 ost->st_mode = st->st_mode; 2364 ost->st_nlink = st->st_nlink; 2365 ost->st_uid = st->st_uid; 2366 ost->st_gid = st->st_gid; 2367 ost->st_rdev = st->st_rdev; 2368 ost->st_size = MIN(st->st_size, INT32_MAX); 2369 ost->st_atim = st->st_atim; 2370 ost->st_mtim = st->st_mtim; 2371 ost->st_ctim = st->st_ctim; 2372 ost->st_blksize = st->st_blksize; 2373 ost->st_blocks = st->st_blocks; 2374 ost->st_flags = st->st_flags; 2375 ost->st_gen = st->st_gen; 2376 } 2377 #endif /* COMPAT_43 */ 2378 2379 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2380 int ino64_trunc_error; 2381 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2382 &ino64_trunc_error, 0, 2383 "Error on truncation of device, file or inode number, or link count"); 2384 2385 int 2386 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2387 { 2388 2389 ost->st_dev = st->st_dev; 2390 if (ost->st_dev != st->st_dev) { 2391 switch (ino64_trunc_error) { 2392 default: 2393 /* 2394 * Since dev_t is almost raw, don't clamp to the 2395 * maximum for case 2, but ignore the error. 
2396 */ 2397 break; 2398 case 1: 2399 return (EOVERFLOW); 2400 } 2401 } 2402 ost->st_ino = st->st_ino; 2403 if (ost->st_ino != st->st_ino) { 2404 switch (ino64_trunc_error) { 2405 default: 2406 case 0: 2407 break; 2408 case 1: 2409 return (EOVERFLOW); 2410 case 2: 2411 ost->st_ino = UINT32_MAX; 2412 break; 2413 } 2414 } 2415 ost->st_mode = st->st_mode; 2416 ost->st_nlink = st->st_nlink; 2417 if (ost->st_nlink != st->st_nlink) { 2418 switch (ino64_trunc_error) { 2419 default: 2420 case 0: 2421 break; 2422 case 1: 2423 return (EOVERFLOW); 2424 case 2: 2425 ost->st_nlink = UINT16_MAX; 2426 break; 2427 } 2428 } 2429 ost->st_uid = st->st_uid; 2430 ost->st_gid = st->st_gid; 2431 ost->st_rdev = st->st_rdev; 2432 if (ost->st_rdev != st->st_rdev) { 2433 switch (ino64_trunc_error) { 2434 default: 2435 break; 2436 case 1: 2437 return (EOVERFLOW); 2438 } 2439 } 2440 ost->st_atim = st->st_atim; 2441 ost->st_mtim = st->st_mtim; 2442 ost->st_ctim = st->st_ctim; 2443 ost->st_size = st->st_size; 2444 ost->st_blocks = st->st_blocks; 2445 ost->st_blksize = st->st_blksize; 2446 ost->st_flags = st->st_flags; 2447 ost->st_gen = st->st_gen; 2448 ost->st_lspare = 0; 2449 ost->st_birthtim = st->st_birthtim; 2450 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2451 sizeof(*ost) - offsetof(struct freebsd11_stat, 2452 st_birthtim) - sizeof(ost->st_birthtim)); 2453 return (0); 2454 } 2455 2456 int 2457 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2458 { 2459 struct stat sb; 2460 struct freebsd11_stat osb; 2461 int error; 2462 2463 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2464 if (error != 0) 2465 return (error); 2466 error = freebsd11_cvtstat(&sb, &osb); 2467 if (error == 0) 2468 error = copyout(&osb, uap->ub, sizeof(osb)); 2469 return (error); 2470 } 2471 2472 int 2473 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2474 { 2475 struct stat sb; 2476 struct freebsd11_stat osb; 2477 int error; 2478 2479 error = 
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2480 UIO_USERSPACE, &sb); 2481 if (error != 0) 2482 return (error); 2483 error = freebsd11_cvtstat(&sb, &osb); 2484 if (error == 0) 2485 error = copyout(&osb, uap->ub, sizeof(osb)); 2486 return (error); 2487 } 2488 2489 int 2490 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2491 { 2492 struct fhandle fh; 2493 struct stat sb; 2494 struct freebsd11_stat osb; 2495 int error; 2496 2497 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2498 if (error != 0) 2499 return (error); 2500 error = kern_fhstat(td, fh, &sb); 2501 if (error != 0) 2502 return (error); 2503 error = freebsd11_cvtstat(&sb, &osb); 2504 if (error == 0) 2505 error = copyout(&osb, uap->sb, sizeof(osb)); 2506 return (error); 2507 } 2508 2509 int 2510 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2511 { 2512 struct stat sb; 2513 struct freebsd11_stat osb; 2514 int error; 2515 2516 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2517 UIO_USERSPACE, &sb); 2518 if (error != 0) 2519 return (error); 2520 error = freebsd11_cvtstat(&sb, &osb); 2521 if (error == 0) 2522 error = copyout(&osb, uap->buf, sizeof(osb)); 2523 return (error); 2524 } 2525 #endif /* COMPAT_FREEBSD11 */ 2526 2527 /* 2528 * Get file status 2529 */ 2530 #ifndef _SYS_SYSPROTO_H_ 2531 struct fstatat_args { 2532 int fd; 2533 char *path; 2534 struct stat *buf; 2535 int flag; 2536 } 2537 #endif 2538 int 2539 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2540 { 2541 struct stat sb; 2542 int error; 2543 2544 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2545 UIO_USERSPACE, &sb); 2546 if (error == 0) 2547 error = copyout(&sb, uap->buf, sizeof (sb)); 2548 return (error); 2549 } 2550 2551 int 2552 kern_statat(struct thread *td, int flag, int fd, const char *path, 2553 enum uio_seg pathseg, struct stat *sbp) 2554 { 2555 struct nameidata nd; 2556 int error; 2557 2558 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2559 AT_EMPTY_PATH)) != 0) 2560 return (EINVAL); 2561 2562 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2563 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2564 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2565 2566 if ((error = namei(&nd)) != 0) { 2567 if (error == ENOTDIR && 2568 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2569 error = kern_fstat(td, fd, sbp); 2570 return (error); 2571 } 2572 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2573 NDFREE_PNBUF(&nd); 2574 vput(nd.ni_vp); 2575 #ifdef __STAT_TIME_T_EXT 2576 sbp->st_atim_ext = 0; 2577 sbp->st_mtim_ext = 0; 2578 sbp->st_ctim_ext = 0; 2579 sbp->st_btim_ext = 0; 2580 #endif 2581 #ifdef KTRACE 2582 if (KTRPOINT(td, KTR_STRUCT)) 2583 ktrstat_error(sbp, error); 2584 #endif 2585 return (error); 2586 } 2587 2588 #if defined(COMPAT_FREEBSD11) 2589 /* 2590 * Implementation of the NetBSD [l]stat() functions. 2591 */ 2592 int 2593 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2594 { 2595 struct freebsd11_stat sb11; 2596 int error; 2597 2598 error = freebsd11_cvtstat(sb, &sb11); 2599 if (error != 0) 2600 return (error); 2601 2602 bzero(nsb, sizeof(*nsb)); 2603 CP(sb11, *nsb, st_dev); 2604 CP(sb11, *nsb, st_ino); 2605 CP(sb11, *nsb, st_mode); 2606 CP(sb11, *nsb, st_nlink); 2607 CP(sb11, *nsb, st_uid); 2608 CP(sb11, *nsb, st_gid); 2609 CP(sb11, *nsb, st_rdev); 2610 CP(sb11, *nsb, st_atim); 2611 CP(sb11, *nsb, st_mtim); 2612 CP(sb11, *nsb, st_ctim); 2613 CP(sb11, *nsb, st_size); 2614 CP(sb11, *nsb, st_blocks); 2615 CP(sb11, *nsb, st_blksize); 2616 CP(sb11, *nsb, st_flags); 2617 CP(sb11, *nsb, st_gen); 2618 CP(sb11, *nsb, st_birthtim); 2619 return (0); 2620 } 2621 2622 #ifndef _SYS_SYSPROTO_H_ 2623 struct freebsd11_nstat_args { 2624 char *path; 2625 struct nstat *ub; 2626 }; 2627 #endif 2628 int 2629 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2630 { 2631 struct stat sb; 2632 struct nstat nsb; 2633 int error; 2634 
2635 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2636 if (error != 0) 2637 return (error); 2638 error = freebsd11_cvtnstat(&sb, &nsb); 2639 if (error == 0) 2640 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2641 return (error); 2642 } 2643 2644 /* 2645 * NetBSD lstat. Get file status; this version does not follow links. 2646 */ 2647 #ifndef _SYS_SYSPROTO_H_ 2648 struct freebsd11_nlstat_args { 2649 char *path; 2650 struct nstat *ub; 2651 }; 2652 #endif 2653 int 2654 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2655 { 2656 struct stat sb; 2657 struct nstat nsb; 2658 int error; 2659 2660 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2661 UIO_USERSPACE, &sb); 2662 if (error != 0) 2663 return (error); 2664 error = freebsd11_cvtnstat(&sb, &nsb); 2665 if (error == 0) 2666 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2667 return (error); 2668 } 2669 #endif /* COMPAT_FREEBSD11 */ 2670 2671 /* 2672 * Get configurable pathname variables. 
2673 */ 2674 #ifndef _SYS_SYSPROTO_H_ 2675 struct pathconf_args { 2676 char *path; 2677 int name; 2678 }; 2679 #endif 2680 int 2681 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2682 { 2683 long value; 2684 int error; 2685 2686 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2687 &value); 2688 if (error == 0) 2689 td->td_retval[0] = value; 2690 return (error); 2691 } 2692 2693 #ifndef _SYS_SYSPROTO_H_ 2694 struct lpathconf_args { 2695 char *path; 2696 int name; 2697 }; 2698 #endif 2699 int 2700 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2701 { 2702 long value; 2703 int error; 2704 2705 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2706 NOFOLLOW, &value); 2707 if (error == 0) 2708 td->td_retval[0] = value; 2709 return (error); 2710 } 2711 2712 int 2713 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2714 int name, u_long flags, long *valuep) 2715 { 2716 struct nameidata nd; 2717 int error; 2718 2719 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2720 pathseg, path); 2721 if ((error = namei(&nd)) != 0) 2722 return (error); 2723 NDFREE_PNBUF(&nd); 2724 2725 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2726 vput(nd.ni_vp); 2727 return (error); 2728 } 2729 2730 /* 2731 * Return target name of a symbolic link. 
2732 */ 2733 #ifndef _SYS_SYSPROTO_H_ 2734 struct readlink_args { 2735 char *path; 2736 char *buf; 2737 size_t count; 2738 }; 2739 #endif 2740 int 2741 sys_readlink(struct thread *td, struct readlink_args *uap) 2742 { 2743 2744 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2745 uap->buf, UIO_USERSPACE, uap->count)); 2746 } 2747 #ifndef _SYS_SYSPROTO_H_ 2748 struct readlinkat_args { 2749 int fd; 2750 char *path; 2751 char *buf; 2752 size_t bufsize; 2753 }; 2754 #endif 2755 int 2756 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2757 { 2758 2759 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2760 uap->buf, UIO_USERSPACE, uap->bufsize)); 2761 } 2762 2763 int 2764 kern_readlinkat(struct thread *td, int fd, const char *path, 2765 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2766 { 2767 struct vnode *vp; 2768 struct nameidata nd; 2769 int error; 2770 2771 if (count > IOSIZE_MAX) 2772 return (EINVAL); 2773 2774 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2775 EMPTYPATH, pathseg, path, fd); 2776 2777 if ((error = namei(&nd)) != 0) 2778 return (error); 2779 NDFREE_PNBUF(&nd); 2780 vp = nd.ni_vp; 2781 2782 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2783 vput(vp); 2784 2785 return (error); 2786 } 2787 2788 /* 2789 * Helper function to readlink from a vnode 2790 */ 2791 static int 2792 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2793 struct thread *td) 2794 { 2795 struct iovec aiov; 2796 struct uio auio; 2797 int error; 2798 2799 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2800 #ifdef MAC 2801 error = mac_vnode_check_readlink(td->td_ucred, vp); 2802 if (error != 0) 2803 return (error); 2804 #endif 2805 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2806 return (EINVAL); 2807 2808 aiov.iov_base = buf; 2809 aiov.iov_len = count; 2810 auio.uio_iov = &aiov; 2811 auio.uio_iovcnt = 1; 2812 auio.uio_offset = 
0; 2813 auio.uio_rw = UIO_READ; 2814 auio.uio_segflg = bufseg; 2815 auio.uio_td = td; 2816 auio.uio_resid = count; 2817 error = VOP_READLINK(vp, &auio, td->td_ucred); 2818 td->td_retval[0] = count - auio.uio_resid; 2819 return (error); 2820 } 2821 2822 /* 2823 * Common implementation code for chflags() and fchflags(). 2824 */ 2825 static int 2826 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2827 { 2828 struct mount *mp; 2829 struct vattr vattr; 2830 int error; 2831 2832 /* We can't support the value matching VNOVAL. */ 2833 if (flags == VNOVAL) 2834 return (EOPNOTSUPP); 2835 2836 /* 2837 * Prevent non-root users from setting flags on devices. When 2838 * a device is reused, users can retain ownership of the device 2839 * if they are allowed to set flags and programs assume that 2840 * chown can't fail when done as root. 2841 */ 2842 if (VN_ISDEV(vp)) { 2843 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2844 if (error != 0) 2845 return (error); 2846 } 2847 2848 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2849 return (error); 2850 VATTR_NULL(&vattr); 2851 vattr.va_flags = flags; 2852 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2853 #ifdef MAC 2854 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2855 if (error == 0) 2856 #endif 2857 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2858 VOP_UNLOCK(vp); 2859 vn_finished_write(mp); 2860 return (error); 2861 } 2862 2863 /* 2864 * Change flags of a file given a path name. 
2865 */ 2866 #ifndef _SYS_SYSPROTO_H_ 2867 struct chflags_args { 2868 const char *path; 2869 u_long flags; 2870 }; 2871 #endif 2872 int 2873 sys_chflags(struct thread *td, struct chflags_args *uap) 2874 { 2875 2876 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2877 uap->flags, 0)); 2878 } 2879 2880 #ifndef _SYS_SYSPROTO_H_ 2881 struct chflagsat_args { 2882 int fd; 2883 const char *path; 2884 u_long flags; 2885 int atflag; 2886 } 2887 #endif 2888 int 2889 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2890 { 2891 2892 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2893 uap->flags, uap->atflag)); 2894 } 2895 2896 /* 2897 * Same as chflags() but doesn't follow symlinks. 2898 */ 2899 #ifndef _SYS_SYSPROTO_H_ 2900 struct lchflags_args { 2901 const char *path; 2902 u_long flags; 2903 }; 2904 #endif 2905 int 2906 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2907 { 2908 2909 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2910 uap->flags, AT_SYMLINK_NOFOLLOW)); 2911 } 2912 2913 static int 2914 kern_chflagsat(struct thread *td, int fd, const char *path, 2915 enum uio_seg pathseg, u_long flags, int atflag) 2916 { 2917 struct nameidata nd; 2918 int error; 2919 2920 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2921 AT_EMPTY_PATH)) != 0) 2922 return (EINVAL); 2923 2924 AUDIT_ARG_FFLAGS(flags); 2925 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2926 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2927 fd, &cap_fchflags_rights); 2928 if ((error = namei(&nd)) != 0) 2929 return (error); 2930 NDFREE_PNBUF(&nd); 2931 error = setfflags(td, nd.ni_vp, flags); 2932 vrele(nd.ni_vp); 2933 return (error); 2934 } 2935 2936 /* 2937 * Change flags of a file given a file descriptor. 
2938 */ 2939 #ifndef _SYS_SYSPROTO_H_ 2940 struct fchflags_args { 2941 int fd; 2942 u_long flags; 2943 }; 2944 #endif 2945 int 2946 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2947 { 2948 struct file *fp; 2949 int error; 2950 2951 AUDIT_ARG_FD(uap->fd); 2952 AUDIT_ARG_FFLAGS(uap->flags); 2953 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2954 &fp); 2955 if (error != 0) 2956 return (error); 2957 #ifdef AUDIT 2958 if (AUDITING_TD(td)) { 2959 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2960 AUDIT_ARG_VNODE1(fp->f_vnode); 2961 VOP_UNLOCK(fp->f_vnode); 2962 } 2963 #endif 2964 error = setfflags(td, fp->f_vnode, uap->flags); 2965 fdrop(fp, td); 2966 return (error); 2967 } 2968 2969 /* 2970 * Common implementation code for chmod(), lchmod() and fchmod(). 2971 */ 2972 int 2973 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2974 { 2975 struct mount *mp; 2976 struct vattr vattr; 2977 int error; 2978 2979 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2980 return (error); 2981 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2982 VATTR_NULL(&vattr); 2983 vattr.va_mode = mode & ALLPERMS; 2984 #ifdef MAC 2985 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2986 if (error == 0) 2987 #endif 2988 error = VOP_SETATTR(vp, &vattr, cred); 2989 VOP_UNLOCK(vp); 2990 vn_finished_write(mp); 2991 return (error); 2992 } 2993 2994 /* 2995 * Change mode of a file given path name. 
2996 */ 2997 #ifndef _SYS_SYSPROTO_H_ 2998 struct chmod_args { 2999 char *path; 3000 int mode; 3001 }; 3002 #endif 3003 int 3004 sys_chmod(struct thread *td, struct chmod_args *uap) 3005 { 3006 3007 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3008 uap->mode, 0)); 3009 } 3010 3011 #ifndef _SYS_SYSPROTO_H_ 3012 struct fchmodat_args { 3013 int dirfd; 3014 char *path; 3015 mode_t mode; 3016 int flag; 3017 } 3018 #endif 3019 int 3020 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 3021 { 3022 3023 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 3024 uap->mode, uap->flag)); 3025 } 3026 3027 /* 3028 * Change mode of a file given path name (don't follow links.) 3029 */ 3030 #ifndef _SYS_SYSPROTO_H_ 3031 struct lchmod_args { 3032 char *path; 3033 int mode; 3034 }; 3035 #endif 3036 int 3037 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3038 { 3039 3040 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3041 uap->mode, AT_SYMLINK_NOFOLLOW)); 3042 } 3043 3044 int 3045 kern_fchmodat(struct thread *td, int fd, const char *path, 3046 enum uio_seg pathseg, mode_t mode, int flag) 3047 { 3048 struct nameidata nd; 3049 int error; 3050 3051 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3052 AT_EMPTY_PATH)) != 0) 3053 return (EINVAL); 3054 3055 AUDIT_ARG_MODE(mode); 3056 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3057 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3058 fd, &cap_fchmod_rights); 3059 if ((error = namei(&nd)) != 0) 3060 return (error); 3061 NDFREE_PNBUF(&nd); 3062 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3063 vrele(nd.ni_vp); 3064 return (error); 3065 } 3066 3067 /* 3068 * Change mode of a file given a file descriptor. 
3069 */ 3070 #ifndef _SYS_SYSPROTO_H_ 3071 struct fchmod_args { 3072 int fd; 3073 int mode; 3074 }; 3075 #endif 3076 int 3077 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3078 { 3079 struct file *fp; 3080 int error; 3081 3082 AUDIT_ARG_FD(uap->fd); 3083 AUDIT_ARG_MODE(uap->mode); 3084 3085 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3086 if (error != 0) 3087 return (error); 3088 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3089 fdrop(fp, td); 3090 return (error); 3091 } 3092 3093 /* 3094 * Common implementation for chown(), lchown(), and fchown() 3095 */ 3096 int 3097 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3098 gid_t gid) 3099 { 3100 struct mount *mp; 3101 struct vattr vattr; 3102 int error; 3103 3104 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3105 return (error); 3106 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3107 VATTR_NULL(&vattr); 3108 vattr.va_uid = uid; 3109 vattr.va_gid = gid; 3110 #ifdef MAC 3111 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3112 vattr.va_gid); 3113 if (error == 0) 3114 #endif 3115 error = VOP_SETATTR(vp, &vattr, cred); 3116 VOP_UNLOCK(vp); 3117 vn_finished_write(mp); 3118 return (error); 3119 } 3120 3121 /* 3122 * Set ownership given a path name. 
3123 */ 3124 #ifndef _SYS_SYSPROTO_H_ 3125 struct chown_args { 3126 char *path; 3127 int uid; 3128 int gid; 3129 }; 3130 #endif 3131 int 3132 sys_chown(struct thread *td, struct chown_args *uap) 3133 { 3134 3135 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3136 uap->gid, 0)); 3137 } 3138 3139 #ifndef _SYS_SYSPROTO_H_ 3140 struct fchownat_args { 3141 int fd; 3142 const char * path; 3143 uid_t uid; 3144 gid_t gid; 3145 int flag; 3146 }; 3147 #endif 3148 int 3149 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3150 { 3151 3152 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3153 uap->gid, uap->flag)); 3154 } 3155 3156 int 3157 kern_fchownat(struct thread *td, int fd, const char *path, 3158 enum uio_seg pathseg, int uid, int gid, int flag) 3159 { 3160 struct nameidata nd; 3161 int error; 3162 3163 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3164 AT_EMPTY_PATH)) != 0) 3165 return (EINVAL); 3166 3167 AUDIT_ARG_OWNER(uid, gid); 3168 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3169 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3170 fd, &cap_fchown_rights); 3171 3172 if ((error = namei(&nd)) != 0) 3173 return (error); 3174 NDFREE_PNBUF(&nd); 3175 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3176 vrele(nd.ni_vp); 3177 return (error); 3178 } 3179 3180 /* 3181 * Set ownership given a path name, do not cross symlinks. 3182 */ 3183 #ifndef _SYS_SYSPROTO_H_ 3184 struct lchown_args { 3185 char *path; 3186 int uid; 3187 int gid; 3188 }; 3189 #endif 3190 int 3191 sys_lchown(struct thread *td, struct lchown_args *uap) 3192 { 3193 3194 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3195 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3196 } 3197 3198 /* 3199 * Set ownership given a file descriptor. 
3200 */ 3201 #ifndef _SYS_SYSPROTO_H_ 3202 struct fchown_args { 3203 int fd; 3204 int uid; 3205 int gid; 3206 }; 3207 #endif 3208 int 3209 sys_fchown(struct thread *td, struct fchown_args *uap) 3210 { 3211 struct file *fp; 3212 int error; 3213 3214 AUDIT_ARG_FD(uap->fd); 3215 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3216 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3217 if (error != 0) 3218 return (error); 3219 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3220 fdrop(fp, td); 3221 return (error); 3222 } 3223 3224 /* 3225 * Common implementation code for utimes(), lutimes(), and futimes(). 3226 */ 3227 static int 3228 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3229 struct timespec *tsp) 3230 { 3231 struct timeval tv[2]; 3232 const struct timeval *tvp; 3233 int error; 3234 3235 if (usrtvp == NULL) { 3236 vfs_timestamp(&tsp[0]); 3237 tsp[1] = tsp[0]; 3238 } else { 3239 if (tvpseg == UIO_SYSSPACE) { 3240 tvp = usrtvp; 3241 } else { 3242 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3243 return (error); 3244 tvp = tv; 3245 } 3246 3247 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3248 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3249 return (EINVAL); 3250 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3251 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3252 } 3253 return (0); 3254 } 3255 3256 /* 3257 * Common implementation code for futimens(), utimensat(). 
3258 */ 3259 #define UTIMENS_NULL 0x1 3260 #define UTIMENS_EXIT 0x2 3261 static int 3262 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3263 struct timespec *tsp, int *retflags) 3264 { 3265 struct timespec tsnow; 3266 int error; 3267 3268 vfs_timestamp(&tsnow); 3269 *retflags = 0; 3270 if (usrtsp == NULL) { 3271 tsp[0] = tsnow; 3272 tsp[1] = tsnow; 3273 *retflags |= UTIMENS_NULL; 3274 return (0); 3275 } 3276 if (tspseg == UIO_SYSSPACE) { 3277 tsp[0] = usrtsp[0]; 3278 tsp[1] = usrtsp[1]; 3279 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3280 return (error); 3281 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3282 *retflags |= UTIMENS_EXIT; 3283 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3284 *retflags |= UTIMENS_NULL; 3285 if (tsp[0].tv_nsec == UTIME_OMIT) 3286 tsp[0].tv_sec = VNOVAL; 3287 else if (tsp[0].tv_nsec == UTIME_NOW) 3288 tsp[0] = tsnow; 3289 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3290 return (EINVAL); 3291 if (tsp[1].tv_nsec == UTIME_OMIT) 3292 tsp[1].tv_sec = VNOVAL; 3293 else if (tsp[1].tv_nsec == UTIME_NOW) 3294 tsp[1] = tsnow; 3295 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3296 return (EINVAL); 3297 3298 return (0); 3299 } 3300 3301 /* 3302 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3303 * and utimensat(). 
3304 */ 3305 static int 3306 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3307 int numtimes, int nullflag) 3308 { 3309 struct mount *mp; 3310 struct vattr vattr; 3311 int error; 3312 bool setbirthtime; 3313 3314 setbirthtime = false; 3315 vattr.va_birthtime.tv_sec = VNOVAL; 3316 vattr.va_birthtime.tv_nsec = 0; 3317 3318 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3319 return (error); 3320 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3321 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3322 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3323 setbirthtime = true; 3324 VATTR_NULL(&vattr); 3325 vattr.va_atime = ts[0]; 3326 vattr.va_mtime = ts[1]; 3327 if (setbirthtime) 3328 vattr.va_birthtime = ts[1]; 3329 if (numtimes > 2) 3330 vattr.va_birthtime = ts[2]; 3331 if (nullflag) 3332 vattr.va_vaflags |= VA_UTIMES_NULL; 3333 #ifdef MAC 3334 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3335 vattr.va_mtime); 3336 #endif 3337 if (error == 0) 3338 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3339 VOP_UNLOCK(vp); 3340 vn_finished_write(mp); 3341 return (error); 3342 } 3343 3344 /* 3345 * Set the access and modification times of a file. 
3346 */ 3347 #ifndef _SYS_SYSPROTO_H_ 3348 struct utimes_args { 3349 char *path; 3350 struct timeval *tptr; 3351 }; 3352 #endif 3353 int 3354 sys_utimes(struct thread *td, struct utimes_args *uap) 3355 { 3356 3357 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3358 uap->tptr, UIO_USERSPACE)); 3359 } 3360 3361 #ifndef _SYS_SYSPROTO_H_ 3362 struct futimesat_args { 3363 int fd; 3364 const char * path; 3365 const struct timeval * times; 3366 }; 3367 #endif 3368 int 3369 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3370 { 3371 3372 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3373 uap->times, UIO_USERSPACE)); 3374 } 3375 3376 int 3377 kern_utimesat(struct thread *td, int fd, const char *path, 3378 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3379 { 3380 struct nameidata nd; 3381 struct timespec ts[2]; 3382 int error; 3383 3384 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3385 return (error); 3386 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3387 &cap_futimes_rights); 3388 3389 if ((error = namei(&nd)) != 0) 3390 return (error); 3391 NDFREE_PNBUF(&nd); 3392 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3393 vrele(nd.ni_vp); 3394 return (error); 3395 } 3396 3397 /* 3398 * Set the access and modification times of a file. 
3399 */ 3400 #ifndef _SYS_SYSPROTO_H_ 3401 struct lutimes_args { 3402 char *path; 3403 struct timeval *tptr; 3404 }; 3405 #endif 3406 int 3407 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3408 { 3409 3410 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3411 UIO_USERSPACE)); 3412 } 3413 3414 int 3415 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3416 const struct timeval *tptr, enum uio_seg tptrseg) 3417 { 3418 struct timespec ts[2]; 3419 struct nameidata nd; 3420 int error; 3421 3422 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3423 return (error); 3424 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3425 if ((error = namei(&nd)) != 0) 3426 return (error); 3427 NDFREE_PNBUF(&nd); 3428 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3429 vrele(nd.ni_vp); 3430 return (error); 3431 } 3432 3433 /* 3434 * Set the access and modification times of a file. 3435 */ 3436 #ifndef _SYS_SYSPROTO_H_ 3437 struct futimes_args { 3438 int fd; 3439 struct timeval *tptr; 3440 }; 3441 #endif 3442 int 3443 sys_futimes(struct thread *td, struct futimes_args *uap) 3444 { 3445 3446 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3447 } 3448 3449 int 3450 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3451 enum uio_seg tptrseg) 3452 { 3453 struct timespec ts[2]; 3454 struct file *fp; 3455 int error; 3456 3457 AUDIT_ARG_FD(fd); 3458 error = getutimes(tptr, tptrseg, ts); 3459 if (error != 0) 3460 return (error); 3461 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3462 if (error != 0) 3463 return (error); 3464 #ifdef AUDIT 3465 if (AUDITING_TD(td)) { 3466 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3467 AUDIT_ARG_VNODE1(fp->f_vnode); 3468 VOP_UNLOCK(fp->f_vnode); 3469 } 3470 #endif 3471 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3472 fdrop(fp, td); 3473 return (error); 3474 } 3475 3476 int 3477 sys_futimens(struct thread *td, struct futimens_args *uap) 3478 { 3479 
3480 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3481 } 3482 3483 int 3484 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3485 enum uio_seg tptrseg) 3486 { 3487 struct timespec ts[2]; 3488 struct file *fp; 3489 int error, flags; 3490 3491 AUDIT_ARG_FD(fd); 3492 error = getutimens(tptr, tptrseg, ts, &flags); 3493 if (error != 0) 3494 return (error); 3495 if (flags & UTIMENS_EXIT) 3496 return (0); 3497 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3498 if (error != 0) 3499 return (error); 3500 #ifdef AUDIT 3501 if (AUDITING_TD(td)) { 3502 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3503 AUDIT_ARG_VNODE1(fp->f_vnode); 3504 VOP_UNLOCK(fp->f_vnode); 3505 } 3506 #endif 3507 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3508 fdrop(fp, td); 3509 return (error); 3510 } 3511 3512 int 3513 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3514 { 3515 3516 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3517 uap->times, UIO_USERSPACE, uap->flag)); 3518 } 3519 3520 int 3521 kern_utimensat(struct thread *td, int fd, const char *path, 3522 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3523 int flag) 3524 { 3525 struct nameidata nd; 3526 struct timespec ts[2]; 3527 int error, flags; 3528 3529 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3530 AT_EMPTY_PATH)) != 0) 3531 return (EINVAL); 3532 3533 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3534 return (error); 3535 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3536 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3537 pathseg, path, fd, &cap_futimes_rights); 3538 if ((error = namei(&nd)) != 0) 3539 return (error); 3540 /* 3541 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3542 * POSIX states: 3543 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3544 * "Search permission is denied by a component of the path prefix." 
3545 */ 3546 NDFREE_PNBUF(&nd); 3547 if ((flags & UTIMENS_EXIT) == 0) 3548 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3549 vrele(nd.ni_vp); 3550 return (error); 3551 } 3552 3553 /* 3554 * Truncate a file given its path name. 3555 */ 3556 #ifndef _SYS_SYSPROTO_H_ 3557 struct truncate_args { 3558 char *path; 3559 int pad; 3560 off_t length; 3561 }; 3562 #endif 3563 int 3564 sys_truncate(struct thread *td, struct truncate_args *uap) 3565 { 3566 3567 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3568 } 3569 3570 int 3571 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3572 off_t length) 3573 { 3574 struct mount *mp; 3575 struct vnode *vp; 3576 void *rl_cookie; 3577 struct nameidata nd; 3578 int error; 3579 3580 if (length < 0) 3581 return (EINVAL); 3582 NDPREINIT(&nd); 3583 retry: 3584 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3585 if ((error = namei(&nd)) != 0) 3586 return (error); 3587 vp = nd.ni_vp; 3588 NDFREE_PNBUF(&nd); 3589 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3590 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3591 vn_rangelock_unlock(vp, rl_cookie); 3592 vrele(vp); 3593 return (error); 3594 } 3595 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3596 if (vp->v_type == VDIR) { 3597 error = EISDIR; 3598 goto out; 3599 } 3600 #ifdef MAC 3601 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3602 if (error != 0) 3603 goto out; 3604 #endif 3605 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3606 if (error != 0) 3607 goto out; 3608 3609 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3610 out: 3611 VOP_UNLOCK(vp); 3612 vn_finished_write(mp); 3613 vn_rangelock_unlock(vp, rl_cookie); 3614 vrele(vp); 3615 if (error == ERELOOKUP) 3616 goto retry; 3617 return (error); 3618 } 3619 3620 #if defined(COMPAT_43) 3621 /* 3622 * Truncate a file given its path name. 
3623 */ 3624 #ifndef _SYS_SYSPROTO_H_ 3625 struct otruncate_args { 3626 char *path; 3627 long length; 3628 }; 3629 #endif 3630 int 3631 otruncate(struct thread *td, struct otruncate_args *uap) 3632 { 3633 3634 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3635 } 3636 #endif /* COMPAT_43 */ 3637 3638 #if defined(COMPAT_FREEBSD6) 3639 /* Versions with the pad argument */ 3640 int 3641 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3642 { 3643 3644 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3645 } 3646 3647 int 3648 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3649 { 3650 3651 return (kern_ftruncate(td, uap->fd, uap->length)); 3652 } 3653 #endif 3654 3655 int 3656 kern_fsync(struct thread *td, int fd, bool fullsync) 3657 { 3658 struct vnode *vp; 3659 struct mount *mp; 3660 struct file *fp; 3661 int error; 3662 3663 AUDIT_ARG_FD(fd); 3664 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3665 if (error != 0) 3666 return (error); 3667 vp = fp->f_vnode; 3668 #if 0 3669 if (!fullsync) 3670 /* XXXKIB: compete outstanding aio writes */; 3671 #endif 3672 retry: 3673 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3674 if (error != 0) 3675 goto drop; 3676 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3677 AUDIT_ARG_VNODE1(vp); 3678 vnode_pager_clean_async(vp); 3679 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3680 VOP_UNLOCK(vp); 3681 vn_finished_write(mp); 3682 if (error == ERELOOKUP) 3683 goto retry; 3684 drop: 3685 fdrop(fp, td); 3686 return (error); 3687 } 3688 3689 /* 3690 * Sync an open file. 
3691 */ 3692 #ifndef _SYS_SYSPROTO_H_ 3693 struct fsync_args { 3694 int fd; 3695 }; 3696 #endif 3697 int 3698 sys_fsync(struct thread *td, struct fsync_args *uap) 3699 { 3700 3701 return (kern_fsync(td, uap->fd, true)); 3702 } 3703 3704 int 3705 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3706 { 3707 3708 return (kern_fsync(td, uap->fd, false)); 3709 } 3710 3711 /* 3712 * Rename files. Source and destination must either both be directories, or 3713 * both not be directories. If target is a directory, it must be empty. 3714 */ 3715 #ifndef _SYS_SYSPROTO_H_ 3716 struct rename_args { 3717 char *from; 3718 char *to; 3719 }; 3720 #endif 3721 int 3722 sys_rename(struct thread *td, struct rename_args *uap) 3723 { 3724 3725 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3726 uap->to, UIO_USERSPACE, 0)); 3727 } 3728 3729 #ifndef _SYS_SYSPROTO_H_ 3730 struct renameat_args { 3731 int oldfd; 3732 char *old; 3733 int newfd; 3734 char *new; 3735 }; 3736 #endif 3737 int 3738 sys_renameat(struct thread *td, struct renameat_args *uap) 3739 { 3740 3741 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3742 UIO_USERSPACE, 0)); 3743 } 3744 3745 int 3746 sys_renameat2(struct thread *td, struct renameat2_args *uap) 3747 { 3748 3749 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3750 UIO_USERSPACE, uap->flags)); 3751 } 3752 3753 #ifdef MAC 3754 static int 3755 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3756 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3757 { 3758 int error; 3759 3760 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3761 pathseg, old, oldfd, &cap_renameat_source_rights); 3762 if ((error = namei(fromnd)) != 0) 3763 return (error); 3764 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3765 fromnd->ni_vp, &fromnd->ni_cnd); 3766 VOP_UNLOCK(fromnd->ni_dvp); 3767 if (fromnd->ni_dvp != fromnd->ni_vp) 3768 
VOP_UNLOCK(fromnd->ni_vp); 3769 if (error != 0) { 3770 NDFREE_PNBUF(fromnd); 3771 vrele(fromnd->ni_dvp); 3772 vrele(fromnd->ni_vp); 3773 } 3774 return (error); 3775 } 3776 #endif 3777 3778 int 3779 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3780 const char *new, enum uio_seg pathseg, u_int flags) 3781 { 3782 struct mount *mp, *tmp; 3783 struct vnode *tvp, *fvp, *tdvp; 3784 struct nameidata fromnd, tond; 3785 uint64_t tondflags; 3786 int error; 3787 short irflag; 3788 3789 if ((flags & ~(AT_RENAME_NOREPLACE)) != 0) 3790 return (EINVAL); 3791 again: 3792 tmp = mp = NULL; 3793 bwillwrite(); 3794 #ifdef MAC 3795 if (mac_vnode_check_rename_from_enabled()) { 3796 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3797 &fromnd); 3798 if (error != 0) 3799 return (error); 3800 } else { 3801 #endif 3802 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3803 pathseg, old, oldfd, &cap_renameat_source_rights); 3804 if ((error = namei(&fromnd)) != 0) 3805 return (error); 3806 #ifdef MAC 3807 } 3808 #endif 3809 fvp = fromnd.ni_vp; 3810 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3811 if (fromnd.ni_vp->v_type == VDIR) 3812 tondflags |= WILLBEDIR; 3813 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3814 &cap_renameat_target_rights); 3815 if ((error = namei(&tond)) != 0) { 3816 /* Translate error code for rename("dir1", "dir2/."). */ 3817 if (error == EISDIR && fvp->v_type == VDIR) 3818 error = EINVAL; 3819 NDFREE_PNBUF(&fromnd); 3820 vrele(fromnd.ni_dvp); 3821 vrele(fvp); 3822 goto out1; 3823 } 3824 tdvp = tond.ni_dvp; 3825 tvp = tond.ni_vp; 3826 if (tvp != NULL && (flags & AT_RENAME_NOREPLACE) != 0) { 3827 /* 3828 * Often filesystems need to relock the vnodes in 3829 * VOP_RENAME(), which opens a window for invalidation 3830 * of this check. Then, not all filesystems might 3831 * implement AT_RENAME_NOREPLACE. 
This leads to 3832 * situation where sometimes EOPNOTSUPP might be 3833 * returned from the VOP due to race, while most of 3834 * the time this check works. 3835 */ 3836 error = EEXIST; 3837 goto out; 3838 } 3839 error = vn_start_write(fvp, &mp, V_NOWAIT); 3840 if (error != 0) { 3841 again1: 3842 NDFREE_PNBUF(&fromnd); 3843 NDFREE_PNBUF(&tond); 3844 if (tvp != NULL) 3845 vput(tvp); 3846 if (tdvp == tvp) 3847 vrele(tdvp); 3848 else 3849 vput(tdvp); 3850 vrele(fromnd.ni_dvp); 3851 vrele(fvp); 3852 if (tmp != NULL) { 3853 lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); 3854 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); 3855 vfs_rel(tmp); 3856 tmp = NULL; 3857 } 3858 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3859 if (error != 0) 3860 return (error); 3861 goto again; 3862 } 3863 error = VOP_GETWRITEMOUNT(tdvp, &tmp); 3864 if (error != 0 || tmp == NULL) 3865 goto again1; 3866 error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); 3867 if (error != 0) { 3868 vn_finished_write(mp); 3869 goto again1; 3870 } 3871 irflag = vn_irflag_read(fvp); 3872 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3873 (irflag & VIRF_NAMEDDIR) != 0) { 3874 error = EINVAL; 3875 goto out; 3876 } 3877 if (tvp != NULL) { 3878 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3879 error = ENOTDIR; 3880 goto out; 3881 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3882 error = EISDIR; 3883 goto out; 3884 } 3885 #ifdef CAPABILITIES 3886 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3887 /* 3888 * If the target already exists we require CAP_UNLINKAT 3889 * from 'newfd', when newfd was used for the lookup. 
3890 */ 3891 error = cap_check(&tond.ni_filecaps.fc_rights, 3892 &cap_unlinkat_rights); 3893 if (error != 0) 3894 goto out; 3895 } 3896 #endif 3897 } 3898 if (fvp == tdvp) { 3899 error = EINVAL; 3900 goto out; 3901 } 3902 /* 3903 * If the source is the same as the destination (that is, if they 3904 * are links to the same vnode), then there is nothing to do. 3905 */ 3906 if (fvp == tvp) 3907 error = ERESTART; 3908 #ifdef MAC 3909 else 3910 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3911 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3912 #endif 3913 out: 3914 if (error == 0) { 3915 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3916 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd, flags); 3917 NDFREE_PNBUF(&fromnd); 3918 NDFREE_PNBUF(&tond); 3919 } else { 3920 NDFREE_PNBUF(&fromnd); 3921 NDFREE_PNBUF(&tond); 3922 if (tvp != NULL) 3923 vput(tvp); 3924 if (tdvp == tvp) 3925 vrele(tdvp); 3926 else 3927 vput(tdvp); 3928 vrele(fromnd.ni_dvp); 3929 vrele(fvp); 3930 } 3931 if (tmp != NULL) { 3932 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); 3933 vfs_rel(tmp); 3934 } 3935 if (mp != NULL) 3936 vn_finished_write(mp); 3937 out1: 3938 if (error == ERESTART) 3939 return (0); 3940 if (error == ERELOOKUP) 3941 goto again; 3942 return (error); 3943 } 3944 3945 /* 3946 * Make a directory file. 
3947 */ 3948 #ifndef _SYS_SYSPROTO_H_ 3949 struct mkdir_args { 3950 char *path; 3951 int mode; 3952 }; 3953 #endif 3954 int 3955 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3956 { 3957 3958 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3959 uap->mode)); 3960 } 3961 3962 #ifndef _SYS_SYSPROTO_H_ 3963 struct mkdirat_args { 3964 int fd; 3965 char *path; 3966 mode_t mode; 3967 }; 3968 #endif 3969 int 3970 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3971 { 3972 3973 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3974 } 3975 3976 int 3977 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3978 int mode) 3979 { 3980 struct mount *mp; 3981 struct vattr vattr; 3982 struct nameidata nd; 3983 int error; 3984 3985 AUDIT_ARG_MODE(mode); 3986 NDPREINIT(&nd); 3987 restart: 3988 bwillwrite(); 3989 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3990 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3991 segflg, path, fd, &cap_mkdirat_rights); 3992 if ((error = namei(&nd)) != 0) 3993 return (error); 3994 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3995 NDFREE_PNBUF(&nd); 3996 vput(nd.ni_dvp); 3997 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3998 return (error); 3999 goto restart; 4000 } 4001 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 4002 error = EINVAL; 4003 goto out; 4004 } 4005 VATTR_NULL(&vattr); 4006 vattr.va_type = VDIR; 4007 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 4008 #ifdef MAC 4009 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 4010 &vattr); 4011 if (error != 0) 4012 goto out; 4013 #endif 4014 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4015 out: 4016 NDFREE_PNBUF(&nd); 4017 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? 
&nd.ni_vp : NULL, true); 4018 vn_finished_write(mp); 4019 if (error == ERELOOKUP) 4020 goto restart; 4021 return (error); 4022 } 4023 4024 /* 4025 * Remove a directory file. 4026 */ 4027 #ifndef _SYS_SYSPROTO_H_ 4028 struct rmdir_args { 4029 char *path; 4030 }; 4031 #endif 4032 int 4033 sys_rmdir(struct thread *td, struct rmdir_args *uap) 4034 { 4035 4036 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 4037 0)); 4038 } 4039 4040 int 4041 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 4042 enum uio_seg pathseg, int flag) 4043 { 4044 struct mount *mp; 4045 struct vnode *vp; 4046 struct file *fp; 4047 struct nameidata nd; 4048 cap_rights_t rights; 4049 int error; 4050 4051 fp = NULL; 4052 if (fd != FD_NONE) { 4053 error = getvnode(td, fd, cap_rights_init_one(&rights, 4054 CAP_LOOKUP), &fp); 4055 if (error != 0) 4056 return (error); 4057 } 4058 4059 NDPREINIT(&nd); 4060 restart: 4061 bwillwrite(); 4062 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 4063 at2cnpflags(flag, AT_RESOLVE_BENEATH), 4064 pathseg, path, dfd, &cap_unlinkat_rights); 4065 if ((error = namei(&nd)) != 0) 4066 goto fdout; 4067 vp = nd.ni_vp; 4068 if (vp->v_type != VDIR) { 4069 error = ENOTDIR; 4070 goto out; 4071 } 4072 /* 4073 * No rmdir "." please. 4074 */ 4075 if (nd.ni_dvp == vp) { 4076 error = EINVAL; 4077 goto out; 4078 } 4079 /* 4080 * The root of a mounted filesystem cannot be deleted. 
4081 */ 4082 if (vp->v_vflag & VV_ROOT) { 4083 error = EBUSY; 4084 goto out; 4085 } 4086 4087 if (fp != NULL && fp->f_vnode != vp) { 4088 if (VN_IS_DOOMED(fp->f_vnode)) 4089 error = EBADF; 4090 else 4091 error = EDEADLK; 4092 goto out; 4093 } 4094 4095 #ifdef MAC 4096 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4097 &nd.ni_cnd); 4098 if (error != 0) 4099 goto out; 4100 #endif 4101 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4102 NDFREE_PNBUF(&nd); 4103 vput(vp); 4104 if (nd.ni_dvp == vp) 4105 vrele(nd.ni_dvp); 4106 else 4107 vput(nd.ni_dvp); 4108 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4109 goto fdout; 4110 goto restart; 4111 } 4112 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4113 vn_finished_write(mp); 4114 out: 4115 NDFREE_PNBUF(&nd); 4116 vput(vp); 4117 if (nd.ni_dvp == vp) 4118 vrele(nd.ni_dvp); 4119 else 4120 vput(nd.ni_dvp); 4121 if (error == ERELOOKUP) 4122 goto restart; 4123 fdout: 4124 if (fp != NULL) 4125 fdrop(fp, td); 4126 return (error); 4127 } 4128 4129 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4130 int 4131 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4132 long *basep, void (*func)(struct freebsd11_dirent *)) 4133 { 4134 struct freebsd11_dirent dstdp; 4135 struct dirent *dp, *edp; 4136 char *dirbuf; 4137 off_t base; 4138 ssize_t resid, ucount; 4139 int error; 4140 4141 /* XXX arbitrary sanity limit on `count'. 
*/ 4142 count = min(count, 64 * 1024); 4143 4144 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4145 4146 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4147 UIO_SYSSPACE); 4148 if (error != 0) 4149 goto done; 4150 if (basep != NULL) 4151 *basep = base; 4152 4153 ucount = 0; 4154 for (dp = (struct dirent *)dirbuf, 4155 edp = (struct dirent *)&dirbuf[count - resid]; 4156 ucount < count && dp < edp; ) { 4157 if (dp->d_reclen == 0) 4158 break; 4159 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4160 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4161 continue; 4162 dstdp.d_type = dp->d_type; 4163 dstdp.d_namlen = dp->d_namlen; 4164 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4165 if (dstdp.d_fileno != dp->d_fileno) { 4166 switch (ino64_trunc_error) { 4167 default: 4168 case 0: 4169 break; 4170 case 1: 4171 error = EOVERFLOW; 4172 goto done; 4173 case 2: 4174 dstdp.d_fileno = UINT32_MAX; 4175 break; 4176 } 4177 } 4178 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4179 ((dp->d_namlen + 1 + 3) &~ 3); 4180 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4181 bzero(dstdp.d_name + dstdp.d_namlen, 4182 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4183 dstdp.d_namlen); 4184 MPASS(dstdp.d_reclen <= dp->d_reclen); 4185 MPASS(ucount + dstdp.d_reclen <= count); 4186 if (func != NULL) 4187 func(&dstdp); 4188 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4189 if (error != 0) 4190 break; 4191 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4192 ucount += dstdp.d_reclen; 4193 } 4194 4195 done: 4196 free(dirbuf, M_TEMP); 4197 if (error == 0) 4198 td->td_retval[0] = ucount; 4199 return (error); 4200 } 4201 #endif /* COMPAT */ 4202 4203 #ifdef COMPAT_43 4204 static void 4205 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4206 { 4207 #if (BYTE_ORDER == LITTLE_ENDIAN) 4208 /* 4209 * The expected low byte of dp->d_namlen is our dp->d_type. 4210 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4211 */ 4212 dp->d_type = dp->d_namlen; 4213 dp->d_namlen = 0; 4214 #else 4215 /* 4216 * The dp->d_type is the high byte of the expected dp->d_namlen, 4217 * so must be zero'ed. 4218 */ 4219 dp->d_type = 0; 4220 #endif 4221 } 4222 4223 /* 4224 * Read a block of directory entries in a filesystem independent format. 4225 */ 4226 #ifndef _SYS_SYSPROTO_H_ 4227 struct ogetdirentries_args { 4228 int fd; 4229 char *buf; 4230 u_int count; 4231 long *basep; 4232 }; 4233 #endif 4234 int 4235 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4236 { 4237 long loff; 4238 int error; 4239 4240 error = kern_ogetdirentries(td, uap, &loff); 4241 if (error == 0) 4242 error = copyout(&loff, uap->basep, sizeof(long)); 4243 return (error); 4244 } 4245 4246 int 4247 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4248 long *ploff) 4249 { 4250 long base; 4251 int error; 4252 4253 /* XXX arbitrary sanity limit on `count'. */ 4254 if (uap->count > 64 * 1024) 4255 return (EINVAL); 4256 4257 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4258 &base, ogetdirentries_cvt); 4259 4260 if (error == 0 && uap->basep != NULL) 4261 error = copyout(&base, uap->basep, sizeof(long)); 4262 4263 return (error); 4264 } 4265 #endif /* COMPAT_43 */ 4266 4267 #if defined(COMPAT_FREEBSD11) 4268 #ifndef _SYS_SYSPROTO_H_ 4269 struct freebsd11_getdirentries_args { 4270 int fd; 4271 char *buf; 4272 u_int count; 4273 long *basep; 4274 }; 4275 #endif 4276 int 4277 freebsd11_getdirentries(struct thread *td, 4278 struct freebsd11_getdirentries_args *uap) 4279 { 4280 long base; 4281 int error; 4282 4283 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4284 &base, NULL); 4285 4286 if (error == 0 && uap->basep != NULL) 4287 error = copyout(&base, uap->basep, sizeof(long)); 4288 return (error); 4289 } 4290 4291 int 4292 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4293 { 4294 struct 
freebsd11_getdirentries_args ap; 4295 4296 ap.fd = uap->fd; 4297 ap.buf = uap->buf; 4298 ap.count = uap->count; 4299 ap.basep = NULL; 4300 return (freebsd11_getdirentries(td, &ap)); 4301 } 4302 #endif /* COMPAT_FREEBSD11 */ 4303 4304 /* 4305 * Read a block of directory entries in a filesystem independent format. 4306 */ 4307 int 4308 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4309 { 4310 off_t base; 4311 int error; 4312 4313 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4314 NULL, UIO_USERSPACE); 4315 if (error != 0) 4316 return (error); 4317 if (uap->basep != NULL) 4318 error = copyout(&base, uap->basep, sizeof(off_t)); 4319 return (error); 4320 } 4321 4322 int 4323 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4324 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4325 { 4326 struct vnode *vp; 4327 struct file *fp; 4328 struct uio auio; 4329 struct iovec aiov; 4330 off_t loff; 4331 int error, eofflag; 4332 off_t foffset; 4333 4334 AUDIT_ARG_FD(fd); 4335 if (count > IOSIZE_MAX) 4336 return (EINVAL); 4337 auio.uio_resid = count; 4338 error = getvnode(td, fd, &cap_read_rights, &fp); 4339 if (error != 0) 4340 return (error); 4341 if ((fp->f_flag & FREAD) == 0) { 4342 fdrop(fp, td); 4343 return (EBADF); 4344 } 4345 vp = fp->f_vnode; 4346 foffset = foffset_lock(fp, 0); 4347 unionread: 4348 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4349 error = ENOENT; 4350 goto fail; 4351 } 4352 aiov.iov_base = buf; 4353 aiov.iov_len = count; 4354 auio.uio_iov = &aiov; 4355 auio.uio_iovcnt = 1; 4356 auio.uio_rw = UIO_READ; 4357 auio.uio_segflg = bufseg; 4358 auio.uio_td = td; 4359 vn_lock(vp, LK_SHARED | LK_RETRY); 4360 /* 4361 * We want to return ENOTDIR for anything that is not VDIR, but 4362 * not for VBAD, and we can't check for VBAD while the vnode is 4363 * unlocked. 
4364 */ 4365 if (vp->v_type != VDIR) { 4366 if (vp->v_type == VBAD) 4367 error = EBADF; 4368 else 4369 error = ENOTDIR; 4370 VOP_UNLOCK(vp); 4371 goto fail; 4372 } 4373 AUDIT_ARG_VNODE1(vp); 4374 loff = auio.uio_offset = foffset; 4375 #ifdef MAC 4376 error = mac_vnode_check_readdir(td->td_ucred, vp); 4377 if (error == 0) 4378 #endif 4379 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4380 NULL); 4381 foffset = auio.uio_offset; 4382 if (error != 0) { 4383 VOP_UNLOCK(vp); 4384 goto fail; 4385 } 4386 if (count == auio.uio_resid && 4387 (vp->v_vflag & VV_ROOT) && 4388 (vp->v_mount->mnt_flag & MNT_UNION)) { 4389 struct vnode *tvp = vp; 4390 4391 vp = vp->v_mount->mnt_vnodecovered; 4392 vref(vp); 4393 fp->f_vnode = vp; 4394 foffset = 0; 4395 vput(tvp); 4396 goto unionread; 4397 } 4398 VOP_UNLOCK(vp); 4399 *basep = loff; 4400 if (residp != NULL) 4401 *residp = auio.uio_resid; 4402 td->td_retval[0] = count - auio.uio_resid; 4403 fail: 4404 foffset_unlock(fp, foffset, 0); 4405 fdrop(fp, td); 4406 return (error); 4407 } 4408 4409 /* 4410 * Set the mode mask for creation of filesystem nodes. 4411 */ 4412 #ifndef _SYS_SYSPROTO_H_ 4413 struct umask_args { 4414 int newmask; 4415 }; 4416 #endif 4417 int 4418 sys_umask(struct thread *td, struct umask_args *uap) 4419 { 4420 struct pwddesc *pdp; 4421 4422 pdp = td->td_proc->p_pd; 4423 PWDDESC_XLOCK(pdp); 4424 td->td_retval[0] = pdp->pd_cmask; 4425 pdp->pd_cmask = uap->newmask & ALLPERMS; 4426 PWDDESC_XUNLOCK(pdp); 4427 return (0); 4428 } 4429 4430 /* 4431 * Void all references to file by ripping underlying filesystem away from 4432 * vnode. 
4433 */ 4434 #ifndef _SYS_SYSPROTO_H_ 4435 struct revoke_args { 4436 char *path; 4437 }; 4438 #endif 4439 int 4440 sys_revoke(struct thread *td, struct revoke_args *uap) 4441 { 4442 struct vnode *vp; 4443 struct vattr vattr; 4444 struct nameidata nd; 4445 int error; 4446 4447 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4448 uap->path); 4449 if ((error = namei(&nd)) != 0) 4450 return (error); 4451 vp = nd.ni_vp; 4452 NDFREE_PNBUF(&nd); 4453 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4454 error = EINVAL; 4455 goto out; 4456 } 4457 #ifdef MAC 4458 error = mac_vnode_check_revoke(td->td_ucred, vp); 4459 if (error != 0) 4460 goto out; 4461 #endif 4462 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4463 if (error != 0) 4464 goto out; 4465 if (td->td_ucred->cr_uid != vattr.va_uid) { 4466 error = priv_check(td, PRIV_VFS_ADMIN); 4467 if (error != 0) 4468 goto out; 4469 } 4470 if (devfs_usecount(vp) > 0) 4471 VOP_REVOKE(vp, REVOKEALL); 4472 out: 4473 vput(vp); 4474 return (error); 4475 } 4476 4477 /* 4478 * This variant of getvnode() allows O_PATH files. Caller should 4479 * ensure that returned file and vnode are only used for compatible 4480 * semantics. 4481 */ 4482 int 4483 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, 4484 uint8_t *flagsp, struct file **fpp) 4485 { 4486 struct file *fp; 4487 int error; 4488 4489 error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp); 4490 if (error != 0) 4491 return (error); 4492 4493 /* 4494 * The file could be not of the vnode type, or it may be not 4495 * yet fully initialized, in which case the f_vnode pointer 4496 * may be set, but f_ops is still badfileops. E.g., 4497 * devfs_open() transiently create such situation to 4498 * facilitate csw d_fdopen(). 4499 * 4500 * Dupfdopen() handling in kern_openat() installs the 4501 * half-baked file into the process descriptor table, allowing 4502 * other thread to dereference it. Guard against the race by 4503 * checking f_ops. 
4504 */ 4505 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4506 fdrop(fp, td); 4507 *fpp = NULL; 4508 return (EINVAL); 4509 } 4510 4511 *fpp = fp; 4512 return (0); 4513 } 4514 4515 /* 4516 * Convert a user file descriptor to a kernel file entry and check 4517 * that, if it is a capability, the correct rights are present. 4518 * A reference on the file entry is held upon returning. 4519 */ 4520 int 4521 getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, 4522 struct file **fpp) 4523 { 4524 int error; 4525 4526 error = getvnode_path(td, fd, rightsp, NULL, fpp); 4527 if (__predict_false(error != 0)) 4528 return (error); 4529 4530 /* 4531 * Filter out O_PATH file descriptors, most getvnode() callers 4532 * do not call fo_ methods. 4533 */ 4534 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4535 fdrop(*fpp, td); 4536 *fpp = NULL; 4537 error = EBADF; 4538 } 4539 4540 return (error); 4541 } 4542 4543 /* 4544 * Get an (NFS) file handle. 4545 */ 4546 #ifndef _SYS_SYSPROTO_H_ 4547 struct lgetfh_args { 4548 char *fname; 4549 fhandle_t *fhp; 4550 }; 4551 #endif 4552 int 4553 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4554 { 4555 4556 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4557 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4558 } 4559 4560 #ifndef _SYS_SYSPROTO_H_ 4561 struct getfh_args { 4562 char *fname; 4563 fhandle_t *fhp; 4564 }; 4565 #endif 4566 int 4567 sys_getfh(struct thread *td, struct getfh_args *uap) 4568 { 4569 4570 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4571 uap->fhp, UIO_USERSPACE)); 4572 } 4573 4574 /* 4575 * syscall for the rpc.lockd to use to translate an open descriptor into 4576 * a NFS file handle. 4577 * 4578 * warning: do not remove the priv_check() call or this becomes one giant 4579 * security hole. 
4580 */ 4581 #ifndef _SYS_SYSPROTO_H_ 4582 struct getfhat_args { 4583 int fd; 4584 char *path; 4585 fhandle_t *fhp; 4586 int flags; 4587 }; 4588 #endif 4589 int 4590 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4591 { 4592 4593 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4594 uap->fhp, UIO_USERSPACE)); 4595 } 4596 4597 int 4598 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4599 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4600 { 4601 struct nameidata nd; 4602 fhandle_t fh; 4603 struct vnode *vp; 4604 int error; 4605 4606 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4607 return (EINVAL); 4608 error = priv_check(td, PRIV_VFS_GETFH); 4609 if (error != 0) 4610 return (error); 4611 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4612 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4613 fd); 4614 error = namei(&nd); 4615 if (error != 0) 4616 return (error); 4617 NDFREE_PNBUF(&nd); 4618 vp = nd.ni_vp; 4619 bzero(&fh, sizeof(fh)); 4620 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4621 error = VOP_VPTOFH(vp, &fh.fh_fid); 4622 vput(vp); 4623 if (error == 0) { 4624 if (fhseg == UIO_USERSPACE) 4625 error = copyout(&fh, fhp, sizeof (fh)); 4626 else 4627 memcpy(fhp, &fh, sizeof(fh)); 4628 } 4629 return (error); 4630 } 4631 4632 #ifndef _SYS_SYSPROTO_H_ 4633 struct fhlink_args { 4634 fhandle_t *fhp; 4635 const char *to; 4636 }; 4637 #endif 4638 int 4639 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4640 { 4641 4642 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4643 } 4644 4645 #ifndef _SYS_SYSPROTO_H_ 4646 struct fhlinkat_args { 4647 fhandle_t *fhp; 4648 int tofd; 4649 const char *to; 4650 }; 4651 #endif 4652 int 4653 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4654 { 4655 4656 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4657 } 4658 4659 static int 4660 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4661 enum uio_seg pathseg, fhandle_t *fhp) 4662 { 4663 fhandle_t fh; 4664 struct mount *mp; 4665 struct vnode *vp; 4666 int error; 4667 4668 error = priv_check(td, PRIV_VFS_GETFH); 4669 if (error != 0) 4670 return (error); 4671 error = copyin(fhp, &fh, sizeof(fh)); 4672 if (error != 0) 4673 return (error); 4674 do { 4675 bwillwrite(); 4676 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4677 return (ESTALE); 4678 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4679 vfs_unbusy(mp); 4680 if (error != 0) 4681 return (error); 4682 VOP_UNLOCK(vp); 4683 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4684 } while (error == EAGAIN || error == ERELOOKUP); 4685 return (error); 4686 } 4687 4688 #ifndef _SYS_SYSPROTO_H_ 4689 struct fhreadlink_args { 4690 fhandle_t *fhp; 4691 char *buf; 4692 size_t bufsize; 4693 }; 4694 #endif 4695 int 4696 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4697 { 4698 fhandle_t fh; 4699 struct mount *mp; 4700 struct vnode *vp; 4701 int error; 4702 4703 error = priv_check(td, PRIV_VFS_GETFH); 4704 if (error != 0) 4705 return (error); 4706 if (uap->bufsize > IOSIZE_MAX) 4707 return (EINVAL); 4708 error = copyin(uap->fhp, &fh, sizeof(fh)); 4709 if (error != 0) 4710 return (error); 4711 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4712 return (ESTALE); 4713 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4714 vfs_unbusy(mp); 4715 if (error != 0) 4716 return (error); 4717 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4718 vput(vp); 4719 return (error); 4720 } 4721 4722 /* 4723 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4724 * open descriptor. 4725 * 4726 * warning: do not remove the priv_check() call or this becomes one giant 4727 * security hole. 
4728 */ 4729 #ifndef _SYS_SYSPROTO_H_ 4730 struct fhopen_args { 4731 const struct fhandle *u_fhp; 4732 int flags; 4733 }; 4734 #endif 4735 int 4736 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4737 { 4738 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4739 } 4740 4741 int 4742 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4743 { 4744 struct mount *mp; 4745 struct vnode *vp; 4746 struct fhandle fhp; 4747 struct file *fp; 4748 int error, indx; 4749 bool named_attr; 4750 4751 error = priv_check(td, PRIV_VFS_FHOPEN); 4752 if (error != 0) 4753 return (error); 4754 4755 indx = -1; 4756 if ((flags & O_CREAT) != 0) 4757 return (EINVAL); 4758 error = openflags(&flags); 4759 if (error != 0) 4760 return (error); 4761 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4762 if (error != 0) 4763 return (error); 4764 /* find the mount point */ 4765 mp = vfs_busyfs(&fhp.fh_fsid); 4766 if (mp == NULL) 4767 return (ESTALE); 4768 /* now give me my vnode, it gets returned to me locked */ 4769 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4770 vfs_unbusy(mp); 4771 if (error != 0) 4772 return (error); 4773 4774 /* 4775 * Check to see if the file handle refers to a named attribute 4776 * directory or attribute. If it does, the O_NAMEDATTR flag 4777 * must have been specified. 4778 */ 4779 named_attr = (vn_irflag_read(vp) & 4780 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4781 if ((named_attr && (flags & O_NAMEDATTR) == 0) || 4782 (!named_attr && (flags & O_NAMEDATTR) != 0)) { 4783 vput(vp); 4784 return (ENOATTR); 4785 } 4786 4787 error = falloc_noinstall(td, &fp); 4788 if (error != 0) { 4789 vput(vp); 4790 return (error); 4791 } 4792 /* Set the flags early so the finit in devfs can pick them up. 
*/ 4793 fp->f_flag = flags & FMASK; 4794 4795 #ifdef INVARIANTS 4796 td->td_dupfd = -1; 4797 #endif 4798 error = vn_open_vnode(vp, flags, td->td_ucred, td, fp); 4799 if (error != 0) { 4800 KASSERT(fp->f_ops == &badfileops, 4801 ("VOP_OPEN in fhopen() set f_ops")); 4802 KASSERT(td->td_dupfd < 0, 4803 ("fhopen() encountered fdopen()")); 4804 4805 vput(vp); 4806 goto bad; 4807 } 4808 #ifdef INVARIANTS 4809 td->td_dupfd = 0; 4810 #endif 4811 finit_open(fp, vp, flags); 4812 VOP_UNLOCK(vp); 4813 if ((flags & O_TRUNC) != 0) { 4814 error = fo_truncate(fp, 0, td->td_ucred, td); 4815 if (error != 0) 4816 goto bad; 4817 } 4818 4819 error = finstall(td, fp, &indx, flags, NULL); 4820 bad: 4821 fdrop(fp, td); 4822 td->td_retval[0] = indx; 4823 return (error); 4824 } 4825 4826 /* 4827 * Stat an (NFS) file handle. 4828 */ 4829 #ifndef _SYS_SYSPROTO_H_ 4830 struct fhstat_args { 4831 struct fhandle *u_fhp; 4832 struct stat *sb; 4833 }; 4834 #endif 4835 int 4836 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4837 { 4838 struct stat sb; 4839 struct fhandle fh; 4840 int error; 4841 4842 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4843 if (error != 0) 4844 return (error); 4845 error = kern_fhstat(td, fh, &sb); 4846 if (error == 0) 4847 error = copyout(&sb, uap->sb, sizeof(sb)); 4848 return (error); 4849 } 4850 4851 int 4852 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4853 { 4854 struct mount *mp; 4855 struct vnode *vp; 4856 int error; 4857 4858 error = priv_check(td, PRIV_VFS_FHSTAT); 4859 if (error != 0) 4860 return (error); 4861 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4862 return (ESTALE); 4863 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4864 vfs_unbusy(mp); 4865 if (error != 0) 4866 return (error); 4867 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4868 vput(vp); 4869 return (error); 4870 } 4871 4872 /* 4873 * Implement fstatfs() for (NFS) file handles. 
4874 */ 4875 #ifndef _SYS_SYSPROTO_H_ 4876 struct fhstatfs_args { 4877 struct fhandle *u_fhp; 4878 struct statfs *buf; 4879 }; 4880 #endif 4881 int 4882 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4883 { 4884 struct statfs *sfp; 4885 fhandle_t fh; 4886 int error; 4887 4888 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4889 if (error != 0) 4890 return (error); 4891 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4892 error = kern_fhstatfs(td, fh, sfp); 4893 if (error == 0) 4894 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4895 free(sfp, M_STATFS); 4896 return (error); 4897 } 4898 4899 int 4900 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4901 { 4902 struct mount *mp; 4903 struct vnode *vp; 4904 int error; 4905 4906 error = priv_check(td, PRIV_VFS_FHSTATFS); 4907 if (error != 0) 4908 return (error); 4909 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4910 return (ESTALE); 4911 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4912 if (error != 0) { 4913 vfs_unbusy(mp); 4914 return (error); 4915 } 4916 vput(vp); 4917 error = prison_canseemount(td->td_ucred, mp); 4918 if (error != 0) 4919 goto out; 4920 #ifdef MAC 4921 error = mac_mount_check_stat(td->td_ucred, mp); 4922 if (error != 0) 4923 goto out; 4924 #endif 4925 error = VFS_STATFS(mp, buf); 4926 out: 4927 vfs_unbusy(mp); 4928 return (error); 4929 } 4930 4931 /* 4932 * Unlike madvise(2), we do not make a best effort to remember every 4933 * possible caching hint. Instead, we remember the last setting with 4934 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4935 * region of any current setting. 
4936 */ 4937 int 4938 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4939 int advice) 4940 { 4941 struct fadvise_info *fa, *new; 4942 struct file *fp; 4943 struct vnode *vp; 4944 off_t end; 4945 int error; 4946 4947 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4948 return (EINVAL); 4949 AUDIT_ARG_VALUE(advice); 4950 switch (advice) { 4951 case POSIX_FADV_SEQUENTIAL: 4952 case POSIX_FADV_RANDOM: 4953 case POSIX_FADV_NOREUSE: 4954 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4955 break; 4956 case POSIX_FADV_NORMAL: 4957 case POSIX_FADV_WILLNEED: 4958 case POSIX_FADV_DONTNEED: 4959 new = NULL; 4960 break; 4961 default: 4962 return (EINVAL); 4963 } 4964 /* XXX: CAP_POSIX_FADVISE? */ 4965 AUDIT_ARG_FD(fd); 4966 error = fget(td, fd, &cap_no_rights, &fp); 4967 if (error != 0) 4968 goto out; 4969 AUDIT_ARG_FILE(td->td_proc, fp); 4970 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4971 error = ESPIPE; 4972 goto out; 4973 } 4974 if (fp->f_type != DTYPE_VNODE) { 4975 error = ENODEV; 4976 goto out; 4977 } 4978 vp = fp->f_vnode; 4979 if (vp->v_type != VREG) { 4980 error = ENODEV; 4981 goto out; 4982 } 4983 if (len == 0) 4984 end = OFF_MAX; 4985 else 4986 end = offset + len - 1; 4987 switch (advice) { 4988 case POSIX_FADV_SEQUENTIAL: 4989 case POSIX_FADV_RANDOM: 4990 case POSIX_FADV_NOREUSE: 4991 /* 4992 * Try to merge any existing non-standard region with 4993 * this new region if possible, otherwise create a new 4994 * non-standard region for this request. 
4995 */ 4996 mtx_pool_lock(mtxpool_sleep, fp); 4997 fa = fp->f_advice; 4998 if (fa != NULL && fa->fa_advice == advice && 4999 ((fa->fa_start <= end && fa->fa_end >= offset) || 5000 (end != OFF_MAX && fa->fa_start == end + 1) || 5001 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 5002 if (offset < fa->fa_start) 5003 fa->fa_start = offset; 5004 if (end > fa->fa_end) 5005 fa->fa_end = end; 5006 } else { 5007 new->fa_advice = advice; 5008 new->fa_start = offset; 5009 new->fa_end = end; 5010 fp->f_advice = new; 5011 new = fa; 5012 } 5013 mtx_pool_unlock(mtxpool_sleep, fp); 5014 break; 5015 case POSIX_FADV_NORMAL: 5016 /* 5017 * If a the "normal" region overlaps with an existing 5018 * non-standard region, trim or remove the 5019 * non-standard region. 5020 */ 5021 mtx_pool_lock(mtxpool_sleep, fp); 5022 fa = fp->f_advice; 5023 if (fa != NULL) { 5024 if (offset <= fa->fa_start && end >= fa->fa_end) { 5025 new = fa; 5026 fp->f_advice = NULL; 5027 } else if (offset <= fa->fa_start && 5028 end >= fa->fa_start) 5029 fa->fa_start = end + 1; 5030 else if (offset <= fa->fa_end && end >= fa->fa_end) 5031 fa->fa_end = offset - 1; 5032 else if (offset >= fa->fa_start && end <= fa->fa_end) { 5033 /* 5034 * If the "normal" region is a middle 5035 * portion of the existing 5036 * non-standard region, just remove 5037 * the whole thing rather than picking 5038 * one side or the other to 5039 * preserve. 
5040 */ 5041 new = fa; 5042 fp->f_advice = NULL; 5043 } 5044 } 5045 mtx_pool_unlock(mtxpool_sleep, fp); 5046 break; 5047 case POSIX_FADV_WILLNEED: 5048 case POSIX_FADV_DONTNEED: 5049 error = VOP_ADVISE(vp, offset, end, advice); 5050 break; 5051 } 5052 out: 5053 if (fp != NULL) 5054 fdrop(fp, td); 5055 free(new, M_FADVISE); 5056 return (error); 5057 } 5058 5059 int 5060 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 5061 { 5062 int error; 5063 5064 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 5065 uap->advice); 5066 return (kern_posix_error(td, error)); 5067 } 5068 5069 int 5070 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 5071 off_t *outoffp, size_t len, unsigned int flags) 5072 { 5073 struct file *infp, *infp1, *outfp, *outfp1; 5074 struct vnode *invp, *outvp; 5075 int error; 5076 size_t retlen; 5077 void *rl_rcookie, *rl_wcookie; 5078 off_t inoff, outoff, savinoff, savoutoff; 5079 bool foffsets_locked, foffsets_set; 5080 5081 infp = outfp = NULL; 5082 rl_rcookie = rl_wcookie = NULL; 5083 foffsets_locked = false; 5084 foffsets_set = false; 5085 error = 0; 5086 retlen = 0; 5087 5088 if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) { 5089 error = EINVAL; 5090 goto out; 5091 } 5092 if (len > SSIZE_MAX) 5093 /* 5094 * Although the len argument is size_t, the return argument 5095 * is ssize_t (which is signed). Therefore a size that won't 5096 * fit in ssize_t can't be returned. 5097 */ 5098 len = SSIZE_MAX; 5099 5100 /* Get the file structures for the file descriptors. */ 5101 error = fget_read(td, infd, 5102 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 5103 if (error != 0) 5104 goto out; 5105 if (infp->f_ops == &badfileops) { 5106 error = EBADF; 5107 goto out; 5108 } 5109 if (infp->f_vnode == NULL) { 5110 error = EINVAL; 5111 goto out; 5112 } 5113 error = fget_write(td, outfd, 5114 outoffp != NULL ? 
&cap_pwrite_rights : &cap_write_rights, &outfp); 5115 if (error != 0) 5116 goto out; 5117 if (outfp->f_ops == &badfileops) { 5118 error = EBADF; 5119 goto out; 5120 } 5121 if (outfp->f_vnode == NULL) { 5122 error = EINVAL; 5123 goto out; 5124 } 5125 5126 /* 5127 * Figure out which file offsets we're reading from and writing to. 5128 * If the offsets come from the file descriptions, we need to lock them, 5129 * and locking both offsets requires a loop to avoid deadlocks. 5130 */ 5131 infp1 = outfp1 = NULL; 5132 if (inoffp != NULL) 5133 inoff = *inoffp; 5134 else 5135 infp1 = infp; 5136 if (outoffp != NULL) 5137 outoff = *outoffp; 5138 else 5139 outfp1 = outfp; 5140 if (infp1 != NULL || outfp1 != NULL) { 5141 if (infp1 == outfp1) { 5142 /* 5143 * Overlapping ranges are not allowed. A more thorough 5144 * check appears below, but we must not lock the same 5145 * offset twice. 5146 */ 5147 error = EINVAL; 5148 goto out; 5149 } 5150 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5151 foffsets_locked = true; 5152 } else { 5153 foffsets_set = true; 5154 } 5155 savinoff = inoff; 5156 savoutoff = outoff; 5157 5158 invp = infp->f_vnode; 5159 outvp = outfp->f_vnode; 5160 /* Sanity check the f_flag bits. */ 5161 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5162 (infp->f_flag & FREAD) == 0) { 5163 error = EBADF; 5164 goto out; 5165 } 5166 5167 /* If len == 0, just return 0. */ 5168 if (len == 0) 5169 goto out; 5170 5171 /* 5172 * Make sure that the ranges we check and lock below are valid. Note 5173 * that len is clamped to SSIZE_MAX above. 5174 */ 5175 if (inoff < 0 || outoff < 0) { 5176 error = EINVAL; 5177 goto out; 5178 } 5179 5180 /* 5181 * If infp and outfp refer to the same file, the byte ranges cannot 5182 * overlap. 
5183 */ 5184 if (invp == outvp) { 5185 if ((inoff <= outoff && inoff + len > outoff) || 5186 (inoff > outoff && outoff + len > inoff)) { 5187 error = EINVAL; 5188 goto out; 5189 } 5190 rangelock_may_recurse(&invp->v_rl); 5191 } 5192 5193 /* Range lock the byte ranges for both invp and outvp. */ 5194 for (;;) { 5195 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5196 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5197 if (rl_rcookie != NULL) 5198 break; 5199 vn_rangelock_unlock(outvp, rl_wcookie); 5200 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5201 vn_rangelock_unlock(invp, rl_rcookie); 5202 } 5203 5204 retlen = len; 5205 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5206 flags, infp->f_cred, outfp->f_cred, td); 5207 out: 5208 if (rl_rcookie != NULL) 5209 vn_rangelock_unlock(invp, rl_rcookie); 5210 if (rl_wcookie != NULL) 5211 vn_rangelock_unlock(outvp, rl_wcookie); 5212 if ((foffsets_locked || foffsets_set) && 5213 (error == EINTR || error == ERESTART)) { 5214 inoff = savinoff; 5215 outoff = savoutoff; 5216 } 5217 if (foffsets_locked) { 5218 if (inoffp == NULL) 5219 foffset_unlock(infp, inoff, 0); 5220 else 5221 *inoffp = inoff; 5222 if (outoffp == NULL) 5223 foffset_unlock(outfp, outoff, 0); 5224 else 5225 *outoffp = outoff; 5226 } else if (foffsets_set) { 5227 *inoffp = inoff; 5228 *outoffp = outoff; 5229 } 5230 if (outfp != NULL) 5231 fdrop(outfp, td); 5232 if (infp != NULL) 5233 fdrop(infp, td); 5234 td->td_retval[0] = retlen; 5235 return (error); 5236 } 5237 5238 int 5239 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5240 { 5241 off_t inoff, outoff, *inoffp, *outoffp; 5242 int error; 5243 5244 inoffp = outoffp = NULL; 5245 if (uap->inoffp != NULL) { 5246 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5247 if (error != 0) 5248 return (error); 5249 inoffp = &inoff; 5250 } 5251 if (uap->outoffp != NULL) { 5252 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 
5253 if (error != 0) 5254 return (error); 5255 outoffp = &outoff; 5256 } 5257 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5258 outoffp, uap->len, uap->flags); 5259 if (error == 0 && uap->inoffp != NULL) 5260 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5261 if (error == 0 && uap->outoffp != NULL) 5262 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5263 return (error); 5264 } 5265