1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include "opt_capsicum.h" 38 #include "opt_ktrace.h" 39 40 #define EXTERR_CATEGORY EXTERR_CAT_VFSSYSCALL 41 #include <sys/systm.h> 42 #ifdef COMPAT_FREEBSD11 43 #include <sys/abi_compat.h> 44 #endif 45 #include <sys/bio.h> 46 #include <sys/buf.h> 47 #include <sys/capsicum.h> 48 #include <sys/disk.h> 49 #include <sys/dirent.h> 50 #include <sys/exterrvar.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/filio.h> 55 #include <sys/jail.h> 56 #include <sys/kernel.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/malloc.h> 63 #include <sys/mount.h> 64 #include <sys/mutex.h> 65 #include <sys/namei.h> 66 #include <sys/priv.h> 67 #include <sys/proc.h> 68 #include <sys/rwlock.h> 69 #include <sys/sdt.h> 70 #include <sys/stat.h> 71 #include <sys/stdarg.h> 72 #include <sys/sx.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #include <sys/sysproto.h> 76 #include <sys/unistd.h> 77 #include <sys/vnode.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vnode_pager.h> 86 #include <vm/uma.h> 87 88 #include <fs/devfs/devfs.h> 89 90 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 91 92 static int kern_chflagsat(struct thread *td, int fd, const char *path, 93 enum uio_seg pathseg, u_long flags, int atflag); 94 static int setfflags(struct thread *td, struct vnode *, u_long); 95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 96 static int getutimens(const struct timespec *, enum uio_seg, 97 struct timespec *, int *); 98 static int setutimes(struct thread *td, struct vnode *, 99 const struct timespec *, int, int); 100 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 101 struct thread *td); 102 static int kern_fhlinkat(struct 
thread *td, int fd, const char *path, 103 enum uio_seg pathseg, fhandle_t *fhp); 104 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 105 size_t count, struct thread *td); 106 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 107 const char *path, enum uio_seg segflag); 108 109 uint64_t 110 at2cnpflags(u_int at_flags, u_int mask) 111 { 112 uint64_t res; 113 114 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 115 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 116 117 res = 0; 118 at_flags &= mask; 119 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 120 res |= RBENEATH; 121 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 122 res |= FOLLOW; 123 /* NOFOLLOW is pseudo flag */ 124 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 125 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 126 FOLLOW; 127 } 128 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 129 res |= EMPTYPATH; 130 return (res); 131 } 132 133 int 134 kern_sync(struct thread *td) 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_periodic(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Sync each mounted filesystem. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct sync_args { 166 int dummy; 167 }; 168 #endif 169 /* ARGSUSED */ 170 int 171 sys_sync(struct thread *td, struct sync_args *uap) 172 { 173 174 return (kern_sync(td)); 175 } 176 177 /* 178 * Change filesystem quotas. 
179 */ 180 #ifndef _SYS_SYSPROTO_H_ 181 struct quotactl_args { 182 char *path; 183 int cmd; 184 int uid; 185 caddr_t arg; 186 }; 187 #endif 188 int 189 sys_quotactl(struct thread *td, struct quotactl_args *uap) 190 { 191 struct mount *mp; 192 struct nameidata nd; 193 int error; 194 bool mp_busy; 195 196 AUDIT_ARG_CMD(uap->cmd); 197 AUDIT_ARG_UID(uap->uid); 198 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 199 return (EPERM); 200 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 201 uap->path); 202 if ((error = namei(&nd)) != 0) 203 return (error); 204 NDFREE_PNBUF(&nd); 205 mp = nd.ni_vp->v_mount; 206 vfs_ref(mp); 207 vput(nd.ni_vp); 208 error = vfs_busy(mp, 0); 209 if (error != 0) { 210 vfs_rel(mp); 211 return (error); 212 } 213 mp_busy = true; 214 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 215 216 /* 217 * Since quota on/off operations typically need to open quota 218 * files, the implementation may need to unbusy the mount point 219 * before calling into namei. Otherwise, unmount might be 220 * started between two vfs_busy() invocations (first is ours, 221 * second is from mount point cross-walk code in lookup()), 222 * causing deadlock. 223 * 224 * Avoid unbusying mp if the implementation indicates it has 225 * already done so. 226 */ 227 if (mp_busy) 228 vfs_unbusy(mp); 229 vfs_rel(mp); 230 return (error); 231 } 232 233 /* 234 * Used by statfs conversion routines to scale the block size up if 235 * necessary so that all of the block counts are <= 'max_size'. Note 236 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 237 * value of 'n'. 238 */ 239 void 240 statfs_scale_blocks(struct statfs *sf, long max_size) 241 { 242 uint64_t count; 243 int shift; 244 245 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 246 247 /* 248 * Attempt to scale the block counts to give a more accurate 249 * overview to userland of the ratio of free space to used 250 * space. 
To do this, find the largest block count and compute 251 * a divisor that lets it fit into a signed integer <= max_size. 252 */ 253 if (sf->f_bavail < 0) 254 count = -sf->f_bavail; 255 else 256 count = sf->f_bavail; 257 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 258 if (count <= max_size) 259 return; 260 261 count >>= flsl(max_size); 262 shift = 0; 263 while (count > 0) { 264 shift++; 265 count >>=1; 266 } 267 268 sf->f_bsize <<= shift; 269 sf->f_blocks >>= shift; 270 sf->f_bfree >>= shift; 271 sf->f_bavail >>= shift; 272 } 273 274 static int 275 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 276 { 277 int error; 278 279 if (mp == NULL) 280 return (EBADF); 281 error = vfs_busy(mp, 0); 282 vfs_rel(mp); 283 if (error != 0) 284 return (error); 285 #ifdef MAC 286 error = mac_mount_check_stat(td->td_ucred, mp); 287 if (error != 0) 288 goto out; 289 #endif 290 error = VFS_STATFS(mp, buf); 291 if (error != 0) 292 goto out; 293 if (priv_check_cred_vfs_generation(td->td_ucred)) 294 prison_enforce_statfs(td->td_ucred, mp, buf); 295 out: 296 vfs_unbusy(mp); 297 return (error); 298 } 299 300 /* 301 * Get filesystem statistics. 
302 */ 303 #ifndef _SYS_SYSPROTO_H_ 304 struct statfs_args { 305 char *path; 306 struct statfs *buf; 307 }; 308 #endif 309 int 310 sys_statfs(struct thread *td, struct statfs_args *uap) 311 { 312 struct statfs *sfp; 313 int error; 314 315 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 316 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 317 if (error == 0) 318 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 319 free(sfp, M_STATFS); 320 return (error); 321 } 322 323 int 324 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 325 struct statfs *buf) 326 { 327 struct mount *mp; 328 struct nameidata nd; 329 int error; 330 331 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 332 error = namei(&nd); 333 if (error != 0) 334 return (error); 335 NDFREE_PNBUF(&nd); 336 mp = vfs_ref_from_vp(nd.ni_vp); 337 vrele(nd.ni_vp); 338 return (kern_do_statfs(td, mp, buf)); 339 } 340 341 /* 342 * Get filesystem statistics. 343 */ 344 #ifndef _SYS_SYSPROTO_H_ 345 struct fstatfs_args { 346 int fd; 347 struct statfs *buf; 348 }; 349 #endif 350 int 351 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 352 { 353 struct statfs *sfp; 354 int error; 355 356 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 357 error = kern_fstatfs(td, uap->fd, sfp); 358 if (error == 0) 359 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 360 free(sfp, M_STATFS); 361 return (error); 362 } 363 364 int 365 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 366 { 367 struct file *fp; 368 struct mount *mp; 369 struct vnode *vp; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp); 374 if (error != 0) 375 return (error); 376 vp = fp->f_vnode; 377 #ifdef AUDIT 378 if (AUDITING_TD(td)) { 379 vn_lock(vp, LK_SHARED | LK_RETRY); 380 AUDIT_ARG_VNODE1(vp); 381 VOP_UNLOCK(vp); 382 } 383 #endif 384 mp = vfs_ref_from_vp(vp); 385 fdrop(fp, td); 386 return (kern_do_statfs(td, mp, buf)); 387 } 388 
389 /* 390 * Get statistics on all filesystems. 391 */ 392 #ifndef _SYS_SYSPROTO_H_ 393 struct getfsstat_args { 394 struct statfs *buf; 395 long bufsize; 396 int mode; 397 }; 398 #endif 399 int 400 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 401 { 402 size_t count; 403 int error; 404 405 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 406 return (EINVAL); 407 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 408 UIO_USERSPACE, uap->mode); 409 if (error == 0) 410 td->td_retval[0] = count; 411 return (error); 412 } 413 414 /* 415 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 416 * The caller is responsible for freeing memory which will be allocated 417 * in '*buf'. 418 */ 419 int 420 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 421 size_t *countp, enum uio_seg bufseg, int mode) 422 { 423 struct mount *mp, *nmp; 424 struct statfs *sfsp, *sp, *sptmp, *tofree; 425 size_t count, maxcount; 426 int error; 427 428 switch (mode) { 429 case MNT_WAIT: 430 case MNT_NOWAIT: 431 break; 432 default: 433 if (bufseg == UIO_SYSSPACE) 434 *buf = NULL; 435 return (EINVAL); 436 } 437 restart: 438 maxcount = bufsize / sizeof(struct statfs); 439 if (bufsize == 0) { 440 sfsp = NULL; 441 tofree = NULL; 442 } else if (bufseg == UIO_USERSPACE) { 443 sfsp = *buf; 444 tofree = NULL; 445 } else /* if (bufseg == UIO_SYSSPACE) */ { 446 count = 0; 447 mtx_lock(&mountlist_mtx); 448 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 449 count++; 450 } 451 mtx_unlock(&mountlist_mtx); 452 if (maxcount > count) 453 maxcount = count; 454 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 455 M_STATFS, M_WAITOK); 456 } 457 458 count = 0; 459 460 /* 461 * If there is no target buffer they only want the count. 462 * 463 * This could be TAILQ_FOREACH but it is open-coded to match the original 464 * code below. 
465 */ 466 if (sfsp == NULL) { 467 mtx_lock(&mountlist_mtx); 468 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 469 if (prison_canseemount(td->td_ucred, mp) != 0) { 470 nmp = TAILQ_NEXT(mp, mnt_list); 471 continue; 472 } 473 #ifdef MAC 474 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 475 nmp = TAILQ_NEXT(mp, mnt_list); 476 continue; 477 } 478 #endif 479 count++; 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 } 482 mtx_unlock(&mountlist_mtx); 483 *countp = count; 484 return (0); 485 } 486 487 /* 488 * They want the entire thing. 489 * 490 * Short-circuit the corner case of no room for anything, avoids 491 * relocking below. 492 */ 493 if (maxcount < 1) { 494 goto out; 495 } 496 497 mtx_lock(&mountlist_mtx); 498 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 499 if (prison_canseemount(td->td_ucred, mp) != 0) { 500 nmp = TAILQ_NEXT(mp, mnt_list); 501 continue; 502 } 503 #ifdef MAC 504 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 continue; 507 } 508 #endif 509 if (mode == MNT_WAIT) { 510 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 511 /* 512 * If vfs_busy() failed, and MBF_NOWAIT 513 * wasn't passed, then the mp is gone. 514 * Furthermore, because of MBF_MNTLSTLOCK, 515 * the mountlist_mtx was dropped. We have 516 * no other choice than to start over. 517 */ 518 mtx_unlock(&mountlist_mtx); 519 free(tofree, M_STATFS); 520 goto restart; 521 } 522 } else { 523 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 524 nmp = TAILQ_NEXT(mp, mnt_list); 525 continue; 526 } 527 } 528 sp = &mp->mnt_stat; 529 /* 530 * If MNT_NOWAIT is specified, do not refresh 531 * the fsstat cache. 
532 */ 533 if (mode != MNT_NOWAIT) { 534 error = VFS_STATFS(mp, sp); 535 if (error != 0) { 536 mtx_lock(&mountlist_mtx); 537 nmp = TAILQ_NEXT(mp, mnt_list); 538 vfs_unbusy(mp); 539 continue; 540 } 541 } 542 if (priv_check_cred_vfs_generation(td->td_ucred)) { 543 sptmp = malloc(sizeof(struct statfs), M_STATFS, 544 M_WAITOK); 545 *sptmp = *sp; 546 prison_enforce_statfs(td->td_ucred, mp, sptmp); 547 sp = sptmp; 548 } else 549 sptmp = NULL; 550 if (bufseg == UIO_SYSSPACE) { 551 bcopy(sp, sfsp, sizeof(*sp)); 552 free(sptmp, M_STATFS); 553 } else /* if (bufseg == UIO_USERSPACE) */ { 554 error = copyout(sp, sfsp, sizeof(*sp)); 555 free(sptmp, M_STATFS); 556 if (error != 0) { 557 vfs_unbusy(mp); 558 return (error); 559 } 560 } 561 sfsp++; 562 count++; 563 564 if (count == maxcount) { 565 vfs_unbusy(mp); 566 goto out; 567 } 568 569 mtx_lock(&mountlist_mtx); 570 nmp = TAILQ_NEXT(mp, mnt_list); 571 vfs_unbusy(mp); 572 } 573 mtx_unlock(&mountlist_mtx); 574 out: 575 *countp = count; 576 return (0); 577 } 578 579 #ifdef COMPAT_FREEBSD4 580 /* 581 * Get old format filesystem statistics. 582 */ 583 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 584 585 #ifndef _SYS_SYSPROTO_H_ 586 struct freebsd4_statfs_args { 587 char *path; 588 struct ostatfs *buf; 589 }; 590 #endif 591 int 592 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 593 { 594 struct ostatfs osb; 595 struct statfs *sfp; 596 int error; 597 598 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 599 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 600 if (error == 0) { 601 freebsd4_cvtstatfs(sfp, &osb); 602 error = copyout(&osb, uap->buf, sizeof(osb)); 603 } 604 free(sfp, M_STATFS); 605 return (error); 606 } 607 608 /* 609 * Get filesystem statistics. 
610 */ 611 #ifndef _SYS_SYSPROTO_H_ 612 struct freebsd4_fstatfs_args { 613 int fd; 614 struct ostatfs *buf; 615 }; 616 #endif 617 int 618 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 619 { 620 struct ostatfs osb; 621 struct statfs *sfp; 622 int error; 623 624 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 625 error = kern_fstatfs(td, uap->fd, sfp); 626 if (error == 0) { 627 freebsd4_cvtstatfs(sfp, &osb); 628 error = copyout(&osb, uap->buf, sizeof(osb)); 629 } 630 free(sfp, M_STATFS); 631 return (error); 632 } 633 634 /* 635 * Get statistics on all filesystems. 636 */ 637 #ifndef _SYS_SYSPROTO_H_ 638 struct freebsd4_getfsstat_args { 639 struct ostatfs *buf; 640 long bufsize; 641 int mode; 642 }; 643 #endif 644 int 645 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 646 { 647 struct statfs *buf, *sp; 648 struct ostatfs osb; 649 size_t count, size; 650 int error; 651 652 if (uap->bufsize < 0) 653 return (EINVAL); 654 count = uap->bufsize / sizeof(struct ostatfs); 655 if (count > SIZE_MAX / sizeof(struct statfs)) 656 return (EINVAL); 657 size = count * sizeof(struct statfs); 658 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 659 uap->mode); 660 if (error == 0) 661 td->td_retval[0] = count; 662 if (size != 0) { 663 sp = buf; 664 while (count != 0 && error == 0) { 665 freebsd4_cvtstatfs(sp, &osb); 666 error = copyout(&osb, uap->buf, sizeof(osb)); 667 sp++; 668 uap->buf++; 669 count--; 670 } 671 free(buf, M_STATFS); 672 } 673 return (error); 674 } 675 676 /* 677 * Implement fstatfs() for (NFS) file handles. 
678 */ 679 #ifndef _SYS_SYSPROTO_H_ 680 struct freebsd4_fhstatfs_args { 681 struct fhandle *u_fhp; 682 struct ostatfs *buf; 683 }; 684 #endif 685 int 686 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 687 { 688 struct ostatfs osb; 689 struct statfs *sfp; 690 fhandle_t fh; 691 int error; 692 693 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 694 if (error != 0) 695 return (error); 696 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 697 error = kern_fhstatfs(td, fh, sfp); 698 if (error == 0) { 699 freebsd4_cvtstatfs(sfp, &osb); 700 error = copyout(&osb, uap->buf, sizeof(osb)); 701 } 702 free(sfp, M_STATFS); 703 return (error); 704 } 705 706 /* 707 * Convert a new format statfs structure to an old format statfs structure. 708 */ 709 static void 710 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 711 { 712 713 statfs_scale_blocks(nsp, LONG_MAX); 714 bzero(osp, sizeof(*osp)); 715 osp->f_bsize = nsp->f_bsize; 716 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 717 osp->f_blocks = nsp->f_blocks; 718 osp->f_bfree = nsp->f_bfree; 719 osp->f_bavail = nsp->f_bavail; 720 osp->f_files = MIN(nsp->f_files, LONG_MAX); 721 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 722 osp->f_owner = nsp->f_owner; 723 osp->f_type = nsp->f_type; 724 osp->f_flags = nsp->f_flags; 725 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 726 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 727 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 728 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 729 strlcpy(osp->f_fstypename, nsp->f_fstypename, 730 MIN(MFSNAMELEN, OMFSNAMELEN)); 731 strlcpy(osp->f_mntonname, nsp->f_mntonname, 732 MIN(MNAMELEN, OMNAMELEN)); 733 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 734 MIN(MNAMELEN, OMNAMELEN)); 735 osp->f_fsid = nsp->f_fsid; 736 } 737 #endif /* COMPAT_FREEBSD4 */ 738 739 #if defined(COMPAT_FREEBSD11) 740 /* 741 * Get old format filesystem statistics. 
742 */ 743 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 744 745 int 746 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 747 { 748 struct freebsd11_statfs osb; 749 struct statfs *sfp; 750 int error; 751 752 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 753 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 754 if (error == 0) { 755 freebsd11_cvtstatfs(sfp, &osb); 756 error = copyout(&osb, uap->buf, sizeof(osb)); 757 } 758 free(sfp, M_STATFS); 759 return (error); 760 } 761 762 /* 763 * Get filesystem statistics. 764 */ 765 int 766 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 767 { 768 struct freebsd11_statfs osb; 769 struct statfs *sfp; 770 int error; 771 772 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 773 error = kern_fstatfs(td, uap->fd, sfp); 774 if (error == 0) { 775 freebsd11_cvtstatfs(sfp, &osb); 776 error = copyout(&osb, uap->buf, sizeof(osb)); 777 } 778 free(sfp, M_STATFS); 779 return (error); 780 } 781 782 /* 783 * Get statistics on all filesystems. 
784 */ 785 int 786 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 787 { 788 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 789 } 790 791 int 792 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 793 long bufsize, int mode) 794 { 795 struct freebsd11_statfs osb; 796 struct statfs *buf, *sp; 797 size_t count, size; 798 int error; 799 800 if (bufsize < 0) 801 return (EINVAL); 802 803 count = bufsize / sizeof(struct ostatfs); 804 size = count * sizeof(struct statfs); 805 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 806 if (error == 0) 807 td->td_retval[0] = count; 808 if (size > 0) { 809 sp = buf; 810 while (count > 0 && error == 0) { 811 freebsd11_cvtstatfs(sp, &osb); 812 error = copyout(&osb, ubuf, sizeof(osb)); 813 sp++; 814 ubuf++; 815 count--; 816 } 817 free(buf, M_STATFS); 818 } 819 return (error); 820 } 821 822 /* 823 * Implement fstatfs() for (NFS) file handles. 824 */ 825 int 826 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 827 { 828 struct freebsd11_statfs osb; 829 struct statfs *sfp; 830 fhandle_t fh; 831 int error; 832 833 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 834 if (error) 835 return (error); 836 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 837 error = kern_fhstatfs(td, fh, sfp); 838 if (error == 0) { 839 freebsd11_cvtstatfs(sfp, &osb); 840 error = copyout(&osb, uap->buf, sizeof(osb)); 841 } 842 free(sfp, M_STATFS); 843 return (error); 844 } 845 846 /* 847 * Convert a new format statfs structure to an old format statfs structure. 
848 */ 849 static void 850 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 851 { 852 853 bzero(osp, sizeof(*osp)); 854 osp->f_version = FREEBSD11_STATFS_VERSION; 855 osp->f_type = nsp->f_type; 856 osp->f_flags = nsp->f_flags; 857 osp->f_bsize = nsp->f_bsize; 858 osp->f_iosize = nsp->f_iosize; 859 osp->f_blocks = nsp->f_blocks; 860 osp->f_bfree = nsp->f_bfree; 861 osp->f_bavail = nsp->f_bavail; 862 osp->f_files = nsp->f_files; 863 osp->f_ffree = nsp->f_ffree; 864 osp->f_syncwrites = nsp->f_syncwrites; 865 osp->f_asyncwrites = nsp->f_asyncwrites; 866 osp->f_syncreads = nsp->f_syncreads; 867 osp->f_asyncreads = nsp->f_asyncreads; 868 osp->f_namemax = nsp->f_namemax; 869 osp->f_owner = nsp->f_owner; 870 osp->f_fsid = nsp->f_fsid; 871 strlcpy(osp->f_fstypename, nsp->f_fstypename, 872 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 873 strlcpy(osp->f_mntonname, nsp->f_mntonname, 874 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 875 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 876 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 877 } 878 #endif /* COMPAT_FREEBSD11 */ 879 880 /* 881 * Change current working directory to a given file descriptor. 
882 */ 883 #ifndef _SYS_SYSPROTO_H_ 884 struct fchdir_args { 885 int fd; 886 }; 887 #endif 888 int 889 sys_fchdir(struct thread *td, struct fchdir_args *uap) 890 { 891 struct vnode *vp, *tdp; 892 struct mount *mp; 893 struct file *fp; 894 int error; 895 uint8_t fdflags; 896 897 AUDIT_ARG_FD(uap->fd); 898 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags, 899 &fp); 900 if (error != 0) 901 return (error); 902 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 903 fdrop(fp, td); 904 return (ENOTCAPABLE); 905 } 906 vp = fp->f_vnode; 907 vrefact(vp); 908 fdrop(fp, td); 909 vn_lock(vp, LK_SHARED | LK_RETRY); 910 AUDIT_ARG_VNODE1(vp); 911 error = change_dir(vp, td); 912 while (!error && (mp = vp->v_mountedhere) != NULL) { 913 if (vfs_busy(mp, 0)) 914 continue; 915 error = VFS_ROOT(mp, LK_SHARED, &tdp); 916 vfs_unbusy(mp); 917 if (error != 0) 918 break; 919 vput(vp); 920 vp = tdp; 921 } 922 if (error != 0) { 923 vput(vp); 924 return (error); 925 } 926 VOP_UNLOCK(vp); 927 pwd_chdir(td, vp); 928 return (0); 929 } 930 931 /* 932 * Change current working directory (``.''). 
933 */ 934 #ifndef _SYS_SYSPROTO_H_ 935 struct chdir_args { 936 char *path; 937 }; 938 #endif 939 int 940 sys_chdir(struct thread *td, struct chdir_args *uap) 941 { 942 943 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 944 } 945 946 int 947 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 948 { 949 struct nameidata nd; 950 int error; 951 952 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 953 pathseg, path); 954 if ((error = namei(&nd)) != 0) 955 return (error); 956 if ((error = change_dir(nd.ni_vp, td)) != 0) { 957 vput(nd.ni_vp); 958 NDFREE_PNBUF(&nd); 959 return (error); 960 } 961 VOP_UNLOCK(nd.ni_vp); 962 NDFREE_PNBUF(&nd); 963 pwd_chdir(td, nd.ni_vp); 964 return (0); 965 } 966 967 static int unprivileged_chroot = 0; 968 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 969 &unprivileged_chroot, 0, 970 "Unprivileged processes can use chroot(2)"); 971 972 /* 973 * Takes locked vnode, unlocks it before returning. 974 */ 975 static int 976 kern_chroot(struct thread *td, struct vnode *vp) 977 { 978 struct proc *p; 979 int error; 980 981 error = priv_check(td, PRIV_VFS_CHROOT); 982 if (error != 0) { 983 p = td->td_proc; 984 if (unprivileged_chroot == 0) { 985 error = EXTERROR(EPERM, 986 "security.bsd.unprivileged_chroot sysctl not enabled"); 987 goto e_vunlock; 988 } 989 if ((p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 990 error = EXTERROR(EPERM, 991 "PROC_NO_NEW_PRIVS not enabled"); 992 goto e_vunlock; 993 } 994 } 995 996 error = change_dir(vp, td); 997 if (error != 0) 998 goto e_vunlock; 999 #ifdef MAC 1000 error = mac_vnode_check_chroot(td->td_ucred, vp); 1001 if (error != 0) 1002 goto e_vunlock; 1003 #endif 1004 VOP_UNLOCK(vp); 1005 error = pwd_chroot(td, vp); 1006 vrele(vp); 1007 return (error); 1008 e_vunlock: 1009 vput(vp); 1010 return (error); 1011 } 1012 1013 /* 1014 * Change notion of root (``/'') directory. 
1015 */ 1016 #ifndef _SYS_SYSPROTO_H_ 1017 struct chroot_args { 1018 char *path; 1019 }; 1020 #endif 1021 int 1022 sys_chroot(struct thread *td, struct chroot_args *uap) 1023 { 1024 struct nameidata nd; 1025 int error; 1026 1027 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1028 UIO_USERSPACE, uap->path); 1029 error = namei(&nd); 1030 if (error != 0) 1031 return (error); 1032 NDFREE_PNBUF(&nd); 1033 error = kern_chroot(td, nd.ni_vp); 1034 return (error); 1035 } 1036 1037 /* 1038 * Change notion of root directory to a given file descriptor. 1039 */ 1040 #ifndef _SYS_SYSPROTO_H_ 1041 struct fchroot_args { 1042 int fd; 1043 }; 1044 #endif 1045 int 1046 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1047 { 1048 struct vnode *vp; 1049 struct file *fp; 1050 int error; 1051 uint8_t fdflags; 1052 1053 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fdflags, &fp); 1054 if (error != 0) 1055 return (error); 1056 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 1057 fdrop(fp, td); 1058 return (ENOTCAPABLE); 1059 } 1060 vp = fp->f_vnode; 1061 vrefact(vp); 1062 fdrop(fp, td); 1063 vn_lock(vp, LK_SHARED | LK_RETRY); 1064 error = kern_chroot(td, vp); 1065 return (error); 1066 } 1067 1068 /* 1069 * Common routine for chroot and chdir. Callers must provide a locked vnode 1070 * instance. 
1071 */ 1072 int 1073 change_dir(struct vnode *vp, struct thread *td) 1074 { 1075 #ifdef MAC 1076 int error; 1077 #endif 1078 1079 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1080 if (vp->v_type != VDIR) 1081 return (ENOTDIR); 1082 #ifdef MAC 1083 error = mac_vnode_check_chdir(td->td_ucred, vp); 1084 if (error != 0) 1085 return (error); 1086 #endif 1087 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1088 } 1089 1090 static __inline void 1091 flags_to_rights(int flags, cap_rights_t *rightsp) 1092 { 1093 if (flags & O_EXEC) { 1094 cap_rights_set_one(rightsp, CAP_FEXECVE); 1095 if (flags & O_PATH) 1096 return; 1097 } else { 1098 switch ((flags & O_ACCMODE)) { 1099 case O_RDONLY: 1100 cap_rights_set_one(rightsp, CAP_READ); 1101 break; 1102 case O_RDWR: 1103 cap_rights_set_one(rightsp, CAP_READ); 1104 /* FALLTHROUGH */ 1105 case O_WRONLY: 1106 cap_rights_set_one(rightsp, CAP_WRITE); 1107 if (!(flags & (O_APPEND | O_TRUNC))) 1108 cap_rights_set_one(rightsp, CAP_SEEK); 1109 break; 1110 } 1111 } 1112 1113 if (flags & O_CREAT) 1114 cap_rights_set_one(rightsp, CAP_CREATE); 1115 1116 if (flags & O_TRUNC) 1117 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1118 1119 if (flags & (O_SYNC | O_FSYNC | O_DSYNC)) 1120 cap_rights_set_one(rightsp, CAP_FSYNC); 1121 1122 if (flags & (O_EXLOCK | O_SHLOCK)) 1123 cap_rights_set_one(rightsp, CAP_FLOCK); 1124 } 1125 1126 /* 1127 * Check permissions, allocate an open file structure, and call the device 1128 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
int
sys_open(struct thread *td, struct open_args *uap)
{

	/* open(2) is openat(2) with AT_FDCWD as the directory descriptor. */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/*
 * openat(2) entry point: record the directory descriptor for audit and
 * hand the userspace path to kern_openat().
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Validate open(2) flags and convert access mode flags (O_RDONLY etc.) to their
 * in-kernel representations (FREAD etc.).
 *
 * On success *flagsp is rewritten and 0 is returned.  EINVAL is returned,
 * with *flagsp untouched, when O_EXEC is combined with any O_ACCMODE bit or
 * when all O_ACCMODE bits are set.
 */
static int
openflags(int *flagsp)
{
	int flags;

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	flags = *flagsp;
	if ((flags & O_PATH) != 0) {
		flags &= ~O_ACCMODE;
	} else if ((flags & O_EXEC) != 0) {
		if ((flags & O_ACCMODE) != 0)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		/* Only this branch goes through the FFLAGS() conversion. */
		flags = FFLAGS(flags);
	}
	*flagsp = flags;
	return (0);
}

/*
 * Attach the opened vnode to the file.  f_vnode is always set; the file
 * operations are only initialized here when fp->f_ops is still
 * &badfileops, i.e. nothing else has claimed the file yet.
 */
static void
finit_open(struct file *fp, struct vnode *vp, int flags)
{
	/*
	 * Store the vnode, for any f_type. Typically, the vnode use count is
	 * decremented by a direct call to vnops.fo_close() for files that
	 * switched type.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs or fifofs, bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			/* O_PATH files get path_fileops, keeping FKQALLOWED. */
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}
}

/*
 * If fpp != NULL, opened file is not installed into the file
 * descriptor table, instead it is returned in *fpp.  This is
 * incompatible with fdopen(), in which case we return EINVAL.
 *
 * Otherwise the new descriptor index is stored in td->td_retval[0].
 * Every failure after falloc_noinstall() leaves through the "bad" label,
 * which calls falloc_abort() on fp.
 */
static int
openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	struct proc *p;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct filecaps *fcaps;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* indx stays -1 until a descriptor has actually been installed. */
	indx = -1;
	p = td->td_proc;
	fdp = p->p_fd;
	pdp = p->p_pd;

	/*
	 * The audit record and the lookup rights are derived from the
	 * caller's flags, before openflags() rewrites them.
	 */
	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	error = openflags(&flags);
	if (error != 0)
		return (error);

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Creation mode: apply the process cmask, keep permission bits only. */
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, dirfd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			MPASS(fpp == NULL);
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	finit_open(fp, vp, flags);
	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	if (fpp != NULL) {
		/*
		 * NOTE(review): the ENXIO "goto success" path above arrives
		 * here with error still set; confirm that path cannot be
		 * taken by callers that pass fpp != NULL.
		 */
		MPASS(error == 0);
		NDFREE_IOCTLCAPS(&nd);
		*fpp = fp;
		return (0);
	}

	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		/* Mirror the lookup's beneath result into the descriptor flags. */
		if ((nd.ni_resflags & NIRES_BENEATH) != 0)
			flags |= O_RESOLVE_BENEATH;
		else
			flags &= ~O_RESOLVE_BENEATH;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/* dupfdopen() already produced indx; fp is not needed. */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}

/*
 * Open a file and install it in the descriptor table; the descriptor is
 * returned through td->td_retval[0] by openatfp().
 */
int
kern_openat(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode)
{
	return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL));
}

/*
 * Open a file and return it in *fpp instead of installing a descriptor.
 * The thread's td_dupfd is saved around the call and restored afterwards,
 * since openatfp() overwrites it.
 */
int
kern_openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	int error, old_dupfd;

	old_dupfd = td->td_dupfd;
	td->td_dupfd = -1;
	error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp);
	td->td_dupfd = old_dupfd;
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	/* Old creat(): open for writing, creating and truncating. */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

#if defined(COMPAT_FREEBSD11)
/* FreeBSD 11 mknod(): same as mknodat relative to AT_FDCWD. */
int
freebsd11_mknod(struct thread *td,
    struct freebsd11_mknod_args *uap)
{

	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode, uap->dev));
}

/* FreeBSD 11 mknodat(): forwards its arguments to kern_mknodat(). */
int
freebsd11_mknodat(struct thread *td,
    struct freebsd11_mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}
#endif /* COMPAT_FREEBSD11 */

/*
 * Create a character device, block device or whiteout at path (resolved
 * relative to fd).  A FIFO request with dev == 0 is delegated to
 * kern_mkfifoat(); every other file type yields EINVAL.
 *
 * Returns EEXIST if the name already exists and EINVAL if the parent
 * directory has VIRF_NAMEDDIR set.  The lookup is restarted when the
 * write reference cannot be taken immediately and when the operation
 * reports ERELOOKUP.
 */
int
kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int mode, dev_t dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Privilege and argument checks, done before any lookup. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mknodat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: drop what the lookup returned. */
		NDFREE_PNBUF(&nd);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	} else {
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_pd->pd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		/*
		 * Only the types accepted by the first switch can reach
		 * this point, hence the panic in the default case.
		 */
		switch (mode & S_IFMT) {
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The non-blocking attempt failed: release the lookup state,
		 * retry with V_XSLEEP | V_PCATCH and redo the lookup.
		 */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is skipped for whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
		}
	}
	/* nd.ni_vp is only handed over when VOP_MKNOD() produced a vnode. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL,
	    true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Create a named pipe.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO at path (resolved relative to fd) with the permission
 * bits of mode filtered through the process cmask.
 *
 * Returns EEXIST if the name already exists and EINVAL if the parent
 * directory has VIRF_NAMEDDIR set.  The lookup is restarted when the
 * write reference cannot be taken immediately and when VOP_MKNOD()
 * reports ERELOOKUP.
 */
int
kern_mkfifoat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mkfifoat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: drop what the lookup returned. */
		NDFREE_PNBUF(&nd);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The non-blocking attempt failed: release the lookup state,
		 * retry with V_XSLEEP | V_PCATCH and redo the lookup.
		 */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	/* nd.ni_vp is only handed over when VOP_MKNOD() succeeded. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
int
sys_link(struct thread *td, struct link_args *uap)
{

	/* link(2): both paths relative to AT_FDCWD, AT_SYMLINK_FOLLOW set. */
	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, AT_SYMLINK_FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, uap->flag));
}

/*
 * Tunables consulted by can_hardlink(); both default to 0 (no extra
 * ownership check).
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Apply the optional hardlink_check_uid / hardlink_check_gid policy to vp.
 * Returns 0 when linking is allowed, otherwise the error from
 * VOP_GETATTR() or from the PRIV_VFS_LINK privilege check.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	/* With both knobs off there is nothing to check. */
	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	/* The file is owned by another user: PRIV_VFS_LINK is required. */
	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	/* The caller is not in the file's group: PRIV_VFS_LINK is required. */
	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create the hard link path2 (relative to fd2) to the file named by
 * path1 (relative to fd1).
 *
 * Only AT_SYMLINK_FOLLOW, AT_RESOLVE_BENEATH and AT_EMPTY_PATH are
 * accepted in flag; anything else yields EINVAL.  The source lookup and
 * the link attempt are repeated for as long as kern_linkat_vp() returns
 * EAGAIN or ERELOOKUP.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
    const char *path2, enum uio_seg segflag, int flag)
{
	struct nameidata nd;
	int error;

	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH |
	    AT_EMPTY_PATH)) != 0)
		return (EINVAL);

	NDPREINIT(&nd);
	do {
		bwillwrite();
		NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag,
		    AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH),
		    segflag, path1, fd1, &cap_linkat_source_rights);
		if ((error = namei(&nd)) != 0)
			return (error);
		NDFREE_PNBUF(&nd);
		/* A source found via an empty path needs PRIV_VFS_FHOPEN. */
		if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) {
			error = priv_check(td, PRIV_VFS_FHOPEN);
			if (error != 0) {
				vrele(nd.ni_vp);
				return (error);
			}
		}
		/* kern_linkat_vp() releases nd.ni_vp on every path. */
		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
	} while (error == EAGAIN || error == ERELOOKUP);
	return (error);
}

/*
 * Link the referenced vnode vp under the name path (relative to fd).
 * The caller's reference on vp is released on every return path.
 *
 * Returns EPERM for directories, EINVAL for vnodes with VIRF_NAMEDDIR or
 * VIRF_NAMEDATTR set, EEXIST if the target name exists, EXDEV if the
 * target directory is on a different mount, and EAGAIN when the caller
 * should redo the whole operation.
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) {
		vrele(vp);
		return (EINVAL);
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* The target name already exists. */
			NDFREE_PNBUF(&nd);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE_PNBUF(&nd);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Drop everything, retry the write reference
				 * with V_XSLEEP | V_PCATCH and ask the caller
				 * to start over with EAGAIN.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | V_PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
			vn_finished_write(mp);
			NDFREE_PNBUF(&nd);
			/* vp was handed to VOP_VPUT_PAIR(); skip the vrele below. */
			vp = NULL;
		} else {
			/* vn_lock() failed: have the caller retry. */
			vput(nd.ni_dvp);
			NDFREE_PNBUF(&nd);
			vrele(vp);
			return (EAGAIN);
		}
	}
	if (vp != NULL)
		vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1805 */ 1806 #ifndef _SYS_SYSPROTO_H_ 1807 struct symlink_args { 1808 char *path; 1809 char *link; 1810 }; 1811 #endif 1812 int 1813 sys_symlink(struct thread *td, struct symlink_args *uap) 1814 { 1815 1816 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1817 UIO_USERSPACE)); 1818 } 1819 1820 #ifndef _SYS_SYSPROTO_H_ 1821 struct symlinkat_args { 1822 char *path; 1823 int fd; 1824 char *path2; 1825 }; 1826 #endif 1827 int 1828 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1829 { 1830 1831 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1832 UIO_USERSPACE)); 1833 } 1834 1835 int 1836 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1837 enum uio_seg segflg) 1838 { 1839 struct mount *mp; 1840 struct vattr vattr; 1841 const char *syspath; 1842 char *tmppath; 1843 struct nameidata nd; 1844 int error; 1845 1846 if (segflg == UIO_SYSSPACE) { 1847 syspath = path1; 1848 } else { 1849 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1850 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1851 goto out; 1852 syspath = tmppath; 1853 } 1854 AUDIT_ARG_TEXT(syspath); 1855 NDPREINIT(&nd); 1856 restart: 1857 bwillwrite(); 1858 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1859 path2, fd, &cap_symlinkat_rights); 1860 if ((error = namei(&nd)) != 0) 1861 goto out; 1862 if (nd.ni_vp) { 1863 NDFREE_PNBUF(&nd); 1864 if (nd.ni_vp == nd.ni_dvp) 1865 vrele(nd.ni_dvp); 1866 else 1867 vput(nd.ni_dvp); 1868 vrele(nd.ni_vp); 1869 nd.ni_vp = NULL; 1870 error = EEXIST; 1871 goto out; 1872 } 1873 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1874 NDFREE_PNBUF(&nd); 1875 vput(nd.ni_dvp); 1876 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1877 goto out; 1878 goto restart; 1879 } 1880 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1881 error = EINVAL; 1882 goto out; 1883 } 1884 VATTR_NULL(&vattr); 1885 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 
1886 #ifdef MAC 1887 vattr.va_type = VLNK; 1888 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1889 &vattr); 1890 if (error != 0) 1891 goto out2; 1892 #endif 1893 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1894 #ifdef MAC 1895 out2: 1896 #endif 1897 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1898 vn_finished_write(mp); 1899 NDFREE_PNBUF(&nd); 1900 if (error == ERELOOKUP) 1901 goto restart; 1902 out: 1903 if (segflg != UIO_SYSSPACE) 1904 uma_zfree(namei_zone, tmppath); 1905 return (error); 1906 } 1907 1908 /* 1909 * Delete a whiteout from the filesystem. 1910 */ 1911 #ifndef _SYS_SYSPROTO_H_ 1912 struct undelete_args { 1913 char *path; 1914 }; 1915 #endif 1916 int 1917 sys_undelete(struct thread *td, struct undelete_args *uap) 1918 { 1919 struct mount *mp; 1920 struct nameidata nd; 1921 int error; 1922 1923 NDPREINIT(&nd); 1924 restart: 1925 bwillwrite(); 1926 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1927 UIO_USERSPACE, uap->path); 1928 error = namei(&nd); 1929 if (error != 0) 1930 return (error); 1931 1932 if (nd.ni_vp != NULL || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1933 NDFREE_PNBUF(&nd); 1934 if (nd.ni_vp == nd.ni_dvp) 1935 vrele(nd.ni_dvp); 1936 else 1937 vput(nd.ni_dvp); 1938 if (nd.ni_vp) 1939 vrele(nd.ni_vp); 1940 return (EEXIST); 1941 } 1942 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1943 NDFREE_PNBUF(&nd); 1944 vput(nd.ni_dvp); 1945 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1946 return (error); 1947 goto restart; 1948 } 1949 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1950 NDFREE_PNBUF(&nd); 1951 vput(nd.ni_dvp); 1952 vn_finished_write(mp); 1953 if (error == ERELOOKUP) 1954 goto restart; 1955 return (error); 1956 } 1957 1958 /* 1959 * Delete a name from the filesystem. 
1960 */ 1961 #ifndef _SYS_SYSPROTO_H_ 1962 struct unlink_args { 1963 char *path; 1964 }; 1965 #endif 1966 int 1967 sys_unlink(struct thread *td, struct unlink_args *uap) 1968 { 1969 1970 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1971 0, 0)); 1972 } 1973 1974 static int 1975 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1976 int flag, enum uio_seg pathseg, ino_t oldinum) 1977 { 1978 1979 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1980 return (EINVAL); 1981 1982 if ((flag & AT_REMOVEDIR) != 0) 1983 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1984 1985 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1986 } 1987 1988 #ifndef _SYS_SYSPROTO_H_ 1989 struct unlinkat_args { 1990 int fd; 1991 char *path; 1992 int flag; 1993 }; 1994 #endif 1995 int 1996 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1997 { 1998 1999 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 2000 UIO_USERSPACE, 0)); 2001 } 2002 2003 #ifndef _SYS_SYSPROTO_H_ 2004 struct funlinkat_args { 2005 int dfd; 2006 const char *path; 2007 int fd; 2008 int flag; 2009 }; 2010 #endif 2011 int 2012 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 2013 { 2014 2015 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 2016 UIO_USERSPACE, 0)); 2017 } 2018 2019 int 2020 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 2021 enum uio_seg pathseg, int flag, ino_t oldinum) 2022 { 2023 struct mount *mp; 2024 struct file *fp; 2025 struct vnode *vp; 2026 struct nameidata nd; 2027 struct stat sb; 2028 int error; 2029 2030 fp = NULL; 2031 if (fd != FD_NONE) { 2032 error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp); 2033 if (error != 0) 2034 return (error); 2035 } 2036 2037 NDPREINIT(&nd); 2038 restart: 2039 bwillwrite(); 2040 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2041 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2042 
pathseg, path, dfd, &cap_unlinkat_rights); 2043 if ((error = namei(&nd)) != 0) { 2044 if (error == EINVAL) 2045 error = EPERM; 2046 goto fdout; 2047 } 2048 vp = nd.ni_vp; 2049 if (vp->v_type == VDIR && oldinum == 0) { 2050 error = EPERM; /* POSIX */ 2051 } else if (oldinum != 0 && 2052 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2053 sb.st_ino != oldinum) { 2054 error = EIDRM; /* Identifier removed */ 2055 } else if (fp != NULL && fp->f_vnode != vp) { 2056 if (VN_IS_DOOMED(fp->f_vnode)) 2057 error = EBADF; 2058 else 2059 error = EDEADLK; 2060 } else { 2061 /* 2062 * The root of a mounted filesystem cannot be deleted. 2063 * 2064 * XXX: can this only be a VDIR case? 2065 */ 2066 if (vp->v_vflag & VV_ROOT) 2067 error = EBUSY; 2068 } 2069 if (error == 0) { 2070 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2071 NDFREE_PNBUF(&nd); 2072 vput(nd.ni_dvp); 2073 if (vp == nd.ni_dvp) 2074 vrele(vp); 2075 else 2076 vput(vp); 2077 if ((error = vn_start_write(NULL, &mp, 2078 V_XSLEEP | V_PCATCH)) != 0) { 2079 goto fdout; 2080 } 2081 goto restart; 2082 } 2083 #ifdef MAC 2084 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2085 &nd.ni_cnd); 2086 if (error != 0) 2087 goto out; 2088 #endif 2089 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2090 #ifdef MAC 2091 out: 2092 #endif 2093 vn_finished_write(mp); 2094 } 2095 NDFREE_PNBUF(&nd); 2096 vput(nd.ni_dvp); 2097 if (vp == nd.ni_dvp) 2098 vrele(vp); 2099 else 2100 vput(vp); 2101 if (error == ERELOOKUP) 2102 goto restart; 2103 fdout: 2104 if (fp != NULL) 2105 fdrop(fp, td); 2106 return (error); 2107 } 2108 2109 /* 2110 * Reposition read/write file offset. 
2111 */ 2112 #ifndef _SYS_SYSPROTO_H_ 2113 struct lseek_args { 2114 int fd; 2115 int pad; 2116 off_t offset; 2117 int whence; 2118 }; 2119 #endif 2120 int 2121 sys_lseek(struct thread *td, struct lseek_args *uap) 2122 { 2123 2124 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2125 } 2126 2127 int 2128 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2129 { 2130 struct file *fp; 2131 int error; 2132 2133 AUDIT_ARG_FD(fd); 2134 error = fget(td, fd, &cap_seek_rights, &fp); 2135 if (error != 0) 2136 return (error); 2137 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2138 fo_seek(fp, offset, whence, td) : ESPIPE; 2139 fdrop(fp, td); 2140 return (error); 2141 } 2142 2143 #if defined(COMPAT_43) 2144 /* 2145 * Reposition read/write file offset. 2146 */ 2147 #ifndef _SYS_SYSPROTO_H_ 2148 struct olseek_args { 2149 int fd; 2150 long offset; 2151 int whence; 2152 }; 2153 #endif 2154 int 2155 olseek(struct thread *td, struct olseek_args *uap) 2156 { 2157 2158 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2159 } 2160 #endif /* COMPAT_43 */ 2161 2162 #if defined(COMPAT_FREEBSD6) 2163 /* Version with the 'pad' argument */ 2164 int 2165 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2166 { 2167 2168 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2169 } 2170 #endif 2171 2172 /* 2173 * Check access permissions using passed credentials. 2174 */ 2175 static int 2176 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2177 struct thread *td) 2178 { 2179 accmode_t accmode; 2180 int error; 2181 2182 /* Flags == 0 means only check for existence. 
*/ 2183 if (user_flags == 0) 2184 return (0); 2185 2186 accmode = 0; 2187 if (user_flags & R_OK) 2188 accmode |= VREAD; 2189 if (user_flags & W_OK) 2190 accmode |= VWRITE; 2191 if (user_flags & X_OK) 2192 accmode |= VEXEC; 2193 #ifdef MAC 2194 error = mac_vnode_check_access(cred, vp, accmode); 2195 if (error != 0) 2196 return (error); 2197 #endif 2198 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2199 error = VOP_ACCESS(vp, accmode, cred, td); 2200 return (error); 2201 } 2202 2203 /* 2204 * Check access permissions using "real" credentials. 2205 */ 2206 #ifndef _SYS_SYSPROTO_H_ 2207 struct access_args { 2208 char *path; 2209 int amode; 2210 }; 2211 #endif 2212 int 2213 sys_access(struct thread *td, struct access_args *uap) 2214 { 2215 2216 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2217 0, uap->amode)); 2218 } 2219 2220 #ifndef _SYS_SYSPROTO_H_ 2221 struct faccessat_args { 2222 int dirfd; 2223 char *path; 2224 int amode; 2225 int flag; 2226 } 2227 #endif 2228 int 2229 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2230 { 2231 2232 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2233 uap->amode)); 2234 } 2235 2236 int 2237 kern_accessat(struct thread *td, int fd, const char *path, 2238 enum uio_seg pathseg, int flag, int amode) 2239 { 2240 struct ucred *cred, *usecred; 2241 struct vnode *vp; 2242 struct nameidata nd; 2243 int error; 2244 2245 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2246 AT_SYMLINK_NOFOLLOW)) != 0) 2247 return (EINVAL); 2248 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2249 return (EINVAL); 2250 2251 /* 2252 * Create and modify a temporary credential instead of one that 2253 * is potentially shared (if we need one). 
2254 */ 2255 cred = td->td_ucred; 2256 if ((flag & AT_EACCESS) == 0 && 2257 ((cred->cr_uid != cred->cr_ruid || 2258 cred->cr_rgid != cred->cr_gid))) { 2259 usecred = crdup(cred); 2260 usecred->cr_uid = cred->cr_ruid; 2261 usecred->cr_gid = cred->cr_rgid; 2262 td->td_ucred = usecred; 2263 } else 2264 usecred = cred; 2265 AUDIT_ARG_VALUE(amode); 2266 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2267 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2268 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2269 if ((error = namei(&nd)) != 0) 2270 goto out; 2271 vp = nd.ni_vp; 2272 2273 error = vn_access(vp, amode, usecred, td); 2274 NDFREE_PNBUF(&nd); 2275 vput(vp); 2276 out: 2277 if (usecred != cred) { 2278 td->td_ucred = cred; 2279 crfree(usecred); 2280 } 2281 return (error); 2282 } 2283 2284 /* 2285 * Check access permissions using "effective" credentials. 2286 */ 2287 #ifndef _SYS_SYSPROTO_H_ 2288 struct eaccess_args { 2289 char *path; 2290 int amode; 2291 }; 2292 #endif 2293 int 2294 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2295 { 2296 2297 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2298 AT_EACCESS, uap->amode)); 2299 } 2300 2301 #if defined(COMPAT_43) 2302 /* 2303 * Get file status; this version follows links. 2304 */ 2305 #ifndef _SYS_SYSPROTO_H_ 2306 struct ostat_args { 2307 char *path; 2308 struct ostat *ub; 2309 }; 2310 #endif 2311 int 2312 ostat(struct thread *td, struct ostat_args *uap) 2313 { 2314 struct stat sb; 2315 struct ostat osb; 2316 int error; 2317 2318 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2319 if (error != 0) 2320 return (error); 2321 cvtstat(&sb, &osb); 2322 return (copyout(&osb, uap->ub, sizeof (osb))); 2323 } 2324 2325 /* 2326 * Get file status; this version does not follow links. 
2327 */ 2328 #ifndef _SYS_SYSPROTO_H_ 2329 struct olstat_args { 2330 char *path; 2331 struct ostat *ub; 2332 }; 2333 #endif 2334 int 2335 olstat(struct thread *td, struct olstat_args *uap) 2336 { 2337 struct stat sb; 2338 struct ostat osb; 2339 int error; 2340 2341 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2342 UIO_USERSPACE, &sb); 2343 if (error != 0) 2344 return (error); 2345 cvtstat(&sb, &osb); 2346 return (copyout(&osb, uap->ub, sizeof (osb))); 2347 } 2348 2349 /* 2350 * Convert from an old to a new stat structure. 2351 * XXX: many values are blindly truncated. 2352 */ 2353 void 2354 cvtstat(struct stat *st, struct ostat *ost) 2355 { 2356 2357 bzero(ost, sizeof(*ost)); 2358 ost->st_dev = st->st_dev; 2359 ost->st_ino = st->st_ino; 2360 ost->st_mode = st->st_mode; 2361 ost->st_nlink = st->st_nlink; 2362 ost->st_uid = st->st_uid; 2363 ost->st_gid = st->st_gid; 2364 ost->st_rdev = st->st_rdev; 2365 ost->st_size = MIN(st->st_size, INT32_MAX); 2366 ost->st_atim = st->st_atim; 2367 ost->st_mtim = st->st_mtim; 2368 ost->st_ctim = st->st_ctim; 2369 ost->st_blksize = st->st_blksize; 2370 ost->st_blocks = st->st_blocks; 2371 ost->st_flags = st->st_flags; 2372 ost->st_gen = st->st_gen; 2373 } 2374 #endif /* COMPAT_43 */ 2375 2376 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2377 int ino64_trunc_error; 2378 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2379 &ino64_trunc_error, 0, 2380 "Error on truncation of device, file or inode number, or link count"); 2381 2382 int 2383 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2384 { 2385 2386 ost->st_dev = st->st_dev; 2387 if (ost->st_dev != st->st_dev) { 2388 switch (ino64_trunc_error) { 2389 default: 2390 /* 2391 * Since dev_t is almost raw, don't clamp to the 2392 * maximum for case 2, but ignore the error. 
2393 */ 2394 break; 2395 case 1: 2396 return (EOVERFLOW); 2397 } 2398 } 2399 ost->st_ino = st->st_ino; 2400 if (ost->st_ino != st->st_ino) { 2401 switch (ino64_trunc_error) { 2402 default: 2403 case 0: 2404 break; 2405 case 1: 2406 return (EOVERFLOW); 2407 case 2: 2408 ost->st_ino = UINT32_MAX; 2409 break; 2410 } 2411 } 2412 ost->st_mode = st->st_mode; 2413 ost->st_nlink = st->st_nlink; 2414 if (ost->st_nlink != st->st_nlink) { 2415 switch (ino64_trunc_error) { 2416 default: 2417 case 0: 2418 break; 2419 case 1: 2420 return (EOVERFLOW); 2421 case 2: 2422 ost->st_nlink = UINT16_MAX; 2423 break; 2424 } 2425 } 2426 ost->st_uid = st->st_uid; 2427 ost->st_gid = st->st_gid; 2428 ost->st_rdev = st->st_rdev; 2429 if (ost->st_rdev != st->st_rdev) { 2430 switch (ino64_trunc_error) { 2431 default: 2432 break; 2433 case 1: 2434 return (EOVERFLOW); 2435 } 2436 } 2437 ost->st_atim = st->st_atim; 2438 ost->st_mtim = st->st_mtim; 2439 ost->st_ctim = st->st_ctim; 2440 ost->st_size = st->st_size; 2441 ost->st_blocks = st->st_blocks; 2442 ost->st_blksize = st->st_blksize; 2443 ost->st_flags = st->st_flags; 2444 ost->st_gen = st->st_gen; 2445 ost->st_lspare = 0; 2446 ost->st_birthtim = st->st_birthtim; 2447 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2448 sizeof(*ost) - offsetof(struct freebsd11_stat, 2449 st_birthtim) - sizeof(ost->st_birthtim)); 2450 return (0); 2451 } 2452 2453 int 2454 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2455 { 2456 struct stat sb; 2457 struct freebsd11_stat osb; 2458 int error; 2459 2460 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2461 if (error != 0) 2462 return (error); 2463 error = freebsd11_cvtstat(&sb, &osb); 2464 if (error == 0) 2465 error = copyout(&osb, uap->ub, sizeof(osb)); 2466 return (error); 2467 } 2468 2469 int 2470 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2471 { 2472 struct stat sb; 2473 struct freebsd11_stat osb; 2474 int error; 2475 2476 error = 
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2477 UIO_USERSPACE, &sb); 2478 if (error != 0) 2479 return (error); 2480 error = freebsd11_cvtstat(&sb, &osb); 2481 if (error == 0) 2482 error = copyout(&osb, uap->ub, sizeof(osb)); 2483 return (error); 2484 } 2485 2486 int 2487 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2488 { 2489 struct fhandle fh; 2490 struct stat sb; 2491 struct freebsd11_stat osb; 2492 int error; 2493 2494 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2495 if (error != 0) 2496 return (error); 2497 error = kern_fhstat(td, fh, &sb); 2498 if (error != 0) 2499 return (error); 2500 error = freebsd11_cvtstat(&sb, &osb); 2501 if (error == 0) 2502 error = copyout(&osb, uap->sb, sizeof(osb)); 2503 return (error); 2504 } 2505 2506 int 2507 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2508 { 2509 struct stat sb; 2510 struct freebsd11_stat osb; 2511 int error; 2512 2513 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2514 UIO_USERSPACE, &sb); 2515 if (error != 0) 2516 return (error); 2517 error = freebsd11_cvtstat(&sb, &osb); 2518 if (error == 0) 2519 error = copyout(&osb, uap->buf, sizeof(osb)); 2520 return (error); 2521 } 2522 #endif /* COMPAT_FREEBSD11 */ 2523 2524 /* 2525 * Get file status 2526 */ 2527 #ifndef _SYS_SYSPROTO_H_ 2528 struct fstatat_args { 2529 int fd; 2530 char *path; 2531 struct stat *buf; 2532 int flag; 2533 } 2534 #endif 2535 int 2536 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2537 { 2538 struct stat sb; 2539 int error; 2540 2541 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2542 UIO_USERSPACE, &sb); 2543 if (error == 0) 2544 error = copyout(&sb, uap->buf, sizeof (sb)); 2545 return (error); 2546 } 2547 2548 int 2549 kern_statat(struct thread *td, int flag, int fd, const char *path, 2550 enum uio_seg pathseg, struct stat *sbp) 2551 { 2552 struct nameidata nd; 2553 int error; 2554 2555 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2556 AT_EMPTY_PATH)) != 0) 2557 return (EINVAL); 2558 2559 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2560 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2561 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2562 2563 if ((error = namei(&nd)) != 0) { 2564 if (error == ENOTDIR && 2565 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2566 error = kern_fstat(td, fd, sbp); 2567 return (error); 2568 } 2569 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2570 NDFREE_PNBUF(&nd); 2571 vput(nd.ni_vp); 2572 #ifdef __STAT_TIME_T_EXT 2573 sbp->st_atim_ext = 0; 2574 sbp->st_mtim_ext = 0; 2575 sbp->st_ctim_ext = 0; 2576 sbp->st_btim_ext = 0; 2577 #endif 2578 #ifdef KTRACE 2579 if (KTRPOINT(td, KTR_STRUCT)) 2580 ktrstat_error(sbp, error); 2581 #endif 2582 return (error); 2583 } 2584 2585 #if defined(COMPAT_FREEBSD11) 2586 /* 2587 * Implementation of the NetBSD [l]stat() functions. 2588 */ 2589 int 2590 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2591 { 2592 struct freebsd11_stat sb11; 2593 int error; 2594 2595 error = freebsd11_cvtstat(sb, &sb11); 2596 if (error != 0) 2597 return (error); 2598 2599 bzero(nsb, sizeof(*nsb)); 2600 CP(sb11, *nsb, st_dev); 2601 CP(sb11, *nsb, st_ino); 2602 CP(sb11, *nsb, st_mode); 2603 CP(sb11, *nsb, st_nlink); 2604 CP(sb11, *nsb, st_uid); 2605 CP(sb11, *nsb, st_gid); 2606 CP(sb11, *nsb, st_rdev); 2607 CP(sb11, *nsb, st_atim); 2608 CP(sb11, *nsb, st_mtim); 2609 CP(sb11, *nsb, st_ctim); 2610 CP(sb11, *nsb, st_size); 2611 CP(sb11, *nsb, st_blocks); 2612 CP(sb11, *nsb, st_blksize); 2613 CP(sb11, *nsb, st_flags); 2614 CP(sb11, *nsb, st_gen); 2615 CP(sb11, *nsb, st_birthtim); 2616 return (0); 2617 } 2618 2619 #ifndef _SYS_SYSPROTO_H_ 2620 struct freebsd11_nstat_args { 2621 char *path; 2622 struct nstat *ub; 2623 }; 2624 #endif 2625 int 2626 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2627 { 2628 struct stat sb; 2629 struct nstat nsb; 2630 int error; 2631 
2632 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2633 if (error != 0) 2634 return (error); 2635 error = freebsd11_cvtnstat(&sb, &nsb); 2636 if (error == 0) 2637 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2638 return (error); 2639 } 2640 2641 /* 2642 * NetBSD lstat. Get file status; this version does not follow links. 2643 */ 2644 #ifndef _SYS_SYSPROTO_H_ 2645 struct freebsd11_nlstat_args { 2646 char *path; 2647 struct nstat *ub; 2648 }; 2649 #endif 2650 int 2651 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2652 { 2653 struct stat sb; 2654 struct nstat nsb; 2655 int error; 2656 2657 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2658 UIO_USERSPACE, &sb); 2659 if (error != 0) 2660 return (error); 2661 error = freebsd11_cvtnstat(&sb, &nsb); 2662 if (error == 0) 2663 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2664 return (error); 2665 } 2666 #endif /* COMPAT_FREEBSD11 */ 2667 2668 /* 2669 * Get configurable pathname variables. 
2670 */ 2671 #ifndef _SYS_SYSPROTO_H_ 2672 struct pathconf_args { 2673 char *path; 2674 int name; 2675 }; 2676 #endif 2677 int 2678 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2679 { 2680 long value; 2681 int error; 2682 2683 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2684 &value); 2685 if (error == 0) 2686 td->td_retval[0] = value; 2687 return (error); 2688 } 2689 2690 #ifndef _SYS_SYSPROTO_H_ 2691 struct lpathconf_args { 2692 char *path; 2693 int name; 2694 }; 2695 #endif 2696 int 2697 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2698 { 2699 long value; 2700 int error; 2701 2702 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2703 NOFOLLOW, &value); 2704 if (error == 0) 2705 td->td_retval[0] = value; 2706 return (error); 2707 } 2708 2709 int 2710 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2711 int name, u_long flags, long *valuep) 2712 { 2713 struct nameidata nd; 2714 int error; 2715 2716 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2717 pathseg, path); 2718 if ((error = namei(&nd)) != 0) 2719 return (error); 2720 NDFREE_PNBUF(&nd); 2721 2722 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2723 vput(nd.ni_vp); 2724 return (error); 2725 } 2726 2727 /* 2728 * Return target name of a symbolic link. 
2729 */ 2730 #ifndef _SYS_SYSPROTO_H_ 2731 struct readlink_args { 2732 char *path; 2733 char *buf; 2734 size_t count; 2735 }; 2736 #endif 2737 int 2738 sys_readlink(struct thread *td, struct readlink_args *uap) 2739 { 2740 2741 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2742 uap->buf, UIO_USERSPACE, uap->count)); 2743 } 2744 #ifndef _SYS_SYSPROTO_H_ 2745 struct readlinkat_args { 2746 int fd; 2747 char *path; 2748 char *buf; 2749 size_t bufsize; 2750 }; 2751 #endif 2752 int 2753 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2754 { 2755 2756 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2757 uap->buf, UIO_USERSPACE, uap->bufsize)); 2758 } 2759 2760 int 2761 kern_readlinkat(struct thread *td, int fd, const char *path, 2762 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2763 { 2764 struct vnode *vp; 2765 struct nameidata nd; 2766 int error; 2767 2768 if (count > IOSIZE_MAX) 2769 return (EINVAL); 2770 2771 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2772 EMPTYPATH, pathseg, path, fd); 2773 2774 if ((error = namei(&nd)) != 0) 2775 return (error); 2776 NDFREE_PNBUF(&nd); 2777 vp = nd.ni_vp; 2778 2779 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2780 vput(vp); 2781 2782 return (error); 2783 } 2784 2785 /* 2786 * Helper function to readlink from a vnode 2787 */ 2788 static int 2789 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2790 struct thread *td) 2791 { 2792 struct iovec aiov; 2793 struct uio auio; 2794 int error; 2795 2796 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2797 #ifdef MAC 2798 error = mac_vnode_check_readlink(td->td_ucred, vp); 2799 if (error != 0) 2800 return (error); 2801 #endif 2802 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2803 return (EINVAL); 2804 2805 aiov.iov_base = buf; 2806 aiov.iov_len = count; 2807 auio.uio_iov = &aiov; 2808 auio.uio_iovcnt = 1; 2809 auio.uio_offset = 
0; 2810 auio.uio_rw = UIO_READ; 2811 auio.uio_segflg = bufseg; 2812 auio.uio_td = td; 2813 auio.uio_resid = count; 2814 error = VOP_READLINK(vp, &auio, td->td_ucred); 2815 td->td_retval[0] = count - auio.uio_resid; 2816 return (error); 2817 } 2818 2819 /* 2820 * Common implementation code for chflags() and fchflags(). 2821 */ 2822 static int 2823 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2824 { 2825 struct mount *mp; 2826 struct vattr vattr; 2827 int error; 2828 2829 /* We can't support the value matching VNOVAL. */ 2830 if (flags == VNOVAL) 2831 return (EOPNOTSUPP); 2832 2833 /* 2834 * Prevent non-root users from setting flags on devices. When 2835 * a device is reused, users can retain ownership of the device 2836 * if they are allowed to set flags and programs assume that 2837 * chown can't fail when done as root. 2838 */ 2839 if (VN_ISDEV(vp)) { 2840 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2841 if (error != 0) 2842 return (error); 2843 } 2844 2845 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2846 return (error); 2847 VATTR_NULL(&vattr); 2848 vattr.va_flags = flags; 2849 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2850 #ifdef MAC 2851 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2852 if (error == 0) 2853 #endif 2854 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2855 VOP_UNLOCK(vp); 2856 vn_finished_write(mp); 2857 return (error); 2858 } 2859 2860 /* 2861 * Change flags of a file given a path name. 
2862 */ 2863 #ifndef _SYS_SYSPROTO_H_ 2864 struct chflags_args { 2865 const char *path; 2866 u_long flags; 2867 }; 2868 #endif 2869 int 2870 sys_chflags(struct thread *td, struct chflags_args *uap) 2871 { 2872 2873 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2874 uap->flags, 0)); 2875 } 2876 2877 #ifndef _SYS_SYSPROTO_H_ 2878 struct chflagsat_args { 2879 int fd; 2880 const char *path; 2881 u_long flags; 2882 int atflag; 2883 } 2884 #endif 2885 int 2886 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2887 { 2888 2889 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2890 uap->flags, uap->atflag)); 2891 } 2892 2893 /* 2894 * Same as chflags() but doesn't follow symlinks. 2895 */ 2896 #ifndef _SYS_SYSPROTO_H_ 2897 struct lchflags_args { 2898 const char *path; 2899 u_long flags; 2900 }; 2901 #endif 2902 int 2903 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2904 { 2905 2906 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2907 uap->flags, AT_SYMLINK_NOFOLLOW)); 2908 } 2909 2910 static int 2911 kern_chflagsat(struct thread *td, int fd, const char *path, 2912 enum uio_seg pathseg, u_long flags, int atflag) 2913 { 2914 struct nameidata nd; 2915 int error; 2916 2917 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2918 AT_EMPTY_PATH)) != 0) 2919 return (EINVAL); 2920 2921 AUDIT_ARG_FFLAGS(flags); 2922 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2923 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2924 fd, &cap_fchflags_rights); 2925 if ((error = namei(&nd)) != 0) 2926 return (error); 2927 NDFREE_PNBUF(&nd); 2928 error = setfflags(td, nd.ni_vp, flags); 2929 vrele(nd.ni_vp); 2930 return (error); 2931 } 2932 2933 /* 2934 * Change flags of a file given a file descriptor. 
2935 */ 2936 #ifndef _SYS_SYSPROTO_H_ 2937 struct fchflags_args { 2938 int fd; 2939 u_long flags; 2940 }; 2941 #endif 2942 int 2943 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2944 { 2945 struct file *fp; 2946 int error; 2947 2948 AUDIT_ARG_FD(uap->fd); 2949 AUDIT_ARG_FFLAGS(uap->flags); 2950 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2951 &fp); 2952 if (error != 0) 2953 return (error); 2954 #ifdef AUDIT 2955 if (AUDITING_TD(td)) { 2956 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2957 AUDIT_ARG_VNODE1(fp->f_vnode); 2958 VOP_UNLOCK(fp->f_vnode); 2959 } 2960 #endif 2961 error = setfflags(td, fp->f_vnode, uap->flags); 2962 fdrop(fp, td); 2963 return (error); 2964 } 2965 2966 /* 2967 * Common implementation code for chmod(), lchmod() and fchmod(). 2968 */ 2969 int 2970 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2971 { 2972 struct mount *mp; 2973 struct vattr vattr; 2974 int error; 2975 2976 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2977 return (error); 2978 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2979 VATTR_NULL(&vattr); 2980 vattr.va_mode = mode & ALLPERMS; 2981 #ifdef MAC 2982 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2983 if (error == 0) 2984 #endif 2985 error = VOP_SETATTR(vp, &vattr, cred); 2986 VOP_UNLOCK(vp); 2987 vn_finished_write(mp); 2988 return (error); 2989 } 2990 2991 /* 2992 * Change mode of a file given path name. 
2993 */ 2994 #ifndef _SYS_SYSPROTO_H_ 2995 struct chmod_args { 2996 char *path; 2997 int mode; 2998 }; 2999 #endif 3000 int 3001 sys_chmod(struct thread *td, struct chmod_args *uap) 3002 { 3003 3004 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3005 uap->mode, 0)); 3006 } 3007 3008 #ifndef _SYS_SYSPROTO_H_ 3009 struct fchmodat_args { 3010 int dirfd; 3011 char *path; 3012 mode_t mode; 3013 int flag; 3014 } 3015 #endif 3016 int 3017 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 3018 { 3019 3020 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 3021 uap->mode, uap->flag)); 3022 } 3023 3024 /* 3025 * Change mode of a file given path name (don't follow links.) 3026 */ 3027 #ifndef _SYS_SYSPROTO_H_ 3028 struct lchmod_args { 3029 char *path; 3030 int mode; 3031 }; 3032 #endif 3033 int 3034 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3035 { 3036 3037 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3038 uap->mode, AT_SYMLINK_NOFOLLOW)); 3039 } 3040 3041 int 3042 kern_fchmodat(struct thread *td, int fd, const char *path, 3043 enum uio_seg pathseg, mode_t mode, int flag) 3044 { 3045 struct nameidata nd; 3046 int error; 3047 3048 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3049 AT_EMPTY_PATH)) != 0) 3050 return (EINVAL); 3051 3052 AUDIT_ARG_MODE(mode); 3053 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3054 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3055 fd, &cap_fchmod_rights); 3056 if ((error = namei(&nd)) != 0) 3057 return (error); 3058 NDFREE_PNBUF(&nd); 3059 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3060 vrele(nd.ni_vp); 3061 return (error); 3062 } 3063 3064 /* 3065 * Change mode of a file given a file descriptor. 
3066 */ 3067 #ifndef _SYS_SYSPROTO_H_ 3068 struct fchmod_args { 3069 int fd; 3070 int mode; 3071 }; 3072 #endif 3073 int 3074 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3075 { 3076 struct file *fp; 3077 int error; 3078 3079 AUDIT_ARG_FD(uap->fd); 3080 AUDIT_ARG_MODE(uap->mode); 3081 3082 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3083 if (error != 0) 3084 return (error); 3085 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3086 fdrop(fp, td); 3087 return (error); 3088 } 3089 3090 /* 3091 * Common implementation for chown(), lchown(), and fchown() 3092 */ 3093 int 3094 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3095 gid_t gid) 3096 { 3097 struct mount *mp; 3098 struct vattr vattr; 3099 int error; 3100 3101 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3102 return (error); 3103 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3104 VATTR_NULL(&vattr); 3105 vattr.va_uid = uid; 3106 vattr.va_gid = gid; 3107 #ifdef MAC 3108 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3109 vattr.va_gid); 3110 if (error == 0) 3111 #endif 3112 error = VOP_SETATTR(vp, &vattr, cred); 3113 VOP_UNLOCK(vp); 3114 vn_finished_write(mp); 3115 return (error); 3116 } 3117 3118 /* 3119 * Set ownership given a path name. 
3120 */ 3121 #ifndef _SYS_SYSPROTO_H_ 3122 struct chown_args { 3123 char *path; 3124 int uid; 3125 int gid; 3126 }; 3127 #endif 3128 int 3129 sys_chown(struct thread *td, struct chown_args *uap) 3130 { 3131 3132 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3133 uap->gid, 0)); 3134 } 3135 3136 #ifndef _SYS_SYSPROTO_H_ 3137 struct fchownat_args { 3138 int fd; 3139 const char * path; 3140 uid_t uid; 3141 gid_t gid; 3142 int flag; 3143 }; 3144 #endif 3145 int 3146 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3147 { 3148 3149 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3150 uap->gid, uap->flag)); 3151 } 3152 3153 int 3154 kern_fchownat(struct thread *td, int fd, const char *path, 3155 enum uio_seg pathseg, int uid, int gid, int flag) 3156 { 3157 struct nameidata nd; 3158 int error; 3159 3160 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3161 AT_EMPTY_PATH)) != 0) 3162 return (EINVAL); 3163 3164 AUDIT_ARG_OWNER(uid, gid); 3165 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3166 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3167 fd, &cap_fchown_rights); 3168 3169 if ((error = namei(&nd)) != 0) 3170 return (error); 3171 NDFREE_PNBUF(&nd); 3172 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3173 vrele(nd.ni_vp); 3174 return (error); 3175 } 3176 3177 /* 3178 * Set ownership given a path name, do not cross symlinks. 3179 */ 3180 #ifndef _SYS_SYSPROTO_H_ 3181 struct lchown_args { 3182 char *path; 3183 int uid; 3184 int gid; 3185 }; 3186 #endif 3187 int 3188 sys_lchown(struct thread *td, struct lchown_args *uap) 3189 { 3190 3191 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3192 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3193 } 3194 3195 /* 3196 * Set ownership given a file descriptor. 
3197 */ 3198 #ifndef _SYS_SYSPROTO_H_ 3199 struct fchown_args { 3200 int fd; 3201 int uid; 3202 int gid; 3203 }; 3204 #endif 3205 int 3206 sys_fchown(struct thread *td, struct fchown_args *uap) 3207 { 3208 struct file *fp; 3209 int error; 3210 3211 AUDIT_ARG_FD(uap->fd); 3212 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3213 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3214 if (error != 0) 3215 return (error); 3216 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3217 fdrop(fp, td); 3218 return (error); 3219 } 3220 3221 /* 3222 * Common implementation code for utimes(), lutimes(), and futimes(). 3223 */ 3224 static int 3225 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3226 struct timespec *tsp) 3227 { 3228 struct timeval tv[2]; 3229 const struct timeval *tvp; 3230 int error; 3231 3232 if (usrtvp == NULL) { 3233 vfs_timestamp(&tsp[0]); 3234 tsp[1] = tsp[0]; 3235 } else { 3236 if (tvpseg == UIO_SYSSPACE) { 3237 tvp = usrtvp; 3238 } else { 3239 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3240 return (error); 3241 tvp = tv; 3242 } 3243 3244 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3245 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3246 return (EINVAL); 3247 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3248 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3249 } 3250 return (0); 3251 } 3252 3253 /* 3254 * Common implementation code for futimens(), utimensat(). 
3255 */ 3256 #define UTIMENS_NULL 0x1 3257 #define UTIMENS_EXIT 0x2 3258 static int 3259 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3260 struct timespec *tsp, int *retflags) 3261 { 3262 struct timespec tsnow; 3263 int error; 3264 3265 vfs_timestamp(&tsnow); 3266 *retflags = 0; 3267 if (usrtsp == NULL) { 3268 tsp[0] = tsnow; 3269 tsp[1] = tsnow; 3270 *retflags |= UTIMENS_NULL; 3271 return (0); 3272 } 3273 if (tspseg == UIO_SYSSPACE) { 3274 tsp[0] = usrtsp[0]; 3275 tsp[1] = usrtsp[1]; 3276 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3277 return (error); 3278 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3279 *retflags |= UTIMENS_EXIT; 3280 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3281 *retflags |= UTIMENS_NULL; 3282 if (tsp[0].tv_nsec == UTIME_OMIT) 3283 tsp[0].tv_sec = VNOVAL; 3284 else if (tsp[0].tv_nsec == UTIME_NOW) 3285 tsp[0] = tsnow; 3286 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3287 return (EINVAL); 3288 if (tsp[1].tv_nsec == UTIME_OMIT) 3289 tsp[1].tv_sec = VNOVAL; 3290 else if (tsp[1].tv_nsec == UTIME_NOW) 3291 tsp[1] = tsnow; 3292 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3293 return (EINVAL); 3294 3295 return (0); 3296 } 3297 3298 /* 3299 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3300 * and utimensat(). 
3301 */ 3302 static int 3303 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3304 int numtimes, int nullflag) 3305 { 3306 struct mount *mp; 3307 struct vattr vattr; 3308 int error; 3309 bool setbirthtime; 3310 3311 setbirthtime = false; 3312 vattr.va_birthtime.tv_sec = VNOVAL; 3313 vattr.va_birthtime.tv_nsec = 0; 3314 3315 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3316 return (error); 3317 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3318 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3319 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3320 setbirthtime = true; 3321 VATTR_NULL(&vattr); 3322 vattr.va_atime = ts[0]; 3323 vattr.va_mtime = ts[1]; 3324 if (setbirthtime) 3325 vattr.va_birthtime = ts[1]; 3326 if (numtimes > 2) 3327 vattr.va_birthtime = ts[2]; 3328 if (nullflag) 3329 vattr.va_vaflags |= VA_UTIMES_NULL; 3330 #ifdef MAC 3331 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3332 vattr.va_mtime); 3333 #endif 3334 if (error == 0) 3335 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3336 VOP_UNLOCK(vp); 3337 vn_finished_write(mp); 3338 return (error); 3339 } 3340 3341 /* 3342 * Set the access and modification times of a file. 
3343 */ 3344 #ifndef _SYS_SYSPROTO_H_ 3345 struct utimes_args { 3346 char *path; 3347 struct timeval *tptr; 3348 }; 3349 #endif 3350 int 3351 sys_utimes(struct thread *td, struct utimes_args *uap) 3352 { 3353 3354 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3355 uap->tptr, UIO_USERSPACE)); 3356 } 3357 3358 #ifndef _SYS_SYSPROTO_H_ 3359 struct futimesat_args { 3360 int fd; 3361 const char * path; 3362 const struct timeval * times; 3363 }; 3364 #endif 3365 int 3366 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3367 { 3368 3369 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3370 uap->times, UIO_USERSPACE)); 3371 } 3372 3373 int 3374 kern_utimesat(struct thread *td, int fd, const char *path, 3375 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3376 { 3377 struct nameidata nd; 3378 struct timespec ts[2]; 3379 int error; 3380 3381 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3382 return (error); 3383 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3384 &cap_futimes_rights); 3385 3386 if ((error = namei(&nd)) != 0) 3387 return (error); 3388 NDFREE_PNBUF(&nd); 3389 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3390 vrele(nd.ni_vp); 3391 return (error); 3392 } 3393 3394 /* 3395 * Set the access and modification times of a file. 
3396 */ 3397 #ifndef _SYS_SYSPROTO_H_ 3398 struct lutimes_args { 3399 char *path; 3400 struct timeval *tptr; 3401 }; 3402 #endif 3403 int 3404 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3405 { 3406 3407 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3408 UIO_USERSPACE)); 3409 } 3410 3411 int 3412 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3413 const struct timeval *tptr, enum uio_seg tptrseg) 3414 { 3415 struct timespec ts[2]; 3416 struct nameidata nd; 3417 int error; 3418 3419 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3420 return (error); 3421 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3422 if ((error = namei(&nd)) != 0) 3423 return (error); 3424 NDFREE_PNBUF(&nd); 3425 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3426 vrele(nd.ni_vp); 3427 return (error); 3428 } 3429 3430 /* 3431 * Set the access and modification times of a file. 3432 */ 3433 #ifndef _SYS_SYSPROTO_H_ 3434 struct futimes_args { 3435 int fd; 3436 struct timeval *tptr; 3437 }; 3438 #endif 3439 int 3440 sys_futimes(struct thread *td, struct futimes_args *uap) 3441 { 3442 3443 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3444 } 3445 3446 int 3447 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3448 enum uio_seg tptrseg) 3449 { 3450 struct timespec ts[2]; 3451 struct file *fp; 3452 int error; 3453 3454 AUDIT_ARG_FD(fd); 3455 error = getutimes(tptr, tptrseg, ts); 3456 if (error != 0) 3457 return (error); 3458 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3459 if (error != 0) 3460 return (error); 3461 #ifdef AUDIT 3462 if (AUDITING_TD(td)) { 3463 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3464 AUDIT_ARG_VNODE1(fp->f_vnode); 3465 VOP_UNLOCK(fp->f_vnode); 3466 } 3467 #endif 3468 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3469 fdrop(fp, td); 3470 return (error); 3471 } 3472 3473 int 3474 sys_futimens(struct thread *td, struct futimens_args *uap) 3475 { 3476 
3477 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3478 } 3479 3480 int 3481 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3482 enum uio_seg tptrseg) 3483 { 3484 struct timespec ts[2]; 3485 struct file *fp; 3486 int error, flags; 3487 3488 AUDIT_ARG_FD(fd); 3489 error = getutimens(tptr, tptrseg, ts, &flags); 3490 if (error != 0) 3491 return (error); 3492 if (flags & UTIMENS_EXIT) 3493 return (0); 3494 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3495 if (error != 0) 3496 return (error); 3497 #ifdef AUDIT 3498 if (AUDITING_TD(td)) { 3499 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3500 AUDIT_ARG_VNODE1(fp->f_vnode); 3501 VOP_UNLOCK(fp->f_vnode); 3502 } 3503 #endif 3504 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3505 fdrop(fp, td); 3506 return (error); 3507 } 3508 3509 int 3510 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3511 { 3512 3513 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3514 uap->times, UIO_USERSPACE, uap->flag)); 3515 } 3516 3517 int 3518 kern_utimensat(struct thread *td, int fd, const char *path, 3519 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3520 int flag) 3521 { 3522 struct nameidata nd; 3523 struct timespec ts[2]; 3524 int error, flags; 3525 3526 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3527 AT_EMPTY_PATH)) != 0) 3528 return (EINVAL); 3529 3530 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3531 return (error); 3532 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3533 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3534 pathseg, path, fd, &cap_futimes_rights); 3535 if ((error = namei(&nd)) != 0) 3536 return (error); 3537 /* 3538 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3539 * POSIX states: 3540 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3541 * "Search permission is denied by a component of the path prefix." 
3542 */ 3543 NDFREE_PNBUF(&nd); 3544 if ((flags & UTIMENS_EXIT) == 0) 3545 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3546 vrele(nd.ni_vp); 3547 return (error); 3548 } 3549 3550 /* 3551 * Truncate a file given its path name. 3552 */ 3553 #ifndef _SYS_SYSPROTO_H_ 3554 struct truncate_args { 3555 char *path; 3556 int pad; 3557 off_t length; 3558 }; 3559 #endif 3560 int 3561 sys_truncate(struct thread *td, struct truncate_args *uap) 3562 { 3563 3564 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3565 } 3566 3567 int 3568 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3569 off_t length) 3570 { 3571 struct mount *mp; 3572 struct vnode *vp; 3573 void *rl_cookie; 3574 struct nameidata nd; 3575 int error; 3576 3577 if (length < 0) 3578 return (EINVAL); 3579 NDPREINIT(&nd); 3580 retry: 3581 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3582 if ((error = namei(&nd)) != 0) 3583 return (error); 3584 vp = nd.ni_vp; 3585 NDFREE_PNBUF(&nd); 3586 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3587 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3588 vn_rangelock_unlock(vp, rl_cookie); 3589 vrele(vp); 3590 return (error); 3591 } 3592 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3593 if (vp->v_type == VDIR) { 3594 error = EISDIR; 3595 goto out; 3596 } 3597 #ifdef MAC 3598 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3599 if (error != 0) 3600 goto out; 3601 #endif 3602 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3603 if (error != 0) 3604 goto out; 3605 3606 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3607 out: 3608 VOP_UNLOCK(vp); 3609 vn_finished_write(mp); 3610 vn_rangelock_unlock(vp, rl_cookie); 3611 vrele(vp); 3612 if (error == ERELOOKUP) 3613 goto retry; 3614 return (error); 3615 } 3616 3617 #if defined(COMPAT_43) 3618 /* 3619 * Truncate a file given its path name. 
3620 */ 3621 #ifndef _SYS_SYSPROTO_H_ 3622 struct otruncate_args { 3623 char *path; 3624 long length; 3625 }; 3626 #endif 3627 int 3628 otruncate(struct thread *td, struct otruncate_args *uap) 3629 { 3630 3631 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3632 } 3633 #endif /* COMPAT_43 */ 3634 3635 #if defined(COMPAT_FREEBSD6) 3636 /* Versions with the pad argument */ 3637 int 3638 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3639 { 3640 3641 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3642 } 3643 3644 int 3645 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3646 { 3647 3648 return (kern_ftruncate(td, uap->fd, uap->length)); 3649 } 3650 #endif 3651 3652 int 3653 kern_fsync(struct thread *td, int fd, bool fullsync) 3654 { 3655 struct vnode *vp; 3656 struct mount *mp; 3657 struct file *fp; 3658 int error; 3659 3660 AUDIT_ARG_FD(fd); 3661 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3662 if (error != 0) 3663 return (error); 3664 vp = fp->f_vnode; 3665 #if 0 3666 if (!fullsync) 3667 /* XXXKIB: compete outstanding aio writes */; 3668 #endif 3669 retry: 3670 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3671 if (error != 0) 3672 goto drop; 3673 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3674 AUDIT_ARG_VNODE1(vp); 3675 vnode_pager_clean_async(vp); 3676 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3677 VOP_UNLOCK(vp); 3678 vn_finished_write(mp); 3679 if (error == ERELOOKUP) 3680 goto retry; 3681 drop: 3682 fdrop(fp, td); 3683 return (error); 3684 } 3685 3686 /* 3687 * Sync an open file. 
3688 */ 3689 #ifndef _SYS_SYSPROTO_H_ 3690 struct fsync_args { 3691 int fd; 3692 }; 3693 #endif 3694 int 3695 sys_fsync(struct thread *td, struct fsync_args *uap) 3696 { 3697 3698 return (kern_fsync(td, uap->fd, true)); 3699 } 3700 3701 int 3702 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3703 { 3704 3705 return (kern_fsync(td, uap->fd, false)); 3706 } 3707 3708 /* 3709 * Rename files. Source and destination must either both be directories, or 3710 * both not be directories. If target is a directory, it must be empty. 3711 */ 3712 #ifndef _SYS_SYSPROTO_H_ 3713 struct rename_args { 3714 char *from; 3715 char *to; 3716 }; 3717 #endif 3718 int 3719 sys_rename(struct thread *td, struct rename_args *uap) 3720 { 3721 3722 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3723 uap->to, UIO_USERSPACE)); 3724 } 3725 3726 #ifndef _SYS_SYSPROTO_H_ 3727 struct renameat_args { 3728 int oldfd; 3729 char *old; 3730 int newfd; 3731 char *new; 3732 }; 3733 #endif 3734 int 3735 sys_renameat(struct thread *td, struct renameat_args *uap) 3736 { 3737 3738 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3739 UIO_USERSPACE)); 3740 } 3741 3742 #ifdef MAC 3743 static int 3744 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3745 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3746 { 3747 int error; 3748 3749 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3750 pathseg, old, oldfd, &cap_renameat_source_rights); 3751 if ((error = namei(fromnd)) != 0) 3752 return (error); 3753 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3754 fromnd->ni_vp, &fromnd->ni_cnd); 3755 VOP_UNLOCK(fromnd->ni_dvp); 3756 if (fromnd->ni_dvp != fromnd->ni_vp) 3757 VOP_UNLOCK(fromnd->ni_vp); 3758 if (error != 0) { 3759 NDFREE_PNBUF(fromnd); 3760 vrele(fromnd->ni_dvp); 3761 vrele(fromnd->ni_vp); 3762 } 3763 return (error); 3764 } 3765 #endif 3766 3767 int 3768 kern_renameat(struct thread *td, 
int oldfd, const char *old, int newfd, 3769 const char *new, enum uio_seg pathseg) 3770 { 3771 struct mount *mp, *tmp; 3772 struct vnode *tvp, *fvp, *tdvp; 3773 struct nameidata fromnd, tond; 3774 uint64_t tondflags; 3775 int error; 3776 short irflag; 3777 3778 again: 3779 tmp = mp = NULL; 3780 bwillwrite(); 3781 #ifdef MAC 3782 if (mac_vnode_check_rename_from_enabled()) { 3783 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3784 &fromnd); 3785 if (error != 0) 3786 return (error); 3787 } else { 3788 #endif 3789 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3790 pathseg, old, oldfd, &cap_renameat_source_rights); 3791 if ((error = namei(&fromnd)) != 0) 3792 return (error); 3793 #ifdef MAC 3794 } 3795 #endif 3796 fvp = fromnd.ni_vp; 3797 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3798 if (fromnd.ni_vp->v_type == VDIR) 3799 tondflags |= WILLBEDIR; 3800 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3801 &cap_renameat_target_rights); 3802 if ((error = namei(&tond)) != 0) { 3803 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3804 if (error == EISDIR && fvp->v_type == VDIR) 3805 error = EINVAL; 3806 NDFREE_PNBUF(&fromnd); 3807 vrele(fromnd.ni_dvp); 3808 vrele(fvp); 3809 goto out1; 3810 } 3811 tdvp = tond.ni_dvp; 3812 tvp = tond.ni_vp; 3813 error = vn_start_write(fvp, &mp, V_NOWAIT); 3814 if (error != 0) { 3815 again1: 3816 NDFREE_PNBUF(&fromnd); 3817 NDFREE_PNBUF(&tond); 3818 if (tvp != NULL) 3819 vput(tvp); 3820 if (tdvp == tvp) 3821 vrele(tdvp); 3822 else 3823 vput(tdvp); 3824 vrele(fromnd.ni_dvp); 3825 vrele(fvp); 3826 if (tmp != NULL) { 3827 lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); 3828 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); 3829 vfs_rel(tmp); 3830 tmp = NULL; 3831 } 3832 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3833 if (error != 0) 3834 return (error); 3835 goto again; 3836 } 3837 error = VOP_GETWRITEMOUNT(tdvp, &tmp); 3838 if (error != 0 || tmp == NULL) 3839 goto again1; 3840 error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); 3841 if (error != 0) { 3842 vn_finished_write(mp); 3843 goto again1; 3844 } 3845 irflag = vn_irflag_read(fvp); 3846 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3847 (irflag & VIRF_NAMEDDIR) != 0) { 3848 error = EINVAL; 3849 goto out; 3850 } 3851 if (tvp != NULL) { 3852 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3853 error = ENOTDIR; 3854 goto out; 3855 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3856 error = EISDIR; 3857 goto out; 3858 } 3859 #ifdef CAPABILITIES 3860 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3861 /* 3862 * If the target already exists we require CAP_UNLINKAT 3863 * from 'newfd', when newfd was used for the lookup. 
3864 */ 3865 error = cap_check(&tond.ni_filecaps.fc_rights, 3866 &cap_unlinkat_rights); 3867 if (error != 0) 3868 goto out; 3869 } 3870 #endif 3871 } 3872 if (fvp == tdvp) { 3873 error = EINVAL; 3874 goto out; 3875 } 3876 /* 3877 * If the source is the same as the destination (that is, if they 3878 * are links to the same vnode), then there is nothing to do. 3879 */ 3880 if (fvp == tvp) 3881 error = ERESTART; 3882 #ifdef MAC 3883 else 3884 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3885 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3886 #endif 3887 out: 3888 if (error == 0) { 3889 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3890 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3891 NDFREE_PNBUF(&fromnd); 3892 NDFREE_PNBUF(&tond); 3893 } else { 3894 NDFREE_PNBUF(&fromnd); 3895 NDFREE_PNBUF(&tond); 3896 if (tvp != NULL) 3897 vput(tvp); 3898 if (tdvp == tvp) 3899 vrele(tdvp); 3900 else 3901 vput(tdvp); 3902 vrele(fromnd.ni_dvp); 3903 vrele(fvp); 3904 } 3905 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); 3906 vfs_rel(tmp); 3907 vn_finished_write(mp); 3908 out1: 3909 if (error == ERESTART) 3910 return (0); 3911 if (error == ERELOOKUP) 3912 goto again; 3913 return (error); 3914 } 3915 3916 /* 3917 * Make a directory file. 
3918 */ 3919 #ifndef _SYS_SYSPROTO_H_ 3920 struct mkdir_args { 3921 char *path; 3922 int mode; 3923 }; 3924 #endif 3925 int 3926 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3927 { 3928 3929 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3930 uap->mode)); 3931 } 3932 3933 #ifndef _SYS_SYSPROTO_H_ 3934 struct mkdirat_args { 3935 int fd; 3936 char *path; 3937 mode_t mode; 3938 }; 3939 #endif 3940 int 3941 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3942 { 3943 3944 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3945 } 3946 3947 int 3948 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3949 int mode) 3950 { 3951 struct mount *mp; 3952 struct vattr vattr; 3953 struct nameidata nd; 3954 int error; 3955 3956 AUDIT_ARG_MODE(mode); 3957 NDPREINIT(&nd); 3958 restart: 3959 bwillwrite(); 3960 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3961 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3962 segflg, path, fd, &cap_mkdirat_rights); 3963 if ((error = namei(&nd)) != 0) 3964 return (error); 3965 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3966 NDFREE_PNBUF(&nd); 3967 vput(nd.ni_dvp); 3968 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3969 return (error); 3970 goto restart; 3971 } 3972 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 3973 error = EINVAL; 3974 goto out; 3975 } 3976 VATTR_NULL(&vattr); 3977 vattr.va_type = VDIR; 3978 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3979 #ifdef MAC 3980 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3981 &vattr); 3982 if (error != 0) 3983 goto out; 3984 #endif 3985 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3986 out: 3987 NDFREE_PNBUF(&nd); 3988 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? 
&nd.ni_vp : NULL, true); 3989 vn_finished_write(mp); 3990 if (error == ERELOOKUP) 3991 goto restart; 3992 return (error); 3993 } 3994 3995 /* 3996 * Remove a directory file. 3997 */ 3998 #ifndef _SYS_SYSPROTO_H_ 3999 struct rmdir_args { 4000 char *path; 4001 }; 4002 #endif 4003 int 4004 sys_rmdir(struct thread *td, struct rmdir_args *uap) 4005 { 4006 4007 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 4008 0)); 4009 } 4010 4011 int 4012 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 4013 enum uio_seg pathseg, int flag) 4014 { 4015 struct mount *mp; 4016 struct vnode *vp; 4017 struct file *fp; 4018 struct nameidata nd; 4019 cap_rights_t rights; 4020 int error; 4021 4022 fp = NULL; 4023 if (fd != FD_NONE) { 4024 error = getvnode(td, fd, cap_rights_init_one(&rights, 4025 CAP_LOOKUP), &fp); 4026 if (error != 0) 4027 return (error); 4028 } 4029 4030 NDPREINIT(&nd); 4031 restart: 4032 bwillwrite(); 4033 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 4034 at2cnpflags(flag, AT_RESOLVE_BENEATH), 4035 pathseg, path, dfd, &cap_unlinkat_rights); 4036 if ((error = namei(&nd)) != 0) 4037 goto fdout; 4038 vp = nd.ni_vp; 4039 if (vp->v_type != VDIR) { 4040 error = ENOTDIR; 4041 goto out; 4042 } 4043 /* 4044 * No rmdir "." please. 4045 */ 4046 if (nd.ni_dvp == vp) { 4047 error = EINVAL; 4048 goto out; 4049 } 4050 /* 4051 * The root of a mounted filesystem cannot be deleted. 
4052 */ 4053 if (vp->v_vflag & VV_ROOT) { 4054 error = EBUSY; 4055 goto out; 4056 } 4057 4058 if (fp != NULL && fp->f_vnode != vp) { 4059 if (VN_IS_DOOMED(fp->f_vnode)) 4060 error = EBADF; 4061 else 4062 error = EDEADLK; 4063 goto out; 4064 } 4065 4066 #ifdef MAC 4067 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4068 &nd.ni_cnd); 4069 if (error != 0) 4070 goto out; 4071 #endif 4072 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4073 NDFREE_PNBUF(&nd); 4074 vput(vp); 4075 if (nd.ni_dvp == vp) 4076 vrele(nd.ni_dvp); 4077 else 4078 vput(nd.ni_dvp); 4079 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4080 goto fdout; 4081 goto restart; 4082 } 4083 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4084 vn_finished_write(mp); 4085 out: 4086 NDFREE_PNBUF(&nd); 4087 vput(vp); 4088 if (nd.ni_dvp == vp) 4089 vrele(nd.ni_dvp); 4090 else 4091 vput(nd.ni_dvp); 4092 if (error == ERELOOKUP) 4093 goto restart; 4094 fdout: 4095 if (fp != NULL) 4096 fdrop(fp, td); 4097 return (error); 4098 } 4099 4100 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4101 int 4102 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4103 long *basep, void (*func)(struct freebsd11_dirent *)) 4104 { 4105 struct freebsd11_dirent dstdp; 4106 struct dirent *dp, *edp; 4107 char *dirbuf; 4108 off_t base; 4109 ssize_t resid, ucount; 4110 int error; 4111 4112 /* XXX arbitrary sanity limit on `count'. 
*/ 4113 count = min(count, 64 * 1024); 4114 4115 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4116 4117 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4118 UIO_SYSSPACE); 4119 if (error != 0) 4120 goto done; 4121 if (basep != NULL) 4122 *basep = base; 4123 4124 ucount = 0; 4125 for (dp = (struct dirent *)dirbuf, 4126 edp = (struct dirent *)&dirbuf[count - resid]; 4127 ucount < count && dp < edp; ) { 4128 if (dp->d_reclen == 0) 4129 break; 4130 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4131 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4132 continue; 4133 dstdp.d_type = dp->d_type; 4134 dstdp.d_namlen = dp->d_namlen; 4135 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4136 if (dstdp.d_fileno != dp->d_fileno) { 4137 switch (ino64_trunc_error) { 4138 default: 4139 case 0: 4140 break; 4141 case 1: 4142 error = EOVERFLOW; 4143 goto done; 4144 case 2: 4145 dstdp.d_fileno = UINT32_MAX; 4146 break; 4147 } 4148 } 4149 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4150 ((dp->d_namlen + 1 + 3) &~ 3); 4151 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4152 bzero(dstdp.d_name + dstdp.d_namlen, 4153 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4154 dstdp.d_namlen); 4155 MPASS(dstdp.d_reclen <= dp->d_reclen); 4156 MPASS(ucount + dstdp.d_reclen <= count); 4157 if (func != NULL) 4158 func(&dstdp); 4159 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4160 if (error != 0) 4161 break; 4162 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4163 ucount += dstdp.d_reclen; 4164 } 4165 4166 done: 4167 free(dirbuf, M_TEMP); 4168 if (error == 0) 4169 td->td_retval[0] = ucount; 4170 return (error); 4171 } 4172 #endif /* COMPAT */ 4173 4174 #ifdef COMPAT_43 4175 static void 4176 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4177 { 4178 #if (BYTE_ORDER == LITTLE_ENDIAN) 4179 /* 4180 * The expected low byte of dp->d_namlen is our dp->d_type. 4181 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4182 */ 4183 dp->d_type = dp->d_namlen; 4184 dp->d_namlen = 0; 4185 #else 4186 /* 4187 * The dp->d_type is the high byte of the expected dp->d_namlen, 4188 * so must be zero'ed. 4189 */ 4190 dp->d_type = 0; 4191 #endif 4192 } 4193 4194 /* 4195 * Read a block of directory entries in a filesystem independent format. 4196 */ 4197 #ifndef _SYS_SYSPROTO_H_ 4198 struct ogetdirentries_args { 4199 int fd; 4200 char *buf; 4201 u_int count; 4202 long *basep; 4203 }; 4204 #endif 4205 int 4206 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4207 { 4208 long loff; 4209 int error; 4210 4211 error = kern_ogetdirentries(td, uap, &loff); 4212 if (error == 0) 4213 error = copyout(&loff, uap->basep, sizeof(long)); 4214 return (error); 4215 } 4216 4217 int 4218 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4219 long *ploff) 4220 { 4221 long base; 4222 int error; 4223 4224 /* XXX arbitrary sanity limit on `count'. */ 4225 if (uap->count > 64 * 1024) 4226 return (EINVAL); 4227 4228 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4229 &base, ogetdirentries_cvt); 4230 4231 if (error == 0 && uap->basep != NULL) 4232 error = copyout(&base, uap->basep, sizeof(long)); 4233 4234 return (error); 4235 } 4236 #endif /* COMPAT_43 */ 4237 4238 #if defined(COMPAT_FREEBSD11) 4239 #ifndef _SYS_SYSPROTO_H_ 4240 struct freebsd11_getdirentries_args { 4241 int fd; 4242 char *buf; 4243 u_int count; 4244 long *basep; 4245 }; 4246 #endif 4247 int 4248 freebsd11_getdirentries(struct thread *td, 4249 struct freebsd11_getdirentries_args *uap) 4250 { 4251 long base; 4252 int error; 4253 4254 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4255 &base, NULL); 4256 4257 if (error == 0 && uap->basep != NULL) 4258 error = copyout(&base, uap->basep, sizeof(long)); 4259 return (error); 4260 } 4261 4262 int 4263 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4264 { 4265 struct 
freebsd11_getdirentries_args ap; 4266 4267 ap.fd = uap->fd; 4268 ap.buf = uap->buf; 4269 ap.count = uap->count; 4270 ap.basep = NULL; 4271 return (freebsd11_getdirentries(td, &ap)); 4272 } 4273 #endif /* COMPAT_FREEBSD11 */ 4274 4275 /* 4276 * Read a block of directory entries in a filesystem independent format. 4277 */ 4278 int 4279 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4280 { 4281 off_t base; 4282 int error; 4283 4284 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4285 NULL, UIO_USERSPACE); 4286 if (error != 0) 4287 return (error); 4288 if (uap->basep != NULL) 4289 error = copyout(&base, uap->basep, sizeof(off_t)); 4290 return (error); 4291 } 4292 4293 int 4294 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4295 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4296 { 4297 struct vnode *vp; 4298 struct file *fp; 4299 struct uio auio; 4300 struct iovec aiov; 4301 off_t loff; 4302 int error, eofflag; 4303 off_t foffset; 4304 4305 AUDIT_ARG_FD(fd); 4306 if (count > IOSIZE_MAX) 4307 return (EINVAL); 4308 auio.uio_resid = count; 4309 error = getvnode(td, fd, &cap_read_rights, &fp); 4310 if (error != 0) 4311 return (error); 4312 if ((fp->f_flag & FREAD) == 0) { 4313 fdrop(fp, td); 4314 return (EBADF); 4315 } 4316 vp = fp->f_vnode; 4317 foffset = foffset_lock(fp, 0); 4318 unionread: 4319 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4320 error = ENOENT; 4321 goto fail; 4322 } 4323 aiov.iov_base = buf; 4324 aiov.iov_len = count; 4325 auio.uio_iov = &aiov; 4326 auio.uio_iovcnt = 1; 4327 auio.uio_rw = UIO_READ; 4328 auio.uio_segflg = bufseg; 4329 auio.uio_td = td; 4330 vn_lock(vp, LK_SHARED | LK_RETRY); 4331 /* 4332 * We want to return ENOTDIR for anything that is not VDIR, but 4333 * not for VBAD, and we can't check for VBAD while the vnode is 4334 * unlocked. 
4335 */ 4336 if (vp->v_type != VDIR) { 4337 if (vp->v_type == VBAD) 4338 error = EBADF; 4339 else 4340 error = ENOTDIR; 4341 VOP_UNLOCK(vp); 4342 goto fail; 4343 } 4344 AUDIT_ARG_VNODE1(vp); 4345 loff = auio.uio_offset = foffset; 4346 #ifdef MAC 4347 error = mac_vnode_check_readdir(td->td_ucred, vp); 4348 if (error == 0) 4349 #endif 4350 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4351 NULL); 4352 foffset = auio.uio_offset; 4353 if (error != 0) { 4354 VOP_UNLOCK(vp); 4355 goto fail; 4356 } 4357 if (count == auio.uio_resid && 4358 (vp->v_vflag & VV_ROOT) && 4359 (vp->v_mount->mnt_flag & MNT_UNION)) { 4360 struct vnode *tvp = vp; 4361 4362 vp = vp->v_mount->mnt_vnodecovered; 4363 vref(vp); 4364 fp->f_vnode = vp; 4365 foffset = 0; 4366 vput(tvp); 4367 goto unionread; 4368 } 4369 VOP_UNLOCK(vp); 4370 *basep = loff; 4371 if (residp != NULL) 4372 *residp = auio.uio_resid; 4373 td->td_retval[0] = count - auio.uio_resid; 4374 fail: 4375 foffset_unlock(fp, foffset, 0); 4376 fdrop(fp, td); 4377 return (error); 4378 } 4379 4380 /* 4381 * Set the mode mask for creation of filesystem nodes. 4382 */ 4383 #ifndef _SYS_SYSPROTO_H_ 4384 struct umask_args { 4385 int newmask; 4386 }; 4387 #endif 4388 int 4389 sys_umask(struct thread *td, struct umask_args *uap) 4390 { 4391 struct pwddesc *pdp; 4392 4393 pdp = td->td_proc->p_pd; 4394 PWDDESC_XLOCK(pdp); 4395 td->td_retval[0] = pdp->pd_cmask; 4396 pdp->pd_cmask = uap->newmask & ALLPERMS; 4397 PWDDESC_XUNLOCK(pdp); 4398 return (0); 4399 } 4400 4401 /* 4402 * Void all references to file by ripping underlying filesystem away from 4403 * vnode. 
4404 */ 4405 #ifndef _SYS_SYSPROTO_H_ 4406 struct revoke_args { 4407 char *path; 4408 }; 4409 #endif 4410 int 4411 sys_revoke(struct thread *td, struct revoke_args *uap) 4412 { 4413 struct vnode *vp; 4414 struct vattr vattr; 4415 struct nameidata nd; 4416 int error; 4417 4418 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4419 uap->path); 4420 if ((error = namei(&nd)) != 0) 4421 return (error); 4422 vp = nd.ni_vp; 4423 NDFREE_PNBUF(&nd); 4424 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4425 error = EINVAL; 4426 goto out; 4427 } 4428 #ifdef MAC 4429 error = mac_vnode_check_revoke(td->td_ucred, vp); 4430 if (error != 0) 4431 goto out; 4432 #endif 4433 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4434 if (error != 0) 4435 goto out; 4436 if (td->td_ucred->cr_uid != vattr.va_uid) { 4437 error = priv_check(td, PRIV_VFS_ADMIN); 4438 if (error != 0) 4439 goto out; 4440 } 4441 if (devfs_usecount(vp) > 0) 4442 VOP_REVOKE(vp, REVOKEALL); 4443 out: 4444 vput(vp); 4445 return (error); 4446 } 4447 4448 /* 4449 * This variant of getvnode() allows O_PATH files. Caller should 4450 * ensure that returned file and vnode are only used for compatible 4451 * semantics. 4452 */ 4453 int 4454 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, 4455 uint8_t *flagsp, struct file **fpp) 4456 { 4457 struct file *fp; 4458 int error; 4459 4460 error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp); 4461 if (error != 0) 4462 return (error); 4463 4464 /* 4465 * The file could be not of the vnode type, or it may be not 4466 * yet fully initialized, in which case the f_vnode pointer 4467 * may be set, but f_ops is still badfileops. E.g., 4468 * devfs_open() transiently create such situation to 4469 * facilitate csw d_fdopen(). 4470 * 4471 * Dupfdopen() handling in kern_openat() installs the 4472 * half-baked file into the process descriptor table, allowing 4473 * other thread to dereference it. Guard against the race by 4474 * checking f_ops. 
4475 */ 4476 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4477 fdrop(fp, td); 4478 *fpp = NULL; 4479 return (EINVAL); 4480 } 4481 4482 *fpp = fp; 4483 return (0); 4484 } 4485 4486 /* 4487 * Convert a user file descriptor to a kernel file entry and check 4488 * that, if it is a capability, the correct rights are present. 4489 * A reference on the file entry is held upon returning. 4490 */ 4491 int 4492 getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, 4493 struct file **fpp) 4494 { 4495 int error; 4496 4497 error = getvnode_path(td, fd, rightsp, NULL, fpp); 4498 if (__predict_false(error != 0)) 4499 return (error); 4500 4501 /* 4502 * Filter out O_PATH file descriptors, most getvnode() callers 4503 * do not call fo_ methods. 4504 */ 4505 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4506 fdrop(*fpp, td); 4507 *fpp = NULL; 4508 error = EBADF; 4509 } 4510 4511 return (error); 4512 } 4513 4514 /* 4515 * Get an (NFS) file handle. 4516 */ 4517 #ifndef _SYS_SYSPROTO_H_ 4518 struct lgetfh_args { 4519 char *fname; 4520 fhandle_t *fhp; 4521 }; 4522 #endif 4523 int 4524 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4525 { 4526 4527 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4528 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4529 } 4530 4531 #ifndef _SYS_SYSPROTO_H_ 4532 struct getfh_args { 4533 char *fname; 4534 fhandle_t *fhp; 4535 }; 4536 #endif 4537 int 4538 sys_getfh(struct thread *td, struct getfh_args *uap) 4539 { 4540 4541 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4542 uap->fhp, UIO_USERSPACE)); 4543 } 4544 4545 /* 4546 * syscall for the rpc.lockd to use to translate an open descriptor into 4547 * a NFS file handle. 4548 * 4549 * warning: do not remove the priv_check() call or this becomes one giant 4550 * security hole. 
4551 */ 4552 #ifndef _SYS_SYSPROTO_H_ 4553 struct getfhat_args { 4554 int fd; 4555 char *path; 4556 fhandle_t *fhp; 4557 int flags; 4558 }; 4559 #endif 4560 int 4561 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4562 { 4563 4564 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4565 uap->fhp, UIO_USERSPACE)); 4566 } 4567 4568 int 4569 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4570 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4571 { 4572 struct nameidata nd; 4573 fhandle_t fh; 4574 struct vnode *vp; 4575 int error; 4576 4577 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4578 return (EINVAL); 4579 error = priv_check(td, PRIV_VFS_GETFH); 4580 if (error != 0) 4581 return (error); 4582 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4583 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4584 fd); 4585 error = namei(&nd); 4586 if (error != 0) 4587 return (error); 4588 NDFREE_PNBUF(&nd); 4589 vp = nd.ni_vp; 4590 bzero(&fh, sizeof(fh)); 4591 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4592 error = VOP_VPTOFH(vp, &fh.fh_fid); 4593 vput(vp); 4594 if (error == 0) { 4595 if (fhseg == UIO_USERSPACE) 4596 error = copyout(&fh, fhp, sizeof (fh)); 4597 else 4598 memcpy(fhp, &fh, sizeof(fh)); 4599 } 4600 return (error); 4601 } 4602 4603 #ifndef _SYS_SYSPROTO_H_ 4604 struct fhlink_args { 4605 fhandle_t *fhp; 4606 const char *to; 4607 }; 4608 #endif 4609 int 4610 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4611 { 4612 4613 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4614 } 4615 4616 #ifndef _SYS_SYSPROTO_H_ 4617 struct fhlinkat_args { 4618 fhandle_t *fhp; 4619 int tofd; 4620 const char *to; 4621 }; 4622 #endif 4623 int 4624 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4625 { 4626 4627 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4628 } 4629 4630 static int 4631 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4632 enum uio_seg pathseg, fhandle_t *fhp) 4633 { 4634 fhandle_t fh; 4635 struct mount *mp; 4636 struct vnode *vp; 4637 int error; 4638 4639 error = priv_check(td, PRIV_VFS_GETFH); 4640 if (error != 0) 4641 return (error); 4642 error = copyin(fhp, &fh, sizeof(fh)); 4643 if (error != 0) 4644 return (error); 4645 do { 4646 bwillwrite(); 4647 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4648 return (ESTALE); 4649 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4650 vfs_unbusy(mp); 4651 if (error != 0) 4652 return (error); 4653 VOP_UNLOCK(vp); 4654 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4655 } while (error == EAGAIN || error == ERELOOKUP); 4656 return (error); 4657 } 4658 4659 #ifndef _SYS_SYSPROTO_H_ 4660 struct fhreadlink_args { 4661 fhandle_t *fhp; 4662 char *buf; 4663 size_t bufsize; 4664 }; 4665 #endif 4666 int 4667 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4668 { 4669 fhandle_t fh; 4670 struct mount *mp; 4671 struct vnode *vp; 4672 int error; 4673 4674 error = priv_check(td, PRIV_VFS_GETFH); 4675 if (error != 0) 4676 return (error); 4677 if (uap->bufsize > IOSIZE_MAX) 4678 return (EINVAL); 4679 error = copyin(uap->fhp, &fh, sizeof(fh)); 4680 if (error != 0) 4681 return (error); 4682 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4683 return (ESTALE); 4684 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4685 vfs_unbusy(mp); 4686 if (error != 0) 4687 return (error); 4688 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4689 vput(vp); 4690 return (error); 4691 } 4692 4693 /* 4694 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4695 * open descriptor. 4696 * 4697 * warning: do not remove the priv_check() call or this becomes one giant 4698 * security hole. 
4699 */ 4700 #ifndef _SYS_SYSPROTO_H_ 4701 struct fhopen_args { 4702 const struct fhandle *u_fhp; 4703 int flags; 4704 }; 4705 #endif 4706 int 4707 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4708 { 4709 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4710 } 4711 4712 int 4713 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4714 { 4715 struct mount *mp; 4716 struct vnode *vp; 4717 struct fhandle fhp; 4718 struct file *fp; 4719 int error, indx; 4720 bool named_attr; 4721 4722 error = priv_check(td, PRIV_VFS_FHOPEN); 4723 if (error != 0) 4724 return (error); 4725 4726 indx = -1; 4727 if ((flags & O_CREAT) != 0) 4728 return (EINVAL); 4729 error = openflags(&flags); 4730 if (error != 0) 4731 return (error); 4732 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4733 if (error != 0) 4734 return (error); 4735 /* find the mount point */ 4736 mp = vfs_busyfs(&fhp.fh_fsid); 4737 if (mp == NULL) 4738 return (ESTALE); 4739 /* now give me my vnode, it gets returned to me locked */ 4740 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4741 vfs_unbusy(mp); 4742 if (error != 0) 4743 return (error); 4744 4745 /* 4746 * Check to see if the file handle refers to a named attribute 4747 * directory or attribute. If it does, the O_NAMEDATTR flag 4748 * must have been specified. 4749 */ 4750 named_attr = (vn_irflag_read(vp) & 4751 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4752 if ((named_attr && (flags & O_NAMEDATTR) == 0) || 4753 (!named_attr && (flags & O_NAMEDATTR) != 0)) { 4754 vput(vp); 4755 return (ENOATTR); 4756 } 4757 4758 error = falloc_noinstall(td, &fp); 4759 if (error != 0) { 4760 vput(vp); 4761 return (error); 4762 } 4763 /* Set the flags early so the finit in devfs can pick them up. 
*/ 4764 fp->f_flag = flags & FMASK; 4765 4766 #ifdef INVARIANTS 4767 td->td_dupfd = -1; 4768 #endif 4769 error = vn_open_vnode(vp, flags, td->td_ucred, td, fp); 4770 if (error != 0) { 4771 KASSERT(fp->f_ops == &badfileops, 4772 ("VOP_OPEN in fhopen() set f_ops")); 4773 KASSERT(td->td_dupfd < 0, 4774 ("fhopen() encountered fdopen()")); 4775 4776 vput(vp); 4777 goto bad; 4778 } 4779 #ifdef INVARIANTS 4780 td->td_dupfd = 0; 4781 #endif 4782 finit_open(fp, vp, flags); 4783 VOP_UNLOCK(vp); 4784 if ((flags & O_TRUNC) != 0) { 4785 error = fo_truncate(fp, 0, td->td_ucred, td); 4786 if (error != 0) 4787 goto bad; 4788 } 4789 4790 error = finstall(td, fp, &indx, flags, NULL); 4791 bad: 4792 fdrop(fp, td); 4793 td->td_retval[0] = indx; 4794 return (error); 4795 } 4796 4797 /* 4798 * Stat an (NFS) file handle. 4799 */ 4800 #ifndef _SYS_SYSPROTO_H_ 4801 struct fhstat_args { 4802 struct fhandle *u_fhp; 4803 struct stat *sb; 4804 }; 4805 #endif 4806 int 4807 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4808 { 4809 struct stat sb; 4810 struct fhandle fh; 4811 int error; 4812 4813 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4814 if (error != 0) 4815 return (error); 4816 error = kern_fhstat(td, fh, &sb); 4817 if (error == 0) 4818 error = copyout(&sb, uap->sb, sizeof(sb)); 4819 return (error); 4820 } 4821 4822 int 4823 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4824 { 4825 struct mount *mp; 4826 struct vnode *vp; 4827 int error; 4828 4829 error = priv_check(td, PRIV_VFS_FHSTAT); 4830 if (error != 0) 4831 return (error); 4832 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4833 return (ESTALE); 4834 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4835 vfs_unbusy(mp); 4836 if (error != 0) 4837 return (error); 4838 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4839 vput(vp); 4840 return (error); 4841 } 4842 4843 /* 4844 * Implement fstatfs() for (NFS) file handles. 
4845 */ 4846 #ifndef _SYS_SYSPROTO_H_ 4847 struct fhstatfs_args { 4848 struct fhandle *u_fhp; 4849 struct statfs *buf; 4850 }; 4851 #endif 4852 int 4853 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4854 { 4855 struct statfs *sfp; 4856 fhandle_t fh; 4857 int error; 4858 4859 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4860 if (error != 0) 4861 return (error); 4862 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4863 error = kern_fhstatfs(td, fh, sfp); 4864 if (error == 0) 4865 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4866 free(sfp, M_STATFS); 4867 return (error); 4868 } 4869 4870 int 4871 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4872 { 4873 struct mount *mp; 4874 struct vnode *vp; 4875 int error; 4876 4877 error = priv_check(td, PRIV_VFS_FHSTATFS); 4878 if (error != 0) 4879 return (error); 4880 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4881 return (ESTALE); 4882 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4883 if (error != 0) { 4884 vfs_unbusy(mp); 4885 return (error); 4886 } 4887 vput(vp); 4888 error = prison_canseemount(td->td_ucred, mp); 4889 if (error != 0) 4890 goto out; 4891 #ifdef MAC 4892 error = mac_mount_check_stat(td->td_ucred, mp); 4893 if (error != 0) 4894 goto out; 4895 #endif 4896 error = VFS_STATFS(mp, buf); 4897 out: 4898 vfs_unbusy(mp); 4899 return (error); 4900 } 4901 4902 /* 4903 * Unlike madvise(2), we do not make a best effort to remember every 4904 * possible caching hint. Instead, we remember the last setting with 4905 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4906 * region of any current setting. 
4907 */ 4908 int 4909 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4910 int advice) 4911 { 4912 struct fadvise_info *fa, *new; 4913 struct file *fp; 4914 struct vnode *vp; 4915 off_t end; 4916 int error; 4917 4918 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4919 return (EINVAL); 4920 AUDIT_ARG_VALUE(advice); 4921 switch (advice) { 4922 case POSIX_FADV_SEQUENTIAL: 4923 case POSIX_FADV_RANDOM: 4924 case POSIX_FADV_NOREUSE: 4925 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4926 break; 4927 case POSIX_FADV_NORMAL: 4928 case POSIX_FADV_WILLNEED: 4929 case POSIX_FADV_DONTNEED: 4930 new = NULL; 4931 break; 4932 default: 4933 return (EINVAL); 4934 } 4935 /* XXX: CAP_POSIX_FADVISE? */ 4936 AUDIT_ARG_FD(fd); 4937 error = fget(td, fd, &cap_no_rights, &fp); 4938 if (error != 0) 4939 goto out; 4940 AUDIT_ARG_FILE(td->td_proc, fp); 4941 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4942 error = ESPIPE; 4943 goto out; 4944 } 4945 if (fp->f_type != DTYPE_VNODE) { 4946 error = ENODEV; 4947 goto out; 4948 } 4949 vp = fp->f_vnode; 4950 if (vp->v_type != VREG) { 4951 error = ENODEV; 4952 goto out; 4953 } 4954 if (len == 0) 4955 end = OFF_MAX; 4956 else 4957 end = offset + len - 1; 4958 switch (advice) { 4959 case POSIX_FADV_SEQUENTIAL: 4960 case POSIX_FADV_RANDOM: 4961 case POSIX_FADV_NOREUSE: 4962 /* 4963 * Try to merge any existing non-standard region with 4964 * this new region if possible, otherwise create a new 4965 * non-standard region for this request. 
4966 */ 4967 mtx_pool_lock(mtxpool_sleep, fp); 4968 fa = fp->f_advice; 4969 if (fa != NULL && fa->fa_advice == advice && 4970 ((fa->fa_start <= end && fa->fa_end >= offset) || 4971 (end != OFF_MAX && fa->fa_start == end + 1) || 4972 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4973 if (offset < fa->fa_start) 4974 fa->fa_start = offset; 4975 if (end > fa->fa_end) 4976 fa->fa_end = end; 4977 } else { 4978 new->fa_advice = advice; 4979 new->fa_start = offset; 4980 new->fa_end = end; 4981 fp->f_advice = new; 4982 new = fa; 4983 } 4984 mtx_pool_unlock(mtxpool_sleep, fp); 4985 break; 4986 case POSIX_FADV_NORMAL: 4987 /* 4988 * If a the "normal" region overlaps with an existing 4989 * non-standard region, trim or remove the 4990 * non-standard region. 4991 */ 4992 mtx_pool_lock(mtxpool_sleep, fp); 4993 fa = fp->f_advice; 4994 if (fa != NULL) { 4995 if (offset <= fa->fa_start && end >= fa->fa_end) { 4996 new = fa; 4997 fp->f_advice = NULL; 4998 } else if (offset <= fa->fa_start && 4999 end >= fa->fa_start) 5000 fa->fa_start = end + 1; 5001 else if (offset <= fa->fa_end && end >= fa->fa_end) 5002 fa->fa_end = offset - 1; 5003 else if (offset >= fa->fa_start && end <= fa->fa_end) { 5004 /* 5005 * If the "normal" region is a middle 5006 * portion of the existing 5007 * non-standard region, just remove 5008 * the whole thing rather than picking 5009 * one side or the other to 5010 * preserve. 
5011 */ 5012 new = fa; 5013 fp->f_advice = NULL; 5014 } 5015 } 5016 mtx_pool_unlock(mtxpool_sleep, fp); 5017 break; 5018 case POSIX_FADV_WILLNEED: 5019 case POSIX_FADV_DONTNEED: 5020 error = VOP_ADVISE(vp, offset, end, advice); 5021 break; 5022 } 5023 out: 5024 if (fp != NULL) 5025 fdrop(fp, td); 5026 free(new, M_FADVISE); 5027 return (error); 5028 } 5029 5030 int 5031 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 5032 { 5033 int error; 5034 5035 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 5036 uap->advice); 5037 return (kern_posix_error(td, error)); 5038 } 5039 5040 int 5041 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 5042 off_t *outoffp, size_t len, unsigned int flags) 5043 { 5044 struct file *infp, *infp1, *outfp, *outfp1; 5045 struct vnode *invp, *outvp; 5046 int error; 5047 size_t retlen; 5048 void *rl_rcookie, *rl_wcookie; 5049 off_t inoff, outoff, savinoff, savoutoff; 5050 bool foffsets_locked, foffsets_set; 5051 5052 infp = outfp = NULL; 5053 rl_rcookie = rl_wcookie = NULL; 5054 foffsets_locked = false; 5055 foffsets_set = false; 5056 error = 0; 5057 retlen = 0; 5058 5059 if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) { 5060 error = EINVAL; 5061 goto out; 5062 } 5063 if (len > SSIZE_MAX) 5064 /* 5065 * Although the len argument is size_t, the return argument 5066 * is ssize_t (which is signed). Therefore a size that won't 5067 * fit in ssize_t can't be returned. 5068 */ 5069 len = SSIZE_MAX; 5070 5071 /* Get the file structures for the file descriptors. */ 5072 error = fget_read(td, infd, 5073 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 5074 if (error != 0) 5075 goto out; 5076 if (infp->f_ops == &badfileops) { 5077 error = EBADF; 5078 goto out; 5079 } 5080 if (infp->f_vnode == NULL) { 5081 error = EINVAL; 5082 goto out; 5083 } 5084 error = fget_write(td, outfd, 5085 outoffp != NULL ? 
&cap_pwrite_rights : &cap_write_rights, &outfp); 5086 if (error != 0) 5087 goto out; 5088 if (outfp->f_ops == &badfileops) { 5089 error = EBADF; 5090 goto out; 5091 } 5092 if (outfp->f_vnode == NULL) { 5093 error = EINVAL; 5094 goto out; 5095 } 5096 5097 /* 5098 * Figure out which file offsets we're reading from and writing to. 5099 * If the offsets come from the file descriptions, we need to lock them, 5100 * and locking both offsets requires a loop to avoid deadlocks. 5101 */ 5102 infp1 = outfp1 = NULL; 5103 if (inoffp != NULL) 5104 inoff = *inoffp; 5105 else 5106 infp1 = infp; 5107 if (outoffp != NULL) 5108 outoff = *outoffp; 5109 else 5110 outfp1 = outfp; 5111 if (infp1 != NULL || outfp1 != NULL) { 5112 if (infp1 == outfp1) { 5113 /* 5114 * Overlapping ranges are not allowed. A more thorough 5115 * check appears below, but we must not lock the same 5116 * offset twice. 5117 */ 5118 error = EINVAL; 5119 goto out; 5120 } 5121 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5122 foffsets_locked = true; 5123 } else { 5124 foffsets_set = true; 5125 } 5126 savinoff = inoff; 5127 savoutoff = outoff; 5128 5129 invp = infp->f_vnode; 5130 outvp = outfp->f_vnode; 5131 /* Sanity check the f_flag bits. */ 5132 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5133 (infp->f_flag & FREAD) == 0) { 5134 error = EBADF; 5135 goto out; 5136 } 5137 5138 /* If len == 0, just return 0. */ 5139 if (len == 0) 5140 goto out; 5141 5142 /* 5143 * Make sure that the ranges we check and lock below are valid. Note 5144 * that len is clamped to SSIZE_MAX above. 5145 */ 5146 if (inoff < 0 || outoff < 0) { 5147 error = EINVAL; 5148 goto out; 5149 } 5150 5151 /* 5152 * If infp and outfp refer to the same file, the byte ranges cannot 5153 * overlap. 
5154 */ 5155 if (invp == outvp) { 5156 if ((inoff <= outoff && inoff + len > outoff) || 5157 (inoff > outoff && outoff + len > inoff)) { 5158 error = EINVAL; 5159 goto out; 5160 } 5161 rangelock_may_recurse(&invp->v_rl); 5162 } 5163 5164 /* Range lock the byte ranges for both invp and outvp. */ 5165 for (;;) { 5166 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5167 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5168 if (rl_rcookie != NULL) 5169 break; 5170 vn_rangelock_unlock(outvp, rl_wcookie); 5171 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5172 vn_rangelock_unlock(invp, rl_rcookie); 5173 } 5174 5175 retlen = len; 5176 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5177 flags, infp->f_cred, outfp->f_cred, td); 5178 out: 5179 if (rl_rcookie != NULL) 5180 vn_rangelock_unlock(invp, rl_rcookie); 5181 if (rl_wcookie != NULL) 5182 vn_rangelock_unlock(outvp, rl_wcookie); 5183 if ((foffsets_locked || foffsets_set) && 5184 (error == EINTR || error == ERESTART)) { 5185 inoff = savinoff; 5186 outoff = savoutoff; 5187 } 5188 if (foffsets_locked) { 5189 if (inoffp == NULL) 5190 foffset_unlock(infp, inoff, 0); 5191 else 5192 *inoffp = inoff; 5193 if (outoffp == NULL) 5194 foffset_unlock(outfp, outoff, 0); 5195 else 5196 *outoffp = outoff; 5197 } else if (foffsets_set) { 5198 *inoffp = inoff; 5199 *outoffp = outoff; 5200 } 5201 if (outfp != NULL) 5202 fdrop(outfp, td); 5203 if (infp != NULL) 5204 fdrop(infp, td); 5205 td->td_retval[0] = retlen; 5206 return (error); 5207 } 5208 5209 int 5210 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5211 { 5212 off_t inoff, outoff, *inoffp, *outoffp; 5213 int error; 5214 5215 inoffp = outoffp = NULL; 5216 if (uap->inoffp != NULL) { 5217 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5218 if (error != 0) 5219 return (error); 5220 inoffp = &inoff; 5221 } 5222 if (uap->outoffp != NULL) { 5223 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 
5224 if (error != 0) 5225 return (error); 5226 outoffp = &outoff; 5227 } 5228 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5229 outoffp, uap->len, uap->flags); 5230 if (error == 0 && uap->inoffp != NULL) 5231 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5232 if (error == 0 && uap->outoffp != NULL) 5233 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5234 return (error); 5235 } 5236