1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include <sys/cdefs.h> 38 #include "opt_capsicum.h" 39 #include "opt_ktrace.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #ifdef COMPAT_FREEBSD11 44 #include <sys/abi_compat.h> 45 #endif 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/malloc.h> 51 #include <sys/mount.h> 52 #include <sys/mutex.h> 53 #include <sys/sysproto.h> 54 #include <sys/namei.h> 55 #include <sys/filedesc.h> 56 #include <sys/kernel.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/rwlock.h> 63 #include <sys/sdt.h> 64 #include <sys/stat.h> 65 #include <sys/sx.h> 66 #include <sys/unistd.h> 67 #include <sys/vnode.h> 68 #include <sys/priv.h> 69 #include <sys/proc.h> 70 #include <sys/dirent.h> 71 #include <sys/jail.h> 72 #include <sys/syscallsubr.h> 73 #include <sys/sysctl.h> 74 #ifdef KTRACE 75 #include <sys/ktrace.h> 76 #endif 77 78 #include <machine/stdarg.h> 79 80 #include <security/audit/audit.h> 81 #include <security/mac/mac_framework.h> 82 83 #include <vm/vm.h> 84 #include <vm/vm_object.h> 85 #include <vm/vm_page.h> 86 #include <vm/vnode_pager.h> 87 #include <vm/uma.h> 88 89 #include <fs/devfs/devfs.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 static int kern_chflagsat(struct thread *td, int fd, const char *path, 94 enum uio_seg pathseg, u_long flags, int atflag); 95 static int setfflags(struct thread *td, struct vnode *, u_long); 96 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 97 static int getutimens(const struct timespec *, enum uio_seg, 98 struct timespec *, int *); 99 static int setutimes(struct thread *td, struct vnode *, 100 const struct timespec *, int, int); 101 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 102 struct thread *td); 103 static int kern_fhlinkat(struct thread *td, int fd, 
const char *path, 104 enum uio_seg pathseg, fhandle_t *fhp); 105 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 106 size_t count, struct thread *td); 107 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 108 const char *path, enum uio_seg segflag); 109 110 uint64_t 111 at2cnpflags(u_int at_flags, u_int mask) 112 { 113 uint64_t res; 114 115 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 116 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 117 118 res = 0; 119 at_flags &= mask; 120 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 121 res |= RBENEATH; 122 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 123 res |= FOLLOW; 124 /* NOFOLLOW is pseudo flag */ 125 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 126 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 127 FOLLOW; 128 } 129 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 130 res |= EMPTYPATH; 131 return (res); 132 } 133 134 int 135 kern_sync(struct thread *td) 136 { 137 struct mount *mp, *nmp; 138 int save; 139 140 mtx_lock(&mountlist_mtx); 141 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 142 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 143 nmp = TAILQ_NEXT(mp, mnt_list); 144 continue; 145 } 146 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 147 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 148 save = curthread_pflags_set(TDP_SYNCIO); 149 vfs_periodic(mp, MNT_NOWAIT); 150 VFS_SYNC(mp, MNT_NOWAIT); 151 curthread_pflags_restore(save); 152 vn_finished_write(mp); 153 } 154 mtx_lock(&mountlist_mtx); 155 nmp = TAILQ_NEXT(mp, mnt_list); 156 vfs_unbusy(mp); 157 } 158 mtx_unlock(&mountlist_mtx); 159 return (0); 160 } 161 162 /* 163 * Sync each mounted filesystem. 164 */ 165 #ifndef _SYS_SYSPROTO_H_ 166 struct sync_args { 167 int dummy; 168 }; 169 #endif 170 /* ARGSUSED */ 171 int 172 sys_sync(struct thread *td, struct sync_args *uap) 173 { 174 175 return (kern_sync(td)); 176 } 177 178 /* 179 * Change filesystem quotas. 
180 */ 181 #ifndef _SYS_SYSPROTO_H_ 182 struct quotactl_args { 183 char *path; 184 int cmd; 185 int uid; 186 caddr_t arg; 187 }; 188 #endif 189 int 190 sys_quotactl(struct thread *td, struct quotactl_args *uap) 191 { 192 struct mount *mp; 193 struct nameidata nd; 194 int error; 195 bool mp_busy; 196 197 AUDIT_ARG_CMD(uap->cmd); 198 AUDIT_ARG_UID(uap->uid); 199 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 200 return (EPERM); 201 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 202 uap->path); 203 if ((error = namei(&nd)) != 0) 204 return (error); 205 NDFREE_PNBUF(&nd); 206 mp = nd.ni_vp->v_mount; 207 vfs_ref(mp); 208 vput(nd.ni_vp); 209 error = vfs_busy(mp, 0); 210 if (error != 0) { 211 vfs_rel(mp); 212 return (error); 213 } 214 mp_busy = true; 215 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 216 217 /* 218 * Since quota on/off operations typically need to open quota 219 * files, the implementation may need to unbusy the mount point 220 * before calling into namei. Otherwise, unmount might be 221 * started between two vfs_busy() invocations (first is ours, 222 * second is from mount point cross-walk code in lookup()), 223 * causing deadlock. 224 * 225 * Avoid unbusying mp if the implementation indicates it has 226 * already done so. 227 */ 228 if (mp_busy) 229 vfs_unbusy(mp); 230 vfs_rel(mp); 231 return (error); 232 } 233 234 /* 235 * Used by statfs conversion routines to scale the block size up if 236 * necessary so that all of the block counts are <= 'max_size'. Note 237 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 238 * value of 'n'. 239 */ 240 void 241 statfs_scale_blocks(struct statfs *sf, long max_size) 242 { 243 uint64_t count; 244 int shift; 245 246 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 247 248 /* 249 * Attempt to scale the block counts to give a more accurate 250 * overview to userland of the ratio of free space to used 251 * space. 
To do this, find the largest block count and compute 252 * a divisor that lets it fit into a signed integer <= max_size. 253 */ 254 if (sf->f_bavail < 0) 255 count = -sf->f_bavail; 256 else 257 count = sf->f_bavail; 258 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 259 if (count <= max_size) 260 return; 261 262 count >>= flsl(max_size); 263 shift = 0; 264 while (count > 0) { 265 shift++; 266 count >>=1; 267 } 268 269 sf->f_bsize <<= shift; 270 sf->f_blocks >>= shift; 271 sf->f_bfree >>= shift; 272 sf->f_bavail >>= shift; 273 } 274 275 static int 276 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 277 { 278 int error; 279 280 if (mp == NULL) 281 return (EBADF); 282 error = vfs_busy(mp, 0); 283 vfs_rel(mp); 284 if (error != 0) 285 return (error); 286 #ifdef MAC 287 error = mac_mount_check_stat(td->td_ucred, mp); 288 if (error != 0) 289 goto out; 290 #endif 291 error = VFS_STATFS(mp, buf); 292 if (error != 0) 293 goto out; 294 if (priv_check_cred_vfs_generation(td->td_ucred)) { 295 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 296 prison_enforce_statfs(td->td_ucred, mp, buf); 297 } 298 out: 299 vfs_unbusy(mp); 300 return (error); 301 } 302 303 /* 304 * Get filesystem statistics. 
305 */ 306 #ifndef _SYS_SYSPROTO_H_ 307 struct statfs_args { 308 char *path; 309 struct statfs *buf; 310 }; 311 #endif 312 int 313 sys_statfs(struct thread *td, struct statfs_args *uap) 314 { 315 struct statfs *sfp; 316 int error; 317 318 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 319 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 320 if (error == 0) 321 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 322 free(sfp, M_STATFS); 323 return (error); 324 } 325 326 int 327 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 328 struct statfs *buf) 329 { 330 struct mount *mp; 331 struct nameidata nd; 332 int error; 333 334 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 335 error = namei(&nd); 336 if (error != 0) 337 return (error); 338 NDFREE_PNBUF(&nd); 339 mp = vfs_ref_from_vp(nd.ni_vp); 340 vrele(nd.ni_vp); 341 return (kern_do_statfs(td, mp, buf)); 342 } 343 344 /* 345 * Get filesystem statistics. 346 */ 347 #ifndef _SYS_SYSPROTO_H_ 348 struct fstatfs_args { 349 int fd; 350 struct statfs *buf; 351 }; 352 #endif 353 int 354 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 355 { 356 struct statfs *sfp; 357 int error; 358 359 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 360 error = kern_fstatfs(td, uap->fd, sfp); 361 if (error == 0) 362 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 363 free(sfp, M_STATFS); 364 return (error); 365 } 366 367 int 368 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 369 { 370 struct file *fp; 371 struct mount *mp; 372 struct vnode *vp; 373 int error; 374 375 AUDIT_ARG_FD(fd); 376 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 377 if (error != 0) 378 return (error); 379 vp = fp->f_vnode; 380 #ifdef AUDIT 381 if (AUDITING_TD(td)) { 382 vn_lock(vp, LK_SHARED | LK_RETRY); 383 AUDIT_ARG_VNODE1(vp); 384 VOP_UNLOCK(vp); 385 } 386 #endif 387 mp = vfs_ref_from_vp(vp); 388 fdrop(fp, td); 389 return (kern_do_statfs(td, mp, buf)); 390 } 391 392 
/* 393 * Get statistics on all filesystems. 394 */ 395 #ifndef _SYS_SYSPROTO_H_ 396 struct getfsstat_args { 397 struct statfs *buf; 398 long bufsize; 399 int mode; 400 }; 401 #endif 402 int 403 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 404 { 405 size_t count; 406 int error; 407 408 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 409 return (EINVAL); 410 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 411 UIO_USERSPACE, uap->mode); 412 if (error == 0) 413 td->td_retval[0] = count; 414 return (error); 415 } 416 417 /* 418 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 419 * The caller is responsible for freeing memory which will be allocated 420 * in '*buf'. 421 */ 422 int 423 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 424 size_t *countp, enum uio_seg bufseg, int mode) 425 { 426 struct mount *mp, *nmp; 427 struct statfs *sfsp, *sp, *sptmp, *tofree; 428 size_t count, maxcount; 429 int error; 430 431 switch (mode) { 432 case MNT_WAIT: 433 case MNT_NOWAIT: 434 break; 435 default: 436 if (bufseg == UIO_SYSSPACE) 437 *buf = NULL; 438 return (EINVAL); 439 } 440 restart: 441 maxcount = bufsize / sizeof(struct statfs); 442 if (bufsize == 0) { 443 sfsp = NULL; 444 tofree = NULL; 445 } else if (bufseg == UIO_USERSPACE) { 446 sfsp = *buf; 447 tofree = NULL; 448 } else /* if (bufseg == UIO_SYSSPACE) */ { 449 count = 0; 450 mtx_lock(&mountlist_mtx); 451 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 452 count++; 453 } 454 mtx_unlock(&mountlist_mtx); 455 if (maxcount > count) 456 maxcount = count; 457 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 458 M_STATFS, M_WAITOK); 459 } 460 461 count = 0; 462 463 /* 464 * If there is no target buffer they only want the count. 465 * 466 * This could be TAILQ_FOREACH but it is open-coded to match the original 467 * code below. 
468 */ 469 if (sfsp == NULL) { 470 mtx_lock(&mountlist_mtx); 471 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 472 if (prison_canseemount(td->td_ucred, mp) != 0) { 473 nmp = TAILQ_NEXT(mp, mnt_list); 474 continue; 475 } 476 #ifdef MAC 477 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 478 nmp = TAILQ_NEXT(mp, mnt_list); 479 continue; 480 } 481 #endif 482 count++; 483 nmp = TAILQ_NEXT(mp, mnt_list); 484 } 485 mtx_unlock(&mountlist_mtx); 486 *countp = count; 487 return (0); 488 } 489 490 /* 491 * They want the entire thing. 492 * 493 * Short-circuit the corner case of no room for anything, avoids 494 * relocking below. 495 */ 496 if (maxcount < 1) { 497 goto out; 498 } 499 500 mtx_lock(&mountlist_mtx); 501 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 502 if (prison_canseemount(td->td_ucred, mp) != 0) { 503 nmp = TAILQ_NEXT(mp, mnt_list); 504 continue; 505 } 506 #ifdef MAC 507 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 508 nmp = TAILQ_NEXT(mp, mnt_list); 509 continue; 510 } 511 #endif 512 if (mode == MNT_WAIT) { 513 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 514 /* 515 * If vfs_busy() failed, and MBF_NOWAIT 516 * wasn't passed, then the mp is gone. 517 * Furthermore, because of MBF_MNTLSTLOCK, 518 * the mountlist_mtx was dropped. We have 519 * no other choice than to start over. 520 */ 521 mtx_unlock(&mountlist_mtx); 522 free(tofree, M_STATFS); 523 goto restart; 524 } 525 } else { 526 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 527 nmp = TAILQ_NEXT(mp, mnt_list); 528 continue; 529 } 530 } 531 sp = &mp->mnt_stat; 532 /* 533 * If MNT_NOWAIT is specified, do not refresh 534 * the fsstat cache. 
535 */ 536 if (mode != MNT_NOWAIT) { 537 error = VFS_STATFS(mp, sp); 538 if (error != 0) { 539 mtx_lock(&mountlist_mtx); 540 nmp = TAILQ_NEXT(mp, mnt_list); 541 vfs_unbusy(mp); 542 continue; 543 } 544 } 545 if (priv_check_cred_vfs_generation(td->td_ucred)) { 546 sptmp = malloc(sizeof(struct statfs), M_STATFS, 547 M_WAITOK); 548 *sptmp = *sp; 549 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 550 prison_enforce_statfs(td->td_ucred, mp, sptmp); 551 sp = sptmp; 552 } else 553 sptmp = NULL; 554 if (bufseg == UIO_SYSSPACE) { 555 bcopy(sp, sfsp, sizeof(*sp)); 556 free(sptmp, M_STATFS); 557 } else /* if (bufseg == UIO_USERSPACE) */ { 558 error = copyout(sp, sfsp, sizeof(*sp)); 559 free(sptmp, M_STATFS); 560 if (error != 0) { 561 vfs_unbusy(mp); 562 return (error); 563 } 564 } 565 sfsp++; 566 count++; 567 568 if (count == maxcount) { 569 vfs_unbusy(mp); 570 goto out; 571 } 572 573 mtx_lock(&mountlist_mtx); 574 nmp = TAILQ_NEXT(mp, mnt_list); 575 vfs_unbusy(mp); 576 } 577 mtx_unlock(&mountlist_mtx); 578 out: 579 *countp = count; 580 return (0); 581 } 582 583 #ifdef COMPAT_FREEBSD4 584 /* 585 * Get old format filesystem statistics. 586 */ 587 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 588 589 #ifndef _SYS_SYSPROTO_H_ 590 struct freebsd4_statfs_args { 591 char *path; 592 struct ostatfs *buf; 593 }; 594 #endif 595 int 596 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 597 { 598 struct ostatfs osb; 599 struct statfs *sfp; 600 int error; 601 602 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 603 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 604 if (error == 0) { 605 freebsd4_cvtstatfs(sfp, &osb); 606 error = copyout(&osb, uap->buf, sizeof(osb)); 607 } 608 free(sfp, M_STATFS); 609 return (error); 610 } 611 612 /* 613 * Get filesystem statistics. 
614 */ 615 #ifndef _SYS_SYSPROTO_H_ 616 struct freebsd4_fstatfs_args { 617 int fd; 618 struct ostatfs *buf; 619 }; 620 #endif 621 int 622 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 623 { 624 struct ostatfs osb; 625 struct statfs *sfp; 626 int error; 627 628 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 629 error = kern_fstatfs(td, uap->fd, sfp); 630 if (error == 0) { 631 freebsd4_cvtstatfs(sfp, &osb); 632 error = copyout(&osb, uap->buf, sizeof(osb)); 633 } 634 free(sfp, M_STATFS); 635 return (error); 636 } 637 638 /* 639 * Get statistics on all filesystems. 640 */ 641 #ifndef _SYS_SYSPROTO_H_ 642 struct freebsd4_getfsstat_args { 643 struct ostatfs *buf; 644 long bufsize; 645 int mode; 646 }; 647 #endif 648 int 649 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 650 { 651 struct statfs *buf, *sp; 652 struct ostatfs osb; 653 size_t count, size; 654 int error; 655 656 if (uap->bufsize < 0) 657 return (EINVAL); 658 count = uap->bufsize / sizeof(struct ostatfs); 659 if (count > SIZE_MAX / sizeof(struct statfs)) 660 return (EINVAL); 661 size = count * sizeof(struct statfs); 662 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 663 uap->mode); 664 if (error == 0) 665 td->td_retval[0] = count; 666 if (size != 0) { 667 sp = buf; 668 while (count != 0 && error == 0) { 669 freebsd4_cvtstatfs(sp, &osb); 670 error = copyout(&osb, uap->buf, sizeof(osb)); 671 sp++; 672 uap->buf++; 673 count--; 674 } 675 free(buf, M_STATFS); 676 } 677 return (error); 678 } 679 680 /* 681 * Implement fstatfs() for (NFS) file handles. 
682 */ 683 #ifndef _SYS_SYSPROTO_H_ 684 struct freebsd4_fhstatfs_args { 685 struct fhandle *u_fhp; 686 struct ostatfs *buf; 687 }; 688 #endif 689 int 690 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 691 { 692 struct ostatfs osb; 693 struct statfs *sfp; 694 fhandle_t fh; 695 int error; 696 697 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 698 if (error != 0) 699 return (error); 700 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 701 error = kern_fhstatfs(td, fh, sfp); 702 if (error == 0) { 703 freebsd4_cvtstatfs(sfp, &osb); 704 error = copyout(&osb, uap->buf, sizeof(osb)); 705 } 706 free(sfp, M_STATFS); 707 return (error); 708 } 709 710 /* 711 * Convert a new format statfs structure to an old format statfs structure. 712 */ 713 static void 714 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 715 { 716 717 statfs_scale_blocks(nsp, LONG_MAX); 718 bzero(osp, sizeof(*osp)); 719 osp->f_bsize = nsp->f_bsize; 720 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 721 osp->f_blocks = nsp->f_blocks; 722 osp->f_bfree = nsp->f_bfree; 723 osp->f_bavail = nsp->f_bavail; 724 osp->f_files = MIN(nsp->f_files, LONG_MAX); 725 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 726 osp->f_owner = nsp->f_owner; 727 osp->f_type = nsp->f_type; 728 osp->f_flags = nsp->f_flags; 729 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 730 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 731 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 732 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 733 strlcpy(osp->f_fstypename, nsp->f_fstypename, 734 MIN(MFSNAMELEN, OMFSNAMELEN)); 735 strlcpy(osp->f_mntonname, nsp->f_mntonname, 736 MIN(MNAMELEN, OMNAMELEN)); 737 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 738 MIN(MNAMELEN, OMNAMELEN)); 739 osp->f_fsid = nsp->f_fsid; 740 } 741 #endif /* COMPAT_FREEBSD4 */ 742 743 #if defined(COMPAT_FREEBSD11) 744 /* 745 * Get old format filesystem statistics. 
746 */ 747 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 748 749 int 750 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 751 { 752 struct freebsd11_statfs osb; 753 struct statfs *sfp; 754 int error; 755 756 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 757 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 758 if (error == 0) { 759 freebsd11_cvtstatfs(sfp, &osb); 760 error = copyout(&osb, uap->buf, sizeof(osb)); 761 } 762 free(sfp, M_STATFS); 763 return (error); 764 } 765 766 /* 767 * Get filesystem statistics. 768 */ 769 int 770 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 771 { 772 struct freebsd11_statfs osb; 773 struct statfs *sfp; 774 int error; 775 776 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 777 error = kern_fstatfs(td, uap->fd, sfp); 778 if (error == 0) { 779 freebsd11_cvtstatfs(sfp, &osb); 780 error = copyout(&osb, uap->buf, sizeof(osb)); 781 } 782 free(sfp, M_STATFS); 783 return (error); 784 } 785 786 /* 787 * Get statistics on all filesystems. 
788 */ 789 int 790 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 791 { 792 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 793 } 794 795 int 796 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 797 long bufsize, int mode) 798 { 799 struct freebsd11_statfs osb; 800 struct statfs *buf, *sp; 801 size_t count, size; 802 int error; 803 804 if (bufsize < 0) 805 return (EINVAL); 806 807 count = bufsize / sizeof(struct ostatfs); 808 size = count * sizeof(struct statfs); 809 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 810 if (error == 0) 811 td->td_retval[0] = count; 812 if (size > 0) { 813 sp = buf; 814 while (count > 0 && error == 0) { 815 freebsd11_cvtstatfs(sp, &osb); 816 error = copyout(&osb, ubuf, sizeof(osb)); 817 sp++; 818 ubuf++; 819 count--; 820 } 821 free(buf, M_STATFS); 822 } 823 return (error); 824 } 825 826 /* 827 * Implement fstatfs() for (NFS) file handles. 828 */ 829 int 830 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 831 { 832 struct freebsd11_statfs osb; 833 struct statfs *sfp; 834 fhandle_t fh; 835 int error; 836 837 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 838 if (error) 839 return (error); 840 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 841 error = kern_fhstatfs(td, fh, sfp); 842 if (error == 0) { 843 freebsd11_cvtstatfs(sfp, &osb); 844 error = copyout(&osb, uap->buf, sizeof(osb)); 845 } 846 free(sfp, M_STATFS); 847 return (error); 848 } 849 850 /* 851 * Convert a new format statfs structure to an old format statfs structure. 
852 */ 853 static void 854 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 855 { 856 857 bzero(osp, sizeof(*osp)); 858 osp->f_version = FREEBSD11_STATFS_VERSION; 859 osp->f_type = nsp->f_type; 860 osp->f_flags = nsp->f_flags; 861 osp->f_bsize = nsp->f_bsize; 862 osp->f_iosize = nsp->f_iosize; 863 osp->f_blocks = nsp->f_blocks; 864 osp->f_bfree = nsp->f_bfree; 865 osp->f_bavail = nsp->f_bavail; 866 osp->f_files = nsp->f_files; 867 osp->f_ffree = nsp->f_ffree; 868 osp->f_syncwrites = nsp->f_syncwrites; 869 osp->f_asyncwrites = nsp->f_asyncwrites; 870 osp->f_syncreads = nsp->f_syncreads; 871 osp->f_asyncreads = nsp->f_asyncreads; 872 osp->f_namemax = nsp->f_namemax; 873 osp->f_owner = nsp->f_owner; 874 osp->f_fsid = nsp->f_fsid; 875 strlcpy(osp->f_fstypename, nsp->f_fstypename, 876 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 877 strlcpy(osp->f_mntonname, nsp->f_mntonname, 878 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 879 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 880 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 881 } 882 #endif /* COMPAT_FREEBSD11 */ 883 884 /* 885 * Change current working directory to a given file descriptor. 
886 */ 887 #ifndef _SYS_SYSPROTO_H_ 888 struct fchdir_args { 889 int fd; 890 }; 891 #endif 892 int 893 sys_fchdir(struct thread *td, struct fchdir_args *uap) 894 { 895 struct vnode *vp, *tdp; 896 struct mount *mp; 897 struct file *fp; 898 int error; 899 900 AUDIT_ARG_FD(uap->fd); 901 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 902 &fp); 903 if (error != 0) 904 return (error); 905 vp = fp->f_vnode; 906 vrefact(vp); 907 fdrop(fp, td); 908 vn_lock(vp, LK_SHARED | LK_RETRY); 909 AUDIT_ARG_VNODE1(vp); 910 error = change_dir(vp, td); 911 while (!error && (mp = vp->v_mountedhere) != NULL) { 912 if (vfs_busy(mp, 0)) 913 continue; 914 error = VFS_ROOT(mp, LK_SHARED, &tdp); 915 vfs_unbusy(mp); 916 if (error != 0) 917 break; 918 vput(vp); 919 vp = tdp; 920 } 921 if (error != 0) { 922 vput(vp); 923 return (error); 924 } 925 VOP_UNLOCK(vp); 926 pwd_chdir(td, vp); 927 return (0); 928 } 929 930 /* 931 * Change current working directory (``.''). 932 */ 933 #ifndef _SYS_SYSPROTO_H_ 934 struct chdir_args { 935 char *path; 936 }; 937 #endif 938 int 939 sys_chdir(struct thread *td, struct chdir_args *uap) 940 { 941 942 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 943 } 944 945 int 946 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 947 { 948 struct nameidata nd; 949 int error; 950 951 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 952 pathseg, path); 953 if ((error = namei(&nd)) != 0) 954 return (error); 955 if ((error = change_dir(nd.ni_vp, td)) != 0) { 956 vput(nd.ni_vp); 957 NDFREE_PNBUF(&nd); 958 return (error); 959 } 960 VOP_UNLOCK(nd.ni_vp); 961 NDFREE_PNBUF(&nd); 962 pwd_chdir(td, nd.ni_vp); 963 return (0); 964 } 965 966 static int unprivileged_chroot = 0; 967 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 968 &unprivileged_chroot, 0, 969 "Unprivileged processes can use chroot(2)"); 970 971 /* 972 * Takes locked vnode, unlocks it before returning. 
973 */ 974 static int 975 kern_chroot(struct thread *td, struct vnode *vp) 976 { 977 struct proc *p; 978 int error; 979 980 error = priv_check(td, PRIV_VFS_CHROOT); 981 if (error != 0) { 982 p = td->td_proc; 983 PROC_LOCK(p); 984 if (unprivileged_chroot == 0 || 985 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 986 PROC_UNLOCK(p); 987 goto e_vunlock; 988 } 989 PROC_UNLOCK(p); 990 } 991 992 error = change_dir(vp, td); 993 if (error != 0) 994 goto e_vunlock; 995 #ifdef MAC 996 error = mac_vnode_check_chroot(td->td_ucred, vp); 997 if (error != 0) 998 goto e_vunlock; 999 #endif 1000 VOP_UNLOCK(vp); 1001 error = pwd_chroot(td, vp); 1002 vrele(vp); 1003 return (error); 1004 e_vunlock: 1005 vput(vp); 1006 return (error); 1007 } 1008 1009 /* 1010 * Change notion of root (``/'') directory. 1011 */ 1012 #ifndef _SYS_SYSPROTO_H_ 1013 struct chroot_args { 1014 char *path; 1015 }; 1016 #endif 1017 int 1018 sys_chroot(struct thread *td, struct chroot_args *uap) 1019 { 1020 struct nameidata nd; 1021 int error; 1022 1023 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1024 UIO_USERSPACE, uap->path); 1025 error = namei(&nd); 1026 if (error != 0) 1027 return (error); 1028 NDFREE_PNBUF(&nd); 1029 error = kern_chroot(td, nd.ni_vp); 1030 return (error); 1031 } 1032 1033 /* 1034 * Change notion of root directory to a given file descriptor. 1035 */ 1036 #ifndef _SYS_SYSPROTO_H_ 1037 struct fchroot_args { 1038 int fd; 1039 }; 1040 #endif 1041 int 1042 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1043 { 1044 struct vnode *vp; 1045 struct file *fp; 1046 int error; 1047 1048 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fp); 1049 if (error != 0) 1050 return (error); 1051 vp = fp->f_vnode; 1052 vrefact(vp); 1053 fdrop(fp, td); 1054 vn_lock(vp, LK_SHARED | LK_RETRY); 1055 error = kern_chroot(td, vp); 1056 return (error); 1057 } 1058 1059 /* 1060 * Common routine for chroot and chdir. Callers must provide a locked vnode 1061 * instance. 
1062 */ 1063 int 1064 change_dir(struct vnode *vp, struct thread *td) 1065 { 1066 #ifdef MAC 1067 int error; 1068 #endif 1069 1070 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1071 if (vp->v_type != VDIR) 1072 return (ENOTDIR); 1073 #ifdef MAC 1074 error = mac_vnode_check_chdir(td->td_ucred, vp); 1075 if (error != 0) 1076 return (error); 1077 #endif 1078 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1079 } 1080 1081 static __inline void 1082 flags_to_rights(int flags, cap_rights_t *rightsp) 1083 { 1084 if (flags & O_EXEC) { 1085 cap_rights_set_one(rightsp, CAP_FEXECVE); 1086 if (flags & O_PATH) 1087 return; 1088 } else { 1089 switch ((flags & O_ACCMODE)) { 1090 case O_RDONLY: 1091 cap_rights_set_one(rightsp, CAP_READ); 1092 break; 1093 case O_RDWR: 1094 cap_rights_set_one(rightsp, CAP_READ); 1095 /* FALLTHROUGH */ 1096 case O_WRONLY: 1097 cap_rights_set_one(rightsp, CAP_WRITE); 1098 if (!(flags & (O_APPEND | O_TRUNC))) 1099 cap_rights_set_one(rightsp, CAP_SEEK); 1100 break; 1101 } 1102 } 1103 1104 if (flags & O_CREAT) 1105 cap_rights_set_one(rightsp, CAP_CREATE); 1106 1107 if (flags & O_TRUNC) 1108 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1109 1110 if (flags & (O_SYNC | O_FSYNC)) 1111 cap_rights_set_one(rightsp, CAP_FSYNC); 1112 1113 if (flags & (O_EXLOCK | O_SHLOCK)) 1114 cap_rights_set_one(rightsp, CAP_FLOCK); 1115 } 1116 1117 /* 1118 * Check permissions, allocate an open file structure, and call the device 1119 * open routine if any. 
/*
 * Common back end for the open family of calls: look up (and possibly
 * create) path relative to dirfd and bind the result to a newly
 * allocated struct file.
 *
 * If fpp != NULL, opened file is not installed into the file
 * descriptor table, instead it is returned in *fpp.  This is
 * incompatible with fdopen(), in which case we return EINVAL.
 *
 * If fpp == NULL, the file is installed into the descriptor table and
 * the descriptor number is stored in td->td_retval[0].
 *
 * Returns 0 on success, otherwise an errno value.  On every failure
 * after the file has been allocated it is released with falloc_abort().
 */
static int
openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	struct proc *p;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct filecaps *fcaps;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see the success path. */
	indx = -1;
	p = td->td_proc;
	fdp = p->p_fd;
	pdp = p->p_pd;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* Capability rights required on dirfd: lookup plus what flags imply. */
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	if ((flags & O_PATH) != 0) {
		flags &= ~O_ACCMODE;
	} else if ((flags & O_EXEC) != 0) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		/* Convert the open(2) flags to the kernel F* representation. */
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure.  The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Creation mode: apply the process umask and never set the sticky bit. */
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, dirfd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			MPASS(fpp == NULL);
			/* On success dupfdopen() stores the descriptor in indx. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type.  Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			/* O_PATH files get the restricted path_fileops vector. */
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}

	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	if (fpp != NULL) {
		/*
		 * NOTE(review): the ENXIO "goto success" path above arrives
		 * here with error == ENXIO; confirm that path cannot be
		 * taken when fpp != NULL, otherwise this assertion fires.
		 */
		MPASS(error == 0);
		NDFREE_IOCTLCAPS(&nd);
		*fpp = fp;
		return (0);
	}

	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/*
		 * dupfdopen() already produced the descriptor, so the file
		 * allocated above is not needed.
		 */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}
/*
 * Create a character device, block device or whiteout entry at path,
 * resolved relative to fd.  A FIFO request with dev == 0 is handed to
 * kern_mkfifoat(); every other file type fails with EINVAL.
 *
 * Returns 0 on success or an errno value: a privilege-check failure,
 * EINVAL (unsupported type, dev == VNOVAL for a device node, or a parent
 * flagged VIRF_NAMEDDIR), EEXIST if the name already exists, or whatever
 * namei(), vn_start_write(), the MAC check or the filesystem reports.
 * The whole lookup is redone when the filesystem returns ERELOOKUP.
 */
int
kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int mode, dev_t dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Validate the requested type and check privilege before any lookup. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mknodat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: drop everything namei() returned. */
		NDFREE_PNBUF(&nd);
		/* When vp is the parent itself, release rather than vput it. */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		/* Nodes may not be created in a VIRF_NAMEDDIR directory. */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	} else {
		VATTR_NULL(&vattr);
		/* Permission bits only, filtered through the process umask. */
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_pd->pd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Other types were rejected by the first switch. */
			panic("kern_mknod: invalid mode");
		}
	}
	/*
	 * If write access cannot be started without sleeping, drop the
	 * locked parent, wait, and redo the lookup from scratch.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is skipped for whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
		}
	}
	/* Only a successful VOP_MKNOD() leaves a new vnode to release. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL,
	    true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}
1494 */ 1495 #ifndef _SYS_SYSPROTO_H_ 1496 struct mkfifo_args { 1497 char *path; 1498 int mode; 1499 }; 1500 #endif 1501 int 1502 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1503 { 1504 1505 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1506 uap->mode)); 1507 } 1508 1509 #ifndef _SYS_SYSPROTO_H_ 1510 struct mkfifoat_args { 1511 int fd; 1512 char *path; 1513 mode_t mode; 1514 }; 1515 #endif 1516 int 1517 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1518 { 1519 1520 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1521 uap->mode)); 1522 } 1523 1524 int 1525 kern_mkfifoat(struct thread *td, int fd, const char *path, 1526 enum uio_seg pathseg, int mode) 1527 { 1528 struct mount *mp; 1529 struct vattr vattr; 1530 struct nameidata nd; 1531 int error; 1532 1533 AUDIT_ARG_MODE(mode); 1534 NDPREINIT(&nd); 1535 restart: 1536 bwillwrite(); 1537 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1538 pathseg, path, fd, &cap_mkfifoat_rights); 1539 if ((error = namei(&nd)) != 0) 1540 return (error); 1541 if (nd.ni_vp != NULL) { 1542 NDFREE_PNBUF(&nd); 1543 if (nd.ni_vp == nd.ni_dvp) 1544 vrele(nd.ni_dvp); 1545 else 1546 vput(nd.ni_dvp); 1547 vrele(nd.ni_vp); 1548 return (EEXIST); 1549 } 1550 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1551 NDFREE_PNBUF(&nd); 1552 vput(nd.ni_dvp); 1553 return (EINVAL); 1554 } 1555 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1556 NDFREE_PNBUF(&nd); 1557 vput(nd.ni_dvp); 1558 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1559 return (error); 1560 goto restart; 1561 } 1562 VATTR_NULL(&vattr); 1563 vattr.va_type = VFIFO; 1564 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1565 #ifdef MAC 1566 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1567 &vattr); 1568 if (error != 0) 1569 goto out; 1570 #endif 1571 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1572 #ifdef MAC 1573 out: 1574 #endif 
1575 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1576 vn_finished_write(mp); 1577 NDFREE_PNBUF(&nd); 1578 if (error == ERELOOKUP) 1579 goto restart; 1580 return (error); 1581 } 1582 1583 /* 1584 * Make a hard file link. 1585 */ 1586 #ifndef _SYS_SYSPROTO_H_ 1587 struct link_args { 1588 char *path; 1589 char *link; 1590 }; 1591 #endif 1592 int 1593 sys_link(struct thread *td, struct link_args *uap) 1594 { 1595 1596 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1597 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1598 } 1599 1600 #ifndef _SYS_SYSPROTO_H_ 1601 struct linkat_args { 1602 int fd1; 1603 char *path1; 1604 int fd2; 1605 char *path2; 1606 int flag; 1607 }; 1608 #endif 1609 int 1610 sys_linkat(struct thread *td, struct linkat_args *uap) 1611 { 1612 1613 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1614 UIO_USERSPACE, uap->flag)); 1615 } 1616 1617 int hardlink_check_uid = 0; 1618 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1619 &hardlink_check_uid, 0, 1620 "Unprivileged processes cannot create hard links to files owned by other " 1621 "users"); 1622 static int hardlink_check_gid = 0; 1623 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1624 &hardlink_check_gid, 0, 1625 "Unprivileged processes cannot create hard links to files owned by other " 1626 "groups"); 1627 1628 static int 1629 can_hardlink(struct vnode *vp, struct ucred *cred) 1630 { 1631 struct vattr va; 1632 int error; 1633 1634 if (!hardlink_check_uid && !hardlink_check_gid) 1635 return (0); 1636 1637 error = VOP_GETATTR(vp, &va, cred); 1638 if (error != 0) 1639 return (error); 1640 1641 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1642 error = priv_check_cred(cred, PRIV_VFS_LINK); 1643 if (error != 0) 1644 return (error); 1645 } 1646 1647 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1648 error = priv_check_cred(cred, PRIV_VFS_LINK); 1649 if (error != 0) 1650 return (error); 1651 } 
/*
 * Create a hard link named path (resolved relative to fd) to the already
 * looked-up, referenced but unlocked vnode vp.
 *
 * The caller's reference on vp is consumed on every path, success or
 * failure.
 *
 * Returns 0 on success or an errno value: EPERM if vp is a directory,
 * EINVAL if vp is flagged VIRF_NAMEDDIR or VIRF_NAMEDATTR, EEXIST if the
 * target name exists, EXDEV if the target directory is on another mount,
 * or whatever namei(), can_hardlink(), the MAC check, vn_start_write()
 * or VOP_LINK() reports.  EAGAIN is returned when vp could not be locked
 * or write access had to be waited for; kern_linkat() retries on it.
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) {
		vrele(vp);
		return (EINVAL);
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* Target name already exists. */
			NDFREE_PNBUF(&nd);
			/* The parent is vput unless it is the found vnode. */
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE_PNBUF(&nd);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
			/* Both the target directory and vp are locked here. */
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Drop both vnodes, wait for write access,
				 * then have the caller redo both lookups.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | V_PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
			vn_finished_write(mp);
			NDFREE_PNBUF(&nd);
			/* vp was handed to VOP_VPUT_PAIR(); skip the vrele below. */
			vp = NULL;
		} else {
			/* Could not lock vp; have the caller start over. */
			vput(nd.ni_dvp);
			NDFREE_PNBUF(&nd);
			vrele(vp);
			return (EAGAIN);
		}
	}
	/* Reached with vp still referenced only when namei() failed. */
	if (vp != NULL)
		vrele(vp);
	return (error);
}
1770 */ 1771 #ifndef _SYS_SYSPROTO_H_ 1772 struct symlink_args { 1773 char *path; 1774 char *link; 1775 }; 1776 #endif 1777 int 1778 sys_symlink(struct thread *td, struct symlink_args *uap) 1779 { 1780 1781 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1782 UIO_USERSPACE)); 1783 } 1784 1785 #ifndef _SYS_SYSPROTO_H_ 1786 struct symlinkat_args { 1787 char *path; 1788 int fd; 1789 char *path2; 1790 }; 1791 #endif 1792 int 1793 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1794 { 1795 1796 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1797 UIO_USERSPACE)); 1798 } 1799 1800 int 1801 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1802 enum uio_seg segflg) 1803 { 1804 struct mount *mp; 1805 struct vattr vattr; 1806 const char *syspath; 1807 char *tmppath; 1808 struct nameidata nd; 1809 int error; 1810 1811 if (segflg == UIO_SYSSPACE) { 1812 syspath = path1; 1813 } else { 1814 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1815 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1816 goto out; 1817 syspath = tmppath; 1818 } 1819 AUDIT_ARG_TEXT(syspath); 1820 NDPREINIT(&nd); 1821 restart: 1822 bwillwrite(); 1823 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1824 path2, fd, &cap_symlinkat_rights); 1825 if ((error = namei(&nd)) != 0) 1826 goto out; 1827 if (nd.ni_vp) { 1828 NDFREE_PNBUF(&nd); 1829 if (nd.ni_vp == nd.ni_dvp) 1830 vrele(nd.ni_dvp); 1831 else 1832 vput(nd.ni_dvp); 1833 vrele(nd.ni_vp); 1834 nd.ni_vp = NULL; 1835 error = EEXIST; 1836 goto out; 1837 } 1838 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1839 NDFREE_PNBUF(&nd); 1840 vput(nd.ni_dvp); 1841 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1842 goto out; 1843 goto restart; 1844 } 1845 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1846 error = EINVAL; 1847 goto out; 1848 } 1849 VATTR_NULL(&vattr); 1850 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 
1851 #ifdef MAC 1852 vattr.va_type = VLNK; 1853 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1854 &vattr); 1855 if (error != 0) 1856 goto out2; 1857 #endif 1858 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1859 #ifdef MAC 1860 out2: 1861 #endif 1862 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1863 vn_finished_write(mp); 1864 NDFREE_PNBUF(&nd); 1865 if (error == ERELOOKUP) 1866 goto restart; 1867 out: 1868 if (segflg != UIO_SYSSPACE) 1869 uma_zfree(namei_zone, tmppath); 1870 return (error); 1871 } 1872 1873 /* 1874 * Delete a whiteout from the filesystem. 1875 */ 1876 #ifndef _SYS_SYSPROTO_H_ 1877 struct undelete_args { 1878 char *path; 1879 }; 1880 #endif 1881 int 1882 sys_undelete(struct thread *td, struct undelete_args *uap) 1883 { 1884 struct mount *mp; 1885 struct nameidata nd; 1886 int error; 1887 1888 NDPREINIT(&nd); 1889 restart: 1890 bwillwrite(); 1891 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1892 UIO_USERSPACE, uap->path); 1893 error = namei(&nd); 1894 if (error != 0) 1895 return (error); 1896 1897 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1898 NDFREE_PNBUF(&nd); 1899 if (nd.ni_vp == nd.ni_dvp) 1900 vrele(nd.ni_dvp); 1901 else 1902 vput(nd.ni_dvp); 1903 if (nd.ni_vp) 1904 vrele(nd.ni_vp); 1905 return (EEXIST); 1906 } 1907 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1908 NDFREE_PNBUF(&nd); 1909 vput(nd.ni_dvp); 1910 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1911 return (error); 1912 goto restart; 1913 } 1914 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1915 NDFREE_PNBUF(&nd); 1916 vput(nd.ni_dvp); 1917 vn_finished_write(mp); 1918 if (error == ERELOOKUP) 1919 goto restart; 1920 return (error); 1921 } 1922 1923 /* 1924 * Delete a name from the filesystem. 
1925 */ 1926 #ifndef _SYS_SYSPROTO_H_ 1927 struct unlink_args { 1928 char *path; 1929 }; 1930 #endif 1931 int 1932 sys_unlink(struct thread *td, struct unlink_args *uap) 1933 { 1934 1935 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1936 0, 0)); 1937 } 1938 1939 static int 1940 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1941 int flag, enum uio_seg pathseg, ino_t oldinum) 1942 { 1943 1944 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1945 return (EINVAL); 1946 1947 if ((flag & AT_REMOVEDIR) != 0) 1948 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1949 1950 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1951 } 1952 1953 #ifndef _SYS_SYSPROTO_H_ 1954 struct unlinkat_args { 1955 int fd; 1956 char *path; 1957 int flag; 1958 }; 1959 #endif 1960 int 1961 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1962 { 1963 1964 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1965 UIO_USERSPACE, 0)); 1966 } 1967 1968 #ifndef _SYS_SYSPROTO_H_ 1969 struct funlinkat_args { 1970 int dfd; 1971 const char *path; 1972 int fd; 1973 int flag; 1974 }; 1975 #endif 1976 int 1977 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1978 { 1979 1980 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1981 UIO_USERSPACE, 0)); 1982 } 1983 1984 int 1985 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1986 enum uio_seg pathseg, int flag, ino_t oldinum) 1987 { 1988 struct mount *mp; 1989 struct file *fp; 1990 struct vnode *vp; 1991 struct nameidata nd; 1992 struct stat sb; 1993 int error; 1994 1995 fp = NULL; 1996 if (fd != FD_NONE) { 1997 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1998 if (error != 0) 1999 return (error); 2000 } 2001 2002 NDPREINIT(&nd); 2003 restart: 2004 bwillwrite(); 2005 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2006 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2007 pathseg, 
path, dfd, &cap_unlinkat_rights); 2008 if ((error = namei(&nd)) != 0) { 2009 if (error == EINVAL) 2010 error = EPERM; 2011 goto fdout; 2012 } 2013 vp = nd.ni_vp; 2014 if (vp->v_type == VDIR && oldinum == 0) { 2015 error = EPERM; /* POSIX */ 2016 } else if (oldinum != 0 && 2017 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2018 sb.st_ino != oldinum) { 2019 error = EIDRM; /* Identifier removed */ 2020 } else if (fp != NULL && fp->f_vnode != vp) { 2021 if (VN_IS_DOOMED(fp->f_vnode)) 2022 error = EBADF; 2023 else 2024 error = EDEADLK; 2025 } else { 2026 /* 2027 * The root of a mounted filesystem cannot be deleted. 2028 * 2029 * XXX: can this only be a VDIR case? 2030 */ 2031 if (vp->v_vflag & VV_ROOT) 2032 error = EBUSY; 2033 } 2034 if (error == 0) { 2035 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2036 NDFREE_PNBUF(&nd); 2037 vput(nd.ni_dvp); 2038 if (vp == nd.ni_dvp) 2039 vrele(vp); 2040 else 2041 vput(vp); 2042 if ((error = vn_start_write(NULL, &mp, 2043 V_XSLEEP | V_PCATCH)) != 0) { 2044 goto fdout; 2045 } 2046 goto restart; 2047 } 2048 #ifdef MAC 2049 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2050 &nd.ni_cnd); 2051 if (error != 0) 2052 goto out; 2053 #endif 2054 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2055 #ifdef MAC 2056 out: 2057 #endif 2058 vn_finished_write(mp); 2059 } 2060 NDFREE_PNBUF(&nd); 2061 vput(nd.ni_dvp); 2062 if (vp == nd.ni_dvp) 2063 vrele(vp); 2064 else 2065 vput(vp); 2066 if (error == ERELOOKUP) 2067 goto restart; 2068 fdout: 2069 if (fp != NULL) 2070 fdrop(fp, td); 2071 return (error); 2072 } 2073 2074 /* 2075 * Reposition read/write file offset. 
2076 */ 2077 #ifndef _SYS_SYSPROTO_H_ 2078 struct lseek_args { 2079 int fd; 2080 int pad; 2081 off_t offset; 2082 int whence; 2083 }; 2084 #endif 2085 int 2086 sys_lseek(struct thread *td, struct lseek_args *uap) 2087 { 2088 2089 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2090 } 2091 2092 int 2093 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2094 { 2095 struct file *fp; 2096 int error; 2097 2098 AUDIT_ARG_FD(fd); 2099 error = fget(td, fd, &cap_seek_rights, &fp); 2100 if (error != 0) 2101 return (error); 2102 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2103 fo_seek(fp, offset, whence, td) : ESPIPE; 2104 fdrop(fp, td); 2105 return (error); 2106 } 2107 2108 #if defined(COMPAT_43) 2109 /* 2110 * Reposition read/write file offset. 2111 */ 2112 #ifndef _SYS_SYSPROTO_H_ 2113 struct olseek_args { 2114 int fd; 2115 long offset; 2116 int whence; 2117 }; 2118 #endif 2119 int 2120 olseek(struct thread *td, struct olseek_args *uap) 2121 { 2122 2123 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2124 } 2125 #endif /* COMPAT_43 */ 2126 2127 #if defined(COMPAT_FREEBSD6) 2128 /* Version with the 'pad' argument */ 2129 int 2130 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2131 { 2132 2133 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2134 } 2135 #endif 2136 2137 /* 2138 * Check access permissions using passed credentials. 2139 */ 2140 static int 2141 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2142 struct thread *td) 2143 { 2144 accmode_t accmode; 2145 int error; 2146 2147 /* Flags == 0 means only check for existence. 
*/ 2148 if (user_flags == 0) 2149 return (0); 2150 2151 accmode = 0; 2152 if (user_flags & R_OK) 2153 accmode |= VREAD; 2154 if (user_flags & W_OK) 2155 accmode |= VWRITE; 2156 if (user_flags & X_OK) 2157 accmode |= VEXEC; 2158 #ifdef MAC 2159 error = mac_vnode_check_access(cred, vp, accmode); 2160 if (error != 0) 2161 return (error); 2162 #endif 2163 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2164 error = VOP_ACCESS(vp, accmode, cred, td); 2165 return (error); 2166 } 2167 2168 /* 2169 * Check access permissions using "real" credentials. 2170 */ 2171 #ifndef _SYS_SYSPROTO_H_ 2172 struct access_args { 2173 char *path; 2174 int amode; 2175 }; 2176 #endif 2177 int 2178 sys_access(struct thread *td, struct access_args *uap) 2179 { 2180 2181 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2182 0, uap->amode)); 2183 } 2184 2185 #ifndef _SYS_SYSPROTO_H_ 2186 struct faccessat_args { 2187 int dirfd; 2188 char *path; 2189 int amode; 2190 int flag; 2191 } 2192 #endif 2193 int 2194 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2195 { 2196 2197 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2198 uap->amode)); 2199 } 2200 2201 int 2202 kern_accessat(struct thread *td, int fd, const char *path, 2203 enum uio_seg pathseg, int flag, int amode) 2204 { 2205 struct ucred *cred, *usecred; 2206 struct vnode *vp; 2207 struct nameidata nd; 2208 int error; 2209 2210 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2211 AT_SYMLINK_NOFOLLOW)) != 0) 2212 return (EINVAL); 2213 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2214 return (EINVAL); 2215 2216 /* 2217 * Create and modify a temporary credential instead of one that 2218 * is potentially shared (if we need one). 
2219 */ 2220 cred = td->td_ucred; 2221 if ((flag & AT_EACCESS) == 0 && 2222 ((cred->cr_uid != cred->cr_ruid || 2223 cred->cr_rgid != cred->cr_groups[0]))) { 2224 usecred = crdup(cred); 2225 usecred->cr_uid = cred->cr_ruid; 2226 usecred->cr_groups[0] = cred->cr_rgid; 2227 td->td_ucred = usecred; 2228 } else 2229 usecred = cred; 2230 AUDIT_ARG_VALUE(amode); 2231 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2232 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2233 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2234 if ((error = namei(&nd)) != 0) 2235 goto out; 2236 vp = nd.ni_vp; 2237 2238 error = vn_access(vp, amode, usecred, td); 2239 NDFREE_PNBUF(&nd); 2240 vput(vp); 2241 out: 2242 if (usecred != cred) { 2243 td->td_ucred = cred; 2244 crfree(usecred); 2245 } 2246 return (error); 2247 } 2248 2249 /* 2250 * Check access permissions using "effective" credentials. 2251 */ 2252 #ifndef _SYS_SYSPROTO_H_ 2253 struct eaccess_args { 2254 char *path; 2255 int amode; 2256 }; 2257 #endif 2258 int 2259 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2260 { 2261 2262 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2263 AT_EACCESS, uap->amode)); 2264 } 2265 2266 #if defined(COMPAT_43) 2267 /* 2268 * Get file status; this version follows links. 2269 */ 2270 #ifndef _SYS_SYSPROTO_H_ 2271 struct ostat_args { 2272 char *path; 2273 struct ostat *ub; 2274 }; 2275 #endif 2276 int 2277 ostat(struct thread *td, struct ostat_args *uap) 2278 { 2279 struct stat sb; 2280 struct ostat osb; 2281 int error; 2282 2283 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2284 if (error != 0) 2285 return (error); 2286 cvtstat(&sb, &osb); 2287 return (copyout(&osb, uap->ub, sizeof (osb))); 2288 } 2289 2290 /* 2291 * Get file status; this version does not follow links. 
/*
 * Convert a struct stat into the older struct freebsd11_stat layout.
 *
 * For st_dev, st_ino, st_nlink and st_rdev the value is assigned and then
 * compared with the source; a mismatch means it did not survive the
 * assignment.  The ino64_trunc_error setting then decides what happens:
 *   1     - fail with EOVERFLOW;
 *   2     - st_ino is clamped to UINT32_MAX and st_nlink to UINT16_MAX,
 *           while st_dev and st_rdev keep the truncated value;
 *   other - the truncated value is kept silently.
 *
 * Returns 0 on success or EOVERFLOW.  On EOVERFLOW *ost is only partly
 * filled in.
 */
int
freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost)
{

	ost->st_dev = st->st_dev;
	if (ost->st_dev != st->st_dev) {
		switch (ino64_trunc_error) {
		default:
			/*
			 * Since dev_t is almost raw, don't clamp to the
			 * maximum for case 2, but ignore the error.
			 */
			break;
		case 1:
			return (EOVERFLOW);
		}
	}
	ost->st_ino = st->st_ino;
	if (ost->st_ino != st->st_ino) {
		switch (ino64_trunc_error) {
		default:
		case 0:
			break;
		case 1:
			return (EOVERFLOW);
		case 2:
			ost->st_ino = UINT32_MAX;
			break;
		}
	}
	ost->st_mode = st->st_mode;
	ost->st_nlink = st->st_nlink;
	if (ost->st_nlink != st->st_nlink) {
		switch (ino64_trunc_error) {
		default:
		case 0:
			break;
		case 1:
			return (EOVERFLOW);
		case 2:
			ost->st_nlink = UINT16_MAX;
			break;
		}
	}
	ost->st_uid = st->st_uid;
	ost->st_gid = st->st_gid;
	ost->st_rdev = st->st_rdev;
	if (ost->st_rdev != st->st_rdev) {
		switch (ino64_trunc_error) {
		default:
			/* As for st_dev: keep the truncated value. */
			break;
		case 1:
			return (EOVERFLOW);
		}
	}
	ost->st_atim = st->st_atim;
	ost->st_mtim = st->st_mtim;
	ost->st_ctim = st->st_ctim;
	ost->st_size = st->st_size;
	ost->st_blocks = st->st_blocks;
	ost->st_blksize = st->st_blksize;
	ost->st_flags = st->st_flags;
	ost->st_gen = st->st_gen;
	ost->st_lspare = 0;
	ost->st_birthtim = st->st_birthtim;
	/*
	 * Zero everything that follows st_birthtim up to the end of the
	 * structure, so no byte past the last assigned field is left
	 * uninitialized.
	 */
	bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim),
	    sizeof(*ost) - offsetof(struct freebsd11_stat,
	    st_birthtim) - sizeof(ost->st_birthtim));
	return (0);
}
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2442 UIO_USERSPACE, &sb); 2443 if (error != 0) 2444 return (error); 2445 error = freebsd11_cvtstat(&sb, &osb); 2446 if (error == 0) 2447 error = copyout(&osb, uap->ub, sizeof(osb)); 2448 return (error); 2449 } 2450 2451 int 2452 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2453 { 2454 struct fhandle fh; 2455 struct stat sb; 2456 struct freebsd11_stat osb; 2457 int error; 2458 2459 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2460 if (error != 0) 2461 return (error); 2462 error = kern_fhstat(td, fh, &sb); 2463 if (error != 0) 2464 return (error); 2465 error = freebsd11_cvtstat(&sb, &osb); 2466 if (error == 0) 2467 error = copyout(&osb, uap->sb, sizeof(osb)); 2468 return (error); 2469 } 2470 2471 int 2472 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2473 { 2474 struct stat sb; 2475 struct freebsd11_stat osb; 2476 int error; 2477 2478 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2479 UIO_USERSPACE, &sb); 2480 if (error != 0) 2481 return (error); 2482 error = freebsd11_cvtstat(&sb, &osb); 2483 if (error == 0) 2484 error = copyout(&osb, uap->buf, sizeof(osb)); 2485 return (error); 2486 } 2487 #endif /* COMPAT_FREEBSD11 */ 2488 2489 /* 2490 * Get file status 2491 */ 2492 #ifndef _SYS_SYSPROTO_H_ 2493 struct fstatat_args { 2494 int fd; 2495 char *path; 2496 struct stat *buf; 2497 int flag; 2498 } 2499 #endif 2500 int 2501 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2502 { 2503 struct stat sb; 2504 int error; 2505 2506 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2507 UIO_USERSPACE, &sb); 2508 if (error == 0) 2509 error = copyout(&sb, uap->buf, sizeof (sb)); 2510 return (error); 2511 } 2512 2513 int 2514 kern_statat(struct thread *td, int flag, int fd, const char *path, 2515 enum uio_seg pathseg, struct stat *sbp) 2516 { 2517 struct nameidata nd; 2518 int error; 2519 2520 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2521 AT_EMPTY_PATH)) != 0) 2522 return (EINVAL); 2523 2524 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2525 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2526 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2527 2528 if ((error = namei(&nd)) != 0) { 2529 if (error == ENOTDIR && 2530 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2531 error = kern_fstat(td, fd, sbp); 2532 return (error); 2533 } 2534 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2535 NDFREE_PNBUF(&nd); 2536 vput(nd.ni_vp); 2537 #ifdef __STAT_TIME_T_EXT 2538 sbp->st_atim_ext = 0; 2539 sbp->st_mtim_ext = 0; 2540 sbp->st_ctim_ext = 0; 2541 sbp->st_btim_ext = 0; 2542 #endif 2543 #ifdef KTRACE 2544 if (KTRPOINT(td, KTR_STRUCT)) 2545 ktrstat_error(sbp, error); 2546 #endif 2547 return (error); 2548 } 2549 2550 #if defined(COMPAT_FREEBSD11) 2551 /* 2552 * Implementation of the NetBSD [l]stat() functions. 2553 */ 2554 int 2555 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2556 { 2557 struct freebsd11_stat sb11; 2558 int error; 2559 2560 error = freebsd11_cvtstat(sb, &sb11); 2561 if (error != 0) 2562 return (error); 2563 2564 bzero(nsb, sizeof(*nsb)); 2565 CP(sb11, *nsb, st_dev); 2566 CP(sb11, *nsb, st_ino); 2567 CP(sb11, *nsb, st_mode); 2568 CP(sb11, *nsb, st_nlink); 2569 CP(sb11, *nsb, st_uid); 2570 CP(sb11, *nsb, st_gid); 2571 CP(sb11, *nsb, st_rdev); 2572 CP(sb11, *nsb, st_atim); 2573 CP(sb11, *nsb, st_mtim); 2574 CP(sb11, *nsb, st_ctim); 2575 CP(sb11, *nsb, st_size); 2576 CP(sb11, *nsb, st_blocks); 2577 CP(sb11, *nsb, st_blksize); 2578 CP(sb11, *nsb, st_flags); 2579 CP(sb11, *nsb, st_gen); 2580 CP(sb11, *nsb, st_birthtim); 2581 return (0); 2582 } 2583 2584 #ifndef _SYS_SYSPROTO_H_ 2585 struct freebsd11_nstat_args { 2586 char *path; 2587 struct nstat *ub; 2588 }; 2589 #endif 2590 int 2591 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2592 { 2593 struct stat sb; 2594 struct nstat nsb; 2595 int error; 2596 
2597 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2598 if (error != 0) 2599 return (error); 2600 error = freebsd11_cvtnstat(&sb, &nsb); 2601 if (error == 0) 2602 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2603 return (error); 2604 } 2605 2606 /* 2607 * NetBSD lstat. Get file status; this version does not follow links. 2608 */ 2609 #ifndef _SYS_SYSPROTO_H_ 2610 struct freebsd11_nlstat_args { 2611 char *path; 2612 struct nstat *ub; 2613 }; 2614 #endif 2615 int 2616 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2617 { 2618 struct stat sb; 2619 struct nstat nsb; 2620 int error; 2621 2622 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2623 UIO_USERSPACE, &sb); 2624 if (error != 0) 2625 return (error); 2626 error = freebsd11_cvtnstat(&sb, &nsb); 2627 if (error == 0) 2628 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2629 return (error); 2630 } 2631 #endif /* COMPAT_FREEBSD11 */ 2632 2633 /* 2634 * Get configurable pathname variables. 
2635 */ 2636 #ifndef _SYS_SYSPROTO_H_ 2637 struct pathconf_args { 2638 char *path; 2639 int name; 2640 }; 2641 #endif 2642 int 2643 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2644 { 2645 long value; 2646 int error; 2647 2648 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2649 &value); 2650 if (error == 0) 2651 td->td_retval[0] = value; 2652 return (error); 2653 } 2654 2655 #ifndef _SYS_SYSPROTO_H_ 2656 struct lpathconf_args { 2657 char *path; 2658 int name; 2659 }; 2660 #endif 2661 int 2662 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2663 { 2664 long value; 2665 int error; 2666 2667 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2668 NOFOLLOW, &value); 2669 if (error == 0) 2670 td->td_retval[0] = value; 2671 return (error); 2672 } 2673 2674 int 2675 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2676 int name, u_long flags, long *valuep) 2677 { 2678 struct nameidata nd; 2679 int error; 2680 2681 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2682 pathseg, path); 2683 if ((error = namei(&nd)) != 0) 2684 return (error); 2685 NDFREE_PNBUF(&nd); 2686 2687 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2688 vput(nd.ni_vp); 2689 return (error); 2690 } 2691 2692 /* 2693 * Return target name of a symbolic link. 
2694 */ 2695 #ifndef _SYS_SYSPROTO_H_ 2696 struct readlink_args { 2697 char *path; 2698 char *buf; 2699 size_t count; 2700 }; 2701 #endif 2702 int 2703 sys_readlink(struct thread *td, struct readlink_args *uap) 2704 { 2705 2706 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2707 uap->buf, UIO_USERSPACE, uap->count)); 2708 } 2709 #ifndef _SYS_SYSPROTO_H_ 2710 struct readlinkat_args { 2711 int fd; 2712 char *path; 2713 char *buf; 2714 size_t bufsize; 2715 }; 2716 #endif 2717 int 2718 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2719 { 2720 2721 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2722 uap->buf, UIO_USERSPACE, uap->bufsize)); 2723 } 2724 2725 int 2726 kern_readlinkat(struct thread *td, int fd, const char *path, 2727 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2728 { 2729 struct vnode *vp; 2730 struct nameidata nd; 2731 int error; 2732 2733 if (count > IOSIZE_MAX) 2734 return (EINVAL); 2735 2736 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2737 EMPTYPATH, pathseg, path, fd); 2738 2739 if ((error = namei(&nd)) != 0) 2740 return (error); 2741 NDFREE_PNBUF(&nd); 2742 vp = nd.ni_vp; 2743 2744 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2745 vput(vp); 2746 2747 return (error); 2748 } 2749 2750 /* 2751 * Helper function to readlink from a vnode 2752 */ 2753 static int 2754 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2755 struct thread *td) 2756 { 2757 struct iovec aiov; 2758 struct uio auio; 2759 int error; 2760 2761 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2762 #ifdef MAC 2763 error = mac_vnode_check_readlink(td->td_ucred, vp); 2764 if (error != 0) 2765 return (error); 2766 #endif 2767 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2768 return (EINVAL); 2769 2770 aiov.iov_base = buf; 2771 aiov.iov_len = count; 2772 auio.uio_iov = &aiov; 2773 auio.uio_iovcnt = 1; 2774 auio.uio_offset = 
0; 2775 auio.uio_rw = UIO_READ; 2776 auio.uio_segflg = bufseg; 2777 auio.uio_td = td; 2778 auio.uio_resid = count; 2779 error = VOP_READLINK(vp, &auio, td->td_ucred); 2780 td->td_retval[0] = count - auio.uio_resid; 2781 return (error); 2782 } 2783 2784 /* 2785 * Common implementation code for chflags() and fchflags(). 2786 */ 2787 static int 2788 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2789 { 2790 struct mount *mp; 2791 struct vattr vattr; 2792 int error; 2793 2794 /* We can't support the value matching VNOVAL. */ 2795 if (flags == VNOVAL) 2796 return (EOPNOTSUPP); 2797 2798 /* 2799 * Prevent non-root users from setting flags on devices. When 2800 * a device is reused, users can retain ownership of the device 2801 * if they are allowed to set flags and programs assume that 2802 * chown can't fail when done as root. 2803 */ 2804 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2805 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2806 if (error != 0) 2807 return (error); 2808 } 2809 2810 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2811 return (error); 2812 VATTR_NULL(&vattr); 2813 vattr.va_flags = flags; 2814 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2815 #ifdef MAC 2816 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2817 if (error == 0) 2818 #endif 2819 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2820 VOP_UNLOCK(vp); 2821 vn_finished_write(mp); 2822 return (error); 2823 } 2824 2825 /* 2826 * Change flags of a file given a path name. 
2827 */ 2828 #ifndef _SYS_SYSPROTO_H_ 2829 struct chflags_args { 2830 const char *path; 2831 u_long flags; 2832 }; 2833 #endif 2834 int 2835 sys_chflags(struct thread *td, struct chflags_args *uap) 2836 { 2837 2838 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2839 uap->flags, 0)); 2840 } 2841 2842 #ifndef _SYS_SYSPROTO_H_ 2843 struct chflagsat_args { 2844 int fd; 2845 const char *path; 2846 u_long flags; 2847 int atflag; 2848 } 2849 #endif 2850 int 2851 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2852 { 2853 2854 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2855 uap->flags, uap->atflag)); 2856 } 2857 2858 /* 2859 * Same as chflags() but doesn't follow symlinks. 2860 */ 2861 #ifndef _SYS_SYSPROTO_H_ 2862 struct lchflags_args { 2863 const char *path; 2864 u_long flags; 2865 }; 2866 #endif 2867 int 2868 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2869 { 2870 2871 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2872 uap->flags, AT_SYMLINK_NOFOLLOW)); 2873 } 2874 2875 static int 2876 kern_chflagsat(struct thread *td, int fd, const char *path, 2877 enum uio_seg pathseg, u_long flags, int atflag) 2878 { 2879 struct nameidata nd; 2880 int error; 2881 2882 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2883 AT_EMPTY_PATH)) != 0) 2884 return (EINVAL); 2885 2886 AUDIT_ARG_FFLAGS(flags); 2887 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2888 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2889 fd, &cap_fchflags_rights); 2890 if ((error = namei(&nd)) != 0) 2891 return (error); 2892 NDFREE_PNBUF(&nd); 2893 error = setfflags(td, nd.ni_vp, flags); 2894 vrele(nd.ni_vp); 2895 return (error); 2896 } 2897 2898 /* 2899 * Change flags of a file given a file descriptor. 
2900 */ 2901 #ifndef _SYS_SYSPROTO_H_ 2902 struct fchflags_args { 2903 int fd; 2904 u_long flags; 2905 }; 2906 #endif 2907 int 2908 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2909 { 2910 struct file *fp; 2911 int error; 2912 2913 AUDIT_ARG_FD(uap->fd); 2914 AUDIT_ARG_FFLAGS(uap->flags); 2915 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2916 &fp); 2917 if (error != 0) 2918 return (error); 2919 #ifdef AUDIT 2920 if (AUDITING_TD(td)) { 2921 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2922 AUDIT_ARG_VNODE1(fp->f_vnode); 2923 VOP_UNLOCK(fp->f_vnode); 2924 } 2925 #endif 2926 error = setfflags(td, fp->f_vnode, uap->flags); 2927 fdrop(fp, td); 2928 return (error); 2929 } 2930 2931 /* 2932 * Common implementation code for chmod(), lchmod() and fchmod(). 2933 */ 2934 int 2935 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2936 { 2937 struct mount *mp; 2938 struct vattr vattr; 2939 int error; 2940 2941 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2942 return (error); 2943 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2944 VATTR_NULL(&vattr); 2945 vattr.va_mode = mode & ALLPERMS; 2946 #ifdef MAC 2947 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2948 if (error == 0) 2949 #endif 2950 error = VOP_SETATTR(vp, &vattr, cred); 2951 VOP_UNLOCK(vp); 2952 vn_finished_write(mp); 2953 return (error); 2954 } 2955 2956 /* 2957 * Change mode of a file given path name. 
2958 */ 2959 #ifndef _SYS_SYSPROTO_H_ 2960 struct chmod_args { 2961 char *path; 2962 int mode; 2963 }; 2964 #endif 2965 int 2966 sys_chmod(struct thread *td, struct chmod_args *uap) 2967 { 2968 2969 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2970 uap->mode, 0)); 2971 } 2972 2973 #ifndef _SYS_SYSPROTO_H_ 2974 struct fchmodat_args { 2975 int dirfd; 2976 char *path; 2977 mode_t mode; 2978 int flag; 2979 } 2980 #endif 2981 int 2982 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2983 { 2984 2985 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2986 uap->mode, uap->flag)); 2987 } 2988 2989 /* 2990 * Change mode of a file given path name (don't follow links.) 2991 */ 2992 #ifndef _SYS_SYSPROTO_H_ 2993 struct lchmod_args { 2994 char *path; 2995 int mode; 2996 }; 2997 #endif 2998 int 2999 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3000 { 3001 3002 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3003 uap->mode, AT_SYMLINK_NOFOLLOW)); 3004 } 3005 3006 int 3007 kern_fchmodat(struct thread *td, int fd, const char *path, 3008 enum uio_seg pathseg, mode_t mode, int flag) 3009 { 3010 struct nameidata nd; 3011 int error; 3012 3013 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3014 AT_EMPTY_PATH)) != 0) 3015 return (EINVAL); 3016 3017 AUDIT_ARG_MODE(mode); 3018 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3019 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3020 fd, &cap_fchmod_rights); 3021 if ((error = namei(&nd)) != 0) 3022 return (error); 3023 NDFREE_PNBUF(&nd); 3024 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3025 vrele(nd.ni_vp); 3026 return (error); 3027 } 3028 3029 /* 3030 * Change mode of a file given a file descriptor. 
3031 */ 3032 #ifndef _SYS_SYSPROTO_H_ 3033 struct fchmod_args { 3034 int fd; 3035 int mode; 3036 }; 3037 #endif 3038 int 3039 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3040 { 3041 struct file *fp; 3042 int error; 3043 3044 AUDIT_ARG_FD(uap->fd); 3045 AUDIT_ARG_MODE(uap->mode); 3046 3047 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3048 if (error != 0) 3049 return (error); 3050 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3051 fdrop(fp, td); 3052 return (error); 3053 } 3054 3055 /* 3056 * Common implementation for chown(), lchown(), and fchown() 3057 */ 3058 int 3059 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3060 gid_t gid) 3061 { 3062 struct mount *mp; 3063 struct vattr vattr; 3064 int error; 3065 3066 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3067 return (error); 3068 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3069 VATTR_NULL(&vattr); 3070 vattr.va_uid = uid; 3071 vattr.va_gid = gid; 3072 #ifdef MAC 3073 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3074 vattr.va_gid); 3075 if (error == 0) 3076 #endif 3077 error = VOP_SETATTR(vp, &vattr, cred); 3078 VOP_UNLOCK(vp); 3079 vn_finished_write(mp); 3080 return (error); 3081 } 3082 3083 /* 3084 * Set ownership given a path name. 
3085 */ 3086 #ifndef _SYS_SYSPROTO_H_ 3087 struct chown_args { 3088 char *path; 3089 int uid; 3090 int gid; 3091 }; 3092 #endif 3093 int 3094 sys_chown(struct thread *td, struct chown_args *uap) 3095 { 3096 3097 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3098 uap->gid, 0)); 3099 } 3100 3101 #ifndef _SYS_SYSPROTO_H_ 3102 struct fchownat_args { 3103 int fd; 3104 const char * path; 3105 uid_t uid; 3106 gid_t gid; 3107 int flag; 3108 }; 3109 #endif 3110 int 3111 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3112 { 3113 3114 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3115 uap->gid, uap->flag)); 3116 } 3117 3118 int 3119 kern_fchownat(struct thread *td, int fd, const char *path, 3120 enum uio_seg pathseg, int uid, int gid, int flag) 3121 { 3122 struct nameidata nd; 3123 int error; 3124 3125 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3126 AT_EMPTY_PATH)) != 0) 3127 return (EINVAL); 3128 3129 AUDIT_ARG_OWNER(uid, gid); 3130 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3131 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3132 fd, &cap_fchown_rights); 3133 3134 if ((error = namei(&nd)) != 0) 3135 return (error); 3136 NDFREE_PNBUF(&nd); 3137 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3138 vrele(nd.ni_vp); 3139 return (error); 3140 } 3141 3142 /* 3143 * Set ownership given a path name, do not cross symlinks. 3144 */ 3145 #ifndef _SYS_SYSPROTO_H_ 3146 struct lchown_args { 3147 char *path; 3148 int uid; 3149 int gid; 3150 }; 3151 #endif 3152 int 3153 sys_lchown(struct thread *td, struct lchown_args *uap) 3154 { 3155 3156 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3157 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3158 } 3159 3160 /* 3161 * Set ownership given a file descriptor. 
3162 */ 3163 #ifndef _SYS_SYSPROTO_H_ 3164 struct fchown_args { 3165 int fd; 3166 int uid; 3167 int gid; 3168 }; 3169 #endif 3170 int 3171 sys_fchown(struct thread *td, struct fchown_args *uap) 3172 { 3173 struct file *fp; 3174 int error; 3175 3176 AUDIT_ARG_FD(uap->fd); 3177 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3178 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3179 if (error != 0) 3180 return (error); 3181 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3182 fdrop(fp, td); 3183 return (error); 3184 } 3185 3186 /* 3187 * Common implementation code for utimes(), lutimes(), and futimes(). 3188 */ 3189 static int 3190 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3191 struct timespec *tsp) 3192 { 3193 struct timeval tv[2]; 3194 const struct timeval *tvp; 3195 int error; 3196 3197 if (usrtvp == NULL) { 3198 vfs_timestamp(&tsp[0]); 3199 tsp[1] = tsp[0]; 3200 } else { 3201 if (tvpseg == UIO_SYSSPACE) { 3202 tvp = usrtvp; 3203 } else { 3204 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3205 return (error); 3206 tvp = tv; 3207 } 3208 3209 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3210 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3211 return (EINVAL); 3212 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3213 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3214 } 3215 return (0); 3216 } 3217 3218 /* 3219 * Common implementation code for futimens(), utimensat(). 
3220 */ 3221 #define UTIMENS_NULL 0x1 3222 #define UTIMENS_EXIT 0x2 3223 static int 3224 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3225 struct timespec *tsp, int *retflags) 3226 { 3227 struct timespec tsnow; 3228 int error; 3229 3230 vfs_timestamp(&tsnow); 3231 *retflags = 0; 3232 if (usrtsp == NULL) { 3233 tsp[0] = tsnow; 3234 tsp[1] = tsnow; 3235 *retflags |= UTIMENS_NULL; 3236 return (0); 3237 } 3238 if (tspseg == UIO_SYSSPACE) { 3239 tsp[0] = usrtsp[0]; 3240 tsp[1] = usrtsp[1]; 3241 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3242 return (error); 3243 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3244 *retflags |= UTIMENS_EXIT; 3245 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3246 *retflags |= UTIMENS_NULL; 3247 if (tsp[0].tv_nsec == UTIME_OMIT) 3248 tsp[0].tv_sec = VNOVAL; 3249 else if (tsp[0].tv_nsec == UTIME_NOW) 3250 tsp[0] = tsnow; 3251 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3252 return (EINVAL); 3253 if (tsp[1].tv_nsec == UTIME_OMIT) 3254 tsp[1].tv_sec = VNOVAL; 3255 else if (tsp[1].tv_nsec == UTIME_NOW) 3256 tsp[1] = tsnow; 3257 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3258 return (EINVAL); 3259 3260 return (0); 3261 } 3262 3263 /* 3264 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3265 * and utimensat(). 
3266 */ 3267 static int 3268 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3269 int numtimes, int nullflag) 3270 { 3271 struct mount *mp; 3272 struct vattr vattr; 3273 int error; 3274 bool setbirthtime; 3275 3276 setbirthtime = false; 3277 vattr.va_birthtime.tv_sec = VNOVAL; 3278 vattr.va_birthtime.tv_nsec = 0; 3279 3280 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3281 return (error); 3282 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3283 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3284 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3285 setbirthtime = true; 3286 VATTR_NULL(&vattr); 3287 vattr.va_atime = ts[0]; 3288 vattr.va_mtime = ts[1]; 3289 if (setbirthtime) 3290 vattr.va_birthtime = ts[1]; 3291 if (numtimes > 2) 3292 vattr.va_birthtime = ts[2]; 3293 if (nullflag) 3294 vattr.va_vaflags |= VA_UTIMES_NULL; 3295 #ifdef MAC 3296 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3297 vattr.va_mtime); 3298 #endif 3299 if (error == 0) 3300 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3301 VOP_UNLOCK(vp); 3302 vn_finished_write(mp); 3303 return (error); 3304 } 3305 3306 /* 3307 * Set the access and modification times of a file. 
3308 */ 3309 #ifndef _SYS_SYSPROTO_H_ 3310 struct utimes_args { 3311 char *path; 3312 struct timeval *tptr; 3313 }; 3314 #endif 3315 int 3316 sys_utimes(struct thread *td, struct utimes_args *uap) 3317 { 3318 3319 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3320 uap->tptr, UIO_USERSPACE)); 3321 } 3322 3323 #ifndef _SYS_SYSPROTO_H_ 3324 struct futimesat_args { 3325 int fd; 3326 const char * path; 3327 const struct timeval * times; 3328 }; 3329 #endif 3330 int 3331 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3332 { 3333 3334 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3335 uap->times, UIO_USERSPACE)); 3336 } 3337 3338 int 3339 kern_utimesat(struct thread *td, int fd, const char *path, 3340 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3341 { 3342 struct nameidata nd; 3343 struct timespec ts[2]; 3344 int error; 3345 3346 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3347 return (error); 3348 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3349 &cap_futimes_rights); 3350 3351 if ((error = namei(&nd)) != 0) 3352 return (error); 3353 NDFREE_PNBUF(&nd); 3354 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3355 vrele(nd.ni_vp); 3356 return (error); 3357 } 3358 3359 /* 3360 * Set the access and modification times of a file. 
3361 */ 3362 #ifndef _SYS_SYSPROTO_H_ 3363 struct lutimes_args { 3364 char *path; 3365 struct timeval *tptr; 3366 }; 3367 #endif 3368 int 3369 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3370 { 3371 3372 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3373 UIO_USERSPACE)); 3374 } 3375 3376 int 3377 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3378 const struct timeval *tptr, enum uio_seg tptrseg) 3379 { 3380 struct timespec ts[2]; 3381 struct nameidata nd; 3382 int error; 3383 3384 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3385 return (error); 3386 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3387 if ((error = namei(&nd)) != 0) 3388 return (error); 3389 NDFREE_PNBUF(&nd); 3390 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3391 vrele(nd.ni_vp); 3392 return (error); 3393 } 3394 3395 /* 3396 * Set the access and modification times of a file. 3397 */ 3398 #ifndef _SYS_SYSPROTO_H_ 3399 struct futimes_args { 3400 int fd; 3401 struct timeval *tptr; 3402 }; 3403 #endif 3404 int 3405 sys_futimes(struct thread *td, struct futimes_args *uap) 3406 { 3407 3408 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3409 } 3410 3411 int 3412 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3413 enum uio_seg tptrseg) 3414 { 3415 struct timespec ts[2]; 3416 struct file *fp; 3417 int error; 3418 3419 AUDIT_ARG_FD(fd); 3420 error = getutimes(tptr, tptrseg, ts); 3421 if (error != 0) 3422 return (error); 3423 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3424 if (error != 0) 3425 return (error); 3426 #ifdef AUDIT 3427 if (AUDITING_TD(td)) { 3428 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3429 AUDIT_ARG_VNODE1(fp->f_vnode); 3430 VOP_UNLOCK(fp->f_vnode); 3431 } 3432 #endif 3433 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3434 fdrop(fp, td); 3435 return (error); 3436 } 3437 3438 int 3439 sys_futimens(struct thread *td, struct futimens_args *uap) 3440 { 3441 
3442 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3443 } 3444 3445 int 3446 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3447 enum uio_seg tptrseg) 3448 { 3449 struct timespec ts[2]; 3450 struct file *fp; 3451 int error, flags; 3452 3453 AUDIT_ARG_FD(fd); 3454 error = getutimens(tptr, tptrseg, ts, &flags); 3455 if (error != 0) 3456 return (error); 3457 if (flags & UTIMENS_EXIT) 3458 return (0); 3459 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3460 if (error != 0) 3461 return (error); 3462 #ifdef AUDIT 3463 if (AUDITING_TD(td)) { 3464 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3465 AUDIT_ARG_VNODE1(fp->f_vnode); 3466 VOP_UNLOCK(fp->f_vnode); 3467 } 3468 #endif 3469 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3470 fdrop(fp, td); 3471 return (error); 3472 } 3473 3474 int 3475 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3476 { 3477 3478 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3479 uap->times, UIO_USERSPACE, uap->flag)); 3480 } 3481 3482 int 3483 kern_utimensat(struct thread *td, int fd, const char *path, 3484 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3485 int flag) 3486 { 3487 struct nameidata nd; 3488 struct timespec ts[2]; 3489 int error, flags; 3490 3491 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3492 AT_EMPTY_PATH)) != 0) 3493 return (EINVAL); 3494 3495 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3496 return (error); 3497 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3498 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3499 pathseg, path, fd, &cap_futimes_rights); 3500 if ((error = namei(&nd)) != 0) 3501 return (error); 3502 /* 3503 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3504 * POSIX states: 3505 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3506 * "Search permission is denied by a component of the path prefix." 
3507 */ 3508 NDFREE_PNBUF(&nd); 3509 if ((flags & UTIMENS_EXIT) == 0) 3510 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3511 vrele(nd.ni_vp); 3512 return (error); 3513 } 3514 3515 /* 3516 * Truncate a file given its path name. 3517 */ 3518 #ifndef _SYS_SYSPROTO_H_ 3519 struct truncate_args { 3520 char *path; 3521 int pad; 3522 off_t length; 3523 }; 3524 #endif 3525 int 3526 sys_truncate(struct thread *td, struct truncate_args *uap) 3527 { 3528 3529 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3530 } 3531 3532 int 3533 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3534 off_t length) 3535 { 3536 struct mount *mp; 3537 struct vnode *vp; 3538 void *rl_cookie; 3539 struct nameidata nd; 3540 int error; 3541 3542 if (length < 0) 3543 return (EINVAL); 3544 NDPREINIT(&nd); 3545 retry: 3546 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3547 if ((error = namei(&nd)) != 0) 3548 return (error); 3549 vp = nd.ni_vp; 3550 NDFREE_PNBUF(&nd); 3551 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3552 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3553 vn_rangelock_unlock(vp, rl_cookie); 3554 vrele(vp); 3555 return (error); 3556 } 3557 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3558 if (vp->v_type == VDIR) { 3559 error = EISDIR; 3560 goto out; 3561 } 3562 #ifdef MAC 3563 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3564 if (error != 0) 3565 goto out; 3566 #endif 3567 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3568 if (error != 0) 3569 goto out; 3570 3571 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3572 out: 3573 VOP_UNLOCK(vp); 3574 vn_finished_write(mp); 3575 vn_rangelock_unlock(vp, rl_cookie); 3576 vrele(vp); 3577 if (error == ERELOOKUP) 3578 goto retry; 3579 return (error); 3580 } 3581 3582 #if defined(COMPAT_43) 3583 /* 3584 * Truncate a file given its path name. 
3585 */ 3586 #ifndef _SYS_SYSPROTO_H_ 3587 struct otruncate_args { 3588 char *path; 3589 long length; 3590 }; 3591 #endif 3592 int 3593 otruncate(struct thread *td, struct otruncate_args *uap) 3594 { 3595 3596 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3597 } 3598 #endif /* COMPAT_43 */ 3599 3600 #if defined(COMPAT_FREEBSD6) 3601 /* Versions with the pad argument */ 3602 int 3603 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3604 { 3605 3606 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3607 } 3608 3609 int 3610 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3611 { 3612 3613 return (kern_ftruncate(td, uap->fd, uap->length)); 3614 } 3615 #endif 3616 3617 int 3618 kern_fsync(struct thread *td, int fd, bool fullsync) 3619 { 3620 struct vnode *vp; 3621 struct mount *mp; 3622 struct file *fp; 3623 int error; 3624 3625 AUDIT_ARG_FD(fd); 3626 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3627 if (error != 0) 3628 return (error); 3629 vp = fp->f_vnode; 3630 #if 0 3631 if (!fullsync) 3632 /* XXXKIB: compete outstanding aio writes */; 3633 #endif 3634 retry: 3635 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3636 if (error != 0) 3637 goto drop; 3638 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3639 AUDIT_ARG_VNODE1(vp); 3640 vnode_pager_clean_async(vp); 3641 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3642 VOP_UNLOCK(vp); 3643 vn_finished_write(mp); 3644 if (error == ERELOOKUP) 3645 goto retry; 3646 drop: 3647 fdrop(fp, td); 3648 return (error); 3649 } 3650 3651 /* 3652 * Sync an open file. 
3653 */ 3654 #ifndef _SYS_SYSPROTO_H_ 3655 struct fsync_args { 3656 int fd; 3657 }; 3658 #endif 3659 int 3660 sys_fsync(struct thread *td, struct fsync_args *uap) 3661 { 3662 3663 return (kern_fsync(td, uap->fd, true)); 3664 } 3665 3666 int 3667 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3668 { 3669 3670 return (kern_fsync(td, uap->fd, false)); 3671 } 3672 3673 /* 3674 * Rename files. Source and destination must either both be directories, or 3675 * both not be directories. If target is a directory, it must be empty. 3676 */ 3677 #ifndef _SYS_SYSPROTO_H_ 3678 struct rename_args { 3679 char *from; 3680 char *to; 3681 }; 3682 #endif 3683 int 3684 sys_rename(struct thread *td, struct rename_args *uap) 3685 { 3686 3687 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3688 uap->to, UIO_USERSPACE)); 3689 } 3690 3691 #ifndef _SYS_SYSPROTO_H_ 3692 struct renameat_args { 3693 int oldfd; 3694 char *old; 3695 int newfd; 3696 char *new; 3697 }; 3698 #endif 3699 int 3700 sys_renameat(struct thread *td, struct renameat_args *uap) 3701 { 3702 3703 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3704 UIO_USERSPACE)); 3705 } 3706 3707 #ifdef MAC 3708 static int 3709 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3710 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3711 { 3712 int error; 3713 3714 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3715 pathseg, old, oldfd, &cap_renameat_source_rights); 3716 if ((error = namei(fromnd)) != 0) 3717 return (error); 3718 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3719 fromnd->ni_vp, &fromnd->ni_cnd); 3720 VOP_UNLOCK(fromnd->ni_dvp); 3721 if (fromnd->ni_dvp != fromnd->ni_vp) 3722 VOP_UNLOCK(fromnd->ni_vp); 3723 if (error != 0) { 3724 NDFREE_PNBUF(fromnd); 3725 vrele(fromnd->ni_dvp); 3726 vrele(fromnd->ni_vp); 3727 } 3728 return (error); 3729 } 3730 #endif 3731 3732 int 3733 kern_renameat(struct thread *td, 
int oldfd, const char *old, int newfd, 3734 const char *new, enum uio_seg pathseg) 3735 { 3736 struct mount *mp = NULL; 3737 struct vnode *tvp, *fvp, *tdvp; 3738 struct nameidata fromnd, tond; 3739 uint64_t tondflags; 3740 int error; 3741 short irflag; 3742 3743 again: 3744 bwillwrite(); 3745 #ifdef MAC 3746 if (mac_vnode_check_rename_from_enabled()) { 3747 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3748 &fromnd); 3749 if (error != 0) 3750 return (error); 3751 } else { 3752 #endif 3753 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3754 pathseg, old, oldfd, &cap_renameat_source_rights); 3755 if ((error = namei(&fromnd)) != 0) 3756 return (error); 3757 #ifdef MAC 3758 } 3759 #endif 3760 fvp = fromnd.ni_vp; 3761 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3762 if (fromnd.ni_vp->v_type == VDIR) 3763 tondflags |= WILLBEDIR; 3764 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3765 &cap_renameat_target_rights); 3766 if ((error = namei(&tond)) != 0) { 3767 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3768 if (error == EISDIR && fvp->v_type == VDIR) 3769 error = EINVAL; 3770 NDFREE_PNBUF(&fromnd); 3771 vrele(fromnd.ni_dvp); 3772 vrele(fvp); 3773 goto out1; 3774 } 3775 tdvp = tond.ni_dvp; 3776 tvp = tond.ni_vp; 3777 error = vn_start_write(fvp, &mp, V_NOWAIT); 3778 if (error != 0) { 3779 NDFREE_PNBUF(&fromnd); 3780 NDFREE_PNBUF(&tond); 3781 if (tvp != NULL) 3782 vput(tvp); 3783 if (tdvp == tvp) 3784 vrele(tdvp); 3785 else 3786 vput(tdvp); 3787 vrele(fromnd.ni_dvp); 3788 vrele(fvp); 3789 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3790 if (error != 0) 3791 return (error); 3792 goto again; 3793 } 3794 irflag = vn_irflag_read(fvp); 3795 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3796 (irflag & VIRF_NAMEDDIR) != 0) { 3797 error = EINVAL; 3798 goto out; 3799 } 3800 if (tvp != NULL) { 3801 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3802 error = ENOTDIR; 3803 goto out; 3804 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3805 error = EISDIR; 3806 goto out; 3807 } 3808 #ifdef CAPABILITIES 3809 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3810 /* 3811 * If the target already exists we require CAP_UNLINKAT 3812 * from 'newfd', when newfd was used for the lookup. 3813 */ 3814 error = cap_check(&tond.ni_filecaps.fc_rights, 3815 &cap_unlinkat_rights); 3816 if (error != 0) 3817 goto out; 3818 } 3819 #endif 3820 } 3821 if (fvp == tdvp) { 3822 error = EINVAL; 3823 goto out; 3824 } 3825 /* 3826 * If the source is the same as the destination (that is, if they 3827 * are links to the same vnode), then there is nothing to do. 
3828 */ 3829 if (fvp == tvp) 3830 error = ERESTART; 3831 #ifdef MAC 3832 else 3833 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3834 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3835 #endif 3836 out: 3837 if (error == 0) { 3838 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3839 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3840 NDFREE_PNBUF(&fromnd); 3841 NDFREE_PNBUF(&tond); 3842 } else { 3843 NDFREE_PNBUF(&fromnd); 3844 NDFREE_PNBUF(&tond); 3845 if (tvp != NULL) 3846 vput(tvp); 3847 if (tdvp == tvp) 3848 vrele(tdvp); 3849 else 3850 vput(tdvp); 3851 vrele(fromnd.ni_dvp); 3852 vrele(fvp); 3853 } 3854 vn_finished_write(mp); 3855 out1: 3856 if (error == ERESTART) 3857 return (0); 3858 if (error == ERELOOKUP) 3859 goto again; 3860 return (error); 3861 } 3862 3863 /* 3864 * Make a directory file. 3865 */ 3866 #ifndef _SYS_SYSPROTO_H_ 3867 struct mkdir_args { 3868 char *path; 3869 int mode; 3870 }; 3871 #endif 3872 int 3873 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3874 { 3875 3876 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3877 uap->mode)); 3878 } 3879 3880 #ifndef _SYS_SYSPROTO_H_ 3881 struct mkdirat_args { 3882 int fd; 3883 char *path; 3884 mode_t mode; 3885 }; 3886 #endif 3887 int 3888 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3889 { 3890 3891 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3892 } 3893 3894 int 3895 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3896 int mode) 3897 { 3898 struct mount *mp; 3899 struct vattr vattr; 3900 struct nameidata nd; 3901 int error; 3902 3903 AUDIT_ARG_MODE(mode); 3904 NDPREINIT(&nd); 3905 restart: 3906 bwillwrite(); 3907 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3908 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3909 segflg, path, fd, &cap_mkdirat_rights); 3910 if ((error = namei(&nd)) != 0) 3911 return (error); 3912 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 
3913 NDFREE_PNBUF(&nd); 3914 vput(nd.ni_dvp); 3915 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3916 return (error); 3917 goto restart; 3918 } 3919 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 3920 error = EINVAL; 3921 goto out; 3922 } 3923 VATTR_NULL(&vattr); 3924 vattr.va_type = VDIR; 3925 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3926 #ifdef MAC 3927 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3928 &vattr); 3929 if (error != 0) 3930 goto out; 3931 #endif 3932 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3933 out: 3934 NDFREE_PNBUF(&nd); 3935 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3936 vn_finished_write(mp); 3937 if (error == ERELOOKUP) 3938 goto restart; 3939 return (error); 3940 } 3941 3942 /* 3943 * Remove a directory file. 3944 */ 3945 #ifndef _SYS_SYSPROTO_H_ 3946 struct rmdir_args { 3947 char *path; 3948 }; 3949 #endif 3950 int 3951 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3952 { 3953 3954 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3955 0)); 3956 } 3957 3958 int 3959 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3960 enum uio_seg pathseg, int flag) 3961 { 3962 struct mount *mp; 3963 struct vnode *vp; 3964 struct file *fp; 3965 struct nameidata nd; 3966 cap_rights_t rights; 3967 int error; 3968 3969 fp = NULL; 3970 if (fd != FD_NONE) { 3971 error = getvnode(td, fd, cap_rights_init_one(&rights, 3972 CAP_LOOKUP), &fp); 3973 if (error != 0) 3974 return (error); 3975 } 3976 3977 NDPREINIT(&nd); 3978 restart: 3979 bwillwrite(); 3980 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3981 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3982 pathseg, path, dfd, &cap_unlinkat_rights); 3983 if ((error = namei(&nd)) != 0) 3984 goto fdout; 3985 vp = nd.ni_vp; 3986 if (vp->v_type != VDIR) { 3987 error = ENOTDIR; 3988 goto out; 3989 } 3990 /* 3991 * No rmdir "." please. 
3992 */ 3993 if (nd.ni_dvp == vp) { 3994 error = EINVAL; 3995 goto out; 3996 } 3997 /* 3998 * The root of a mounted filesystem cannot be deleted. 3999 */ 4000 if (vp->v_vflag & VV_ROOT) { 4001 error = EBUSY; 4002 goto out; 4003 } 4004 4005 if (fp != NULL && fp->f_vnode != vp) { 4006 if (VN_IS_DOOMED(fp->f_vnode)) 4007 error = EBADF; 4008 else 4009 error = EDEADLK; 4010 goto out; 4011 } 4012 4013 #ifdef MAC 4014 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4015 &nd.ni_cnd); 4016 if (error != 0) 4017 goto out; 4018 #endif 4019 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4020 NDFREE_PNBUF(&nd); 4021 vput(vp); 4022 if (nd.ni_dvp == vp) 4023 vrele(nd.ni_dvp); 4024 else 4025 vput(nd.ni_dvp); 4026 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4027 goto fdout; 4028 goto restart; 4029 } 4030 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4031 vn_finished_write(mp); 4032 out: 4033 NDFREE_PNBUF(&nd); 4034 vput(vp); 4035 if (nd.ni_dvp == vp) 4036 vrele(nd.ni_dvp); 4037 else 4038 vput(nd.ni_dvp); 4039 if (error == ERELOOKUP) 4040 goto restart; 4041 fdout: 4042 if (fp != NULL) 4043 fdrop(fp, td); 4044 return (error); 4045 } 4046 4047 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4048 int 4049 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4050 long *basep, void (*func)(struct freebsd11_dirent *)) 4051 { 4052 struct freebsd11_dirent dstdp; 4053 struct dirent *dp, *edp; 4054 char *dirbuf; 4055 off_t base; 4056 ssize_t resid, ucount; 4057 int error; 4058 4059 /* XXX arbitrary sanity limit on `count'. 
*/ 4060 count = min(count, 64 * 1024); 4061 4062 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4063 4064 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4065 UIO_SYSSPACE); 4066 if (error != 0) 4067 goto done; 4068 if (basep != NULL) 4069 *basep = base; 4070 4071 ucount = 0; 4072 for (dp = (struct dirent *)dirbuf, 4073 edp = (struct dirent *)&dirbuf[count - resid]; 4074 ucount < count && dp < edp; ) { 4075 if (dp->d_reclen == 0) 4076 break; 4077 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4078 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4079 continue; 4080 dstdp.d_type = dp->d_type; 4081 dstdp.d_namlen = dp->d_namlen; 4082 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4083 if (dstdp.d_fileno != dp->d_fileno) { 4084 switch (ino64_trunc_error) { 4085 default: 4086 case 0: 4087 break; 4088 case 1: 4089 error = EOVERFLOW; 4090 goto done; 4091 case 2: 4092 dstdp.d_fileno = UINT32_MAX; 4093 break; 4094 } 4095 } 4096 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4097 ((dp->d_namlen + 1 + 3) &~ 3); 4098 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4099 bzero(dstdp.d_name + dstdp.d_namlen, 4100 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4101 dstdp.d_namlen); 4102 MPASS(dstdp.d_reclen <= dp->d_reclen); 4103 MPASS(ucount + dstdp.d_reclen <= count); 4104 if (func != NULL) 4105 func(&dstdp); 4106 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4107 if (error != 0) 4108 break; 4109 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4110 ucount += dstdp.d_reclen; 4111 } 4112 4113 done: 4114 free(dirbuf, M_TEMP); 4115 if (error == 0) 4116 td->td_retval[0] = ucount; 4117 return (error); 4118 } 4119 #endif /* COMPAT */ 4120 4121 #ifdef COMPAT_43 4122 static void 4123 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4124 { 4125 #if (BYTE_ORDER == LITTLE_ENDIAN) 4126 /* 4127 * The expected low byte of dp->d_namlen is our dp->d_type. 4128 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4129 */ 4130 dp->d_type = dp->d_namlen; 4131 dp->d_namlen = 0; 4132 #else 4133 /* 4134 * The dp->d_type is the high byte of the expected dp->d_namlen, 4135 * so must be zero'ed. 4136 */ 4137 dp->d_type = 0; 4138 #endif 4139 } 4140 4141 /* 4142 * Read a block of directory entries in a filesystem independent format. 4143 */ 4144 #ifndef _SYS_SYSPROTO_H_ 4145 struct ogetdirentries_args { 4146 int fd; 4147 char *buf; 4148 u_int count; 4149 long *basep; 4150 }; 4151 #endif 4152 int 4153 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4154 { 4155 long loff; 4156 int error; 4157 4158 error = kern_ogetdirentries(td, uap, &loff); 4159 if (error == 0) 4160 error = copyout(&loff, uap->basep, sizeof(long)); 4161 return (error); 4162 } 4163 4164 int 4165 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4166 long *ploff) 4167 { 4168 long base; 4169 int error; 4170 4171 /* XXX arbitrary sanity limit on `count'. */ 4172 if (uap->count > 64 * 1024) 4173 return (EINVAL); 4174 4175 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4176 &base, ogetdirentries_cvt); 4177 4178 if (error == 0 && uap->basep != NULL) 4179 error = copyout(&base, uap->basep, sizeof(long)); 4180 4181 return (error); 4182 } 4183 #endif /* COMPAT_43 */ 4184 4185 #if defined(COMPAT_FREEBSD11) 4186 #ifndef _SYS_SYSPROTO_H_ 4187 struct freebsd11_getdirentries_args { 4188 int fd; 4189 char *buf; 4190 u_int count; 4191 long *basep; 4192 }; 4193 #endif 4194 int 4195 freebsd11_getdirentries(struct thread *td, 4196 struct freebsd11_getdirentries_args *uap) 4197 { 4198 long base; 4199 int error; 4200 4201 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4202 &base, NULL); 4203 4204 if (error == 0 && uap->basep != NULL) 4205 error = copyout(&base, uap->basep, sizeof(long)); 4206 return (error); 4207 } 4208 4209 int 4210 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4211 { 4212 struct 
freebsd11_getdirentries_args ap; 4213 4214 ap.fd = uap->fd; 4215 ap.buf = uap->buf; 4216 ap.count = uap->count; 4217 ap.basep = NULL; 4218 return (freebsd11_getdirentries(td, &ap)); 4219 } 4220 #endif /* COMPAT_FREEBSD11 */ 4221 4222 /* 4223 * Read a block of directory entries in a filesystem independent format. 4224 */ 4225 int 4226 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4227 { 4228 off_t base; 4229 int error; 4230 4231 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4232 NULL, UIO_USERSPACE); 4233 if (error != 0) 4234 return (error); 4235 if (uap->basep != NULL) 4236 error = copyout(&base, uap->basep, sizeof(off_t)); 4237 return (error); 4238 } 4239 4240 int 4241 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4242 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4243 { 4244 struct vnode *vp; 4245 struct file *fp; 4246 struct uio auio; 4247 struct iovec aiov; 4248 off_t loff; 4249 int error, eofflag; 4250 off_t foffset; 4251 4252 AUDIT_ARG_FD(fd); 4253 if (count > IOSIZE_MAX) 4254 return (EINVAL); 4255 auio.uio_resid = count; 4256 error = getvnode(td, fd, &cap_read_rights, &fp); 4257 if (error != 0) 4258 return (error); 4259 if ((fp->f_flag & FREAD) == 0) { 4260 fdrop(fp, td); 4261 return (EBADF); 4262 } 4263 vp = fp->f_vnode; 4264 foffset = foffset_lock(fp, 0); 4265 unionread: 4266 if (vp->v_type != VDIR) { 4267 error = EINVAL; 4268 goto fail; 4269 } 4270 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4271 error = ENOENT; 4272 goto fail; 4273 } 4274 aiov.iov_base = buf; 4275 aiov.iov_len = count; 4276 auio.uio_iov = &aiov; 4277 auio.uio_iovcnt = 1; 4278 auio.uio_rw = UIO_READ; 4279 auio.uio_segflg = bufseg; 4280 auio.uio_td = td; 4281 vn_lock(vp, LK_SHARED | LK_RETRY); 4282 AUDIT_ARG_VNODE1(vp); 4283 loff = auio.uio_offset = foffset; 4284 #ifdef MAC 4285 error = mac_vnode_check_readdir(td->td_ucred, vp); 4286 if (error == 0) 4287 #endif 4288 error = VOP_READDIR(vp, &auio, 
fp->f_cred, &eofflag, NULL, 4289 NULL); 4290 foffset = auio.uio_offset; 4291 if (error != 0) { 4292 VOP_UNLOCK(vp); 4293 goto fail; 4294 } 4295 if (count == auio.uio_resid && 4296 (vp->v_vflag & VV_ROOT) && 4297 (vp->v_mount->mnt_flag & MNT_UNION)) { 4298 struct vnode *tvp = vp; 4299 4300 vp = vp->v_mount->mnt_vnodecovered; 4301 VREF(vp); 4302 fp->f_vnode = vp; 4303 foffset = 0; 4304 vput(tvp); 4305 goto unionread; 4306 } 4307 VOP_UNLOCK(vp); 4308 *basep = loff; 4309 if (residp != NULL) 4310 *residp = auio.uio_resid; 4311 td->td_retval[0] = count - auio.uio_resid; 4312 fail: 4313 foffset_unlock(fp, foffset, 0); 4314 fdrop(fp, td); 4315 return (error); 4316 } 4317 4318 /* 4319 * Set the mode mask for creation of filesystem nodes. 4320 */ 4321 #ifndef _SYS_SYSPROTO_H_ 4322 struct umask_args { 4323 int newmask; 4324 }; 4325 #endif 4326 int 4327 sys_umask(struct thread *td, struct umask_args *uap) 4328 { 4329 struct pwddesc *pdp; 4330 4331 pdp = td->td_proc->p_pd; 4332 PWDDESC_XLOCK(pdp); 4333 td->td_retval[0] = pdp->pd_cmask; 4334 pdp->pd_cmask = uap->newmask & ALLPERMS; 4335 PWDDESC_XUNLOCK(pdp); 4336 return (0); 4337 } 4338 4339 /* 4340 * Void all references to file by ripping underlying filesystem away from 4341 * vnode. 
4342 */ 4343 #ifndef _SYS_SYSPROTO_H_ 4344 struct revoke_args { 4345 char *path; 4346 }; 4347 #endif 4348 int 4349 sys_revoke(struct thread *td, struct revoke_args *uap) 4350 { 4351 struct vnode *vp; 4352 struct vattr vattr; 4353 struct nameidata nd; 4354 int error; 4355 4356 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4357 uap->path); 4358 if ((error = namei(&nd)) != 0) 4359 return (error); 4360 vp = nd.ni_vp; 4361 NDFREE_PNBUF(&nd); 4362 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4363 error = EINVAL; 4364 goto out; 4365 } 4366 #ifdef MAC 4367 error = mac_vnode_check_revoke(td->td_ucred, vp); 4368 if (error != 0) 4369 goto out; 4370 #endif 4371 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4372 if (error != 0) 4373 goto out; 4374 if (td->td_ucred->cr_uid != vattr.va_uid) { 4375 error = priv_check(td, PRIV_VFS_ADMIN); 4376 if (error != 0) 4377 goto out; 4378 } 4379 if (devfs_usecount(vp) > 0) 4380 VOP_REVOKE(vp, REVOKEALL); 4381 out: 4382 vput(vp); 4383 return (error); 4384 } 4385 4386 /* 4387 * This variant of getvnode() allows O_PATH files. Caller should 4388 * ensure that returned file and vnode are only used for compatible 4389 * semantics. 4390 */ 4391 int 4392 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4393 struct file **fpp) 4394 { 4395 struct file *fp; 4396 int error; 4397 4398 error = fget_unlocked(td, fd, rightsp, &fp); 4399 if (error != 0) 4400 return (error); 4401 4402 /* 4403 * The file could be not of the vnode type, or it may be not 4404 * yet fully initialized, in which case the f_vnode pointer 4405 * may be set, but f_ops is still badfileops. E.g., 4406 * devfs_open() transiently create such situation to 4407 * facilitate csw d_fdopen(). 4408 * 4409 * Dupfdopen() handling in kern_openat() installs the 4410 * half-baked file into the process descriptor table, allowing 4411 * other thread to dereference it. Guard against the race by 4412 * checking f_ops. 
4413 */ 4414 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4415 fdrop(fp, td); 4416 *fpp = NULL; 4417 return (EINVAL); 4418 } 4419 4420 *fpp = fp; 4421 return (0); 4422 } 4423 4424 /* 4425 * Convert a user file descriptor to a kernel file entry and check 4426 * that, if it is a capability, the correct rights are present. 4427 * A reference on the file entry is held upon returning. 4428 */ 4429 int 4430 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4431 { 4432 int error; 4433 4434 error = getvnode_path(td, fd, rightsp, fpp); 4435 if (__predict_false(error != 0)) 4436 return (error); 4437 4438 /* 4439 * Filter out O_PATH file descriptors, most getvnode() callers 4440 * do not call fo_ methods. 4441 */ 4442 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4443 fdrop(*fpp, td); 4444 *fpp = NULL; 4445 error = EBADF; 4446 } 4447 4448 return (error); 4449 } 4450 4451 /* 4452 * Get an (NFS) file handle. 4453 */ 4454 #ifndef _SYS_SYSPROTO_H_ 4455 struct lgetfh_args { 4456 char *fname; 4457 fhandle_t *fhp; 4458 }; 4459 #endif 4460 int 4461 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4462 { 4463 4464 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4465 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4466 } 4467 4468 #ifndef _SYS_SYSPROTO_H_ 4469 struct getfh_args { 4470 char *fname; 4471 fhandle_t *fhp; 4472 }; 4473 #endif 4474 int 4475 sys_getfh(struct thread *td, struct getfh_args *uap) 4476 { 4477 4478 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4479 uap->fhp, UIO_USERSPACE)); 4480 } 4481 4482 /* 4483 * syscall for the rpc.lockd to use to translate an open descriptor into 4484 * a NFS file handle. 4485 * 4486 * warning: do not remove the priv_check() call or this becomes one giant 4487 * security hole. 
4488 */ 4489 #ifndef _SYS_SYSPROTO_H_ 4490 struct getfhat_args { 4491 int fd; 4492 char *path; 4493 fhandle_t *fhp; 4494 int flags; 4495 }; 4496 #endif 4497 int 4498 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4499 { 4500 4501 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4502 uap->fhp, UIO_USERSPACE)); 4503 } 4504 4505 int 4506 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4507 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4508 { 4509 struct nameidata nd; 4510 fhandle_t fh; 4511 struct vnode *vp; 4512 int error; 4513 4514 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4515 return (EINVAL); 4516 error = priv_check(td, PRIV_VFS_GETFH); 4517 if (error != 0) 4518 return (error); 4519 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4520 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4521 fd); 4522 error = namei(&nd); 4523 if (error != 0) 4524 return (error); 4525 NDFREE_PNBUF(&nd); 4526 vp = nd.ni_vp; 4527 bzero(&fh, sizeof(fh)); 4528 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4529 error = VOP_VPTOFH(vp, &fh.fh_fid); 4530 vput(vp); 4531 if (error == 0) { 4532 if (fhseg == UIO_USERSPACE) 4533 error = copyout(&fh, fhp, sizeof (fh)); 4534 else 4535 memcpy(fhp, &fh, sizeof(fh)); 4536 } 4537 return (error); 4538 } 4539 4540 #ifndef _SYS_SYSPROTO_H_ 4541 struct fhlink_args { 4542 fhandle_t *fhp; 4543 const char *to; 4544 }; 4545 #endif 4546 int 4547 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4548 { 4549 4550 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4551 } 4552 4553 #ifndef _SYS_SYSPROTO_H_ 4554 struct fhlinkat_args { 4555 fhandle_t *fhp; 4556 int tofd; 4557 const char *to; 4558 }; 4559 #endif 4560 int 4561 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4562 { 4563 4564 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4565 } 4566 4567 static int 4568 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4569 enum uio_seg pathseg, fhandle_t *fhp) 4570 { 4571 fhandle_t fh; 4572 struct mount *mp; 4573 struct vnode *vp; 4574 int error; 4575 4576 error = priv_check(td, PRIV_VFS_GETFH); 4577 if (error != 0) 4578 return (error); 4579 error = copyin(fhp, &fh, sizeof(fh)); 4580 if (error != 0) 4581 return (error); 4582 do { 4583 bwillwrite(); 4584 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4585 return (ESTALE); 4586 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4587 vfs_unbusy(mp); 4588 if (error != 0) 4589 return (error); 4590 VOP_UNLOCK(vp); 4591 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4592 } while (error == EAGAIN || error == ERELOOKUP); 4593 return (error); 4594 } 4595 4596 #ifndef _SYS_SYSPROTO_H_ 4597 struct fhreadlink_args { 4598 fhandle_t *fhp; 4599 char *buf; 4600 size_t bufsize; 4601 }; 4602 #endif 4603 int 4604 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4605 { 4606 fhandle_t fh; 4607 struct mount *mp; 4608 struct vnode *vp; 4609 int error; 4610 4611 error = priv_check(td, PRIV_VFS_GETFH); 4612 if (error != 0) 4613 return (error); 4614 if (uap->bufsize > IOSIZE_MAX) 4615 return (EINVAL); 4616 error = copyin(uap->fhp, &fh, sizeof(fh)); 4617 if (error != 0) 4618 return (error); 4619 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4620 return (ESTALE); 4621 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4622 vfs_unbusy(mp); 4623 if (error != 0) 4624 return (error); 4625 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4626 vput(vp); 4627 return (error); 4628 } 4629 4630 /* 4631 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4632 * open descriptor. 4633 * 4634 * warning: do not remove the priv_check() call or this becomes one giant 4635 * security hole. 
4636 */ 4637 #ifndef _SYS_SYSPROTO_H_ 4638 struct fhopen_args { 4639 const struct fhandle *u_fhp; 4640 int flags; 4641 }; 4642 #endif 4643 int 4644 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4645 { 4646 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4647 } 4648 4649 int 4650 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4651 { 4652 struct mount *mp; 4653 struct vnode *vp; 4654 struct fhandle fhp; 4655 struct file *fp; 4656 int fmode, error; 4657 int indx; 4658 bool named_attr; 4659 4660 error = priv_check(td, PRIV_VFS_FHOPEN); 4661 if (error != 0) 4662 return (error); 4663 indx = -1; 4664 fmode = FFLAGS(flags); 4665 /* why not allow a non-read/write open for our lockd? */ 4666 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4667 return (EINVAL); 4668 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4669 if (error != 0) 4670 return(error); 4671 /* find the mount point */ 4672 mp = vfs_busyfs(&fhp.fh_fsid); 4673 if (mp == NULL) 4674 return (ESTALE); 4675 /* now give me my vnode, it gets returned to me locked */ 4676 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4677 vfs_unbusy(mp); 4678 if (error != 0) 4679 return (error); 4680 4681 /* 4682 * Check to see if the file handle refers to a named attribute 4683 * directory or attribute. If it does, the O_NAMEDATTR flag 4684 * must have been specified. 4685 */ 4686 named_attr = (vn_irflag_read(vp) & 4687 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4688 if ((named_attr && (fmode & O_NAMEDATTR) == 0) || 4689 (!named_attr && (fmode & O_NAMEDATTR) != 0)) { 4690 vput(vp); 4691 return (ENOATTR); 4692 } 4693 4694 error = falloc_noinstall(td, &fp); 4695 if (error != 0) { 4696 vput(vp); 4697 return (error); 4698 } 4699 /* 4700 * An extra reference on `fp' has been held for us by 4701 * falloc_noinstall(). 
4702 */ 4703 4704 #ifdef INVARIANTS 4705 td->td_dupfd = -1; 4706 #endif 4707 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4708 if (error != 0) { 4709 KASSERT(fp->f_ops == &badfileops, 4710 ("VOP_OPEN in fhopen() set f_ops")); 4711 KASSERT(td->td_dupfd < 0, 4712 ("fhopen() encountered fdopen()")); 4713 4714 vput(vp); 4715 goto bad; 4716 } 4717 #ifdef INVARIANTS 4718 td->td_dupfd = 0; 4719 #endif 4720 fp->f_vnode = vp; 4721 finit_vnode(fp, fmode, NULL, &vnops); 4722 VOP_UNLOCK(vp); 4723 if ((fmode & O_TRUNC) != 0) { 4724 error = fo_truncate(fp, 0, td->td_ucred, td); 4725 if (error != 0) 4726 goto bad; 4727 } 4728 4729 error = finstall(td, fp, &indx, fmode, NULL); 4730 bad: 4731 fdrop(fp, td); 4732 td->td_retval[0] = indx; 4733 return (error); 4734 } 4735 4736 /* 4737 * Stat an (NFS) file handle. 4738 */ 4739 #ifndef _SYS_SYSPROTO_H_ 4740 struct fhstat_args { 4741 struct fhandle *u_fhp; 4742 struct stat *sb; 4743 }; 4744 #endif 4745 int 4746 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4747 { 4748 struct stat sb; 4749 struct fhandle fh; 4750 int error; 4751 4752 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4753 if (error != 0) 4754 return (error); 4755 error = kern_fhstat(td, fh, &sb); 4756 if (error == 0) 4757 error = copyout(&sb, uap->sb, sizeof(sb)); 4758 return (error); 4759 } 4760 4761 int 4762 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4763 { 4764 struct mount *mp; 4765 struct vnode *vp; 4766 int error; 4767 4768 error = priv_check(td, PRIV_VFS_FHSTAT); 4769 if (error != 0) 4770 return (error); 4771 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4772 return (ESTALE); 4773 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4774 vfs_unbusy(mp); 4775 if (error != 0) 4776 return (error); 4777 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4778 vput(vp); 4779 return (error); 4780 } 4781 4782 /* 4783 * Implement fstatfs() for (NFS) file handles. 
4784 */ 4785 #ifndef _SYS_SYSPROTO_H_ 4786 struct fhstatfs_args { 4787 struct fhandle *u_fhp; 4788 struct statfs *buf; 4789 }; 4790 #endif 4791 int 4792 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4793 { 4794 struct statfs *sfp; 4795 fhandle_t fh; 4796 int error; 4797 4798 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4799 if (error != 0) 4800 return (error); 4801 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4802 error = kern_fhstatfs(td, fh, sfp); 4803 if (error == 0) 4804 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4805 free(sfp, M_STATFS); 4806 return (error); 4807 } 4808 4809 int 4810 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4811 { 4812 struct mount *mp; 4813 struct vnode *vp; 4814 int error; 4815 4816 error = priv_check(td, PRIV_VFS_FHSTATFS); 4817 if (error != 0) 4818 return (error); 4819 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4820 return (ESTALE); 4821 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4822 if (error != 0) { 4823 vfs_unbusy(mp); 4824 return (error); 4825 } 4826 vput(vp); 4827 error = prison_canseemount(td->td_ucred, mp); 4828 if (error != 0) 4829 goto out; 4830 #ifdef MAC 4831 error = mac_mount_check_stat(td->td_ucred, mp); 4832 if (error != 0) 4833 goto out; 4834 #endif 4835 error = VFS_STATFS(mp, buf); 4836 out: 4837 vfs_unbusy(mp); 4838 return (error); 4839 } 4840 4841 /* 4842 * Unlike madvise(2), we do not make a best effort to remember every 4843 * possible caching hint. Instead, we remember the last setting with 4844 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4845 * region of any current setting. 
4846 */ 4847 int 4848 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4849 int advice) 4850 { 4851 struct fadvise_info *fa, *new; 4852 struct file *fp; 4853 struct vnode *vp; 4854 off_t end; 4855 int error; 4856 4857 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4858 return (EINVAL); 4859 AUDIT_ARG_VALUE(advice); 4860 switch (advice) { 4861 case POSIX_FADV_SEQUENTIAL: 4862 case POSIX_FADV_RANDOM: 4863 case POSIX_FADV_NOREUSE: 4864 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4865 break; 4866 case POSIX_FADV_NORMAL: 4867 case POSIX_FADV_WILLNEED: 4868 case POSIX_FADV_DONTNEED: 4869 new = NULL; 4870 break; 4871 default: 4872 return (EINVAL); 4873 } 4874 /* XXX: CAP_POSIX_FADVISE? */ 4875 AUDIT_ARG_FD(fd); 4876 error = fget(td, fd, &cap_no_rights, &fp); 4877 if (error != 0) 4878 goto out; 4879 AUDIT_ARG_FILE(td->td_proc, fp); 4880 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4881 error = ESPIPE; 4882 goto out; 4883 } 4884 if (fp->f_type != DTYPE_VNODE) { 4885 error = ENODEV; 4886 goto out; 4887 } 4888 vp = fp->f_vnode; 4889 if (vp->v_type != VREG) { 4890 error = ENODEV; 4891 goto out; 4892 } 4893 if (len == 0) 4894 end = OFF_MAX; 4895 else 4896 end = offset + len - 1; 4897 switch (advice) { 4898 case POSIX_FADV_SEQUENTIAL: 4899 case POSIX_FADV_RANDOM: 4900 case POSIX_FADV_NOREUSE: 4901 /* 4902 * Try to merge any existing non-standard region with 4903 * this new region if possible, otherwise create a new 4904 * non-standard region for this request. 
4905 */ 4906 mtx_pool_lock(mtxpool_sleep, fp); 4907 fa = fp->f_advice; 4908 if (fa != NULL && fa->fa_advice == advice && 4909 ((fa->fa_start <= end && fa->fa_end >= offset) || 4910 (end != OFF_MAX && fa->fa_start == end + 1) || 4911 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4912 if (offset < fa->fa_start) 4913 fa->fa_start = offset; 4914 if (end > fa->fa_end) 4915 fa->fa_end = end; 4916 } else { 4917 new->fa_advice = advice; 4918 new->fa_start = offset; 4919 new->fa_end = end; 4920 fp->f_advice = new; 4921 new = fa; 4922 } 4923 mtx_pool_unlock(mtxpool_sleep, fp); 4924 break; 4925 case POSIX_FADV_NORMAL: 4926 /* 4927 * If a the "normal" region overlaps with an existing 4928 * non-standard region, trim or remove the 4929 * non-standard region. 4930 */ 4931 mtx_pool_lock(mtxpool_sleep, fp); 4932 fa = fp->f_advice; 4933 if (fa != NULL) { 4934 if (offset <= fa->fa_start && end >= fa->fa_end) { 4935 new = fa; 4936 fp->f_advice = NULL; 4937 } else if (offset <= fa->fa_start && 4938 end >= fa->fa_start) 4939 fa->fa_start = end + 1; 4940 else if (offset <= fa->fa_end && end >= fa->fa_end) 4941 fa->fa_end = offset - 1; 4942 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4943 /* 4944 * If the "normal" region is a middle 4945 * portion of the existing 4946 * non-standard region, just remove 4947 * the whole thing rather than picking 4948 * one side or the other to 4949 * preserve. 
4950 */ 4951 new = fa; 4952 fp->f_advice = NULL; 4953 } 4954 } 4955 mtx_pool_unlock(mtxpool_sleep, fp); 4956 break; 4957 case POSIX_FADV_WILLNEED: 4958 case POSIX_FADV_DONTNEED: 4959 error = VOP_ADVISE(vp, offset, end, advice); 4960 break; 4961 } 4962 out: 4963 if (fp != NULL) 4964 fdrop(fp, td); 4965 free(new, M_FADVISE); 4966 return (error); 4967 } 4968 4969 int 4970 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4971 { 4972 int error; 4973 4974 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4975 uap->advice); 4976 return (kern_posix_error(td, error)); 4977 } 4978 4979 int 4980 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4981 off_t *outoffp, size_t len, unsigned int flags) 4982 { 4983 struct file *infp, *infp1, *outfp, *outfp1; 4984 struct vnode *invp, *outvp; 4985 int error; 4986 size_t retlen; 4987 void *rl_rcookie, *rl_wcookie; 4988 off_t inoff, outoff, savinoff, savoutoff; 4989 bool foffsets_locked; 4990 4991 infp = outfp = NULL; 4992 rl_rcookie = rl_wcookie = NULL; 4993 foffsets_locked = false; 4994 error = 0; 4995 retlen = 0; 4996 4997 if (flags != 0) { 4998 error = EINVAL; 4999 goto out; 5000 } 5001 if (len > SSIZE_MAX) 5002 /* 5003 * Although the len argument is size_t, the return argument 5004 * is ssize_t (which is signed). Therefore a size that won't 5005 * fit in ssize_t can't be returned. 5006 */ 5007 len = SSIZE_MAX; 5008 5009 /* Get the file structures for the file descriptors. */ 5010 error = fget_read(td, infd, 5011 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 5012 if (error != 0) 5013 goto out; 5014 if (infp->f_ops == &badfileops) { 5015 error = EBADF; 5016 goto out; 5017 } 5018 if (infp->f_vnode == NULL) { 5019 error = EINVAL; 5020 goto out; 5021 } 5022 error = fget_write(td, outfd, 5023 outoffp != NULL ? 
&cap_pwrite_rights : &cap_write_rights, &outfp); 5024 if (error != 0) 5025 goto out; 5026 if (outfp->f_ops == &badfileops) { 5027 error = EBADF; 5028 goto out; 5029 } 5030 if (outfp->f_vnode == NULL) { 5031 error = EINVAL; 5032 goto out; 5033 } 5034 5035 /* 5036 * Figure out which file offsets we're reading from and writing to. 5037 * If the offsets come from the file descriptions, we need to lock them, 5038 * and locking both offsets requires a loop to avoid deadlocks. 5039 */ 5040 infp1 = outfp1 = NULL; 5041 if (inoffp != NULL) 5042 inoff = *inoffp; 5043 else 5044 infp1 = infp; 5045 if (outoffp != NULL) 5046 outoff = *outoffp; 5047 else 5048 outfp1 = outfp; 5049 if (infp1 != NULL || outfp1 != NULL) { 5050 if (infp1 == outfp1) { 5051 /* 5052 * Overlapping ranges are not allowed. A more thorough 5053 * check appears below, but we must not lock the same 5054 * offset twice. 5055 */ 5056 error = EINVAL; 5057 goto out; 5058 } 5059 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5060 foffsets_locked = true; 5061 } 5062 savinoff = inoff; 5063 savoutoff = outoff; 5064 5065 invp = infp->f_vnode; 5066 outvp = outfp->f_vnode; 5067 /* Sanity check the f_flag bits. */ 5068 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5069 (infp->f_flag & FREAD) == 0) { 5070 error = EBADF; 5071 goto out; 5072 } 5073 5074 /* If len == 0, just return 0. */ 5075 if (len == 0) 5076 goto out; 5077 5078 /* 5079 * If infp and outfp refer to the same file, the byte ranges cannot 5080 * overlap. 5081 */ 5082 if (invp == outvp) { 5083 if ((inoff <= outoff && inoff + len > outoff) || 5084 (inoff > outoff && outoff + len > inoff)) { 5085 error = EINVAL; 5086 goto out; 5087 } 5088 rangelock_may_recurse(&invp->v_rl); 5089 } 5090 5091 /* Range lock the byte ranges for both invp and outvp. 
*/ 5092 for (;;) { 5093 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5094 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5095 if (rl_rcookie != NULL) 5096 break; 5097 vn_rangelock_unlock(outvp, rl_wcookie); 5098 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5099 vn_rangelock_unlock(invp, rl_rcookie); 5100 } 5101 5102 retlen = len; 5103 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5104 flags, infp->f_cred, outfp->f_cred, td); 5105 out: 5106 if (rl_rcookie != NULL) 5107 vn_rangelock_unlock(invp, rl_rcookie); 5108 if (rl_wcookie != NULL) 5109 vn_rangelock_unlock(outvp, rl_wcookie); 5110 if (foffsets_locked) { 5111 if (error == EINTR || error == ERESTART) { 5112 inoff = savinoff; 5113 outoff = savoutoff; 5114 } 5115 if (inoffp == NULL) 5116 foffset_unlock(infp, inoff, 0); 5117 else 5118 *inoffp = inoff; 5119 if (outoffp == NULL) 5120 foffset_unlock(outfp, outoff, 0); 5121 else 5122 *outoffp = outoff; 5123 } 5124 if (outfp != NULL) 5125 fdrop(outfp, td); 5126 if (infp != NULL) 5127 fdrop(infp, td); 5128 td->td_retval[0] = retlen; 5129 return (error); 5130 } 5131 5132 int 5133 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5134 { 5135 off_t inoff, outoff, *inoffp, *outoffp; 5136 int error; 5137 5138 inoffp = outoffp = NULL; 5139 if (uap->inoffp != NULL) { 5140 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5141 if (error != 0) 5142 return (error); 5143 inoffp = &inoff; 5144 } 5145 if (uap->outoffp != NULL) { 5146 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5147 if (error != 0) 5148 return (error); 5149 outoffp = &outoff; 5150 } 5151 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5152 outoffp, uap->len, uap->flags); 5153 if (error == 0 && uap->inoffp != NULL) 5154 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5155 if (error == 0 && uap->outoffp != NULL) 5156 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5157 return (error); 5158 } 5159