1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 #include "opt_capsicum.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #ifdef COMPAT_FREEBSD11 46 #include <sys/abi_compat.h> 47 #endif 48 #include <sys/bio.h> 49 #include <sys/buf.h> 50 #include <sys/capsicum.h> 51 #include <sys/disk.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <fs/devfs/devfs.h> 91 92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94 static int kern_chflagsat(struct thread *td, int fd, const char *path, 95 enum uio_seg pathseg, u_long flags, int atflag); 96 static int setfflags(struct thread *td, struct vnode *, u_long); 97 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 98 static int getutimens(const struct timespec *, enum uio_seg, 99 struct timespec *, int *); 100 static int setutimes(struct thread *td, struct vnode *, 101 const struct timespec *, int, int); 102 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 103 struct thread *td); 104 static int kern_fhlinkat(struct 
thread *td, int fd, const char *path, 105 enum uio_seg pathseg, fhandle_t *fhp); 106 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 107 size_t count, struct thread *td); 108 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 109 const char *path, enum uio_seg segflag); 110 111 uint64_t 112 at2cnpflags(u_int at_flags, u_int mask) 113 { 114 uint64_t res; 115 116 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 117 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 118 119 res = 0; 120 at_flags &= mask; 121 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 122 res |= RBENEATH; 123 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 124 res |= FOLLOW; 125 /* NOFOLLOW is pseudo flag */ 126 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 127 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 128 FOLLOW; 129 } 130 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 131 res |= EMPTYPATH; 132 return (res); 133 } 134 135 int 136 kern_sync(struct thread *td) 137 { 138 struct mount *mp, *nmp; 139 int save; 140 141 mtx_lock(&mountlist_mtx); 142 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 143 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 144 nmp = TAILQ_NEXT(mp, mnt_list); 145 continue; 146 } 147 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 148 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 149 save = curthread_pflags_set(TDP_SYNCIO); 150 vfs_periodic(mp, MNT_NOWAIT); 151 VFS_SYNC(mp, MNT_NOWAIT); 152 curthread_pflags_restore(save); 153 vn_finished_write(mp); 154 } 155 mtx_lock(&mountlist_mtx); 156 nmp = TAILQ_NEXT(mp, mnt_list); 157 vfs_unbusy(mp); 158 } 159 mtx_unlock(&mountlist_mtx); 160 return (0); 161 } 162 163 /* 164 * Sync each mounted filesystem. 165 */ 166 #ifndef _SYS_SYSPROTO_H_ 167 struct sync_args { 168 int dummy; 169 }; 170 #endif 171 /* ARGSUSED */ 172 int 173 sys_sync(struct thread *td, struct sync_args *uap) 174 { 175 176 return (kern_sync(td)); 177 } 178 179 /* 180 * Change filesystem quotas. 
181 */ 182 #ifndef _SYS_SYSPROTO_H_ 183 struct quotactl_args { 184 char *path; 185 int cmd; 186 int uid; 187 caddr_t arg; 188 }; 189 #endif 190 int 191 sys_quotactl(struct thread *td, struct quotactl_args *uap) 192 { 193 struct mount *mp; 194 struct nameidata nd; 195 int error; 196 bool mp_busy; 197 198 AUDIT_ARG_CMD(uap->cmd); 199 AUDIT_ARG_UID(uap->uid); 200 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 201 return (EPERM); 202 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 203 uap->path); 204 if ((error = namei(&nd)) != 0) 205 return (error); 206 NDFREE_PNBUF(&nd); 207 mp = nd.ni_vp->v_mount; 208 vfs_ref(mp); 209 vput(nd.ni_vp); 210 error = vfs_busy(mp, 0); 211 if (error != 0) { 212 vfs_rel(mp); 213 return (error); 214 } 215 mp_busy = true; 216 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 217 218 /* 219 * Since quota on/off operations typically need to open quota 220 * files, the implementation may need to unbusy the mount point 221 * before calling into namei. Otherwise, unmount might be 222 * started between two vfs_busy() invocations (first is ours, 223 * second is from mount point cross-walk code in lookup()), 224 * causing deadlock. 225 * 226 * Avoid unbusying mp if the implementation indicates it has 227 * already done so. 228 */ 229 if (mp_busy) 230 vfs_unbusy(mp); 231 vfs_rel(mp); 232 return (error); 233 } 234 235 /* 236 * Used by statfs conversion routines to scale the block size up if 237 * necessary so that all of the block counts are <= 'max_size'. Note 238 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 239 * value of 'n'. 240 */ 241 void 242 statfs_scale_blocks(struct statfs *sf, long max_size) 243 { 244 uint64_t count; 245 int shift; 246 247 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 248 249 /* 250 * Attempt to scale the block counts to give a more accurate 251 * overview to userland of the ratio of free space to used 252 * space. 
To do this, find the largest block count and compute 253 * a divisor that lets it fit into a signed integer <= max_size. 254 */ 255 if (sf->f_bavail < 0) 256 count = -sf->f_bavail; 257 else 258 count = sf->f_bavail; 259 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 260 if (count <= max_size) 261 return; 262 263 count >>= flsl(max_size); 264 shift = 0; 265 while (count > 0) { 266 shift++; 267 count >>=1; 268 } 269 270 sf->f_bsize <<= shift; 271 sf->f_blocks >>= shift; 272 sf->f_bfree >>= shift; 273 sf->f_bavail >>= shift; 274 } 275 276 static int 277 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 278 { 279 int error; 280 281 if (mp == NULL) 282 return (EBADF); 283 error = vfs_busy(mp, 0); 284 vfs_rel(mp); 285 if (error != 0) 286 return (error); 287 #ifdef MAC 288 error = mac_mount_check_stat(td->td_ucred, mp); 289 if (error != 0) 290 goto out; 291 #endif 292 error = VFS_STATFS(mp, buf); 293 if (error != 0) 294 goto out; 295 if (priv_check_cred_vfs_generation(td->td_ucred)) { 296 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 297 prison_enforce_statfs(td->td_ucred, mp, buf); 298 } 299 out: 300 vfs_unbusy(mp); 301 return (error); 302 } 303 304 /* 305 * Get filesystem statistics. 
306 */ 307 #ifndef _SYS_SYSPROTO_H_ 308 struct statfs_args { 309 char *path; 310 struct statfs *buf; 311 }; 312 #endif 313 int 314 sys_statfs(struct thread *td, struct statfs_args *uap) 315 { 316 struct statfs *sfp; 317 int error; 318 319 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 320 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 321 if (error == 0) 322 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 323 free(sfp, M_STATFS); 324 return (error); 325 } 326 327 int 328 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 329 struct statfs *buf) 330 { 331 struct mount *mp; 332 struct nameidata nd; 333 int error; 334 335 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 336 error = namei(&nd); 337 if (error != 0) 338 return (error); 339 NDFREE_PNBUF(&nd); 340 mp = vfs_ref_from_vp(nd.ni_vp); 341 vrele(nd.ni_vp); 342 return (kern_do_statfs(td, mp, buf)); 343 } 344 345 /* 346 * Get filesystem statistics. 347 */ 348 #ifndef _SYS_SYSPROTO_H_ 349 struct fstatfs_args { 350 int fd; 351 struct statfs *buf; 352 }; 353 #endif 354 int 355 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 356 { 357 struct statfs *sfp; 358 int error; 359 360 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 361 error = kern_fstatfs(td, uap->fd, sfp); 362 if (error == 0) 363 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 364 free(sfp, M_STATFS); 365 return (error); 366 } 367 368 int 369 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 370 { 371 struct file *fp; 372 struct mount *mp; 373 struct vnode *vp; 374 int error; 375 376 AUDIT_ARG_FD(fd); 377 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 378 if (error != 0) 379 return (error); 380 vp = fp->f_vnode; 381 #ifdef AUDIT 382 if (AUDITING_TD(td)) { 383 vn_lock(vp, LK_SHARED | LK_RETRY); 384 AUDIT_ARG_VNODE1(vp); 385 VOP_UNLOCK(vp); 386 } 387 #endif 388 mp = vfs_ref_from_vp(vp); 389 fdrop(fp, td); 390 return (kern_do_statfs(td, mp, buf)); 391 } 392 393 
/* 394 * Get statistics on all filesystems. 395 */ 396 #ifndef _SYS_SYSPROTO_H_ 397 struct getfsstat_args { 398 struct statfs *buf; 399 long bufsize; 400 int mode; 401 }; 402 #endif 403 int 404 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 405 { 406 size_t count; 407 int error; 408 409 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 410 return (EINVAL); 411 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 412 UIO_USERSPACE, uap->mode); 413 if (error == 0) 414 td->td_retval[0] = count; 415 return (error); 416 } 417 418 /* 419 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 420 * The caller is responsible for freeing memory which will be allocated 421 * in '*buf'. 422 */ 423 int 424 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 425 size_t *countp, enum uio_seg bufseg, int mode) 426 { 427 struct mount *mp, *nmp; 428 struct statfs *sfsp, *sp, *sptmp, *tofree; 429 size_t count, maxcount; 430 int error; 431 432 switch (mode) { 433 case MNT_WAIT: 434 case MNT_NOWAIT: 435 break; 436 default: 437 if (bufseg == UIO_SYSSPACE) 438 *buf = NULL; 439 return (EINVAL); 440 } 441 restart: 442 maxcount = bufsize / sizeof(struct statfs); 443 if (bufsize == 0) { 444 sfsp = NULL; 445 tofree = NULL; 446 } else if (bufseg == UIO_USERSPACE) { 447 sfsp = *buf; 448 tofree = NULL; 449 } else /* if (bufseg == UIO_SYSSPACE) */ { 450 count = 0; 451 mtx_lock(&mountlist_mtx); 452 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 453 count++; 454 } 455 mtx_unlock(&mountlist_mtx); 456 if (maxcount > count) 457 maxcount = count; 458 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 459 M_STATFS, M_WAITOK); 460 } 461 462 count = 0; 463 464 /* 465 * If there is no target buffer they only want the count. 466 * 467 * This could be TAILQ_FOREACH but it is open-coded to match the original 468 * code below. 
469 */ 470 if (sfsp == NULL) { 471 mtx_lock(&mountlist_mtx); 472 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 473 if (prison_canseemount(td->td_ucred, mp) != 0) { 474 nmp = TAILQ_NEXT(mp, mnt_list); 475 continue; 476 } 477 #ifdef MAC 478 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 479 nmp = TAILQ_NEXT(mp, mnt_list); 480 continue; 481 } 482 #endif 483 count++; 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 } 486 mtx_unlock(&mountlist_mtx); 487 *countp = count; 488 return (0); 489 } 490 491 /* 492 * They want the entire thing. 493 * 494 * Short-circuit the corner case of no room for anything, avoids 495 * relocking below. 496 */ 497 if (maxcount < 1) { 498 goto out; 499 } 500 501 mtx_lock(&mountlist_mtx); 502 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 503 if (prison_canseemount(td->td_ucred, mp) != 0) { 504 nmp = TAILQ_NEXT(mp, mnt_list); 505 continue; 506 } 507 #ifdef MAC 508 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 509 nmp = TAILQ_NEXT(mp, mnt_list); 510 continue; 511 } 512 #endif 513 if (mode == MNT_WAIT) { 514 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 515 /* 516 * If vfs_busy() failed, and MBF_NOWAIT 517 * wasn't passed, then the mp is gone. 518 * Furthermore, because of MBF_MNTLSTLOCK, 519 * the mountlist_mtx was dropped. We have 520 * no other choice than to start over. 521 */ 522 mtx_unlock(&mountlist_mtx); 523 free(tofree, M_STATFS); 524 goto restart; 525 } 526 } else { 527 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 528 nmp = TAILQ_NEXT(mp, mnt_list); 529 continue; 530 } 531 } 532 sp = &mp->mnt_stat; 533 /* 534 * If MNT_NOWAIT is specified, do not refresh 535 * the fsstat cache. 
536 */ 537 if (mode != MNT_NOWAIT) { 538 error = VFS_STATFS(mp, sp); 539 if (error != 0) { 540 mtx_lock(&mountlist_mtx); 541 nmp = TAILQ_NEXT(mp, mnt_list); 542 vfs_unbusy(mp); 543 continue; 544 } 545 } 546 if (priv_check_cred_vfs_generation(td->td_ucred)) { 547 sptmp = malloc(sizeof(struct statfs), M_STATFS, 548 M_WAITOK); 549 *sptmp = *sp; 550 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 551 prison_enforce_statfs(td->td_ucred, mp, sptmp); 552 sp = sptmp; 553 } else 554 sptmp = NULL; 555 if (bufseg == UIO_SYSSPACE) { 556 bcopy(sp, sfsp, sizeof(*sp)); 557 free(sptmp, M_STATFS); 558 } else /* if (bufseg == UIO_USERSPACE) */ { 559 error = copyout(sp, sfsp, sizeof(*sp)); 560 free(sptmp, M_STATFS); 561 if (error != 0) { 562 vfs_unbusy(mp); 563 return (error); 564 } 565 } 566 sfsp++; 567 count++; 568 569 if (count == maxcount) { 570 vfs_unbusy(mp); 571 goto out; 572 } 573 574 mtx_lock(&mountlist_mtx); 575 nmp = TAILQ_NEXT(mp, mnt_list); 576 vfs_unbusy(mp); 577 } 578 mtx_unlock(&mountlist_mtx); 579 out: 580 *countp = count; 581 return (0); 582 } 583 584 #ifdef COMPAT_FREEBSD4 585 /* 586 * Get old format filesystem statistics. 587 */ 588 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 589 590 #ifndef _SYS_SYSPROTO_H_ 591 struct freebsd4_statfs_args { 592 char *path; 593 struct ostatfs *buf; 594 }; 595 #endif 596 int 597 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 598 { 599 struct ostatfs osb; 600 struct statfs *sfp; 601 int error; 602 603 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 604 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 605 if (error == 0) { 606 freebsd4_cvtstatfs(sfp, &osb); 607 error = copyout(&osb, uap->buf, sizeof(osb)); 608 } 609 free(sfp, M_STATFS); 610 return (error); 611 } 612 613 /* 614 * Get filesystem statistics. 
615 */ 616 #ifndef _SYS_SYSPROTO_H_ 617 struct freebsd4_fstatfs_args { 618 int fd; 619 struct ostatfs *buf; 620 }; 621 #endif 622 int 623 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 624 { 625 struct ostatfs osb; 626 struct statfs *sfp; 627 int error; 628 629 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 630 error = kern_fstatfs(td, uap->fd, sfp); 631 if (error == 0) { 632 freebsd4_cvtstatfs(sfp, &osb); 633 error = copyout(&osb, uap->buf, sizeof(osb)); 634 } 635 free(sfp, M_STATFS); 636 return (error); 637 } 638 639 /* 640 * Get statistics on all filesystems. 641 */ 642 #ifndef _SYS_SYSPROTO_H_ 643 struct freebsd4_getfsstat_args { 644 struct ostatfs *buf; 645 long bufsize; 646 int mode; 647 }; 648 #endif 649 int 650 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 651 { 652 struct statfs *buf, *sp; 653 struct ostatfs osb; 654 size_t count, size; 655 int error; 656 657 if (uap->bufsize < 0) 658 return (EINVAL); 659 count = uap->bufsize / sizeof(struct ostatfs); 660 if (count > SIZE_MAX / sizeof(struct statfs)) 661 return (EINVAL); 662 size = count * sizeof(struct statfs); 663 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 664 uap->mode); 665 if (error == 0) 666 td->td_retval[0] = count; 667 if (size != 0) { 668 sp = buf; 669 while (count != 0 && error == 0) { 670 freebsd4_cvtstatfs(sp, &osb); 671 error = copyout(&osb, uap->buf, sizeof(osb)); 672 sp++; 673 uap->buf++; 674 count--; 675 } 676 free(buf, M_STATFS); 677 } 678 return (error); 679 } 680 681 /* 682 * Implement fstatfs() for (NFS) file handles. 
683 */ 684 #ifndef _SYS_SYSPROTO_H_ 685 struct freebsd4_fhstatfs_args { 686 struct fhandle *u_fhp; 687 struct ostatfs *buf; 688 }; 689 #endif 690 int 691 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 692 { 693 struct ostatfs osb; 694 struct statfs *sfp; 695 fhandle_t fh; 696 int error; 697 698 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 699 if (error != 0) 700 return (error); 701 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 702 error = kern_fhstatfs(td, fh, sfp); 703 if (error == 0) { 704 freebsd4_cvtstatfs(sfp, &osb); 705 error = copyout(&osb, uap->buf, sizeof(osb)); 706 } 707 free(sfp, M_STATFS); 708 return (error); 709 } 710 711 /* 712 * Convert a new format statfs structure to an old format statfs structure. 713 */ 714 static void 715 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 716 { 717 718 statfs_scale_blocks(nsp, LONG_MAX); 719 bzero(osp, sizeof(*osp)); 720 osp->f_bsize = nsp->f_bsize; 721 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 722 osp->f_blocks = nsp->f_blocks; 723 osp->f_bfree = nsp->f_bfree; 724 osp->f_bavail = nsp->f_bavail; 725 osp->f_files = MIN(nsp->f_files, LONG_MAX); 726 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 727 osp->f_owner = nsp->f_owner; 728 osp->f_type = nsp->f_type; 729 osp->f_flags = nsp->f_flags; 730 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 731 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 732 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 733 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 734 strlcpy(osp->f_fstypename, nsp->f_fstypename, 735 MIN(MFSNAMELEN, OMFSNAMELEN)); 736 strlcpy(osp->f_mntonname, nsp->f_mntonname, 737 MIN(MNAMELEN, OMNAMELEN)); 738 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 739 MIN(MNAMELEN, OMNAMELEN)); 740 osp->f_fsid = nsp->f_fsid; 741 } 742 #endif /* COMPAT_FREEBSD4 */ 743 744 #if defined(COMPAT_FREEBSD11) 745 /* 746 * Get old format filesystem statistics. 
747 */ 748 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 749 750 int 751 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 752 { 753 struct freebsd11_statfs osb; 754 struct statfs *sfp; 755 int error; 756 757 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 758 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 759 if (error == 0) { 760 freebsd11_cvtstatfs(sfp, &osb); 761 error = copyout(&osb, uap->buf, sizeof(osb)); 762 } 763 free(sfp, M_STATFS); 764 return (error); 765 } 766 767 /* 768 * Get filesystem statistics. 769 */ 770 int 771 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 772 { 773 struct freebsd11_statfs osb; 774 struct statfs *sfp; 775 int error; 776 777 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 778 error = kern_fstatfs(td, uap->fd, sfp); 779 if (error == 0) { 780 freebsd11_cvtstatfs(sfp, &osb); 781 error = copyout(&osb, uap->buf, sizeof(osb)); 782 } 783 free(sfp, M_STATFS); 784 return (error); 785 } 786 787 /* 788 * Get statistics on all filesystems. 
789 */ 790 int 791 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 792 { 793 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 794 } 795 796 int 797 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 798 long bufsize, int mode) 799 { 800 struct freebsd11_statfs osb; 801 struct statfs *buf, *sp; 802 size_t count, size; 803 int error; 804 805 if (bufsize < 0) 806 return (EINVAL); 807 808 count = bufsize / sizeof(struct ostatfs); 809 size = count * sizeof(struct statfs); 810 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 811 if (error == 0) 812 td->td_retval[0] = count; 813 if (size > 0) { 814 sp = buf; 815 while (count > 0 && error == 0) { 816 freebsd11_cvtstatfs(sp, &osb); 817 error = copyout(&osb, ubuf, sizeof(osb)); 818 sp++; 819 ubuf++; 820 count--; 821 } 822 free(buf, M_STATFS); 823 } 824 return (error); 825 } 826 827 /* 828 * Implement fstatfs() for (NFS) file handles. 829 */ 830 int 831 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 832 { 833 struct freebsd11_statfs osb; 834 struct statfs *sfp; 835 fhandle_t fh; 836 int error; 837 838 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 839 if (error) 840 return (error); 841 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 842 error = kern_fhstatfs(td, fh, sfp); 843 if (error == 0) { 844 freebsd11_cvtstatfs(sfp, &osb); 845 error = copyout(&osb, uap->buf, sizeof(osb)); 846 } 847 free(sfp, M_STATFS); 848 return (error); 849 } 850 851 /* 852 * Convert a new format statfs structure to an old format statfs structure. 
853 */ 854 static void 855 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 856 { 857 858 bzero(osp, sizeof(*osp)); 859 osp->f_version = FREEBSD11_STATFS_VERSION; 860 osp->f_type = nsp->f_type; 861 osp->f_flags = nsp->f_flags; 862 osp->f_bsize = nsp->f_bsize; 863 osp->f_iosize = nsp->f_iosize; 864 osp->f_blocks = nsp->f_blocks; 865 osp->f_bfree = nsp->f_bfree; 866 osp->f_bavail = nsp->f_bavail; 867 osp->f_files = nsp->f_files; 868 osp->f_ffree = nsp->f_ffree; 869 osp->f_syncwrites = nsp->f_syncwrites; 870 osp->f_asyncwrites = nsp->f_asyncwrites; 871 osp->f_syncreads = nsp->f_syncreads; 872 osp->f_asyncreads = nsp->f_asyncreads; 873 osp->f_namemax = nsp->f_namemax; 874 osp->f_owner = nsp->f_owner; 875 osp->f_fsid = nsp->f_fsid; 876 strlcpy(osp->f_fstypename, nsp->f_fstypename, 877 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 878 strlcpy(osp->f_mntonname, nsp->f_mntonname, 879 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 880 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 881 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 882 } 883 #endif /* COMPAT_FREEBSD11 */ 884 885 /* 886 * Change current working directory to a given file descriptor. 
887 */ 888 #ifndef _SYS_SYSPROTO_H_ 889 struct fchdir_args { 890 int fd; 891 }; 892 #endif 893 int 894 sys_fchdir(struct thread *td, struct fchdir_args *uap) 895 { 896 struct vnode *vp, *tdp; 897 struct mount *mp; 898 struct file *fp; 899 int error; 900 901 AUDIT_ARG_FD(uap->fd); 902 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 903 &fp); 904 if (error != 0) 905 return (error); 906 vp = fp->f_vnode; 907 vrefact(vp); 908 fdrop(fp, td); 909 vn_lock(vp, LK_SHARED | LK_RETRY); 910 AUDIT_ARG_VNODE1(vp); 911 error = change_dir(vp, td); 912 while (!error && (mp = vp->v_mountedhere) != NULL) { 913 if (vfs_busy(mp, 0)) 914 continue; 915 error = VFS_ROOT(mp, LK_SHARED, &tdp); 916 vfs_unbusy(mp); 917 if (error != 0) 918 break; 919 vput(vp); 920 vp = tdp; 921 } 922 if (error != 0) { 923 vput(vp); 924 return (error); 925 } 926 VOP_UNLOCK(vp); 927 pwd_chdir(td, vp); 928 return (0); 929 } 930 931 /* 932 * Change current working directory (``.''). 933 */ 934 #ifndef _SYS_SYSPROTO_H_ 935 struct chdir_args { 936 char *path; 937 }; 938 #endif 939 int 940 sys_chdir(struct thread *td, struct chdir_args *uap) 941 { 942 943 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 944 } 945 946 int 947 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 948 { 949 struct nameidata nd; 950 int error; 951 952 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 953 pathseg, path); 954 if ((error = namei(&nd)) != 0) 955 return (error); 956 if ((error = change_dir(nd.ni_vp, td)) != 0) { 957 vput(nd.ni_vp); 958 NDFREE_PNBUF(&nd); 959 return (error); 960 } 961 VOP_UNLOCK(nd.ni_vp); 962 NDFREE_PNBUF(&nd); 963 pwd_chdir(td, nd.ni_vp); 964 return (0); 965 } 966 967 static int unprivileged_chroot = 0; 968 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 969 &unprivileged_chroot, 0, 970 "Unprivileged processes can use chroot(2)"); 971 /* 972 * Change notion of root (``/'') directory. 
973 */ 974 #ifndef _SYS_SYSPROTO_H_ 975 struct chroot_args { 976 char *path; 977 }; 978 #endif 979 int 980 sys_chroot(struct thread *td, struct chroot_args *uap) 981 { 982 struct nameidata nd; 983 struct proc *p; 984 int error; 985 986 error = priv_check(td, PRIV_VFS_CHROOT); 987 if (error != 0) { 988 p = td->td_proc; 989 PROC_LOCK(p); 990 if (unprivileged_chroot == 0 || 991 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 992 PROC_UNLOCK(p); 993 return (error); 994 } 995 PROC_UNLOCK(p); 996 } 997 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 998 UIO_USERSPACE, uap->path); 999 error = namei(&nd); 1000 if (error != 0) 1001 return (error); 1002 NDFREE_PNBUF(&nd); 1003 error = change_dir(nd.ni_vp, td); 1004 if (error != 0) 1005 goto e_vunlock; 1006 #ifdef MAC 1007 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 1008 if (error != 0) 1009 goto e_vunlock; 1010 #endif 1011 VOP_UNLOCK(nd.ni_vp); 1012 error = pwd_chroot(td, nd.ni_vp); 1013 vrele(nd.ni_vp); 1014 return (error); 1015 e_vunlock: 1016 vput(nd.ni_vp); 1017 return (error); 1018 } 1019 1020 /* 1021 * Common routine for chroot and chdir. Callers must provide a locked vnode 1022 * instance. 
1023 */ 1024 int 1025 change_dir(struct vnode *vp, struct thread *td) 1026 { 1027 #ifdef MAC 1028 int error; 1029 #endif 1030 1031 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1032 if (vp->v_type != VDIR) 1033 return (ENOTDIR); 1034 #ifdef MAC 1035 error = mac_vnode_check_chdir(td->td_ucred, vp); 1036 if (error != 0) 1037 return (error); 1038 #endif 1039 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1040 } 1041 1042 static __inline void 1043 flags_to_rights(int flags, cap_rights_t *rightsp) 1044 { 1045 if (flags & O_EXEC) { 1046 cap_rights_set_one(rightsp, CAP_FEXECVE); 1047 if (flags & O_PATH) 1048 return; 1049 } else { 1050 switch ((flags & O_ACCMODE)) { 1051 case O_RDONLY: 1052 cap_rights_set_one(rightsp, CAP_READ); 1053 break; 1054 case O_RDWR: 1055 cap_rights_set_one(rightsp, CAP_READ); 1056 /* FALLTHROUGH */ 1057 case O_WRONLY: 1058 cap_rights_set_one(rightsp, CAP_WRITE); 1059 if (!(flags & (O_APPEND | O_TRUNC))) 1060 cap_rights_set_one(rightsp, CAP_SEEK); 1061 break; 1062 } 1063 } 1064 1065 if (flags & O_CREAT) 1066 cap_rights_set_one(rightsp, CAP_CREATE); 1067 1068 if (flags & O_TRUNC) 1069 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1070 1071 if (flags & (O_SYNC | O_FSYNC)) 1072 cap_rights_set_one(rightsp, CAP_FSYNC); 1073 1074 if (flags & (O_EXLOCK | O_SHLOCK)) 1075 cap_rights_set_one(rightsp, CAP_FLOCK); 1076 } 1077 1078 /* 1079 * Check permissions, allocate an open file structure, and call the device 1080 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/*
 * open(2) entry point: forwards the userspace path, flags and mode to
 * kern_openat(), passing AT_FDCWD as the descriptor argument.
 */
int
sys_open(struct thread *td, struct open_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/*
 * openat(2) entry point: records the descriptor argument for audit and
 * forwards everything to kern_openat().
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Common implementation behind sys_open(), sys_openat() and ocreat().
 *
 * Validates the access-mode bits in 'flags', allocates a file structure
 * with falloc_noinstall(), opens 'path' (looked up relative to 'fd') via
 * vn_open_cred(), and finally installs the file into a descriptor slot
 * with finstall_refed().
 *
 * Returns 0 with the new descriptor index in td->td_retval[0], or an
 * errno value.  On the 'bad' path the file structure allocated here is
 * released with falloc_abort().
 */
int
kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 marks "no descriptor installed yet"; see the 'success' label. */
	indx = -1;
	fdp = p->p_fd;
	pdp = p->p_pd;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	if ((flags & O_PATH) != 0) {
		flags &= ~(O_CREAT | O_ACCMODE);
	} else if ((flags & O_EXEC) != 0) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Mask the requested mode with pd_cmask and ALLPERMS, drop S_ISTXT. */
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, fd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* On success dupfdopen() fills in indx itself. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}

	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/*
		 * indx was set by dupfdopen(); the file structure allocated
		 * above is not the one installed, so release it.
		 */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file: kern_openat() with O_WRONLY | O_CREAT | O_TRUNC.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1281 */ 1282 #ifndef _SYS_SYSPROTO_H_ 1283 struct mknodat_args { 1284 int fd; 1285 char *path; 1286 mode_t mode; 1287 dev_t dev; 1288 }; 1289 #endif 1290 int 1291 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1292 { 1293 1294 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1295 uap->dev)); 1296 } 1297 1298 #if defined(COMPAT_FREEBSD11) 1299 int 1300 freebsd11_mknod(struct thread *td, 1301 struct freebsd11_mknod_args *uap) 1302 { 1303 1304 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1305 uap->mode, uap->dev)); 1306 } 1307 1308 int 1309 freebsd11_mknodat(struct thread *td, 1310 struct freebsd11_mknodat_args *uap) 1311 { 1312 1313 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1314 uap->dev)); 1315 } 1316 #endif /* COMPAT_FREEBSD11 */ 1317 1318 int 1319 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1320 int mode, dev_t dev) 1321 { 1322 struct vnode *vp; 1323 struct mount *mp; 1324 struct vattr vattr; 1325 struct nameidata nd; 1326 int error, whiteout = 0; 1327 1328 AUDIT_ARG_MODE(mode); 1329 AUDIT_ARG_DEV(dev); 1330 switch (mode & S_IFMT) { 1331 case S_IFCHR: 1332 case S_IFBLK: 1333 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1334 if (error == 0 && dev == VNOVAL) 1335 error = EINVAL; 1336 break; 1337 case S_IFWHT: 1338 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1339 break; 1340 case S_IFIFO: 1341 if (dev == 0) 1342 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1343 /* FALLTHROUGH */ 1344 default: 1345 error = EINVAL; 1346 break; 1347 } 1348 if (error != 0) 1349 return (error); 1350 NDPREINIT(&nd); 1351 restart: 1352 bwillwrite(); 1353 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1354 pathseg, path, fd, &cap_mknodat_rights); 1355 if ((error = namei(&nd)) != 0) 1356 return (error); 1357 vp = nd.ni_vp; 1358 if (vp != NULL) { 1359 NDFREE_PNBUF(&nd); 1360 if (vp == nd.ni_dvp) 1361 vrele(nd.ni_dvp); 1362 else 1363 vput(nd.ni_dvp); 1364 
vrele(vp); 1365 return (EEXIST); 1366 } else { 1367 VATTR_NULL(&vattr); 1368 vattr.va_mode = (mode & ALLPERMS) & 1369 ~td->td_proc->p_pd->pd_cmask; 1370 vattr.va_rdev = dev; 1371 whiteout = 0; 1372 1373 switch (mode & S_IFMT) { 1374 case S_IFCHR: 1375 vattr.va_type = VCHR; 1376 break; 1377 case S_IFBLK: 1378 vattr.va_type = VBLK; 1379 break; 1380 case S_IFWHT: 1381 whiteout = 1; 1382 break; 1383 default: 1384 panic("kern_mknod: invalid mode"); 1385 } 1386 } 1387 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1388 NDFREE_PNBUF(&nd); 1389 vput(nd.ni_dvp); 1390 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1391 return (error); 1392 goto restart; 1393 } 1394 #ifdef MAC 1395 if (error == 0 && !whiteout) 1396 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1397 &nd.ni_cnd, &vattr); 1398 #endif 1399 if (error == 0) { 1400 if (whiteout) 1401 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1402 else { 1403 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1404 &nd.ni_cnd, &vattr); 1405 } 1406 } 1407 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1408 true); 1409 vn_finished_write(mp); 1410 NDFREE_PNBUF(&nd); 1411 if (error == ERELOOKUP) 1412 goto restart; 1413 return (error); 1414 } 1415 1416 /* 1417 * Create a named pipe. 
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point: kern_mkfifoat() with AT_FDCWD.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO named by 'path' (looked up relative to 'fd') with
 * permissions 'mode' masked by ALLPERMS and the process pd_cmask.
 * Returns 0 or an errno value; EEXIST if the name already exists.
 * The lookup is redone when the write cannot be started without
 * sleeping or when the create reports ERELOOKUP.
 */
int
kern_mkfifoat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mkfifoat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* Name already exists: release the lookup results. */
		NDFREE_PNBUF(&nd);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/* Drop the directory, wait for write access, look up again. */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point: both paths are relative to AT_FDCWD and the
 * source lookup uses AT_SYMLINK_FOLLOW.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, AT_SYMLINK_FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, uap->flag));
}

/* Policy knobs consulted by can_hardlink(); both default to off. */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether 'cred' may create a hard link to 'vp'.
 *
 * With both sysctl knobs off this always returns 0.  Otherwise the
 * vnode's attributes are fetched and, for each enabled knob whose
 * ownership test fails (uid differs / cred is not a member of the file's
 * group), PRIV_VFS_LINK is required.  Returns 0 or an errno value.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create a hard link 'path2' (relative to 'fd2') to the file named by
 * 'path1' (relative to 'fd1').
 *
 * 'flag' may contain only AT_SYMLINK_FOLLOW, AT_RESOLVE_BENEATH and
 * AT_EMPTY_PATH; anything else yields EINVAL.  When the source lookup
 * resolved through an empty path, PRIV_VFS_FHOPEN is required.  The
 * whole source lookup is repeated while kern_linkat_vp() reports EAGAIN
 * or ERELOOKUP.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
    const char *path2, enum uio_seg segflag, int flag)
{
	struct nameidata nd;
	int error;

	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH |
	    AT_EMPTY_PATH)) != 0)
		return (EINVAL);

	NDPREINIT(&nd);
	do {
		bwillwrite();
		NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag,
		    AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH),
		    segflag, path1, fd1, &cap_linkat_source_rights);
		if ((error = namei(&nd)) != 0)
			return (error);
		NDFREE_PNBUF(&nd);
		if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) {
			error = priv_check(td, PRIV_VFS_FHOPEN);
			if (error != 0) {
				vrele(nd.ni_vp);
				return (error);
			}
		}
		/* kern_linkat_vp() disposes of nd.ni_vp on every path. */
		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
	} while (error == EAGAIN || error == ERELOOKUP);
	return (error);
}

/*
 * Link the already looked-up source vnode 'vp' to the new name 'path'
 * (relative to 'fd').
 *
 * The caller's reference on 'vp' is given up on every return path
 * (vrele(), vput() or hand-off to VOP_VPUT_PAIR()).  Returns:
 *   EPERM   - vp is a directory;
 *   EEXIST  - the target name already exists;
 *   EXDEV   - target directory and vp are on different mounts;
 *   EAGAIN  - vp could not be locked, or the write had to wait; the
 *             caller (kern_linkat()) redoes the source lookup;
 *   otherwise the result of the permission checks or VOP_LINK().
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			NDFREE_PNBUF(&nd);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE_PNBUF(&nd);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Release everything, wait until a write can
				 * start, then ask the caller to retry.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | V_PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
			vn_finished_write(mp);
			NDFREE_PNBUF(&nd);
			/* vp was handed to VOP_VPUT_PAIR(); skip the vrele. */
			vp = NULL;
		} else {
			vput(nd.ni_dvp);
			NDFREE_PNBUF(&nd);
			vrele(vp);
			return (EAGAIN);
		}
	}
	if (vp != NULL)
		vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1685 */ 1686 #ifndef _SYS_SYSPROTO_H_ 1687 struct symlink_args { 1688 char *path; 1689 char *link; 1690 }; 1691 #endif 1692 int 1693 sys_symlink(struct thread *td, struct symlink_args *uap) 1694 { 1695 1696 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1697 UIO_USERSPACE)); 1698 } 1699 1700 #ifndef _SYS_SYSPROTO_H_ 1701 struct symlinkat_args { 1702 char *path; 1703 int fd; 1704 char *path2; 1705 }; 1706 #endif 1707 int 1708 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1709 { 1710 1711 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1712 UIO_USERSPACE)); 1713 } 1714 1715 int 1716 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1717 enum uio_seg segflg) 1718 { 1719 struct mount *mp; 1720 struct vattr vattr; 1721 const char *syspath; 1722 char *tmppath; 1723 struct nameidata nd; 1724 int error; 1725 1726 if (segflg == UIO_SYSSPACE) { 1727 syspath = path1; 1728 } else { 1729 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1730 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1731 goto out; 1732 syspath = tmppath; 1733 } 1734 AUDIT_ARG_TEXT(syspath); 1735 NDPREINIT(&nd); 1736 restart: 1737 bwillwrite(); 1738 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1739 path2, fd, &cap_symlinkat_rights); 1740 if ((error = namei(&nd)) != 0) 1741 goto out; 1742 if (nd.ni_vp) { 1743 NDFREE_PNBUF(&nd); 1744 if (nd.ni_vp == nd.ni_dvp) 1745 vrele(nd.ni_dvp); 1746 else 1747 vput(nd.ni_dvp); 1748 vrele(nd.ni_vp); 1749 nd.ni_vp = NULL; 1750 error = EEXIST; 1751 goto out; 1752 } 1753 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1754 NDFREE_PNBUF(&nd); 1755 vput(nd.ni_dvp); 1756 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1757 goto out; 1758 goto restart; 1759 } 1760 VATTR_NULL(&vattr); 1761 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1762 #ifdef MAC 1763 vattr.va_type = VLNK; 1764 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 
&nd.ni_cnd, 1765 &vattr); 1766 if (error != 0) 1767 goto out2; 1768 #endif 1769 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1770 #ifdef MAC 1771 out2: 1772 #endif 1773 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1774 vn_finished_write(mp); 1775 NDFREE_PNBUF(&nd); 1776 if (error == ERELOOKUP) 1777 goto restart; 1778 out: 1779 if (segflg != UIO_SYSSPACE) 1780 uma_zfree(namei_zone, tmppath); 1781 return (error); 1782 } 1783 1784 /* 1785 * Delete a whiteout from the filesystem. 1786 */ 1787 #ifndef _SYS_SYSPROTO_H_ 1788 struct undelete_args { 1789 char *path; 1790 }; 1791 #endif 1792 int 1793 sys_undelete(struct thread *td, struct undelete_args *uap) 1794 { 1795 struct mount *mp; 1796 struct nameidata nd; 1797 int error; 1798 1799 NDPREINIT(&nd); 1800 restart: 1801 bwillwrite(); 1802 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1803 UIO_USERSPACE, uap->path); 1804 error = namei(&nd); 1805 if (error != 0) 1806 return (error); 1807 1808 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1809 NDFREE_PNBUF(&nd); 1810 if (nd.ni_vp == nd.ni_dvp) 1811 vrele(nd.ni_dvp); 1812 else 1813 vput(nd.ni_dvp); 1814 if (nd.ni_vp) 1815 vrele(nd.ni_vp); 1816 return (EEXIST); 1817 } 1818 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1819 NDFREE_PNBUF(&nd); 1820 vput(nd.ni_dvp); 1821 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1822 return (error); 1823 goto restart; 1824 } 1825 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1826 NDFREE_PNBUF(&nd); 1827 vput(nd.ni_dvp); 1828 vn_finished_write(mp); 1829 if (error == ERELOOKUP) 1830 goto restart; 1831 return (error); 1832 } 1833 1834 /* 1835 * Delete a name from the filesystem. 
1836 */ 1837 #ifndef _SYS_SYSPROTO_H_ 1838 struct unlink_args { 1839 char *path; 1840 }; 1841 #endif 1842 int 1843 sys_unlink(struct thread *td, struct unlink_args *uap) 1844 { 1845 1846 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1847 0, 0)); 1848 } 1849 1850 static int 1851 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1852 int flag, enum uio_seg pathseg, ino_t oldinum) 1853 { 1854 1855 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1856 return (EINVAL); 1857 1858 if ((flag & AT_REMOVEDIR) != 0) 1859 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1860 1861 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1862 } 1863 1864 #ifndef _SYS_SYSPROTO_H_ 1865 struct unlinkat_args { 1866 int fd; 1867 char *path; 1868 int flag; 1869 }; 1870 #endif 1871 int 1872 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1873 { 1874 1875 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1876 UIO_USERSPACE, 0)); 1877 } 1878 1879 #ifndef _SYS_SYSPROTO_H_ 1880 struct funlinkat_args { 1881 int dfd; 1882 const char *path; 1883 int fd; 1884 int flag; 1885 }; 1886 #endif 1887 int 1888 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1889 { 1890 1891 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1892 UIO_USERSPACE, 0)); 1893 } 1894 1895 int 1896 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1897 enum uio_seg pathseg, int flag, ino_t oldinum) 1898 { 1899 struct mount *mp; 1900 struct file *fp; 1901 struct vnode *vp; 1902 struct nameidata nd; 1903 struct stat sb; 1904 int error; 1905 1906 fp = NULL; 1907 if (fd != FD_NONE) { 1908 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1909 if (error != 0) 1910 return (error); 1911 } 1912 1913 NDPREINIT(&nd); 1914 restart: 1915 bwillwrite(); 1916 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1917 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1918 pathseg, 
path, dfd, &cap_unlinkat_rights); 1919 if ((error = namei(&nd)) != 0) { 1920 if (error == EINVAL) 1921 error = EPERM; 1922 goto fdout; 1923 } 1924 vp = nd.ni_vp; 1925 if (vp->v_type == VDIR && oldinum == 0) { 1926 error = EPERM; /* POSIX */ 1927 } else if (oldinum != 0 && 1928 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 1929 sb.st_ino != oldinum) { 1930 error = EIDRM; /* Identifier removed */ 1931 } else if (fp != NULL && fp->f_vnode != vp) { 1932 if (VN_IS_DOOMED(fp->f_vnode)) 1933 error = EBADF; 1934 else 1935 error = EDEADLK; 1936 } else { 1937 /* 1938 * The root of a mounted filesystem cannot be deleted. 1939 * 1940 * XXX: can this only be a VDIR case? 1941 */ 1942 if (vp->v_vflag & VV_ROOT) 1943 error = EBUSY; 1944 } 1945 if (error == 0) { 1946 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1947 NDFREE_PNBUF(&nd); 1948 vput(nd.ni_dvp); 1949 if (vp == nd.ni_dvp) 1950 vrele(vp); 1951 else 1952 vput(vp); 1953 if ((error = vn_start_write(NULL, &mp, 1954 V_XSLEEP | V_PCATCH)) != 0) { 1955 goto fdout; 1956 } 1957 goto restart; 1958 } 1959 #ifdef MAC 1960 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1961 &nd.ni_cnd); 1962 if (error != 0) 1963 goto out; 1964 #endif 1965 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1966 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1967 #ifdef MAC 1968 out: 1969 #endif 1970 vn_finished_write(mp); 1971 } 1972 NDFREE_PNBUF(&nd); 1973 vput(nd.ni_dvp); 1974 if (vp == nd.ni_dvp) 1975 vrele(vp); 1976 else 1977 vput(vp); 1978 if (error == ERELOOKUP) 1979 goto restart; 1980 fdout: 1981 if (fp != NULL) 1982 fdrop(fp, td); 1983 return (error); 1984 } 1985 1986 /* 1987 * Reposition read/write file offset. 
1988 */ 1989 #ifndef _SYS_SYSPROTO_H_ 1990 struct lseek_args { 1991 int fd; 1992 int pad; 1993 off_t offset; 1994 int whence; 1995 }; 1996 #endif 1997 int 1998 sys_lseek(struct thread *td, struct lseek_args *uap) 1999 { 2000 2001 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2002 } 2003 2004 int 2005 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2006 { 2007 struct file *fp; 2008 int error; 2009 2010 AUDIT_ARG_FD(fd); 2011 error = fget(td, fd, &cap_seek_rights, &fp); 2012 if (error != 0) 2013 return (error); 2014 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2015 fo_seek(fp, offset, whence, td) : ESPIPE; 2016 fdrop(fp, td); 2017 return (error); 2018 } 2019 2020 #if defined(COMPAT_43) 2021 /* 2022 * Reposition read/write file offset. 2023 */ 2024 #ifndef _SYS_SYSPROTO_H_ 2025 struct olseek_args { 2026 int fd; 2027 long offset; 2028 int whence; 2029 }; 2030 #endif 2031 int 2032 olseek(struct thread *td, struct olseek_args *uap) 2033 { 2034 2035 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2036 } 2037 #endif /* COMPAT_43 */ 2038 2039 #if defined(COMPAT_FREEBSD6) 2040 /* Version with the 'pad' argument */ 2041 int 2042 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2043 { 2044 2045 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2046 } 2047 #endif 2048 2049 /* 2050 * Check access permissions using passed credentials. 2051 */ 2052 static int 2053 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2054 struct thread *td) 2055 { 2056 accmode_t accmode; 2057 int error; 2058 2059 /* Flags == 0 means only check for existence. 
*/ 2060 if (user_flags == 0) 2061 return (0); 2062 2063 accmode = 0; 2064 if (user_flags & R_OK) 2065 accmode |= VREAD; 2066 if (user_flags & W_OK) 2067 accmode |= VWRITE; 2068 if (user_flags & X_OK) 2069 accmode |= VEXEC; 2070 #ifdef MAC 2071 error = mac_vnode_check_access(cred, vp, accmode); 2072 if (error != 0) 2073 return (error); 2074 #endif 2075 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2076 error = VOP_ACCESS(vp, accmode, cred, td); 2077 return (error); 2078 } 2079 2080 /* 2081 * Check access permissions using "real" credentials. 2082 */ 2083 #ifndef _SYS_SYSPROTO_H_ 2084 struct access_args { 2085 char *path; 2086 int amode; 2087 }; 2088 #endif 2089 int 2090 sys_access(struct thread *td, struct access_args *uap) 2091 { 2092 2093 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2094 0, uap->amode)); 2095 } 2096 2097 #ifndef _SYS_SYSPROTO_H_ 2098 struct faccessat_args { 2099 int dirfd; 2100 char *path; 2101 int amode; 2102 int flag; 2103 } 2104 #endif 2105 int 2106 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2107 { 2108 2109 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2110 uap->amode)); 2111 } 2112 2113 int 2114 kern_accessat(struct thread *td, int fd, const char *path, 2115 enum uio_seg pathseg, int flag, int amode) 2116 { 2117 struct ucred *cred, *usecred; 2118 struct vnode *vp; 2119 struct nameidata nd; 2120 int error; 2121 2122 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0) 2123 return (EINVAL); 2124 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2125 return (EINVAL); 2126 2127 /* 2128 * Create and modify a temporary credential instead of one that 2129 * is potentially shared (if we need one). 
2130 */ 2131 cred = td->td_ucred; 2132 if ((flag & AT_EACCESS) == 0 && 2133 ((cred->cr_uid != cred->cr_ruid || 2134 cred->cr_rgid != cred->cr_groups[0]))) { 2135 usecred = crdup(cred); 2136 usecred->cr_uid = cred->cr_ruid; 2137 usecred->cr_groups[0] = cred->cr_rgid; 2138 td->td_ucred = usecred; 2139 } else 2140 usecred = cred; 2141 AUDIT_ARG_VALUE(amode); 2142 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2143 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | 2144 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2145 if ((error = namei(&nd)) != 0) 2146 goto out; 2147 vp = nd.ni_vp; 2148 2149 error = vn_access(vp, amode, usecred, td); 2150 NDFREE_PNBUF(&nd); 2151 vput(vp); 2152 out: 2153 if (usecred != cred) { 2154 td->td_ucred = cred; 2155 crfree(usecred); 2156 } 2157 return (error); 2158 } 2159 2160 /* 2161 * Check access permissions using "effective" credentials. 2162 */ 2163 #ifndef _SYS_SYSPROTO_H_ 2164 struct eaccess_args { 2165 char *path; 2166 int amode; 2167 }; 2168 #endif 2169 int 2170 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2171 { 2172 2173 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2174 AT_EACCESS, uap->amode)); 2175 } 2176 2177 #if defined(COMPAT_43) 2178 /* 2179 * Get file status; this version follows links. 2180 */ 2181 #ifndef _SYS_SYSPROTO_H_ 2182 struct ostat_args { 2183 char *path; 2184 struct ostat *ub; 2185 }; 2186 #endif 2187 int 2188 ostat(struct thread *td, struct ostat_args *uap) 2189 { 2190 struct stat sb; 2191 struct ostat osb; 2192 int error; 2193 2194 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2195 if (error != 0) 2196 return (error); 2197 cvtstat(&sb, &osb); 2198 return (copyout(&osb, uap->ub, sizeof (osb))); 2199 } 2200 2201 /* 2202 * Get file status; this version does not follow links. 
2203 */ 2204 #ifndef _SYS_SYSPROTO_H_ 2205 struct olstat_args { 2206 char *path; 2207 struct ostat *ub; 2208 }; 2209 #endif 2210 int 2211 olstat(struct thread *td, struct olstat_args *uap) 2212 { 2213 struct stat sb; 2214 struct ostat osb; 2215 int error; 2216 2217 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2218 UIO_USERSPACE, &sb); 2219 if (error != 0) 2220 return (error); 2221 cvtstat(&sb, &osb); 2222 return (copyout(&osb, uap->ub, sizeof (osb))); 2223 } 2224 2225 /* 2226 * Convert from an old to a new stat structure. 2227 * XXX: many values are blindly truncated. 2228 */ 2229 void 2230 cvtstat(struct stat *st, struct ostat *ost) 2231 { 2232 2233 bzero(ost, sizeof(*ost)); 2234 ost->st_dev = st->st_dev; 2235 ost->st_ino = st->st_ino; 2236 ost->st_mode = st->st_mode; 2237 ost->st_nlink = st->st_nlink; 2238 ost->st_uid = st->st_uid; 2239 ost->st_gid = st->st_gid; 2240 ost->st_rdev = st->st_rdev; 2241 ost->st_size = MIN(st->st_size, INT32_MAX); 2242 ost->st_atim = st->st_atim; 2243 ost->st_mtim = st->st_mtim; 2244 ost->st_ctim = st->st_ctim; 2245 ost->st_blksize = st->st_blksize; 2246 ost->st_blocks = st->st_blocks; 2247 ost->st_flags = st->st_flags; 2248 ost->st_gen = st->st_gen; 2249 } 2250 #endif /* COMPAT_43 */ 2251 2252 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2253 int ino64_trunc_error; 2254 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2255 &ino64_trunc_error, 0, 2256 "Error on truncation of device, file or inode number, or link count"); 2257 2258 int 2259 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2260 { 2261 2262 ost->st_dev = st->st_dev; 2263 if (ost->st_dev != st->st_dev) { 2264 switch (ino64_trunc_error) { 2265 default: 2266 /* 2267 * Since dev_t is almost raw, don't clamp to the 2268 * maximum for case 2, but ignore the error. 
2269 */ 2270 break; 2271 case 1: 2272 return (EOVERFLOW); 2273 } 2274 } 2275 ost->st_ino = st->st_ino; 2276 if (ost->st_ino != st->st_ino) { 2277 switch (ino64_trunc_error) { 2278 default: 2279 case 0: 2280 break; 2281 case 1: 2282 return (EOVERFLOW); 2283 case 2: 2284 ost->st_ino = UINT32_MAX; 2285 break; 2286 } 2287 } 2288 ost->st_mode = st->st_mode; 2289 ost->st_nlink = st->st_nlink; 2290 if (ost->st_nlink != st->st_nlink) { 2291 switch (ino64_trunc_error) { 2292 default: 2293 case 0: 2294 break; 2295 case 1: 2296 return (EOVERFLOW); 2297 case 2: 2298 ost->st_nlink = UINT16_MAX; 2299 break; 2300 } 2301 } 2302 ost->st_uid = st->st_uid; 2303 ost->st_gid = st->st_gid; 2304 ost->st_rdev = st->st_rdev; 2305 if (ost->st_rdev != st->st_rdev) { 2306 switch (ino64_trunc_error) { 2307 default: 2308 break; 2309 case 1: 2310 return (EOVERFLOW); 2311 } 2312 } 2313 ost->st_atim = st->st_atim; 2314 ost->st_mtim = st->st_mtim; 2315 ost->st_ctim = st->st_ctim; 2316 ost->st_size = st->st_size; 2317 ost->st_blocks = st->st_blocks; 2318 ost->st_blksize = st->st_blksize; 2319 ost->st_flags = st->st_flags; 2320 ost->st_gen = st->st_gen; 2321 ost->st_lspare = 0; 2322 ost->st_birthtim = st->st_birthtim; 2323 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2324 sizeof(*ost) - offsetof(struct freebsd11_stat, 2325 st_birthtim) - sizeof(ost->st_birthtim)); 2326 return (0); 2327 } 2328 2329 int 2330 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2331 { 2332 struct stat sb; 2333 struct freebsd11_stat osb; 2334 int error; 2335 2336 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2337 if (error != 0) 2338 return (error); 2339 error = freebsd11_cvtstat(&sb, &osb); 2340 if (error == 0) 2341 error = copyout(&osb, uap->ub, sizeof(osb)); 2342 return (error); 2343 } 2344 2345 int 2346 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2347 { 2348 struct stat sb; 2349 struct freebsd11_stat osb; 2350 int error; 2351 2352 error = 
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2353 UIO_USERSPACE, &sb); 2354 if (error != 0) 2355 return (error); 2356 error = freebsd11_cvtstat(&sb, &osb); 2357 if (error == 0) 2358 error = copyout(&osb, uap->ub, sizeof(osb)); 2359 return (error); 2360 } 2361 2362 int 2363 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2364 { 2365 struct fhandle fh; 2366 struct stat sb; 2367 struct freebsd11_stat osb; 2368 int error; 2369 2370 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2371 if (error != 0) 2372 return (error); 2373 error = kern_fhstat(td, fh, &sb); 2374 if (error != 0) 2375 return (error); 2376 error = freebsd11_cvtstat(&sb, &osb); 2377 if (error == 0) 2378 error = copyout(&osb, uap->sb, sizeof(osb)); 2379 return (error); 2380 } 2381 2382 int 2383 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2384 { 2385 struct stat sb; 2386 struct freebsd11_stat osb; 2387 int error; 2388 2389 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2390 UIO_USERSPACE, &sb); 2391 if (error != 0) 2392 return (error); 2393 error = freebsd11_cvtstat(&sb, &osb); 2394 if (error == 0) 2395 error = copyout(&osb, uap->buf, sizeof(osb)); 2396 return (error); 2397 } 2398 #endif /* COMPAT_FREEBSD11 */ 2399 2400 /* 2401 * Get file status 2402 */ 2403 #ifndef _SYS_SYSPROTO_H_ 2404 struct fstatat_args { 2405 int fd; 2406 char *path; 2407 struct stat *buf; 2408 int flag; 2409 } 2410 #endif 2411 int 2412 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2413 { 2414 struct stat sb; 2415 int error; 2416 2417 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2418 UIO_USERSPACE, &sb); 2419 if (error == 0) 2420 error = copyout(&sb, uap->buf, sizeof (sb)); 2421 return (error); 2422 } 2423 2424 int 2425 kern_statat(struct thread *td, int flag, int fd, const char *path, 2426 enum uio_seg pathseg, struct stat *sbp) 2427 { 2428 struct nameidata nd; 2429 int error; 2430 2431 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2432 AT_EMPTY_PATH)) != 0) 2433 return (EINVAL); 2434 2435 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2436 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2437 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2438 2439 if ((error = namei(&nd)) != 0) { 2440 if (error == ENOTDIR && 2441 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2442 error = kern_fstat(td, fd, sbp); 2443 return (error); 2444 } 2445 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2446 NDFREE_PNBUF(&nd); 2447 vput(nd.ni_vp); 2448 #ifdef __STAT_TIME_T_EXT 2449 sbp->st_atim_ext = 0; 2450 sbp->st_mtim_ext = 0; 2451 sbp->st_ctim_ext = 0; 2452 sbp->st_btim_ext = 0; 2453 #endif 2454 #ifdef KTRACE 2455 if (KTRPOINT(td, KTR_STRUCT)) 2456 ktrstat_error(sbp, error); 2457 #endif 2458 return (error); 2459 } 2460 2461 #if defined(COMPAT_FREEBSD11) 2462 /* 2463 * Implementation of the NetBSD [l]stat() functions. 2464 */ 2465 int 2466 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2467 { 2468 struct freebsd11_stat sb11; 2469 int error; 2470 2471 error = freebsd11_cvtstat(sb, &sb11); 2472 if (error != 0) 2473 return (error); 2474 2475 bzero(nsb, sizeof(*nsb)); 2476 CP(sb11, *nsb, st_dev); 2477 CP(sb11, *nsb, st_ino); 2478 CP(sb11, *nsb, st_mode); 2479 CP(sb11, *nsb, st_nlink); 2480 CP(sb11, *nsb, st_uid); 2481 CP(sb11, *nsb, st_gid); 2482 CP(sb11, *nsb, st_rdev); 2483 CP(sb11, *nsb, st_atim); 2484 CP(sb11, *nsb, st_mtim); 2485 CP(sb11, *nsb, st_ctim); 2486 CP(sb11, *nsb, st_size); 2487 CP(sb11, *nsb, st_blocks); 2488 CP(sb11, *nsb, st_blksize); 2489 CP(sb11, *nsb, st_flags); 2490 CP(sb11, *nsb, st_gen); 2491 CP(sb11, *nsb, st_birthtim); 2492 return (0); 2493 } 2494 2495 #ifndef _SYS_SYSPROTO_H_ 2496 struct freebsd11_nstat_args { 2497 char *path; 2498 struct nstat *ub; 2499 }; 2500 #endif 2501 int 2502 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2503 { 2504 struct stat sb; 2505 struct nstat nsb; 2506 int error; 2507 
2508 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2509 if (error != 0) 2510 return (error); 2511 error = freebsd11_cvtnstat(&sb, &nsb); 2512 if (error == 0) 2513 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2514 return (error); 2515 } 2516 2517 /* 2518 * NetBSD lstat. Get file status; this version does not follow links. 2519 */ 2520 #ifndef _SYS_SYSPROTO_H_ 2521 struct freebsd11_nlstat_args { 2522 char *path; 2523 struct nstat *ub; 2524 }; 2525 #endif 2526 int 2527 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2528 { 2529 struct stat sb; 2530 struct nstat nsb; 2531 int error; 2532 2533 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2534 UIO_USERSPACE, &sb); 2535 if (error != 0) 2536 return (error); 2537 error = freebsd11_cvtnstat(&sb, &nsb); 2538 if (error == 0) 2539 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2540 return (error); 2541 } 2542 #endif /* COMPAT_FREEBSD11 */ 2543 2544 /* 2545 * Get configurable pathname variables. 
2546 */ 2547 #ifndef _SYS_SYSPROTO_H_ 2548 struct pathconf_args { 2549 char *path; 2550 int name; 2551 }; 2552 #endif 2553 int 2554 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2555 { 2556 long value; 2557 int error; 2558 2559 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2560 &value); 2561 if (error == 0) 2562 td->td_retval[0] = value; 2563 return (error); 2564 } 2565 2566 #ifndef _SYS_SYSPROTO_H_ 2567 struct lpathconf_args { 2568 char *path; 2569 int name; 2570 }; 2571 #endif 2572 int 2573 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2574 { 2575 long value; 2576 int error; 2577 2578 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2579 NOFOLLOW, &value); 2580 if (error == 0) 2581 td->td_retval[0] = value; 2582 return (error); 2583 } 2584 2585 int 2586 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2587 int name, u_long flags, long *valuep) 2588 { 2589 struct nameidata nd; 2590 int error; 2591 2592 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2593 pathseg, path); 2594 if ((error = namei(&nd)) != 0) 2595 return (error); 2596 NDFREE_PNBUF(&nd); 2597 2598 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2599 vput(nd.ni_vp); 2600 return (error); 2601 } 2602 2603 /* 2604 * Return target name of a symbolic link. 
2605 */ 2606 #ifndef _SYS_SYSPROTO_H_ 2607 struct readlink_args { 2608 char *path; 2609 char *buf; 2610 size_t count; 2611 }; 2612 #endif 2613 int 2614 sys_readlink(struct thread *td, struct readlink_args *uap) 2615 { 2616 2617 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2618 uap->buf, UIO_USERSPACE, uap->count)); 2619 } 2620 #ifndef _SYS_SYSPROTO_H_ 2621 struct readlinkat_args { 2622 int fd; 2623 char *path; 2624 char *buf; 2625 size_t bufsize; 2626 }; 2627 #endif 2628 int 2629 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2630 { 2631 2632 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2633 uap->buf, UIO_USERSPACE, uap->bufsize)); 2634 } 2635 2636 int 2637 kern_readlinkat(struct thread *td, int fd, const char *path, 2638 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2639 { 2640 struct vnode *vp; 2641 struct nameidata nd; 2642 int error; 2643 2644 if (count > IOSIZE_MAX) 2645 return (EINVAL); 2646 2647 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2648 EMPTYPATH, pathseg, path, fd); 2649 2650 if ((error = namei(&nd)) != 0) 2651 return (error); 2652 NDFREE_PNBUF(&nd); 2653 vp = nd.ni_vp; 2654 2655 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2656 vput(vp); 2657 2658 return (error); 2659 } 2660 2661 /* 2662 * Helper function to readlink from a vnode 2663 */ 2664 static int 2665 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2666 struct thread *td) 2667 { 2668 struct iovec aiov; 2669 struct uio auio; 2670 int error; 2671 2672 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2673 #ifdef MAC 2674 error = mac_vnode_check_readlink(td->td_ucred, vp); 2675 if (error != 0) 2676 return (error); 2677 #endif 2678 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2679 return (EINVAL); 2680 2681 aiov.iov_base = buf; 2682 aiov.iov_len = count; 2683 auio.uio_iov = &aiov; 2684 auio.uio_iovcnt = 1; 2685 auio.uio_offset = 
0; 2686 auio.uio_rw = UIO_READ; 2687 auio.uio_segflg = bufseg; 2688 auio.uio_td = td; 2689 auio.uio_resid = count; 2690 error = VOP_READLINK(vp, &auio, td->td_ucred); 2691 td->td_retval[0] = count - auio.uio_resid; 2692 return (error); 2693 } 2694 2695 /* 2696 * Common implementation code for chflags() and fchflags(). 2697 */ 2698 static int 2699 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2700 { 2701 struct mount *mp; 2702 struct vattr vattr; 2703 int error; 2704 2705 /* We can't support the value matching VNOVAL. */ 2706 if (flags == VNOVAL) 2707 return (EOPNOTSUPP); 2708 2709 /* 2710 * Prevent non-root users from setting flags on devices. When 2711 * a device is reused, users can retain ownership of the device 2712 * if they are allowed to set flags and programs assume that 2713 * chown can't fail when done as root. 2714 */ 2715 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2716 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2717 if (error != 0) 2718 return (error); 2719 } 2720 2721 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2722 return (error); 2723 VATTR_NULL(&vattr); 2724 vattr.va_flags = flags; 2725 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2726 #ifdef MAC 2727 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2728 if (error == 0) 2729 #endif 2730 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2731 VOP_UNLOCK(vp); 2732 vn_finished_write(mp); 2733 return (error); 2734 } 2735 2736 /* 2737 * Change flags of a file given a path name. 
2738 */ 2739 #ifndef _SYS_SYSPROTO_H_ 2740 struct chflags_args { 2741 const char *path; 2742 u_long flags; 2743 }; 2744 #endif 2745 int 2746 sys_chflags(struct thread *td, struct chflags_args *uap) 2747 { 2748 2749 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2750 uap->flags, 0)); 2751 } 2752 2753 #ifndef _SYS_SYSPROTO_H_ 2754 struct chflagsat_args { 2755 int fd; 2756 const char *path; 2757 u_long flags; 2758 int atflag; 2759 } 2760 #endif 2761 int 2762 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2763 { 2764 2765 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2766 uap->flags, uap->atflag)); 2767 } 2768 2769 /* 2770 * Same as chflags() but doesn't follow symlinks. 2771 */ 2772 #ifndef _SYS_SYSPROTO_H_ 2773 struct lchflags_args { 2774 const char *path; 2775 u_long flags; 2776 }; 2777 #endif 2778 int 2779 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2780 { 2781 2782 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2783 uap->flags, AT_SYMLINK_NOFOLLOW)); 2784 } 2785 2786 static int 2787 kern_chflagsat(struct thread *td, int fd, const char *path, 2788 enum uio_seg pathseg, u_long flags, int atflag) 2789 { 2790 struct nameidata nd; 2791 int error; 2792 2793 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2794 AT_EMPTY_PATH)) != 0) 2795 return (EINVAL); 2796 2797 AUDIT_ARG_FFLAGS(flags); 2798 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2799 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2800 fd, &cap_fchflags_rights); 2801 if ((error = namei(&nd)) != 0) 2802 return (error); 2803 NDFREE_PNBUF(&nd); 2804 error = setfflags(td, nd.ni_vp, flags); 2805 vrele(nd.ni_vp); 2806 return (error); 2807 } 2808 2809 /* 2810 * Change flags of a file given a file descriptor. 
2811 */ 2812 #ifndef _SYS_SYSPROTO_H_ 2813 struct fchflags_args { 2814 int fd; 2815 u_long flags; 2816 }; 2817 #endif 2818 int 2819 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2820 { 2821 struct file *fp; 2822 int error; 2823 2824 AUDIT_ARG_FD(uap->fd); 2825 AUDIT_ARG_FFLAGS(uap->flags); 2826 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2827 &fp); 2828 if (error != 0) 2829 return (error); 2830 #ifdef AUDIT 2831 if (AUDITING_TD(td)) { 2832 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2833 AUDIT_ARG_VNODE1(fp->f_vnode); 2834 VOP_UNLOCK(fp->f_vnode); 2835 } 2836 #endif 2837 error = setfflags(td, fp->f_vnode, uap->flags); 2838 fdrop(fp, td); 2839 return (error); 2840 } 2841 2842 /* 2843 * Common implementation code for chmod(), lchmod() and fchmod(). 2844 */ 2845 int 2846 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2847 { 2848 struct mount *mp; 2849 struct vattr vattr; 2850 int error; 2851 2852 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2853 return (error); 2854 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2855 VATTR_NULL(&vattr); 2856 vattr.va_mode = mode & ALLPERMS; 2857 #ifdef MAC 2858 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2859 if (error == 0) 2860 #endif 2861 error = VOP_SETATTR(vp, &vattr, cred); 2862 VOP_UNLOCK(vp); 2863 vn_finished_write(mp); 2864 return (error); 2865 } 2866 2867 /* 2868 * Change mode of a file given path name. 
2869 */ 2870 #ifndef _SYS_SYSPROTO_H_ 2871 struct chmod_args { 2872 char *path; 2873 int mode; 2874 }; 2875 #endif 2876 int 2877 sys_chmod(struct thread *td, struct chmod_args *uap) 2878 { 2879 2880 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2881 uap->mode, 0)); 2882 } 2883 2884 #ifndef _SYS_SYSPROTO_H_ 2885 struct fchmodat_args { 2886 int dirfd; 2887 char *path; 2888 mode_t mode; 2889 int flag; 2890 } 2891 #endif 2892 int 2893 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2894 { 2895 2896 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2897 uap->mode, uap->flag)); 2898 } 2899 2900 /* 2901 * Change mode of a file given path name (don't follow links.) 2902 */ 2903 #ifndef _SYS_SYSPROTO_H_ 2904 struct lchmod_args { 2905 char *path; 2906 int mode; 2907 }; 2908 #endif 2909 int 2910 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2911 { 2912 2913 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2914 uap->mode, AT_SYMLINK_NOFOLLOW)); 2915 } 2916 2917 int 2918 kern_fchmodat(struct thread *td, int fd, const char *path, 2919 enum uio_seg pathseg, mode_t mode, int flag) 2920 { 2921 struct nameidata nd; 2922 int error; 2923 2924 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2925 AT_EMPTY_PATH)) != 0) 2926 return (EINVAL); 2927 2928 AUDIT_ARG_MODE(mode); 2929 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2930 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2931 fd, &cap_fchmod_rights); 2932 if ((error = namei(&nd)) != 0) 2933 return (error); 2934 NDFREE_PNBUF(&nd); 2935 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2936 vrele(nd.ni_vp); 2937 return (error); 2938 } 2939 2940 /* 2941 * Change mode of a file given a file descriptor. 
2942 */ 2943 #ifndef _SYS_SYSPROTO_H_ 2944 struct fchmod_args { 2945 int fd; 2946 int mode; 2947 }; 2948 #endif 2949 int 2950 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2951 { 2952 struct file *fp; 2953 int error; 2954 2955 AUDIT_ARG_FD(uap->fd); 2956 AUDIT_ARG_MODE(uap->mode); 2957 2958 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2959 if (error != 0) 2960 return (error); 2961 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2962 fdrop(fp, td); 2963 return (error); 2964 } 2965 2966 /* 2967 * Common implementation for chown(), lchown(), and fchown() 2968 */ 2969 int 2970 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2971 gid_t gid) 2972 { 2973 struct mount *mp; 2974 struct vattr vattr; 2975 int error; 2976 2977 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2978 return (error); 2979 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2980 VATTR_NULL(&vattr); 2981 vattr.va_uid = uid; 2982 vattr.va_gid = gid; 2983 #ifdef MAC 2984 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2985 vattr.va_gid); 2986 if (error == 0) 2987 #endif 2988 error = VOP_SETATTR(vp, &vattr, cred); 2989 VOP_UNLOCK(vp); 2990 vn_finished_write(mp); 2991 return (error); 2992 } 2993 2994 /* 2995 * Set ownership given a path name. 
2996 */ 2997 #ifndef _SYS_SYSPROTO_H_ 2998 struct chown_args { 2999 char *path; 3000 int uid; 3001 int gid; 3002 }; 3003 #endif 3004 int 3005 sys_chown(struct thread *td, struct chown_args *uap) 3006 { 3007 3008 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3009 uap->gid, 0)); 3010 } 3011 3012 #ifndef _SYS_SYSPROTO_H_ 3013 struct fchownat_args { 3014 int fd; 3015 const char * path; 3016 uid_t uid; 3017 gid_t gid; 3018 int flag; 3019 }; 3020 #endif 3021 int 3022 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3023 { 3024 3025 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3026 uap->gid, uap->flag)); 3027 } 3028 3029 int 3030 kern_fchownat(struct thread *td, int fd, const char *path, 3031 enum uio_seg pathseg, int uid, int gid, int flag) 3032 { 3033 struct nameidata nd; 3034 int error; 3035 3036 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3037 AT_EMPTY_PATH)) != 0) 3038 return (EINVAL); 3039 3040 AUDIT_ARG_OWNER(uid, gid); 3041 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3042 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3043 fd, &cap_fchown_rights); 3044 3045 if ((error = namei(&nd)) != 0) 3046 return (error); 3047 NDFREE_PNBUF(&nd); 3048 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3049 vrele(nd.ni_vp); 3050 return (error); 3051 } 3052 3053 /* 3054 * Set ownership given a path name, do not cross symlinks. 3055 */ 3056 #ifndef _SYS_SYSPROTO_H_ 3057 struct lchown_args { 3058 char *path; 3059 int uid; 3060 int gid; 3061 }; 3062 #endif 3063 int 3064 sys_lchown(struct thread *td, struct lchown_args *uap) 3065 { 3066 3067 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3068 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3069 } 3070 3071 /* 3072 * Set ownership given a file descriptor. 
3073 */ 3074 #ifndef _SYS_SYSPROTO_H_ 3075 struct fchown_args { 3076 int fd; 3077 int uid; 3078 int gid; 3079 }; 3080 #endif 3081 int 3082 sys_fchown(struct thread *td, struct fchown_args *uap) 3083 { 3084 struct file *fp; 3085 int error; 3086 3087 AUDIT_ARG_FD(uap->fd); 3088 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3089 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3090 if (error != 0) 3091 return (error); 3092 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3093 fdrop(fp, td); 3094 return (error); 3095 } 3096 3097 /* 3098 * Common implementation code for utimes(), lutimes(), and futimes(). 3099 */ 3100 static int 3101 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3102 struct timespec *tsp) 3103 { 3104 struct timeval tv[2]; 3105 const struct timeval *tvp; 3106 int error; 3107 3108 if (usrtvp == NULL) { 3109 vfs_timestamp(&tsp[0]); 3110 tsp[1] = tsp[0]; 3111 } else { 3112 if (tvpseg == UIO_SYSSPACE) { 3113 tvp = usrtvp; 3114 } else { 3115 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3116 return (error); 3117 tvp = tv; 3118 } 3119 3120 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3121 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3122 return (EINVAL); 3123 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3124 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3125 } 3126 return (0); 3127 } 3128 3129 /* 3130 * Common implementation code for futimens(), utimensat(). 
3131 */ 3132 #define UTIMENS_NULL 0x1 3133 #define UTIMENS_EXIT 0x2 3134 static int 3135 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3136 struct timespec *tsp, int *retflags) 3137 { 3138 struct timespec tsnow; 3139 int error; 3140 3141 vfs_timestamp(&tsnow); 3142 *retflags = 0; 3143 if (usrtsp == NULL) { 3144 tsp[0] = tsnow; 3145 tsp[1] = tsnow; 3146 *retflags |= UTIMENS_NULL; 3147 return (0); 3148 } 3149 if (tspseg == UIO_SYSSPACE) { 3150 tsp[0] = usrtsp[0]; 3151 tsp[1] = usrtsp[1]; 3152 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3153 return (error); 3154 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3155 *retflags |= UTIMENS_EXIT; 3156 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3157 *retflags |= UTIMENS_NULL; 3158 if (tsp[0].tv_nsec == UTIME_OMIT) 3159 tsp[0].tv_sec = VNOVAL; 3160 else if (tsp[0].tv_nsec == UTIME_NOW) 3161 tsp[0] = tsnow; 3162 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3163 return (EINVAL); 3164 if (tsp[1].tv_nsec == UTIME_OMIT) 3165 tsp[1].tv_sec = VNOVAL; 3166 else if (tsp[1].tv_nsec == UTIME_NOW) 3167 tsp[1] = tsnow; 3168 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3169 return (EINVAL); 3170 3171 return (0); 3172 } 3173 3174 /* 3175 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3176 * and utimensat(). 
3177 */ 3178 static int 3179 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3180 int numtimes, int nullflag) 3181 { 3182 struct mount *mp; 3183 struct vattr vattr; 3184 int error; 3185 bool setbirthtime; 3186 3187 setbirthtime = false; 3188 vattr.va_birthtime.tv_sec = VNOVAL; 3189 vattr.va_birthtime.tv_nsec = 0; 3190 3191 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3192 return (error); 3193 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3194 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3195 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3196 setbirthtime = true; 3197 VATTR_NULL(&vattr); 3198 vattr.va_atime = ts[0]; 3199 vattr.va_mtime = ts[1]; 3200 if (setbirthtime) 3201 vattr.va_birthtime = ts[1]; 3202 if (numtimes > 2) 3203 vattr.va_birthtime = ts[2]; 3204 if (nullflag) 3205 vattr.va_vaflags |= VA_UTIMES_NULL; 3206 #ifdef MAC 3207 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3208 vattr.va_mtime); 3209 #endif 3210 if (error == 0) 3211 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3212 VOP_UNLOCK(vp); 3213 vn_finished_write(mp); 3214 return (error); 3215 } 3216 3217 /* 3218 * Set the access and modification times of a file. 
3219 */ 3220 #ifndef _SYS_SYSPROTO_H_ 3221 struct utimes_args { 3222 char *path; 3223 struct timeval *tptr; 3224 }; 3225 #endif 3226 int 3227 sys_utimes(struct thread *td, struct utimes_args *uap) 3228 { 3229 3230 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3231 uap->tptr, UIO_USERSPACE)); 3232 } 3233 3234 #ifndef _SYS_SYSPROTO_H_ 3235 struct futimesat_args { 3236 int fd; 3237 const char * path; 3238 const struct timeval * times; 3239 }; 3240 #endif 3241 int 3242 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3243 { 3244 3245 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3246 uap->times, UIO_USERSPACE)); 3247 } 3248 3249 int 3250 kern_utimesat(struct thread *td, int fd, const char *path, 3251 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3252 { 3253 struct nameidata nd; 3254 struct timespec ts[2]; 3255 int error; 3256 3257 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3258 return (error); 3259 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3260 &cap_futimes_rights); 3261 3262 if ((error = namei(&nd)) != 0) 3263 return (error); 3264 NDFREE_PNBUF(&nd); 3265 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3266 vrele(nd.ni_vp); 3267 return (error); 3268 } 3269 3270 /* 3271 * Set the access and modification times of a file. 
3272 */ 3273 #ifndef _SYS_SYSPROTO_H_ 3274 struct lutimes_args { 3275 char *path; 3276 struct timeval *tptr; 3277 }; 3278 #endif 3279 int 3280 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3281 { 3282 3283 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3284 UIO_USERSPACE)); 3285 } 3286 3287 int 3288 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3289 const struct timeval *tptr, enum uio_seg tptrseg) 3290 { 3291 struct timespec ts[2]; 3292 struct nameidata nd; 3293 int error; 3294 3295 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3296 return (error); 3297 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3298 if ((error = namei(&nd)) != 0) 3299 return (error); 3300 NDFREE_PNBUF(&nd); 3301 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3302 vrele(nd.ni_vp); 3303 return (error); 3304 } 3305 3306 /* 3307 * Set the access and modification times of a file. 3308 */ 3309 #ifndef _SYS_SYSPROTO_H_ 3310 struct futimes_args { 3311 int fd; 3312 struct timeval *tptr; 3313 }; 3314 #endif 3315 int 3316 sys_futimes(struct thread *td, struct futimes_args *uap) 3317 { 3318 3319 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3320 } 3321 3322 int 3323 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3324 enum uio_seg tptrseg) 3325 { 3326 struct timespec ts[2]; 3327 struct file *fp; 3328 int error; 3329 3330 AUDIT_ARG_FD(fd); 3331 error = getutimes(tptr, tptrseg, ts); 3332 if (error != 0) 3333 return (error); 3334 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3335 if (error != 0) 3336 return (error); 3337 #ifdef AUDIT 3338 if (AUDITING_TD(td)) { 3339 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3340 AUDIT_ARG_VNODE1(fp->f_vnode); 3341 VOP_UNLOCK(fp->f_vnode); 3342 } 3343 #endif 3344 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3345 fdrop(fp, td); 3346 return (error); 3347 } 3348 3349 int 3350 sys_futimens(struct thread *td, struct futimens_args *uap) 3351 { 3352 
3353 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3354 } 3355 3356 int 3357 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3358 enum uio_seg tptrseg) 3359 { 3360 struct timespec ts[2]; 3361 struct file *fp; 3362 int error, flags; 3363 3364 AUDIT_ARG_FD(fd); 3365 error = getutimens(tptr, tptrseg, ts, &flags); 3366 if (error != 0) 3367 return (error); 3368 if (flags & UTIMENS_EXIT) 3369 return (0); 3370 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3371 if (error != 0) 3372 return (error); 3373 #ifdef AUDIT 3374 if (AUDITING_TD(td)) { 3375 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3376 AUDIT_ARG_VNODE1(fp->f_vnode); 3377 VOP_UNLOCK(fp->f_vnode); 3378 } 3379 #endif 3380 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3381 fdrop(fp, td); 3382 return (error); 3383 } 3384 3385 int 3386 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3387 { 3388 3389 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3390 uap->times, UIO_USERSPACE, uap->flag)); 3391 } 3392 3393 int 3394 kern_utimensat(struct thread *td, int fd, const char *path, 3395 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3396 int flag) 3397 { 3398 struct nameidata nd; 3399 struct timespec ts[2]; 3400 int error, flags; 3401 3402 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3403 AT_EMPTY_PATH)) != 0) 3404 return (EINVAL); 3405 3406 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3407 return (error); 3408 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3409 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3410 pathseg, path, fd, &cap_futimes_rights); 3411 if ((error = namei(&nd)) != 0) 3412 return (error); 3413 /* 3414 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3415 * POSIX states: 3416 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3417 * "Search permission is denied by a component of the path prefix." 
3418 */ 3419 NDFREE_PNBUF(&nd); 3420 if ((flags & UTIMENS_EXIT) == 0) 3421 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3422 vrele(nd.ni_vp); 3423 return (error); 3424 } 3425 3426 /* 3427 * Truncate a file given its path name. 3428 */ 3429 #ifndef _SYS_SYSPROTO_H_ 3430 struct truncate_args { 3431 char *path; 3432 int pad; 3433 off_t length; 3434 }; 3435 #endif 3436 int 3437 sys_truncate(struct thread *td, struct truncate_args *uap) 3438 { 3439 3440 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3441 } 3442 3443 int 3444 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3445 off_t length) 3446 { 3447 struct mount *mp; 3448 struct vnode *vp; 3449 void *rl_cookie; 3450 struct nameidata nd; 3451 int error; 3452 3453 if (length < 0) 3454 return (EINVAL); 3455 NDPREINIT(&nd); 3456 retry: 3457 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3458 if ((error = namei(&nd)) != 0) 3459 return (error); 3460 vp = nd.ni_vp; 3461 NDFREE_PNBUF(&nd); 3462 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3463 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3464 vn_rangelock_unlock(vp, rl_cookie); 3465 vrele(vp); 3466 return (error); 3467 } 3468 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3469 if (vp->v_type == VDIR) { 3470 error = EISDIR; 3471 goto out; 3472 } 3473 #ifdef MAC 3474 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3475 if (error != 0) 3476 goto out; 3477 #endif 3478 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3479 if (error != 0) 3480 goto out; 3481 3482 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3483 out: 3484 VOP_UNLOCK(vp); 3485 vn_finished_write(mp); 3486 vn_rangelock_unlock(vp, rl_cookie); 3487 vrele(vp); 3488 if (error == ERELOOKUP) 3489 goto retry; 3490 return (error); 3491 } 3492 3493 #if defined(COMPAT_43) 3494 /* 3495 * Truncate a file given its path name. 
3496 */ 3497 #ifndef _SYS_SYSPROTO_H_ 3498 struct otruncate_args { 3499 char *path; 3500 long length; 3501 }; 3502 #endif 3503 int 3504 otruncate(struct thread *td, struct otruncate_args *uap) 3505 { 3506 3507 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3508 } 3509 #endif /* COMPAT_43 */ 3510 3511 #if defined(COMPAT_FREEBSD6) 3512 /* Versions with the pad argument */ 3513 int 3514 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3515 { 3516 3517 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3518 } 3519 3520 int 3521 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3522 { 3523 3524 return (kern_ftruncate(td, uap->fd, uap->length)); 3525 } 3526 #endif 3527 3528 int 3529 kern_fsync(struct thread *td, int fd, bool fullsync) 3530 { 3531 struct vnode *vp; 3532 struct mount *mp; 3533 struct file *fp; 3534 int error; 3535 3536 AUDIT_ARG_FD(fd); 3537 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3538 if (error != 0) 3539 return (error); 3540 vp = fp->f_vnode; 3541 #if 0 3542 if (!fullsync) 3543 /* XXXKIB: compete outstanding aio writes */; 3544 #endif 3545 retry: 3546 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3547 if (error != 0) 3548 goto drop; 3549 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3550 AUDIT_ARG_VNODE1(vp); 3551 if (vp->v_object != NULL) { 3552 VM_OBJECT_WLOCK(vp->v_object); 3553 vm_object_page_clean(vp->v_object, 0, 0, 0); 3554 VM_OBJECT_WUNLOCK(vp->v_object); 3555 } 3556 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3557 VOP_UNLOCK(vp); 3558 vn_finished_write(mp); 3559 if (error == ERELOOKUP) 3560 goto retry; 3561 drop: 3562 fdrop(fp, td); 3563 return (error); 3564 } 3565 3566 /* 3567 * Sync an open file. 
3568 */ 3569 #ifndef _SYS_SYSPROTO_H_ 3570 struct fsync_args { 3571 int fd; 3572 }; 3573 #endif 3574 int 3575 sys_fsync(struct thread *td, struct fsync_args *uap) 3576 { 3577 3578 return (kern_fsync(td, uap->fd, true)); 3579 } 3580 3581 int 3582 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3583 { 3584 3585 return (kern_fsync(td, uap->fd, false)); 3586 } 3587 3588 /* 3589 * Rename files. Source and destination must either both be directories, or 3590 * both not be directories. If target is a directory, it must be empty. 3591 */ 3592 #ifndef _SYS_SYSPROTO_H_ 3593 struct rename_args { 3594 char *from; 3595 char *to; 3596 }; 3597 #endif 3598 int 3599 sys_rename(struct thread *td, struct rename_args *uap) 3600 { 3601 3602 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3603 uap->to, UIO_USERSPACE)); 3604 } 3605 3606 #ifndef _SYS_SYSPROTO_H_ 3607 struct renameat_args { 3608 int oldfd; 3609 char *old; 3610 int newfd; 3611 char *new; 3612 }; 3613 #endif 3614 int 3615 sys_renameat(struct thread *td, struct renameat_args *uap) 3616 { 3617 3618 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3619 UIO_USERSPACE)); 3620 } 3621 3622 #ifdef MAC 3623 static int 3624 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3625 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3626 { 3627 int error; 3628 3629 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3630 pathseg, old, oldfd, &cap_renameat_source_rights); 3631 if ((error = namei(fromnd)) != 0) 3632 return (error); 3633 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3634 fromnd->ni_vp, &fromnd->ni_cnd); 3635 VOP_UNLOCK(fromnd->ni_dvp); 3636 if (fromnd->ni_dvp != fromnd->ni_vp) 3637 VOP_UNLOCK(fromnd->ni_vp); 3638 if (error != 0) { 3639 NDFREE_PNBUF(fromnd); 3640 vrele(fromnd->ni_dvp); 3641 vrele(fromnd->ni_vp); 3642 } 3643 return (error); 3644 } 3645 #endif 3646 3647 int 3648 kern_renameat(struct thread *td, 
int oldfd, const char *old, int newfd, 3649 const char *new, enum uio_seg pathseg) 3650 { 3651 struct mount *mp = NULL; 3652 struct vnode *tvp, *fvp, *tdvp; 3653 struct nameidata fromnd, tond; 3654 uint64_t tondflags; 3655 int error; 3656 3657 again: 3658 bwillwrite(); 3659 #ifdef MAC 3660 if (mac_vnode_check_rename_from_enabled()) { 3661 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3662 &fromnd); 3663 if (error != 0) 3664 return (error); 3665 } else { 3666 #endif 3667 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3668 pathseg, old, oldfd, &cap_renameat_source_rights); 3669 if ((error = namei(&fromnd)) != 0) 3670 return (error); 3671 #ifdef MAC 3672 } 3673 #endif 3674 fvp = fromnd.ni_vp; 3675 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3676 if (fromnd.ni_vp->v_type == VDIR) 3677 tondflags |= WILLBEDIR; 3678 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3679 &cap_renameat_target_rights); 3680 if ((error = namei(&tond)) != 0) { 3681 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3682 if (error == EISDIR && fvp->v_type == VDIR) 3683 error = EINVAL; 3684 NDFREE_PNBUF(&fromnd); 3685 vrele(fromnd.ni_dvp); 3686 vrele(fvp); 3687 goto out1; 3688 } 3689 tdvp = tond.ni_dvp; 3690 tvp = tond.ni_vp; 3691 error = vn_start_write(fvp, &mp, V_NOWAIT); 3692 if (error != 0) { 3693 NDFREE_PNBUF(&fromnd); 3694 NDFREE_PNBUF(&tond); 3695 if (tvp != NULL) 3696 vput(tvp); 3697 if (tdvp == tvp) 3698 vrele(tdvp); 3699 else 3700 vput(tdvp); 3701 vrele(fromnd.ni_dvp); 3702 vrele(fvp); 3703 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3704 if (error != 0) 3705 return (error); 3706 goto again; 3707 } 3708 if (tvp != NULL) { 3709 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3710 error = ENOTDIR; 3711 goto out; 3712 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3713 error = EISDIR; 3714 goto out; 3715 } 3716 #ifdef CAPABILITIES 3717 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3718 /* 3719 * If the target already exists we require CAP_UNLINKAT 3720 * from 'newfd', when newfd was used for the lookup. 3721 */ 3722 error = cap_check(&tond.ni_filecaps.fc_rights, 3723 &cap_unlinkat_rights); 3724 if (error != 0) 3725 goto out; 3726 } 3727 #endif 3728 } 3729 if (fvp == tdvp) { 3730 error = EINVAL; 3731 goto out; 3732 } 3733 /* 3734 * If the source is the same as the destination (that is, if they 3735 * are links to the same vnode), then there is nothing to do. 
3736 */ 3737 if (fvp == tvp) 3738 error = ERESTART; 3739 #ifdef MAC 3740 else 3741 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3742 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3743 #endif 3744 out: 3745 if (error == 0) { 3746 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3747 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3748 NDFREE_PNBUF(&fromnd); 3749 NDFREE_PNBUF(&tond); 3750 } else { 3751 NDFREE_PNBUF(&fromnd); 3752 NDFREE_PNBUF(&tond); 3753 if (tvp != NULL) 3754 vput(tvp); 3755 if (tdvp == tvp) 3756 vrele(tdvp); 3757 else 3758 vput(tdvp); 3759 vrele(fromnd.ni_dvp); 3760 vrele(fvp); 3761 } 3762 vn_finished_write(mp); 3763 out1: 3764 if (error == ERESTART) 3765 return (0); 3766 if (error == ERELOOKUP) 3767 goto again; 3768 return (error); 3769 } 3770 3771 /* 3772 * Make a directory file. 3773 */ 3774 #ifndef _SYS_SYSPROTO_H_ 3775 struct mkdir_args { 3776 char *path; 3777 int mode; 3778 }; 3779 #endif 3780 int 3781 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3782 { 3783 3784 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3785 uap->mode)); 3786 } 3787 3788 #ifndef _SYS_SYSPROTO_H_ 3789 struct mkdirat_args { 3790 int fd; 3791 char *path; 3792 mode_t mode; 3793 }; 3794 #endif 3795 int 3796 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3797 { 3798 3799 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3800 } 3801 3802 int 3803 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3804 int mode) 3805 { 3806 struct mount *mp; 3807 struct vattr vattr; 3808 struct nameidata nd; 3809 int error; 3810 3811 AUDIT_ARG_MODE(mode); 3812 NDPREINIT(&nd); 3813 restart: 3814 bwillwrite(); 3815 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3816 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3817 segflg, path, fd, &cap_mkdirat_rights); 3818 if ((error = namei(&nd)) != 0) 3819 return (error); 3820 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 
3821 NDFREE_PNBUF(&nd); 3822 vput(nd.ni_dvp); 3823 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3824 return (error); 3825 goto restart; 3826 } 3827 VATTR_NULL(&vattr); 3828 vattr.va_type = VDIR; 3829 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3830 #ifdef MAC 3831 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3832 &vattr); 3833 if (error != 0) 3834 goto out; 3835 #endif 3836 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3837 #ifdef MAC 3838 out: 3839 #endif 3840 NDFREE_PNBUF(&nd); 3841 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3842 vn_finished_write(mp); 3843 if (error == ERELOOKUP) 3844 goto restart; 3845 return (error); 3846 } 3847 3848 /* 3849 * Remove a directory file. 3850 */ 3851 #ifndef _SYS_SYSPROTO_H_ 3852 struct rmdir_args { 3853 char *path; 3854 }; 3855 #endif 3856 int 3857 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3858 { 3859 3860 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3861 0)); 3862 } 3863 3864 int 3865 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3866 enum uio_seg pathseg, int flag) 3867 { 3868 struct mount *mp; 3869 struct vnode *vp; 3870 struct file *fp; 3871 struct nameidata nd; 3872 cap_rights_t rights; 3873 int error; 3874 3875 fp = NULL; 3876 if (fd != FD_NONE) { 3877 error = getvnode(td, fd, cap_rights_init_one(&rights, 3878 CAP_LOOKUP), &fp); 3879 if (error != 0) 3880 return (error); 3881 } 3882 3883 NDPREINIT(&nd); 3884 restart: 3885 bwillwrite(); 3886 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3887 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3888 pathseg, path, dfd, &cap_unlinkat_rights); 3889 if ((error = namei(&nd)) != 0) 3890 goto fdout; 3891 vp = nd.ni_vp; 3892 if (vp->v_type != VDIR) { 3893 error = ENOTDIR; 3894 goto out; 3895 } 3896 /* 3897 * No rmdir "." please. 
3898 */ 3899 if (nd.ni_dvp == vp) { 3900 error = EINVAL; 3901 goto out; 3902 } 3903 /* 3904 * The root of a mounted filesystem cannot be deleted. 3905 */ 3906 if (vp->v_vflag & VV_ROOT) { 3907 error = EBUSY; 3908 goto out; 3909 } 3910 3911 if (fp != NULL && fp->f_vnode != vp) { 3912 if (VN_IS_DOOMED(fp->f_vnode)) 3913 error = EBADF; 3914 else 3915 error = EDEADLK; 3916 goto out; 3917 } 3918 3919 #ifdef MAC 3920 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3921 &nd.ni_cnd); 3922 if (error != 0) 3923 goto out; 3924 #endif 3925 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3926 NDFREE_PNBUF(&nd); 3927 vput(vp); 3928 if (nd.ni_dvp == vp) 3929 vrele(nd.ni_dvp); 3930 else 3931 vput(nd.ni_dvp); 3932 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3933 goto fdout; 3934 goto restart; 3935 } 3936 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3937 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3938 vn_finished_write(mp); 3939 out: 3940 NDFREE_PNBUF(&nd); 3941 vput(vp); 3942 if (nd.ni_dvp == vp) 3943 vrele(nd.ni_dvp); 3944 else 3945 vput(nd.ni_dvp); 3946 if (error == ERELOOKUP) 3947 goto restart; 3948 fdout: 3949 if (fp != NULL) 3950 fdrop(fp, td); 3951 return (error); 3952 } 3953 3954 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3955 int 3956 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3957 long *basep, void (*func)(struct freebsd11_dirent *)) 3958 { 3959 struct freebsd11_dirent dstdp; 3960 struct dirent *dp, *edp; 3961 char *dirbuf; 3962 off_t base; 3963 ssize_t resid, ucount; 3964 int error; 3965 3966 /* XXX arbitrary sanity limit on `count'. 
*/ 3967 count = min(count, 64 * 1024); 3968 3969 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3970 3971 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3972 UIO_SYSSPACE); 3973 if (error != 0) 3974 goto done; 3975 if (basep != NULL) 3976 *basep = base; 3977 3978 ucount = 0; 3979 for (dp = (struct dirent *)dirbuf, 3980 edp = (struct dirent *)&dirbuf[count - resid]; 3981 ucount < count && dp < edp; ) { 3982 if (dp->d_reclen == 0) 3983 break; 3984 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3985 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3986 continue; 3987 dstdp.d_type = dp->d_type; 3988 dstdp.d_namlen = dp->d_namlen; 3989 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3990 if (dstdp.d_fileno != dp->d_fileno) { 3991 switch (ino64_trunc_error) { 3992 default: 3993 case 0: 3994 break; 3995 case 1: 3996 error = EOVERFLOW; 3997 goto done; 3998 case 2: 3999 dstdp.d_fileno = UINT32_MAX; 4000 break; 4001 } 4002 } 4003 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4004 ((dp->d_namlen + 1 + 3) &~ 3); 4005 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4006 bzero(dstdp.d_name + dstdp.d_namlen, 4007 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4008 dstdp.d_namlen); 4009 MPASS(dstdp.d_reclen <= dp->d_reclen); 4010 MPASS(ucount + dstdp.d_reclen <= count); 4011 if (func != NULL) 4012 func(&dstdp); 4013 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4014 if (error != 0) 4015 break; 4016 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4017 ucount += dstdp.d_reclen; 4018 } 4019 4020 done: 4021 free(dirbuf, M_TEMP); 4022 if (error == 0) 4023 td->td_retval[0] = ucount; 4024 return (error); 4025 } 4026 #endif /* COMPAT */ 4027 4028 #ifdef COMPAT_43 4029 static void 4030 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4031 { 4032 #if (BYTE_ORDER == LITTLE_ENDIAN) 4033 /* 4034 * The expected low byte of dp->d_namlen is our dp->d_type. 4035 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4036 */ 4037 dp->d_type = dp->d_namlen; 4038 dp->d_namlen = 0; 4039 #else 4040 /* 4041 * The dp->d_type is the high byte of the expected dp->d_namlen, 4042 * so must be zero'ed. 4043 */ 4044 dp->d_type = 0; 4045 #endif 4046 } 4047 4048 /* 4049 * Read a block of directory entries in a filesystem independent format. 4050 */ 4051 #ifndef _SYS_SYSPROTO_H_ 4052 struct ogetdirentries_args { 4053 int fd; 4054 char *buf; 4055 u_int count; 4056 long *basep; 4057 }; 4058 #endif 4059 int 4060 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4061 { 4062 long loff; 4063 int error; 4064 4065 error = kern_ogetdirentries(td, uap, &loff); 4066 if (error == 0) 4067 error = copyout(&loff, uap->basep, sizeof(long)); 4068 return (error); 4069 } 4070 4071 int 4072 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4073 long *ploff) 4074 { 4075 long base; 4076 int error; 4077 4078 /* XXX arbitrary sanity limit on `count'. */ 4079 if (uap->count > 64 * 1024) 4080 return (EINVAL); 4081 4082 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4083 &base, ogetdirentries_cvt); 4084 4085 if (error == 0 && uap->basep != NULL) 4086 error = copyout(&base, uap->basep, sizeof(long)); 4087 4088 return (error); 4089 } 4090 #endif /* COMPAT_43 */ 4091 4092 #if defined(COMPAT_FREEBSD11) 4093 #ifndef _SYS_SYSPROTO_H_ 4094 struct freebsd11_getdirentries_args { 4095 int fd; 4096 char *buf; 4097 u_int count; 4098 long *basep; 4099 }; 4100 #endif 4101 int 4102 freebsd11_getdirentries(struct thread *td, 4103 struct freebsd11_getdirentries_args *uap) 4104 { 4105 long base; 4106 int error; 4107 4108 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4109 &base, NULL); 4110 4111 if (error == 0 && uap->basep != NULL) 4112 error = copyout(&base, uap->basep, sizeof(long)); 4113 return (error); 4114 } 4115 4116 int 4117 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4118 { 4119 struct 
freebsd11_getdirentries_args ap; 4120 4121 ap.fd = uap->fd; 4122 ap.buf = uap->buf; 4123 ap.count = uap->count; 4124 ap.basep = NULL; 4125 return (freebsd11_getdirentries(td, &ap)); 4126 } 4127 #endif /* COMPAT_FREEBSD11 */ 4128 4129 /* 4130 * Read a block of directory entries in a filesystem independent format. 4131 */ 4132 int 4133 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4134 { 4135 off_t base; 4136 int error; 4137 4138 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4139 NULL, UIO_USERSPACE); 4140 if (error != 0) 4141 return (error); 4142 if (uap->basep != NULL) 4143 error = copyout(&base, uap->basep, sizeof(off_t)); 4144 return (error); 4145 } 4146 4147 int 4148 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4149 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4150 { 4151 struct vnode *vp; 4152 struct file *fp; 4153 struct uio auio; 4154 struct iovec aiov; 4155 off_t loff; 4156 int error, eofflag; 4157 off_t foffset; 4158 4159 AUDIT_ARG_FD(fd); 4160 if (count > IOSIZE_MAX) 4161 return (EINVAL); 4162 auio.uio_resid = count; 4163 error = getvnode(td, fd, &cap_read_rights, &fp); 4164 if (error != 0) 4165 return (error); 4166 if ((fp->f_flag & FREAD) == 0) { 4167 fdrop(fp, td); 4168 return (EBADF); 4169 } 4170 vp = fp->f_vnode; 4171 foffset = foffset_lock(fp, 0); 4172 unionread: 4173 if (vp->v_type != VDIR) { 4174 error = EINVAL; 4175 goto fail; 4176 } 4177 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4178 error = ENOENT; 4179 goto fail; 4180 } 4181 aiov.iov_base = buf; 4182 aiov.iov_len = count; 4183 auio.uio_iov = &aiov; 4184 auio.uio_iovcnt = 1; 4185 auio.uio_rw = UIO_READ; 4186 auio.uio_segflg = bufseg; 4187 auio.uio_td = td; 4188 vn_lock(vp, LK_SHARED | LK_RETRY); 4189 AUDIT_ARG_VNODE1(vp); 4190 loff = auio.uio_offset = foffset; 4191 #ifdef MAC 4192 error = mac_vnode_check_readdir(td->td_ucred, vp); 4193 if (error == 0) 4194 #endif 4195 error = VOP_READDIR(vp, &auio, 
fp->f_cred, &eofflag, NULL, 4196 NULL); 4197 foffset = auio.uio_offset; 4198 if (error != 0) { 4199 VOP_UNLOCK(vp); 4200 goto fail; 4201 } 4202 if (count == auio.uio_resid && 4203 (vp->v_vflag & VV_ROOT) && 4204 (vp->v_mount->mnt_flag & MNT_UNION)) { 4205 struct vnode *tvp = vp; 4206 4207 vp = vp->v_mount->mnt_vnodecovered; 4208 VREF(vp); 4209 fp->f_vnode = vp; 4210 foffset = 0; 4211 vput(tvp); 4212 goto unionread; 4213 } 4214 VOP_UNLOCK(vp); 4215 *basep = loff; 4216 if (residp != NULL) 4217 *residp = auio.uio_resid; 4218 td->td_retval[0] = count - auio.uio_resid; 4219 fail: 4220 foffset_unlock(fp, foffset, 0); 4221 fdrop(fp, td); 4222 return (error); 4223 } 4224 4225 /* 4226 * Set the mode mask for creation of filesystem nodes. 4227 */ 4228 #ifndef _SYS_SYSPROTO_H_ 4229 struct umask_args { 4230 int newmask; 4231 }; 4232 #endif 4233 int 4234 sys_umask(struct thread *td, struct umask_args *uap) 4235 { 4236 struct pwddesc *pdp; 4237 4238 pdp = td->td_proc->p_pd; 4239 PWDDESC_XLOCK(pdp); 4240 td->td_retval[0] = pdp->pd_cmask; 4241 pdp->pd_cmask = uap->newmask & ALLPERMS; 4242 PWDDESC_XUNLOCK(pdp); 4243 return (0); 4244 } 4245 4246 /* 4247 * Void all references to file by ripping underlying filesystem away from 4248 * vnode. 
4249 */ 4250 #ifndef _SYS_SYSPROTO_H_ 4251 struct revoke_args { 4252 char *path; 4253 }; 4254 #endif 4255 int 4256 sys_revoke(struct thread *td, struct revoke_args *uap) 4257 { 4258 struct vnode *vp; 4259 struct vattr vattr; 4260 struct nameidata nd; 4261 int error; 4262 4263 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4264 uap->path); 4265 if ((error = namei(&nd)) != 0) 4266 return (error); 4267 vp = nd.ni_vp; 4268 NDFREE_PNBUF(&nd); 4269 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4270 error = EINVAL; 4271 goto out; 4272 } 4273 #ifdef MAC 4274 error = mac_vnode_check_revoke(td->td_ucred, vp); 4275 if (error != 0) 4276 goto out; 4277 #endif 4278 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4279 if (error != 0) 4280 goto out; 4281 if (td->td_ucred->cr_uid != vattr.va_uid) { 4282 error = priv_check(td, PRIV_VFS_ADMIN); 4283 if (error != 0) 4284 goto out; 4285 } 4286 if (devfs_usecount(vp) > 0) 4287 VOP_REVOKE(vp, REVOKEALL); 4288 out: 4289 vput(vp); 4290 return (error); 4291 } 4292 4293 /* 4294 * This variant of getvnode() allows O_PATH files. Caller should 4295 * ensure that returned file and vnode are only used for compatible 4296 * semantics. 4297 */ 4298 int 4299 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4300 struct file **fpp) 4301 { 4302 struct file *fp; 4303 int error; 4304 4305 error = fget_unlocked(td, fd, rightsp, &fp); 4306 if (error != 0) 4307 return (error); 4308 4309 /* 4310 * The file could be not of the vnode type, or it may be not 4311 * yet fully initialized, in which case the f_vnode pointer 4312 * may be set, but f_ops is still badfileops. E.g., 4313 * devfs_open() transiently create such situation to 4314 * facilitate csw d_fdopen(). 4315 * 4316 * Dupfdopen() handling in kern_openat() installs the 4317 * half-baked file into the process descriptor table, allowing 4318 * other thread to dereference it. Guard against the race by 4319 * checking f_ops. 
4320 */ 4321 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4322 fdrop(fp, td); 4323 *fpp = NULL; 4324 return (EINVAL); 4325 } 4326 4327 *fpp = fp; 4328 return (0); 4329 } 4330 4331 /* 4332 * Convert a user file descriptor to a kernel file entry and check 4333 * that, if it is a capability, the correct rights are present. 4334 * A reference on the file entry is held upon returning. 4335 */ 4336 int 4337 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4338 { 4339 int error; 4340 4341 error = getvnode_path(td, fd, rightsp, fpp); 4342 if (__predict_false(error != 0)) 4343 return (error); 4344 4345 /* 4346 * Filter out O_PATH file descriptors, most getvnode() callers 4347 * do not call fo_ methods. 4348 */ 4349 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4350 fdrop(*fpp, td); 4351 *fpp = NULL; 4352 error = EBADF; 4353 } 4354 4355 return (error); 4356 } 4357 4358 /* 4359 * Get an (NFS) file handle. 4360 */ 4361 #ifndef _SYS_SYSPROTO_H_ 4362 struct lgetfh_args { 4363 char *fname; 4364 fhandle_t *fhp; 4365 }; 4366 #endif 4367 int 4368 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4369 { 4370 4371 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4372 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4373 } 4374 4375 #ifndef _SYS_SYSPROTO_H_ 4376 struct getfh_args { 4377 char *fname; 4378 fhandle_t *fhp; 4379 }; 4380 #endif 4381 int 4382 sys_getfh(struct thread *td, struct getfh_args *uap) 4383 { 4384 4385 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4386 uap->fhp, UIO_USERSPACE)); 4387 } 4388 4389 /* 4390 * syscall for the rpc.lockd to use to translate an open descriptor into 4391 * a NFS file handle. 4392 * 4393 * warning: do not remove the priv_check() call or this becomes one giant 4394 * security hole. 
4395 */ 4396 #ifndef _SYS_SYSPROTO_H_ 4397 struct getfhat_args { 4398 int fd; 4399 char *path; 4400 fhandle_t *fhp; 4401 int flags; 4402 }; 4403 #endif 4404 int 4405 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4406 { 4407 4408 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4409 uap->fhp, UIO_USERSPACE)); 4410 } 4411 4412 int 4413 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4414 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4415 { 4416 struct nameidata nd; 4417 fhandle_t fh; 4418 struct vnode *vp; 4419 int error; 4420 4421 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4422 return (EINVAL); 4423 error = priv_check(td, PRIV_VFS_GETFH); 4424 if (error != 0) 4425 return (error); 4426 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4427 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4428 fd); 4429 error = namei(&nd); 4430 if (error != 0) 4431 return (error); 4432 NDFREE_PNBUF(&nd); 4433 vp = nd.ni_vp; 4434 bzero(&fh, sizeof(fh)); 4435 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4436 error = VOP_VPTOFH(vp, &fh.fh_fid); 4437 vput(vp); 4438 if (error == 0) { 4439 if (fhseg == UIO_USERSPACE) 4440 error = copyout(&fh, fhp, sizeof (fh)); 4441 else 4442 memcpy(fhp, &fh, sizeof(fh)); 4443 } 4444 return (error); 4445 } 4446 4447 #ifndef _SYS_SYSPROTO_H_ 4448 struct fhlink_args { 4449 fhandle_t *fhp; 4450 const char *to; 4451 }; 4452 #endif 4453 int 4454 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4455 { 4456 4457 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4458 } 4459 4460 #ifndef _SYS_SYSPROTO_H_ 4461 struct fhlinkat_args { 4462 fhandle_t *fhp; 4463 int tofd; 4464 const char *to; 4465 }; 4466 #endif 4467 int 4468 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4469 { 4470 4471 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4472 } 4473 4474 static int 4475 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4476 enum uio_seg pathseg, fhandle_t *fhp) 4477 { 4478 fhandle_t fh; 4479 struct mount *mp; 4480 struct vnode *vp; 4481 int error; 4482 4483 error = priv_check(td, PRIV_VFS_GETFH); 4484 if (error != 0) 4485 return (error); 4486 error = copyin(fhp, &fh, sizeof(fh)); 4487 if (error != 0) 4488 return (error); 4489 do { 4490 bwillwrite(); 4491 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4492 return (ESTALE); 4493 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4494 vfs_unbusy(mp); 4495 if (error != 0) 4496 return (error); 4497 VOP_UNLOCK(vp); 4498 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4499 } while (error == EAGAIN || error == ERELOOKUP); 4500 return (error); 4501 } 4502 4503 #ifndef _SYS_SYSPROTO_H_ 4504 struct fhreadlink_args { 4505 fhandle_t *fhp; 4506 char *buf; 4507 size_t bufsize; 4508 }; 4509 #endif 4510 int 4511 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4512 { 4513 fhandle_t fh; 4514 struct mount *mp; 4515 struct vnode *vp; 4516 int error; 4517 4518 error = priv_check(td, PRIV_VFS_GETFH); 4519 if (error != 0) 4520 return (error); 4521 if (uap->bufsize > IOSIZE_MAX) 4522 return (EINVAL); 4523 error = copyin(uap->fhp, &fh, sizeof(fh)); 4524 if (error != 0) 4525 return (error); 4526 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4527 return (ESTALE); 4528 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4529 vfs_unbusy(mp); 4530 if (error != 0) 4531 return (error); 4532 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4533 vput(vp); 4534 return (error); 4535 } 4536 4537 /* 4538 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4539 * open descriptor. 4540 * 4541 * warning: do not remove the priv_check() call or this becomes one giant 4542 * security hole. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fhopen_args {
	const struct fhandle *u_fhp;
	int flags;
};
#endif
int
sys_fhopen(struct thread *td, struct fhopen_args *uap)
{
	return (kern_fhopen(td, uap->u_fhp, uap->flags));
}

/*
 * Open the file identified by the user-space file handle u_fhp with the
 * open(2)-style 'flags' and install it as a new descriptor.
 *
 * Requires PRIV_VFS_FHOPEN.  The flags must request reading and/or writing
 * and must not contain O_CREAT, otherwise EINVAL is returned.  ESTALE is
 * returned when the handle's filesystem cannot be found.  td_retval[0] is
 * set to the new descriptor index, or to -1 on the failure paths that go
 * through the 'bad' label.
 */
int
kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags)
{
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct file *fp;
	int fmode, error;
	int indx;

	error = priv_check(td, PRIV_VFS_FHOPEN);
	if (error != 0)
		return (error);
	indx = -1;
	fmode = FFLAGS(flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(u_fhp, &fhp, sizeof(fhp));
	if (error != 0)
		return(error);
	/* find the mount point */
	mp = vfs_busyfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
	vfs_unbusy(mp);
	if (error != 0)
		return (error);

	error = falloc_noinstall(td, &fp);
	if (error != 0) {
		vput(vp);
		return (error);
	}
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */

#ifdef INVARIANTS
	/* Sentinel checked by the KASSERT below if the open fails. */
	td->td_dupfd = -1;
#endif
	error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
	if (error != 0) {
		KASSERT(fp->f_ops == &badfileops,
		    ("VOP_OPEN in fhopen() set f_ops"));
		KASSERT(td->td_dupfd < 0,
		    ("fhopen() encountered fdopen()"));

		vput(vp);
		goto bad;
	}
#ifdef INVARIANTS
	td->td_dupfd = 0;
#endif
	/* Bind the vnode to the file, then drop the vnode lock. */
	fp->f_vnode = vp;
	finit_vnode(fp, fmode, NULL, &vnops);
	VOP_UNLOCK(vp);
	if ((fmode & O_TRUNC) != 0) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}

	error = finstall(td, fp, &indx, fmode, NULL);
bad:
	/* Drop the reference obtained from falloc_noinstall(). */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (error);
}

/*
 * Stat an (NFS) file handle.
 */
#ifndef _SYS_SYSPROTO_H_
struct fhstat_args {
	struct fhandle *u_fhp;
	struct stat *sb;
};
#endif
int
sys_fhstat(struct thread *td, struct fhstat_args *uap)
{
	struct stat sb;
	struct fhandle fh;
	int error;

	/* Copy the handle in, stat it, and copy the result out on success. */
	error = copyin(uap->u_fhp, &fh, sizeof(fh));
	if (error != 0)
		return (error);
	error = kern_fhstat(td, fh, &sb);
	if (error == 0)
		error = copyout(&sb, uap->sb, sizeof(sb));
	return (error);
}

/*
 * Fill *sb with the stat information of the file identified by the
 * (kernel-space) file handle fh.  Requires PRIV_VFS_FHSTAT; returns ESTALE
 * when the handle's filesystem cannot be found.
 */
int
kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
{
	struct mount *mp;
	struct vnode *vp;
	int error;

	error = priv_check(td, PRIV_VFS_FHSTAT);
	if (error != 0)
		return (error);
	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);
	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
	vfs_unbusy(mp);
	if (error != 0)
		return (error);
	error = VOP_STAT(vp, sb, td->td_ucred, NOCRED);
	vput(vp);
	return (error);
}

/*
 * Implement fstatfs() for (NFS) file handles.
4677 */ 4678 #ifndef _SYS_SYSPROTO_H_ 4679 struct fhstatfs_args { 4680 struct fhandle *u_fhp; 4681 struct statfs *buf; 4682 }; 4683 #endif 4684 int 4685 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4686 { 4687 struct statfs *sfp; 4688 fhandle_t fh; 4689 int error; 4690 4691 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4692 if (error != 0) 4693 return (error); 4694 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4695 error = kern_fhstatfs(td, fh, sfp); 4696 if (error == 0) 4697 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4698 free(sfp, M_STATFS); 4699 return (error); 4700 } 4701 4702 int 4703 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4704 { 4705 struct mount *mp; 4706 struct vnode *vp; 4707 int error; 4708 4709 error = priv_check(td, PRIV_VFS_FHSTATFS); 4710 if (error != 0) 4711 return (error); 4712 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4713 return (ESTALE); 4714 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4715 if (error != 0) { 4716 vfs_unbusy(mp); 4717 return (error); 4718 } 4719 vput(vp); 4720 error = prison_canseemount(td->td_ucred, mp); 4721 if (error != 0) 4722 goto out; 4723 #ifdef MAC 4724 error = mac_mount_check_stat(td->td_ucred, mp); 4725 if (error != 0) 4726 goto out; 4727 #endif 4728 error = VFS_STATFS(mp, buf); 4729 out: 4730 vfs_unbusy(mp); 4731 return (error); 4732 } 4733 4734 /* 4735 * Unlike madvise(2), we do not make a best effort to remember every 4736 * possible caching hint. Instead, we remember the last setting with 4737 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4738 * region of any current setting. 
*/
int
kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
    int advice)
{
	struct fadvise_info *fa, *new;
	struct file *fp;
	struct vnode *vp;
	off_t end;
	int error;

	/* Reject negative values and ranges whose end would overflow. */
	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
		return (EINVAL);
	AUDIT_ARG_VALUE(advice);
	switch (advice) {
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_NOREUSE:
		/* Allocated up front, before the pool mutex is taken below. */
		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
		break;
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		new = NULL;
		break;
	default:
		return (EINVAL);
	}
	/* XXX: CAP_POSIX_FADVISE? */
	AUDIT_ARG_FD(fd);
	/*
	 * NOTE(review): the error path below tests fp against NULL, so this
	 * relies on fget() storing NULL in fp when it fails - confirm.
	 */
	error = fget(td, fd, &cap_no_rights, &fp);
	if (error != 0)
		goto out;
	AUDIT_ARG_FILE(td->td_proc, fp);
	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
		error = ESPIPE;
		goto out;
	}
	if (fp->f_type != DTYPE_VNODE) {
		error = ENODEV;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type != VREG) {
		error = ENODEV;
		goto out;
	}
	/* A length of zero means "through the end of the file". */
	if (len == 0)
		end = OFF_MAX;
	else
		end = offset + len - 1;
	switch (advice) {
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_NOREUSE:
		/*
		 * Try to merge any existing non-standard region with
		 * this new region if possible, otherwise create a new
		 * non-standard region for this request.
		 */
		mtx_pool_lock(mtxpool_sleep, fp);
		fa = fp->f_advice;
		if (fa != NULL && fa->fa_advice == advice &&
		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
		    (end != OFF_MAX && fa->fa_start == end + 1) ||
		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
			if (offset < fa->fa_start)
				fa->fa_start = offset;
			if (end > fa->fa_end)
				fa->fa_end = end;
		} else {
			new->fa_advice = advice;
			new->fa_start = offset;
			new->fa_end = end;
			fp->f_advice = new;
			/* The replaced record (if any) is freed at 'out'. */
			new = fa;
		}
		mtx_pool_unlock(mtxpool_sleep, fp);
		break;
	case POSIX_FADV_NORMAL:
		/*
		 * If the "normal" region overlaps with an existing
		 * non-standard region, trim or remove the
		 * non-standard region.
		 */
		mtx_pool_lock(mtxpool_sleep, fp);
		fa = fp->f_advice;
		if (fa != NULL) {
			if (offset <= fa->fa_start && end >= fa->fa_end) {
				new = fa;
				fp->f_advice = NULL;
			} else if (offset <= fa->fa_start &&
			    end >= fa->fa_start)
				fa->fa_start = end + 1;
			else if (offset <= fa->fa_end && end >= fa->fa_end)
				fa->fa_end = offset - 1;
			else if (offset >= fa->fa_start && end <= fa->fa_end) {
				/*
				 * If the "normal" region is a middle
				 * portion of the existing
				 * non-standard region, just remove
				 * the whole thing rather than picking
				 * one side or the other to
				 * preserve.
				 */
				new = fa;
				fp->f_advice = NULL;
			}
		}
		mtx_pool_unlock(mtxpool_sleep, fp);
		break;
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		error = VOP_ADVISE(vp, offset, end, advice);
		break;
	}
out:
	if (fp != NULL)
		fdrop(fp, td);
	/* Frees either the unused allocation or the record that was unhooked. */
	free(new, M_FADVISE);
	return (error);
}

/*
 * posix_fadvise(2) entry point; the result of kern_posix_fadvise() is
 * passed through kern_posix_error().
 */
int
sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
{
	int error;

	error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
	    uap->advice);
	return (kern_posix_error(td, error));
}

/*
 * Copy up to 'len' bytes from descriptor infd to descriptor outfd.
 *
 * inoffp / outoffp point at the offsets to use and are updated by
 * vn_copy_file_range(); a NULL pointer selects the corresponding file's own
 * f_offset.  'flags' must be 0.  td_retval[0] is set to the value left in
 * retlen (0 on the early-exit paths).  When the copy ends with EINTR or
 * ERESTART, both offsets are put back to the values they had on entry.
 */
int
kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd,
    off_t *outoffp, size_t len, unsigned int flags)
{
	struct file *infp, *outfp;
	struct vnode *invp, *outvp;
	int error;
	size_t retlen;
	void *rl_rcookie, *rl_wcookie;
	off_t savinoff, savoutoff;

	infp = outfp = NULL;
	rl_rcookie = rl_wcookie = NULL;
	/* -1 marks "offsets not saved yet" for the restore logic at 'out'. */
	savinoff = -1;
	error = 0;
	retlen = 0;

	if (flags != 0) {
		error = EINVAL;
		goto out;
	}
	if (len > SSIZE_MAX)
		/*
		 * Although the len argument is size_t, the return argument
		 * is ssize_t (which is signed).  Therefore a size that won't
		 * fit in ssize_t can't be returned.
		 */
		len = SSIZE_MAX;

	/* Get the file structures for the file descriptors. */
	error = fget_read(td, infd, &cap_read_rights, &infp);
	if (error != 0)
		goto out;
	if (infp->f_ops == &badfileops) {
		error = EBADF;
		goto out;
	}
	if (infp->f_vnode == NULL) {
		error = EINVAL;
		goto out;
	}
	error = fget_write(td, outfd, &cap_write_rights, &outfp);
	if (error != 0)
		goto out;
	if (outfp->f_ops == &badfileops) {
		error = EBADF;
		goto out;
	}
	if (outfp->f_vnode == NULL) {
		error = EINVAL;
		goto out;
	}

	/* Set the offset pointers to the correct place. */
	if (inoffp == NULL)
		inoffp = &infp->f_offset;
	if (outoffp == NULL)
		outoffp = &outfp->f_offset;
	savinoff = *inoffp;
	savoutoff = *outoffp;

	invp = infp->f_vnode;
	outvp = outfp->f_vnode;
	/* Sanity check the f_flag bits. */
	if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE ||
	    (infp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto out;
	}

	/* If len == 0, just return 0. */
	if (len == 0)
		goto out;

	/*
	 * If infp and outfp refer to the same file, the byte ranges cannot
	 * overlap.
	 */
	if (invp == outvp && ((savinoff <= savoutoff && savinoff + len >
	    savoutoff) || (savinoff > savoutoff && savoutoff + len >
	    savinoff))) {
		error = EINVAL;
		goto out;
	}

	/* Range lock the byte ranges for both invp and outvp. */
	for (;;) {
		rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp +
		    len);
		rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp +
		    len);
		if (rl_rcookie != NULL)
			break;
		/*
		 * The read lock was not available right away: release the
		 * write lock, take and drop the read lock with the blocking
		 * variant, then retry both.
		 */
		vn_rangelock_unlock(outvp, rl_wcookie);
		rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len);
		vn_rangelock_unlock(invp, rl_rcookie);
	}

	retlen = len;
	error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen,
	    flags, infp->f_cred, outfp->f_cred, td);
out:
	if (rl_rcookie != NULL)
		vn_rangelock_unlock(invp, rl_rcookie);
	if (rl_wcookie != NULL)
		vn_rangelock_unlock(outvp, rl_wcookie);
	if (savinoff != -1 && (error == EINTR || error == ERESTART)) {
		*inoffp = savinoff;
		*outoffp = savoutoff;
	}
	if (outfp != NULL)
		fdrop(outfp, td);
	if (infp != NULL)
		fdrop(infp, td);
	td->td_retval[0] = retlen;
	return (error);
}

/*
 * copy_file_range(2) entry point.  Each user-supplied offset pointer that
 * is not NULL is copied in before the call and, if the copy succeeded,
 * copied back out afterwards.
 */
int
sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap)
{
	off_t inoff, outoff, *inoffp, *outoffp;
	int error;

	inoffp = outoffp = NULL;
	if (uap->inoffp != NULL) {
		error = copyin(uap->inoffp, &inoff, sizeof(off_t));
		if (error != 0)
			return (error);
		inoffp = &inoff;
	}
	if (uap->outoffp != NULL) {
		error = copyin(uap->outoffp, &outoff, sizeof(off_t));
		if (error != 0)
			return (error);
		outoffp = &outoff;
	}
	error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd,
	    outoffp, uap->len, uap->flags);
	if (error == 0 && uap->inoffp != NULL)
		error = copyout(inoffp, uap->inoffp, sizeof(off_t));
	if (error == 0 && uap->outoffp != NULL)
		error = copyout(outoffp, uap->outoffp, sizeof(off_t));
	return (error);
}