1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/cdefs.h> 38 #include "opt_capsicum.h" 39 #include "opt_ktrace.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #ifdef COMPAT_FREEBSD11 44 #include <sys/abi_compat.h> 45 #endif 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/malloc.h> 51 #include <sys/mount.h> 52 #include <sys/mutex.h> 53 #include <sys/sysproto.h> 54 #include <sys/namei.h> 55 #include <sys/filedesc.h> 56 #include <sys/kernel.h> 57 #include <sys/fcntl.h> 58 #include <sys/file.h> 59 #include <sys/filio.h> 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/rwlock.h> 63 #include <sys/sdt.h> 64 #include <sys/stat.h> 65 #include <sys/sx.h> 66 #include <sys/unistd.h> 67 #include <sys/vnode.h> 68 #include <sys/priv.h> 69 #include <sys/proc.h> 70 #include <sys/dirent.h> 71 #include <sys/jail.h> 72 #include <sys/syscallsubr.h> 73 #include <sys/sysctl.h> 74 #ifdef KTRACE 75 #include <sys/ktrace.h> 76 #endif 77 78 #include <machine/stdarg.h> 79 80 #include <security/audit/audit.h> 81 #include <security/mac/mac_framework.h> 82 83 #include <vm/vm.h> 84 #include <vm/vm_object.h> 85 #include <vm/vm_page.h> 86 #include <vm/vnode_pager.h> 87 #include <vm/uma.h> 88 89 #include <fs/devfs/devfs.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 static int kern_chflagsat(struct thread *td, int fd, const char *path, 94 enum uio_seg pathseg, u_long flags, int atflag); 95 static int setfflags(struct thread *td, struct vnode *, u_long); 96 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 97 static int getutimens(const struct timespec *, enum uio_seg, 98 struct timespec *, int *); 99 static int setutimes(struct thread *td, struct vnode *, 100 const struct timespec *, int, int); 101 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 102 struct thread *td); 103 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 104 enum uio_seg pathseg, fhandle_t *fhp); 105 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 106 size_t count, struct thread *td); 107 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 108 const char *path, enum uio_seg segflag); 109 110 uint64_t 111 at2cnpflags(u_int at_flags, u_int mask) 112 { 113 uint64_t res; 114 115 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 116 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 117 118 res = 0; 119 at_flags &= mask; 120 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 121 res |= RBENEATH; 122 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 123 res |= FOLLOW; 124 /* NOFOLLOW is pseudo flag */ 125 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 126 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 127 FOLLOW; 128 } 129 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 130 res |= EMPTYPATH; 131 return (res); 132 } 133 134 int 135 kern_sync(struct thread *td) 136 { 137 struct mount *mp, *nmp; 138 int save; 139 140 mtx_lock(&mountlist_mtx); 141 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 142 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 143 nmp = TAILQ_NEXT(mp, mnt_list); 144 continue; 145 } 146 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 147 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 148 save = curthread_pflags_set(TDP_SYNCIO); 149 vfs_periodic(mp, MNT_NOWAIT); 150 VFS_SYNC(mp, MNT_NOWAIT); 151 curthread_pflags_restore(save); 152 vn_finished_write(mp); 153 } 154 mtx_lock(&mountlist_mtx); 155 nmp = TAILQ_NEXT(mp, mnt_list); 156 vfs_unbusy(mp); 157 } 158 mtx_unlock(&mountlist_mtx); 159 return (0); 160 } 161 162 /* 163 * Sync each mounted filesystem. 164 */ 165 #ifndef _SYS_SYSPROTO_H_ 166 struct sync_args { 167 int dummy; 168 }; 169 #endif 170 /* ARGSUSED */ 171 int 172 sys_sync(struct thread *td, struct sync_args *uap) 173 { 174 175 return (kern_sync(td)); 176 } 177 178 /* 179 * Change filesystem quotas. 180 */ 181 #ifndef _SYS_SYSPROTO_H_ 182 struct quotactl_args { 183 char *path; 184 int cmd; 185 int uid; 186 caddr_t arg; 187 }; 188 #endif 189 int 190 sys_quotactl(struct thread *td, struct quotactl_args *uap) 191 { 192 struct mount *mp; 193 struct nameidata nd; 194 int error; 195 bool mp_busy; 196 197 AUDIT_ARG_CMD(uap->cmd); 198 AUDIT_ARG_UID(uap->uid); 199 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 200 return (EPERM); 201 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 202 uap->path); 203 if ((error = namei(&nd)) != 0) 204 return (error); 205 NDFREE_PNBUF(&nd); 206 mp = nd.ni_vp->v_mount; 207 vfs_ref(mp); 208 vput(nd.ni_vp); 209 error = vfs_busy(mp, 0); 210 if (error != 0) { 211 vfs_rel(mp); 212 return (error); 213 } 214 mp_busy = true; 215 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 216 217 /* 218 * Since quota on/off operations typically need to open quota 219 * files, the implementation may need to unbusy the mount point 220 * before calling into namei. Otherwise, unmount might be 221 * started between two vfs_busy() invocations (first is ours, 222 * second is from mount point cross-walk code in lookup()), 223 * causing deadlock. 224 * 225 * Avoid unbusying mp if the implementation indicates it has 226 * already done so. 227 */ 228 if (mp_busy) 229 vfs_unbusy(mp); 230 vfs_rel(mp); 231 return (error); 232 } 233 234 /* 235 * Used by statfs conversion routines to scale the block size up if 236 * necessary so that all of the block counts are <= 'max_size'. Note 237 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 238 * value of 'n'. 239 */ 240 void 241 statfs_scale_blocks(struct statfs *sf, long max_size) 242 { 243 uint64_t count; 244 int shift; 245 246 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 247 248 /* 249 * Attempt to scale the block counts to give a more accurate 250 * overview to userland of the ratio of free space to used 251 * space. To do this, find the largest block count and compute 252 * a divisor that lets it fit into a signed integer <= max_size. 253 */ 254 if (sf->f_bavail < 0) 255 count = -sf->f_bavail; 256 else 257 count = sf->f_bavail; 258 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 259 if (count <= max_size) 260 return; 261 262 count >>= flsl(max_size); 263 shift = 0; 264 while (count > 0) { 265 shift++; 266 count >>=1; 267 } 268 269 sf->f_bsize <<= shift; 270 sf->f_blocks >>= shift; 271 sf->f_bfree >>= shift; 272 sf->f_bavail >>= shift; 273 } 274 275 static int 276 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 277 { 278 int error; 279 280 if (mp == NULL) 281 return (EBADF); 282 error = vfs_busy(mp, 0); 283 vfs_rel(mp); 284 if (error != 0) 285 return (error); 286 #ifdef MAC 287 error = mac_mount_check_stat(td->td_ucred, mp); 288 if (error != 0) 289 goto out; 290 #endif 291 error = VFS_STATFS(mp, buf); 292 if (error != 0) 293 goto out; 294 if (priv_check_cred_vfs_generation(td->td_ucred)) { 295 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 296 prison_enforce_statfs(td->td_ucred, mp, buf); 297 } 298 out: 299 vfs_unbusy(mp); 300 return (error); 301 } 302 303 /* 304 * Get filesystem statistics. 305 */ 306 #ifndef _SYS_SYSPROTO_H_ 307 struct statfs_args { 308 char *path; 309 struct statfs *buf; 310 }; 311 #endif 312 int 313 sys_statfs(struct thread *td, struct statfs_args *uap) 314 { 315 struct statfs *sfp; 316 int error; 317 318 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 319 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 320 if (error == 0) 321 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 322 free(sfp, M_STATFS); 323 return (error); 324 } 325 326 int 327 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 328 struct statfs *buf) 329 { 330 struct mount *mp; 331 struct nameidata nd; 332 int error; 333 334 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 335 error = namei(&nd); 336 if (error != 0) 337 return (error); 338 NDFREE_PNBUF(&nd); 339 mp = vfs_ref_from_vp(nd.ni_vp); 340 vrele(nd.ni_vp); 341 return (kern_do_statfs(td, mp, buf)); 342 } 343 344 /* 345 * Get filesystem statistics. 346 */ 347 #ifndef _SYS_SYSPROTO_H_ 348 struct fstatfs_args { 349 int fd; 350 struct statfs *buf; 351 }; 352 #endif 353 int 354 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 355 { 356 struct statfs *sfp; 357 int error; 358 359 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 360 error = kern_fstatfs(td, uap->fd, sfp); 361 if (error == 0) 362 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 363 free(sfp, M_STATFS); 364 return (error); 365 } 366 367 int 368 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 369 { 370 struct file *fp; 371 struct mount *mp; 372 struct vnode *vp; 373 int error; 374 375 AUDIT_ARG_FD(fd); 376 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 377 if (error != 0) 378 return (error); 379 vp = fp->f_vnode; 380 #ifdef AUDIT 381 if (AUDITING_TD(td)) { 382 vn_lock(vp, LK_SHARED | LK_RETRY); 383 AUDIT_ARG_VNODE1(vp); 384 VOP_UNLOCK(vp); 385 } 386 #endif 387 mp = vfs_ref_from_vp(vp); 388 fdrop(fp, td); 389 return (kern_do_statfs(td, mp, buf)); 390 } 391 392 /* 393 * Get statistics on all filesystems. 394 */ 395 #ifndef _SYS_SYSPROTO_H_ 396 struct getfsstat_args { 397 struct statfs *buf; 398 long bufsize; 399 int mode; 400 }; 401 #endif 402 int 403 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 404 { 405 size_t count; 406 int error; 407 408 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 409 return (EINVAL); 410 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 411 UIO_USERSPACE, uap->mode); 412 if (error == 0) 413 td->td_retval[0] = count; 414 return (error); 415 } 416 417 /* 418 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 419 * The caller is responsible for freeing memory which will be allocated 420 * in '*buf'. 421 */ 422 int 423 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 424 size_t *countp, enum uio_seg bufseg, int mode) 425 { 426 struct mount *mp, *nmp; 427 struct statfs *sfsp, *sp, *sptmp, *tofree; 428 size_t count, maxcount; 429 int error; 430 431 switch (mode) { 432 case MNT_WAIT: 433 case MNT_NOWAIT: 434 break; 435 default: 436 if (bufseg == UIO_SYSSPACE) 437 *buf = NULL; 438 return (EINVAL); 439 } 440 restart: 441 maxcount = bufsize / sizeof(struct statfs); 442 if (bufsize == 0) { 443 sfsp = NULL; 444 tofree = NULL; 445 } else if (bufseg == UIO_USERSPACE) { 446 sfsp = *buf; 447 tofree = NULL; 448 } else /* if (bufseg == UIO_SYSSPACE) */ { 449 count = 0; 450 mtx_lock(&mountlist_mtx); 451 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 452 count++; 453 } 454 mtx_unlock(&mountlist_mtx); 455 if (maxcount > count) 456 maxcount = count; 457 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 458 M_STATFS, M_WAITOK); 459 } 460 461 count = 0; 462 463 /* 464 * If there is no target buffer they only want the count. 465 * 466 * This could be TAILQ_FOREACH but it is open-coded to match the original 467 * code below. 468 */ 469 if (sfsp == NULL) { 470 mtx_lock(&mountlist_mtx); 471 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 472 if (prison_canseemount(td->td_ucred, mp) != 0) { 473 nmp = TAILQ_NEXT(mp, mnt_list); 474 continue; 475 } 476 #ifdef MAC 477 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 478 nmp = TAILQ_NEXT(mp, mnt_list); 479 continue; 480 } 481 #endif 482 count++; 483 nmp = TAILQ_NEXT(mp, mnt_list); 484 } 485 mtx_unlock(&mountlist_mtx); 486 *countp = count; 487 return (0); 488 } 489 490 /* 491 * They want the entire thing. 492 * 493 * Short-circuit the corner case of no room for anything, avoids 494 * relocking below. 495 */ 496 if (maxcount < 1) { 497 goto out; 498 } 499 500 mtx_lock(&mountlist_mtx); 501 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 502 if (prison_canseemount(td->td_ucred, mp) != 0) { 503 nmp = TAILQ_NEXT(mp, mnt_list); 504 continue; 505 } 506 #ifdef MAC 507 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 508 nmp = TAILQ_NEXT(mp, mnt_list); 509 continue; 510 } 511 #endif 512 if (mode == MNT_WAIT) { 513 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 514 /* 515 * If vfs_busy() failed, and MBF_NOWAIT 516 * wasn't passed, then the mp is gone. 517 * Furthermore, because of MBF_MNTLSTLOCK, 518 * the mountlist_mtx was dropped. We have 519 * no other choice than to start over. 520 */ 521 mtx_unlock(&mountlist_mtx); 522 free(tofree, M_STATFS); 523 goto restart; 524 } 525 } else { 526 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 527 nmp = TAILQ_NEXT(mp, mnt_list); 528 continue; 529 } 530 } 531 sp = &mp->mnt_stat; 532 /* 533 * If MNT_NOWAIT is specified, do not refresh 534 * the fsstat cache. 535 */ 536 if (mode != MNT_NOWAIT) { 537 error = VFS_STATFS(mp, sp); 538 if (error != 0) { 539 mtx_lock(&mountlist_mtx); 540 nmp = TAILQ_NEXT(mp, mnt_list); 541 vfs_unbusy(mp); 542 continue; 543 } 544 } 545 if (priv_check_cred_vfs_generation(td->td_ucred)) { 546 sptmp = malloc(sizeof(struct statfs), M_STATFS, 547 M_WAITOK); 548 *sptmp = *sp; 549 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 550 prison_enforce_statfs(td->td_ucred, mp, sptmp); 551 sp = sptmp; 552 } else 553 sptmp = NULL; 554 if (bufseg == UIO_SYSSPACE) { 555 bcopy(sp, sfsp, sizeof(*sp)); 556 free(sptmp, M_STATFS); 557 } else /* if (bufseg == UIO_USERSPACE) */ { 558 error = copyout(sp, sfsp, sizeof(*sp)); 559 free(sptmp, M_STATFS); 560 if (error != 0) { 561 vfs_unbusy(mp); 562 return (error); 563 } 564 } 565 sfsp++; 566 count++; 567 568 if (count == maxcount) { 569 vfs_unbusy(mp); 570 goto out; 571 } 572 573 mtx_lock(&mountlist_mtx); 574 nmp = TAILQ_NEXT(mp, mnt_list); 575 vfs_unbusy(mp); 576 } 577 mtx_unlock(&mountlist_mtx); 578 out: 579 *countp = count; 580 return (0); 581 } 582 583 #ifdef COMPAT_FREEBSD4 584 /* 585 * Get old format filesystem statistics. 586 */ 587 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 588 589 #ifndef _SYS_SYSPROTO_H_ 590 struct freebsd4_statfs_args { 591 char *path; 592 struct ostatfs *buf; 593 }; 594 #endif 595 int 596 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 597 { 598 struct ostatfs osb; 599 struct statfs *sfp; 600 int error; 601 602 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 603 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 604 if (error == 0) { 605 freebsd4_cvtstatfs(sfp, &osb); 606 error = copyout(&osb, uap->buf, sizeof(osb)); 607 } 608 free(sfp, M_STATFS); 609 return (error); 610 } 611 612 /* 613 * Get filesystem statistics. 614 */ 615 #ifndef _SYS_SYSPROTO_H_ 616 struct freebsd4_fstatfs_args { 617 int fd; 618 struct ostatfs *buf; 619 }; 620 #endif 621 int 622 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 623 { 624 struct ostatfs osb; 625 struct statfs *sfp; 626 int error; 627 628 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 629 error = kern_fstatfs(td, uap->fd, sfp); 630 if (error == 0) { 631 freebsd4_cvtstatfs(sfp, &osb); 632 error = copyout(&osb, uap->buf, sizeof(osb)); 633 } 634 free(sfp, M_STATFS); 635 return (error); 636 } 637 638 /* 639 * Get statistics on all filesystems. 640 */ 641 #ifndef _SYS_SYSPROTO_H_ 642 struct freebsd4_getfsstat_args { 643 struct ostatfs *buf; 644 long bufsize; 645 int mode; 646 }; 647 #endif 648 int 649 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 650 { 651 struct statfs *buf, *sp; 652 struct ostatfs osb; 653 size_t count, size; 654 int error; 655 656 if (uap->bufsize < 0) 657 return (EINVAL); 658 count = uap->bufsize / sizeof(struct ostatfs); 659 if (count > SIZE_MAX / sizeof(struct statfs)) 660 return (EINVAL); 661 size = count * sizeof(struct statfs); 662 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 663 uap->mode); 664 if (error == 0) 665 td->td_retval[0] = count; 666 if (size != 0) { 667 sp = buf; 668 while (count != 0 && error == 0) { 669 freebsd4_cvtstatfs(sp, &osb); 670 error = copyout(&osb, uap->buf, sizeof(osb)); 671 sp++; 672 uap->buf++; 673 count--; 674 } 675 free(buf, M_STATFS); 676 } 677 return (error); 678 } 679 680 /* 681 * Implement fstatfs() for (NFS) file handles. 682 */ 683 #ifndef _SYS_SYSPROTO_H_ 684 struct freebsd4_fhstatfs_args { 685 struct fhandle *u_fhp; 686 struct ostatfs *buf; 687 }; 688 #endif 689 int 690 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 691 { 692 struct ostatfs osb; 693 struct statfs *sfp; 694 fhandle_t fh; 695 int error; 696 697 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 698 if (error != 0) 699 return (error); 700 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 701 error = kern_fhstatfs(td, fh, sfp); 702 if (error == 0) { 703 freebsd4_cvtstatfs(sfp, &osb); 704 error = copyout(&osb, uap->buf, sizeof(osb)); 705 } 706 free(sfp, M_STATFS); 707 return (error); 708 } 709 710 /* 711 * Convert a new format statfs structure to an old format statfs structure. 712 */ 713 static void 714 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 715 { 716 717 statfs_scale_blocks(nsp, LONG_MAX); 718 bzero(osp, sizeof(*osp)); 719 osp->f_bsize = nsp->f_bsize; 720 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 721 osp->f_blocks = nsp->f_blocks; 722 osp->f_bfree = nsp->f_bfree; 723 osp->f_bavail = nsp->f_bavail; 724 osp->f_files = MIN(nsp->f_files, LONG_MAX); 725 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 726 osp->f_owner = nsp->f_owner; 727 osp->f_type = nsp->f_type; 728 osp->f_flags = nsp->f_flags; 729 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 730 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 731 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 732 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 733 strlcpy(osp->f_fstypename, nsp->f_fstypename, 734 MIN(MFSNAMELEN, OMFSNAMELEN)); 735 strlcpy(osp->f_mntonname, nsp->f_mntonname, 736 MIN(MNAMELEN, OMNAMELEN)); 737 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 738 MIN(MNAMELEN, OMNAMELEN)); 739 osp->f_fsid = nsp->f_fsid; 740 } 741 #endif /* COMPAT_FREEBSD4 */ 742 743 #if defined(COMPAT_FREEBSD11) 744 /* 745 * Get old format filesystem statistics. 746 */ 747 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 748 749 int 750 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 751 { 752 struct freebsd11_statfs osb; 753 struct statfs *sfp; 754 int error; 755 756 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 757 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 758 if (error == 0) { 759 freebsd11_cvtstatfs(sfp, &osb); 760 error = copyout(&osb, uap->buf, sizeof(osb)); 761 } 762 free(sfp, M_STATFS); 763 return (error); 764 } 765 766 /* 767 * Get filesystem statistics. 768 */ 769 int 770 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 771 { 772 struct freebsd11_statfs osb; 773 struct statfs *sfp; 774 int error; 775 776 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 777 error = kern_fstatfs(td, uap->fd, sfp); 778 if (error == 0) { 779 freebsd11_cvtstatfs(sfp, &osb); 780 error = copyout(&osb, uap->buf, sizeof(osb)); 781 } 782 free(sfp, M_STATFS); 783 return (error); 784 } 785 786 /* 787 * Get statistics on all filesystems. 788 */ 789 int 790 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 791 { 792 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 793 } 794 795 int 796 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 797 long bufsize, int mode) 798 { 799 struct freebsd11_statfs osb; 800 struct statfs *buf, *sp; 801 size_t count, size; 802 int error; 803 804 if (bufsize < 0) 805 return (EINVAL); 806 807 count = bufsize / sizeof(struct ostatfs); 808 size = count * sizeof(struct statfs); 809 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 810 if (error == 0) 811 td->td_retval[0] = count; 812 if (size > 0) { 813 sp = buf; 814 while (count > 0 && error == 0) { 815 freebsd11_cvtstatfs(sp, &osb); 816 error = copyout(&osb, ubuf, sizeof(osb)); 817 sp++; 818 ubuf++; 819 count--; 820 } 821 free(buf, M_STATFS); 822 } 823 return (error); 824 } 825 826 /* 827 * Implement fstatfs() for (NFS) file handles. 828 */ 829 int 830 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 831 { 832 struct freebsd11_statfs osb; 833 struct statfs *sfp; 834 fhandle_t fh; 835 int error; 836 837 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 838 if (error) 839 return (error); 840 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 841 error = kern_fhstatfs(td, fh, sfp); 842 if (error == 0) { 843 freebsd11_cvtstatfs(sfp, &osb); 844 error = copyout(&osb, uap->buf, sizeof(osb)); 845 } 846 free(sfp, M_STATFS); 847 return (error); 848 } 849 850 /* 851 * Convert a new format statfs structure to an old format statfs structure. 852 */ 853 static void 854 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 855 { 856 857 bzero(osp, sizeof(*osp)); 858 osp->f_version = FREEBSD11_STATFS_VERSION; 859 osp->f_type = nsp->f_type; 860 osp->f_flags = nsp->f_flags; 861 osp->f_bsize = nsp->f_bsize; 862 osp->f_iosize = nsp->f_iosize; 863 osp->f_blocks = nsp->f_blocks; 864 osp->f_bfree = nsp->f_bfree; 865 osp->f_bavail = nsp->f_bavail; 866 osp->f_files = nsp->f_files; 867 osp->f_ffree = nsp->f_ffree; 868 osp->f_syncwrites = nsp->f_syncwrites; 869 osp->f_asyncwrites = nsp->f_asyncwrites; 870 osp->f_syncreads = nsp->f_syncreads; 871 osp->f_asyncreads = nsp->f_asyncreads; 872 osp->f_namemax = nsp->f_namemax; 873 osp->f_owner = nsp->f_owner; 874 osp->f_fsid = nsp->f_fsid; 875 strlcpy(osp->f_fstypename, nsp->f_fstypename, 876 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 877 strlcpy(osp->f_mntonname, nsp->f_mntonname, 878 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 879 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 880 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 881 } 882 #endif /* COMPAT_FREEBSD11 */ 883 884 /* 885 * Change current working directory to a given file descriptor. 886 */ 887 #ifndef _SYS_SYSPROTO_H_ 888 struct fchdir_args { 889 int fd; 890 }; 891 #endif 892 int 893 sys_fchdir(struct thread *td, struct fchdir_args *uap) 894 { 895 struct vnode *vp, *tdp; 896 struct mount *mp; 897 struct file *fp; 898 int error; 899 900 AUDIT_ARG_FD(uap->fd); 901 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 902 &fp); 903 if (error != 0) 904 return (error); 905 vp = fp->f_vnode; 906 vrefact(vp); 907 fdrop(fp, td); 908 vn_lock(vp, LK_SHARED | LK_RETRY); 909 AUDIT_ARG_VNODE1(vp); 910 error = change_dir(vp, td); 911 while (!error && (mp = vp->v_mountedhere) != NULL) { 912 if (vfs_busy(mp, 0)) 913 continue; 914 error = VFS_ROOT(mp, LK_SHARED, &tdp); 915 vfs_unbusy(mp); 916 if (error != 0) 917 break; 918 vput(vp); 919 vp = tdp; 920 } 921 if (error != 0) { 922 vput(vp); 923 return (error); 924 } 925 VOP_UNLOCK(vp); 926 pwd_chdir(td, vp); 927 return (0); 928 } 929 930 /* 931 * Change current working directory (``.''). 932 */ 933 #ifndef _SYS_SYSPROTO_H_ 934 struct chdir_args { 935 char *path; 936 }; 937 #endif 938 int 939 sys_chdir(struct thread *td, struct chdir_args *uap) 940 { 941 942 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 943 } 944 945 int 946 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 947 { 948 struct nameidata nd; 949 int error; 950 951 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 952 pathseg, path); 953 if ((error = namei(&nd)) != 0) 954 return (error); 955 if ((error = change_dir(nd.ni_vp, td)) != 0) { 956 vput(nd.ni_vp); 957 NDFREE_PNBUF(&nd); 958 return (error); 959 } 960 VOP_UNLOCK(nd.ni_vp); 961 NDFREE_PNBUF(&nd); 962 pwd_chdir(td, nd.ni_vp); 963 return (0); 964 } 965 966 static int unprivileged_chroot = 0; 967 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 968 &unprivileged_chroot, 0, 969 "Unprivileged processes can use chroot(2)"); 970 971 /* 972 * Takes locked vnode, unlocks it before returning. 973 */ 974 static int 975 kern_chroot(struct thread *td, struct vnode *vp) 976 { 977 struct proc *p; 978 int error; 979 980 error = priv_check(td, PRIV_VFS_CHROOT); 981 if (error != 0) { 982 p = td->td_proc; 983 PROC_LOCK(p); 984 if (unprivileged_chroot == 0 || 985 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 986 PROC_UNLOCK(p); 987 goto e_vunlock; 988 } 989 PROC_UNLOCK(p); 990 } 991 992 error = change_dir(vp, td); 993 if (error != 0) 994 goto e_vunlock; 995 #ifdef MAC 996 error = mac_vnode_check_chroot(td->td_ucred, vp); 997 if (error != 0) 998 goto e_vunlock; 999 #endif 1000 VOP_UNLOCK(vp); 1001 error = pwd_chroot(td, vp); 1002 vrele(vp); 1003 return (error); 1004 e_vunlock: 1005 vput(vp); 1006 return (error); 1007 } 1008 1009 /* 1010 * Change notion of root (``/'') directory. 1011 */ 1012 #ifndef _SYS_SYSPROTO_H_ 1013 struct chroot_args { 1014 char *path; 1015 }; 1016 #endif 1017 int 1018 sys_chroot(struct thread *td, struct chroot_args *uap) 1019 { 1020 struct nameidata nd; 1021 int error; 1022 1023 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1024 UIO_USERSPACE, uap->path); 1025 error = namei(&nd); 1026 if (error != 0) 1027 return (error); 1028 NDFREE_PNBUF(&nd); 1029 error = kern_chroot(td, nd.ni_vp); 1030 return (error); 1031 } 1032 1033 /* 1034 * Change notion of root directory to a given file descriptor. 1035 */ 1036 #ifndef _SYS_SYSPROTO_H_ 1037 struct fchroot_args { 1038 int fd; 1039 }; 1040 #endif 1041 int 1042 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1043 { 1044 struct vnode *vp; 1045 struct file *fp; 1046 int error; 1047 1048 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fp); 1049 if (error != 0) 1050 return (error); 1051 vp = fp->f_vnode; 1052 vrefact(vp); 1053 fdrop(fp, td); 1054 vn_lock(vp, LK_SHARED | LK_RETRY); 1055 error = kern_chroot(td, vp); 1056 return (error); 1057 } 1058 1059 /* 1060 * Common routine for chroot and chdir. Callers must provide a locked vnode 1061 * instance. 1062 */ 1063 int 1064 change_dir(struct vnode *vp, struct thread *td) 1065 { 1066 #ifdef MAC 1067 int error; 1068 #endif 1069 1070 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1071 if (vp->v_type != VDIR) 1072 return (ENOTDIR); 1073 #ifdef MAC 1074 error = mac_vnode_check_chdir(td->td_ucred, vp); 1075 if (error != 0) 1076 return (error); 1077 #endif 1078 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1079 } 1080 1081 static __inline void 1082 flags_to_rights(int flags, cap_rights_t *rightsp) 1083 { 1084 if (flags & O_EXEC) { 1085 cap_rights_set_one(rightsp, CAP_FEXECVE); 1086 if (flags & O_PATH) 1087 return; 1088 } else { 1089 switch ((flags & O_ACCMODE)) { 1090 case O_RDONLY: 1091 cap_rights_set_one(rightsp, CAP_READ); 1092 break; 1093 case O_RDWR: 1094 cap_rights_set_one(rightsp, CAP_READ); 1095 /* FALLTHROUGH */ 1096 case O_WRONLY: 1097 cap_rights_set_one(rightsp, CAP_WRITE); 1098 if (!(flags & (O_APPEND | O_TRUNC))) 1099 cap_rights_set_one(rightsp, CAP_SEEK); 1100 break; 1101 } 1102 } 1103 1104 if (flags & O_CREAT) 1105 cap_rights_set_one(rightsp, CAP_CREATE); 1106 1107 if (flags & O_TRUNC) 1108 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1109 1110 if (flags & (O_SYNC | O_FSYNC)) 1111 cap_rights_set_one(rightsp, CAP_FSYNC); 1112 1113 if (flags & (O_EXLOCK | O_SHLOCK)) 1114 cap_rights_set_one(rightsp, CAP_FLOCK); 1115 } 1116 1117 /* 1118 * Check permissions, allocate an open file structure, and call the device 1119 * open routine if any. 1120 */ 1121 #ifndef _SYS_SYSPROTO_H_ 1122 struct open_args { 1123 char *path; 1124 int flags; 1125 int mode; 1126 }; 1127 #endif 1128 int 1129 sys_open(struct thread *td, struct open_args *uap) 1130 { 1131 1132 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1133 uap->flags, uap->mode)); 1134 } 1135 1136 #ifndef _SYS_SYSPROTO_H_ 1137 struct openat_args { 1138 int fd; 1139 char *path; 1140 int flag; 1141 int mode; 1142 }; 1143 #endif 1144 int 1145 sys_openat(struct thread *td, struct openat_args *uap) 1146 { 1147 1148 AUDIT_ARG_FD(uap->fd); 1149 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1150 uap->mode)); 1151 } 1152 1153 /* 1154 * If fpp != NULL, opened file is not installed into the file 1155 * descriptor table, instead it is returned in *fpp. This is 1156 * incompatible with fdopen(), in which case we return EINVAL. 1157 */ 1158 static int 1159 openatfp(struct thread *td, int dirfd, const char *path, 1160 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1161 { 1162 struct proc *p; 1163 struct filedesc *fdp; 1164 struct pwddesc *pdp; 1165 struct file *fp; 1166 struct vnode *vp; 1167 struct filecaps *fcaps; 1168 struct nameidata nd; 1169 cap_rights_t rights; 1170 int cmode, error, indx; 1171 1172 indx = -1; 1173 p = td->td_proc; 1174 fdp = p->p_fd; 1175 pdp = p->p_pd; 1176 1177 AUDIT_ARG_FFLAGS(flags); 1178 AUDIT_ARG_MODE(mode); 1179 cap_rights_init_one(&rights, CAP_LOOKUP); 1180 flags_to_rights(flags, &rights); 1181 1182 /* 1183 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1184 * may be specified. On the other hand, for O_PATH any mode 1185 * except O_EXEC is ignored. 1186 */ 1187 if ((flags & O_PATH) != 0) { 1188 flags &= ~O_ACCMODE; 1189 } else if ((flags & O_EXEC) != 0) { 1190 if (flags & O_ACCMODE) 1191 return (EINVAL); 1192 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1193 return (EINVAL); 1194 } else { 1195 flags = FFLAGS(flags); 1196 } 1197 1198 /* 1199 * Allocate a file structure. The descriptor to reference it 1200 * is allocated and used by finstall_refed() below. 1201 */ 1202 error = falloc_noinstall(td, &fp); 1203 if (error != 0) 1204 return (error); 1205 /* Set the flags early so the finit in devfs can pick them up. */ 1206 fp->f_flag = flags & FMASK; 1207 cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; 1208 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS, 1209 pathseg, path, dirfd, &rights); 1210 td->td_dupfd = -1; /* XXX check for fdopen */ 1211 error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS, 1212 td->td_ucred, fp); 1213 if (error != 0) { 1214 /* 1215 * If the vn_open replaced the method vector, something 1216 * wonderous happened deep below and we just pass it up 1217 * pretending we know what we do. 1218 */ 1219 if (error == ENXIO && fp->f_ops != &badfileops) { 1220 MPASS((flags & O_PATH) == 0); 1221 goto success; 1222 } 1223 1224 /* 1225 * Handle special fdopen() case. bleh. 1226 * 1227 * Don't do this for relative (capability) lookups; we don't 1228 * understand exactly what would happen, and we don't think 1229 * that it ever should. 1230 */ 1231 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && 1232 (error == ENODEV || error == ENXIO) && 1233 td->td_dupfd >= 0) { 1234 MPASS(fpp == NULL); 1235 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1236 &indx); 1237 if (error == 0) 1238 goto success; 1239 } 1240 1241 goto bad; 1242 } 1243 td->td_dupfd = 0; 1244 NDFREE_PNBUF(&nd); 1245 vp = nd.ni_vp; 1246 1247 /* 1248 * Store the vnode, for any f_type. Typically, the vnode use 1249 * count is decremented by direct call to vn_closefile() for 1250 * files that switched type in the cdevsw fdopen() method. 1251 */ 1252 fp->f_vnode = vp; 1253 1254 /* 1255 * If the file wasn't claimed by devfs bind it to the normal 1256 * vnode operations here. 1257 */ 1258 if (fp->f_ops == &badfileops) { 1259 KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, 1260 ("Unexpected fifo fp %p vp %p", fp, vp)); 1261 if ((flags & O_PATH) != 0) { 1262 finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED), 1263 DTYPE_VNODE, NULL, &path_fileops); 1264 } else { 1265 finit_vnode(fp, flags, NULL, &vnops); 1266 } 1267 } 1268 1269 VOP_UNLOCK(vp); 1270 if (flags & O_TRUNC) { 1271 error = fo_truncate(fp, 0, td->td_ucred, td); 1272 if (error != 0) 1273 goto bad; 1274 } 1275 success: 1276 if (fpp != NULL) { 1277 MPASS(error == 0); 1278 NDFREE_IOCTLCAPS(&nd); 1279 *fpp = fp; 1280 return (0); 1281 } 1282 1283 /* 1284 * If we haven't already installed the FD (for dupfdopen), do so now. 1285 */ 1286 if (indx == -1) { 1287 #ifdef CAPABILITIES 1288 if ((nd.ni_resflags & NIRES_STRICTREL) != 0) 1289 fcaps = &nd.ni_filecaps; 1290 else 1291 #endif 1292 fcaps = NULL; 1293 error = finstall_refed(td, fp, &indx, flags, fcaps); 1294 /* On success finstall_refed() consumes fcaps. */ 1295 if (error != 0) { 1296 goto bad; 1297 } 1298 } else { 1299 NDFREE_IOCTLCAPS(&nd); 1300 falloc_abort(td, fp); 1301 } 1302 1303 td->td_retval[0] = indx; 1304 return (0); 1305 bad: 1306 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1307 NDFREE_IOCTLCAPS(&nd); 1308 falloc_abort(td, fp); 1309 return (error); 1310 } 1311 1312 int 1313 kern_openat(struct thread *td, int dirfd, const char *path, 1314 enum uio_seg pathseg, int flags, int mode) 1315 { 1316 return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL)); 1317 } 1318 1319 int 1320 kern_openatfp(struct thread *td, int dirfd, const char *path, 1321 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1322 { 1323 int error, old_dupfd; 1324 1325 old_dupfd = td->td_dupfd; 1326 td->td_dupfd = -1; 1327 error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp); 1328 td->td_dupfd = old_dupfd; 1329 return (error); 1330 } 1331 1332 #ifdef COMPAT_43 1333 /* 1334 * Create a file. 1335 */ 1336 #ifndef _SYS_SYSPROTO_H_ 1337 struct ocreat_args { 1338 char *path; 1339 int mode; 1340 }; 1341 #endif 1342 int 1343 ocreat(struct thread *td, struct ocreat_args *uap) 1344 { 1345 1346 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1347 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1348 } 1349 #endif /* COMPAT_43 */ 1350 1351 /* 1352 * Create a special file. 1353 */ 1354 #ifndef _SYS_SYSPROTO_H_ 1355 struct mknodat_args { 1356 int fd; 1357 char *path; 1358 mode_t mode; 1359 dev_t dev; 1360 }; 1361 #endif 1362 int 1363 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1364 { 1365 1366 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1367 uap->dev)); 1368 } 1369 1370 #if defined(COMPAT_FREEBSD11) 1371 int 1372 freebsd11_mknod(struct thread *td, 1373 struct freebsd11_mknod_args *uap) 1374 { 1375 1376 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1377 uap->mode, uap->dev)); 1378 } 1379 1380 int 1381 freebsd11_mknodat(struct thread *td, 1382 struct freebsd11_mknodat_args *uap) 1383 { 1384 1385 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1386 uap->dev)); 1387 } 1388 #endif /* COMPAT_FREEBSD11 */ 1389 1390 int 1391 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1392 int mode, dev_t dev) 1393 { 1394 struct vnode *vp; 1395 struct mount *mp; 1396 struct vattr vattr; 1397 struct nameidata nd; 1398 int error, whiteout = 0; 1399 1400 AUDIT_ARG_MODE(mode); 1401 AUDIT_ARG_DEV(dev); 1402 switch (mode & S_IFMT) { 1403 case S_IFCHR: 1404 case S_IFBLK: 1405 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1406 if (error == 0 && dev == VNOVAL) 1407 error = EINVAL; 1408 break; 1409 case S_IFWHT: 1410 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1411 break; 1412 case S_IFIFO: 1413 if (dev == 0) 1414 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1415 /* FALLTHROUGH */ 1416 default: 1417 error = EINVAL; 1418 break; 1419 } 1420 if (error != 0) 1421 return (error); 1422 NDPREINIT(&nd); 1423 restart: 1424 bwillwrite(); 1425 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1426 pathseg, path, fd, &cap_mknodat_rights); 1427 if ((error = namei(&nd)) != 0) 1428 return (error); 1429 vp = nd.ni_vp; 1430 if (vp != NULL) { 1431 NDFREE_PNBUF(&nd); 1432 if (vp == nd.ni_dvp) 1433 vrele(nd.ni_dvp); 1434 else 1435 vput(nd.ni_dvp); 1436 vrele(vp); 1437 return (EEXIST); 1438 } else { 1439 VATTR_NULL(&vattr); 1440 vattr.va_mode = (mode & ALLPERMS) & 1441 ~td->td_proc->p_pd->pd_cmask; 1442 vattr.va_rdev = dev; 1443 whiteout = 0; 1444 1445 switch (mode & S_IFMT) { 1446 case S_IFCHR: 1447 vattr.va_type = VCHR; 1448 break; 1449 case S_IFBLK: 1450 vattr.va_type = VBLK; 1451 break; 1452 case S_IFWHT: 1453 whiteout = 1; 1454 break; 1455 default: 1456 panic("kern_mknod: invalid mode"); 1457 } 1458 } 1459 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1460 NDFREE_PNBUF(&nd); 1461 vput(nd.ni_dvp); 1462 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1463 return (error); 1464 goto restart; 1465 } 1466 #ifdef MAC 1467 if (error == 0 && !whiteout) 1468 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1469 &nd.ni_cnd, &vattr); 1470 #endif 1471 if (error == 0) { 1472 if (whiteout) 1473 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1474 else { 1475 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1476 &nd.ni_cnd, &vattr); 1477 } 1478 } 1479 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1480 true); 1481 vn_finished_write(mp); 1482 NDFREE_PNBUF(&nd); 1483 if (error == ERELOOKUP) 1484 goto restart; 1485 return (error); 1486 } 1487 1488 /* 1489 * Create a named pipe. 1490 */ 1491 #ifndef _SYS_SYSPROTO_H_ 1492 struct mkfifo_args { 1493 char *path; 1494 int mode; 1495 }; 1496 #endif 1497 int 1498 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1499 { 1500 1501 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1502 uap->mode)); 1503 } 1504 1505 #ifndef _SYS_SYSPROTO_H_ 1506 struct mkfifoat_args { 1507 int fd; 1508 char *path; 1509 mode_t mode; 1510 }; 1511 #endif 1512 int 1513 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1514 { 1515 1516 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1517 uap->mode)); 1518 } 1519 1520 int 1521 kern_mkfifoat(struct thread *td, int fd, const char *path, 1522 enum uio_seg pathseg, int mode) 1523 { 1524 struct mount *mp; 1525 struct vattr vattr; 1526 struct nameidata nd; 1527 int error; 1528 1529 AUDIT_ARG_MODE(mode); 1530 NDPREINIT(&nd); 1531 restart: 1532 bwillwrite(); 1533 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1534 pathseg, path, fd, &cap_mkfifoat_rights); 1535 if ((error = namei(&nd)) != 0) 1536 return (error); 1537 if (nd.ni_vp != NULL) { 1538 NDFREE_PNBUF(&nd); 1539 if (nd.ni_vp == nd.ni_dvp) 1540 vrele(nd.ni_dvp); 1541 else 1542 vput(nd.ni_dvp); 1543 vrele(nd.ni_vp); 1544 return (EEXIST); 1545 } 1546 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1547 NDFREE_PNBUF(&nd); 1548 vput(nd.ni_dvp); 1549 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1550 return (error); 1551 goto restart; 1552 } 1553 VATTR_NULL(&vattr); 1554 vattr.va_type = VFIFO; 1555 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1556 #ifdef MAC 1557 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1558 &vattr); 1559 if (error != 0) 1560 goto out; 1561 #endif 1562 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1563 #ifdef MAC 1564 out: 1565 #endif 1566 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1567 vn_finished_write(mp); 1568 NDFREE_PNBUF(&nd); 1569 if (error == ERELOOKUP) 1570 goto restart; 1571 return (error); 1572 } 1573 1574 /* 1575 * Make a hard file link. 1576 */ 1577 #ifndef _SYS_SYSPROTO_H_ 1578 struct link_args { 1579 char *path; 1580 char *link; 1581 }; 1582 #endif 1583 int 1584 sys_link(struct thread *td, struct link_args *uap) 1585 { 1586 1587 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1588 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1589 } 1590 1591 #ifndef _SYS_SYSPROTO_H_ 1592 struct linkat_args { 1593 int fd1; 1594 char *path1; 1595 int fd2; 1596 char *path2; 1597 int flag; 1598 }; 1599 #endif 1600 int 1601 sys_linkat(struct thread *td, struct linkat_args *uap) 1602 { 1603 1604 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1605 UIO_USERSPACE, uap->flag)); 1606 } 1607 1608 int hardlink_check_uid = 0; 1609 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1610 &hardlink_check_uid, 0, 1611 "Unprivileged processes cannot create hard links to files owned by other " 1612 "users"); 1613 static int hardlink_check_gid = 0; 1614 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1615 &hardlink_check_gid, 0, 1616 "Unprivileged processes cannot create hard links to files owned by other " 1617 "groups"); 1618 1619 static int 1620 can_hardlink(struct vnode *vp, struct ucred *cred) 1621 { 1622 struct vattr va; 1623 int error; 1624 1625 if (!hardlink_check_uid && !hardlink_check_gid) 1626 return (0); 1627 1628 error = VOP_GETATTR(vp, &va, cred); 1629 if (error != 0) 1630 return (error); 1631 1632 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1633 error = priv_check_cred(cred, PRIV_VFS_LINK); 1634 if (error != 0) 1635 return (error); 1636 } 1637 1638 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1639 error = priv_check_cred(cred, PRIV_VFS_LINK); 1640 if (error != 0) 1641 return (error); 1642 } 1643 1644 return (0); 1645 } 1646 1647 int 1648 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1649 const char *path2, enum uio_seg segflag, int flag) 1650 { 1651 struct nameidata nd; 1652 int error; 1653 1654 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1655 AT_EMPTY_PATH)) != 0) 1656 return (EINVAL); 1657 1658 NDPREINIT(&nd); 1659 do { 1660 bwillwrite(); 1661 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1662 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1663 segflag, path1, fd1, &cap_linkat_source_rights); 1664 if ((error = namei(&nd)) != 0) 1665 return (error); 1666 NDFREE_PNBUF(&nd); 1667 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1668 error = priv_check(td, PRIV_VFS_FHOPEN); 1669 if (error != 0) { 1670 vrele(nd.ni_vp); 1671 return (error); 1672 } 1673 } 1674 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1675 } while (error == EAGAIN || error == ERELOOKUP); 1676 return (error); 1677 } 1678 1679 static int 1680 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1681 enum uio_seg segflag) 1682 { 1683 struct nameidata nd; 1684 struct mount *mp; 1685 int error; 1686 1687 if (vp->v_type == VDIR) { 1688 vrele(vp); 1689 return (EPERM); /* POSIX */ 1690 } 1691 NDINIT_ATRIGHTS(&nd, CREATE, 1692 LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1693 &cap_linkat_target_rights); 1694 if ((error = namei(&nd)) == 0) { 1695 if (nd.ni_vp != NULL) { 1696 NDFREE_PNBUF(&nd); 1697 if (nd.ni_dvp == nd.ni_vp) 1698 vrele(nd.ni_dvp); 1699 else 1700 vput(nd.ni_dvp); 1701 vrele(nd.ni_vp); 1702 vrele(vp); 1703 return (EEXIST); 1704 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1705 /* 1706 * Cross-device link. No need to recheck 1707 * vp->v_type, since it cannot change, except 1708 * to VBAD. 1709 */ 1710 NDFREE_PNBUF(&nd); 1711 vput(nd.ni_dvp); 1712 vrele(vp); 1713 return (EXDEV); 1714 } else if (vn_lock(vp, LK_EXCLUSIVE) == 0) { 1715 error = can_hardlink(vp, td->td_ucred); 1716 #ifdef MAC 1717 if (error == 0) 1718 error = mac_vnode_check_link(td->td_ucred, 1719 nd.ni_dvp, vp, &nd.ni_cnd); 1720 #endif 1721 if (error != 0) { 1722 vput(vp); 1723 vput(nd.ni_dvp); 1724 NDFREE_PNBUF(&nd); 1725 return (error); 1726 } 1727 error = vn_start_write(vp, &mp, V_NOWAIT); 1728 if (error != 0) { 1729 vput(vp); 1730 vput(nd.ni_dvp); 1731 NDFREE_PNBUF(&nd); 1732 error = vn_start_write(NULL, &mp, 1733 V_XSLEEP | V_PCATCH); 1734 if (error != 0) 1735 return (error); 1736 return (EAGAIN); 1737 } 1738 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1739 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1740 vn_finished_write(mp); 1741 NDFREE_PNBUF(&nd); 1742 vp = NULL; 1743 } else { 1744 vput(nd.ni_dvp); 1745 NDFREE_PNBUF(&nd); 1746 vrele(vp); 1747 return (EAGAIN); 1748 } 1749 } 1750 if (vp != NULL) 1751 vrele(vp); 1752 return (error); 1753 } 1754 1755 /* 1756 * Make a symbolic link. 1757 */ 1758 #ifndef _SYS_SYSPROTO_H_ 1759 struct symlink_args { 1760 char *path; 1761 char *link; 1762 }; 1763 #endif 1764 int 1765 sys_symlink(struct thread *td, struct symlink_args *uap) 1766 { 1767 1768 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1769 UIO_USERSPACE)); 1770 } 1771 1772 #ifndef _SYS_SYSPROTO_H_ 1773 struct symlinkat_args { 1774 char *path; 1775 int fd; 1776 char *path2; 1777 }; 1778 #endif 1779 int 1780 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1781 { 1782 1783 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1784 UIO_USERSPACE)); 1785 } 1786 1787 int 1788 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1789 enum uio_seg segflg) 1790 { 1791 struct mount *mp; 1792 struct vattr vattr; 1793 const char *syspath; 1794 char *tmppath; 1795 struct nameidata nd; 1796 int error; 1797 1798 if (segflg == UIO_SYSSPACE) { 1799 syspath = path1; 1800 } else { 1801 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1802 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1803 goto out; 1804 syspath = tmppath; 1805 } 1806 AUDIT_ARG_TEXT(syspath); 1807 NDPREINIT(&nd); 1808 restart: 1809 bwillwrite(); 1810 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1811 path2, fd, &cap_symlinkat_rights); 1812 if ((error = namei(&nd)) != 0) 1813 goto out; 1814 if (nd.ni_vp) { 1815 NDFREE_PNBUF(&nd); 1816 if (nd.ni_vp == nd.ni_dvp) 1817 vrele(nd.ni_dvp); 1818 else 1819 vput(nd.ni_dvp); 1820 vrele(nd.ni_vp); 1821 nd.ni_vp = NULL; 1822 error = EEXIST; 1823 goto out; 1824 } 1825 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1826 NDFREE_PNBUF(&nd); 1827 vput(nd.ni_dvp); 1828 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1829 goto out; 1830 goto restart; 1831 } 1832 VATTR_NULL(&vattr); 1833 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1834 #ifdef MAC 1835 vattr.va_type = VLNK; 1836 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1837 &vattr); 1838 if (error != 0) 1839 goto out2; 1840 #endif 1841 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1842 #ifdef MAC 1843 out2: 1844 #endif 1845 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1846 vn_finished_write(mp); 1847 NDFREE_PNBUF(&nd); 1848 if (error == ERELOOKUP) 1849 goto restart; 1850 out: 1851 if (segflg != UIO_SYSSPACE) 1852 uma_zfree(namei_zone, tmppath); 1853 return (error); 1854 } 1855 1856 /* 1857 * Delete a whiteout from the filesystem. 1858 */ 1859 #ifndef _SYS_SYSPROTO_H_ 1860 struct undelete_args { 1861 char *path; 1862 }; 1863 #endif 1864 int 1865 sys_undelete(struct thread *td, struct undelete_args *uap) 1866 { 1867 struct mount *mp; 1868 struct nameidata nd; 1869 int error; 1870 1871 NDPREINIT(&nd); 1872 restart: 1873 bwillwrite(); 1874 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1875 UIO_USERSPACE, uap->path); 1876 error = namei(&nd); 1877 if (error != 0) 1878 return (error); 1879 1880 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1881 NDFREE_PNBUF(&nd); 1882 if (nd.ni_vp == nd.ni_dvp) 1883 vrele(nd.ni_dvp); 1884 else 1885 vput(nd.ni_dvp); 1886 if (nd.ni_vp) 1887 vrele(nd.ni_vp); 1888 return (EEXIST); 1889 } 1890 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1891 NDFREE_PNBUF(&nd); 1892 vput(nd.ni_dvp); 1893 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1894 return (error); 1895 goto restart; 1896 } 1897 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1898 NDFREE_PNBUF(&nd); 1899 vput(nd.ni_dvp); 1900 vn_finished_write(mp); 1901 if (error == ERELOOKUP) 1902 goto restart; 1903 return (error); 1904 } 1905 1906 /* 1907 * Delete a name from the filesystem. 1908 */ 1909 #ifndef _SYS_SYSPROTO_H_ 1910 struct unlink_args { 1911 char *path; 1912 }; 1913 #endif 1914 int 1915 sys_unlink(struct thread *td, struct unlink_args *uap) 1916 { 1917 1918 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1919 0, 0)); 1920 } 1921 1922 static int 1923 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1924 int flag, enum uio_seg pathseg, ino_t oldinum) 1925 { 1926 1927 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1928 return (EINVAL); 1929 1930 if ((flag & AT_REMOVEDIR) != 0) 1931 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1932 1933 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1934 } 1935 1936 #ifndef _SYS_SYSPROTO_H_ 1937 struct unlinkat_args { 1938 int fd; 1939 char *path; 1940 int flag; 1941 }; 1942 #endif 1943 int 1944 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1945 { 1946 1947 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1948 UIO_USERSPACE, 0)); 1949 } 1950 1951 #ifndef _SYS_SYSPROTO_H_ 1952 struct funlinkat_args { 1953 int dfd; 1954 const char *path; 1955 int fd; 1956 int flag; 1957 }; 1958 #endif 1959 int 1960 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1961 { 1962 1963 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1964 UIO_USERSPACE, 0)); 1965 } 1966 1967 int 1968 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1969 enum uio_seg pathseg, int flag, ino_t oldinum) 1970 { 1971 struct mount *mp; 1972 struct file *fp; 1973 struct vnode *vp; 1974 struct nameidata nd; 1975 struct stat sb; 1976 int error; 1977 1978 fp = NULL; 1979 if (fd != FD_NONE) { 1980 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1981 if (error != 0) 1982 return (error); 1983 } 1984 1985 NDPREINIT(&nd); 1986 restart: 1987 bwillwrite(); 1988 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1989 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1990 pathseg, path, dfd, &cap_unlinkat_rights); 1991 if ((error = namei(&nd)) != 0) { 1992 if (error == EINVAL) 1993 error = EPERM; 1994 goto fdout; 1995 } 1996 vp = nd.ni_vp; 1997 if (vp->v_type == VDIR && oldinum == 0) { 1998 error = EPERM; /* POSIX */ 1999 } else if (oldinum != 0 && 2000 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2001 sb.st_ino != oldinum) { 2002 error = EIDRM; /* Identifier removed */ 2003 } else if (fp != NULL && fp->f_vnode != vp) { 2004 if (VN_IS_DOOMED(fp->f_vnode)) 2005 error = EBADF; 2006 else 2007 error = EDEADLK; 2008 } else { 2009 /* 2010 * The root of a mounted filesystem cannot be deleted. 2011 * 2012 * XXX: can this only be a VDIR case? 2013 */ 2014 if (vp->v_vflag & VV_ROOT) 2015 error = EBUSY; 2016 } 2017 if (error == 0) { 2018 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2019 NDFREE_PNBUF(&nd); 2020 vput(nd.ni_dvp); 2021 if (vp == nd.ni_dvp) 2022 vrele(vp); 2023 else 2024 vput(vp); 2025 if ((error = vn_start_write(NULL, &mp, 2026 V_XSLEEP | V_PCATCH)) != 0) { 2027 goto fdout; 2028 } 2029 goto restart; 2030 } 2031 #ifdef MAC 2032 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2033 &nd.ni_cnd); 2034 if (error != 0) 2035 goto out; 2036 #endif 2037 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 2038 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2039 #ifdef MAC 2040 out: 2041 #endif 2042 vn_finished_write(mp); 2043 } 2044 NDFREE_PNBUF(&nd); 2045 vput(nd.ni_dvp); 2046 if (vp == nd.ni_dvp) 2047 vrele(vp); 2048 else 2049 vput(vp); 2050 if (error == ERELOOKUP) 2051 goto restart; 2052 fdout: 2053 if (fp != NULL) 2054 fdrop(fp, td); 2055 return (error); 2056 } 2057 2058 /* 2059 * Reposition read/write file offset. 2060 */ 2061 #ifndef _SYS_SYSPROTO_H_ 2062 struct lseek_args { 2063 int fd; 2064 int pad; 2065 off_t offset; 2066 int whence; 2067 }; 2068 #endif 2069 int 2070 sys_lseek(struct thread *td, struct lseek_args *uap) 2071 { 2072 2073 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2074 } 2075 2076 int 2077 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2078 { 2079 struct file *fp; 2080 int error; 2081 2082 AUDIT_ARG_FD(fd); 2083 error = fget(td, fd, &cap_seek_rights, &fp); 2084 if (error != 0) 2085 return (error); 2086 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2087 fo_seek(fp, offset, whence, td) : ESPIPE; 2088 fdrop(fp, td); 2089 return (error); 2090 } 2091 2092 #if defined(COMPAT_43) 2093 /* 2094 * Reposition read/write file offset. 2095 */ 2096 #ifndef _SYS_SYSPROTO_H_ 2097 struct olseek_args { 2098 int fd; 2099 long offset; 2100 int whence; 2101 }; 2102 #endif 2103 int 2104 olseek(struct thread *td, struct olseek_args *uap) 2105 { 2106 2107 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2108 } 2109 #endif /* COMPAT_43 */ 2110 2111 #if defined(COMPAT_FREEBSD6) 2112 /* Version with the 'pad' argument */ 2113 int 2114 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2115 { 2116 2117 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2118 } 2119 #endif 2120 2121 /* 2122 * Check access permissions using passed credentials. 2123 */ 2124 static int 2125 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2126 struct thread *td) 2127 { 2128 accmode_t accmode; 2129 int error; 2130 2131 /* Flags == 0 means only check for existence. */ 2132 if (user_flags == 0) 2133 return (0); 2134 2135 accmode = 0; 2136 if (user_flags & R_OK) 2137 accmode |= VREAD; 2138 if (user_flags & W_OK) 2139 accmode |= VWRITE; 2140 if (user_flags & X_OK) 2141 accmode |= VEXEC; 2142 #ifdef MAC 2143 error = mac_vnode_check_access(cred, vp, accmode); 2144 if (error != 0) 2145 return (error); 2146 #endif 2147 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2148 error = VOP_ACCESS(vp, accmode, cred, td); 2149 return (error); 2150 } 2151 2152 /* 2153 * Check access permissions using "real" credentials. 2154 */ 2155 #ifndef _SYS_SYSPROTO_H_ 2156 struct access_args { 2157 char *path; 2158 int amode; 2159 }; 2160 #endif 2161 int 2162 sys_access(struct thread *td, struct access_args *uap) 2163 { 2164 2165 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2166 0, uap->amode)); 2167 } 2168 2169 #ifndef _SYS_SYSPROTO_H_ 2170 struct faccessat_args { 2171 int dirfd; 2172 char *path; 2173 int amode; 2174 int flag; 2175 } 2176 #endif 2177 int 2178 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2179 { 2180 2181 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2182 uap->amode)); 2183 } 2184 2185 int 2186 kern_accessat(struct thread *td, int fd, const char *path, 2187 enum uio_seg pathseg, int flag, int amode) 2188 { 2189 struct ucred *cred, *usecred; 2190 struct vnode *vp; 2191 struct nameidata nd; 2192 int error; 2193 2194 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2195 AT_SYMLINK_NOFOLLOW)) != 0) 2196 return (EINVAL); 2197 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2198 return (EINVAL); 2199 2200 /* 2201 * Create and modify a temporary credential instead of one that 2202 * is potentially shared (if we need one). 2203 */ 2204 cred = td->td_ucred; 2205 if ((flag & AT_EACCESS) == 0 && 2206 ((cred->cr_uid != cred->cr_ruid || 2207 cred->cr_rgid != cred->cr_groups[0]))) { 2208 usecred = crdup(cred); 2209 usecred->cr_uid = cred->cr_ruid; 2210 usecred->cr_groups[0] = cred->cr_rgid; 2211 td->td_ucred = usecred; 2212 } else 2213 usecred = cred; 2214 AUDIT_ARG_VALUE(amode); 2215 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2216 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2217 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2218 if ((error = namei(&nd)) != 0) 2219 goto out; 2220 vp = nd.ni_vp; 2221 2222 error = vn_access(vp, amode, usecred, td); 2223 NDFREE_PNBUF(&nd); 2224 vput(vp); 2225 out: 2226 if (usecred != cred) { 2227 td->td_ucred = cred; 2228 crfree(usecred); 2229 } 2230 return (error); 2231 } 2232 2233 /* 2234 * Check access permissions using "effective" credentials. 2235 */ 2236 #ifndef _SYS_SYSPROTO_H_ 2237 struct eaccess_args { 2238 char *path; 2239 int amode; 2240 }; 2241 #endif 2242 int 2243 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2244 { 2245 2246 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2247 AT_EACCESS, uap->amode)); 2248 } 2249 2250 #if defined(COMPAT_43) 2251 /* 2252 * Get file status; this version follows links. 2253 */ 2254 #ifndef _SYS_SYSPROTO_H_ 2255 struct ostat_args { 2256 char *path; 2257 struct ostat *ub; 2258 }; 2259 #endif 2260 int 2261 ostat(struct thread *td, struct ostat_args *uap) 2262 { 2263 struct stat sb; 2264 struct ostat osb; 2265 int error; 2266 2267 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2268 if (error != 0) 2269 return (error); 2270 cvtstat(&sb, &osb); 2271 return (copyout(&osb, uap->ub, sizeof (osb))); 2272 } 2273 2274 /* 2275 * Get file status; this version does not follow links. 2276 */ 2277 #ifndef _SYS_SYSPROTO_H_ 2278 struct olstat_args { 2279 char *path; 2280 struct ostat *ub; 2281 }; 2282 #endif 2283 int 2284 olstat(struct thread *td, struct olstat_args *uap) 2285 { 2286 struct stat sb; 2287 struct ostat osb; 2288 int error; 2289 2290 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2291 UIO_USERSPACE, &sb); 2292 if (error != 0) 2293 return (error); 2294 cvtstat(&sb, &osb); 2295 return (copyout(&osb, uap->ub, sizeof (osb))); 2296 } 2297 2298 /* 2299 * Convert from an old to a new stat structure. 2300 * XXX: many values are blindly truncated. 2301 */ 2302 void 2303 cvtstat(struct stat *st, struct ostat *ost) 2304 { 2305 2306 bzero(ost, sizeof(*ost)); 2307 ost->st_dev = st->st_dev; 2308 ost->st_ino = st->st_ino; 2309 ost->st_mode = st->st_mode; 2310 ost->st_nlink = st->st_nlink; 2311 ost->st_uid = st->st_uid; 2312 ost->st_gid = st->st_gid; 2313 ost->st_rdev = st->st_rdev; 2314 ost->st_size = MIN(st->st_size, INT32_MAX); 2315 ost->st_atim = st->st_atim; 2316 ost->st_mtim = st->st_mtim; 2317 ost->st_ctim = st->st_ctim; 2318 ost->st_blksize = st->st_blksize; 2319 ost->st_blocks = st->st_blocks; 2320 ost->st_flags = st->st_flags; 2321 ost->st_gen = st->st_gen; 2322 } 2323 #endif /* COMPAT_43 */ 2324 2325 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2326 int ino64_trunc_error; 2327 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2328 &ino64_trunc_error, 0, 2329 "Error on truncation of device, file or inode number, or link count"); 2330 2331 int 2332 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2333 { 2334 2335 ost->st_dev = st->st_dev; 2336 if (ost->st_dev != st->st_dev) { 2337 switch (ino64_trunc_error) { 2338 default: 2339 /* 2340 * Since dev_t is almost raw, don't clamp to the 2341 * maximum for case 2, but ignore the error. 2342 */ 2343 break; 2344 case 1: 2345 return (EOVERFLOW); 2346 } 2347 } 2348 ost->st_ino = st->st_ino; 2349 if (ost->st_ino != st->st_ino) { 2350 switch (ino64_trunc_error) { 2351 default: 2352 case 0: 2353 break; 2354 case 1: 2355 return (EOVERFLOW); 2356 case 2: 2357 ost->st_ino = UINT32_MAX; 2358 break; 2359 } 2360 } 2361 ost->st_mode = st->st_mode; 2362 ost->st_nlink = st->st_nlink; 2363 if (ost->st_nlink != st->st_nlink) { 2364 switch (ino64_trunc_error) { 2365 default: 2366 case 0: 2367 break; 2368 case 1: 2369 return (EOVERFLOW); 2370 case 2: 2371 ost->st_nlink = UINT16_MAX; 2372 break; 2373 } 2374 } 2375 ost->st_uid = st->st_uid; 2376 ost->st_gid = st->st_gid; 2377 ost->st_rdev = st->st_rdev; 2378 if (ost->st_rdev != st->st_rdev) { 2379 switch (ino64_trunc_error) { 2380 default: 2381 break; 2382 case 1: 2383 return (EOVERFLOW); 2384 } 2385 } 2386 ost->st_atim = st->st_atim; 2387 ost->st_mtim = st->st_mtim; 2388 ost->st_ctim = st->st_ctim; 2389 ost->st_size = st->st_size; 2390 ost->st_blocks = st->st_blocks; 2391 ost->st_blksize = st->st_blksize; 2392 ost->st_flags = st->st_flags; 2393 ost->st_gen = st->st_gen; 2394 ost->st_lspare = 0; 2395 ost->st_birthtim = st->st_birthtim; 2396 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2397 sizeof(*ost) - offsetof(struct freebsd11_stat, 2398 st_birthtim) - sizeof(ost->st_birthtim)); 2399 return (0); 2400 } 2401 2402 int 2403 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2404 { 2405 struct stat sb; 2406 struct freebsd11_stat osb; 2407 int error; 2408 2409 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2410 if (error != 0) 2411 return (error); 2412 error = freebsd11_cvtstat(&sb, &osb); 2413 if (error == 0) 2414 error = copyout(&osb, uap->ub, sizeof(osb)); 2415 return (error); 2416 } 2417 2418 int 2419 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2420 { 2421 struct stat sb; 2422 struct freebsd11_stat osb; 2423 int error; 2424 2425 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2426 UIO_USERSPACE, &sb); 2427 if (error != 0) 2428 return (error); 2429 error = freebsd11_cvtstat(&sb, &osb); 2430 if (error == 0) 2431 error = copyout(&osb, uap->ub, sizeof(osb)); 2432 return (error); 2433 } 2434 2435 int 2436 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2437 { 2438 struct fhandle fh; 2439 struct stat sb; 2440 struct freebsd11_stat osb; 2441 int error; 2442 2443 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2444 if (error != 0) 2445 return (error); 2446 error = kern_fhstat(td, fh, &sb); 2447 if (error != 0) 2448 return (error); 2449 error = freebsd11_cvtstat(&sb, &osb); 2450 if (error == 0) 2451 error = copyout(&osb, uap->sb, sizeof(osb)); 2452 return (error); 2453 } 2454 2455 int 2456 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2457 { 2458 struct stat sb; 2459 struct freebsd11_stat osb; 2460 int error; 2461 2462 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2463 UIO_USERSPACE, &sb); 2464 if (error != 0) 2465 return (error); 2466 error = freebsd11_cvtstat(&sb, &osb); 2467 if (error == 0) 2468 error = copyout(&osb, uap->buf, sizeof(osb)); 2469 return (error); 2470 } 2471 #endif /* COMPAT_FREEBSD11 */ 2472 2473 /* 2474 * Get file status 2475 */ 2476 #ifndef _SYS_SYSPROTO_H_ 2477 struct fstatat_args { 2478 int fd; 2479 char *path; 2480 struct stat *buf; 2481 int flag; 2482 } 2483 #endif 2484 int 2485 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2486 { 2487 struct stat sb; 2488 int error; 2489 2490 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2491 UIO_USERSPACE, &sb); 2492 if (error == 0) 2493 error = copyout(&sb, uap->buf, sizeof (sb)); 2494 return (error); 2495 } 2496 2497 int 2498 kern_statat(struct thread *td, int flag, int fd, const char *path, 2499 enum uio_seg pathseg, struct stat *sbp) 2500 { 2501 struct nameidata nd; 2502 int error; 2503 2504 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2505 AT_EMPTY_PATH)) != 0) 2506 return (EINVAL); 2507 2508 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2509 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2510 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2511 2512 if ((error = namei(&nd)) != 0) { 2513 if (error == ENOTDIR && 2514 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2515 error = kern_fstat(td, fd, sbp); 2516 return (error); 2517 } 2518 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2519 NDFREE_PNBUF(&nd); 2520 vput(nd.ni_vp); 2521 #ifdef __STAT_TIME_T_EXT 2522 sbp->st_atim_ext = 0; 2523 sbp->st_mtim_ext = 0; 2524 sbp->st_ctim_ext = 0; 2525 sbp->st_btim_ext = 0; 2526 #endif 2527 #ifdef KTRACE 2528 if (KTRPOINT(td, KTR_STRUCT)) 2529 ktrstat_error(sbp, error); 2530 #endif 2531 return (error); 2532 } 2533 2534 #if defined(COMPAT_FREEBSD11) 2535 /* 2536 * Implementation of the NetBSD [l]stat() functions. 2537 */ 2538 int 2539 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2540 { 2541 struct freebsd11_stat sb11; 2542 int error; 2543 2544 error = freebsd11_cvtstat(sb, &sb11); 2545 if (error != 0) 2546 return (error); 2547 2548 bzero(nsb, sizeof(*nsb)); 2549 CP(sb11, *nsb, st_dev); 2550 CP(sb11, *nsb, st_ino); 2551 CP(sb11, *nsb, st_mode); 2552 CP(sb11, *nsb, st_nlink); 2553 CP(sb11, *nsb, st_uid); 2554 CP(sb11, *nsb, st_gid); 2555 CP(sb11, *nsb, st_rdev); 2556 CP(sb11, *nsb, st_atim); 2557 CP(sb11, *nsb, st_mtim); 2558 CP(sb11, *nsb, st_ctim); 2559 CP(sb11, *nsb, st_size); 2560 CP(sb11, *nsb, st_blocks); 2561 CP(sb11, *nsb, st_blksize); 2562 CP(sb11, *nsb, st_flags); 2563 CP(sb11, *nsb, st_gen); 2564 CP(sb11, *nsb, st_birthtim); 2565 return (0); 2566 } 2567 2568 #ifndef _SYS_SYSPROTO_H_ 2569 struct freebsd11_nstat_args { 2570 char *path; 2571 struct nstat *ub; 2572 }; 2573 #endif 2574 int 2575 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2576 { 2577 struct stat sb; 2578 struct nstat nsb; 2579 int error; 2580 2581 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2582 if (error != 0) 2583 return (error); 2584 error = freebsd11_cvtnstat(&sb, &nsb); 2585 if (error == 0) 2586 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2587 return (error); 2588 } 2589 2590 /* 2591 * NetBSD lstat. Get file status; this version does not follow links. 2592 */ 2593 #ifndef _SYS_SYSPROTO_H_ 2594 struct freebsd11_nlstat_args { 2595 char *path; 2596 struct nstat *ub; 2597 }; 2598 #endif 2599 int 2600 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2601 { 2602 struct stat sb; 2603 struct nstat nsb; 2604 int error; 2605 2606 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2607 UIO_USERSPACE, &sb); 2608 if (error != 0) 2609 return (error); 2610 error = freebsd11_cvtnstat(&sb, &nsb); 2611 if (error == 0) 2612 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2613 return (error); 2614 } 2615 #endif /* COMPAT_FREEBSD11 */ 2616 2617 /* 2618 * Get configurable pathname variables. 2619 */ 2620 #ifndef _SYS_SYSPROTO_H_ 2621 struct pathconf_args { 2622 char *path; 2623 int name; 2624 }; 2625 #endif 2626 int 2627 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2628 { 2629 long value; 2630 int error; 2631 2632 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2633 &value); 2634 if (error == 0) 2635 td->td_retval[0] = value; 2636 return (error); 2637 } 2638 2639 #ifndef _SYS_SYSPROTO_H_ 2640 struct lpathconf_args { 2641 char *path; 2642 int name; 2643 }; 2644 #endif 2645 int 2646 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2647 { 2648 long value; 2649 int error; 2650 2651 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2652 NOFOLLOW, &value); 2653 if (error == 0) 2654 td->td_retval[0] = value; 2655 return (error); 2656 } 2657 2658 int 2659 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2660 int name, u_long flags, long *valuep) 2661 { 2662 struct nameidata nd; 2663 int error; 2664 2665 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2666 pathseg, path); 2667 if ((error = namei(&nd)) != 0) 2668 return (error); 2669 NDFREE_PNBUF(&nd); 2670 2671 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2672 vput(nd.ni_vp); 2673 return (error); 2674 } 2675 2676 /* 2677 * Return target name of a symbolic link. 2678 */ 2679 #ifndef _SYS_SYSPROTO_H_ 2680 struct readlink_args { 2681 char *path; 2682 char *buf; 2683 size_t count; 2684 }; 2685 #endif 2686 int 2687 sys_readlink(struct thread *td, struct readlink_args *uap) 2688 { 2689 2690 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2691 uap->buf, UIO_USERSPACE, uap->count)); 2692 } 2693 #ifndef _SYS_SYSPROTO_H_ 2694 struct readlinkat_args { 2695 int fd; 2696 char *path; 2697 char *buf; 2698 size_t bufsize; 2699 }; 2700 #endif 2701 int 2702 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2703 { 2704 2705 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2706 uap->buf, UIO_USERSPACE, uap->bufsize)); 2707 } 2708 2709 int 2710 kern_readlinkat(struct thread *td, int fd, const char *path, 2711 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2712 { 2713 struct vnode *vp; 2714 struct nameidata nd; 2715 int error; 2716 2717 if (count > IOSIZE_MAX) 2718 return (EINVAL); 2719 2720 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2721 EMPTYPATH, pathseg, path, fd); 2722 2723 if ((error = namei(&nd)) != 0) 2724 return (error); 2725 NDFREE_PNBUF(&nd); 2726 vp = nd.ni_vp; 2727 2728 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2729 vput(vp); 2730 2731 return (error); 2732 } 2733 2734 /* 2735 * Helper function to readlink from a vnode 2736 */ 2737 static int 2738 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2739 struct thread *td) 2740 { 2741 struct iovec aiov; 2742 struct uio auio; 2743 int error; 2744 2745 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2746 #ifdef MAC 2747 error = mac_vnode_check_readlink(td->td_ucred, vp); 2748 if (error != 0) 2749 return (error); 2750 #endif 2751 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2752 return (EINVAL); 2753 2754 aiov.iov_base = buf; 2755 aiov.iov_len = count; 2756 auio.uio_iov = &aiov; 2757 auio.uio_iovcnt = 1; 2758 auio.uio_offset = 0; 2759 auio.uio_rw = UIO_READ; 2760 auio.uio_segflg = bufseg; 2761 auio.uio_td = td; 2762 auio.uio_resid = count; 2763 error = VOP_READLINK(vp, &auio, td->td_ucred); 2764 td->td_retval[0] = count - auio.uio_resid; 2765 return (error); 2766 } 2767 2768 /* 2769 * Common implementation code for chflags() and fchflags(). 2770 */ 2771 static int 2772 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2773 { 2774 struct mount *mp; 2775 struct vattr vattr; 2776 int error; 2777 2778 /* We can't support the value matching VNOVAL. */ 2779 if (flags == VNOVAL) 2780 return (EOPNOTSUPP); 2781 2782 /* 2783 * Prevent non-root users from setting flags on devices. When 2784 * a device is reused, users can retain ownership of the device 2785 * if they are allowed to set flags and programs assume that 2786 * chown can't fail when done as root. 2787 */ 2788 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2789 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2790 if (error != 0) 2791 return (error); 2792 } 2793 2794 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2795 return (error); 2796 VATTR_NULL(&vattr); 2797 vattr.va_flags = flags; 2798 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2799 #ifdef MAC 2800 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2801 if (error == 0) 2802 #endif 2803 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2804 VOP_UNLOCK(vp); 2805 vn_finished_write(mp); 2806 return (error); 2807 } 2808 2809 /* 2810 * Change flags of a file given a path name. 2811 */ 2812 #ifndef _SYS_SYSPROTO_H_ 2813 struct chflags_args { 2814 const char *path; 2815 u_long flags; 2816 }; 2817 #endif 2818 int 2819 sys_chflags(struct thread *td, struct chflags_args *uap) 2820 { 2821 2822 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2823 uap->flags, 0)); 2824 } 2825 2826 #ifndef _SYS_SYSPROTO_H_ 2827 struct chflagsat_args { 2828 int fd; 2829 const char *path; 2830 u_long flags; 2831 int atflag; 2832 } 2833 #endif 2834 int 2835 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2836 { 2837 2838 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2839 uap->flags, uap->atflag)); 2840 } 2841 2842 /* 2843 * Same as chflags() but doesn't follow symlinks. 2844 */ 2845 #ifndef _SYS_SYSPROTO_H_ 2846 struct lchflags_args { 2847 const char *path; 2848 u_long flags; 2849 }; 2850 #endif 2851 int 2852 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2853 { 2854 2855 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2856 uap->flags, AT_SYMLINK_NOFOLLOW)); 2857 } 2858 2859 static int 2860 kern_chflagsat(struct thread *td, int fd, const char *path, 2861 enum uio_seg pathseg, u_long flags, int atflag) 2862 { 2863 struct nameidata nd; 2864 int error; 2865 2866 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2867 AT_EMPTY_PATH)) != 0) 2868 return (EINVAL); 2869 2870 AUDIT_ARG_FFLAGS(flags); 2871 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2872 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2873 fd, &cap_fchflags_rights); 2874 if ((error = namei(&nd)) != 0) 2875 return (error); 2876 NDFREE_PNBUF(&nd); 2877 error = setfflags(td, nd.ni_vp, flags); 2878 vrele(nd.ni_vp); 2879 return (error); 2880 } 2881 2882 /* 2883 * Change flags of a file given a file descriptor. 2884 */ 2885 #ifndef _SYS_SYSPROTO_H_ 2886 struct fchflags_args { 2887 int fd; 2888 u_long flags; 2889 }; 2890 #endif 2891 int 2892 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2893 { 2894 struct file *fp; 2895 int error; 2896 2897 AUDIT_ARG_FD(uap->fd); 2898 AUDIT_ARG_FFLAGS(uap->flags); 2899 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2900 &fp); 2901 if (error != 0) 2902 return (error); 2903 #ifdef AUDIT 2904 if (AUDITING_TD(td)) { 2905 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2906 AUDIT_ARG_VNODE1(fp->f_vnode); 2907 VOP_UNLOCK(fp->f_vnode); 2908 } 2909 #endif 2910 error = setfflags(td, fp->f_vnode, uap->flags); 2911 fdrop(fp, td); 2912 return (error); 2913 } 2914 2915 /* 2916 * Common implementation code for chmod(), lchmod() and fchmod(). 2917 */ 2918 int 2919 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2920 { 2921 struct mount *mp; 2922 struct vattr vattr; 2923 int error; 2924 2925 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2926 return (error); 2927 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2928 VATTR_NULL(&vattr); 2929 vattr.va_mode = mode & ALLPERMS; 2930 #ifdef MAC 2931 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2932 if (error == 0) 2933 #endif 2934 error = VOP_SETATTR(vp, &vattr, cred); 2935 VOP_UNLOCK(vp); 2936 vn_finished_write(mp); 2937 return (error); 2938 } 2939 2940 /* 2941 * Change mode of a file given path name. 2942 */ 2943 #ifndef _SYS_SYSPROTO_H_ 2944 struct chmod_args { 2945 char *path; 2946 int mode; 2947 }; 2948 #endif 2949 int 2950 sys_chmod(struct thread *td, struct chmod_args *uap) 2951 { 2952 2953 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2954 uap->mode, 0)); 2955 } 2956 2957 #ifndef _SYS_SYSPROTO_H_ 2958 struct fchmodat_args { 2959 int dirfd; 2960 char *path; 2961 mode_t mode; 2962 int flag; 2963 } 2964 #endif 2965 int 2966 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2967 { 2968 2969 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2970 uap->mode, uap->flag)); 2971 } 2972 2973 /* 2974 * Change mode of a file given path name (don't follow links.) 2975 */ 2976 #ifndef _SYS_SYSPROTO_H_ 2977 struct lchmod_args { 2978 char *path; 2979 int mode; 2980 }; 2981 #endif 2982 int 2983 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2984 { 2985 2986 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2987 uap->mode, AT_SYMLINK_NOFOLLOW)); 2988 } 2989 2990 int 2991 kern_fchmodat(struct thread *td, int fd, const char *path, 2992 enum uio_seg pathseg, mode_t mode, int flag) 2993 { 2994 struct nameidata nd; 2995 int error; 2996 2997 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2998 AT_EMPTY_PATH)) != 0) 2999 return (EINVAL); 3000 3001 AUDIT_ARG_MODE(mode); 3002 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3003 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3004 fd, &cap_fchmod_rights); 3005 if ((error = namei(&nd)) != 0) 3006 return (error); 3007 NDFREE_PNBUF(&nd); 3008 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3009 vrele(nd.ni_vp); 3010 return (error); 3011 } 3012 3013 /* 3014 * Change mode of a file given a file descriptor. 3015 */ 3016 #ifndef _SYS_SYSPROTO_H_ 3017 struct fchmod_args { 3018 int fd; 3019 int mode; 3020 }; 3021 #endif 3022 int 3023 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3024 { 3025 struct file *fp; 3026 int error; 3027 3028 AUDIT_ARG_FD(uap->fd); 3029 AUDIT_ARG_MODE(uap->mode); 3030 3031 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3032 if (error != 0) 3033 return (error); 3034 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3035 fdrop(fp, td); 3036 return (error); 3037 } 3038 3039 /* 3040 * Common implementation for chown(), lchown(), and fchown() 3041 */ 3042 int 3043 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3044 gid_t gid) 3045 { 3046 struct mount *mp; 3047 struct vattr vattr; 3048 int error; 3049 3050 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3051 return (error); 3052 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3053 VATTR_NULL(&vattr); 3054 vattr.va_uid = uid; 3055 vattr.va_gid = gid; 3056 #ifdef MAC 3057 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3058 vattr.va_gid); 3059 if (error == 0) 3060 #endif 3061 error = VOP_SETATTR(vp, &vattr, cred); 3062 VOP_UNLOCK(vp); 3063 vn_finished_write(mp); 3064 return (error); 3065 } 3066 3067 /* 3068 * Set ownership given a path name. 3069 */ 3070 #ifndef _SYS_SYSPROTO_H_ 3071 struct chown_args { 3072 char *path; 3073 int uid; 3074 int gid; 3075 }; 3076 #endif 3077 int 3078 sys_chown(struct thread *td, struct chown_args *uap) 3079 { 3080 3081 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3082 uap->gid, 0)); 3083 } 3084 3085 #ifndef _SYS_SYSPROTO_H_ 3086 struct fchownat_args { 3087 int fd; 3088 const char * path; 3089 uid_t uid; 3090 gid_t gid; 3091 int flag; 3092 }; 3093 #endif 3094 int 3095 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3096 { 3097 3098 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3099 uap->gid, uap->flag)); 3100 } 3101 3102 int 3103 kern_fchownat(struct thread *td, int fd, const char *path, 3104 enum uio_seg pathseg, int uid, int gid, int flag) 3105 { 3106 struct nameidata nd; 3107 int error; 3108 3109 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3110 AT_EMPTY_PATH)) != 0) 3111 return (EINVAL); 3112 3113 AUDIT_ARG_OWNER(uid, gid); 3114 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3115 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3116 fd, &cap_fchown_rights); 3117 3118 if ((error = namei(&nd)) != 0) 3119 return (error); 3120 NDFREE_PNBUF(&nd); 3121 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3122 vrele(nd.ni_vp); 3123 return (error); 3124 } 3125 3126 /* 3127 * Set ownership given a path name, do not cross symlinks. 3128 */ 3129 #ifndef _SYS_SYSPROTO_H_ 3130 struct lchown_args { 3131 char *path; 3132 int uid; 3133 int gid; 3134 }; 3135 #endif 3136 int 3137 sys_lchown(struct thread *td, struct lchown_args *uap) 3138 { 3139 3140 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3141 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3142 } 3143 3144 /* 3145 * Set ownership given a file descriptor. 3146 */ 3147 #ifndef _SYS_SYSPROTO_H_ 3148 struct fchown_args { 3149 int fd; 3150 int uid; 3151 int gid; 3152 }; 3153 #endif 3154 int 3155 sys_fchown(struct thread *td, struct fchown_args *uap) 3156 { 3157 struct file *fp; 3158 int error; 3159 3160 AUDIT_ARG_FD(uap->fd); 3161 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3162 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3163 if (error != 0) 3164 return (error); 3165 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3166 fdrop(fp, td); 3167 return (error); 3168 } 3169 3170 /* 3171 * Common implementation code for utimes(), lutimes(), and futimes(). 3172 */ 3173 static int 3174 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3175 struct timespec *tsp) 3176 { 3177 struct timeval tv[2]; 3178 const struct timeval *tvp; 3179 int error; 3180 3181 if (usrtvp == NULL) { 3182 vfs_timestamp(&tsp[0]); 3183 tsp[1] = tsp[0]; 3184 } else { 3185 if (tvpseg == UIO_SYSSPACE) { 3186 tvp = usrtvp; 3187 } else { 3188 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3189 return (error); 3190 tvp = tv; 3191 } 3192 3193 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3194 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3195 return (EINVAL); 3196 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3197 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3198 } 3199 return (0); 3200 } 3201 3202 /* 3203 * Common implementation code for futimens(), utimensat(). 3204 */ 3205 #define UTIMENS_NULL 0x1 3206 #define UTIMENS_EXIT 0x2 3207 static int 3208 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3209 struct timespec *tsp, int *retflags) 3210 { 3211 struct timespec tsnow; 3212 int error; 3213 3214 vfs_timestamp(&tsnow); 3215 *retflags = 0; 3216 if (usrtsp == NULL) { 3217 tsp[0] = tsnow; 3218 tsp[1] = tsnow; 3219 *retflags |= UTIMENS_NULL; 3220 return (0); 3221 } 3222 if (tspseg == UIO_SYSSPACE) { 3223 tsp[0] = usrtsp[0]; 3224 tsp[1] = usrtsp[1]; 3225 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3226 return (error); 3227 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3228 *retflags |= UTIMENS_EXIT; 3229 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3230 *retflags |= UTIMENS_NULL; 3231 if (tsp[0].tv_nsec == UTIME_OMIT) 3232 tsp[0].tv_sec = VNOVAL; 3233 else if (tsp[0].tv_nsec == UTIME_NOW) 3234 tsp[0] = tsnow; 3235 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3236 return (EINVAL); 3237 if (tsp[1].tv_nsec == UTIME_OMIT) 3238 tsp[1].tv_sec = VNOVAL; 3239 else if (tsp[1].tv_nsec == UTIME_NOW) 3240 tsp[1] = tsnow; 3241 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3242 return (EINVAL); 3243 3244 return (0); 3245 } 3246 3247 /* 3248 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3249 * and utimensat(). 3250 */ 3251 static int 3252 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3253 int numtimes, int nullflag) 3254 { 3255 struct mount *mp; 3256 struct vattr vattr; 3257 int error; 3258 bool setbirthtime; 3259 3260 setbirthtime = false; 3261 vattr.va_birthtime.tv_sec = VNOVAL; 3262 vattr.va_birthtime.tv_nsec = 0; 3263 3264 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3265 return (error); 3266 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3267 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3268 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3269 setbirthtime = true; 3270 VATTR_NULL(&vattr); 3271 vattr.va_atime = ts[0]; 3272 vattr.va_mtime = ts[1]; 3273 if (setbirthtime) 3274 vattr.va_birthtime = ts[1]; 3275 if (numtimes > 2) 3276 vattr.va_birthtime = ts[2]; 3277 if (nullflag) 3278 vattr.va_vaflags |= VA_UTIMES_NULL; 3279 #ifdef MAC 3280 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3281 vattr.va_mtime); 3282 #endif 3283 if (error == 0) 3284 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3285 VOP_UNLOCK(vp); 3286 vn_finished_write(mp); 3287 return (error); 3288 } 3289 3290 /* 3291 * Set the access and modification times of a file. 3292 */ 3293 #ifndef _SYS_SYSPROTO_H_ 3294 struct utimes_args { 3295 char *path; 3296 struct timeval *tptr; 3297 }; 3298 #endif 3299 int 3300 sys_utimes(struct thread *td, struct utimes_args *uap) 3301 { 3302 3303 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3304 uap->tptr, UIO_USERSPACE)); 3305 } 3306 3307 #ifndef _SYS_SYSPROTO_H_ 3308 struct futimesat_args { 3309 int fd; 3310 const char * path; 3311 const struct timeval * times; 3312 }; 3313 #endif 3314 int 3315 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3316 { 3317 3318 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3319 uap->times, UIO_USERSPACE)); 3320 } 3321 3322 int 3323 kern_utimesat(struct thread *td, int fd, const char *path, 3324 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3325 { 3326 struct nameidata nd; 3327 struct timespec ts[2]; 3328 int error; 3329 3330 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3331 return (error); 3332 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3333 &cap_futimes_rights); 3334 3335 if ((error = namei(&nd)) != 0) 3336 return (error); 3337 NDFREE_PNBUF(&nd); 3338 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3339 vrele(nd.ni_vp); 3340 return (error); 3341 } 3342 3343 /* 3344 * Set the access and modification times of a file. 3345 */ 3346 #ifndef _SYS_SYSPROTO_H_ 3347 struct lutimes_args { 3348 char *path; 3349 struct timeval *tptr; 3350 }; 3351 #endif 3352 int 3353 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3354 { 3355 3356 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3357 UIO_USERSPACE)); 3358 } 3359 3360 int 3361 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3362 const struct timeval *tptr, enum uio_seg tptrseg) 3363 { 3364 struct timespec ts[2]; 3365 struct nameidata nd; 3366 int error; 3367 3368 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3369 return (error); 3370 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3371 if ((error = namei(&nd)) != 0) 3372 return (error); 3373 NDFREE_PNBUF(&nd); 3374 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3375 vrele(nd.ni_vp); 3376 return (error); 3377 } 3378 3379 /* 3380 * Set the access and modification times of a file. 3381 */ 3382 #ifndef _SYS_SYSPROTO_H_ 3383 struct futimes_args { 3384 int fd; 3385 struct timeval *tptr; 3386 }; 3387 #endif 3388 int 3389 sys_futimes(struct thread *td, struct futimes_args *uap) 3390 { 3391 3392 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3393 } 3394 3395 int 3396 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3397 enum uio_seg tptrseg) 3398 { 3399 struct timespec ts[2]; 3400 struct file *fp; 3401 int error; 3402 3403 AUDIT_ARG_FD(fd); 3404 error = getutimes(tptr, tptrseg, ts); 3405 if (error != 0) 3406 return (error); 3407 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3408 if (error != 0) 3409 return (error); 3410 #ifdef AUDIT 3411 if (AUDITING_TD(td)) { 3412 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3413 AUDIT_ARG_VNODE1(fp->f_vnode); 3414 VOP_UNLOCK(fp->f_vnode); 3415 } 3416 #endif 3417 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3418 fdrop(fp, td); 3419 return (error); 3420 } 3421 3422 int 3423 sys_futimens(struct thread *td, struct futimens_args *uap) 3424 { 3425 3426 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3427 } 3428 3429 int 3430 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3431 enum uio_seg tptrseg) 3432 { 3433 struct timespec ts[2]; 3434 struct file *fp; 3435 int error, flags; 3436 3437 AUDIT_ARG_FD(fd); 3438 error = getutimens(tptr, tptrseg, ts, &flags); 3439 if (error != 0) 3440 return (error); 3441 if (flags & UTIMENS_EXIT) 3442 return (0); 3443 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3444 if (error != 0) 3445 return (error); 3446 #ifdef AUDIT 3447 if (AUDITING_TD(td)) { 3448 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3449 AUDIT_ARG_VNODE1(fp->f_vnode); 3450 VOP_UNLOCK(fp->f_vnode); 3451 } 3452 #endif 3453 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3454 fdrop(fp, td); 3455 return (error); 3456 } 3457 3458 int 3459 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3460 { 3461 3462 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3463 uap->times, UIO_USERSPACE, uap->flag)); 3464 } 3465 3466 int 3467 kern_utimensat(struct thread *td, int fd, const char *path, 3468 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3469 int flag) 3470 { 3471 struct nameidata nd; 3472 struct timespec ts[2]; 3473 int error, flags; 3474 3475 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3476 AT_EMPTY_PATH)) != 0) 3477 return (EINVAL); 3478 3479 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3480 return (error); 3481 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3482 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3483 pathseg, path, fd, &cap_futimes_rights); 3484 if ((error = namei(&nd)) != 0) 3485 return (error); 3486 /* 3487 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3488 * POSIX states: 3489 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3490 * "Search permission is denied by a component of the path prefix." 3491 */ 3492 NDFREE_PNBUF(&nd); 3493 if ((flags & UTIMENS_EXIT) == 0) 3494 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3495 vrele(nd.ni_vp); 3496 return (error); 3497 } 3498 3499 /* 3500 * Truncate a file given its path name. 3501 */ 3502 #ifndef _SYS_SYSPROTO_H_ 3503 struct truncate_args { 3504 char *path; 3505 int pad; 3506 off_t length; 3507 }; 3508 #endif 3509 int 3510 sys_truncate(struct thread *td, struct truncate_args *uap) 3511 { 3512 3513 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3514 } 3515 3516 int 3517 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3518 off_t length) 3519 { 3520 struct mount *mp; 3521 struct vnode *vp; 3522 void *rl_cookie; 3523 struct nameidata nd; 3524 int error; 3525 3526 if (length < 0) 3527 return (EINVAL); 3528 NDPREINIT(&nd); 3529 retry: 3530 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3531 if ((error = namei(&nd)) != 0) 3532 return (error); 3533 vp = nd.ni_vp; 3534 NDFREE_PNBUF(&nd); 3535 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3536 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3537 vn_rangelock_unlock(vp, rl_cookie); 3538 vrele(vp); 3539 return (error); 3540 } 3541 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3542 if (vp->v_type == VDIR) { 3543 error = EISDIR; 3544 goto out; 3545 } 3546 #ifdef MAC 3547 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3548 if (error != 0) 3549 goto out; 3550 #endif 3551 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3552 if (error != 0) 3553 goto out; 3554 3555 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3556 out: 3557 VOP_UNLOCK(vp); 3558 vn_finished_write(mp); 3559 vn_rangelock_unlock(vp, rl_cookie); 3560 vrele(vp); 3561 if (error == ERELOOKUP) 3562 goto retry; 3563 return (error); 3564 } 3565 3566 #if defined(COMPAT_43) 3567 /* 3568 * Truncate a file given its path name. 3569 */ 3570 #ifndef _SYS_SYSPROTO_H_ 3571 struct otruncate_args { 3572 char *path; 3573 long length; 3574 }; 3575 #endif 3576 int 3577 otruncate(struct thread *td, struct otruncate_args *uap) 3578 { 3579 3580 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3581 } 3582 #endif /* COMPAT_43 */ 3583 3584 #if defined(COMPAT_FREEBSD6) 3585 /* Versions with the pad argument */ 3586 int 3587 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3588 { 3589 3590 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3591 } 3592 3593 int 3594 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3595 { 3596 3597 return (kern_ftruncate(td, uap->fd, uap->length)); 3598 } 3599 #endif 3600 3601 int 3602 kern_fsync(struct thread *td, int fd, bool fullsync) 3603 { 3604 struct vnode *vp; 3605 struct mount *mp; 3606 struct file *fp; 3607 int error; 3608 3609 AUDIT_ARG_FD(fd); 3610 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3611 if (error != 0) 3612 return (error); 3613 vp = fp->f_vnode; 3614 #if 0 3615 if (!fullsync) 3616 /* XXXKIB: compete outstanding aio writes */; 3617 #endif 3618 retry: 3619 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3620 if (error != 0) 3621 goto drop; 3622 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3623 AUDIT_ARG_VNODE1(vp); 3624 vnode_pager_clean_async(vp); 3625 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3626 VOP_UNLOCK(vp); 3627 vn_finished_write(mp); 3628 if (error == ERELOOKUP) 3629 goto retry; 3630 drop: 3631 fdrop(fp, td); 3632 return (error); 3633 } 3634 3635 /* 3636 * Sync an open file. 3637 */ 3638 #ifndef _SYS_SYSPROTO_H_ 3639 struct fsync_args { 3640 int fd; 3641 }; 3642 #endif 3643 int 3644 sys_fsync(struct thread *td, struct fsync_args *uap) 3645 { 3646 3647 return (kern_fsync(td, uap->fd, true)); 3648 } 3649 3650 int 3651 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3652 { 3653 3654 return (kern_fsync(td, uap->fd, false)); 3655 } 3656 3657 /* 3658 * Rename files. Source and destination must either both be directories, or 3659 * both not be directories. If target is a directory, it must be empty. 3660 */ 3661 #ifndef _SYS_SYSPROTO_H_ 3662 struct rename_args { 3663 char *from; 3664 char *to; 3665 }; 3666 #endif 3667 int 3668 sys_rename(struct thread *td, struct rename_args *uap) 3669 { 3670 3671 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3672 uap->to, UIO_USERSPACE)); 3673 } 3674 3675 #ifndef _SYS_SYSPROTO_H_ 3676 struct renameat_args { 3677 int oldfd; 3678 char *old; 3679 int newfd; 3680 char *new; 3681 }; 3682 #endif 3683 int 3684 sys_renameat(struct thread *td, struct renameat_args *uap) 3685 { 3686 3687 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3688 UIO_USERSPACE)); 3689 } 3690 3691 #ifdef MAC 3692 static int 3693 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3694 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3695 { 3696 int error; 3697 3698 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3699 pathseg, old, oldfd, &cap_renameat_source_rights); 3700 if ((error = namei(fromnd)) != 0) 3701 return (error); 3702 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3703 fromnd->ni_vp, &fromnd->ni_cnd); 3704 VOP_UNLOCK(fromnd->ni_dvp); 3705 if (fromnd->ni_dvp != fromnd->ni_vp) 3706 VOP_UNLOCK(fromnd->ni_vp); 3707 if (error != 0) { 3708 NDFREE_PNBUF(fromnd); 3709 vrele(fromnd->ni_dvp); 3710 vrele(fromnd->ni_vp); 3711 } 3712 return (error); 3713 } 3714 #endif 3715 3716 int 3717 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3718 const char *new, enum uio_seg pathseg) 3719 { 3720 struct mount *mp = NULL; 3721 struct vnode *tvp, *fvp, *tdvp; 3722 struct nameidata fromnd, tond; 3723 uint64_t tondflags; 3724 int error; 3725 3726 again: 3727 bwillwrite(); 3728 #ifdef MAC 3729 if (mac_vnode_check_rename_from_enabled()) { 3730 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3731 &fromnd); 3732 if (error != 0) 3733 return (error); 3734 } else { 3735 #endif 3736 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3737 pathseg, old, oldfd, &cap_renameat_source_rights); 3738 if ((error = namei(&fromnd)) != 0) 3739 return (error); 3740 #ifdef MAC 3741 } 3742 #endif 3743 fvp = fromnd.ni_vp; 3744 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3745 if (fromnd.ni_vp->v_type == VDIR) 3746 tondflags |= WILLBEDIR; 3747 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3748 &cap_renameat_target_rights); 3749 if ((error = namei(&tond)) != 0) { 3750 /* Translate error code for rename("dir1", "dir2/."). */ 3751 if (error == EISDIR && fvp->v_type == VDIR) 3752 error = EINVAL; 3753 NDFREE_PNBUF(&fromnd); 3754 vrele(fromnd.ni_dvp); 3755 vrele(fvp); 3756 goto out1; 3757 } 3758 tdvp = tond.ni_dvp; 3759 tvp = tond.ni_vp; 3760 error = vn_start_write(fvp, &mp, V_NOWAIT); 3761 if (error != 0) { 3762 NDFREE_PNBUF(&fromnd); 3763 NDFREE_PNBUF(&tond); 3764 if (tvp != NULL) 3765 vput(tvp); 3766 if (tdvp == tvp) 3767 vrele(tdvp); 3768 else 3769 vput(tdvp); 3770 vrele(fromnd.ni_dvp); 3771 vrele(fvp); 3772 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3773 if (error != 0) 3774 return (error); 3775 goto again; 3776 } 3777 if (tvp != NULL) { 3778 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3779 error = ENOTDIR; 3780 goto out; 3781 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3782 error = EISDIR; 3783 goto out; 3784 } 3785 #ifdef CAPABILITIES 3786 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3787 /* 3788 * If the target already exists we require CAP_UNLINKAT 3789 * from 'newfd', when newfd was used for the lookup. 3790 */ 3791 error = cap_check(&tond.ni_filecaps.fc_rights, 3792 &cap_unlinkat_rights); 3793 if (error != 0) 3794 goto out; 3795 } 3796 #endif 3797 } 3798 if (fvp == tdvp) { 3799 error = EINVAL; 3800 goto out; 3801 } 3802 /* 3803 * If the source is the same as the destination (that is, if they 3804 * are links to the same vnode), then there is nothing to do. 3805 */ 3806 if (fvp == tvp) 3807 error = ERESTART; 3808 #ifdef MAC 3809 else 3810 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3811 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3812 #endif 3813 out: 3814 if (error == 0) { 3815 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3816 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3817 NDFREE_PNBUF(&fromnd); 3818 NDFREE_PNBUF(&tond); 3819 } else { 3820 NDFREE_PNBUF(&fromnd); 3821 NDFREE_PNBUF(&tond); 3822 if (tvp != NULL) 3823 vput(tvp); 3824 if (tdvp == tvp) 3825 vrele(tdvp); 3826 else 3827 vput(tdvp); 3828 vrele(fromnd.ni_dvp); 3829 vrele(fvp); 3830 } 3831 vn_finished_write(mp); 3832 out1: 3833 if (error == ERESTART) 3834 return (0); 3835 if (error == ERELOOKUP) 3836 goto again; 3837 return (error); 3838 } 3839 3840 /* 3841 * Make a directory file. 3842 */ 3843 #ifndef _SYS_SYSPROTO_H_ 3844 struct mkdir_args { 3845 char *path; 3846 int mode; 3847 }; 3848 #endif 3849 int 3850 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3851 { 3852 3853 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3854 uap->mode)); 3855 } 3856 3857 #ifndef _SYS_SYSPROTO_H_ 3858 struct mkdirat_args { 3859 int fd; 3860 char *path; 3861 mode_t mode; 3862 }; 3863 #endif 3864 int 3865 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3866 { 3867 3868 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3869 } 3870 3871 int 3872 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3873 int mode) 3874 { 3875 struct mount *mp; 3876 struct vattr vattr; 3877 struct nameidata nd; 3878 int error; 3879 3880 AUDIT_ARG_MODE(mode); 3881 NDPREINIT(&nd); 3882 restart: 3883 bwillwrite(); 3884 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3885 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3886 segflg, path, fd, &cap_mkdirat_rights); 3887 if ((error = namei(&nd)) != 0) 3888 return (error); 3889 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3890 NDFREE_PNBUF(&nd); 3891 vput(nd.ni_dvp); 3892 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3893 return (error); 3894 goto restart; 3895 } 3896 VATTR_NULL(&vattr); 3897 vattr.va_type = VDIR; 3898 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3899 #ifdef MAC 3900 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3901 &vattr); 3902 if (error != 0) 3903 goto out; 3904 #endif 3905 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3906 #ifdef MAC 3907 out: 3908 #endif 3909 NDFREE_PNBUF(&nd); 3910 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3911 vn_finished_write(mp); 3912 if (error == ERELOOKUP) 3913 goto restart; 3914 return (error); 3915 } 3916 3917 /* 3918 * Remove a directory file. 3919 */ 3920 #ifndef _SYS_SYSPROTO_H_ 3921 struct rmdir_args { 3922 char *path; 3923 }; 3924 #endif 3925 int 3926 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3927 { 3928 3929 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3930 0)); 3931 } 3932 3933 int 3934 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3935 enum uio_seg pathseg, int flag) 3936 { 3937 struct mount *mp; 3938 struct vnode *vp; 3939 struct file *fp; 3940 struct nameidata nd; 3941 cap_rights_t rights; 3942 int error; 3943 3944 fp = NULL; 3945 if (fd != FD_NONE) { 3946 error = getvnode(td, fd, cap_rights_init_one(&rights, 3947 CAP_LOOKUP), &fp); 3948 if (error != 0) 3949 return (error); 3950 } 3951 3952 NDPREINIT(&nd); 3953 restart: 3954 bwillwrite(); 3955 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3956 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3957 pathseg, path, dfd, &cap_unlinkat_rights); 3958 if ((error = namei(&nd)) != 0) 3959 goto fdout; 3960 vp = nd.ni_vp; 3961 if (vp->v_type != VDIR) { 3962 error = ENOTDIR; 3963 goto out; 3964 } 3965 /* 3966 * No rmdir "." please. 3967 */ 3968 if (nd.ni_dvp == vp) { 3969 error = EINVAL; 3970 goto out; 3971 } 3972 /* 3973 * The root of a mounted filesystem cannot be deleted. 3974 */ 3975 if (vp->v_vflag & VV_ROOT) { 3976 error = EBUSY; 3977 goto out; 3978 } 3979 3980 if (fp != NULL && fp->f_vnode != vp) { 3981 if (VN_IS_DOOMED(fp->f_vnode)) 3982 error = EBADF; 3983 else 3984 error = EDEADLK; 3985 goto out; 3986 } 3987 3988 #ifdef MAC 3989 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3990 &nd.ni_cnd); 3991 if (error != 0) 3992 goto out; 3993 #endif 3994 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3995 NDFREE_PNBUF(&nd); 3996 vput(vp); 3997 if (nd.ni_dvp == vp) 3998 vrele(nd.ni_dvp); 3999 else 4000 vput(nd.ni_dvp); 4001 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4002 goto fdout; 4003 goto restart; 4004 } 4005 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 4006 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4007 vn_finished_write(mp); 4008 out: 4009 NDFREE_PNBUF(&nd); 4010 vput(vp); 4011 if (nd.ni_dvp == vp) 4012 vrele(nd.ni_dvp); 4013 else 4014 vput(nd.ni_dvp); 4015 if (error == ERELOOKUP) 4016 goto restart; 4017 fdout: 4018 if (fp != NULL) 4019 fdrop(fp, td); 4020 return (error); 4021 } 4022 4023 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4024 int 4025 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4026 long *basep, void (*func)(struct freebsd11_dirent *)) 4027 { 4028 struct freebsd11_dirent dstdp; 4029 struct dirent *dp, *edp; 4030 char *dirbuf; 4031 off_t base; 4032 ssize_t resid, ucount; 4033 int error; 4034 4035 /* XXX arbitrary sanity limit on `count'. */ 4036 count = min(count, 64 * 1024); 4037 4038 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4039 4040 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4041 UIO_SYSSPACE); 4042 if (error != 0) 4043 goto done; 4044 if (basep != NULL) 4045 *basep = base; 4046 4047 ucount = 0; 4048 for (dp = (struct dirent *)dirbuf, 4049 edp = (struct dirent *)&dirbuf[count - resid]; 4050 ucount < count && dp < edp; ) { 4051 if (dp->d_reclen == 0) 4052 break; 4053 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4054 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4055 continue; 4056 dstdp.d_type = dp->d_type; 4057 dstdp.d_namlen = dp->d_namlen; 4058 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4059 if (dstdp.d_fileno != dp->d_fileno) { 4060 switch (ino64_trunc_error) { 4061 default: 4062 case 0: 4063 break; 4064 case 1: 4065 error = EOVERFLOW; 4066 goto done; 4067 case 2: 4068 dstdp.d_fileno = UINT32_MAX; 4069 break; 4070 } 4071 } 4072 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4073 ((dp->d_namlen + 1 + 3) &~ 3); 4074 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4075 bzero(dstdp.d_name + dstdp.d_namlen, 4076 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4077 dstdp.d_namlen); 4078 MPASS(dstdp.d_reclen <= dp->d_reclen); 4079 MPASS(ucount + dstdp.d_reclen <= count); 4080 if (func != NULL) 4081 func(&dstdp); 4082 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4083 if (error != 0) 4084 break; 4085 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4086 ucount += dstdp.d_reclen; 4087 } 4088 4089 done: 4090 free(dirbuf, M_TEMP); 4091 if (error == 0) 4092 td->td_retval[0] = ucount; 4093 return (error); 4094 } 4095 #endif /* COMPAT */ 4096 4097 #ifdef COMPAT_43 4098 static void 4099 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4100 { 4101 #if (BYTE_ORDER == LITTLE_ENDIAN) 4102 /* 4103 * The expected low byte of dp->d_namlen is our dp->d_type. 4104 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 4105 */ 4106 dp->d_type = dp->d_namlen; 4107 dp->d_namlen = 0; 4108 #else 4109 /* 4110 * The dp->d_type is the high byte of the expected dp->d_namlen, 4111 * so must be zero'ed. 4112 */ 4113 dp->d_type = 0; 4114 #endif 4115 } 4116 4117 /* 4118 * Read a block of directory entries in a filesystem independent format. 4119 */ 4120 #ifndef _SYS_SYSPROTO_H_ 4121 struct ogetdirentries_args { 4122 int fd; 4123 char *buf; 4124 u_int count; 4125 long *basep; 4126 }; 4127 #endif 4128 int 4129 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4130 { 4131 long loff; 4132 int error; 4133 4134 error = kern_ogetdirentries(td, uap, &loff); 4135 if (error == 0) 4136 error = copyout(&loff, uap->basep, sizeof(long)); 4137 return (error); 4138 } 4139 4140 int 4141 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4142 long *ploff) 4143 { 4144 long base; 4145 int error; 4146 4147 /* XXX arbitrary sanity limit on `count'. */ 4148 if (uap->count > 64 * 1024) 4149 return (EINVAL); 4150 4151 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4152 &base, ogetdirentries_cvt); 4153 4154 if (error == 0 && uap->basep != NULL) 4155 error = copyout(&base, uap->basep, sizeof(long)); 4156 4157 return (error); 4158 } 4159 #endif /* COMPAT_43 */ 4160 4161 #if defined(COMPAT_FREEBSD11) 4162 #ifndef _SYS_SYSPROTO_H_ 4163 struct freebsd11_getdirentries_args { 4164 int fd; 4165 char *buf; 4166 u_int count; 4167 long *basep; 4168 }; 4169 #endif 4170 int 4171 freebsd11_getdirentries(struct thread *td, 4172 struct freebsd11_getdirentries_args *uap) 4173 { 4174 long base; 4175 int error; 4176 4177 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4178 &base, NULL); 4179 4180 if (error == 0 && uap->basep != NULL) 4181 error = copyout(&base, uap->basep, sizeof(long)); 4182 return (error); 4183 } 4184 4185 int 4186 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4187 { 4188 struct freebsd11_getdirentries_args ap; 4189 4190 ap.fd = uap->fd; 4191 ap.buf = uap->buf; 4192 ap.count = uap->count; 4193 ap.basep = NULL; 4194 return (freebsd11_getdirentries(td, &ap)); 4195 } 4196 #endif /* COMPAT_FREEBSD11 */ 4197 4198 /* 4199 * Read a block of directory entries in a filesystem independent format. 4200 */ 4201 int 4202 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4203 { 4204 off_t base; 4205 int error; 4206 4207 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4208 NULL, UIO_USERSPACE); 4209 if (error != 0) 4210 return (error); 4211 if (uap->basep != NULL) 4212 error = copyout(&base, uap->basep, sizeof(off_t)); 4213 return (error); 4214 } 4215 4216 int 4217 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4218 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4219 { 4220 struct vnode *vp; 4221 struct file *fp; 4222 struct uio auio; 4223 struct iovec aiov; 4224 off_t loff; 4225 int error, eofflag; 4226 off_t foffset; 4227 4228 AUDIT_ARG_FD(fd); 4229 if (count > IOSIZE_MAX) 4230 return (EINVAL); 4231 auio.uio_resid = count; 4232 error = getvnode(td, fd, &cap_read_rights, &fp); 4233 if (error != 0) 4234 return (error); 4235 if ((fp->f_flag & FREAD) == 0) { 4236 fdrop(fp, td); 4237 return (EBADF); 4238 } 4239 vp = fp->f_vnode; 4240 foffset = foffset_lock(fp, 0); 4241 unionread: 4242 if (vp->v_type != VDIR) { 4243 error = EINVAL; 4244 goto fail; 4245 } 4246 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4247 error = ENOENT; 4248 goto fail; 4249 } 4250 aiov.iov_base = buf; 4251 aiov.iov_len = count; 4252 auio.uio_iov = &aiov; 4253 auio.uio_iovcnt = 1; 4254 auio.uio_rw = UIO_READ; 4255 auio.uio_segflg = bufseg; 4256 auio.uio_td = td; 4257 vn_lock(vp, LK_SHARED | LK_RETRY); 4258 AUDIT_ARG_VNODE1(vp); 4259 loff = auio.uio_offset = foffset; 4260 #ifdef MAC 4261 error = mac_vnode_check_readdir(td->td_ucred, vp); 4262 if (error == 0) 4263 #endif 4264 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4265 NULL); 4266 foffset = auio.uio_offset; 4267 if (error != 0) { 4268 VOP_UNLOCK(vp); 4269 goto fail; 4270 } 4271 if (count == auio.uio_resid && 4272 (vp->v_vflag & VV_ROOT) && 4273 (vp->v_mount->mnt_flag & MNT_UNION)) { 4274 struct vnode *tvp = vp; 4275 4276 vp = vp->v_mount->mnt_vnodecovered; 4277 VREF(vp); 4278 fp->f_vnode = vp; 4279 foffset = 0; 4280 vput(tvp); 4281 goto unionread; 4282 } 4283 VOP_UNLOCK(vp); 4284 *basep = loff; 4285 if (residp != NULL) 4286 *residp = auio.uio_resid; 4287 td->td_retval[0] = count - auio.uio_resid; 4288 fail: 4289 foffset_unlock(fp, foffset, 0); 4290 fdrop(fp, td); 4291 return (error); 4292 } 4293 4294 /* 4295 * Set the mode mask for creation of filesystem nodes. 4296 */ 4297 #ifndef _SYS_SYSPROTO_H_ 4298 struct umask_args { 4299 int newmask; 4300 }; 4301 #endif 4302 int 4303 sys_umask(struct thread *td, struct umask_args *uap) 4304 { 4305 struct pwddesc *pdp; 4306 4307 pdp = td->td_proc->p_pd; 4308 PWDDESC_XLOCK(pdp); 4309 td->td_retval[0] = pdp->pd_cmask; 4310 pdp->pd_cmask = uap->newmask & ALLPERMS; 4311 PWDDESC_XUNLOCK(pdp); 4312 return (0); 4313 } 4314 4315 /* 4316 * Void all references to file by ripping underlying filesystem away from 4317 * vnode. 4318 */ 4319 #ifndef _SYS_SYSPROTO_H_ 4320 struct revoke_args { 4321 char *path; 4322 }; 4323 #endif 4324 int 4325 sys_revoke(struct thread *td, struct revoke_args *uap) 4326 { 4327 struct vnode *vp; 4328 struct vattr vattr; 4329 struct nameidata nd; 4330 int error; 4331 4332 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4333 uap->path); 4334 if ((error = namei(&nd)) != 0) 4335 return (error); 4336 vp = nd.ni_vp; 4337 NDFREE_PNBUF(&nd); 4338 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4339 error = EINVAL; 4340 goto out; 4341 } 4342 #ifdef MAC 4343 error = mac_vnode_check_revoke(td->td_ucred, vp); 4344 if (error != 0) 4345 goto out; 4346 #endif 4347 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4348 if (error != 0) 4349 goto out; 4350 if (td->td_ucred->cr_uid != vattr.va_uid) { 4351 error = priv_check(td, PRIV_VFS_ADMIN); 4352 if (error != 0) 4353 goto out; 4354 } 4355 if (devfs_usecount(vp) > 0) 4356 VOP_REVOKE(vp, REVOKEALL); 4357 out: 4358 vput(vp); 4359 return (error); 4360 } 4361 4362 /* 4363 * This variant of getvnode() allows O_PATH files. Caller should 4364 * ensure that returned file and vnode are only used for compatible 4365 * semantics. 4366 */ 4367 int 4368 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4369 struct file **fpp) 4370 { 4371 struct file *fp; 4372 int error; 4373 4374 error = fget_unlocked(td, fd, rightsp, &fp); 4375 if (error != 0) 4376 return (error); 4377 4378 /* 4379 * The file could be not of the vnode type, or it may be not 4380 * yet fully initialized, in which case the f_vnode pointer 4381 * may be set, but f_ops is still badfileops. E.g., 4382 * devfs_open() transiently create such situation to 4383 * facilitate csw d_fdopen(). 4384 * 4385 * Dupfdopen() handling in kern_openat() installs the 4386 * half-baked file into the process descriptor table, allowing 4387 * other thread to dereference it. Guard against the race by 4388 * checking f_ops. 4389 */ 4390 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4391 fdrop(fp, td); 4392 *fpp = NULL; 4393 return (EINVAL); 4394 } 4395 4396 *fpp = fp; 4397 return (0); 4398 } 4399 4400 /* 4401 * Convert a user file descriptor to a kernel file entry and check 4402 * that, if it is a capability, the correct rights are present. 4403 * A reference on the file entry is held upon returning. 4404 */ 4405 int 4406 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4407 { 4408 int error; 4409 4410 error = getvnode_path(td, fd, rightsp, fpp); 4411 if (__predict_false(error != 0)) 4412 return (error); 4413 4414 /* 4415 * Filter out O_PATH file descriptors, most getvnode() callers 4416 * do not call fo_ methods. 4417 */ 4418 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4419 fdrop(*fpp, td); 4420 *fpp = NULL; 4421 error = EBADF; 4422 } 4423 4424 return (error); 4425 } 4426 4427 /* 4428 * Get an (NFS) file handle. 4429 */ 4430 #ifndef _SYS_SYSPROTO_H_ 4431 struct lgetfh_args { 4432 char *fname; 4433 fhandle_t *fhp; 4434 }; 4435 #endif 4436 int 4437 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4438 { 4439 4440 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4441 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4442 } 4443 4444 #ifndef _SYS_SYSPROTO_H_ 4445 struct getfh_args { 4446 char *fname; 4447 fhandle_t *fhp; 4448 }; 4449 #endif 4450 int 4451 sys_getfh(struct thread *td, struct getfh_args *uap) 4452 { 4453 4454 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4455 uap->fhp, UIO_USERSPACE)); 4456 } 4457 4458 /* 4459 * syscall for the rpc.lockd to use to translate an open descriptor into 4460 * a NFS file handle. 4461 * 4462 * warning: do not remove the priv_check() call or this becomes one giant 4463 * security hole. 4464 */ 4465 #ifndef _SYS_SYSPROTO_H_ 4466 struct getfhat_args { 4467 int fd; 4468 char *path; 4469 fhandle_t *fhp; 4470 int flags; 4471 }; 4472 #endif 4473 int 4474 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4475 { 4476 4477 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4478 uap->fhp, UIO_USERSPACE)); 4479 } 4480 4481 int 4482 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4483 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4484 { 4485 struct nameidata nd; 4486 fhandle_t fh; 4487 struct vnode *vp; 4488 int error; 4489 4490 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4491 return (EINVAL); 4492 error = priv_check(td, PRIV_VFS_GETFH); 4493 if (error != 0) 4494 return (error); 4495 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4496 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4497 fd); 4498 error = namei(&nd); 4499 if (error != 0) 4500 return (error); 4501 NDFREE_PNBUF(&nd); 4502 vp = nd.ni_vp; 4503 bzero(&fh, sizeof(fh)); 4504 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4505 error = VOP_VPTOFH(vp, &fh.fh_fid); 4506 vput(vp); 4507 if (error == 0) { 4508 if (fhseg == UIO_USERSPACE) 4509 error = copyout(&fh, fhp, sizeof (fh)); 4510 else 4511 memcpy(fhp, &fh, sizeof(fh)); 4512 } 4513 return (error); 4514 } 4515 4516 #ifndef _SYS_SYSPROTO_H_ 4517 struct fhlink_args { 4518 fhandle_t *fhp; 4519 const char *to; 4520 }; 4521 #endif 4522 int 4523 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4524 { 4525 4526 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4527 } 4528 4529 #ifndef _SYS_SYSPROTO_H_ 4530 struct fhlinkat_args { 4531 fhandle_t *fhp; 4532 int tofd; 4533 const char *to; 4534 }; 4535 #endif 4536 int 4537 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4538 { 4539 4540 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4541 } 4542 4543 static int 4544 kern_fhlinkat(struct thread *td, int fd, const char *path, 4545 enum uio_seg pathseg, fhandle_t *fhp) 4546 { 4547 fhandle_t fh; 4548 struct mount *mp; 4549 struct vnode *vp; 4550 int error; 4551 4552 error = priv_check(td, PRIV_VFS_GETFH); 4553 if (error != 0) 4554 return (error); 4555 error = copyin(fhp, &fh, sizeof(fh)); 4556 if (error != 0) 4557 return (error); 4558 do { 4559 bwillwrite(); 4560 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4561 return (ESTALE); 4562 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4563 vfs_unbusy(mp); 4564 if (error != 0) 4565 return (error); 4566 VOP_UNLOCK(vp); 4567 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4568 } while (error == EAGAIN || error == ERELOOKUP); 4569 return (error); 4570 } 4571 4572 #ifndef _SYS_SYSPROTO_H_ 4573 struct fhreadlink_args { 4574 fhandle_t *fhp; 4575 char *buf; 4576 size_t bufsize; 4577 }; 4578 #endif 4579 int 4580 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4581 { 4582 fhandle_t fh; 4583 struct mount *mp; 4584 struct vnode *vp; 4585 int error; 4586 4587 error = priv_check(td, PRIV_VFS_GETFH); 4588 if (error != 0) 4589 return (error); 4590 if (uap->bufsize > IOSIZE_MAX) 4591 return (EINVAL); 4592 error = copyin(uap->fhp, &fh, sizeof(fh)); 4593 if (error != 0) 4594 return (error); 4595 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4596 return (ESTALE); 4597 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4598 vfs_unbusy(mp); 4599 if (error != 0) 4600 return (error); 4601 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4602 vput(vp); 4603 return (error); 4604 } 4605 4606 /* 4607 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4608 * open descriptor. 4609 * 4610 * warning: do not remove the priv_check() call or this becomes one giant 4611 * security hole. 4612 */ 4613 #ifndef _SYS_SYSPROTO_H_ 4614 struct fhopen_args { 4615 const struct fhandle *u_fhp; 4616 int flags; 4617 }; 4618 #endif 4619 int 4620 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4621 { 4622 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4623 } 4624 4625 int 4626 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4627 { 4628 struct mount *mp; 4629 struct vnode *vp; 4630 struct fhandle fhp; 4631 struct file *fp; 4632 int fmode, error; 4633 int indx; 4634 4635 error = priv_check(td, PRIV_VFS_FHOPEN); 4636 if (error != 0) 4637 return (error); 4638 indx = -1; 4639 fmode = FFLAGS(flags); 4640 /* why not allow a non-read/write open for our lockd? */ 4641 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4642 return (EINVAL); 4643 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4644 if (error != 0) 4645 return(error); 4646 /* find the mount point */ 4647 mp = vfs_busyfs(&fhp.fh_fsid); 4648 if (mp == NULL) 4649 return (ESTALE); 4650 /* now give me my vnode, it gets returned to me locked */ 4651 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4652 vfs_unbusy(mp); 4653 if (error != 0) 4654 return (error); 4655 4656 error = falloc_noinstall(td, &fp); 4657 if (error != 0) { 4658 vput(vp); 4659 return (error); 4660 } 4661 /* 4662 * An extra reference on `fp' has been held for us by 4663 * falloc_noinstall(). 4664 */ 4665 4666 #ifdef INVARIANTS 4667 td->td_dupfd = -1; 4668 #endif 4669 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4670 if (error != 0) { 4671 KASSERT(fp->f_ops == &badfileops, 4672 ("VOP_OPEN in fhopen() set f_ops")); 4673 KASSERT(td->td_dupfd < 0, 4674 ("fhopen() encountered fdopen()")); 4675 4676 vput(vp); 4677 goto bad; 4678 } 4679 #ifdef INVARIANTS 4680 td->td_dupfd = 0; 4681 #endif 4682 fp->f_vnode = vp; 4683 finit_vnode(fp, fmode, NULL, &vnops); 4684 VOP_UNLOCK(vp); 4685 if ((fmode & O_TRUNC) != 0) { 4686 error = fo_truncate(fp, 0, td->td_ucred, td); 4687 if (error != 0) 4688 goto bad; 4689 } 4690 4691 error = finstall(td, fp, &indx, fmode, NULL); 4692 bad: 4693 fdrop(fp, td); 4694 td->td_retval[0] = indx; 4695 return (error); 4696 } 4697 4698 /* 4699 * Stat an (NFS) file handle. 4700 */ 4701 #ifndef _SYS_SYSPROTO_H_ 4702 struct fhstat_args { 4703 struct fhandle *u_fhp; 4704 struct stat *sb; 4705 }; 4706 #endif 4707 int 4708 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4709 { 4710 struct stat sb; 4711 struct fhandle fh; 4712 int error; 4713 4714 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4715 if (error != 0) 4716 return (error); 4717 error = kern_fhstat(td, fh, &sb); 4718 if (error == 0) 4719 error = copyout(&sb, uap->sb, sizeof(sb)); 4720 return (error); 4721 } 4722 4723 int 4724 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4725 { 4726 struct mount *mp; 4727 struct vnode *vp; 4728 int error; 4729 4730 error = priv_check(td, PRIV_VFS_FHSTAT); 4731 if (error != 0) 4732 return (error); 4733 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4734 return (ESTALE); 4735 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4736 vfs_unbusy(mp); 4737 if (error != 0) 4738 return (error); 4739 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4740 vput(vp); 4741 return (error); 4742 } 4743 4744 /* 4745 * Implement fstatfs() for (NFS) file handles. 4746 */ 4747 #ifndef _SYS_SYSPROTO_H_ 4748 struct fhstatfs_args { 4749 struct fhandle *u_fhp; 4750 struct statfs *buf; 4751 }; 4752 #endif 4753 int 4754 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4755 { 4756 struct statfs *sfp; 4757 fhandle_t fh; 4758 int error; 4759 4760 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4761 if (error != 0) 4762 return (error); 4763 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4764 error = kern_fhstatfs(td, fh, sfp); 4765 if (error == 0) 4766 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4767 free(sfp, M_STATFS); 4768 return (error); 4769 } 4770 4771 int 4772 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4773 { 4774 struct mount *mp; 4775 struct vnode *vp; 4776 int error; 4777 4778 error = priv_check(td, PRIV_VFS_FHSTATFS); 4779 if (error != 0) 4780 return (error); 4781 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4782 return (ESTALE); 4783 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4784 if (error != 0) { 4785 vfs_unbusy(mp); 4786 return (error); 4787 } 4788 vput(vp); 4789 error = prison_canseemount(td->td_ucred, mp); 4790 if (error != 0) 4791 goto out; 4792 #ifdef MAC 4793 error = mac_mount_check_stat(td->td_ucred, mp); 4794 if (error != 0) 4795 goto out; 4796 #endif 4797 error = VFS_STATFS(mp, buf); 4798 out: 4799 vfs_unbusy(mp); 4800 return (error); 4801 } 4802 4803 /* 4804 * Unlike madvise(2), we do not make a best effort to remember every 4805 * possible caching hint. Instead, we remember the last setting with 4806 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4807 * region of any current setting. 4808 */ 4809 int 4810 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4811 int advice) 4812 { 4813 struct fadvise_info *fa, *new; 4814 struct file *fp; 4815 struct vnode *vp; 4816 off_t end; 4817 int error; 4818 4819 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4820 return (EINVAL); 4821 AUDIT_ARG_VALUE(advice); 4822 switch (advice) { 4823 case POSIX_FADV_SEQUENTIAL: 4824 case POSIX_FADV_RANDOM: 4825 case POSIX_FADV_NOREUSE: 4826 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4827 break; 4828 case POSIX_FADV_NORMAL: 4829 case POSIX_FADV_WILLNEED: 4830 case POSIX_FADV_DONTNEED: 4831 new = NULL; 4832 break; 4833 default: 4834 return (EINVAL); 4835 } 4836 /* XXX: CAP_POSIX_FADVISE? */ 4837 AUDIT_ARG_FD(fd); 4838 error = fget(td, fd, &cap_no_rights, &fp); 4839 if (error != 0) 4840 goto out; 4841 AUDIT_ARG_FILE(td->td_proc, fp); 4842 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4843 error = ESPIPE; 4844 goto out; 4845 } 4846 if (fp->f_type != DTYPE_VNODE) { 4847 error = ENODEV; 4848 goto out; 4849 } 4850 vp = fp->f_vnode; 4851 if (vp->v_type != VREG) { 4852 error = ENODEV; 4853 goto out; 4854 } 4855 if (len == 0) 4856 end = OFF_MAX; 4857 else 4858 end = offset + len - 1; 4859 switch (advice) { 4860 case POSIX_FADV_SEQUENTIAL: 4861 case POSIX_FADV_RANDOM: 4862 case POSIX_FADV_NOREUSE: 4863 /* 4864 * Try to merge any existing non-standard region with 4865 * this new region if possible, otherwise create a new 4866 * non-standard region for this request. 4867 */ 4868 mtx_pool_lock(mtxpool_sleep, fp); 4869 fa = fp->f_advice; 4870 if (fa != NULL && fa->fa_advice == advice && 4871 ((fa->fa_start <= end && fa->fa_end >= offset) || 4872 (end != OFF_MAX && fa->fa_start == end + 1) || 4873 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4874 if (offset < fa->fa_start) 4875 fa->fa_start = offset; 4876 if (end > fa->fa_end) 4877 fa->fa_end = end; 4878 } else { 4879 new->fa_advice = advice; 4880 new->fa_start = offset; 4881 new->fa_end = end; 4882 fp->f_advice = new; 4883 new = fa; 4884 } 4885 mtx_pool_unlock(mtxpool_sleep, fp); 4886 break; 4887 case POSIX_FADV_NORMAL: 4888 /* 4889 * If a the "normal" region overlaps with an existing 4890 * non-standard region, trim or remove the 4891 * non-standard region. 4892 */ 4893 mtx_pool_lock(mtxpool_sleep, fp); 4894 fa = fp->f_advice; 4895 if (fa != NULL) { 4896 if (offset <= fa->fa_start && end >= fa->fa_end) { 4897 new = fa; 4898 fp->f_advice = NULL; 4899 } else if (offset <= fa->fa_start && 4900 end >= fa->fa_start) 4901 fa->fa_start = end + 1; 4902 else if (offset <= fa->fa_end && end >= fa->fa_end) 4903 fa->fa_end = offset - 1; 4904 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4905 /* 4906 * If the "normal" region is a middle 4907 * portion of the existing 4908 * non-standard region, just remove 4909 * the whole thing rather than picking 4910 * one side or the other to 4911 * preserve. 4912 */ 4913 new = fa; 4914 fp->f_advice = NULL; 4915 } 4916 } 4917 mtx_pool_unlock(mtxpool_sleep, fp); 4918 break; 4919 case POSIX_FADV_WILLNEED: 4920 case POSIX_FADV_DONTNEED: 4921 error = VOP_ADVISE(vp, offset, end, advice); 4922 break; 4923 } 4924 out: 4925 if (fp != NULL) 4926 fdrop(fp, td); 4927 free(new, M_FADVISE); 4928 return (error); 4929 } 4930 4931 int 4932 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4933 { 4934 int error; 4935 4936 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4937 uap->advice); 4938 return (kern_posix_error(td, error)); 4939 } 4940 4941 int 4942 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4943 off_t *outoffp, size_t len, unsigned int flags) 4944 { 4945 struct file *infp, *outfp; 4946 struct vnode *invp, *outvp; 4947 int error; 4948 size_t retlen; 4949 void *rl_rcookie, *rl_wcookie; 4950 off_t savinoff, savoutoff; 4951 4952 infp = outfp = NULL; 4953 rl_rcookie = rl_wcookie = NULL; 4954 savinoff = -1; 4955 error = 0; 4956 retlen = 0; 4957 4958 if (flags != 0) { 4959 error = EINVAL; 4960 goto out; 4961 } 4962 if (len > SSIZE_MAX) 4963 /* 4964 * Although the len argument is size_t, the return argument 4965 * is ssize_t (which is signed). Therefore a size that won't 4966 * fit in ssize_t can't be returned. 4967 */ 4968 len = SSIZE_MAX; 4969 4970 /* Get the file structures for the file descriptors. */ 4971 error = fget_read(td, infd, 4972 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 4973 if (error != 0) 4974 goto out; 4975 if (infp->f_ops == &badfileops) { 4976 error = EBADF; 4977 goto out; 4978 } 4979 if (infp->f_vnode == NULL) { 4980 error = EINVAL; 4981 goto out; 4982 } 4983 error = fget_write(td, outfd, 4984 outoffp != NULL ? &cap_pwrite_rights : &cap_write_rights, &outfp); 4985 if (error != 0) 4986 goto out; 4987 if (outfp->f_ops == &badfileops) { 4988 error = EBADF; 4989 goto out; 4990 } 4991 if (outfp->f_vnode == NULL) { 4992 error = EINVAL; 4993 goto out; 4994 } 4995 4996 /* Set the offset pointers to the correct place. */ 4997 if (inoffp == NULL) 4998 inoffp = &infp->f_offset; 4999 if (outoffp == NULL) 5000 outoffp = &outfp->f_offset; 5001 savinoff = *inoffp; 5002 savoutoff = *outoffp; 5003 5004 invp = infp->f_vnode; 5005 outvp = outfp->f_vnode; 5006 /* Sanity check the f_flag bits. */ 5007 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5008 (infp->f_flag & FREAD) == 0) { 5009 error = EBADF; 5010 goto out; 5011 } 5012 5013 /* If len == 0, just return 0. */ 5014 if (len == 0) 5015 goto out; 5016 5017 /* 5018 * If infp and outfp refer to the same file, the byte ranges cannot 5019 * overlap. 5020 */ 5021 if (invp == outvp) { 5022 if ((savinoff <= savoutoff && savinoff + len > savoutoff) || 5023 (savinoff > savoutoff && savoutoff + len > savinoff)) { 5024 error = EINVAL; 5025 goto out; 5026 } 5027 rangelock_may_recurse(&invp->v_rl); 5028 } 5029 5030 /* Range lock the byte ranges for both invp and outvp. */ 5031 for (;;) { 5032 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 5033 len); 5034 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 5035 len); 5036 if (rl_rcookie != NULL) 5037 break; 5038 vn_rangelock_unlock(outvp, rl_wcookie); 5039 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 5040 vn_rangelock_unlock(invp, rl_rcookie); 5041 } 5042 5043 retlen = len; 5044 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 5045 flags, infp->f_cred, outfp->f_cred, td); 5046 out: 5047 if (rl_rcookie != NULL) 5048 vn_rangelock_unlock(invp, rl_rcookie); 5049 if (rl_wcookie != NULL) 5050 vn_rangelock_unlock(outvp, rl_wcookie); 5051 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 5052 *inoffp = savinoff; 5053 *outoffp = savoutoff; 5054 } 5055 if (outfp != NULL) 5056 fdrop(outfp, td); 5057 if (infp != NULL) 5058 fdrop(infp, td); 5059 td->td_retval[0] = retlen; 5060 return (error); 5061 } 5062 5063 int 5064 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5065 { 5066 off_t inoff, outoff, *inoffp, *outoffp; 5067 int error; 5068 5069 inoffp = outoffp = NULL; 5070 if (uap->inoffp != NULL) { 5071 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5072 if (error != 0) 5073 return (error); 5074 inoffp = &inoff; 5075 } 5076 if (uap->outoffp != NULL) { 5077 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5078 if (error != 0) 5079 return (error); 5080 outoffp = &outoff; 5081 } 5082 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5083 outoffp, uap->len, uap->flags); 5084 if (error == 0 && uap->inoffp != NULL) 5085 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5086 if (error == 0 && uap->outoffp != NULL) 5087 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5088 return (error); 5089 } 5090