1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include "opt_capsicum.h" 38 #include "opt_ktrace.h" 39 40 #include <sys/systm.h> 41 #ifdef COMPAT_FREEBSD11 42 #include <sys/abi_compat.h> 43 #endif 44 #include <sys/bio.h> 45 #include <sys/buf.h> 46 #include <sys/capsicum.h> 47 #include <sys/disk.h> 48 #include <sys/dirent.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/filedesc.h> 52 #include <sys/filio.h> 53 #include <sys/jail.h> 54 #include <sys/kernel.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/limits.h> 59 #include <sys/linker.h> 60 #include <sys/malloc.h> 61 #include <sys/mount.h> 62 #include <sys/mutex.h> 63 #include <sys/namei.h> 64 #include <sys/priv.h> 65 #include <sys/proc.h> 66 #include <sys/rwlock.h> 67 #include <sys/sdt.h> 68 #include <sys/stat.h> 69 #include <sys/sx.h> 70 #include <sys/syscallsubr.h> 71 #include <sys/sysctl.h> 72 #include <sys/sysproto.h> 73 #include <sys/unistd.h> 74 #include <sys/vnode.h> 75 76 #include <machine/stdarg.h> 77 78 #include <security/audit/audit.h> 79 #include <security/mac/mac_framework.h> 80 81 #include <vm/vm.h> 82 #include <vm/vm_object.h> 83 #include <vm/vm_page.h> 84 #include <vm/vnode_pager.h> 85 #include <vm/uma.h> 86 87 #include <fs/devfs/devfs.h> 88 89 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 90 91 static int kern_chflagsat(struct thread *td, int fd, const char *path, 92 enum uio_seg pathseg, u_long flags, int atflag); 93 static int setfflags(struct thread *td, struct vnode *, u_long); 94 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 95 static int getutimens(const struct timespec *, enum uio_seg, 96 struct timespec *, int *); 97 static int setutimes(struct thread *td, struct vnode *, 98 const struct timespec *, int, int); 99 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 100 struct thread *td); 101 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, fhandle_t 
*fhp); 103 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 104 size_t count, struct thread *td); 105 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 106 const char *path, enum uio_seg segflag); 107 108 uint64_t 109 at2cnpflags(u_int at_flags, u_int mask) 110 { 111 uint64_t res; 112 113 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 114 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 115 116 res = 0; 117 at_flags &= mask; 118 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 119 res |= RBENEATH; 120 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 121 res |= FOLLOW; 122 /* NOFOLLOW is pseudo flag */ 123 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 124 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 125 FOLLOW; 126 } 127 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 128 res |= EMPTYPATH; 129 return (res); 130 } 131 132 int 133 kern_sync(struct thread *td) 134 { 135 struct mount *mp, *nmp; 136 int save; 137 138 mtx_lock(&mountlist_mtx); 139 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 140 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 141 nmp = TAILQ_NEXT(mp, mnt_list); 142 continue; 143 } 144 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 145 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 146 save = curthread_pflags_set(TDP_SYNCIO); 147 vfs_periodic(mp, MNT_NOWAIT); 148 VFS_SYNC(mp, MNT_NOWAIT); 149 curthread_pflags_restore(save); 150 vn_finished_write(mp); 151 } 152 mtx_lock(&mountlist_mtx); 153 nmp = TAILQ_NEXT(mp, mnt_list); 154 vfs_unbusy(mp); 155 } 156 mtx_unlock(&mountlist_mtx); 157 return (0); 158 } 159 160 /* 161 * Sync each mounted filesystem. 162 */ 163 #ifndef _SYS_SYSPROTO_H_ 164 struct sync_args { 165 int dummy; 166 }; 167 #endif 168 /* ARGSUSED */ 169 int 170 sys_sync(struct thread *td, struct sync_args *uap) 171 { 172 173 return (kern_sync(td)); 174 } 175 176 /* 177 * Change filesystem quotas. 
178 */ 179 #ifndef _SYS_SYSPROTO_H_ 180 struct quotactl_args { 181 char *path; 182 int cmd; 183 int uid; 184 caddr_t arg; 185 }; 186 #endif 187 int 188 sys_quotactl(struct thread *td, struct quotactl_args *uap) 189 { 190 struct mount *mp; 191 struct nameidata nd; 192 int error; 193 bool mp_busy; 194 195 AUDIT_ARG_CMD(uap->cmd); 196 AUDIT_ARG_UID(uap->uid); 197 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 198 return (EPERM); 199 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 200 uap->path); 201 if ((error = namei(&nd)) != 0) 202 return (error); 203 NDFREE_PNBUF(&nd); 204 mp = nd.ni_vp->v_mount; 205 vfs_ref(mp); 206 vput(nd.ni_vp); 207 error = vfs_busy(mp, 0); 208 if (error != 0) { 209 vfs_rel(mp); 210 return (error); 211 } 212 mp_busy = true; 213 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 214 215 /* 216 * Since quota on/off operations typically need to open quota 217 * files, the implementation may need to unbusy the mount point 218 * before calling into namei. Otherwise, unmount might be 219 * started between two vfs_busy() invocations (first is ours, 220 * second is from mount point cross-walk code in lookup()), 221 * causing deadlock. 222 * 223 * Avoid unbusying mp if the implementation indicates it has 224 * already done so. 225 */ 226 if (mp_busy) 227 vfs_unbusy(mp); 228 vfs_rel(mp); 229 return (error); 230 } 231 232 /* 233 * Used by statfs conversion routines to scale the block size up if 234 * necessary so that all of the block counts are <= 'max_size'. Note 235 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 236 * value of 'n'. 237 */ 238 void 239 statfs_scale_blocks(struct statfs *sf, long max_size) 240 { 241 uint64_t count; 242 int shift; 243 244 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 245 246 /* 247 * Attempt to scale the block counts to give a more accurate 248 * overview to userland of the ratio of free space to used 249 * space. 
To do this, find the largest block count and compute 250 * a divisor that lets it fit into a signed integer <= max_size. 251 */ 252 if (sf->f_bavail < 0) 253 count = -sf->f_bavail; 254 else 255 count = sf->f_bavail; 256 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 257 if (count <= max_size) 258 return; 259 260 count >>= flsl(max_size); 261 shift = 0; 262 while (count > 0) { 263 shift++; 264 count >>=1; 265 } 266 267 sf->f_bsize <<= shift; 268 sf->f_blocks >>= shift; 269 sf->f_bfree >>= shift; 270 sf->f_bavail >>= shift; 271 } 272 273 static int 274 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 275 { 276 int error; 277 278 if (mp == NULL) 279 return (EBADF); 280 error = vfs_busy(mp, 0); 281 vfs_rel(mp); 282 if (error != 0) 283 return (error); 284 #ifdef MAC 285 error = mac_mount_check_stat(td->td_ucred, mp); 286 if (error != 0) 287 goto out; 288 #endif 289 error = VFS_STATFS(mp, buf); 290 if (error != 0) 291 goto out; 292 if (priv_check_cred_vfs_generation(td->td_ucred)) { 293 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 294 prison_enforce_statfs(td->td_ucred, mp, buf); 295 } 296 out: 297 vfs_unbusy(mp); 298 return (error); 299 } 300 301 /* 302 * Get filesystem statistics. 
303 */ 304 #ifndef _SYS_SYSPROTO_H_ 305 struct statfs_args { 306 char *path; 307 struct statfs *buf; 308 }; 309 #endif 310 int 311 sys_statfs(struct thread *td, struct statfs_args *uap) 312 { 313 struct statfs *sfp; 314 int error; 315 316 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 317 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 318 if (error == 0) 319 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 320 free(sfp, M_STATFS); 321 return (error); 322 } 323 324 int 325 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 326 struct statfs *buf) 327 { 328 struct mount *mp; 329 struct nameidata nd; 330 int error; 331 332 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 333 error = namei(&nd); 334 if (error != 0) 335 return (error); 336 NDFREE_PNBUF(&nd); 337 mp = vfs_ref_from_vp(nd.ni_vp); 338 vrele(nd.ni_vp); 339 return (kern_do_statfs(td, mp, buf)); 340 } 341 342 /* 343 * Get filesystem statistics. 344 */ 345 #ifndef _SYS_SYSPROTO_H_ 346 struct fstatfs_args { 347 int fd; 348 struct statfs *buf; 349 }; 350 #endif 351 int 352 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 353 { 354 struct statfs *sfp; 355 int error; 356 357 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 358 error = kern_fstatfs(td, uap->fd, sfp); 359 if (error == 0) 360 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 361 free(sfp, M_STATFS); 362 return (error); 363 } 364 365 int 366 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 367 { 368 struct file *fp; 369 struct mount *mp; 370 struct vnode *vp; 371 int error; 372 373 AUDIT_ARG_FD(fd); 374 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 375 if (error != 0) 376 return (error); 377 vp = fp->f_vnode; 378 #ifdef AUDIT 379 if (AUDITING_TD(td)) { 380 vn_lock(vp, LK_SHARED | LK_RETRY); 381 AUDIT_ARG_VNODE1(vp); 382 VOP_UNLOCK(vp); 383 } 384 #endif 385 mp = vfs_ref_from_vp(vp); 386 fdrop(fp, td); 387 return (kern_do_statfs(td, mp, buf)); 388 } 389 390 
/*
 * Get statistics on all filesystems.
 */
#ifndef _SYS_SYSPROTO_H_
struct getfsstat_args {
	struct statfs *buf;
	long bufsize;
	int mode;
};
#endif
/*
 * getfsstat(2) entry point.  Rejects a negative or over-large bufsize
 * with EINVAL, otherwise lets kern_getfsstat() copy the records straight
 * to the user buffer and returns the record count in td_retval[0].
 */
int
sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
{
	size_t count;
	int error;

	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
		return (EINVAL);
	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
	    UIO_USERSPACE, uap->mode);
	if (error == 0)
		td->td_retval[0] = count;
	return (error);
}

/*
 * Collect statfs records for the mounted filesystems that pass the
 * prison_canseemount() (and, with MAC, mac_mount_check_stat()) checks
 * for td's credential.
 *
 * 'mode' must be MNT_WAIT or MNT_NOWAIT; anything else returns EINVAL
 * (and, for UIO_SYSSPACE, stores NULL in '*buf').
 *
 * If bufsize == 0, only the number of eligible mounts is reported.
 * If bufseg == UIO_USERSPACE, '*buf' is the user buffer and records are
 * written with copyout(); a copyout() failure is returned to the caller.
 *
 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
 *	The caller is responsible for freeing memory which will be allocated
 *	in '*buf'.
 *
 * On success the number of records counted or stored is returned in
 * '*countp'.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    size_t *countp, enum uio_seg bufseg, int mode)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, *sptmp, *tofree;
	size_t count, maxcount;
	int error;

	switch (mode) {
	case MNT_WAIT:
	case MNT_NOWAIT:
		break;
	default:
		if (bufseg == UIO_SYSSPACE)
			*buf = NULL;
		return (EINVAL);
	}
restart:
	/* Number of whole records the caller's size allows for. */
	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0) {
		sfsp = NULL;
		tofree = NULL;
	} else if (bufseg == UIO_USERSPACE) {
		sfsp = *buf;
		tofree = NULL;
	} else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer by the current length of the mount
		 * list so that no more than that many records are allocated.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
		    M_STATFS, M_WAITOK);
	}

	count = 0;

	/*
	 * If there is no target buffer they only want the count.
	 *
	 * This could be TAILQ_FOREACH but it is open-coded to match the original
	 * code below.
	 */
	if (sfsp == NULL) {
		mtx_lock(&mountlist_mtx);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (prison_canseemount(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#ifdef MAC
			if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#endif
			count++;
			nmp = TAILQ_NEXT(mp, mnt_list);
		}
		mtx_unlock(&mountlist_mtx);
		*countp = count;
		return (0);
	}

	/*
	 * They want the entire thing.
	 *
	 * Short-circuit the corner case of no room for anything, avoids
	 * relocking below.
	 */
	if (maxcount < 1) {
		goto out;
	}

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		if (mode == MNT_WAIT) {
			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
				/*
				 * If vfs_busy() failed, and MBF_NOWAIT
				 * wasn't passed, then the mp is gone.
				 * Furthermore, because of MBF_MNTLSTLOCK,
				 * the mountlist_mtx was dropped.  We have
				 * no other choice than to start over.
				 */
				mtx_unlock(&mountlist_mtx);
				free(tofree, M_STATFS);
				goto restart;
			}
		} else {
			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
		}
		sp = &mp->mnt_stat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh
		 * the fsstat cache.
		 */
		if (mode != MNT_NOWAIT) {
			error = VFS_STATFS(mp, sp);
			if (error != 0) {
				/* Skip this mount; the error is not reported. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
		}
		if (priv_check_cred_vfs_generation(td->td_ucred)) {
			/*
			 * Work on a private copy so that the zeroed fsid and
			 * the prison adjustments do not modify mnt_stat.
			 */
			sptmp = malloc(sizeof(struct statfs), M_STATFS,
			    M_WAITOK);
			*sptmp = *sp;
			sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
			prison_enforce_statfs(td->td_ucred, mp, sptmp);
			sp = sptmp;
		} else
			sptmp = NULL;
		if (bufseg == UIO_SYSSPACE) {
			bcopy(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
		} else /* if (bufseg == UIO_USERSPACE) */ {
			error = copyout(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
			if (error != 0) {
				vfs_unbusy(mp);
				return (error);
			}
		}
		sfsp++;
		count++;

		/* Buffer full: stop without re-taking the list lock. */
		if (count == maxcount) {
			vfs_unbusy(mp);
			goto out;
		}

		/*
		 * Re-take the list lock before stepping to the next entry,
		 * and only then release the busy hold on the current one.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
out:
	*countp = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * FreeBSD 4 compatible statfs(2): fetch the current-format statistics
 * for uap->path, convert them to struct ostatfs and copy that out.
 */
int
freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
{
	struct ostatfs osb;
	struct statfs *sfp;
	int error;

	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
	if (error == 0) {
		freebsd4_cvtstatfs(sfp, &osb);
		error = copyout(&osb, uap->buf, sizeof(osb));
	}
	free(sfp, M_STATFS);
	return (error);
}

/*
 * Get filesystem statistics.
612 */ 613 #ifndef _SYS_SYSPROTO_H_ 614 struct freebsd4_fstatfs_args { 615 int fd; 616 struct ostatfs *buf; 617 }; 618 #endif 619 int 620 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 621 { 622 struct ostatfs osb; 623 struct statfs *sfp; 624 int error; 625 626 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 627 error = kern_fstatfs(td, uap->fd, sfp); 628 if (error == 0) { 629 freebsd4_cvtstatfs(sfp, &osb); 630 error = copyout(&osb, uap->buf, sizeof(osb)); 631 } 632 free(sfp, M_STATFS); 633 return (error); 634 } 635 636 /* 637 * Get statistics on all filesystems. 638 */ 639 #ifndef _SYS_SYSPROTO_H_ 640 struct freebsd4_getfsstat_args { 641 struct ostatfs *buf; 642 long bufsize; 643 int mode; 644 }; 645 #endif 646 int 647 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 648 { 649 struct statfs *buf, *sp; 650 struct ostatfs osb; 651 size_t count, size; 652 int error; 653 654 if (uap->bufsize < 0) 655 return (EINVAL); 656 count = uap->bufsize / sizeof(struct ostatfs); 657 if (count > SIZE_MAX / sizeof(struct statfs)) 658 return (EINVAL); 659 size = count * sizeof(struct statfs); 660 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 661 uap->mode); 662 if (error == 0) 663 td->td_retval[0] = count; 664 if (size != 0) { 665 sp = buf; 666 while (count != 0 && error == 0) { 667 freebsd4_cvtstatfs(sp, &osb); 668 error = copyout(&osb, uap->buf, sizeof(osb)); 669 sp++; 670 uap->buf++; 671 count--; 672 } 673 free(buf, M_STATFS); 674 } 675 return (error); 676 } 677 678 /* 679 * Implement fstatfs() for (NFS) file handles. 
680 */ 681 #ifndef _SYS_SYSPROTO_H_ 682 struct freebsd4_fhstatfs_args { 683 struct fhandle *u_fhp; 684 struct ostatfs *buf; 685 }; 686 #endif 687 int 688 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 689 { 690 struct ostatfs osb; 691 struct statfs *sfp; 692 fhandle_t fh; 693 int error; 694 695 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 696 if (error != 0) 697 return (error); 698 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 699 error = kern_fhstatfs(td, fh, sfp); 700 if (error == 0) { 701 freebsd4_cvtstatfs(sfp, &osb); 702 error = copyout(&osb, uap->buf, sizeof(osb)); 703 } 704 free(sfp, M_STATFS); 705 return (error); 706 } 707 708 /* 709 * Convert a new format statfs structure to an old format statfs structure. 710 */ 711 static void 712 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 713 { 714 715 statfs_scale_blocks(nsp, LONG_MAX); 716 bzero(osp, sizeof(*osp)); 717 osp->f_bsize = nsp->f_bsize; 718 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 719 osp->f_blocks = nsp->f_blocks; 720 osp->f_bfree = nsp->f_bfree; 721 osp->f_bavail = nsp->f_bavail; 722 osp->f_files = MIN(nsp->f_files, LONG_MAX); 723 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 724 osp->f_owner = nsp->f_owner; 725 osp->f_type = nsp->f_type; 726 osp->f_flags = nsp->f_flags; 727 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 728 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 729 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 730 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 731 strlcpy(osp->f_fstypename, nsp->f_fstypename, 732 MIN(MFSNAMELEN, OMFSNAMELEN)); 733 strlcpy(osp->f_mntonname, nsp->f_mntonname, 734 MIN(MNAMELEN, OMNAMELEN)); 735 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 736 MIN(MNAMELEN, OMNAMELEN)); 737 osp->f_fsid = nsp->f_fsid; 738 } 739 #endif /* COMPAT_FREEBSD4 */ 740 741 #if defined(COMPAT_FREEBSD11) 742 /* 743 * Get old format filesystem statistics. 
744 */ 745 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 746 747 int 748 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 749 { 750 struct freebsd11_statfs osb; 751 struct statfs *sfp; 752 int error; 753 754 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 755 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 756 if (error == 0) { 757 freebsd11_cvtstatfs(sfp, &osb); 758 error = copyout(&osb, uap->buf, sizeof(osb)); 759 } 760 free(sfp, M_STATFS); 761 return (error); 762 } 763 764 /* 765 * Get filesystem statistics. 766 */ 767 int 768 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 769 { 770 struct freebsd11_statfs osb; 771 struct statfs *sfp; 772 int error; 773 774 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 775 error = kern_fstatfs(td, uap->fd, sfp); 776 if (error == 0) { 777 freebsd11_cvtstatfs(sfp, &osb); 778 error = copyout(&osb, uap->buf, sizeof(osb)); 779 } 780 free(sfp, M_STATFS); 781 return (error); 782 } 783 784 /* 785 * Get statistics on all filesystems. 
786 */ 787 int 788 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 789 { 790 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 791 } 792 793 int 794 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 795 long bufsize, int mode) 796 { 797 struct freebsd11_statfs osb; 798 struct statfs *buf, *sp; 799 size_t count, size; 800 int error; 801 802 if (bufsize < 0) 803 return (EINVAL); 804 805 count = bufsize / sizeof(struct ostatfs); 806 size = count * sizeof(struct statfs); 807 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 808 if (error == 0) 809 td->td_retval[0] = count; 810 if (size > 0) { 811 sp = buf; 812 while (count > 0 && error == 0) { 813 freebsd11_cvtstatfs(sp, &osb); 814 error = copyout(&osb, ubuf, sizeof(osb)); 815 sp++; 816 ubuf++; 817 count--; 818 } 819 free(buf, M_STATFS); 820 } 821 return (error); 822 } 823 824 /* 825 * Implement fstatfs() for (NFS) file handles. 826 */ 827 int 828 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 829 { 830 struct freebsd11_statfs osb; 831 struct statfs *sfp; 832 fhandle_t fh; 833 int error; 834 835 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 836 if (error) 837 return (error); 838 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 839 error = kern_fhstatfs(td, fh, sfp); 840 if (error == 0) { 841 freebsd11_cvtstatfs(sfp, &osb); 842 error = copyout(&osb, uap->buf, sizeof(osb)); 843 } 844 free(sfp, M_STATFS); 845 return (error); 846 } 847 848 /* 849 * Convert a new format statfs structure to an old format statfs structure. 
850 */ 851 static void 852 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 853 { 854 855 bzero(osp, sizeof(*osp)); 856 osp->f_version = FREEBSD11_STATFS_VERSION; 857 osp->f_type = nsp->f_type; 858 osp->f_flags = nsp->f_flags; 859 osp->f_bsize = nsp->f_bsize; 860 osp->f_iosize = nsp->f_iosize; 861 osp->f_blocks = nsp->f_blocks; 862 osp->f_bfree = nsp->f_bfree; 863 osp->f_bavail = nsp->f_bavail; 864 osp->f_files = nsp->f_files; 865 osp->f_ffree = nsp->f_ffree; 866 osp->f_syncwrites = nsp->f_syncwrites; 867 osp->f_asyncwrites = nsp->f_asyncwrites; 868 osp->f_syncreads = nsp->f_syncreads; 869 osp->f_asyncreads = nsp->f_asyncreads; 870 osp->f_namemax = nsp->f_namemax; 871 osp->f_owner = nsp->f_owner; 872 osp->f_fsid = nsp->f_fsid; 873 strlcpy(osp->f_fstypename, nsp->f_fstypename, 874 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 875 strlcpy(osp->f_mntonname, nsp->f_mntonname, 876 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 877 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 878 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 879 } 880 #endif /* COMPAT_FREEBSD11 */ 881 882 /* 883 * Change current working directory to a given file descriptor. 
884 */ 885 #ifndef _SYS_SYSPROTO_H_ 886 struct fchdir_args { 887 int fd; 888 }; 889 #endif 890 int 891 sys_fchdir(struct thread *td, struct fchdir_args *uap) 892 { 893 struct vnode *vp, *tdp; 894 struct mount *mp; 895 struct file *fp; 896 int error; 897 898 AUDIT_ARG_FD(uap->fd); 899 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 900 &fp); 901 if (error != 0) 902 return (error); 903 vp = fp->f_vnode; 904 vrefact(vp); 905 fdrop(fp, td); 906 vn_lock(vp, LK_SHARED | LK_RETRY); 907 AUDIT_ARG_VNODE1(vp); 908 error = change_dir(vp, td); 909 while (!error && (mp = vp->v_mountedhere) != NULL) { 910 if (vfs_busy(mp, 0)) 911 continue; 912 error = VFS_ROOT(mp, LK_SHARED, &tdp); 913 vfs_unbusy(mp); 914 if (error != 0) 915 break; 916 vput(vp); 917 vp = tdp; 918 } 919 if (error != 0) { 920 vput(vp); 921 return (error); 922 } 923 VOP_UNLOCK(vp); 924 pwd_chdir(td, vp); 925 return (0); 926 } 927 928 /* 929 * Change current working directory (``.''). 930 */ 931 #ifndef _SYS_SYSPROTO_H_ 932 struct chdir_args { 933 char *path; 934 }; 935 #endif 936 int 937 sys_chdir(struct thread *td, struct chdir_args *uap) 938 { 939 940 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 941 } 942 943 int 944 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 945 { 946 struct nameidata nd; 947 int error; 948 949 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 950 pathseg, path); 951 if ((error = namei(&nd)) != 0) 952 return (error); 953 if ((error = change_dir(nd.ni_vp, td)) != 0) { 954 vput(nd.ni_vp); 955 NDFREE_PNBUF(&nd); 956 return (error); 957 } 958 VOP_UNLOCK(nd.ni_vp); 959 NDFREE_PNBUF(&nd); 960 pwd_chdir(td, nd.ni_vp); 961 return (0); 962 } 963 964 static int unprivileged_chroot = 0; 965 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 966 &unprivileged_chroot, 0, 967 "Unprivileged processes can use chroot(2)"); 968 969 /* 970 * Takes locked vnode, unlocks it before returning. 
971 */ 972 static int 973 kern_chroot(struct thread *td, struct vnode *vp) 974 { 975 struct proc *p; 976 int error; 977 978 error = priv_check(td, PRIV_VFS_CHROOT); 979 if (error != 0) { 980 p = td->td_proc; 981 PROC_LOCK(p); 982 if (unprivileged_chroot == 0 || 983 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 984 PROC_UNLOCK(p); 985 goto e_vunlock; 986 } 987 PROC_UNLOCK(p); 988 } 989 990 error = change_dir(vp, td); 991 if (error != 0) 992 goto e_vunlock; 993 #ifdef MAC 994 error = mac_vnode_check_chroot(td->td_ucred, vp); 995 if (error != 0) 996 goto e_vunlock; 997 #endif 998 VOP_UNLOCK(vp); 999 error = pwd_chroot(td, vp); 1000 vrele(vp); 1001 return (error); 1002 e_vunlock: 1003 vput(vp); 1004 return (error); 1005 } 1006 1007 /* 1008 * Change notion of root (``/'') directory. 1009 */ 1010 #ifndef _SYS_SYSPROTO_H_ 1011 struct chroot_args { 1012 char *path; 1013 }; 1014 #endif 1015 int 1016 sys_chroot(struct thread *td, struct chroot_args *uap) 1017 { 1018 struct nameidata nd; 1019 int error; 1020 1021 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1022 UIO_USERSPACE, uap->path); 1023 error = namei(&nd); 1024 if (error != 0) 1025 return (error); 1026 NDFREE_PNBUF(&nd); 1027 error = kern_chroot(td, nd.ni_vp); 1028 return (error); 1029 } 1030 1031 /* 1032 * Change notion of root directory to a given file descriptor. 1033 */ 1034 #ifndef _SYS_SYSPROTO_H_ 1035 struct fchroot_args { 1036 int fd; 1037 }; 1038 #endif 1039 int 1040 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1041 { 1042 struct vnode *vp; 1043 struct file *fp; 1044 int error; 1045 1046 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fp); 1047 if (error != 0) 1048 return (error); 1049 vp = fp->f_vnode; 1050 vrefact(vp); 1051 fdrop(fp, td); 1052 vn_lock(vp, LK_SHARED | LK_RETRY); 1053 error = kern_chroot(td, vp); 1054 return (error); 1055 } 1056 1057 /* 1058 * Common routine for chroot and chdir. Callers must provide a locked vnode 1059 * instance. 
1060 */ 1061 int 1062 change_dir(struct vnode *vp, struct thread *td) 1063 { 1064 #ifdef MAC 1065 int error; 1066 #endif 1067 1068 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1069 if (vp->v_type != VDIR) 1070 return (ENOTDIR); 1071 #ifdef MAC 1072 error = mac_vnode_check_chdir(td->td_ucred, vp); 1073 if (error != 0) 1074 return (error); 1075 #endif 1076 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1077 } 1078 1079 static __inline void 1080 flags_to_rights(int flags, cap_rights_t *rightsp) 1081 { 1082 if (flags & O_EXEC) { 1083 cap_rights_set_one(rightsp, CAP_FEXECVE); 1084 if (flags & O_PATH) 1085 return; 1086 } else { 1087 switch ((flags & O_ACCMODE)) { 1088 case O_RDONLY: 1089 cap_rights_set_one(rightsp, CAP_READ); 1090 break; 1091 case O_RDWR: 1092 cap_rights_set_one(rightsp, CAP_READ); 1093 /* FALLTHROUGH */ 1094 case O_WRONLY: 1095 cap_rights_set_one(rightsp, CAP_WRITE); 1096 if (!(flags & (O_APPEND | O_TRUNC))) 1097 cap_rights_set_one(rightsp, CAP_SEEK); 1098 break; 1099 } 1100 } 1101 1102 if (flags & O_CREAT) 1103 cap_rights_set_one(rightsp, CAP_CREATE); 1104 1105 if (flags & O_TRUNC) 1106 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1107 1108 if (flags & (O_SYNC | O_FSYNC)) 1109 cap_rights_set_one(rightsp, CAP_FSYNC); 1110 1111 if (flags & (O_EXLOCK | O_SHLOCK)) 1112 cap_rights_set_one(rightsp, CAP_FLOCK); 1113 } 1114 1115 /* 1116 * Check permissions, allocate an open file structure, and call the device 1117 * open routine if any. 
*/
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char *path;
	int flags;
	int mode;
};
#endif
/*
 * open(2) entry point: forwards to kern_openat() with AT_FDCWD as the
 * directory descriptor.
 */
int
sys_open(struct thread *td, struct open_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int fd;
	char *path;
	int flag;
	int mode;
};
#endif
/*
 * openat(2) entry point: records the directory descriptor for audit and
 * forwards to kern_openat().
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Validate open(2) flags and convert access mode flags (O_RDONLY etc.) to their
 * in-kernel representations (FREAD etc.).
 *
 * '*flagsp' is updated in place on success.  Returns 0, or EINVAL for an
 * invalid access mode combination (in which case '*flagsp' is left
 * untouched).
 */
static int
openflags(int *flagsp)
{
	int flags;

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	flags = *flagsp;
	if ((flags & O_PATH) != 0) {
		flags &= ~O_ACCMODE;
	} else if ((flags & O_EXEC) != 0) {
		if ((flags & O_ACCMODE) != 0)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}
	*flagsp = flags;
	return (0);
}

/*
 * Attach the opened vnode 'vp' to 'fp' and, unless some lower layer
 * already installed its own file operations, initialise 'fp' as either
 * an O_PATH file or a regular vnode-backed file.
 */
static void
finit_open(struct file *fp, struct vnode *vp, int flags)
{
	/*
	 * Store the vnode, for any f_type. Typically, the vnode use count is
	 * decremented by a direct call to vnops.fo_close() for files that
	 * switched type.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs or fifofs, bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}
}

/*
 * Open 'path' relative to 'dirfd' with open(2) style 'flags' and 'mode'.
 *
 * If fpp != NULL, opened file is not installed into the file
 * descriptor table, instead it is returned in *fpp.  This is
 * incompatible with fdopen(), in which case we return EINVAL.
 *
 * With fpp == NULL the new descriptor number is returned in
 * td->td_retval[0].  Returns 0 on success or an errno value; on failure
 * the allocated file is released with falloc_abort().
 */
static int
openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	struct proc *p;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct filecaps *fcaps;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means no descriptor has been installed yet. */
	indx = -1;
	p = td->td_proc;
	fdp = p->p_fd;
	pdp = p->p_pd;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* Rights are derived from the user-visible flags, before conversion. */
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	error = openflags(&flags);
	if (error != 0)
		return (error);

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/*
	 * Creation mode: requested mode with the pd_cmask bits removed
	 * (presumably the process umask), limited to ALLPERMS and with
	 * S_ISTXT cleared.
	 */
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, dirfd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			MPASS(fpp == NULL);
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	finit_open(fp, vp, flags);
	VOP_UNLOCK(vp);
	/* Truncation is done through the file, after the vnode is unlocked. */
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/* Caller wants the file itself rather than a descriptor. */
	if (fpp != NULL) {
		MPASS(error == 0);
		NDFREE_IOCTLCAPS(&nd);
		*fpp = fp;
		return (0);
	}

	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/*
		 * dupfdopen() already provided the descriptor, so the file
		 * allocated above is not needed.
		 */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}

/*
 * Open a file and install it in the calling thread's descriptor table;
 * the descriptor is returned in td->td_retval[0].
 */
int
kern_openat(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode)
{
	return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL));
}

/*
 * Open a file and pass it to openatfp() with the caller's 'fpp'.
 * td->td_dupfd is saved and restored around the call because
 * openatfp() overwrites it.
 */
int
kern_openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	int error, old_dupfd;

	old_dupfd = td->td_dupfd;
	td->td_dupfd = -1;
	error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp);
	td->td_dupfd = old_dupfd;
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char *path;
	int mode;
};
#endif
/*
 * Old creat() entry point, implemented as an open with
 * O_WRONLY | O_CREAT | O_TRUNC.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1373 */ 1374 #ifndef _SYS_SYSPROTO_H_ 1375 struct mknodat_args { 1376 int fd; 1377 char *path; 1378 mode_t mode; 1379 dev_t dev; 1380 }; 1381 #endif 1382 int 1383 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1384 { 1385 1386 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1387 uap->dev)); 1388 } 1389 1390 #if defined(COMPAT_FREEBSD11) 1391 int 1392 freebsd11_mknod(struct thread *td, 1393 struct freebsd11_mknod_args *uap) 1394 { 1395 1396 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1397 uap->mode, uap->dev)); 1398 } 1399 1400 int 1401 freebsd11_mknodat(struct thread *td, 1402 struct freebsd11_mknodat_args *uap) 1403 { 1404 1405 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1406 uap->dev)); 1407 } 1408 #endif /* COMPAT_FREEBSD11 */ 1409 1410 int 1411 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1412 int mode, dev_t dev) 1413 { 1414 struct vnode *vp; 1415 struct mount *mp; 1416 struct vattr vattr; 1417 struct nameidata nd; 1418 int error, whiteout = 0; 1419 1420 AUDIT_ARG_MODE(mode); 1421 AUDIT_ARG_DEV(dev); 1422 switch (mode & S_IFMT) { 1423 case S_IFCHR: 1424 case S_IFBLK: 1425 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1426 if (error == 0 && dev == VNOVAL) 1427 error = EINVAL; 1428 break; 1429 case S_IFWHT: 1430 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1431 break; 1432 case S_IFIFO: 1433 if (dev == 0) 1434 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1435 /* FALLTHROUGH */ 1436 default: 1437 error = EINVAL; 1438 break; 1439 } 1440 if (error != 0) 1441 return (error); 1442 NDPREINIT(&nd); 1443 restart: 1444 bwillwrite(); 1445 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1446 pathseg, path, fd, &cap_mknodat_rights); 1447 if ((error = namei(&nd)) != 0) 1448 return (error); 1449 vp = nd.ni_vp; 1450 if (vp != NULL) { 1451 NDFREE_PNBUF(&nd); 1452 if (vp == nd.ni_dvp) 1453 vrele(nd.ni_dvp); 1454 else 1455 vput(nd.ni_dvp); 1456 
vrele(vp); 1457 return (EEXIST); 1458 } else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1459 NDFREE_PNBUF(&nd); 1460 vput(nd.ni_dvp); 1461 return (EINVAL); 1462 } else { 1463 VATTR_NULL(&vattr); 1464 vattr.va_mode = (mode & ALLPERMS) & 1465 ~td->td_proc->p_pd->pd_cmask; 1466 vattr.va_rdev = dev; 1467 whiteout = 0; 1468 1469 switch (mode & S_IFMT) { 1470 case S_IFCHR: 1471 vattr.va_type = VCHR; 1472 break; 1473 case S_IFBLK: 1474 vattr.va_type = VBLK; 1475 break; 1476 case S_IFWHT: 1477 whiteout = 1; 1478 break; 1479 default: 1480 panic("kern_mknod: invalid mode"); 1481 } 1482 } 1483 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1484 NDFREE_PNBUF(&nd); 1485 vput(nd.ni_dvp); 1486 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1487 return (error); 1488 goto restart; 1489 } 1490 #ifdef MAC 1491 if (error == 0 && !whiteout) 1492 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1493 &nd.ni_cnd, &vattr); 1494 #endif 1495 if (error == 0) { 1496 if (whiteout) 1497 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1498 else { 1499 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1500 &nd.ni_cnd, &vattr); 1501 } 1502 } 1503 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1504 true); 1505 vn_finished_write(mp); 1506 NDFREE_PNBUF(&nd); 1507 if (error == ERELOOKUP) 1508 goto restart; 1509 return (error); 1510 } 1511 1512 /* 1513 * Create a named pipe. 
1514 */ 1515 #ifndef _SYS_SYSPROTO_H_ 1516 struct mkfifo_args { 1517 char *path; 1518 int mode; 1519 }; 1520 #endif 1521 int 1522 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1523 { 1524 1525 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1526 uap->mode)); 1527 } 1528 1529 #ifndef _SYS_SYSPROTO_H_ 1530 struct mkfifoat_args { 1531 int fd; 1532 char *path; 1533 mode_t mode; 1534 }; 1535 #endif 1536 int 1537 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1538 { 1539 1540 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1541 uap->mode)); 1542 } 1543 1544 int 1545 kern_mkfifoat(struct thread *td, int fd, const char *path, 1546 enum uio_seg pathseg, int mode) 1547 { 1548 struct mount *mp; 1549 struct vattr vattr; 1550 struct nameidata nd; 1551 int error; 1552 1553 AUDIT_ARG_MODE(mode); 1554 NDPREINIT(&nd); 1555 restart: 1556 bwillwrite(); 1557 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1558 pathseg, path, fd, &cap_mkfifoat_rights); 1559 if ((error = namei(&nd)) != 0) 1560 return (error); 1561 if (nd.ni_vp != NULL) { 1562 NDFREE_PNBUF(&nd); 1563 if (nd.ni_vp == nd.ni_dvp) 1564 vrele(nd.ni_dvp); 1565 else 1566 vput(nd.ni_dvp); 1567 vrele(nd.ni_vp); 1568 return (EEXIST); 1569 } 1570 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1571 NDFREE_PNBUF(&nd); 1572 vput(nd.ni_dvp); 1573 return (EINVAL); 1574 } 1575 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1576 NDFREE_PNBUF(&nd); 1577 vput(nd.ni_dvp); 1578 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1579 return (error); 1580 goto restart; 1581 } 1582 VATTR_NULL(&vattr); 1583 vattr.va_type = VFIFO; 1584 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1585 #ifdef MAC 1586 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1587 &vattr); 1588 if (error != 0) 1589 goto out; 1590 #endif 1591 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1592 #ifdef MAC 1593 out: 1594 #endif 
1595 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1596 vn_finished_write(mp); 1597 NDFREE_PNBUF(&nd); 1598 if (error == ERELOOKUP) 1599 goto restart; 1600 return (error); 1601 } 1602 1603 /* 1604 * Make a hard file link. 1605 */ 1606 #ifndef _SYS_SYSPROTO_H_ 1607 struct link_args { 1608 char *path; 1609 char *link; 1610 }; 1611 #endif 1612 int 1613 sys_link(struct thread *td, struct link_args *uap) 1614 { 1615 1616 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1617 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1618 } 1619 1620 #ifndef _SYS_SYSPROTO_H_ 1621 struct linkat_args { 1622 int fd1; 1623 char *path1; 1624 int fd2; 1625 char *path2; 1626 int flag; 1627 }; 1628 #endif 1629 int 1630 sys_linkat(struct thread *td, struct linkat_args *uap) 1631 { 1632 1633 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1634 UIO_USERSPACE, uap->flag)); 1635 } 1636 1637 int hardlink_check_uid = 0; 1638 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1639 &hardlink_check_uid, 0, 1640 "Unprivileged processes cannot create hard links to files owned by other " 1641 "users"); 1642 static int hardlink_check_gid = 0; 1643 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1644 &hardlink_check_gid, 0, 1645 "Unprivileged processes cannot create hard links to files owned by other " 1646 "groups"); 1647 1648 static int 1649 can_hardlink(struct vnode *vp, struct ucred *cred) 1650 { 1651 struct vattr va; 1652 int error; 1653 1654 if (!hardlink_check_uid && !hardlink_check_gid) 1655 return (0); 1656 1657 error = VOP_GETATTR(vp, &va, cred); 1658 if (error != 0) 1659 return (error); 1660 1661 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1662 error = priv_check_cred(cred, PRIV_VFS_LINK); 1663 if (error != 0) 1664 return (error); 1665 } 1666 1667 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1668 error = priv_check_cred(cred, PRIV_VFS_LINK); 1669 if (error != 0) 1670 return (error); 1671 } 
1672 1673 return (0); 1674 } 1675 1676 int 1677 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1678 const char *path2, enum uio_seg segflag, int flag) 1679 { 1680 struct nameidata nd; 1681 int error; 1682 1683 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1684 AT_EMPTY_PATH)) != 0) 1685 return (EINVAL); 1686 1687 NDPREINIT(&nd); 1688 do { 1689 bwillwrite(); 1690 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1691 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1692 segflag, path1, fd1, &cap_linkat_source_rights); 1693 if ((error = namei(&nd)) != 0) 1694 return (error); 1695 NDFREE_PNBUF(&nd); 1696 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1697 error = priv_check(td, PRIV_VFS_FHOPEN); 1698 if (error != 0) { 1699 vrele(nd.ni_vp); 1700 return (error); 1701 } 1702 } 1703 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1704 } while (error == EAGAIN || error == ERELOOKUP); 1705 return (error); 1706 } 1707 1708 static int 1709 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1710 enum uio_seg segflag) 1711 { 1712 struct nameidata nd; 1713 struct mount *mp; 1714 int error; 1715 1716 if (vp->v_type == VDIR) { 1717 vrele(vp); 1718 return (EPERM); /* POSIX */ 1719 } 1720 if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) { 1721 vrele(vp); 1722 return (EINVAL); 1723 } 1724 NDINIT_ATRIGHTS(&nd, CREATE, 1725 LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1726 &cap_linkat_target_rights); 1727 if ((error = namei(&nd)) == 0) { 1728 if (nd.ni_vp != NULL) { 1729 NDFREE_PNBUF(&nd); 1730 if (nd.ni_dvp == nd.ni_vp) 1731 vrele(nd.ni_dvp); 1732 else 1733 vput(nd.ni_dvp); 1734 vrele(nd.ni_vp); 1735 vrele(vp); 1736 return (EEXIST); 1737 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1738 /* 1739 * Cross-device link. No need to recheck 1740 * vp->v_type, since it cannot change, except 1741 * to VBAD. 
1742 */ 1743 NDFREE_PNBUF(&nd); 1744 vput(nd.ni_dvp); 1745 vrele(vp); 1746 return (EXDEV); 1747 } else if (vn_lock(vp, LK_EXCLUSIVE) == 0) { 1748 error = can_hardlink(vp, td->td_ucred); 1749 #ifdef MAC 1750 if (error == 0) 1751 error = mac_vnode_check_link(td->td_ucred, 1752 nd.ni_dvp, vp, &nd.ni_cnd); 1753 #endif 1754 if (error != 0) { 1755 vput(vp); 1756 vput(nd.ni_dvp); 1757 NDFREE_PNBUF(&nd); 1758 return (error); 1759 } 1760 error = vn_start_write(vp, &mp, V_NOWAIT); 1761 if (error != 0) { 1762 vput(vp); 1763 vput(nd.ni_dvp); 1764 NDFREE_PNBUF(&nd); 1765 error = vn_start_write(NULL, &mp, 1766 V_XSLEEP | V_PCATCH); 1767 if (error != 0) 1768 return (error); 1769 return (EAGAIN); 1770 } 1771 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1772 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1773 vn_finished_write(mp); 1774 NDFREE_PNBUF(&nd); 1775 vp = NULL; 1776 } else { 1777 vput(nd.ni_dvp); 1778 NDFREE_PNBUF(&nd); 1779 vrele(vp); 1780 return (EAGAIN); 1781 } 1782 } 1783 if (vp != NULL) 1784 vrele(vp); 1785 return (error); 1786 } 1787 1788 /* 1789 * Make a symbolic link. 
1790 */ 1791 #ifndef _SYS_SYSPROTO_H_ 1792 struct symlink_args { 1793 char *path; 1794 char *link; 1795 }; 1796 #endif 1797 int 1798 sys_symlink(struct thread *td, struct symlink_args *uap) 1799 { 1800 1801 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1802 UIO_USERSPACE)); 1803 } 1804 1805 #ifndef _SYS_SYSPROTO_H_ 1806 struct symlinkat_args { 1807 char *path; 1808 int fd; 1809 char *path2; 1810 }; 1811 #endif 1812 int 1813 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1814 { 1815 1816 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1817 UIO_USERSPACE)); 1818 } 1819 1820 int 1821 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1822 enum uio_seg segflg) 1823 { 1824 struct mount *mp; 1825 struct vattr vattr; 1826 const char *syspath; 1827 char *tmppath; 1828 struct nameidata nd; 1829 int error; 1830 1831 if (segflg == UIO_SYSSPACE) { 1832 syspath = path1; 1833 } else { 1834 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1835 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1836 goto out; 1837 syspath = tmppath; 1838 } 1839 AUDIT_ARG_TEXT(syspath); 1840 NDPREINIT(&nd); 1841 restart: 1842 bwillwrite(); 1843 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1844 path2, fd, &cap_symlinkat_rights); 1845 if ((error = namei(&nd)) != 0) 1846 goto out; 1847 if (nd.ni_vp) { 1848 NDFREE_PNBUF(&nd); 1849 if (nd.ni_vp == nd.ni_dvp) 1850 vrele(nd.ni_dvp); 1851 else 1852 vput(nd.ni_dvp); 1853 vrele(nd.ni_vp); 1854 nd.ni_vp = NULL; 1855 error = EEXIST; 1856 goto out; 1857 } 1858 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1859 NDFREE_PNBUF(&nd); 1860 vput(nd.ni_dvp); 1861 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1862 goto out; 1863 goto restart; 1864 } 1865 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1866 error = EINVAL; 1867 goto out; 1868 } 1869 VATTR_NULL(&vattr); 1870 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 
1871 #ifdef MAC 1872 vattr.va_type = VLNK; 1873 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1874 &vattr); 1875 if (error != 0) 1876 goto out2; 1877 #endif 1878 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1879 #ifdef MAC 1880 out2: 1881 #endif 1882 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1883 vn_finished_write(mp); 1884 NDFREE_PNBUF(&nd); 1885 if (error == ERELOOKUP) 1886 goto restart; 1887 out: 1888 if (segflg != UIO_SYSSPACE) 1889 uma_zfree(namei_zone, tmppath); 1890 return (error); 1891 } 1892 1893 /* 1894 * Delete a whiteout from the filesystem. 1895 */ 1896 #ifndef _SYS_SYSPROTO_H_ 1897 struct undelete_args { 1898 char *path; 1899 }; 1900 #endif 1901 int 1902 sys_undelete(struct thread *td, struct undelete_args *uap) 1903 { 1904 struct mount *mp; 1905 struct nameidata nd; 1906 int error; 1907 1908 NDPREINIT(&nd); 1909 restart: 1910 bwillwrite(); 1911 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1912 UIO_USERSPACE, uap->path); 1913 error = namei(&nd); 1914 if (error != 0) 1915 return (error); 1916 1917 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1918 NDFREE_PNBUF(&nd); 1919 if (nd.ni_vp == nd.ni_dvp) 1920 vrele(nd.ni_dvp); 1921 else 1922 vput(nd.ni_dvp); 1923 if (nd.ni_vp) 1924 vrele(nd.ni_vp); 1925 return (EEXIST); 1926 } 1927 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1928 NDFREE_PNBUF(&nd); 1929 vput(nd.ni_dvp); 1930 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1931 return (error); 1932 goto restart; 1933 } 1934 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1935 NDFREE_PNBUF(&nd); 1936 vput(nd.ni_dvp); 1937 vn_finished_write(mp); 1938 if (error == ERELOOKUP) 1939 goto restart; 1940 return (error); 1941 } 1942 1943 /* 1944 * Delete a name from the filesystem. 
1945 */ 1946 #ifndef _SYS_SYSPROTO_H_ 1947 struct unlink_args { 1948 char *path; 1949 }; 1950 #endif 1951 int 1952 sys_unlink(struct thread *td, struct unlink_args *uap) 1953 { 1954 1955 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1956 0, 0)); 1957 } 1958 1959 static int 1960 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1961 int flag, enum uio_seg pathseg, ino_t oldinum) 1962 { 1963 1964 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1965 return (EINVAL); 1966 1967 if ((flag & AT_REMOVEDIR) != 0) 1968 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1969 1970 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1971 } 1972 1973 #ifndef _SYS_SYSPROTO_H_ 1974 struct unlinkat_args { 1975 int fd; 1976 char *path; 1977 int flag; 1978 }; 1979 #endif 1980 int 1981 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1982 { 1983 1984 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1985 UIO_USERSPACE, 0)); 1986 } 1987 1988 #ifndef _SYS_SYSPROTO_H_ 1989 struct funlinkat_args { 1990 int dfd; 1991 const char *path; 1992 int fd; 1993 int flag; 1994 }; 1995 #endif 1996 int 1997 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1998 { 1999 2000 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 2001 UIO_USERSPACE, 0)); 2002 } 2003 2004 int 2005 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 2006 enum uio_seg pathseg, int flag, ino_t oldinum) 2007 { 2008 struct mount *mp; 2009 struct file *fp; 2010 struct vnode *vp; 2011 struct nameidata nd; 2012 struct stat sb; 2013 int error; 2014 2015 fp = NULL; 2016 if (fd != FD_NONE) { 2017 error = getvnode_path(td, fd, &cap_no_rights, &fp); 2018 if (error != 0) 2019 return (error); 2020 } 2021 2022 NDPREINIT(&nd); 2023 restart: 2024 bwillwrite(); 2025 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2026 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2027 pathseg, 
path, dfd, &cap_unlinkat_rights); 2028 if ((error = namei(&nd)) != 0) { 2029 if (error == EINVAL) 2030 error = EPERM; 2031 goto fdout; 2032 } 2033 vp = nd.ni_vp; 2034 if (vp->v_type == VDIR && oldinum == 0) { 2035 error = EPERM; /* POSIX */ 2036 } else if (oldinum != 0 && 2037 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2038 sb.st_ino != oldinum) { 2039 error = EIDRM; /* Identifier removed */ 2040 } else if (fp != NULL && fp->f_vnode != vp) { 2041 if (VN_IS_DOOMED(fp->f_vnode)) 2042 error = EBADF; 2043 else 2044 error = EDEADLK; 2045 } else { 2046 /* 2047 * The root of a mounted filesystem cannot be deleted. 2048 * 2049 * XXX: can this only be a VDIR case? 2050 */ 2051 if (vp->v_vflag & VV_ROOT) 2052 error = EBUSY; 2053 } 2054 if (error == 0) { 2055 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2056 NDFREE_PNBUF(&nd); 2057 vput(nd.ni_dvp); 2058 if (vp == nd.ni_dvp) 2059 vrele(vp); 2060 else 2061 vput(vp); 2062 if ((error = vn_start_write(NULL, &mp, 2063 V_XSLEEP | V_PCATCH)) != 0) { 2064 goto fdout; 2065 } 2066 goto restart; 2067 } 2068 #ifdef MAC 2069 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2070 &nd.ni_cnd); 2071 if (error != 0) 2072 goto out; 2073 #endif 2074 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2075 #ifdef MAC 2076 out: 2077 #endif 2078 vn_finished_write(mp); 2079 } 2080 NDFREE_PNBUF(&nd); 2081 vput(nd.ni_dvp); 2082 if (vp == nd.ni_dvp) 2083 vrele(vp); 2084 else 2085 vput(vp); 2086 if (error == ERELOOKUP) 2087 goto restart; 2088 fdout: 2089 if (fp != NULL) 2090 fdrop(fp, td); 2091 return (error); 2092 } 2093 2094 /* 2095 * Reposition read/write file offset. 
2096 */ 2097 #ifndef _SYS_SYSPROTO_H_ 2098 struct lseek_args { 2099 int fd; 2100 int pad; 2101 off_t offset; 2102 int whence; 2103 }; 2104 #endif 2105 int 2106 sys_lseek(struct thread *td, struct lseek_args *uap) 2107 { 2108 2109 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2110 } 2111 2112 int 2113 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2114 { 2115 struct file *fp; 2116 int error; 2117 2118 AUDIT_ARG_FD(fd); 2119 error = fget(td, fd, &cap_seek_rights, &fp); 2120 if (error != 0) 2121 return (error); 2122 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2123 fo_seek(fp, offset, whence, td) : ESPIPE; 2124 fdrop(fp, td); 2125 return (error); 2126 } 2127 2128 #if defined(COMPAT_43) 2129 /* 2130 * Reposition read/write file offset. 2131 */ 2132 #ifndef _SYS_SYSPROTO_H_ 2133 struct olseek_args { 2134 int fd; 2135 long offset; 2136 int whence; 2137 }; 2138 #endif 2139 int 2140 olseek(struct thread *td, struct olseek_args *uap) 2141 { 2142 2143 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2144 } 2145 #endif /* COMPAT_43 */ 2146 2147 #if defined(COMPAT_FREEBSD6) 2148 /* Version with the 'pad' argument */ 2149 int 2150 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2151 { 2152 2153 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2154 } 2155 #endif 2156 2157 /* 2158 * Check access permissions using passed credentials. 2159 */ 2160 static int 2161 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2162 struct thread *td) 2163 { 2164 accmode_t accmode; 2165 int error; 2166 2167 /* Flags == 0 means only check for existence. 
*/ 2168 if (user_flags == 0) 2169 return (0); 2170 2171 accmode = 0; 2172 if (user_flags & R_OK) 2173 accmode |= VREAD; 2174 if (user_flags & W_OK) 2175 accmode |= VWRITE; 2176 if (user_flags & X_OK) 2177 accmode |= VEXEC; 2178 #ifdef MAC 2179 error = mac_vnode_check_access(cred, vp, accmode); 2180 if (error != 0) 2181 return (error); 2182 #endif 2183 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2184 error = VOP_ACCESS(vp, accmode, cred, td); 2185 return (error); 2186 } 2187 2188 /* 2189 * Check access permissions using "real" credentials. 2190 */ 2191 #ifndef _SYS_SYSPROTO_H_ 2192 struct access_args { 2193 char *path; 2194 int amode; 2195 }; 2196 #endif 2197 int 2198 sys_access(struct thread *td, struct access_args *uap) 2199 { 2200 2201 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2202 0, uap->amode)); 2203 } 2204 2205 #ifndef _SYS_SYSPROTO_H_ 2206 struct faccessat_args { 2207 int dirfd; 2208 char *path; 2209 int amode; 2210 int flag; 2211 } 2212 #endif 2213 int 2214 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2215 { 2216 2217 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2218 uap->amode)); 2219 } 2220 2221 int 2222 kern_accessat(struct thread *td, int fd, const char *path, 2223 enum uio_seg pathseg, int flag, int amode) 2224 { 2225 struct ucred *cred, *usecred; 2226 struct vnode *vp; 2227 struct nameidata nd; 2228 int error; 2229 2230 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2231 AT_SYMLINK_NOFOLLOW)) != 0) 2232 return (EINVAL); 2233 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2234 return (EINVAL); 2235 2236 /* 2237 * Create and modify a temporary credential instead of one that 2238 * is potentially shared (if we need one). 
2239 */ 2240 cred = td->td_ucred; 2241 if ((flag & AT_EACCESS) == 0 && 2242 ((cred->cr_uid != cred->cr_ruid || 2243 cred->cr_rgid != cred->cr_groups[0]))) { 2244 usecred = crdup(cred); 2245 usecred->cr_uid = cred->cr_ruid; 2246 usecred->cr_groups[0] = cred->cr_rgid; 2247 td->td_ucred = usecred; 2248 } else 2249 usecred = cred; 2250 AUDIT_ARG_VALUE(amode); 2251 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2252 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2253 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2254 if ((error = namei(&nd)) != 0) 2255 goto out; 2256 vp = nd.ni_vp; 2257 2258 error = vn_access(vp, amode, usecred, td); 2259 NDFREE_PNBUF(&nd); 2260 vput(vp); 2261 out: 2262 if (usecred != cred) { 2263 td->td_ucred = cred; 2264 crfree(usecred); 2265 } 2266 return (error); 2267 } 2268 2269 /* 2270 * Check access permissions using "effective" credentials. 2271 */ 2272 #ifndef _SYS_SYSPROTO_H_ 2273 struct eaccess_args { 2274 char *path; 2275 int amode; 2276 }; 2277 #endif 2278 int 2279 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2280 { 2281 2282 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2283 AT_EACCESS, uap->amode)); 2284 } 2285 2286 #if defined(COMPAT_43) 2287 /* 2288 * Get file status; this version follows links. 2289 */ 2290 #ifndef _SYS_SYSPROTO_H_ 2291 struct ostat_args { 2292 char *path; 2293 struct ostat *ub; 2294 }; 2295 #endif 2296 int 2297 ostat(struct thread *td, struct ostat_args *uap) 2298 { 2299 struct stat sb; 2300 struct ostat osb; 2301 int error; 2302 2303 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2304 if (error != 0) 2305 return (error); 2306 cvtstat(&sb, &osb); 2307 return (copyout(&osb, uap->ub, sizeof (osb))); 2308 } 2309 2310 /* 2311 * Get file status; this version does not follow links. 
2312 */ 2313 #ifndef _SYS_SYSPROTO_H_ 2314 struct olstat_args { 2315 char *path; 2316 struct ostat *ub; 2317 }; 2318 #endif 2319 int 2320 olstat(struct thread *td, struct olstat_args *uap) 2321 { 2322 struct stat sb; 2323 struct ostat osb; 2324 int error; 2325 2326 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2327 UIO_USERSPACE, &sb); 2328 if (error != 0) 2329 return (error); 2330 cvtstat(&sb, &osb); 2331 return (copyout(&osb, uap->ub, sizeof (osb))); 2332 } 2333 2334 /* 2335 * Convert from an old to a new stat structure. 2336 * XXX: many values are blindly truncated. 2337 */ 2338 void 2339 cvtstat(struct stat *st, struct ostat *ost) 2340 { 2341 2342 bzero(ost, sizeof(*ost)); 2343 ost->st_dev = st->st_dev; 2344 ost->st_ino = st->st_ino; 2345 ost->st_mode = st->st_mode; 2346 ost->st_nlink = st->st_nlink; 2347 ost->st_uid = st->st_uid; 2348 ost->st_gid = st->st_gid; 2349 ost->st_rdev = st->st_rdev; 2350 ost->st_size = MIN(st->st_size, INT32_MAX); 2351 ost->st_atim = st->st_atim; 2352 ost->st_mtim = st->st_mtim; 2353 ost->st_ctim = st->st_ctim; 2354 ost->st_blksize = st->st_blksize; 2355 ost->st_blocks = st->st_blocks; 2356 ost->st_flags = st->st_flags; 2357 ost->st_gen = st->st_gen; 2358 } 2359 #endif /* COMPAT_43 */ 2360 2361 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2362 int ino64_trunc_error; 2363 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2364 &ino64_trunc_error, 0, 2365 "Error on truncation of device, file or inode number, or link count"); 2366 2367 int 2368 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2369 { 2370 2371 ost->st_dev = st->st_dev; 2372 if (ost->st_dev != st->st_dev) { 2373 switch (ino64_trunc_error) { 2374 default: 2375 /* 2376 * Since dev_t is almost raw, don't clamp to the 2377 * maximum for case 2, but ignore the error. 
2378 */ 2379 break; 2380 case 1: 2381 return (EOVERFLOW); 2382 } 2383 } 2384 ost->st_ino = st->st_ino; 2385 if (ost->st_ino != st->st_ino) { 2386 switch (ino64_trunc_error) { 2387 default: 2388 case 0: 2389 break; 2390 case 1: 2391 return (EOVERFLOW); 2392 case 2: 2393 ost->st_ino = UINT32_MAX; 2394 break; 2395 } 2396 } 2397 ost->st_mode = st->st_mode; 2398 ost->st_nlink = st->st_nlink; 2399 if (ost->st_nlink != st->st_nlink) { 2400 switch (ino64_trunc_error) { 2401 default: 2402 case 0: 2403 break; 2404 case 1: 2405 return (EOVERFLOW); 2406 case 2: 2407 ost->st_nlink = UINT16_MAX; 2408 break; 2409 } 2410 } 2411 ost->st_uid = st->st_uid; 2412 ost->st_gid = st->st_gid; 2413 ost->st_rdev = st->st_rdev; 2414 if (ost->st_rdev != st->st_rdev) { 2415 switch (ino64_trunc_error) { 2416 default: 2417 break; 2418 case 1: 2419 return (EOVERFLOW); 2420 } 2421 } 2422 ost->st_atim = st->st_atim; 2423 ost->st_mtim = st->st_mtim; 2424 ost->st_ctim = st->st_ctim; 2425 ost->st_size = st->st_size; 2426 ost->st_blocks = st->st_blocks; 2427 ost->st_blksize = st->st_blksize; 2428 ost->st_flags = st->st_flags; 2429 ost->st_gen = st->st_gen; 2430 ost->st_lspare = 0; 2431 ost->st_birthtim = st->st_birthtim; 2432 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2433 sizeof(*ost) - offsetof(struct freebsd11_stat, 2434 st_birthtim) - sizeof(ost->st_birthtim)); 2435 return (0); 2436 } 2437 2438 int 2439 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2440 { 2441 struct stat sb; 2442 struct freebsd11_stat osb; 2443 int error; 2444 2445 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2446 if (error != 0) 2447 return (error); 2448 error = freebsd11_cvtstat(&sb, &osb); 2449 if (error == 0) 2450 error = copyout(&osb, uap->ub, sizeof(osb)); 2451 return (error); 2452 } 2453 2454 int 2455 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2456 { 2457 struct stat sb; 2458 struct freebsd11_stat osb; 2459 int error; 2460 2461 error = 
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2462 UIO_USERSPACE, &sb); 2463 if (error != 0) 2464 return (error); 2465 error = freebsd11_cvtstat(&sb, &osb); 2466 if (error == 0) 2467 error = copyout(&osb, uap->ub, sizeof(osb)); 2468 return (error); 2469 } 2470 2471 int 2472 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2473 { 2474 struct fhandle fh; 2475 struct stat sb; 2476 struct freebsd11_stat osb; 2477 int error; 2478 2479 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2480 if (error != 0) 2481 return (error); 2482 error = kern_fhstat(td, fh, &sb); 2483 if (error != 0) 2484 return (error); 2485 error = freebsd11_cvtstat(&sb, &osb); 2486 if (error == 0) 2487 error = copyout(&osb, uap->sb, sizeof(osb)); 2488 return (error); 2489 } 2490 2491 int 2492 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2493 { 2494 struct stat sb; 2495 struct freebsd11_stat osb; 2496 int error; 2497 2498 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2499 UIO_USERSPACE, &sb); 2500 if (error != 0) 2501 return (error); 2502 error = freebsd11_cvtstat(&sb, &osb); 2503 if (error == 0) 2504 error = copyout(&osb, uap->buf, sizeof(osb)); 2505 return (error); 2506 } 2507 #endif /* COMPAT_FREEBSD11 */ 2508 2509 /* 2510 * Get file status 2511 */ 2512 #ifndef _SYS_SYSPROTO_H_ 2513 struct fstatat_args { 2514 int fd; 2515 char *path; 2516 struct stat *buf; 2517 int flag; 2518 } 2519 #endif 2520 int 2521 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2522 { 2523 struct stat sb; 2524 int error; 2525 2526 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2527 UIO_USERSPACE, &sb); 2528 if (error == 0) 2529 error = copyout(&sb, uap->buf, sizeof (sb)); 2530 return (error); 2531 } 2532 2533 int 2534 kern_statat(struct thread *td, int flag, int fd, const char *path, 2535 enum uio_seg pathseg, struct stat *sbp) 2536 { 2537 struct nameidata nd; 2538 int error; 2539 2540 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2541 AT_EMPTY_PATH)) != 0) 2542 return (EINVAL); 2543 2544 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2545 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2546 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2547 2548 if ((error = namei(&nd)) != 0) { 2549 if (error == ENOTDIR && 2550 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2551 error = kern_fstat(td, fd, sbp); 2552 return (error); 2553 } 2554 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2555 NDFREE_PNBUF(&nd); 2556 vput(nd.ni_vp); 2557 #ifdef __STAT_TIME_T_EXT 2558 sbp->st_atim_ext = 0; 2559 sbp->st_mtim_ext = 0; 2560 sbp->st_ctim_ext = 0; 2561 sbp->st_btim_ext = 0; 2562 #endif 2563 #ifdef KTRACE 2564 if (KTRPOINT(td, KTR_STRUCT)) 2565 ktrstat_error(sbp, error); 2566 #endif 2567 return (error); 2568 } 2569 2570 #if defined(COMPAT_FREEBSD11) 2571 /* 2572 * Implementation of the NetBSD [l]stat() functions. 2573 */ 2574 int 2575 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2576 { 2577 struct freebsd11_stat sb11; 2578 int error; 2579 2580 error = freebsd11_cvtstat(sb, &sb11); 2581 if (error != 0) 2582 return (error); 2583 2584 bzero(nsb, sizeof(*nsb)); 2585 CP(sb11, *nsb, st_dev); 2586 CP(sb11, *nsb, st_ino); 2587 CP(sb11, *nsb, st_mode); 2588 CP(sb11, *nsb, st_nlink); 2589 CP(sb11, *nsb, st_uid); 2590 CP(sb11, *nsb, st_gid); 2591 CP(sb11, *nsb, st_rdev); 2592 CP(sb11, *nsb, st_atim); 2593 CP(sb11, *nsb, st_mtim); 2594 CP(sb11, *nsb, st_ctim); 2595 CP(sb11, *nsb, st_size); 2596 CP(sb11, *nsb, st_blocks); 2597 CP(sb11, *nsb, st_blksize); 2598 CP(sb11, *nsb, st_flags); 2599 CP(sb11, *nsb, st_gen); 2600 CP(sb11, *nsb, st_birthtim); 2601 return (0); 2602 } 2603 2604 #ifndef _SYS_SYSPROTO_H_ 2605 struct freebsd11_nstat_args { 2606 char *path; 2607 struct nstat *ub; 2608 }; 2609 #endif 2610 int 2611 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2612 { 2613 struct stat sb; 2614 struct nstat nsb; 2615 int error; 2616 
2617 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2618 if (error != 0) 2619 return (error); 2620 error = freebsd11_cvtnstat(&sb, &nsb); 2621 if (error == 0) 2622 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2623 return (error); 2624 } 2625 2626 /* 2627 * NetBSD lstat. Get file status; this version does not follow links. 2628 */ 2629 #ifndef _SYS_SYSPROTO_H_ 2630 struct freebsd11_nlstat_args { 2631 char *path; 2632 struct nstat *ub; 2633 }; 2634 #endif 2635 int 2636 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2637 { 2638 struct stat sb; 2639 struct nstat nsb; 2640 int error; 2641 2642 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2643 UIO_USERSPACE, &sb); 2644 if (error != 0) 2645 return (error); 2646 error = freebsd11_cvtnstat(&sb, &nsb); 2647 if (error == 0) 2648 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2649 return (error); 2650 } 2651 #endif /* COMPAT_FREEBSD11 */ 2652 2653 /* 2654 * Get configurable pathname variables. 
2655 */ 2656 #ifndef _SYS_SYSPROTO_H_ 2657 struct pathconf_args { 2658 char *path; 2659 int name; 2660 }; 2661 #endif 2662 int 2663 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2664 { 2665 long value; 2666 int error; 2667 2668 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2669 &value); 2670 if (error == 0) 2671 td->td_retval[0] = value; 2672 return (error); 2673 } 2674 2675 #ifndef _SYS_SYSPROTO_H_ 2676 struct lpathconf_args { 2677 char *path; 2678 int name; 2679 }; 2680 #endif 2681 int 2682 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2683 { 2684 long value; 2685 int error; 2686 2687 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2688 NOFOLLOW, &value); 2689 if (error == 0) 2690 td->td_retval[0] = value; 2691 return (error); 2692 } 2693 2694 int 2695 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2696 int name, u_long flags, long *valuep) 2697 { 2698 struct nameidata nd; 2699 int error; 2700 2701 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2702 pathseg, path); 2703 if ((error = namei(&nd)) != 0) 2704 return (error); 2705 NDFREE_PNBUF(&nd); 2706 2707 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2708 vput(nd.ni_vp); 2709 return (error); 2710 } 2711 2712 /* 2713 * Return target name of a symbolic link. 
2714 */ 2715 #ifndef _SYS_SYSPROTO_H_ 2716 struct readlink_args { 2717 char *path; 2718 char *buf; 2719 size_t count; 2720 }; 2721 #endif 2722 int 2723 sys_readlink(struct thread *td, struct readlink_args *uap) 2724 { 2725 2726 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2727 uap->buf, UIO_USERSPACE, uap->count)); 2728 } 2729 #ifndef _SYS_SYSPROTO_H_ 2730 struct readlinkat_args { 2731 int fd; 2732 char *path; 2733 char *buf; 2734 size_t bufsize; 2735 }; 2736 #endif 2737 int 2738 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2739 { 2740 2741 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2742 uap->buf, UIO_USERSPACE, uap->bufsize)); 2743 } 2744 2745 int 2746 kern_readlinkat(struct thread *td, int fd, const char *path, 2747 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2748 { 2749 struct vnode *vp; 2750 struct nameidata nd; 2751 int error; 2752 2753 if (count > IOSIZE_MAX) 2754 return (EINVAL); 2755 2756 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2757 EMPTYPATH, pathseg, path, fd); 2758 2759 if ((error = namei(&nd)) != 0) 2760 return (error); 2761 NDFREE_PNBUF(&nd); 2762 vp = nd.ni_vp; 2763 2764 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2765 vput(vp); 2766 2767 return (error); 2768 } 2769 2770 /* 2771 * Helper function to readlink from a vnode 2772 */ 2773 static int 2774 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2775 struct thread *td) 2776 { 2777 struct iovec aiov; 2778 struct uio auio; 2779 int error; 2780 2781 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2782 #ifdef MAC 2783 error = mac_vnode_check_readlink(td->td_ucred, vp); 2784 if (error != 0) 2785 return (error); 2786 #endif 2787 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2788 return (EINVAL); 2789 2790 aiov.iov_base = buf; 2791 aiov.iov_len = count; 2792 auio.uio_iov = &aiov; 2793 auio.uio_iovcnt = 1; 2794 auio.uio_offset = 
0; 2795 auio.uio_rw = UIO_READ; 2796 auio.uio_segflg = bufseg; 2797 auio.uio_td = td; 2798 auio.uio_resid = count; 2799 error = VOP_READLINK(vp, &auio, td->td_ucred); 2800 td->td_retval[0] = count - auio.uio_resid; 2801 return (error); 2802 } 2803 2804 /* 2805 * Common implementation code for chflags() and fchflags(). 2806 */ 2807 static int 2808 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2809 { 2810 struct mount *mp; 2811 struct vattr vattr; 2812 int error; 2813 2814 /* We can't support the value matching VNOVAL. */ 2815 if (flags == VNOVAL) 2816 return (EOPNOTSUPP); 2817 2818 /* 2819 * Prevent non-root users from setting flags on devices. When 2820 * a device is reused, users can retain ownership of the device 2821 * if they are allowed to set flags and programs assume that 2822 * chown can't fail when done as root. 2823 */ 2824 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2825 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2826 if (error != 0) 2827 return (error); 2828 } 2829 2830 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2831 return (error); 2832 VATTR_NULL(&vattr); 2833 vattr.va_flags = flags; 2834 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2835 #ifdef MAC 2836 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2837 if (error == 0) 2838 #endif 2839 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2840 VOP_UNLOCK(vp); 2841 vn_finished_write(mp); 2842 return (error); 2843 } 2844 2845 /* 2846 * Change flags of a file given a path name. 
2847 */ 2848 #ifndef _SYS_SYSPROTO_H_ 2849 struct chflags_args { 2850 const char *path; 2851 u_long flags; 2852 }; 2853 #endif 2854 int 2855 sys_chflags(struct thread *td, struct chflags_args *uap) 2856 { 2857 2858 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2859 uap->flags, 0)); 2860 } 2861 2862 #ifndef _SYS_SYSPROTO_H_ 2863 struct chflagsat_args { 2864 int fd; 2865 const char *path; 2866 u_long flags; 2867 int atflag; 2868 } 2869 #endif 2870 int 2871 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2872 { 2873 2874 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2875 uap->flags, uap->atflag)); 2876 } 2877 2878 /* 2879 * Same as chflags() but doesn't follow symlinks. 2880 */ 2881 #ifndef _SYS_SYSPROTO_H_ 2882 struct lchflags_args { 2883 const char *path; 2884 u_long flags; 2885 }; 2886 #endif 2887 int 2888 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2889 { 2890 2891 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2892 uap->flags, AT_SYMLINK_NOFOLLOW)); 2893 } 2894 2895 static int 2896 kern_chflagsat(struct thread *td, int fd, const char *path, 2897 enum uio_seg pathseg, u_long flags, int atflag) 2898 { 2899 struct nameidata nd; 2900 int error; 2901 2902 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2903 AT_EMPTY_PATH)) != 0) 2904 return (EINVAL); 2905 2906 AUDIT_ARG_FFLAGS(flags); 2907 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2908 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2909 fd, &cap_fchflags_rights); 2910 if ((error = namei(&nd)) != 0) 2911 return (error); 2912 NDFREE_PNBUF(&nd); 2913 error = setfflags(td, nd.ni_vp, flags); 2914 vrele(nd.ni_vp); 2915 return (error); 2916 } 2917 2918 /* 2919 * Change flags of a file given a file descriptor. 
2920 */ 2921 #ifndef _SYS_SYSPROTO_H_ 2922 struct fchflags_args { 2923 int fd; 2924 u_long flags; 2925 }; 2926 #endif 2927 int 2928 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2929 { 2930 struct file *fp; 2931 int error; 2932 2933 AUDIT_ARG_FD(uap->fd); 2934 AUDIT_ARG_FFLAGS(uap->flags); 2935 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2936 &fp); 2937 if (error != 0) 2938 return (error); 2939 #ifdef AUDIT 2940 if (AUDITING_TD(td)) { 2941 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2942 AUDIT_ARG_VNODE1(fp->f_vnode); 2943 VOP_UNLOCK(fp->f_vnode); 2944 } 2945 #endif 2946 error = setfflags(td, fp->f_vnode, uap->flags); 2947 fdrop(fp, td); 2948 return (error); 2949 } 2950 2951 /* 2952 * Common implementation code for chmod(), lchmod() and fchmod(). 2953 */ 2954 int 2955 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2956 { 2957 struct mount *mp; 2958 struct vattr vattr; 2959 int error; 2960 2961 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2962 return (error); 2963 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2964 VATTR_NULL(&vattr); 2965 vattr.va_mode = mode & ALLPERMS; 2966 #ifdef MAC 2967 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2968 if (error == 0) 2969 #endif 2970 error = VOP_SETATTR(vp, &vattr, cred); 2971 VOP_UNLOCK(vp); 2972 vn_finished_write(mp); 2973 return (error); 2974 } 2975 2976 /* 2977 * Change mode of a file given path name. 
2978 */ 2979 #ifndef _SYS_SYSPROTO_H_ 2980 struct chmod_args { 2981 char *path; 2982 int mode; 2983 }; 2984 #endif 2985 int 2986 sys_chmod(struct thread *td, struct chmod_args *uap) 2987 { 2988 2989 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2990 uap->mode, 0)); 2991 } 2992 2993 #ifndef _SYS_SYSPROTO_H_ 2994 struct fchmodat_args { 2995 int dirfd; 2996 char *path; 2997 mode_t mode; 2998 int flag; 2999 } 3000 #endif 3001 int 3002 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 3003 { 3004 3005 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 3006 uap->mode, uap->flag)); 3007 } 3008 3009 /* 3010 * Change mode of a file given path name (don't follow links.) 3011 */ 3012 #ifndef _SYS_SYSPROTO_H_ 3013 struct lchmod_args { 3014 char *path; 3015 int mode; 3016 }; 3017 #endif 3018 int 3019 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3020 { 3021 3022 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3023 uap->mode, AT_SYMLINK_NOFOLLOW)); 3024 } 3025 3026 int 3027 kern_fchmodat(struct thread *td, int fd, const char *path, 3028 enum uio_seg pathseg, mode_t mode, int flag) 3029 { 3030 struct nameidata nd; 3031 int error; 3032 3033 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3034 AT_EMPTY_PATH)) != 0) 3035 return (EINVAL); 3036 3037 AUDIT_ARG_MODE(mode); 3038 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3039 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3040 fd, &cap_fchmod_rights); 3041 if ((error = namei(&nd)) != 0) 3042 return (error); 3043 NDFREE_PNBUF(&nd); 3044 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3045 vrele(nd.ni_vp); 3046 return (error); 3047 } 3048 3049 /* 3050 * Change mode of a file given a file descriptor. 
3051 */ 3052 #ifndef _SYS_SYSPROTO_H_ 3053 struct fchmod_args { 3054 int fd; 3055 int mode; 3056 }; 3057 #endif 3058 int 3059 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3060 { 3061 struct file *fp; 3062 int error; 3063 3064 AUDIT_ARG_FD(uap->fd); 3065 AUDIT_ARG_MODE(uap->mode); 3066 3067 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3068 if (error != 0) 3069 return (error); 3070 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3071 fdrop(fp, td); 3072 return (error); 3073 } 3074 3075 /* 3076 * Common implementation for chown(), lchown(), and fchown() 3077 */ 3078 int 3079 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3080 gid_t gid) 3081 { 3082 struct mount *mp; 3083 struct vattr vattr; 3084 int error; 3085 3086 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3087 return (error); 3088 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3089 VATTR_NULL(&vattr); 3090 vattr.va_uid = uid; 3091 vattr.va_gid = gid; 3092 #ifdef MAC 3093 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3094 vattr.va_gid); 3095 if (error == 0) 3096 #endif 3097 error = VOP_SETATTR(vp, &vattr, cred); 3098 VOP_UNLOCK(vp); 3099 vn_finished_write(mp); 3100 return (error); 3101 } 3102 3103 /* 3104 * Set ownership given a path name. 
3105 */ 3106 #ifndef _SYS_SYSPROTO_H_ 3107 struct chown_args { 3108 char *path; 3109 int uid; 3110 int gid; 3111 }; 3112 #endif 3113 int 3114 sys_chown(struct thread *td, struct chown_args *uap) 3115 { 3116 3117 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3118 uap->gid, 0)); 3119 } 3120 3121 #ifndef _SYS_SYSPROTO_H_ 3122 struct fchownat_args { 3123 int fd; 3124 const char * path; 3125 uid_t uid; 3126 gid_t gid; 3127 int flag; 3128 }; 3129 #endif 3130 int 3131 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3132 { 3133 3134 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3135 uap->gid, uap->flag)); 3136 } 3137 3138 int 3139 kern_fchownat(struct thread *td, int fd, const char *path, 3140 enum uio_seg pathseg, int uid, int gid, int flag) 3141 { 3142 struct nameidata nd; 3143 int error; 3144 3145 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3146 AT_EMPTY_PATH)) != 0) 3147 return (EINVAL); 3148 3149 AUDIT_ARG_OWNER(uid, gid); 3150 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3151 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3152 fd, &cap_fchown_rights); 3153 3154 if ((error = namei(&nd)) != 0) 3155 return (error); 3156 NDFREE_PNBUF(&nd); 3157 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3158 vrele(nd.ni_vp); 3159 return (error); 3160 } 3161 3162 /* 3163 * Set ownership given a path name, do not cross symlinks. 3164 */ 3165 #ifndef _SYS_SYSPROTO_H_ 3166 struct lchown_args { 3167 char *path; 3168 int uid; 3169 int gid; 3170 }; 3171 #endif 3172 int 3173 sys_lchown(struct thread *td, struct lchown_args *uap) 3174 { 3175 3176 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3177 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3178 } 3179 3180 /* 3181 * Set ownership given a file descriptor. 
3182 */ 3183 #ifndef _SYS_SYSPROTO_H_ 3184 struct fchown_args { 3185 int fd; 3186 int uid; 3187 int gid; 3188 }; 3189 #endif 3190 int 3191 sys_fchown(struct thread *td, struct fchown_args *uap) 3192 { 3193 struct file *fp; 3194 int error; 3195 3196 AUDIT_ARG_FD(uap->fd); 3197 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3198 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3199 if (error != 0) 3200 return (error); 3201 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3202 fdrop(fp, td); 3203 return (error); 3204 } 3205 3206 /* 3207 * Common implementation code for utimes(), lutimes(), and futimes(). 3208 */ 3209 static int 3210 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3211 struct timespec *tsp) 3212 { 3213 struct timeval tv[2]; 3214 const struct timeval *tvp; 3215 int error; 3216 3217 if (usrtvp == NULL) { 3218 vfs_timestamp(&tsp[0]); 3219 tsp[1] = tsp[0]; 3220 } else { 3221 if (tvpseg == UIO_SYSSPACE) { 3222 tvp = usrtvp; 3223 } else { 3224 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3225 return (error); 3226 tvp = tv; 3227 } 3228 3229 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3230 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3231 return (EINVAL); 3232 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3233 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3234 } 3235 return (0); 3236 } 3237 3238 /* 3239 * Common implementation code for futimens(), utimensat(). 
3240 */ 3241 #define UTIMENS_NULL 0x1 3242 #define UTIMENS_EXIT 0x2 3243 static int 3244 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3245 struct timespec *tsp, int *retflags) 3246 { 3247 struct timespec tsnow; 3248 int error; 3249 3250 vfs_timestamp(&tsnow); 3251 *retflags = 0; 3252 if (usrtsp == NULL) { 3253 tsp[0] = tsnow; 3254 tsp[1] = tsnow; 3255 *retflags |= UTIMENS_NULL; 3256 return (0); 3257 } 3258 if (tspseg == UIO_SYSSPACE) { 3259 tsp[0] = usrtsp[0]; 3260 tsp[1] = usrtsp[1]; 3261 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3262 return (error); 3263 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3264 *retflags |= UTIMENS_EXIT; 3265 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3266 *retflags |= UTIMENS_NULL; 3267 if (tsp[0].tv_nsec == UTIME_OMIT) 3268 tsp[0].tv_sec = VNOVAL; 3269 else if (tsp[0].tv_nsec == UTIME_NOW) 3270 tsp[0] = tsnow; 3271 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3272 return (EINVAL); 3273 if (tsp[1].tv_nsec == UTIME_OMIT) 3274 tsp[1].tv_sec = VNOVAL; 3275 else if (tsp[1].tv_nsec == UTIME_NOW) 3276 tsp[1] = tsnow; 3277 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3278 return (EINVAL); 3279 3280 return (0); 3281 } 3282 3283 /* 3284 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3285 * and utimensat(). 
3286 */ 3287 static int 3288 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3289 int numtimes, int nullflag) 3290 { 3291 struct mount *mp; 3292 struct vattr vattr; 3293 int error; 3294 bool setbirthtime; 3295 3296 setbirthtime = false; 3297 vattr.va_birthtime.tv_sec = VNOVAL; 3298 vattr.va_birthtime.tv_nsec = 0; 3299 3300 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3301 return (error); 3302 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3303 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3304 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3305 setbirthtime = true; 3306 VATTR_NULL(&vattr); 3307 vattr.va_atime = ts[0]; 3308 vattr.va_mtime = ts[1]; 3309 if (setbirthtime) 3310 vattr.va_birthtime = ts[1]; 3311 if (numtimes > 2) 3312 vattr.va_birthtime = ts[2]; 3313 if (nullflag) 3314 vattr.va_vaflags |= VA_UTIMES_NULL; 3315 #ifdef MAC 3316 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3317 vattr.va_mtime); 3318 #endif 3319 if (error == 0) 3320 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3321 VOP_UNLOCK(vp); 3322 vn_finished_write(mp); 3323 return (error); 3324 } 3325 3326 /* 3327 * Set the access and modification times of a file. 
3328 */ 3329 #ifndef _SYS_SYSPROTO_H_ 3330 struct utimes_args { 3331 char *path; 3332 struct timeval *tptr; 3333 }; 3334 #endif 3335 int 3336 sys_utimes(struct thread *td, struct utimes_args *uap) 3337 { 3338 3339 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3340 uap->tptr, UIO_USERSPACE)); 3341 } 3342 3343 #ifndef _SYS_SYSPROTO_H_ 3344 struct futimesat_args { 3345 int fd; 3346 const char * path; 3347 const struct timeval * times; 3348 }; 3349 #endif 3350 int 3351 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3352 { 3353 3354 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3355 uap->times, UIO_USERSPACE)); 3356 } 3357 3358 int 3359 kern_utimesat(struct thread *td, int fd, const char *path, 3360 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3361 { 3362 struct nameidata nd; 3363 struct timespec ts[2]; 3364 int error; 3365 3366 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3367 return (error); 3368 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3369 &cap_futimes_rights); 3370 3371 if ((error = namei(&nd)) != 0) 3372 return (error); 3373 NDFREE_PNBUF(&nd); 3374 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3375 vrele(nd.ni_vp); 3376 return (error); 3377 } 3378 3379 /* 3380 * Set the access and modification times of a file. 
3381 */ 3382 #ifndef _SYS_SYSPROTO_H_ 3383 struct lutimes_args { 3384 char *path; 3385 struct timeval *tptr; 3386 }; 3387 #endif 3388 int 3389 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3390 { 3391 3392 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3393 UIO_USERSPACE)); 3394 } 3395 3396 int 3397 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3398 const struct timeval *tptr, enum uio_seg tptrseg) 3399 { 3400 struct timespec ts[2]; 3401 struct nameidata nd; 3402 int error; 3403 3404 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3405 return (error); 3406 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3407 if ((error = namei(&nd)) != 0) 3408 return (error); 3409 NDFREE_PNBUF(&nd); 3410 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3411 vrele(nd.ni_vp); 3412 return (error); 3413 } 3414 3415 /* 3416 * Set the access and modification times of a file. 3417 */ 3418 #ifndef _SYS_SYSPROTO_H_ 3419 struct futimes_args { 3420 int fd; 3421 struct timeval *tptr; 3422 }; 3423 #endif 3424 int 3425 sys_futimes(struct thread *td, struct futimes_args *uap) 3426 { 3427 3428 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3429 } 3430 3431 int 3432 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3433 enum uio_seg tptrseg) 3434 { 3435 struct timespec ts[2]; 3436 struct file *fp; 3437 int error; 3438 3439 AUDIT_ARG_FD(fd); 3440 error = getutimes(tptr, tptrseg, ts); 3441 if (error != 0) 3442 return (error); 3443 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3444 if (error != 0) 3445 return (error); 3446 #ifdef AUDIT 3447 if (AUDITING_TD(td)) { 3448 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3449 AUDIT_ARG_VNODE1(fp->f_vnode); 3450 VOP_UNLOCK(fp->f_vnode); 3451 } 3452 #endif 3453 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3454 fdrop(fp, td); 3455 return (error); 3456 } 3457 3458 int 3459 sys_futimens(struct thread *td, struct futimens_args *uap) 3460 { 3461 
3462 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3463 } 3464 3465 int 3466 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3467 enum uio_seg tptrseg) 3468 { 3469 struct timespec ts[2]; 3470 struct file *fp; 3471 int error, flags; 3472 3473 AUDIT_ARG_FD(fd); 3474 error = getutimens(tptr, tptrseg, ts, &flags); 3475 if (error != 0) 3476 return (error); 3477 if (flags & UTIMENS_EXIT) 3478 return (0); 3479 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3480 if (error != 0) 3481 return (error); 3482 #ifdef AUDIT 3483 if (AUDITING_TD(td)) { 3484 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3485 AUDIT_ARG_VNODE1(fp->f_vnode); 3486 VOP_UNLOCK(fp->f_vnode); 3487 } 3488 #endif 3489 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3490 fdrop(fp, td); 3491 return (error); 3492 } 3493 3494 int 3495 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3496 { 3497 3498 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3499 uap->times, UIO_USERSPACE, uap->flag)); 3500 } 3501 3502 int 3503 kern_utimensat(struct thread *td, int fd, const char *path, 3504 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3505 int flag) 3506 { 3507 struct nameidata nd; 3508 struct timespec ts[2]; 3509 int error, flags; 3510 3511 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3512 AT_EMPTY_PATH)) != 0) 3513 return (EINVAL); 3514 3515 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3516 return (error); 3517 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3518 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3519 pathseg, path, fd, &cap_futimes_rights); 3520 if ((error = namei(&nd)) != 0) 3521 return (error); 3522 /* 3523 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3524 * POSIX states: 3525 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3526 * "Search permission is denied by a component of the path prefix." 
3527 */ 3528 NDFREE_PNBUF(&nd); 3529 if ((flags & UTIMENS_EXIT) == 0) 3530 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3531 vrele(nd.ni_vp); 3532 return (error); 3533 } 3534 3535 /* 3536 * Truncate a file given its path name. 3537 */ 3538 #ifndef _SYS_SYSPROTO_H_ 3539 struct truncate_args { 3540 char *path; 3541 int pad; 3542 off_t length; 3543 }; 3544 #endif 3545 int 3546 sys_truncate(struct thread *td, struct truncate_args *uap) 3547 { 3548 3549 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3550 } 3551 3552 int 3553 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3554 off_t length) 3555 { 3556 struct mount *mp; 3557 struct vnode *vp; 3558 void *rl_cookie; 3559 struct nameidata nd; 3560 int error; 3561 3562 if (length < 0) 3563 return (EINVAL); 3564 NDPREINIT(&nd); 3565 retry: 3566 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3567 if ((error = namei(&nd)) != 0) 3568 return (error); 3569 vp = nd.ni_vp; 3570 NDFREE_PNBUF(&nd); 3571 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3572 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3573 vn_rangelock_unlock(vp, rl_cookie); 3574 vrele(vp); 3575 return (error); 3576 } 3577 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3578 if (vp->v_type == VDIR) { 3579 error = EISDIR; 3580 goto out; 3581 } 3582 #ifdef MAC 3583 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3584 if (error != 0) 3585 goto out; 3586 #endif 3587 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3588 if (error != 0) 3589 goto out; 3590 3591 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3592 out: 3593 VOP_UNLOCK(vp); 3594 vn_finished_write(mp); 3595 vn_rangelock_unlock(vp, rl_cookie); 3596 vrele(vp); 3597 if (error == ERELOOKUP) 3598 goto retry; 3599 return (error); 3600 } 3601 3602 #if defined(COMPAT_43) 3603 /* 3604 * Truncate a file given its path name. 
3605 */ 3606 #ifndef _SYS_SYSPROTO_H_ 3607 struct otruncate_args { 3608 char *path; 3609 long length; 3610 }; 3611 #endif 3612 int 3613 otruncate(struct thread *td, struct otruncate_args *uap) 3614 { 3615 3616 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3617 } 3618 #endif /* COMPAT_43 */ 3619 3620 #if defined(COMPAT_FREEBSD6) 3621 /* Versions with the pad argument */ 3622 int 3623 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3624 { 3625 3626 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3627 } 3628 3629 int 3630 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3631 { 3632 3633 return (kern_ftruncate(td, uap->fd, uap->length)); 3634 } 3635 #endif 3636 3637 int 3638 kern_fsync(struct thread *td, int fd, bool fullsync) 3639 { 3640 struct vnode *vp; 3641 struct mount *mp; 3642 struct file *fp; 3643 int error; 3644 3645 AUDIT_ARG_FD(fd); 3646 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3647 if (error != 0) 3648 return (error); 3649 vp = fp->f_vnode; 3650 #if 0 3651 if (!fullsync) 3652 /* XXXKIB: compete outstanding aio writes */; 3653 #endif 3654 retry: 3655 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3656 if (error != 0) 3657 goto drop; 3658 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3659 AUDIT_ARG_VNODE1(vp); 3660 vnode_pager_clean_async(vp); 3661 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3662 VOP_UNLOCK(vp); 3663 vn_finished_write(mp); 3664 if (error == ERELOOKUP) 3665 goto retry; 3666 drop: 3667 fdrop(fp, td); 3668 return (error); 3669 } 3670 3671 /* 3672 * Sync an open file. 
3673 */ 3674 #ifndef _SYS_SYSPROTO_H_ 3675 struct fsync_args { 3676 int fd; 3677 }; 3678 #endif 3679 int 3680 sys_fsync(struct thread *td, struct fsync_args *uap) 3681 { 3682 3683 return (kern_fsync(td, uap->fd, true)); 3684 } 3685 3686 int 3687 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3688 { 3689 3690 return (kern_fsync(td, uap->fd, false)); 3691 } 3692 3693 /* 3694 * Rename files. Source and destination must either both be directories, or 3695 * both not be directories. If target is a directory, it must be empty. 3696 */ 3697 #ifndef _SYS_SYSPROTO_H_ 3698 struct rename_args { 3699 char *from; 3700 char *to; 3701 }; 3702 #endif 3703 int 3704 sys_rename(struct thread *td, struct rename_args *uap) 3705 { 3706 3707 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3708 uap->to, UIO_USERSPACE)); 3709 } 3710 3711 #ifndef _SYS_SYSPROTO_H_ 3712 struct renameat_args { 3713 int oldfd; 3714 char *old; 3715 int newfd; 3716 char *new; 3717 }; 3718 #endif 3719 int 3720 sys_renameat(struct thread *td, struct renameat_args *uap) 3721 { 3722 3723 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3724 UIO_USERSPACE)); 3725 } 3726 3727 #ifdef MAC 3728 static int 3729 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3730 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3731 { 3732 int error; 3733 3734 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3735 pathseg, old, oldfd, &cap_renameat_source_rights); 3736 if ((error = namei(fromnd)) != 0) 3737 return (error); 3738 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3739 fromnd->ni_vp, &fromnd->ni_cnd); 3740 VOP_UNLOCK(fromnd->ni_dvp); 3741 if (fromnd->ni_dvp != fromnd->ni_vp) 3742 VOP_UNLOCK(fromnd->ni_vp); 3743 if (error != 0) { 3744 NDFREE_PNBUF(fromnd); 3745 vrele(fromnd->ni_dvp); 3746 vrele(fromnd->ni_vp); 3747 } 3748 return (error); 3749 } 3750 #endif 3751 3752 int 3753 kern_renameat(struct thread *td, 
int oldfd, const char *old, int newfd, 3754 const char *new, enum uio_seg pathseg) 3755 { 3756 struct mount *mp = NULL; 3757 struct vnode *tvp, *fvp, *tdvp; 3758 struct nameidata fromnd, tond; 3759 uint64_t tondflags; 3760 int error; 3761 short irflag; 3762 3763 again: 3764 bwillwrite(); 3765 #ifdef MAC 3766 if (mac_vnode_check_rename_from_enabled()) { 3767 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3768 &fromnd); 3769 if (error != 0) 3770 return (error); 3771 } else { 3772 #endif 3773 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3774 pathseg, old, oldfd, &cap_renameat_source_rights); 3775 if ((error = namei(&fromnd)) != 0) 3776 return (error); 3777 #ifdef MAC 3778 } 3779 #endif 3780 fvp = fromnd.ni_vp; 3781 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3782 if (fromnd.ni_vp->v_type == VDIR) 3783 tondflags |= WILLBEDIR; 3784 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3785 &cap_renameat_target_rights); 3786 if ((error = namei(&tond)) != 0) { 3787 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3788 if (error == EISDIR && fvp->v_type == VDIR) 3789 error = EINVAL; 3790 NDFREE_PNBUF(&fromnd); 3791 vrele(fromnd.ni_dvp); 3792 vrele(fvp); 3793 goto out1; 3794 } 3795 tdvp = tond.ni_dvp; 3796 tvp = tond.ni_vp; 3797 error = vn_start_write(fvp, &mp, V_NOWAIT); 3798 if (error != 0) { 3799 NDFREE_PNBUF(&fromnd); 3800 NDFREE_PNBUF(&tond); 3801 if (tvp != NULL) 3802 vput(tvp); 3803 if (tdvp == tvp) 3804 vrele(tdvp); 3805 else 3806 vput(tdvp); 3807 vrele(fromnd.ni_dvp); 3808 vrele(fvp); 3809 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3810 if (error != 0) 3811 return (error); 3812 goto again; 3813 } 3814 irflag = vn_irflag_read(fvp); 3815 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3816 (irflag & VIRF_NAMEDDIR) != 0) { 3817 error = EINVAL; 3818 goto out; 3819 } 3820 if (tvp != NULL) { 3821 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3822 error = ENOTDIR; 3823 goto out; 3824 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3825 error = EISDIR; 3826 goto out; 3827 } 3828 #ifdef CAPABILITIES 3829 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3830 /* 3831 * If the target already exists we require CAP_UNLINKAT 3832 * from 'newfd', when newfd was used for the lookup. 3833 */ 3834 error = cap_check(&tond.ni_filecaps.fc_rights, 3835 &cap_unlinkat_rights); 3836 if (error != 0) 3837 goto out; 3838 } 3839 #endif 3840 } 3841 if (fvp == tdvp) { 3842 error = EINVAL; 3843 goto out; 3844 } 3845 /* 3846 * If the source is the same as the destination (that is, if they 3847 * are links to the same vnode), then there is nothing to do. 
3848 */ 3849 if (fvp == tvp) 3850 error = ERESTART; 3851 #ifdef MAC 3852 else 3853 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3854 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3855 #endif 3856 out: 3857 if (error == 0) { 3858 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3859 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3860 NDFREE_PNBUF(&fromnd); 3861 NDFREE_PNBUF(&tond); 3862 } else { 3863 NDFREE_PNBUF(&fromnd); 3864 NDFREE_PNBUF(&tond); 3865 if (tvp != NULL) 3866 vput(tvp); 3867 if (tdvp == tvp) 3868 vrele(tdvp); 3869 else 3870 vput(tdvp); 3871 vrele(fromnd.ni_dvp); 3872 vrele(fvp); 3873 } 3874 vn_finished_write(mp); 3875 out1: 3876 if (error == ERESTART) 3877 return (0); 3878 if (error == ERELOOKUP) 3879 goto again; 3880 return (error); 3881 } 3882 3883 /* 3884 * Make a directory file. 3885 */ 3886 #ifndef _SYS_SYSPROTO_H_ 3887 struct mkdir_args { 3888 char *path; 3889 int mode; 3890 }; 3891 #endif 3892 int 3893 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3894 { 3895 3896 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3897 uap->mode)); 3898 } 3899 3900 #ifndef _SYS_SYSPROTO_H_ 3901 struct mkdirat_args { 3902 int fd; 3903 char *path; 3904 mode_t mode; 3905 }; 3906 #endif 3907 int 3908 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3909 { 3910 3911 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3912 } 3913 3914 int 3915 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3916 int mode) 3917 { 3918 struct mount *mp; 3919 struct vattr vattr; 3920 struct nameidata nd; 3921 int error; 3922 3923 AUDIT_ARG_MODE(mode); 3924 NDPREINIT(&nd); 3925 restart: 3926 bwillwrite(); 3927 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3928 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3929 segflg, path, fd, &cap_mkdirat_rights); 3930 if ((error = namei(&nd)) != 0) 3931 return (error); 3932 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 
3933 NDFREE_PNBUF(&nd); 3934 vput(nd.ni_dvp); 3935 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3936 return (error); 3937 goto restart; 3938 } 3939 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 3940 error = EINVAL; 3941 goto out; 3942 } 3943 VATTR_NULL(&vattr); 3944 vattr.va_type = VDIR; 3945 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3946 #ifdef MAC 3947 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3948 &vattr); 3949 if (error != 0) 3950 goto out; 3951 #endif 3952 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3953 out: 3954 NDFREE_PNBUF(&nd); 3955 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3956 vn_finished_write(mp); 3957 if (error == ERELOOKUP) 3958 goto restart; 3959 return (error); 3960 } 3961 3962 /* 3963 * Remove a directory file. 3964 */ 3965 #ifndef _SYS_SYSPROTO_H_ 3966 struct rmdir_args { 3967 char *path; 3968 }; 3969 #endif 3970 int 3971 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3972 { 3973 3974 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3975 0)); 3976 } 3977 3978 int 3979 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3980 enum uio_seg pathseg, int flag) 3981 { 3982 struct mount *mp; 3983 struct vnode *vp; 3984 struct file *fp; 3985 struct nameidata nd; 3986 cap_rights_t rights; 3987 int error; 3988 3989 fp = NULL; 3990 if (fd != FD_NONE) { 3991 error = getvnode(td, fd, cap_rights_init_one(&rights, 3992 CAP_LOOKUP), &fp); 3993 if (error != 0) 3994 return (error); 3995 } 3996 3997 NDPREINIT(&nd); 3998 restart: 3999 bwillwrite(); 4000 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 4001 at2cnpflags(flag, AT_RESOLVE_BENEATH), 4002 pathseg, path, dfd, &cap_unlinkat_rights); 4003 if ((error = namei(&nd)) != 0) 4004 goto fdout; 4005 vp = nd.ni_vp; 4006 if (vp->v_type != VDIR) { 4007 error = ENOTDIR; 4008 goto out; 4009 } 4010 /* 4011 * No rmdir "." please. 
4012 */ 4013 if (nd.ni_dvp == vp) { 4014 error = EINVAL; 4015 goto out; 4016 } 4017 /* 4018 * The root of a mounted filesystem cannot be deleted. 4019 */ 4020 if (vp->v_vflag & VV_ROOT) { 4021 error = EBUSY; 4022 goto out; 4023 } 4024 4025 if (fp != NULL && fp->f_vnode != vp) { 4026 if (VN_IS_DOOMED(fp->f_vnode)) 4027 error = EBADF; 4028 else 4029 error = EDEADLK; 4030 goto out; 4031 } 4032 4033 #ifdef MAC 4034 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4035 &nd.ni_cnd); 4036 if (error != 0) 4037 goto out; 4038 #endif 4039 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4040 NDFREE_PNBUF(&nd); 4041 vput(vp); 4042 if (nd.ni_dvp == vp) 4043 vrele(nd.ni_dvp); 4044 else 4045 vput(nd.ni_dvp); 4046 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4047 goto fdout; 4048 goto restart; 4049 } 4050 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4051 vn_finished_write(mp); 4052 out: 4053 NDFREE_PNBUF(&nd); 4054 vput(vp); 4055 if (nd.ni_dvp == vp) 4056 vrele(nd.ni_dvp); 4057 else 4058 vput(nd.ni_dvp); 4059 if (error == ERELOOKUP) 4060 goto restart; 4061 fdout: 4062 if (fp != NULL) 4063 fdrop(fp, td); 4064 return (error); 4065 } 4066 4067 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4068 int 4069 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4070 long *basep, void (*func)(struct freebsd11_dirent *)) 4071 { 4072 struct freebsd11_dirent dstdp; 4073 struct dirent *dp, *edp; 4074 char *dirbuf; 4075 off_t base; 4076 ssize_t resid, ucount; 4077 int error; 4078 4079 /* XXX arbitrary sanity limit on `count'. 
*/ 4080 count = min(count, 64 * 1024); 4081 4082 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4083 4084 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4085 UIO_SYSSPACE); 4086 if (error != 0) 4087 goto done; 4088 if (basep != NULL) 4089 *basep = base; 4090 4091 ucount = 0; 4092 for (dp = (struct dirent *)dirbuf, 4093 edp = (struct dirent *)&dirbuf[count - resid]; 4094 ucount < count && dp < edp; ) { 4095 if (dp->d_reclen == 0) 4096 break; 4097 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4098 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4099 continue; 4100 dstdp.d_type = dp->d_type; 4101 dstdp.d_namlen = dp->d_namlen; 4102 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4103 if (dstdp.d_fileno != dp->d_fileno) { 4104 switch (ino64_trunc_error) { 4105 default: 4106 case 0: 4107 break; 4108 case 1: 4109 error = EOVERFLOW; 4110 goto done; 4111 case 2: 4112 dstdp.d_fileno = UINT32_MAX; 4113 break; 4114 } 4115 } 4116 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4117 ((dp->d_namlen + 1 + 3) &~ 3); 4118 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4119 bzero(dstdp.d_name + dstdp.d_namlen, 4120 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4121 dstdp.d_namlen); 4122 MPASS(dstdp.d_reclen <= dp->d_reclen); 4123 MPASS(ucount + dstdp.d_reclen <= count); 4124 if (func != NULL) 4125 func(&dstdp); 4126 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4127 if (error != 0) 4128 break; 4129 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4130 ucount += dstdp.d_reclen; 4131 } 4132 4133 done: 4134 free(dirbuf, M_TEMP); 4135 if (error == 0) 4136 td->td_retval[0] = ucount; 4137 return (error); 4138 } 4139 #endif /* COMPAT */ 4140 4141 #ifdef COMPAT_43 4142 static void 4143 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4144 { 4145 #if (BYTE_ORDER == LITTLE_ENDIAN) 4146 /* 4147 * The expected low byte of dp->d_namlen is our dp->d_type. 4148 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4149 */ 4150 dp->d_type = dp->d_namlen; 4151 dp->d_namlen = 0; 4152 #else 4153 /* 4154 * The dp->d_type is the high byte of the expected dp->d_namlen, 4155 * so must be zero'ed. 4156 */ 4157 dp->d_type = 0; 4158 #endif 4159 } 4160 4161 /* 4162 * Read a block of directory entries in a filesystem independent format. 4163 */ 4164 #ifndef _SYS_SYSPROTO_H_ 4165 struct ogetdirentries_args { 4166 int fd; 4167 char *buf; 4168 u_int count; 4169 long *basep; 4170 }; 4171 #endif 4172 int 4173 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4174 { 4175 long loff; 4176 int error; 4177 4178 error = kern_ogetdirentries(td, uap, &loff); 4179 if (error == 0) 4180 error = copyout(&loff, uap->basep, sizeof(long)); 4181 return (error); 4182 } 4183 4184 int 4185 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4186 long *ploff) 4187 { 4188 long base; 4189 int error; 4190 4191 /* XXX arbitrary sanity limit on `count'. */ 4192 if (uap->count > 64 * 1024) 4193 return (EINVAL); 4194 4195 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4196 &base, ogetdirentries_cvt); 4197 4198 if (error == 0 && uap->basep != NULL) 4199 error = copyout(&base, uap->basep, sizeof(long)); 4200 4201 return (error); 4202 } 4203 #endif /* COMPAT_43 */ 4204 4205 #if defined(COMPAT_FREEBSD11) 4206 #ifndef _SYS_SYSPROTO_H_ 4207 struct freebsd11_getdirentries_args { 4208 int fd; 4209 char *buf; 4210 u_int count; 4211 long *basep; 4212 }; 4213 #endif 4214 int 4215 freebsd11_getdirentries(struct thread *td, 4216 struct freebsd11_getdirentries_args *uap) 4217 { 4218 long base; 4219 int error; 4220 4221 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4222 &base, NULL); 4223 4224 if (error == 0 && uap->basep != NULL) 4225 error = copyout(&base, uap->basep, sizeof(long)); 4226 return (error); 4227 } 4228 4229 int 4230 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4231 { 4232 struct 
freebsd11_getdirentries_args ap; 4233 4234 ap.fd = uap->fd; 4235 ap.buf = uap->buf; 4236 ap.count = uap->count; 4237 ap.basep = NULL; 4238 return (freebsd11_getdirentries(td, &ap)); 4239 } 4240 #endif /* COMPAT_FREEBSD11 */ 4241 4242 /* 4243 * Read a block of directory entries in a filesystem independent format. 4244 */ 4245 int 4246 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4247 { 4248 off_t base; 4249 int error; 4250 4251 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4252 NULL, UIO_USERSPACE); 4253 if (error != 0) 4254 return (error); 4255 if (uap->basep != NULL) 4256 error = copyout(&base, uap->basep, sizeof(off_t)); 4257 return (error); 4258 } 4259 4260 int 4261 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4262 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4263 { 4264 struct vnode *vp; 4265 struct file *fp; 4266 struct uio auio; 4267 struct iovec aiov; 4268 off_t loff; 4269 int error, eofflag; 4270 off_t foffset; 4271 4272 AUDIT_ARG_FD(fd); 4273 if (count > IOSIZE_MAX) 4274 return (EINVAL); 4275 auio.uio_resid = count; 4276 error = getvnode(td, fd, &cap_read_rights, &fp); 4277 if (error != 0) 4278 return (error); 4279 if ((fp->f_flag & FREAD) == 0) { 4280 fdrop(fp, td); 4281 return (EBADF); 4282 } 4283 vp = fp->f_vnode; 4284 foffset = foffset_lock(fp, 0); 4285 unionread: 4286 if (vp->v_type != VDIR) { 4287 error = EINVAL; 4288 goto fail; 4289 } 4290 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4291 error = ENOENT; 4292 goto fail; 4293 } 4294 aiov.iov_base = buf; 4295 aiov.iov_len = count; 4296 auio.uio_iov = &aiov; 4297 auio.uio_iovcnt = 1; 4298 auio.uio_rw = UIO_READ; 4299 auio.uio_segflg = bufseg; 4300 auio.uio_td = td; 4301 vn_lock(vp, LK_SHARED | LK_RETRY); 4302 AUDIT_ARG_VNODE1(vp); 4303 loff = auio.uio_offset = foffset; 4304 #ifdef MAC 4305 error = mac_vnode_check_readdir(td->td_ucred, vp); 4306 if (error == 0) 4307 #endif 4308 error = VOP_READDIR(vp, &auio, 
fp->f_cred, &eofflag, NULL, 4309 NULL); 4310 foffset = auio.uio_offset; 4311 if (error != 0) { 4312 VOP_UNLOCK(vp); 4313 goto fail; 4314 } 4315 if (count == auio.uio_resid && 4316 (vp->v_vflag & VV_ROOT) && 4317 (vp->v_mount->mnt_flag & MNT_UNION)) { 4318 struct vnode *tvp = vp; 4319 4320 vp = vp->v_mount->mnt_vnodecovered; 4321 VREF(vp); 4322 fp->f_vnode = vp; 4323 foffset = 0; 4324 vput(tvp); 4325 goto unionread; 4326 } 4327 VOP_UNLOCK(vp); 4328 *basep = loff; 4329 if (residp != NULL) 4330 *residp = auio.uio_resid; 4331 td->td_retval[0] = count - auio.uio_resid; 4332 fail: 4333 foffset_unlock(fp, foffset, 0); 4334 fdrop(fp, td); 4335 return (error); 4336 } 4337 4338 /* 4339 * Set the mode mask for creation of filesystem nodes. 4340 */ 4341 #ifndef _SYS_SYSPROTO_H_ 4342 struct umask_args { 4343 int newmask; 4344 }; 4345 #endif 4346 int 4347 sys_umask(struct thread *td, struct umask_args *uap) 4348 { 4349 struct pwddesc *pdp; 4350 4351 pdp = td->td_proc->p_pd; 4352 PWDDESC_XLOCK(pdp); 4353 td->td_retval[0] = pdp->pd_cmask; 4354 pdp->pd_cmask = uap->newmask & ALLPERMS; 4355 PWDDESC_XUNLOCK(pdp); 4356 return (0); 4357 } 4358 4359 /* 4360 * Void all references to file by ripping underlying filesystem away from 4361 * vnode. 
4362 */ 4363 #ifndef _SYS_SYSPROTO_H_ 4364 struct revoke_args { 4365 char *path; 4366 }; 4367 #endif 4368 int 4369 sys_revoke(struct thread *td, struct revoke_args *uap) 4370 { 4371 struct vnode *vp; 4372 struct vattr vattr; 4373 struct nameidata nd; 4374 int error; 4375 4376 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4377 uap->path); 4378 if ((error = namei(&nd)) != 0) 4379 return (error); 4380 vp = nd.ni_vp; 4381 NDFREE_PNBUF(&nd); 4382 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4383 error = EINVAL; 4384 goto out; 4385 } 4386 #ifdef MAC 4387 error = mac_vnode_check_revoke(td->td_ucred, vp); 4388 if (error != 0) 4389 goto out; 4390 #endif 4391 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4392 if (error != 0) 4393 goto out; 4394 if (td->td_ucred->cr_uid != vattr.va_uid) { 4395 error = priv_check(td, PRIV_VFS_ADMIN); 4396 if (error != 0) 4397 goto out; 4398 } 4399 if (devfs_usecount(vp) > 0) 4400 VOP_REVOKE(vp, REVOKEALL); 4401 out: 4402 vput(vp); 4403 return (error); 4404 } 4405 4406 /* 4407 * This variant of getvnode() allows O_PATH files. Caller should 4408 * ensure that returned file and vnode are only used for compatible 4409 * semantics. 4410 */ 4411 int 4412 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4413 struct file **fpp) 4414 { 4415 struct file *fp; 4416 int error; 4417 4418 error = fget_unlocked(td, fd, rightsp, &fp); 4419 if (error != 0) 4420 return (error); 4421 4422 /* 4423 * The file could be not of the vnode type, or it may be not 4424 * yet fully initialized, in which case the f_vnode pointer 4425 * may be set, but f_ops is still badfileops. E.g., 4426 * devfs_open() transiently create such situation to 4427 * facilitate csw d_fdopen(). 4428 * 4429 * Dupfdopen() handling in kern_openat() installs the 4430 * half-baked file into the process descriptor table, allowing 4431 * other thread to dereference it. Guard against the race by 4432 * checking f_ops. 
4433 */ 4434 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4435 fdrop(fp, td); 4436 *fpp = NULL; 4437 return (EINVAL); 4438 } 4439 4440 *fpp = fp; 4441 return (0); 4442 } 4443 4444 /* 4445 * Convert a user file descriptor to a kernel file entry and check 4446 * that, if it is a capability, the correct rights are present. 4447 * A reference on the file entry is held upon returning. 4448 */ 4449 int 4450 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4451 { 4452 int error; 4453 4454 error = getvnode_path(td, fd, rightsp, fpp); 4455 if (__predict_false(error != 0)) 4456 return (error); 4457 4458 /* 4459 * Filter out O_PATH file descriptors, most getvnode() callers 4460 * do not call fo_ methods. 4461 */ 4462 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4463 fdrop(*fpp, td); 4464 *fpp = NULL; 4465 error = EBADF; 4466 } 4467 4468 return (error); 4469 } 4470 4471 /* 4472 * Get an (NFS) file handle. 4473 */ 4474 #ifndef _SYS_SYSPROTO_H_ 4475 struct lgetfh_args { 4476 char *fname; 4477 fhandle_t *fhp; 4478 }; 4479 #endif 4480 int 4481 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4482 { 4483 4484 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4485 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4486 } 4487 4488 #ifndef _SYS_SYSPROTO_H_ 4489 struct getfh_args { 4490 char *fname; 4491 fhandle_t *fhp; 4492 }; 4493 #endif 4494 int 4495 sys_getfh(struct thread *td, struct getfh_args *uap) 4496 { 4497 4498 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4499 uap->fhp, UIO_USERSPACE)); 4500 } 4501 4502 /* 4503 * syscall for the rpc.lockd to use to translate an open descriptor into 4504 * a NFS file handle. 4505 * 4506 * warning: do not remove the priv_check() call or this becomes one giant 4507 * security hole. 
4508 */ 4509 #ifndef _SYS_SYSPROTO_H_ 4510 struct getfhat_args { 4511 int fd; 4512 char *path; 4513 fhandle_t *fhp; 4514 int flags; 4515 }; 4516 #endif 4517 int 4518 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4519 { 4520 4521 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4522 uap->fhp, UIO_USERSPACE)); 4523 } 4524 4525 int 4526 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4527 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4528 { 4529 struct nameidata nd; 4530 fhandle_t fh; 4531 struct vnode *vp; 4532 int error; 4533 4534 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4535 return (EINVAL); 4536 error = priv_check(td, PRIV_VFS_GETFH); 4537 if (error != 0) 4538 return (error); 4539 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4540 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4541 fd); 4542 error = namei(&nd); 4543 if (error != 0) 4544 return (error); 4545 NDFREE_PNBUF(&nd); 4546 vp = nd.ni_vp; 4547 bzero(&fh, sizeof(fh)); 4548 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4549 error = VOP_VPTOFH(vp, &fh.fh_fid); 4550 vput(vp); 4551 if (error == 0) { 4552 if (fhseg == UIO_USERSPACE) 4553 error = copyout(&fh, fhp, sizeof (fh)); 4554 else 4555 memcpy(fhp, &fh, sizeof(fh)); 4556 } 4557 return (error); 4558 } 4559 4560 #ifndef _SYS_SYSPROTO_H_ 4561 struct fhlink_args { 4562 fhandle_t *fhp; 4563 const char *to; 4564 }; 4565 #endif 4566 int 4567 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4568 { 4569 4570 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4571 } 4572 4573 #ifndef _SYS_SYSPROTO_H_ 4574 struct fhlinkat_args { 4575 fhandle_t *fhp; 4576 int tofd; 4577 const char *to; 4578 }; 4579 #endif 4580 int 4581 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4582 { 4583 4584 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4585 } 4586 4587 static int 4588 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4589 enum uio_seg pathseg, fhandle_t *fhp) 4590 { 4591 fhandle_t fh; 4592 struct mount *mp; 4593 struct vnode *vp; 4594 int error; 4595 4596 error = priv_check(td, PRIV_VFS_GETFH); 4597 if (error != 0) 4598 return (error); 4599 error = copyin(fhp, &fh, sizeof(fh)); 4600 if (error != 0) 4601 return (error); 4602 do { 4603 bwillwrite(); 4604 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4605 return (ESTALE); 4606 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4607 vfs_unbusy(mp); 4608 if (error != 0) 4609 return (error); 4610 VOP_UNLOCK(vp); 4611 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4612 } while (error == EAGAIN || error == ERELOOKUP); 4613 return (error); 4614 } 4615 4616 #ifndef _SYS_SYSPROTO_H_ 4617 struct fhreadlink_args { 4618 fhandle_t *fhp; 4619 char *buf; 4620 size_t bufsize; 4621 }; 4622 #endif 4623 int 4624 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4625 { 4626 fhandle_t fh; 4627 struct mount *mp; 4628 struct vnode *vp; 4629 int error; 4630 4631 error = priv_check(td, PRIV_VFS_GETFH); 4632 if (error != 0) 4633 return (error); 4634 if (uap->bufsize > IOSIZE_MAX) 4635 return (EINVAL); 4636 error = copyin(uap->fhp, &fh, sizeof(fh)); 4637 if (error != 0) 4638 return (error); 4639 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4640 return (ESTALE); 4641 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4642 vfs_unbusy(mp); 4643 if (error != 0) 4644 return (error); 4645 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4646 vput(vp); 4647 return (error); 4648 } 4649 4650 /* 4651 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4652 * open descriptor. 4653 * 4654 * warning: do not remove the priv_check() call or this becomes one giant 4655 * security hole. 
4656 */ 4657 #ifndef _SYS_SYSPROTO_H_ 4658 struct fhopen_args { 4659 const struct fhandle *u_fhp; 4660 int flags; 4661 }; 4662 #endif 4663 int 4664 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4665 { 4666 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4667 } 4668 4669 int 4670 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4671 { 4672 struct mount *mp; 4673 struct vnode *vp; 4674 struct fhandle fhp; 4675 struct file *fp; 4676 int error, indx; 4677 bool named_attr; 4678 4679 error = priv_check(td, PRIV_VFS_FHOPEN); 4680 if (error != 0) 4681 return (error); 4682 4683 indx = -1; 4684 if ((flags & O_CREAT) != 0) 4685 return (EINVAL); 4686 error = openflags(&flags); 4687 if (error != 0) 4688 return (error); 4689 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4690 if (error != 0) 4691 return (error); 4692 /* find the mount point */ 4693 mp = vfs_busyfs(&fhp.fh_fsid); 4694 if (mp == NULL) 4695 return (ESTALE); 4696 /* now give me my vnode, it gets returned to me locked */ 4697 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4698 vfs_unbusy(mp); 4699 if (error != 0) 4700 return (error); 4701 4702 /* 4703 * Check to see if the file handle refers to a named attribute 4704 * directory or attribute. If it does, the O_NAMEDATTR flag 4705 * must have been specified. 4706 */ 4707 named_attr = (vn_irflag_read(vp) & 4708 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4709 if ((named_attr && (flags & O_NAMEDATTR) == 0) || 4710 (!named_attr && (flags & O_NAMEDATTR) != 0)) { 4711 vput(vp); 4712 return (ENOATTR); 4713 } 4714 4715 error = falloc_noinstall(td, &fp); 4716 if (error != 0) { 4717 vput(vp); 4718 return (error); 4719 } 4720 /* Set the flags early so the finit in devfs can pick them up. 
*/ 4721 fp->f_flag = flags & FMASK; 4722 4723 #ifdef INVARIANTS 4724 td->td_dupfd = -1; 4725 #endif 4726 error = vn_open_vnode(vp, flags, td->td_ucred, td, fp); 4727 if (error != 0) { 4728 KASSERT(fp->f_ops == &badfileops, 4729 ("VOP_OPEN in fhopen() set f_ops")); 4730 KASSERT(td->td_dupfd < 0, 4731 ("fhopen() encountered fdopen()")); 4732 4733 vput(vp); 4734 goto bad; 4735 } 4736 #ifdef INVARIANTS 4737 td->td_dupfd = 0; 4738 #endif 4739 finit_open(fp, vp, flags); 4740 VOP_UNLOCK(vp); 4741 if ((flags & O_TRUNC) != 0) { 4742 error = fo_truncate(fp, 0, td->td_ucred, td); 4743 if (error != 0) 4744 goto bad; 4745 } 4746 4747 error = finstall(td, fp, &indx, flags, NULL); 4748 bad: 4749 fdrop(fp, td); 4750 td->td_retval[0] = indx; 4751 return (error); 4752 } 4753 4754 /* 4755 * Stat an (NFS) file handle. 4756 */ 4757 #ifndef _SYS_SYSPROTO_H_ 4758 struct fhstat_args { 4759 struct fhandle *u_fhp; 4760 struct stat *sb; 4761 }; 4762 #endif 4763 int 4764 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4765 { 4766 struct stat sb; 4767 struct fhandle fh; 4768 int error; 4769 4770 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4771 if (error != 0) 4772 return (error); 4773 error = kern_fhstat(td, fh, &sb); 4774 if (error == 0) 4775 error = copyout(&sb, uap->sb, sizeof(sb)); 4776 return (error); 4777 } 4778 4779 int 4780 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4781 { 4782 struct mount *mp; 4783 struct vnode *vp; 4784 int error; 4785 4786 error = priv_check(td, PRIV_VFS_FHSTAT); 4787 if (error != 0) 4788 return (error); 4789 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4790 return (ESTALE); 4791 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4792 vfs_unbusy(mp); 4793 if (error != 0) 4794 return (error); 4795 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4796 vput(vp); 4797 return (error); 4798 } 4799 4800 /* 4801 * Implement fstatfs() for (NFS) file handles. 
4802 */ 4803 #ifndef _SYS_SYSPROTO_H_ 4804 struct fhstatfs_args { 4805 struct fhandle *u_fhp; 4806 struct statfs *buf; 4807 }; 4808 #endif 4809 int 4810 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4811 { 4812 struct statfs *sfp; 4813 fhandle_t fh; 4814 int error; 4815 4816 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4817 if (error != 0) 4818 return (error); 4819 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4820 error = kern_fhstatfs(td, fh, sfp); 4821 if (error == 0) 4822 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4823 free(sfp, M_STATFS); 4824 return (error); 4825 } 4826 4827 int 4828 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4829 { 4830 struct mount *mp; 4831 struct vnode *vp; 4832 int error; 4833 4834 error = priv_check(td, PRIV_VFS_FHSTATFS); 4835 if (error != 0) 4836 return (error); 4837 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4838 return (ESTALE); 4839 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4840 if (error != 0) { 4841 vfs_unbusy(mp); 4842 return (error); 4843 } 4844 vput(vp); 4845 error = prison_canseemount(td->td_ucred, mp); 4846 if (error != 0) 4847 goto out; 4848 #ifdef MAC 4849 error = mac_mount_check_stat(td->td_ucred, mp); 4850 if (error != 0) 4851 goto out; 4852 #endif 4853 error = VFS_STATFS(mp, buf); 4854 out: 4855 vfs_unbusy(mp); 4856 return (error); 4857 } 4858 4859 /* 4860 * Unlike madvise(2), we do not make a best effort to remember every 4861 * possible caching hint. Instead, we remember the last setting with 4862 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4863 * region of any current setting. 
4864 */ 4865 int 4866 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4867 int advice) 4868 { 4869 struct fadvise_info *fa, *new; 4870 struct file *fp; 4871 struct vnode *vp; 4872 off_t end; 4873 int error; 4874 4875 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4876 return (EINVAL); 4877 AUDIT_ARG_VALUE(advice); 4878 switch (advice) { 4879 case POSIX_FADV_SEQUENTIAL: 4880 case POSIX_FADV_RANDOM: 4881 case POSIX_FADV_NOREUSE: 4882 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4883 break; 4884 case POSIX_FADV_NORMAL: 4885 case POSIX_FADV_WILLNEED: 4886 case POSIX_FADV_DONTNEED: 4887 new = NULL; 4888 break; 4889 default: 4890 return (EINVAL); 4891 } 4892 /* XXX: CAP_POSIX_FADVISE? */ 4893 AUDIT_ARG_FD(fd); 4894 error = fget(td, fd, &cap_no_rights, &fp); 4895 if (error != 0) 4896 goto out; 4897 AUDIT_ARG_FILE(td->td_proc, fp); 4898 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4899 error = ESPIPE; 4900 goto out; 4901 } 4902 if (fp->f_type != DTYPE_VNODE) { 4903 error = ENODEV; 4904 goto out; 4905 } 4906 vp = fp->f_vnode; 4907 if (vp->v_type != VREG) { 4908 error = ENODEV; 4909 goto out; 4910 } 4911 if (len == 0) 4912 end = OFF_MAX; 4913 else 4914 end = offset + len - 1; 4915 switch (advice) { 4916 case POSIX_FADV_SEQUENTIAL: 4917 case POSIX_FADV_RANDOM: 4918 case POSIX_FADV_NOREUSE: 4919 /* 4920 * Try to merge any existing non-standard region with 4921 * this new region if possible, otherwise create a new 4922 * non-standard region for this request. 
4923 */ 4924 mtx_pool_lock(mtxpool_sleep, fp); 4925 fa = fp->f_advice; 4926 if (fa != NULL && fa->fa_advice == advice && 4927 ((fa->fa_start <= end && fa->fa_end >= offset) || 4928 (end != OFF_MAX && fa->fa_start == end + 1) || 4929 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4930 if (offset < fa->fa_start) 4931 fa->fa_start = offset; 4932 if (end > fa->fa_end) 4933 fa->fa_end = end; 4934 } else { 4935 new->fa_advice = advice; 4936 new->fa_start = offset; 4937 new->fa_end = end; 4938 fp->f_advice = new; 4939 new = fa; 4940 } 4941 mtx_pool_unlock(mtxpool_sleep, fp); 4942 break; 4943 case POSIX_FADV_NORMAL: 4944 /* 4945 * If a the "normal" region overlaps with an existing 4946 * non-standard region, trim or remove the 4947 * non-standard region. 4948 */ 4949 mtx_pool_lock(mtxpool_sleep, fp); 4950 fa = fp->f_advice; 4951 if (fa != NULL) { 4952 if (offset <= fa->fa_start && end >= fa->fa_end) { 4953 new = fa; 4954 fp->f_advice = NULL; 4955 } else if (offset <= fa->fa_start && 4956 end >= fa->fa_start) 4957 fa->fa_start = end + 1; 4958 else if (offset <= fa->fa_end && end >= fa->fa_end) 4959 fa->fa_end = offset - 1; 4960 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4961 /* 4962 * If the "normal" region is a middle 4963 * portion of the existing 4964 * non-standard region, just remove 4965 * the whole thing rather than picking 4966 * one side or the other to 4967 * preserve. 
4968 */ 4969 new = fa; 4970 fp->f_advice = NULL; 4971 } 4972 } 4973 mtx_pool_unlock(mtxpool_sleep, fp); 4974 break; 4975 case POSIX_FADV_WILLNEED: 4976 case POSIX_FADV_DONTNEED: 4977 error = VOP_ADVISE(vp, offset, end, advice); 4978 break; 4979 } 4980 out: 4981 if (fp != NULL) 4982 fdrop(fp, td); 4983 free(new, M_FADVISE); 4984 return (error); 4985 } 4986 4987 int 4988 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4989 { 4990 int error; 4991 4992 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4993 uap->advice); 4994 return (kern_posix_error(td, error)); 4995 } 4996 4997 int 4998 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4999 off_t *outoffp, size_t len, unsigned int flags) 5000 { 5001 struct file *infp, *infp1, *outfp, *outfp1; 5002 struct vnode *invp, *outvp; 5003 int error; 5004 size_t retlen; 5005 void *rl_rcookie, *rl_wcookie; 5006 off_t inoff, outoff, savinoff, savoutoff; 5007 bool foffsets_locked; 5008 5009 infp = outfp = NULL; 5010 rl_rcookie = rl_wcookie = NULL; 5011 foffsets_locked = false; 5012 error = 0; 5013 retlen = 0; 5014 5015 if (flags != 0) { 5016 error = EINVAL; 5017 goto out; 5018 } 5019 if (len > SSIZE_MAX) 5020 /* 5021 * Although the len argument is size_t, the return argument 5022 * is ssize_t (which is signed). Therefore a size that won't 5023 * fit in ssize_t can't be returned. 5024 */ 5025 len = SSIZE_MAX; 5026 5027 /* Get the file structures for the file descriptors. */ 5028 error = fget_read(td, infd, 5029 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 5030 if (error != 0) 5031 goto out; 5032 if (infp->f_ops == &badfileops) { 5033 error = EBADF; 5034 goto out; 5035 } 5036 if (infp->f_vnode == NULL) { 5037 error = EINVAL; 5038 goto out; 5039 } 5040 error = fget_write(td, outfd, 5041 outoffp != NULL ? 
&cap_pwrite_rights : &cap_write_rights, &outfp); 5042 if (error != 0) 5043 goto out; 5044 if (outfp->f_ops == &badfileops) { 5045 error = EBADF; 5046 goto out; 5047 } 5048 if (outfp->f_vnode == NULL) { 5049 error = EINVAL; 5050 goto out; 5051 } 5052 5053 /* 5054 * Figure out which file offsets we're reading from and writing to. 5055 * If the offsets come from the file descriptions, we need to lock them, 5056 * and locking both offsets requires a loop to avoid deadlocks. 5057 */ 5058 infp1 = outfp1 = NULL; 5059 if (inoffp != NULL) 5060 inoff = *inoffp; 5061 else 5062 infp1 = infp; 5063 if (outoffp != NULL) 5064 outoff = *outoffp; 5065 else 5066 outfp1 = outfp; 5067 if (infp1 != NULL || outfp1 != NULL) { 5068 if (infp1 == outfp1) { 5069 /* 5070 * Overlapping ranges are not allowed. A more thorough 5071 * check appears below, but we must not lock the same 5072 * offset twice. 5073 */ 5074 error = EINVAL; 5075 goto out; 5076 } 5077 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5078 foffsets_locked = true; 5079 } 5080 savinoff = inoff; 5081 savoutoff = outoff; 5082 5083 invp = infp->f_vnode; 5084 outvp = outfp->f_vnode; 5085 /* Sanity check the f_flag bits. */ 5086 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5087 (infp->f_flag & FREAD) == 0) { 5088 error = EBADF; 5089 goto out; 5090 } 5091 5092 /* If len == 0, just return 0. */ 5093 if (len == 0) 5094 goto out; 5095 5096 /* 5097 * Make sure that the ranges we check and lock below are valid. Note 5098 * that len is clamped to SSIZE_MAX above. 5099 */ 5100 if (inoff < 0 || outoff < 0) { 5101 error = EINVAL; 5102 goto out; 5103 } 5104 5105 /* 5106 * If infp and outfp refer to the same file, the byte ranges cannot 5107 * overlap. 
5108 */ 5109 if (invp == outvp) { 5110 if ((inoff <= outoff && inoff + len > outoff) || 5111 (inoff > outoff && outoff + len > inoff)) { 5112 error = EINVAL; 5113 goto out; 5114 } 5115 rangelock_may_recurse(&invp->v_rl); 5116 } 5117 5118 /* Range lock the byte ranges for both invp and outvp. */ 5119 for (;;) { 5120 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5121 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5122 if (rl_rcookie != NULL) 5123 break; 5124 vn_rangelock_unlock(outvp, rl_wcookie); 5125 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5126 vn_rangelock_unlock(invp, rl_rcookie); 5127 } 5128 5129 retlen = len; 5130 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5131 flags, infp->f_cred, outfp->f_cred, td); 5132 out: 5133 if (rl_rcookie != NULL) 5134 vn_rangelock_unlock(invp, rl_rcookie); 5135 if (rl_wcookie != NULL) 5136 vn_rangelock_unlock(outvp, rl_wcookie); 5137 if (foffsets_locked) { 5138 if (error == EINTR || error == ERESTART) { 5139 inoff = savinoff; 5140 outoff = savoutoff; 5141 } 5142 if (inoffp == NULL) 5143 foffset_unlock(infp, inoff, 0); 5144 else 5145 *inoffp = inoff; 5146 if (outoffp == NULL) 5147 foffset_unlock(outfp, outoff, 0); 5148 else 5149 *outoffp = outoff; 5150 } 5151 if (outfp != NULL) 5152 fdrop(outfp, td); 5153 if (infp != NULL) 5154 fdrop(infp, td); 5155 td->td_retval[0] = retlen; 5156 return (error); 5157 } 5158 5159 int 5160 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5161 { 5162 off_t inoff, outoff, *inoffp, *outoffp; 5163 int error; 5164 5165 inoffp = outoffp = NULL; 5166 if (uap->inoffp != NULL) { 5167 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5168 if (error != 0) 5169 return (error); 5170 inoffp = &inoff; 5171 } 5172 if (uap->outoffp != NULL) { 5173 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5174 if (error != 0) 5175 return (error); 5176 outoffp = &outoff; 5177 } 5178 error = kern_copy_file_range(td, uap->infd, 
inoffp, uap->outfd, 5179 outoffp, uap->len, uap->flags); 5180 if (error == 0 && uap->inoffp != NULL) 5181 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5182 if (error == 0 && uap->outoffp != NULL) 5183 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5184 return (error); 5185 } 5186