1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include "opt_capsicum.h" 38 #include "opt_ktrace.h" 39 40 #include <sys/systm.h> 41 #ifdef COMPAT_FREEBSD11 42 #include <sys/abi_compat.h> 43 #endif 44 #include <sys/bio.h> 45 #include <sys/buf.h> 46 #include <sys/capsicum.h> 47 #include <sys/disk.h> 48 #include <sys/dirent.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/filedesc.h> 52 #include <sys/filio.h> 53 #include <sys/jail.h> 54 #include <sys/kernel.h> 55 #ifdef KTRACE 56 #include <sys/ktrace.h> 57 #endif 58 #include <sys/limits.h> 59 #include <sys/linker.h> 60 #include <sys/malloc.h> 61 #include <sys/mount.h> 62 #include <sys/mutex.h> 63 #include <sys/namei.h> 64 #include <sys/priv.h> 65 #include <sys/proc.h> 66 #include <sys/rwlock.h> 67 #include <sys/sdt.h> 68 #include <sys/stat.h> 69 #include <sys/sx.h> 70 #include <sys/syscallsubr.h> 71 #include <sys/sysctl.h> 72 #include <sys/sysproto.h> 73 #include <sys/unistd.h> 74 #include <sys/vnode.h> 75 76 #include <machine/stdarg.h> 77 78 #include <security/audit/audit.h> 79 #include <security/mac/mac_framework.h> 80 81 #include <vm/vm.h> 82 #include <vm/vm_object.h> 83 #include <vm/vm_page.h> 84 #include <vm/vnode_pager.h> 85 #include <vm/uma.h> 86 87 #include <fs/devfs/devfs.h> 88 89 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 90 91 static int kern_chflagsat(struct thread *td, int fd, const char *path, 92 enum uio_seg pathseg, u_long flags, int atflag); 93 static int setfflags(struct thread *td, struct vnode *, u_long); 94 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 95 static int getutimens(const struct timespec *, enum uio_seg, 96 struct timespec *, int *); 97 static int setutimes(struct thread *td, struct vnode *, 98 const struct timespec *, int, int); 99 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 100 struct thread *td); 101 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, fhandle_t 
*fhp); 103 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 104 size_t count, struct thread *td); 105 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 106 const char *path, enum uio_seg segflag); 107 108 uint64_t 109 at2cnpflags(u_int at_flags, u_int mask) 110 { 111 uint64_t res; 112 113 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 114 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 115 116 res = 0; 117 at_flags &= mask; 118 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 119 res |= RBENEATH; 120 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 121 res |= FOLLOW; 122 /* NOFOLLOW is pseudo flag */ 123 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 124 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 125 FOLLOW; 126 } 127 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 128 res |= EMPTYPATH; 129 return (res); 130 } 131 132 int 133 kern_sync(struct thread *td) 134 { 135 struct mount *mp, *nmp; 136 int save; 137 138 mtx_lock(&mountlist_mtx); 139 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 140 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 141 nmp = TAILQ_NEXT(mp, mnt_list); 142 continue; 143 } 144 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 145 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 146 save = curthread_pflags_set(TDP_SYNCIO); 147 vfs_periodic(mp, MNT_NOWAIT); 148 VFS_SYNC(mp, MNT_NOWAIT); 149 curthread_pflags_restore(save); 150 vn_finished_write(mp); 151 } 152 mtx_lock(&mountlist_mtx); 153 nmp = TAILQ_NEXT(mp, mnt_list); 154 vfs_unbusy(mp); 155 } 156 mtx_unlock(&mountlist_mtx); 157 return (0); 158 } 159 160 /* 161 * Sync each mounted filesystem. 162 */ 163 #ifndef _SYS_SYSPROTO_H_ 164 struct sync_args { 165 int dummy; 166 }; 167 #endif 168 /* ARGSUSED */ 169 int 170 sys_sync(struct thread *td, struct sync_args *uap) 171 { 172 173 return (kern_sync(td)); 174 } 175 176 /* 177 * Change filesystem quotas. 
178 */ 179 #ifndef _SYS_SYSPROTO_H_ 180 struct quotactl_args { 181 char *path; 182 int cmd; 183 int uid; 184 caddr_t arg; 185 }; 186 #endif 187 int 188 sys_quotactl(struct thread *td, struct quotactl_args *uap) 189 { 190 struct mount *mp; 191 struct nameidata nd; 192 int error; 193 bool mp_busy; 194 195 AUDIT_ARG_CMD(uap->cmd); 196 AUDIT_ARG_UID(uap->uid); 197 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 198 return (EPERM); 199 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 200 uap->path); 201 if ((error = namei(&nd)) != 0) 202 return (error); 203 NDFREE_PNBUF(&nd); 204 mp = nd.ni_vp->v_mount; 205 vfs_ref(mp); 206 vput(nd.ni_vp); 207 error = vfs_busy(mp, 0); 208 if (error != 0) { 209 vfs_rel(mp); 210 return (error); 211 } 212 mp_busy = true; 213 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 214 215 /* 216 * Since quota on/off operations typically need to open quota 217 * files, the implementation may need to unbusy the mount point 218 * before calling into namei. Otherwise, unmount might be 219 * started between two vfs_busy() invocations (first is ours, 220 * second is from mount point cross-walk code in lookup()), 221 * causing deadlock. 222 * 223 * Avoid unbusying mp if the implementation indicates it has 224 * already done so. 225 */ 226 if (mp_busy) 227 vfs_unbusy(mp); 228 vfs_rel(mp); 229 return (error); 230 } 231 232 /* 233 * Used by statfs conversion routines to scale the block size up if 234 * necessary so that all of the block counts are <= 'max_size'. Note 235 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 236 * value of 'n'. 237 */ 238 void 239 statfs_scale_blocks(struct statfs *sf, long max_size) 240 { 241 uint64_t count; 242 int shift; 243 244 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 245 246 /* 247 * Attempt to scale the block counts to give a more accurate 248 * overview to userland of the ratio of free space to used 249 * space. 
To do this, find the largest block count and compute 250 * a divisor that lets it fit into a signed integer <= max_size. 251 */ 252 if (sf->f_bavail < 0) 253 count = -sf->f_bavail; 254 else 255 count = sf->f_bavail; 256 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 257 if (count <= max_size) 258 return; 259 260 count >>= flsl(max_size); 261 shift = 0; 262 while (count > 0) { 263 shift++; 264 count >>=1; 265 } 266 267 sf->f_bsize <<= shift; 268 sf->f_blocks >>= shift; 269 sf->f_bfree >>= shift; 270 sf->f_bavail >>= shift; 271 } 272 273 static int 274 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 275 { 276 int error; 277 278 if (mp == NULL) 279 return (EBADF); 280 error = vfs_busy(mp, 0); 281 vfs_rel(mp); 282 if (error != 0) 283 return (error); 284 #ifdef MAC 285 error = mac_mount_check_stat(td->td_ucred, mp); 286 if (error != 0) 287 goto out; 288 #endif 289 error = VFS_STATFS(mp, buf); 290 if (error != 0) 291 goto out; 292 if (priv_check_cred_vfs_generation(td->td_ucred)) { 293 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 294 prison_enforce_statfs(td->td_ucred, mp, buf); 295 } 296 out: 297 vfs_unbusy(mp); 298 return (error); 299 } 300 301 /* 302 * Get filesystem statistics. 
303 */ 304 #ifndef _SYS_SYSPROTO_H_ 305 struct statfs_args { 306 char *path; 307 struct statfs *buf; 308 }; 309 #endif 310 int 311 sys_statfs(struct thread *td, struct statfs_args *uap) 312 { 313 struct statfs *sfp; 314 int error; 315 316 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 317 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 318 if (error == 0) 319 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 320 free(sfp, M_STATFS); 321 return (error); 322 } 323 324 int 325 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 326 struct statfs *buf) 327 { 328 struct mount *mp; 329 struct nameidata nd; 330 int error; 331 332 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 333 error = namei(&nd); 334 if (error != 0) 335 return (error); 336 NDFREE_PNBUF(&nd); 337 mp = vfs_ref_from_vp(nd.ni_vp); 338 vrele(nd.ni_vp); 339 return (kern_do_statfs(td, mp, buf)); 340 } 341 342 /* 343 * Get filesystem statistics. 344 */ 345 #ifndef _SYS_SYSPROTO_H_ 346 struct fstatfs_args { 347 int fd; 348 struct statfs *buf; 349 }; 350 #endif 351 int 352 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 353 { 354 struct statfs *sfp; 355 int error; 356 357 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 358 error = kern_fstatfs(td, uap->fd, sfp); 359 if (error == 0) 360 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 361 free(sfp, M_STATFS); 362 return (error); 363 } 364 365 int 366 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 367 { 368 struct file *fp; 369 struct mount *mp; 370 struct vnode *vp; 371 int error; 372 373 AUDIT_ARG_FD(fd); 374 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 375 if (error != 0) 376 return (error); 377 vp = fp->f_vnode; 378 #ifdef AUDIT 379 if (AUDITING_TD(td)) { 380 vn_lock(vp, LK_SHARED | LK_RETRY); 381 AUDIT_ARG_VNODE1(vp); 382 VOP_UNLOCK(vp); 383 } 384 #endif 385 mp = vfs_ref_from_vp(vp); 386 fdrop(fp, td); 387 return (kern_do_statfs(td, mp, buf)); 388 } 389 390 
/*
 * Get statistics on all filesystems.
 */
#ifndef _SYS_SYSPROTO_H_
struct getfsstat_args {
	struct statfs *buf;
	long bufsize;
	int mode;
};
#endif
/*
 * getfsstat(2) entry point: validates 'bufsize', lets kern_getfsstat()
 * copy entries straight to the user buffer, and returns the entry count
 * in td_retval[0] on success.
 */
int
sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
{
	size_t count;
	int error;

	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
		return (EINVAL);
	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
	    UIO_USERSPACE, uap->mode);
	if (error == 0)
		td->td_retval[0] = count;
	return (error);
}

/*
 * Collect statfs records for the mounts visible to the calling thread.
 *
 * 'mode' must be MNT_WAIT or MNT_NOWAIT, otherwise EINVAL is returned.
 * With bufsize == 0 only the number of visible mounts is stored in
 * '*countp'.  Otherwise at most bufsize / sizeof(struct statfs) records
 * are produced and '*countp' receives the number actually written.
 *
 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
 *	The caller is responsible for freeing memory which will be allocated
 *	in '*buf'.
 * For UIO_USERSPACE, '*buf' is the user address the records are copied to.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    size_t *countp, enum uio_seg bufseg, int mode)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, *sptmp, *tofree;
	size_t count, maxcount;
	int error;

	switch (mode) {
	case MNT_WAIT:
	case MNT_NOWAIT:
		break;
	default:
		/* Give kernel callers a well-defined pointer to free. */
		if (bufseg == UIO_SYSSPACE)
			*buf = NULL;
		return (EINVAL);
	}
restart:
	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0) {
		sfsp = NULL;
		tofree = NULL;
	} else if (bufseg == UIO_USERSPACE) {
		sfsp = *buf;
		tofree = NULL;
	} else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer for the smaller of what the caller
		 * asked for and the current length of the mount list.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
		    M_STATFS, M_WAITOK);
	}

	count = 0;

	/*
	 * If there is no target buffer they only want the count.
	 *
	 * This could be TAILQ_FOREACH but it is open-coded to match the original
	 * code below.
	 */
	if (sfsp == NULL) {
		mtx_lock(&mountlist_mtx);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			/* Mounts hidden by jail or MAC policy are not counted. */
			if (prison_canseemount(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#ifdef MAC
			if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#endif
			count++;
			nmp = TAILQ_NEXT(mp, mnt_list);
		}
		mtx_unlock(&mountlist_mtx);
		*countp = count;
		return (0);
	}

	/*
	 * They want the entire thing.
	 *
	 * Short-circuit the corner case of no room for anything, avoids
	 * relocking below.
	 */
	if (maxcount < 1) {
		goto out;
	}

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Skip mounts the caller is not allowed to see. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		if (mode == MNT_WAIT) {
			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
				/*
				 * If vfs_busy() failed, and MBF_NOWAIT
				 * wasn't passed, then the mp is gone.
				 * Furthermore, because of MBF_MNTLSTLOCK,
				 * the mountlist_mtx was dropped.  We have
				 * no other choice than to start over.
				 *
				 * NOTE(review): the lock is released right
				 * below, so vfs_busy() presumably re-takes it
				 * before returning failure - confirm against
				 * vfs_busy().
				 */
				mtx_unlock(&mountlist_mtx);
				free(tofree, M_STATFS);
				goto restart;
			}
		} else {
			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
		}
		sp = &mp->mnt_stat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh
		 * the fsstat cache.
		 */
		if (mode != MNT_NOWAIT) {
			error = VFS_STATFS(mp, sp);
			if (error != 0) {
				/* Refresh failed: drop this mount, move on. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
		}
		if (priv_check_cred_vfs_generation(td->td_ucred)) {
			/*
			 * Work on a private copy so that clearing the fsid
			 * and applying the jail policy does not modify
			 * mp->mnt_stat itself.
			 */
			sptmp = malloc(sizeof(struct statfs), M_STATFS,
			    M_WAITOK);
			*sptmp = *sp;
			sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
			prison_enforce_statfs(td->td_ucred, mp, sptmp);
			sp = sptmp;
		} else
			sptmp = NULL;
		if (bufseg == UIO_SYSSPACE) {
			bcopy(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
		} else /* if (bufseg == UIO_USERSPACE) */ {
			error = copyout(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
			if (error != 0) {
				/*
				 * 'tofree' is NULL for user buffers, so only
				 * the busy reference needs releasing here.
				 */
				vfs_unbusy(mp);
				return (error);
			}
		}
		sfsp++;
		count++;

		if (count == maxcount) {
			/* Buffer is full; mountlist_mtx is not held here. */
			vfs_unbusy(mp);
			goto out;
		}

		/* Re-take the list lock before stepping to the next mount. */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
out:
	*countp = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * FreeBSD 4 compatible statfs(2): fetch the current-format record, convert
 * it to 'struct ostatfs' and copy that out to the caller.
 */
int
freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
{
	struct ostatfs osb;
	struct statfs *sfp;
	int error;

	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
	if (error == 0) {
		freebsd4_cvtstatfs(sfp, &osb);
		error = copyout(&osb, uap->buf, sizeof(osb));
	}
	free(sfp, M_STATFS);
	return (error);
}

/*
 * Get filesystem statistics.
612 */ 613 #ifndef _SYS_SYSPROTO_H_ 614 struct freebsd4_fstatfs_args { 615 int fd; 616 struct ostatfs *buf; 617 }; 618 #endif 619 int 620 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 621 { 622 struct ostatfs osb; 623 struct statfs *sfp; 624 int error; 625 626 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 627 error = kern_fstatfs(td, uap->fd, sfp); 628 if (error == 0) { 629 freebsd4_cvtstatfs(sfp, &osb); 630 error = copyout(&osb, uap->buf, sizeof(osb)); 631 } 632 free(sfp, M_STATFS); 633 return (error); 634 } 635 636 /* 637 * Get statistics on all filesystems. 638 */ 639 #ifndef _SYS_SYSPROTO_H_ 640 struct freebsd4_getfsstat_args { 641 struct ostatfs *buf; 642 long bufsize; 643 int mode; 644 }; 645 #endif 646 int 647 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 648 { 649 struct statfs *buf, *sp; 650 struct ostatfs osb; 651 size_t count, size; 652 int error; 653 654 if (uap->bufsize < 0) 655 return (EINVAL); 656 count = uap->bufsize / sizeof(struct ostatfs); 657 if (count > SIZE_MAX / sizeof(struct statfs)) 658 return (EINVAL); 659 size = count * sizeof(struct statfs); 660 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 661 uap->mode); 662 if (error == 0) 663 td->td_retval[0] = count; 664 if (size != 0) { 665 sp = buf; 666 while (count != 0 && error == 0) { 667 freebsd4_cvtstatfs(sp, &osb); 668 error = copyout(&osb, uap->buf, sizeof(osb)); 669 sp++; 670 uap->buf++; 671 count--; 672 } 673 free(buf, M_STATFS); 674 } 675 return (error); 676 } 677 678 /* 679 * Implement fstatfs() for (NFS) file handles. 
680 */ 681 #ifndef _SYS_SYSPROTO_H_ 682 struct freebsd4_fhstatfs_args { 683 struct fhandle *u_fhp; 684 struct ostatfs *buf; 685 }; 686 #endif 687 int 688 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 689 { 690 struct ostatfs osb; 691 struct statfs *sfp; 692 fhandle_t fh; 693 int error; 694 695 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 696 if (error != 0) 697 return (error); 698 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 699 error = kern_fhstatfs(td, fh, sfp); 700 if (error == 0) { 701 freebsd4_cvtstatfs(sfp, &osb); 702 error = copyout(&osb, uap->buf, sizeof(osb)); 703 } 704 free(sfp, M_STATFS); 705 return (error); 706 } 707 708 /* 709 * Convert a new format statfs structure to an old format statfs structure. 710 */ 711 static void 712 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 713 { 714 715 statfs_scale_blocks(nsp, LONG_MAX); 716 bzero(osp, sizeof(*osp)); 717 osp->f_bsize = nsp->f_bsize; 718 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 719 osp->f_blocks = nsp->f_blocks; 720 osp->f_bfree = nsp->f_bfree; 721 osp->f_bavail = nsp->f_bavail; 722 osp->f_files = MIN(nsp->f_files, LONG_MAX); 723 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 724 osp->f_owner = nsp->f_owner; 725 osp->f_type = nsp->f_type; 726 osp->f_flags = nsp->f_flags; 727 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 728 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 729 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 730 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 731 strlcpy(osp->f_fstypename, nsp->f_fstypename, 732 MIN(MFSNAMELEN, OMFSNAMELEN)); 733 strlcpy(osp->f_mntonname, nsp->f_mntonname, 734 MIN(MNAMELEN, OMNAMELEN)); 735 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 736 MIN(MNAMELEN, OMNAMELEN)); 737 osp->f_fsid = nsp->f_fsid; 738 } 739 #endif /* COMPAT_FREEBSD4 */ 740 741 #if defined(COMPAT_FREEBSD11) 742 /* 743 * Get old format filesystem statistics. 
744 */ 745 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 746 747 int 748 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 749 { 750 struct freebsd11_statfs osb; 751 struct statfs *sfp; 752 int error; 753 754 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 755 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 756 if (error == 0) { 757 freebsd11_cvtstatfs(sfp, &osb); 758 error = copyout(&osb, uap->buf, sizeof(osb)); 759 } 760 free(sfp, M_STATFS); 761 return (error); 762 } 763 764 /* 765 * Get filesystem statistics. 766 */ 767 int 768 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 769 { 770 struct freebsd11_statfs osb; 771 struct statfs *sfp; 772 int error; 773 774 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 775 error = kern_fstatfs(td, uap->fd, sfp); 776 if (error == 0) { 777 freebsd11_cvtstatfs(sfp, &osb); 778 error = copyout(&osb, uap->buf, sizeof(osb)); 779 } 780 free(sfp, M_STATFS); 781 return (error); 782 } 783 784 /* 785 * Get statistics on all filesystems. 
786 */ 787 int 788 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 789 { 790 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 791 } 792 793 int 794 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 795 long bufsize, int mode) 796 { 797 struct freebsd11_statfs osb; 798 struct statfs *buf, *sp; 799 size_t count, size; 800 int error; 801 802 if (bufsize < 0) 803 return (EINVAL); 804 805 count = bufsize / sizeof(struct ostatfs); 806 size = count * sizeof(struct statfs); 807 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 808 if (error == 0) 809 td->td_retval[0] = count; 810 if (size > 0) { 811 sp = buf; 812 while (count > 0 && error == 0) { 813 freebsd11_cvtstatfs(sp, &osb); 814 error = copyout(&osb, ubuf, sizeof(osb)); 815 sp++; 816 ubuf++; 817 count--; 818 } 819 free(buf, M_STATFS); 820 } 821 return (error); 822 } 823 824 /* 825 * Implement fstatfs() for (NFS) file handles. 826 */ 827 int 828 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 829 { 830 struct freebsd11_statfs osb; 831 struct statfs *sfp; 832 fhandle_t fh; 833 int error; 834 835 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 836 if (error) 837 return (error); 838 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 839 error = kern_fhstatfs(td, fh, sfp); 840 if (error == 0) { 841 freebsd11_cvtstatfs(sfp, &osb); 842 error = copyout(&osb, uap->buf, sizeof(osb)); 843 } 844 free(sfp, M_STATFS); 845 return (error); 846 } 847 848 /* 849 * Convert a new format statfs structure to an old format statfs structure. 
850 */ 851 static void 852 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 853 { 854 855 bzero(osp, sizeof(*osp)); 856 osp->f_version = FREEBSD11_STATFS_VERSION; 857 osp->f_type = nsp->f_type; 858 osp->f_flags = nsp->f_flags; 859 osp->f_bsize = nsp->f_bsize; 860 osp->f_iosize = nsp->f_iosize; 861 osp->f_blocks = nsp->f_blocks; 862 osp->f_bfree = nsp->f_bfree; 863 osp->f_bavail = nsp->f_bavail; 864 osp->f_files = nsp->f_files; 865 osp->f_ffree = nsp->f_ffree; 866 osp->f_syncwrites = nsp->f_syncwrites; 867 osp->f_asyncwrites = nsp->f_asyncwrites; 868 osp->f_syncreads = nsp->f_syncreads; 869 osp->f_asyncreads = nsp->f_asyncreads; 870 osp->f_namemax = nsp->f_namemax; 871 osp->f_owner = nsp->f_owner; 872 osp->f_fsid = nsp->f_fsid; 873 strlcpy(osp->f_fstypename, nsp->f_fstypename, 874 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 875 strlcpy(osp->f_mntonname, nsp->f_mntonname, 876 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 877 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 878 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 879 } 880 #endif /* COMPAT_FREEBSD11 */ 881 882 /* 883 * Change current working directory to a given file descriptor. 
884 */ 885 #ifndef _SYS_SYSPROTO_H_ 886 struct fchdir_args { 887 int fd; 888 }; 889 #endif 890 int 891 sys_fchdir(struct thread *td, struct fchdir_args *uap) 892 { 893 struct vnode *vp, *tdp; 894 struct mount *mp; 895 struct file *fp; 896 int error; 897 898 AUDIT_ARG_FD(uap->fd); 899 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 900 &fp); 901 if (error != 0) 902 return (error); 903 vp = fp->f_vnode; 904 vrefact(vp); 905 fdrop(fp, td); 906 vn_lock(vp, LK_SHARED | LK_RETRY); 907 AUDIT_ARG_VNODE1(vp); 908 error = change_dir(vp, td); 909 while (!error && (mp = vp->v_mountedhere) != NULL) { 910 if (vfs_busy(mp, 0)) 911 continue; 912 error = VFS_ROOT(mp, LK_SHARED, &tdp); 913 vfs_unbusy(mp); 914 if (error != 0) 915 break; 916 vput(vp); 917 vp = tdp; 918 } 919 if (error != 0) { 920 vput(vp); 921 return (error); 922 } 923 VOP_UNLOCK(vp); 924 pwd_chdir(td, vp); 925 return (0); 926 } 927 928 /* 929 * Change current working directory (``.''). 930 */ 931 #ifndef _SYS_SYSPROTO_H_ 932 struct chdir_args { 933 char *path; 934 }; 935 #endif 936 int 937 sys_chdir(struct thread *td, struct chdir_args *uap) 938 { 939 940 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 941 } 942 943 int 944 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 945 { 946 struct nameidata nd; 947 int error; 948 949 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 950 pathseg, path); 951 if ((error = namei(&nd)) != 0) 952 return (error); 953 if ((error = change_dir(nd.ni_vp, td)) != 0) { 954 vput(nd.ni_vp); 955 NDFREE_PNBUF(&nd); 956 return (error); 957 } 958 VOP_UNLOCK(nd.ni_vp); 959 NDFREE_PNBUF(&nd); 960 pwd_chdir(td, nd.ni_vp); 961 return (0); 962 } 963 964 static int unprivileged_chroot = 0; 965 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 966 &unprivileged_chroot, 0, 967 "Unprivileged processes can use chroot(2)"); 968 969 /* 970 * Takes locked vnode, unlocks it before returning. 
971 */ 972 static int 973 kern_chroot(struct thread *td, struct vnode *vp) 974 { 975 struct proc *p; 976 int error; 977 978 error = priv_check(td, PRIV_VFS_CHROOT); 979 if (error != 0) { 980 p = td->td_proc; 981 PROC_LOCK(p); 982 if (unprivileged_chroot == 0 || 983 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 984 PROC_UNLOCK(p); 985 goto e_vunlock; 986 } 987 PROC_UNLOCK(p); 988 } 989 990 error = change_dir(vp, td); 991 if (error != 0) 992 goto e_vunlock; 993 #ifdef MAC 994 error = mac_vnode_check_chroot(td->td_ucred, vp); 995 if (error != 0) 996 goto e_vunlock; 997 #endif 998 VOP_UNLOCK(vp); 999 error = pwd_chroot(td, vp); 1000 vrele(vp); 1001 return (error); 1002 e_vunlock: 1003 vput(vp); 1004 return (error); 1005 } 1006 1007 /* 1008 * Change notion of root (``/'') directory. 1009 */ 1010 #ifndef _SYS_SYSPROTO_H_ 1011 struct chroot_args { 1012 char *path; 1013 }; 1014 #endif 1015 int 1016 sys_chroot(struct thread *td, struct chroot_args *uap) 1017 { 1018 struct nameidata nd; 1019 int error; 1020 1021 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1022 UIO_USERSPACE, uap->path); 1023 error = namei(&nd); 1024 if (error != 0) 1025 return (error); 1026 NDFREE_PNBUF(&nd); 1027 error = kern_chroot(td, nd.ni_vp); 1028 return (error); 1029 } 1030 1031 /* 1032 * Change notion of root directory to a given file descriptor. 1033 */ 1034 #ifndef _SYS_SYSPROTO_H_ 1035 struct fchroot_args { 1036 int fd; 1037 }; 1038 #endif 1039 int 1040 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1041 { 1042 struct vnode *vp; 1043 struct file *fp; 1044 int error; 1045 1046 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fp); 1047 if (error != 0) 1048 return (error); 1049 vp = fp->f_vnode; 1050 vrefact(vp); 1051 fdrop(fp, td); 1052 vn_lock(vp, LK_SHARED | LK_RETRY); 1053 error = kern_chroot(td, vp); 1054 return (error); 1055 } 1056 1057 /* 1058 * Common routine for chroot and chdir. Callers must provide a locked vnode 1059 * instance. 
1060 */ 1061 int 1062 change_dir(struct vnode *vp, struct thread *td) 1063 { 1064 #ifdef MAC 1065 int error; 1066 #endif 1067 1068 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1069 if (vp->v_type != VDIR) 1070 return (ENOTDIR); 1071 #ifdef MAC 1072 error = mac_vnode_check_chdir(td->td_ucred, vp); 1073 if (error != 0) 1074 return (error); 1075 #endif 1076 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1077 } 1078 1079 static __inline void 1080 flags_to_rights(int flags, cap_rights_t *rightsp) 1081 { 1082 if (flags & O_EXEC) { 1083 cap_rights_set_one(rightsp, CAP_FEXECVE); 1084 if (flags & O_PATH) 1085 return; 1086 } else { 1087 switch ((flags & O_ACCMODE)) { 1088 case O_RDONLY: 1089 cap_rights_set_one(rightsp, CAP_READ); 1090 break; 1091 case O_RDWR: 1092 cap_rights_set_one(rightsp, CAP_READ); 1093 /* FALLTHROUGH */ 1094 case O_WRONLY: 1095 cap_rights_set_one(rightsp, CAP_WRITE); 1096 if (!(flags & (O_APPEND | O_TRUNC))) 1097 cap_rights_set_one(rightsp, CAP_SEEK); 1098 break; 1099 } 1100 } 1101 1102 if (flags & O_CREAT) 1103 cap_rights_set_one(rightsp, CAP_CREATE); 1104 1105 if (flags & O_TRUNC) 1106 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1107 1108 if (flags & (O_SYNC | O_FSYNC)) 1109 cap_rights_set_one(rightsp, CAP_FSYNC); 1110 1111 if (flags & (O_EXLOCK | O_SHLOCK)) 1112 cap_rights_set_one(rightsp, CAP_FLOCK); 1113 } 1114 1115 /* 1116 * Check permissions, allocate an open file structure, and call the device 1117 * open routine if any. 
*/
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
/*
 * open(2): an openat relative to AT_FDCWD.
 */
int
sys_open(struct thread *td, struct open_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
/*
 * openat(2): records the directory descriptor for audit and defers to
 * kern_openat().
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Validate open(2) flags and convert access mode flags (O_RDONLY etc.) to their
 * in-kernel representations (FREAD etc.).
 *
 * '*flagsp' is updated in place on success; EINVAL is returned (and
 * '*flagsp' left untouched) for an invalid access mode combination.
 */
static int
openflags(int *flagsp)
{
	int flags;

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	flags = *flagsp;
	if ((flags & O_PATH) != 0) {
		/* Access mode bits are simply dropped for O_PATH. */
		flags &= ~O_ACCMODE;
	} else if ((flags & O_EXEC) != 0) {
		if ((flags & O_ACCMODE) != 0)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}
	*flagsp = flags;
	return (0);
}

/*
 * Attach the freshly opened vnode 'vp' to 'fp' and, unless some layer has
 * already installed its own file operations, initialise 'fp' as either an
 * O_PATH file or a regular vnode-backed file.
 */
static void
finit_open(struct file *fp, struct vnode *vp, int flags)
{
	/*
	 * Store the vnode, for any f_type. Typically, the vnode use count is
	 * decremented by a direct call to vnops.fo_close() for files that
	 * switched type.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs or fifofs, bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}
}

/*
 * If fpp != NULL, opened file is not installed into the file
 * descriptor table, instead it is returned in *fpp.  This is
 * incompatible with fdopen(), in which case we return EINVAL.
 *
 * With fpp == NULL the new descriptor number is returned in
 * td_retval[0].
 */
static int
openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	struct proc *p;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct filecaps *fcaps;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* indx stays -1 until a descriptor has actually been installed. */
	indx = -1;
	p = td->td_proc;
	fdp = p->p_fd;
	pdp = p->p_pd;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/*
	 * The required capability rights are derived from the flags as
	 * passed in, before openflags() rewrites them below.
	 */
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	error = openflags(&flags);
	if (error != 0)
		return (error);

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Apply the process file creation mask and strip S_ISTXT. */
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, dirfd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 *
		 * NOTE(review): this jumps to 'success' with error still
		 * ENXIO, while the fpp != NULL branch there asserts
		 * error == 0 - confirm this path is unreachable with fpp set.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			MPASS(fpp == NULL);
			/* On success dupfdopen() stores the new fd in indx. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	finit_open(fp, vp, flags);
	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	if (fpp != NULL) {
		/* Caller wants the file itself, not a descriptor. */
		MPASS(error == 0);
		NDFREE_IOCTLCAPS(&nd);
		*fpp = fp;
		return (0);
	}

	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/*
		 * dupfdopen() already produced the descriptor, so the file
		 * preallocated above is not needed and is released.
		 */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	/* Common failure exit: no descriptor may have been installed. */
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}

/*
 * Open 'path' relative to 'dirfd' and install the result in the
 * descriptor table.
 */
int
kern_openat(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode)
{
	return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL));
}

/*
 * Open 'path' relative to 'dirfd' and return the file in '*fpp' without
 * installing a descriptor.  The thread's td_dupfd is saved and restored
 * around the call, since openatfp() overwrites it.
 */
int
kern_openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	int error, old_dupfd;

	old_dupfd = td->td_dupfd;
	td->td_dupfd = -1;
	error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp);
	td->td_dupfd = old_dupfd;
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
/*
 * Old creat(2): an open with O_WRONLY | O_CREAT | O_TRUNC relative to
 * AT_FDCWD.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1373 */ 1374 #ifndef _SYS_SYSPROTO_H_ 1375 struct mknodat_args { 1376 int fd; 1377 char *path; 1378 mode_t mode; 1379 dev_t dev; 1380 }; 1381 #endif 1382 int 1383 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1384 { 1385 1386 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1387 uap->dev)); 1388 } 1389 1390 #if defined(COMPAT_FREEBSD11) 1391 int 1392 freebsd11_mknod(struct thread *td, 1393 struct freebsd11_mknod_args *uap) 1394 { 1395 1396 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1397 uap->mode, uap->dev)); 1398 } 1399 1400 int 1401 freebsd11_mknodat(struct thread *td, 1402 struct freebsd11_mknodat_args *uap) 1403 { 1404 1405 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1406 uap->dev)); 1407 } 1408 #endif /* COMPAT_FREEBSD11 */ 1409 1410 int 1411 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1412 int mode, dev_t dev) 1413 { 1414 struct vnode *vp; 1415 struct mount *mp; 1416 struct vattr vattr; 1417 struct nameidata nd; 1418 int error, whiteout = 0; 1419 1420 AUDIT_ARG_MODE(mode); 1421 AUDIT_ARG_DEV(dev); 1422 switch (mode & S_IFMT) { 1423 case S_IFCHR: 1424 case S_IFBLK: 1425 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1426 if (error == 0 && dev == VNOVAL) 1427 error = EINVAL; 1428 break; 1429 case S_IFWHT: 1430 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1431 break; 1432 case S_IFIFO: 1433 if (dev == 0) 1434 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1435 /* FALLTHROUGH */ 1436 default: 1437 error = EINVAL; 1438 break; 1439 } 1440 if (error != 0) 1441 return (error); 1442 NDPREINIT(&nd); 1443 restart: 1444 bwillwrite(); 1445 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1446 pathseg, path, fd, &cap_mknodat_rights); 1447 if ((error = namei(&nd)) != 0) 1448 return (error); 1449 vp = nd.ni_vp; 1450 if (vp != NULL) { 1451 NDFREE_PNBUF(&nd); 1452 if (vp == nd.ni_dvp) 1453 vrele(nd.ni_dvp); 1454 else 1455 vput(nd.ni_dvp); 1456 
vrele(vp); 1457 return (EEXIST); 1458 } else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1459 NDFREE_PNBUF(&nd); 1460 vput(nd.ni_dvp); 1461 return (EINVAL); 1462 } else { 1463 VATTR_NULL(&vattr); 1464 vattr.va_mode = (mode & ALLPERMS) & 1465 ~td->td_proc->p_pd->pd_cmask; 1466 vattr.va_rdev = dev; 1467 whiteout = 0; 1468 1469 switch (mode & S_IFMT) { 1470 case S_IFCHR: 1471 vattr.va_type = VCHR; 1472 break; 1473 case S_IFBLK: 1474 vattr.va_type = VBLK; 1475 break; 1476 case S_IFWHT: 1477 whiteout = 1; 1478 break; 1479 default: 1480 panic("kern_mknod: invalid mode"); 1481 } 1482 } 1483 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1484 NDFREE_PNBUF(&nd); 1485 vput(nd.ni_dvp); 1486 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1487 return (error); 1488 goto restart; 1489 } 1490 #ifdef MAC 1491 if (error == 0 && !whiteout) 1492 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1493 &nd.ni_cnd, &vattr); 1494 #endif 1495 if (error == 0) { 1496 if (whiteout) 1497 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1498 else { 1499 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1500 &nd.ni_cnd, &vattr); 1501 } 1502 } 1503 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1504 true); 1505 vn_finished_write(mp); 1506 NDFREE_PNBUF(&nd); 1507 if (error == ERELOOKUP) 1508 goto restart; 1509 return (error); 1510 } 1511 1512 /* 1513 * Create a named pipe. 
*/
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point: the path is resolved relative to AT_FDCWD.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point: the path is resolved relative to uap->fd.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO named by path.
 *
 * Returns EEXIST if the name already exists, EINVAL if the parent directory
 * has VIRF_NAMEDDIR set, or the error from the MAC check or VOP_MKNOD().
 * The lookup is restarted when VOP_MKNOD() reports ERELOOKUP or after the
 * blocking vn_start_write() retry succeeds.
 */
int
kern_mkfifoat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mkfifoat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: release everything and fail. */
		NDFREE_PNBUF(&nd);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Non-blocking attempt failed: drop the directory, retry with
		 * V_XSLEEP | V_PCATCH, then redo the lookup from scratch.
		 */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	/* Requested permission bits filtered by the process creation mask. */
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	/* The new vnode is only passed for release when creation succeeded. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point: both paths are resolved relative to AT_FDCWD and the
 * source is looked up with AT_SYMLINK_FOLLOW.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, AT_SYMLINK_FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point: forwards all arguments to kern_linkat().
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, uap->flag));
}

/* Tunables consulted by can_hardlink(); both default to disabled. */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether cred may create a hard link to vp under the
 * hardlink_check_uid / hardlink_check_gid policies.
 *
 * Returns 0 when linking is allowed (always the case when both tunables are
 * off), the VOP_GETATTR() error if the attributes cannot be read, or the
 * priv_check_cred(PRIV_VFS_LINK) error when an enabled ownership check
 * fails and the credential lacks the privilege.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create a hard link path2 (relative to fd2) to the file named by path1
 * (relative to fd1).
 *
 * Only AT_SYMLINK_FOLLOW, AT_RESOLVE_BENEATH and AT_EMPTY_PATH are accepted
 * in flag; anything else yields EINVAL.  When the source lookup resolved an
 * empty path, PRIV_VFS_FHOPEN is required.  The source lookup and the link
 * attempt are repeated for as long as kern_linkat_vp() returns EAGAIN or
 * ERELOOKUP.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
    const char *path2, enum uio_seg segflag, int flag)
{
	struct nameidata nd;
	int error;

	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH |
	    AT_EMPTY_PATH)) != 0)
		return (EINVAL);

	NDPREINIT(&nd);
	do {
		bwillwrite();
		NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag,
		    AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH),
		    segflag, path1, fd1, &cap_linkat_source_rights);
		if ((error = namei(&nd)) != 0)
			return (error);
		NDFREE_PNBUF(&nd);
		if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) {
			error = priv_check(td, PRIV_VFS_FHOPEN);
			if (error != 0) {
				vrele(nd.ni_vp);
				return (error);
			}
		}
		/* kern_linkat_vp() releases nd.ni_vp on every return path. */
		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
	} while (error == EAGAIN || error == ERELOOKUP);
	return (error);
}

/*
 * Link the already looked-up vnode vp under the name path (relative to fd).
 *
 * The caller's reference on vp is always released, whatever the outcome.
 * Returns EPERM for directories, EINVAL for vnodes with VIRF_NAMEDDIR or
 * VIRF_NAMEDATTR set, EEXIST if the target name exists, EXDEV if the target
 * directory is on a different mount, and EAGAIN when the caller should redo
 * the source lookup and try again (vp could not be locked, or the
 * non-blocking vn_start_write() attempt failed and the blocking retry
 * succeeded).
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) {
		vrele(vp);
		return (EINVAL);
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* Target name already exists. */
			NDFREE_PNBUF(&nd);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE_PNBUF(&nd);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Drop both vnodes before the blocking retry;
				 * on success report EAGAIN so the caller
				 * starts over.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | V_PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
			vn_finished_write(mp);
			NDFREE_PNBUF(&nd);
			/* vp was released above; skip the final vrele(). */
			vp = NULL;
		} else {
			/* vp could not be locked: have the caller retry. */
			vput(nd.ni_dvp);
			NDFREE_PNBUF(&nd);
			vrele(vp);
			return (EAGAIN);
		}
	}
	if (vp != NULL)
		vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1790 */ 1791 #ifndef _SYS_SYSPROTO_H_ 1792 struct symlink_args { 1793 char *path; 1794 char *link; 1795 }; 1796 #endif 1797 int 1798 sys_symlink(struct thread *td, struct symlink_args *uap) 1799 { 1800 1801 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1802 UIO_USERSPACE)); 1803 } 1804 1805 #ifndef _SYS_SYSPROTO_H_ 1806 struct symlinkat_args { 1807 char *path; 1808 int fd; 1809 char *path2; 1810 }; 1811 #endif 1812 int 1813 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1814 { 1815 1816 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1817 UIO_USERSPACE)); 1818 } 1819 1820 int 1821 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1822 enum uio_seg segflg) 1823 { 1824 struct mount *mp; 1825 struct vattr vattr; 1826 const char *syspath; 1827 char *tmppath; 1828 struct nameidata nd; 1829 int error; 1830 1831 if (segflg == UIO_SYSSPACE) { 1832 syspath = path1; 1833 } else { 1834 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1835 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1836 goto out; 1837 syspath = tmppath; 1838 } 1839 AUDIT_ARG_TEXT(syspath); 1840 NDPREINIT(&nd); 1841 restart: 1842 bwillwrite(); 1843 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1844 path2, fd, &cap_symlinkat_rights); 1845 if ((error = namei(&nd)) != 0) 1846 goto out; 1847 if (nd.ni_vp) { 1848 NDFREE_PNBUF(&nd); 1849 if (nd.ni_vp == nd.ni_dvp) 1850 vrele(nd.ni_dvp); 1851 else 1852 vput(nd.ni_dvp); 1853 vrele(nd.ni_vp); 1854 nd.ni_vp = NULL; 1855 error = EEXIST; 1856 goto out; 1857 } 1858 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1859 NDFREE_PNBUF(&nd); 1860 vput(nd.ni_dvp); 1861 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1862 goto out; 1863 goto restart; 1864 } 1865 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1866 error = EINVAL; 1867 goto out; 1868 } 1869 VATTR_NULL(&vattr); 1870 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 
1871 #ifdef MAC 1872 vattr.va_type = VLNK; 1873 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1874 &vattr); 1875 if (error != 0) 1876 goto out2; 1877 #endif 1878 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1879 #ifdef MAC 1880 out2: 1881 #endif 1882 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1883 vn_finished_write(mp); 1884 NDFREE_PNBUF(&nd); 1885 if (error == ERELOOKUP) 1886 goto restart; 1887 out: 1888 if (segflg != UIO_SYSSPACE) 1889 uma_zfree(namei_zone, tmppath); 1890 return (error); 1891 } 1892 1893 /* 1894 * Delete a whiteout from the filesystem. 1895 */ 1896 #ifndef _SYS_SYSPROTO_H_ 1897 struct undelete_args { 1898 char *path; 1899 }; 1900 #endif 1901 int 1902 sys_undelete(struct thread *td, struct undelete_args *uap) 1903 { 1904 struct mount *mp; 1905 struct nameidata nd; 1906 int error; 1907 1908 NDPREINIT(&nd); 1909 restart: 1910 bwillwrite(); 1911 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1912 UIO_USERSPACE, uap->path); 1913 error = namei(&nd); 1914 if (error != 0) 1915 return (error); 1916 1917 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1918 NDFREE_PNBUF(&nd); 1919 if (nd.ni_vp == nd.ni_dvp) 1920 vrele(nd.ni_dvp); 1921 else 1922 vput(nd.ni_dvp); 1923 if (nd.ni_vp) 1924 vrele(nd.ni_vp); 1925 return (EEXIST); 1926 } 1927 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1928 NDFREE_PNBUF(&nd); 1929 vput(nd.ni_dvp); 1930 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1931 return (error); 1932 goto restart; 1933 } 1934 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1935 NDFREE_PNBUF(&nd); 1936 vput(nd.ni_dvp); 1937 vn_finished_write(mp); 1938 if (error == ERELOOKUP) 1939 goto restart; 1940 return (error); 1941 } 1942 1943 /* 1944 * Delete a name from the filesystem. 
1945 */ 1946 #ifndef _SYS_SYSPROTO_H_ 1947 struct unlink_args { 1948 char *path; 1949 }; 1950 #endif 1951 int 1952 sys_unlink(struct thread *td, struct unlink_args *uap) 1953 { 1954 1955 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1956 0, 0)); 1957 } 1958 1959 static int 1960 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1961 int flag, enum uio_seg pathseg, ino_t oldinum) 1962 { 1963 1964 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1965 return (EINVAL); 1966 1967 if ((flag & AT_REMOVEDIR) != 0) 1968 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1969 1970 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1971 } 1972 1973 #ifndef _SYS_SYSPROTO_H_ 1974 struct unlinkat_args { 1975 int fd; 1976 char *path; 1977 int flag; 1978 }; 1979 #endif 1980 int 1981 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1982 { 1983 1984 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1985 UIO_USERSPACE, 0)); 1986 } 1987 1988 #ifndef _SYS_SYSPROTO_H_ 1989 struct funlinkat_args { 1990 int dfd; 1991 const char *path; 1992 int fd; 1993 int flag; 1994 }; 1995 #endif 1996 int 1997 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1998 { 1999 2000 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 2001 UIO_USERSPACE, 0)); 2002 } 2003 2004 int 2005 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 2006 enum uio_seg pathseg, int flag, ino_t oldinum) 2007 { 2008 struct mount *mp; 2009 struct file *fp; 2010 struct vnode *vp; 2011 struct nameidata nd; 2012 struct stat sb; 2013 int error; 2014 2015 fp = NULL; 2016 if (fd != FD_NONE) { 2017 error = getvnode_path(td, fd, &cap_no_rights, &fp); 2018 if (error != 0) 2019 return (error); 2020 } 2021 2022 NDPREINIT(&nd); 2023 restart: 2024 bwillwrite(); 2025 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2026 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2027 pathseg, 
path, dfd, &cap_unlinkat_rights); 2028 if ((error = namei(&nd)) != 0) { 2029 if (error == EINVAL) 2030 error = EPERM; 2031 goto fdout; 2032 } 2033 vp = nd.ni_vp; 2034 if (vp->v_type == VDIR && oldinum == 0) { 2035 error = EPERM; /* POSIX */ 2036 } else if (oldinum != 0 && 2037 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2038 sb.st_ino != oldinum) { 2039 error = EIDRM; /* Identifier removed */ 2040 } else if (fp != NULL && fp->f_vnode != vp) { 2041 if (VN_IS_DOOMED(fp->f_vnode)) 2042 error = EBADF; 2043 else 2044 error = EDEADLK; 2045 } else { 2046 /* 2047 * The root of a mounted filesystem cannot be deleted. 2048 * 2049 * XXX: can this only be a VDIR case? 2050 */ 2051 if (vp->v_vflag & VV_ROOT) 2052 error = EBUSY; 2053 } 2054 if (error == 0) { 2055 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2056 NDFREE_PNBUF(&nd); 2057 vput(nd.ni_dvp); 2058 if (vp == nd.ni_dvp) 2059 vrele(vp); 2060 else 2061 vput(vp); 2062 if ((error = vn_start_write(NULL, &mp, 2063 V_XSLEEP | V_PCATCH)) != 0) { 2064 goto fdout; 2065 } 2066 goto restart; 2067 } 2068 #ifdef MAC 2069 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2070 &nd.ni_cnd); 2071 if (error != 0) 2072 goto out; 2073 #endif 2074 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2075 #ifdef MAC 2076 out: 2077 #endif 2078 vn_finished_write(mp); 2079 } 2080 NDFREE_PNBUF(&nd); 2081 vput(nd.ni_dvp); 2082 if (vp == nd.ni_dvp) 2083 vrele(vp); 2084 else 2085 vput(vp); 2086 if (error == ERELOOKUP) 2087 goto restart; 2088 fdout: 2089 if (fp != NULL) 2090 fdrop(fp, td); 2091 return (error); 2092 } 2093 2094 /* 2095 * Reposition read/write file offset. 
2096 */ 2097 #ifndef _SYS_SYSPROTO_H_ 2098 struct lseek_args { 2099 int fd; 2100 int pad; 2101 off_t offset; 2102 int whence; 2103 }; 2104 #endif 2105 int 2106 sys_lseek(struct thread *td, struct lseek_args *uap) 2107 { 2108 2109 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2110 } 2111 2112 int 2113 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2114 { 2115 struct file *fp; 2116 int error; 2117 2118 AUDIT_ARG_FD(fd); 2119 error = fget(td, fd, &cap_seek_rights, &fp); 2120 if (error != 0) 2121 return (error); 2122 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2123 fo_seek(fp, offset, whence, td) : ESPIPE; 2124 fdrop(fp, td); 2125 return (error); 2126 } 2127 2128 #if defined(COMPAT_43) 2129 /* 2130 * Reposition read/write file offset. 2131 */ 2132 #ifndef _SYS_SYSPROTO_H_ 2133 struct olseek_args { 2134 int fd; 2135 long offset; 2136 int whence; 2137 }; 2138 #endif 2139 int 2140 olseek(struct thread *td, struct olseek_args *uap) 2141 { 2142 2143 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2144 } 2145 #endif /* COMPAT_43 */ 2146 2147 #if defined(COMPAT_FREEBSD6) 2148 /* Version with the 'pad' argument */ 2149 int 2150 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2151 { 2152 2153 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2154 } 2155 #endif 2156 2157 /* 2158 * Check access permissions using passed credentials. 2159 */ 2160 static int 2161 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2162 struct thread *td) 2163 { 2164 accmode_t accmode; 2165 int error; 2166 2167 /* Flags == 0 means only check for existence. 
*/ 2168 if (user_flags == 0) 2169 return (0); 2170 2171 accmode = 0; 2172 if (user_flags & R_OK) 2173 accmode |= VREAD; 2174 if (user_flags & W_OK) 2175 accmode |= VWRITE; 2176 if (user_flags & X_OK) 2177 accmode |= VEXEC; 2178 #ifdef MAC 2179 error = mac_vnode_check_access(cred, vp, accmode); 2180 if (error != 0) 2181 return (error); 2182 #endif 2183 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2184 error = VOP_ACCESS(vp, accmode, cred, td); 2185 return (error); 2186 } 2187 2188 /* 2189 * Check access permissions using "real" credentials. 2190 */ 2191 #ifndef _SYS_SYSPROTO_H_ 2192 struct access_args { 2193 char *path; 2194 int amode; 2195 }; 2196 #endif 2197 int 2198 sys_access(struct thread *td, struct access_args *uap) 2199 { 2200 2201 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2202 0, uap->amode)); 2203 } 2204 2205 #ifndef _SYS_SYSPROTO_H_ 2206 struct faccessat_args { 2207 int dirfd; 2208 char *path; 2209 int amode; 2210 int flag; 2211 } 2212 #endif 2213 int 2214 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2215 { 2216 2217 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2218 uap->amode)); 2219 } 2220 2221 int 2222 kern_accessat(struct thread *td, int fd, const char *path, 2223 enum uio_seg pathseg, int flag, int amode) 2224 { 2225 struct ucred *cred, *usecred; 2226 struct vnode *vp; 2227 struct nameidata nd; 2228 int error; 2229 2230 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2231 AT_SYMLINK_NOFOLLOW)) != 0) 2232 return (EINVAL); 2233 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2234 return (EINVAL); 2235 2236 /* 2237 * Create and modify a temporary credential instead of one that 2238 * is potentially shared (if we need one). 
2239 */ 2240 cred = td->td_ucred; 2241 if ((flag & AT_EACCESS) == 0 && 2242 ((cred->cr_uid != cred->cr_ruid || 2243 cred->cr_rgid != cred->cr_groups[0]))) { 2244 usecred = crdup(cred); 2245 usecred->cr_uid = cred->cr_ruid; 2246 usecred->cr_groups[0] = cred->cr_rgid; 2247 td->td_ucred = usecred; 2248 } else 2249 usecred = cred; 2250 AUDIT_ARG_VALUE(amode); 2251 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2252 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2253 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2254 if ((error = namei(&nd)) != 0) 2255 goto out; 2256 vp = nd.ni_vp; 2257 2258 error = vn_access(vp, amode, usecred, td); 2259 NDFREE_PNBUF(&nd); 2260 vput(vp); 2261 out: 2262 if (usecred != cred) { 2263 td->td_ucred = cred; 2264 crfree(usecred); 2265 } 2266 return (error); 2267 } 2268 2269 /* 2270 * Check access permissions using "effective" credentials. 2271 */ 2272 #ifndef _SYS_SYSPROTO_H_ 2273 struct eaccess_args { 2274 char *path; 2275 int amode; 2276 }; 2277 #endif 2278 int 2279 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2280 { 2281 2282 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2283 AT_EACCESS, uap->amode)); 2284 } 2285 2286 #if defined(COMPAT_43) 2287 /* 2288 * Get file status; this version follows links. 2289 */ 2290 #ifndef _SYS_SYSPROTO_H_ 2291 struct ostat_args { 2292 char *path; 2293 struct ostat *ub; 2294 }; 2295 #endif 2296 int 2297 ostat(struct thread *td, struct ostat_args *uap) 2298 { 2299 struct stat sb; 2300 struct ostat osb; 2301 int error; 2302 2303 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2304 if (error != 0) 2305 return (error); 2306 cvtstat(&sb, &osb); 2307 return (copyout(&osb, uap->ub, sizeof (osb))); 2308 } 2309 2310 /* 2311 * Get file status; this version does not follow links. 
2312 */ 2313 #ifndef _SYS_SYSPROTO_H_ 2314 struct olstat_args { 2315 char *path; 2316 struct ostat *ub; 2317 }; 2318 #endif 2319 int 2320 olstat(struct thread *td, struct olstat_args *uap) 2321 { 2322 struct stat sb; 2323 struct ostat osb; 2324 int error; 2325 2326 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2327 UIO_USERSPACE, &sb); 2328 if (error != 0) 2329 return (error); 2330 cvtstat(&sb, &osb); 2331 return (copyout(&osb, uap->ub, sizeof (osb))); 2332 } 2333 2334 /* 2335 * Convert from an old to a new stat structure. 2336 * XXX: many values are blindly truncated. 2337 */ 2338 void 2339 cvtstat(struct stat *st, struct ostat *ost) 2340 { 2341 2342 bzero(ost, sizeof(*ost)); 2343 ost->st_dev = st->st_dev; 2344 ost->st_ino = st->st_ino; 2345 ost->st_mode = st->st_mode; 2346 ost->st_nlink = st->st_nlink; 2347 ost->st_uid = st->st_uid; 2348 ost->st_gid = st->st_gid; 2349 ost->st_rdev = st->st_rdev; 2350 ost->st_size = MIN(st->st_size, INT32_MAX); 2351 ost->st_atim = st->st_atim; 2352 ost->st_mtim = st->st_mtim; 2353 ost->st_ctim = st->st_ctim; 2354 ost->st_blksize = st->st_blksize; 2355 ost->st_blocks = st->st_blocks; 2356 ost->st_flags = st->st_flags; 2357 ost->st_gen = st->st_gen; 2358 } 2359 #endif /* COMPAT_43 */ 2360 2361 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2362 int ino64_trunc_error; 2363 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2364 &ino64_trunc_error, 0, 2365 "Error on truncation of device, file or inode number, or link count"); 2366 2367 int 2368 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2369 { 2370 2371 ost->st_dev = st->st_dev; 2372 if (ost->st_dev != st->st_dev) { 2373 switch (ino64_trunc_error) { 2374 default: 2375 /* 2376 * Since dev_t is almost raw, don't clamp to the 2377 * maximum for case 2, but ignore the error. 
2378 */ 2379 break; 2380 case 1: 2381 return (EOVERFLOW); 2382 } 2383 } 2384 ost->st_ino = st->st_ino; 2385 if (ost->st_ino != st->st_ino) { 2386 switch (ino64_trunc_error) { 2387 default: 2388 case 0: 2389 break; 2390 case 1: 2391 return (EOVERFLOW); 2392 case 2: 2393 ost->st_ino = UINT32_MAX; 2394 break; 2395 } 2396 } 2397 ost->st_mode = st->st_mode; 2398 ost->st_nlink = st->st_nlink; 2399 if (ost->st_nlink != st->st_nlink) { 2400 switch (ino64_trunc_error) { 2401 default: 2402 case 0: 2403 break; 2404 case 1: 2405 return (EOVERFLOW); 2406 case 2: 2407 ost->st_nlink = UINT16_MAX; 2408 break; 2409 } 2410 } 2411 ost->st_uid = st->st_uid; 2412 ost->st_gid = st->st_gid; 2413 ost->st_rdev = st->st_rdev; 2414 if (ost->st_rdev != st->st_rdev) { 2415 switch (ino64_trunc_error) { 2416 default: 2417 break; 2418 case 1: 2419 return (EOVERFLOW); 2420 } 2421 } 2422 ost->st_atim = st->st_atim; 2423 ost->st_mtim = st->st_mtim; 2424 ost->st_ctim = st->st_ctim; 2425 ost->st_size = st->st_size; 2426 ost->st_blocks = st->st_blocks; 2427 ost->st_blksize = st->st_blksize; 2428 ost->st_flags = st->st_flags; 2429 ost->st_gen = st->st_gen; 2430 ost->st_lspare = 0; 2431 ost->st_birthtim = st->st_birthtim; 2432 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2433 sizeof(*ost) - offsetof(struct freebsd11_stat, 2434 st_birthtim) - sizeof(ost->st_birthtim)); 2435 return (0); 2436 } 2437 2438 int 2439 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2440 { 2441 struct stat sb; 2442 struct freebsd11_stat osb; 2443 int error; 2444 2445 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2446 if (error != 0) 2447 return (error); 2448 error = freebsd11_cvtstat(&sb, &osb); 2449 if (error == 0) 2450 error = copyout(&osb, uap->ub, sizeof(osb)); 2451 return (error); 2452 } 2453 2454 int 2455 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2456 { 2457 struct stat sb; 2458 struct freebsd11_stat osb; 2459 int error; 2460 2461 error = 
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2462 UIO_USERSPACE, &sb); 2463 if (error != 0) 2464 return (error); 2465 error = freebsd11_cvtstat(&sb, &osb); 2466 if (error == 0) 2467 error = copyout(&osb, uap->ub, sizeof(osb)); 2468 return (error); 2469 } 2470 2471 int 2472 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2473 { 2474 struct fhandle fh; 2475 struct stat sb; 2476 struct freebsd11_stat osb; 2477 int error; 2478 2479 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2480 if (error != 0) 2481 return (error); 2482 error = kern_fhstat(td, fh, &sb); 2483 if (error != 0) 2484 return (error); 2485 error = freebsd11_cvtstat(&sb, &osb); 2486 if (error == 0) 2487 error = copyout(&osb, uap->sb, sizeof(osb)); 2488 return (error); 2489 } 2490 2491 int 2492 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2493 { 2494 struct stat sb; 2495 struct freebsd11_stat osb; 2496 int error; 2497 2498 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2499 UIO_USERSPACE, &sb); 2500 if (error != 0) 2501 return (error); 2502 error = freebsd11_cvtstat(&sb, &osb); 2503 if (error == 0) 2504 error = copyout(&osb, uap->buf, sizeof(osb)); 2505 return (error); 2506 } 2507 #endif /* COMPAT_FREEBSD11 */ 2508 2509 /* 2510 * Get file status 2511 */ 2512 #ifndef _SYS_SYSPROTO_H_ 2513 struct fstatat_args { 2514 int fd; 2515 char *path; 2516 struct stat *buf; 2517 int flag; 2518 } 2519 #endif 2520 int 2521 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2522 { 2523 struct stat sb; 2524 int error; 2525 2526 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2527 UIO_USERSPACE, &sb); 2528 if (error == 0) 2529 error = copyout(&sb, uap->buf, sizeof (sb)); 2530 return (error); 2531 } 2532 2533 int 2534 kern_statat(struct thread *td, int flag, int fd, const char *path, 2535 enum uio_seg pathseg, struct stat *sbp) 2536 { 2537 struct nameidata nd; 2538 int error; 2539 2540 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2541 AT_EMPTY_PATH)) != 0) 2542 return (EINVAL); 2543 2544 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2545 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2546 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2547 2548 if ((error = namei(&nd)) != 0) { 2549 if (error == ENOTDIR && 2550 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2551 error = kern_fstat(td, fd, sbp); 2552 return (error); 2553 } 2554 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2555 NDFREE_PNBUF(&nd); 2556 vput(nd.ni_vp); 2557 #ifdef __STAT_TIME_T_EXT 2558 sbp->st_atim_ext = 0; 2559 sbp->st_mtim_ext = 0; 2560 sbp->st_ctim_ext = 0; 2561 sbp->st_btim_ext = 0; 2562 #endif 2563 #ifdef KTRACE 2564 if (KTRPOINT(td, KTR_STRUCT)) 2565 ktrstat_error(sbp, error); 2566 #endif 2567 return (error); 2568 } 2569 2570 #if defined(COMPAT_FREEBSD11) 2571 /* 2572 * Implementation of the NetBSD [l]stat() functions. 2573 */ 2574 int 2575 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2576 { 2577 struct freebsd11_stat sb11; 2578 int error; 2579 2580 error = freebsd11_cvtstat(sb, &sb11); 2581 if (error != 0) 2582 return (error); 2583 2584 bzero(nsb, sizeof(*nsb)); 2585 CP(sb11, *nsb, st_dev); 2586 CP(sb11, *nsb, st_ino); 2587 CP(sb11, *nsb, st_mode); 2588 CP(sb11, *nsb, st_nlink); 2589 CP(sb11, *nsb, st_uid); 2590 CP(sb11, *nsb, st_gid); 2591 CP(sb11, *nsb, st_rdev); 2592 CP(sb11, *nsb, st_atim); 2593 CP(sb11, *nsb, st_mtim); 2594 CP(sb11, *nsb, st_ctim); 2595 CP(sb11, *nsb, st_size); 2596 CP(sb11, *nsb, st_blocks); 2597 CP(sb11, *nsb, st_blksize); 2598 CP(sb11, *nsb, st_flags); 2599 CP(sb11, *nsb, st_gen); 2600 CP(sb11, *nsb, st_birthtim); 2601 return (0); 2602 } 2603 2604 #ifndef _SYS_SYSPROTO_H_ 2605 struct freebsd11_nstat_args { 2606 char *path; 2607 struct nstat *ub; 2608 }; 2609 #endif 2610 int 2611 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2612 { 2613 struct stat sb; 2614 struct nstat nsb; 2615 int error; 2616 
2617 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2618 if (error != 0) 2619 return (error); 2620 error = freebsd11_cvtnstat(&sb, &nsb); 2621 if (error == 0) 2622 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2623 return (error); 2624 } 2625 2626 /* 2627 * NetBSD lstat. Get file status; this version does not follow links. 2628 */ 2629 #ifndef _SYS_SYSPROTO_H_ 2630 struct freebsd11_nlstat_args { 2631 char *path; 2632 struct nstat *ub; 2633 }; 2634 #endif 2635 int 2636 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2637 { 2638 struct stat sb; 2639 struct nstat nsb; 2640 int error; 2641 2642 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2643 UIO_USERSPACE, &sb); 2644 if (error != 0) 2645 return (error); 2646 error = freebsd11_cvtnstat(&sb, &nsb); 2647 if (error == 0) 2648 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2649 return (error); 2650 } 2651 #endif /* COMPAT_FREEBSD11 */ 2652 2653 /* 2654 * Get configurable pathname variables. 
2655 */ 2656 #ifndef _SYS_SYSPROTO_H_ 2657 struct pathconf_args { 2658 char *path; 2659 int name; 2660 }; 2661 #endif 2662 int 2663 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2664 { 2665 long value; 2666 int error; 2667 2668 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2669 &value); 2670 if (error == 0) 2671 td->td_retval[0] = value; 2672 return (error); 2673 } 2674 2675 #ifndef _SYS_SYSPROTO_H_ 2676 struct lpathconf_args { 2677 char *path; 2678 int name; 2679 }; 2680 #endif 2681 int 2682 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2683 { 2684 long value; 2685 int error; 2686 2687 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2688 NOFOLLOW, &value); 2689 if (error == 0) 2690 td->td_retval[0] = value; 2691 return (error); 2692 } 2693 2694 int 2695 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2696 int name, u_long flags, long *valuep) 2697 { 2698 struct nameidata nd; 2699 int error; 2700 2701 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2702 pathseg, path); 2703 if ((error = namei(&nd)) != 0) 2704 return (error); 2705 NDFREE_PNBUF(&nd); 2706 2707 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2708 vput(nd.ni_vp); 2709 return (error); 2710 } 2711 2712 /* 2713 * Return target name of a symbolic link. 
*/
#ifndef _SYS_SYSPROTO_H_
struct readlink_args {
	char	*path;
	char	*buf;
	size_t	count;
};
#endif
/*
 * readlink(2) entry point: readlinkat relative to AT_FDCWD, with both the
 * path and the output buffer in user space.
 */
int
sys_readlink(struct thread *td, struct readlink_args *uap)
{

	return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->buf, UIO_USERSPACE, uap->count));
}
#ifndef _SYS_SYSPROTO_H_
struct readlinkat_args {
	int	fd;
	char	*path;
	char	*buf;
	size_t	bufsize;
};
#endif
/*
 * readlinkat(2) entry point: like sys_readlink() but the lookup starts at
 * uap->fd.
 */
int
sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
{

	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->buf, UIO_USERSPACE, uap->bufsize));
}

/*
 * Look up 'path' relative to 'fd' without following a trailing symlink and
 * read the link target into 'buf' (at most 'count' bytes, address space given
 * by 'bufseg').  A 'count' above IOSIZE_MAX is rejected with EINVAL.  The
 * number of bytes produced is reported through td->td_retval[0] by
 * kern_readlink_vp().
 */
int
kern_readlinkat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count)
{
	struct vnode *vp;
	struct nameidata nd;
	int error;

	if (count > IOSIZE_MAX)
		return (EINVAL);

	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 |
	    EMPTYPATH, pathseg, path, fd);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	/* vp is locked here (LOCKLEAF), as kern_readlink_vp() asserts. */
	error = kern_readlink_vp(vp, buf, bufseg, count, td);
	vput(vp);

	return (error);
}

/*
 * Helper function to readlink from a vnode
 *
 * The caller must hold the vnode lock (asserted below).  Returns EINVAL when
 * the vnode is neither of type VLNK nor flagged VV_READLINK.  Otherwise a
 * single-segment uio describing 'buf'/'count' is handed to VOP_READLINK() and
 * td->td_retval[0] is set to the number of bytes consumed from the uio,
 * regardless of the VOP's return value.
 */
static int
kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count,
    struct thread *td)
{
	struct iovec aiov;
	struct uio auio;
	int error;

	ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked");
#ifdef MAC
	error = mac_vnode_check_readlink(td->td_ucred, vp);
	if (error != 0)
		return (error);
#endif
	if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
		return (EINVAL);

	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = bufseg;
	auio.uio_td = td;
	auio.uio_resid = count;
	error = VOP_READLINK(vp, &auio, td->td_ucred);
	td->td_retval[0] = count - auio.uio_resid;
	return (error);
}

/*
 * Common implementation code for chflags() and fchflags().
 *
 * 'vp' is passed in unlocked; this function takes the exclusive vnode lock
 * itself, bracketed by vn_start_write()/vn_finished_write().  Returns 0 or an
 * errno value.
 */
static int
setfflags(struct thread *td, struct vnode *vp, u_long flags)
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	/* We can't support the value matching VNOVAL. */
	if (flags == VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
		if (error != 0)
			return (error);
	}

	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
		return (error);
	/* Only va_flags is given a value; VATTR_NULL() resets the rest. */
	VATTR_NULL(&vattr);
	vattr.va_flags = flags;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
	/* With MAC compiled in, the "if" below guards the VOP_SETATTR(). */
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Change flags of a file given a path name.
2847 */ 2848 #ifndef _SYS_SYSPROTO_H_ 2849 struct chflags_args { 2850 const char *path; 2851 u_long flags; 2852 }; 2853 #endif 2854 int 2855 sys_chflags(struct thread *td, struct chflags_args *uap) 2856 { 2857 2858 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2859 uap->flags, 0)); 2860 } 2861 2862 #ifndef _SYS_SYSPROTO_H_ 2863 struct chflagsat_args { 2864 int fd; 2865 const char *path; 2866 u_long flags; 2867 int atflag; 2868 } 2869 #endif 2870 int 2871 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2872 { 2873 2874 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2875 uap->flags, uap->atflag)); 2876 } 2877 2878 /* 2879 * Same as chflags() but doesn't follow symlinks. 2880 */ 2881 #ifndef _SYS_SYSPROTO_H_ 2882 struct lchflags_args { 2883 const char *path; 2884 u_long flags; 2885 }; 2886 #endif 2887 int 2888 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2889 { 2890 2891 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2892 uap->flags, AT_SYMLINK_NOFOLLOW)); 2893 } 2894 2895 static int 2896 kern_chflagsat(struct thread *td, int fd, const char *path, 2897 enum uio_seg pathseg, u_long flags, int atflag) 2898 { 2899 struct nameidata nd; 2900 int error; 2901 2902 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2903 AT_EMPTY_PATH)) != 0) 2904 return (EINVAL); 2905 2906 AUDIT_ARG_FFLAGS(flags); 2907 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2908 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2909 fd, &cap_fchflags_rights); 2910 if ((error = namei(&nd)) != 0) 2911 return (error); 2912 NDFREE_PNBUF(&nd); 2913 error = setfflags(td, nd.ni_vp, flags); 2914 vrele(nd.ni_vp); 2915 return (error); 2916 } 2917 2918 /* 2919 * Change flags of a file given a file descriptor. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fchflags_args {
	int	fd;
	u_long	flags;
};
#endif
/*
 * fchflags(2) entry point: obtain the vnode-backed file for uap->fd via
 * getvnode() and apply uap->flags to its vnode with setfflags().  The file
 * reference is dropped before returning.
 */
int
sys_fchflags(struct thread *td, struct fchflags_args *uap)
{
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_FFLAGS(uap->flags);
	error = getvnode(td, uap->fd, &cap_fchflags_rights,
	    &fp);
	if (error != 0)
		return (error);
#ifdef AUDIT
	/* The vnode is locked shared only for the audit record. */
	if (AUDITING_TD(td)) {
		vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
		AUDIT_ARG_VNODE1(fp->f_vnode);
		VOP_UNLOCK(fp->f_vnode);
	}
#endif
	error = setfflags(td, fp->f_vnode, uap->flags);
	fdrop(fp, td);
	return (error);
}

/*
 * Common implementation code for chmod(), lchmod() and fchmod().
 *
 * 'vp' is passed in unlocked; the exclusive vnode lock is taken here,
 * bracketed by vn_start_write()/vn_finished_write().  Only the ALLPERMS bits
 * of 'mode' are applied.  'cred' is used for both the MAC check and the
 * VOP_SETATTR() call.  Returns 0 or an errno value.
 */
int
setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
		return (error);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VATTR_NULL(&vattr);
	vattr.va_mode = mode & ALLPERMS;
#ifdef MAC
	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
	/* With MAC compiled in, the "if" below guards the VOP_SETATTR(). */
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Change mode of a file given path name.
2978 */ 2979 #ifndef _SYS_SYSPROTO_H_ 2980 struct chmod_args { 2981 char *path; 2982 int mode; 2983 }; 2984 #endif 2985 int 2986 sys_chmod(struct thread *td, struct chmod_args *uap) 2987 { 2988 2989 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2990 uap->mode, 0)); 2991 } 2992 2993 #ifndef _SYS_SYSPROTO_H_ 2994 struct fchmodat_args { 2995 int dirfd; 2996 char *path; 2997 mode_t mode; 2998 int flag; 2999 } 3000 #endif 3001 int 3002 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 3003 { 3004 3005 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 3006 uap->mode, uap->flag)); 3007 } 3008 3009 /* 3010 * Change mode of a file given path name (don't follow links.) 3011 */ 3012 #ifndef _SYS_SYSPROTO_H_ 3013 struct lchmod_args { 3014 char *path; 3015 int mode; 3016 }; 3017 #endif 3018 int 3019 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3020 { 3021 3022 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3023 uap->mode, AT_SYMLINK_NOFOLLOW)); 3024 } 3025 3026 int 3027 kern_fchmodat(struct thread *td, int fd, const char *path, 3028 enum uio_seg pathseg, mode_t mode, int flag) 3029 { 3030 struct nameidata nd; 3031 int error; 3032 3033 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3034 AT_EMPTY_PATH)) != 0) 3035 return (EINVAL); 3036 3037 AUDIT_ARG_MODE(mode); 3038 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3039 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3040 fd, &cap_fchmod_rights); 3041 if ((error = namei(&nd)) != 0) 3042 return (error); 3043 NDFREE_PNBUF(&nd); 3044 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3045 vrele(nd.ni_vp); 3046 return (error); 3047 } 3048 3049 /* 3050 * Change mode of a file given a file descriptor. 
3051 */ 3052 #ifndef _SYS_SYSPROTO_H_ 3053 struct fchmod_args { 3054 int fd; 3055 int mode; 3056 }; 3057 #endif 3058 int 3059 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3060 { 3061 struct file *fp; 3062 int error; 3063 3064 AUDIT_ARG_FD(uap->fd); 3065 AUDIT_ARG_MODE(uap->mode); 3066 3067 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3068 if (error != 0) 3069 return (error); 3070 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3071 fdrop(fp, td); 3072 return (error); 3073 } 3074 3075 /* 3076 * Common implementation for chown(), lchown(), and fchown() 3077 */ 3078 int 3079 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3080 gid_t gid) 3081 { 3082 struct mount *mp; 3083 struct vattr vattr; 3084 int error; 3085 3086 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3087 return (error); 3088 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3089 VATTR_NULL(&vattr); 3090 vattr.va_uid = uid; 3091 vattr.va_gid = gid; 3092 #ifdef MAC 3093 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3094 vattr.va_gid); 3095 if (error == 0) 3096 #endif 3097 error = VOP_SETATTR(vp, &vattr, cred); 3098 VOP_UNLOCK(vp); 3099 vn_finished_write(mp); 3100 return (error); 3101 } 3102 3103 /* 3104 * Set ownership given a path name. 
3105 */ 3106 #ifndef _SYS_SYSPROTO_H_ 3107 struct chown_args { 3108 char *path; 3109 int uid; 3110 int gid; 3111 }; 3112 #endif 3113 int 3114 sys_chown(struct thread *td, struct chown_args *uap) 3115 { 3116 3117 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3118 uap->gid, 0)); 3119 } 3120 3121 #ifndef _SYS_SYSPROTO_H_ 3122 struct fchownat_args { 3123 int fd; 3124 const char * path; 3125 uid_t uid; 3126 gid_t gid; 3127 int flag; 3128 }; 3129 #endif 3130 int 3131 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3132 { 3133 3134 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3135 uap->gid, uap->flag)); 3136 } 3137 3138 int 3139 kern_fchownat(struct thread *td, int fd, const char *path, 3140 enum uio_seg pathseg, int uid, int gid, int flag) 3141 { 3142 struct nameidata nd; 3143 int error; 3144 3145 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3146 AT_EMPTY_PATH)) != 0) 3147 return (EINVAL); 3148 3149 AUDIT_ARG_OWNER(uid, gid); 3150 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3151 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3152 fd, &cap_fchown_rights); 3153 3154 if ((error = namei(&nd)) != 0) 3155 return (error); 3156 NDFREE_PNBUF(&nd); 3157 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3158 vrele(nd.ni_vp); 3159 return (error); 3160 } 3161 3162 /* 3163 * Set ownership given a path name, do not cross symlinks. 3164 */ 3165 #ifndef _SYS_SYSPROTO_H_ 3166 struct lchown_args { 3167 char *path; 3168 int uid; 3169 int gid; 3170 }; 3171 #endif 3172 int 3173 sys_lchown(struct thread *td, struct lchown_args *uap) 3174 { 3175 3176 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3177 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3178 } 3179 3180 /* 3181 * Set ownership given a file descriptor. 
3182 */ 3183 #ifndef _SYS_SYSPROTO_H_ 3184 struct fchown_args { 3185 int fd; 3186 int uid; 3187 int gid; 3188 }; 3189 #endif 3190 int 3191 sys_fchown(struct thread *td, struct fchown_args *uap) 3192 { 3193 struct file *fp; 3194 int error; 3195 3196 AUDIT_ARG_FD(uap->fd); 3197 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3198 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3199 if (error != 0) 3200 return (error); 3201 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3202 fdrop(fp, td); 3203 return (error); 3204 } 3205 3206 /* 3207 * Common implementation code for utimes(), lutimes(), and futimes(). 3208 */ 3209 static int 3210 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3211 struct timespec *tsp) 3212 { 3213 struct timeval tv[2]; 3214 const struct timeval *tvp; 3215 int error; 3216 3217 if (usrtvp == NULL) { 3218 vfs_timestamp(&tsp[0]); 3219 tsp[1] = tsp[0]; 3220 } else { 3221 if (tvpseg == UIO_SYSSPACE) { 3222 tvp = usrtvp; 3223 } else { 3224 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3225 return (error); 3226 tvp = tv; 3227 } 3228 3229 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3230 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3231 return (EINVAL); 3232 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3233 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3234 } 3235 return (0); 3236 } 3237 3238 /* 3239 * Common implementation code for futimens(), utimensat(). 
3240 */ 3241 #define UTIMENS_NULL 0x1 3242 #define UTIMENS_EXIT 0x2 3243 static int 3244 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3245 struct timespec *tsp, int *retflags) 3246 { 3247 struct timespec tsnow; 3248 int error; 3249 3250 vfs_timestamp(&tsnow); 3251 *retflags = 0; 3252 if (usrtsp == NULL) { 3253 tsp[0] = tsnow; 3254 tsp[1] = tsnow; 3255 *retflags |= UTIMENS_NULL; 3256 return (0); 3257 } 3258 if (tspseg == UIO_SYSSPACE) { 3259 tsp[0] = usrtsp[0]; 3260 tsp[1] = usrtsp[1]; 3261 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3262 return (error); 3263 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3264 *retflags |= UTIMENS_EXIT; 3265 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3266 *retflags |= UTIMENS_NULL; 3267 if (tsp[0].tv_nsec == UTIME_OMIT) 3268 tsp[0].tv_sec = VNOVAL; 3269 else if (tsp[0].tv_nsec == UTIME_NOW) 3270 tsp[0] = tsnow; 3271 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3272 return (EINVAL); 3273 if (tsp[1].tv_nsec == UTIME_OMIT) 3274 tsp[1].tv_sec = VNOVAL; 3275 else if (tsp[1].tv_nsec == UTIME_NOW) 3276 tsp[1] = tsnow; 3277 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3278 return (EINVAL); 3279 3280 return (0); 3281 } 3282 3283 /* 3284 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3285 * and utimensat(). 
*/
/*
 * Apply the times in 'ts' to 'vp': ts[0] becomes the access time, ts[1] the
 * modification time and, when 'numtimes' is greater than 2, ts[2] the birth
 * time.  A non-zero 'nullflag' sets VA_UTIMES_NULL in the attribute flags.
 * 'vp' is passed in unlocked; the exclusive vnode lock is taken here,
 * bracketed by vn_start_write()/vn_finished_write().  Returns 0 or an errno
 * value.
 */
static int
setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts,
    int numtimes, int nullflag)
{
	struct mount *mp;
	struct vattr vattr;
	int error;
	bool setbirthtime;

	setbirthtime = false;
	vattr.va_birthtime.tv_sec = VNOVAL;
	vattr.va_birthtime.tv_nsec = 0;

	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0)
		return (error);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * If the caller did not supply a birth time and the new modification
	 * time precedes the birth time currently reported by VOP_GETATTR(),
	 * the birth time is moved back to the modification time as well.
	 */
	if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 &&
	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
		setbirthtime = true;
	/* vattr was only scratch space for the lookup above; start afresh. */
	VATTR_NULL(&vattr);
	vattr.va_atime = ts[0];
	vattr.va_mtime = ts[1];
	if (setbirthtime)
		vattr.va_birthtime = ts[1];
	if (numtimes > 2)
		vattr.va_birthtime = ts[2];
	if (nullflag)
		vattr.va_vaflags |= VA_UTIMES_NULL;
#ifdef MAC
	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
	    vattr.va_mtime);
#endif
	/* Without MAC, error is still 0 from the successful vn_start_write(). */
	if (error == 0)
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Set the access and modification times of a file.
3328 */ 3329 #ifndef _SYS_SYSPROTO_H_ 3330 struct utimes_args { 3331 char *path; 3332 struct timeval *tptr; 3333 }; 3334 #endif 3335 int 3336 sys_utimes(struct thread *td, struct utimes_args *uap) 3337 { 3338 3339 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3340 uap->tptr, UIO_USERSPACE)); 3341 } 3342 3343 #ifndef _SYS_SYSPROTO_H_ 3344 struct futimesat_args { 3345 int fd; 3346 const char * path; 3347 const struct timeval * times; 3348 }; 3349 #endif 3350 int 3351 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3352 { 3353 3354 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3355 uap->times, UIO_USERSPACE)); 3356 } 3357 3358 int 3359 kern_utimesat(struct thread *td, int fd, const char *path, 3360 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3361 { 3362 struct nameidata nd; 3363 struct timespec ts[2]; 3364 int error; 3365 3366 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3367 return (error); 3368 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3369 &cap_futimes_rights); 3370 3371 if ((error = namei(&nd)) != 0) 3372 return (error); 3373 NDFREE_PNBUF(&nd); 3374 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3375 vrele(nd.ni_vp); 3376 return (error); 3377 } 3378 3379 /* 3380 * Set the access and modification times of a file. 
*/
#ifndef _SYS_SYSPROTO_H_
struct lutimes_args {
	char	*path;
	struct	timeval *tptr;
};
#endif
/*
 * lutimes(2) entry point: both the path and the timeval array are in user
 * space.
 */
int
sys_lutimes(struct thread *td, struct lutimes_args *uap)
{

	return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
	    UIO_USERSPACE));
}

/*
 * Set times on 'path' without following a trailing symlink (NOFOLLOW).  The
 * timevals at 'tptr' are fetched and validated by getutimes() before the
 * lookup; a NULL 'tptr' is passed on to setutimes() as a non-zero nullflag.
 */
int
kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg,
    const struct timeval *tptr, enum uio_seg tptrseg)
{
	struct timespec ts[2];
	struct nameidata nd;
	int error;

	if ((error = getutimes(tptr, tptrseg, ts)) != 0)
		return (error);
	NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE_PNBUF(&nd);
	error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
	vrele(nd.ni_vp);
	return (error);
}

/*
 * Set the access and modification times of a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct futimes_args {
	int	fd;
	struct	timeval *tptr;
};
#endif
int
sys_futimes(struct thread *td, struct futimes_args *uap)
{

	return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE));
}

/*
 * Set times on the vnode behind descriptor 'fd'.  The timevals are fetched
 * and validated by getutimes() before the descriptor is looked up; the file
 * reference obtained from getvnode() is dropped before returning.
 */
int
kern_futimes(struct thread *td, int fd, const struct timeval *tptr,
    enum uio_seg tptrseg)
{
	struct timespec ts[2];
	struct file *fp;
	int error;

	AUDIT_ARG_FD(fd);
	error = getutimes(tptr, tptrseg, ts);
	if (error != 0)
		return (error);
	error = getvnode(td, fd, &cap_futimes_rights, &fp);
	if (error != 0)
		return (error);
#ifdef AUDIT
	/* The vnode is locked shared only for the audit record. */
	if (AUDITING_TD(td)) {
		vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
		AUDIT_ARG_VNODE1(fp->f_vnode);
		VOP_UNLOCK(fp->f_vnode);
	}
#endif
	error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL);
	fdrop(fp, td);
	return (error);
}

/*
 * futimens(2) entry point: the timespec array is in user space.
 */
int
sys_futimens(struct thread *td, struct futimens_args *uap)
{

	return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE));
}

/*
 * Set times on the vnode behind descriptor 'fd' from the timespecs at
 * 'tptr'.  When getutimens() reports UTIMENS_EXIT the function returns 0
 * without looking the descriptor up at all.
 */
int
kern_futimens(struct thread *td, int fd, const struct timespec *tptr,
    enum uio_seg tptrseg)
{
	struct timespec ts[2];
	struct file *fp;
	int error, flags;

	AUDIT_ARG_FD(fd);
	error = getutimens(tptr, tptrseg, ts, &flags);
	if (error != 0)
		return (error);
	if (flags & UTIMENS_EXIT)
		return (0);
	error = getvnode(td, fd, &cap_futimes_rights, &fp);
	if (error != 0)
		return (error);
#ifdef AUDIT
	/* The vnode is locked shared only for the audit record. */
	if (AUDITING_TD(td)) {
		vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
		AUDIT_ARG_VNODE1(fp->f_vnode);
		VOP_UNLOCK(fp->f_vnode);
	}
#endif
	error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL);
	fdrop(fp, td);
	return (error);
}

/*
 * utimensat(2) entry point: both the path and the timespec array are in user
 * space.
 */
int
sys_utimensat(struct thread *td, struct utimensat_args *uap)
{

	return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->times, UIO_USERSPACE, uap->flag));
}

/*
 * Set times on 'path' resolved relative to 'fd'.  'flag' may only contain
 * AT_SYMLINK_NOFOLLOW, AT_RESOLVE_BENEATH and AT_EMPTY_PATH; any other bit
 * yields EINVAL.  Unlike kern_futimens(), the lookup is still performed when
 * getutimens() reports UTIMENS_EXIT; only the setutimes() call is skipped.
 */
int
kern_utimensat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg,
    int flag)
{
	struct nameidata nd;
	struct timespec ts[2];
	int error, flags;

	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH |
	    AT_EMPTY_PATH)) != 0)
		return (EINVAL);

	if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
		return (error);
	NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW |
	    AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1,
	    pathseg, path, fd, &cap_futimes_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	/*
	 * We are allowed to call namei() regardless of 2xUTIME_OMIT.
	 * POSIX states:
	 * "If both tv_nsec fields are UTIME_OMIT... EACCES may be detected."
	 * "Search permission is denied by a component of the path prefix."
	 */
	NDFREE_PNBUF(&nd);
	/* error is 0 here, so the UTIMENS_EXIT case returns success. */
	if ((flags & UTIMENS_EXIT) == 0)
		error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
	vrele(nd.ni_vp);
	return (error);
}

/*
 * Truncate a file given its path name.
 */
#ifndef _SYS_SYSPROTO_H_
struct truncate_args {
	char	*path;
	int	pad;
	off_t	length;
};
#endif
int
sys_truncate(struct thread *td, struct truncate_args *uap)
{

	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
}

/*
 * Truncate the file named by 'path' to 'length' bytes.
 *
 * Returns EINVAL for a negative length and EISDIR when the path names a
 * directory.  Write access is checked with VOP_ACCESS() (and the MAC write
 * check when MAC is compiled in) before vn_truncate_locked() is called.  If
 * the final result is ERELOOKUP the whole sequence, including the path
 * lookup, is restarted.
 */
int
kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg,
    off_t length)
{
	struct mount *mp;
	struct vnode *vp;
	void *rl_cookie;
	struct nameidata nd;
	int error;

	if (length < 0)
		return (EINVAL);
	NDPREINIT(&nd);
retry:
	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	NDFREE_PNBUF(&nd);
	/*
	 * Acquisition order: range lock over [0, OFF_MAX), then
	 * vn_start_write(), then the exclusive vnode lock.  The "out" path
	 * below releases them in the reverse order.
	 */
	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
	if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) {
		vn_rangelock_unlock(vp, rl_cookie);
		vrele(vp);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_write(td->td_ucred, NOCRED, vp);
	if (error != 0)
		goto out;
#endif
	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
	if (error != 0)
		goto out;

	error = vn_truncate_locked(vp, length, false, td->td_ucred);
out:
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	vn_rangelock_unlock(vp, rl_cookie);
	vrele(vp);
	if (error == ERELOOKUP)
		goto retry;
	return (error);
}

#if defined(COMPAT_43)
/*
 * Truncate a file given its path name.
*/
#ifndef _SYS_SYSPROTO_H_
struct otruncate_args {
	char	*path;
	long	length;
};
#endif
/*
 * COMPAT_43 truncate entry point; forwards uap->length to kern_truncate().
 */
int
otruncate(struct thread *td, struct otruncate_args *uap)
{

	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
}
#endif /* COMPAT_43 */

#if defined(COMPAT_FREEBSD6)
/* Versions with the pad argument */
int
freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
{

	return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length));
}

int
freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
{

	return (kern_ftruncate(td, uap->fd, uap->length));
}
#endif

/*
 * Common implementation of fsync() and fdatasync() on descriptor 'fd'.
 *
 * With 'fullsync' true the vnode is synced through VOP_FSYNC(MNT_WAIT),
 * otherwise through VOP_FDATASYNC().  If the VOP returns ERELOOKUP the
 * write-start/lock/sync sequence is repeated on the same vnode.  The file
 * reference obtained from getvnode() is dropped on every exit path after the
 * lookup succeeded.
 */
int
kern_fsync(struct thread *td, int fd, bool fullsync)
{
	struct vnode *vp;
	struct mount *mp;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(fd);
	error = getvnode(td, fd, &cap_fsync_rights, &fp);
	if (error != 0)
		return (error);
	vp = fp->f_vnode;
#if 0
	if (!fullsync)
		/* XXXKIB: complete outstanding aio writes */;
#endif
retry:
	error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH);
	if (error != 0)
		goto drop;
	/* The lock type is chosen by vn_lktype_write() for this mount/vnode. */
	vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY);
	AUDIT_ARG_VNODE1(vp);
	vnode_pager_clean_async(vp);
	error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
	VOP_UNLOCK(vp);
	vn_finished_write(mp);
	if (error == ERELOOKUP)
		goto retry;
drop:
	fdrop(fp, td);
	return (error);
}

/*
 * Sync an open file.
*/
#ifndef _SYS_SYSPROTO_H_
struct fsync_args {
	int	fd;
};
#endif
/*
 * fsync(2) entry point: kern_fsync() with fullsync set.
 */
int
sys_fsync(struct thread *td, struct fsync_args *uap)
{

	return (kern_fsync(td, uap->fd, true));
}

/*
 * fdatasync(2) entry point: kern_fsync() with fullsync clear.
 */
int
sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
{

	return (kern_fsync(td, uap->fd, false));
}

/*
 * Rename files.  Source and destination must either both be directories, or
 * both not be directories.  If target is a directory, it must be empty.
 */
#ifndef _SYS_SYSPROTO_H_
struct rename_args {
	char	*from;
	char	*to;
};
#endif
int
sys_rename(struct thread *td, struct rename_args *uap)
{

	return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
	    uap->to, UIO_USERSPACE));
}

#ifndef _SYS_SYSPROTO_H_
struct renameat_args {
	int	oldfd;
	char	*old;
	int	newfd;
	char	*new;
};
#endif
/*
 * renameat(2) entry point: both paths are in user space and are resolved
 * relative to uap->oldfd and uap->newfd respectively.
 */
int
sys_renameat(struct thread *td, struct renameat_args *uap)
{

	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
	    UIO_USERSPACE));
}

#ifdef MAC
/*
 * Source-side lookup for kern_renameat() when the MAC rename_from check is
 * enabled.  The source is looked up with its parent and leaf locked so that
 * mac_vnode_check_rename_from() can run, after which both vnodes are
 * unlocked again.
 *
 * On success (0) *fromnd is left holding the unlocked parent and leaf vnodes
 * and its path buffer; releasing them is up to the caller.  On a MAC check
 * failure the path buffer is freed and both vnodes are released here.  The
 * 'newfd' and 'new' arguments are not used by this function.
 */
static int
kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd,
    const char *new, enum uio_seg pathseg, struct nameidata *fromnd)
{
	int error;

	NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
	    pathseg, old, oldfd, &cap_renameat_source_rights);
	if ((error = namei(fromnd)) != 0)
		return (error);
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp,
	    fromnd->ni_vp, &fromnd->ni_cnd);
	VOP_UNLOCK(fromnd->ni_dvp);
	/* Parent and leaf may be the same vnode; unlock it only once. */
	if (fromnd->ni_dvp != fromnd->ni_vp)
		VOP_UNLOCK(fromnd->ni_vp);
	if (error != 0) {
		NDFREE_PNBUF(fromnd);
		vrele(fromnd->ni_dvp);
		vrele(fromnd->ni_vp);
	}
	return (error);
}
#endif

int
kern_renameat(struct thread *td,
int oldfd, const char *old, int newfd, 3754 const char *new, enum uio_seg pathseg) 3755 { 3756 struct mount *mp = NULL; 3757 struct vnode *tvp, *fvp, *tdvp; 3758 struct nameidata fromnd, tond; 3759 uint64_t tondflags; 3760 int error; 3761 short irflag; 3762 3763 again: 3764 bwillwrite(); 3765 #ifdef MAC 3766 if (mac_vnode_check_rename_from_enabled()) { 3767 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3768 &fromnd); 3769 if (error != 0) 3770 return (error); 3771 } else { 3772 #endif 3773 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3774 pathseg, old, oldfd, &cap_renameat_source_rights); 3775 if ((error = namei(&fromnd)) != 0) 3776 return (error); 3777 #ifdef MAC 3778 } 3779 #endif 3780 fvp = fromnd.ni_vp; 3781 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3782 if (fromnd.ni_vp->v_type == VDIR) 3783 tondflags |= WILLBEDIR; 3784 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3785 &cap_renameat_target_rights); 3786 if ((error = namei(&tond)) != 0) { 3787 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3788 if (error == EISDIR && fvp->v_type == VDIR) 3789 error = EINVAL; 3790 NDFREE_PNBUF(&fromnd); 3791 vrele(fromnd.ni_dvp); 3792 vrele(fvp); 3793 goto out1; 3794 } 3795 tdvp = tond.ni_dvp; 3796 tvp = tond.ni_vp; 3797 error = vn_start_write(fvp, &mp, V_NOWAIT); 3798 if (error != 0) { 3799 NDFREE_PNBUF(&fromnd); 3800 NDFREE_PNBUF(&tond); 3801 if (tvp != NULL) 3802 vput(tvp); 3803 if (tdvp == tvp) 3804 vrele(tdvp); 3805 else 3806 vput(tdvp); 3807 vrele(fromnd.ni_dvp); 3808 vrele(fvp); 3809 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3810 if (error != 0) 3811 return (error); 3812 goto again; 3813 } 3814 irflag = vn_irflag_read(fvp); 3815 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3816 (irflag & VIRF_NAMEDDIR) != 0) { 3817 error = EINVAL; 3818 goto out; 3819 } 3820 if (tvp != NULL) { 3821 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3822 error = ENOTDIR; 3823 goto out; 3824 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3825 error = EISDIR; 3826 goto out; 3827 } 3828 #ifdef CAPABILITIES 3829 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3830 /* 3831 * If the target already exists we require CAP_UNLINKAT 3832 * from 'newfd', when newfd was used for the lookup. 3833 */ 3834 error = cap_check(&tond.ni_filecaps.fc_rights, 3835 &cap_unlinkat_rights); 3836 if (error != 0) 3837 goto out; 3838 } 3839 #endif 3840 } 3841 if (fvp == tdvp) { 3842 error = EINVAL; 3843 goto out; 3844 } 3845 /* 3846 * If the source is the same as the destination (that is, if they 3847 * are links to the same vnode), then there is nothing to do. 
3848 */ 3849 if (fvp == tvp) 3850 error = ERESTART; 3851 #ifdef MAC 3852 else 3853 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3854 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3855 #endif 3856 out: 3857 if (error == 0) { 3858 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3859 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3860 NDFREE_PNBUF(&fromnd); 3861 NDFREE_PNBUF(&tond); 3862 } else { 3863 NDFREE_PNBUF(&fromnd); 3864 NDFREE_PNBUF(&tond); 3865 if (tvp != NULL) 3866 vput(tvp); 3867 if (tdvp == tvp) 3868 vrele(tdvp); 3869 else 3870 vput(tdvp); 3871 vrele(fromnd.ni_dvp); 3872 vrele(fvp); 3873 } 3874 vn_finished_write(mp); 3875 out1: 3876 if (error == ERESTART) 3877 return (0); 3878 if (error == ERELOOKUP) 3879 goto again; 3880 return (error); 3881 } 3882 3883 /* 3884 * Make a directory file. 3885 */ 3886 #ifndef _SYS_SYSPROTO_H_ 3887 struct mkdir_args { 3888 char *path; 3889 int mode; 3890 }; 3891 #endif 3892 int 3893 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3894 { 3895 3896 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3897 uap->mode)); 3898 } 3899 3900 #ifndef _SYS_SYSPROTO_H_ 3901 struct mkdirat_args { 3902 int fd; 3903 char *path; 3904 mode_t mode; 3905 }; 3906 #endif 3907 int 3908 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3909 { 3910 3911 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3912 } 3913 3914 int 3915 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3916 int mode) 3917 { 3918 struct mount *mp; 3919 struct vattr vattr; 3920 struct nameidata nd; 3921 int error; 3922 3923 AUDIT_ARG_MODE(mode); 3924 NDPREINIT(&nd); 3925 restart: 3926 bwillwrite(); 3927 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3928 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3929 segflg, path, fd, &cap_mkdirat_rights); 3930 if ((error = namei(&nd)) != 0) 3931 return (error); 3932 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 
3933 NDFREE_PNBUF(&nd); 3934 vput(nd.ni_dvp); 3935 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3936 return (error); 3937 goto restart; 3938 } 3939 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 3940 error = EINVAL; 3941 goto out; 3942 } 3943 VATTR_NULL(&vattr); 3944 vattr.va_type = VDIR; 3945 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3946 #ifdef MAC 3947 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3948 &vattr); 3949 if (error != 0) 3950 goto out; 3951 #endif 3952 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3953 out: 3954 NDFREE_PNBUF(&nd); 3955 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3956 vn_finished_write(mp); 3957 if (error == ERELOOKUP) 3958 goto restart; 3959 return (error); 3960 } 3961 3962 /* 3963 * Remove a directory file. 3964 */ 3965 #ifndef _SYS_SYSPROTO_H_ 3966 struct rmdir_args { 3967 char *path; 3968 }; 3969 #endif 3970 int 3971 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3972 { 3973 3974 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3975 0)); 3976 } 3977 3978 int 3979 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3980 enum uio_seg pathseg, int flag) 3981 { 3982 struct mount *mp; 3983 struct vnode *vp; 3984 struct file *fp; 3985 struct nameidata nd; 3986 cap_rights_t rights; 3987 int error; 3988 3989 fp = NULL; 3990 if (fd != FD_NONE) { 3991 error = getvnode(td, fd, cap_rights_init_one(&rights, 3992 CAP_LOOKUP), &fp); 3993 if (error != 0) 3994 return (error); 3995 } 3996 3997 NDPREINIT(&nd); 3998 restart: 3999 bwillwrite(); 4000 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 4001 at2cnpflags(flag, AT_RESOLVE_BENEATH), 4002 pathseg, path, dfd, &cap_unlinkat_rights); 4003 if ((error = namei(&nd)) != 0) 4004 goto fdout; 4005 vp = nd.ni_vp; 4006 if (vp->v_type != VDIR) { 4007 error = ENOTDIR; 4008 goto out; 4009 } 4010 /* 4011 * No rmdir "." please. 
4012 */ 4013 if (nd.ni_dvp == vp) { 4014 error = EINVAL; 4015 goto out; 4016 } 4017 /* 4018 * The root of a mounted filesystem cannot be deleted. 4019 */ 4020 if (vp->v_vflag & VV_ROOT) { 4021 error = EBUSY; 4022 goto out; 4023 } 4024 4025 if (fp != NULL && fp->f_vnode != vp) { 4026 if (VN_IS_DOOMED(fp->f_vnode)) 4027 error = EBADF; 4028 else 4029 error = EDEADLK; 4030 goto out; 4031 } 4032 4033 #ifdef MAC 4034 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4035 &nd.ni_cnd); 4036 if (error != 0) 4037 goto out; 4038 #endif 4039 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4040 NDFREE_PNBUF(&nd); 4041 vput(vp); 4042 if (nd.ni_dvp == vp) 4043 vrele(nd.ni_dvp); 4044 else 4045 vput(nd.ni_dvp); 4046 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4047 goto fdout; 4048 goto restart; 4049 } 4050 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4051 vn_finished_write(mp); 4052 out: 4053 NDFREE_PNBUF(&nd); 4054 vput(vp); 4055 if (nd.ni_dvp == vp) 4056 vrele(nd.ni_dvp); 4057 else 4058 vput(nd.ni_dvp); 4059 if (error == ERELOOKUP) 4060 goto restart; 4061 fdout: 4062 if (fp != NULL) 4063 fdrop(fp, td); 4064 return (error); 4065 } 4066 4067 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4068 int 4069 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4070 long *basep, void (*func)(struct freebsd11_dirent *)) 4071 { 4072 struct freebsd11_dirent dstdp; 4073 struct dirent *dp, *edp; 4074 char *dirbuf; 4075 off_t base; 4076 ssize_t resid, ucount; 4077 int error; 4078 4079 /* XXX arbitrary sanity limit on `count'. 
*/ 4080 count = min(count, 64 * 1024); 4081 4082 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4083 4084 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4085 UIO_SYSSPACE); 4086 if (error != 0) 4087 goto done; 4088 if (basep != NULL) 4089 *basep = base; 4090 4091 ucount = 0; 4092 for (dp = (struct dirent *)dirbuf, 4093 edp = (struct dirent *)&dirbuf[count - resid]; 4094 ucount < count && dp < edp; ) { 4095 if (dp->d_reclen == 0) 4096 break; 4097 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4098 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4099 continue; 4100 dstdp.d_type = dp->d_type; 4101 dstdp.d_namlen = dp->d_namlen; 4102 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4103 if (dstdp.d_fileno != dp->d_fileno) { 4104 switch (ino64_trunc_error) { 4105 default: 4106 case 0: 4107 break; 4108 case 1: 4109 error = EOVERFLOW; 4110 goto done; 4111 case 2: 4112 dstdp.d_fileno = UINT32_MAX; 4113 break; 4114 } 4115 } 4116 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4117 ((dp->d_namlen + 1 + 3) &~ 3); 4118 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4119 bzero(dstdp.d_name + dstdp.d_namlen, 4120 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4121 dstdp.d_namlen); 4122 MPASS(dstdp.d_reclen <= dp->d_reclen); 4123 MPASS(ucount + dstdp.d_reclen <= count); 4124 if (func != NULL) 4125 func(&dstdp); 4126 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4127 if (error != 0) 4128 break; 4129 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4130 ucount += dstdp.d_reclen; 4131 } 4132 4133 done: 4134 free(dirbuf, M_TEMP); 4135 if (error == 0) 4136 td->td_retval[0] = ucount; 4137 return (error); 4138 } 4139 #endif /* COMPAT */ 4140 4141 #ifdef COMPAT_43 4142 static void 4143 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4144 { 4145 #if (BYTE_ORDER == LITTLE_ENDIAN) 4146 /* 4147 * The expected low byte of dp->d_namlen is our dp->d_type. 4148 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4149 */ 4150 dp->d_type = dp->d_namlen; 4151 dp->d_namlen = 0; 4152 #else 4153 /* 4154 * The dp->d_type is the high byte of the expected dp->d_namlen, 4155 * so must be zero'ed. 4156 */ 4157 dp->d_type = 0; 4158 #endif 4159 } 4160 4161 /* 4162 * Read a block of directory entries in a filesystem independent format. 4163 */ 4164 #ifndef _SYS_SYSPROTO_H_ 4165 struct ogetdirentries_args { 4166 int fd; 4167 char *buf; 4168 u_int count; 4169 long *basep; 4170 }; 4171 #endif 4172 int 4173 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4174 { 4175 long loff; 4176 int error; 4177 4178 error = kern_ogetdirentries(td, uap, &loff); 4179 if (error == 0) 4180 error = copyout(&loff, uap->basep, sizeof(long)); 4181 return (error); 4182 } 4183 4184 int 4185 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4186 long *ploff) 4187 { 4188 long base; 4189 int error; 4190 4191 /* XXX arbitrary sanity limit on `count'. */ 4192 if (uap->count > 64 * 1024) 4193 return (EINVAL); 4194 4195 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4196 &base, ogetdirentries_cvt); 4197 4198 if (error == 0 && uap->basep != NULL) 4199 error = copyout(&base, uap->basep, sizeof(long)); 4200 4201 return (error); 4202 } 4203 #endif /* COMPAT_43 */ 4204 4205 #if defined(COMPAT_FREEBSD11) 4206 #ifndef _SYS_SYSPROTO_H_ 4207 struct freebsd11_getdirentries_args { 4208 int fd; 4209 char *buf; 4210 u_int count; 4211 long *basep; 4212 }; 4213 #endif 4214 int 4215 freebsd11_getdirentries(struct thread *td, 4216 struct freebsd11_getdirentries_args *uap) 4217 { 4218 long base; 4219 int error; 4220 4221 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4222 &base, NULL); 4223 4224 if (error == 0 && uap->basep != NULL) 4225 error = copyout(&base, uap->basep, sizeof(long)); 4226 return (error); 4227 } 4228 4229 int 4230 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4231 { 4232 struct 
freebsd11_getdirentries_args ap; 4233 4234 ap.fd = uap->fd; 4235 ap.buf = uap->buf; 4236 ap.count = uap->count; 4237 ap.basep = NULL; 4238 return (freebsd11_getdirentries(td, &ap)); 4239 } 4240 #endif /* COMPAT_FREEBSD11 */ 4241 4242 /* 4243 * Read a block of directory entries in a filesystem independent format. 4244 */ 4245 int 4246 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4247 { 4248 off_t base; 4249 int error; 4250 4251 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4252 NULL, UIO_USERSPACE); 4253 if (error != 0) 4254 return (error); 4255 if (uap->basep != NULL) 4256 error = copyout(&base, uap->basep, sizeof(off_t)); 4257 return (error); 4258 } 4259 4260 int 4261 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4262 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4263 { 4264 struct vnode *vp; 4265 struct file *fp; 4266 struct uio auio; 4267 struct iovec aiov; 4268 off_t loff; 4269 int error, eofflag; 4270 off_t foffset; 4271 4272 AUDIT_ARG_FD(fd); 4273 if (count > IOSIZE_MAX) 4274 return (EINVAL); 4275 auio.uio_resid = count; 4276 error = getvnode(td, fd, &cap_read_rights, &fp); 4277 if (error != 0) 4278 return (error); 4279 if ((fp->f_flag & FREAD) == 0) { 4280 fdrop(fp, td); 4281 return (EBADF); 4282 } 4283 vp = fp->f_vnode; 4284 foffset = foffset_lock(fp, 0); 4285 unionread: 4286 if (vp->v_type != VDIR) { 4287 error = EINVAL; 4288 goto fail; 4289 } 4290 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4291 error = ENOENT; 4292 goto fail; 4293 } 4294 aiov.iov_base = buf; 4295 aiov.iov_len = count; 4296 auio.uio_iov = &aiov; 4297 auio.uio_iovcnt = 1; 4298 auio.uio_rw = UIO_READ; 4299 auio.uio_segflg = bufseg; 4300 auio.uio_td = td; 4301 vn_lock(vp, LK_SHARED | LK_RETRY); 4302 AUDIT_ARG_VNODE1(vp); 4303 loff = auio.uio_offset = foffset; 4304 #ifdef MAC 4305 error = mac_vnode_check_readdir(td->td_ucred, vp); 4306 if (error == 0) 4307 #endif 4308 error = VOP_READDIR(vp, &auio, 
fp->f_cred, &eofflag, NULL, 4309 NULL); 4310 foffset = auio.uio_offset; 4311 if (error != 0) { 4312 VOP_UNLOCK(vp); 4313 goto fail; 4314 } 4315 if (count == auio.uio_resid && 4316 (vp->v_vflag & VV_ROOT) && 4317 (vp->v_mount->mnt_flag & MNT_UNION)) { 4318 struct vnode *tvp = vp; 4319 4320 vp = vp->v_mount->mnt_vnodecovered; 4321 VREF(vp); 4322 fp->f_vnode = vp; 4323 foffset = 0; 4324 vput(tvp); 4325 goto unionread; 4326 } 4327 VOP_UNLOCK(vp); 4328 *basep = loff; 4329 if (residp != NULL) 4330 *residp = auio.uio_resid; 4331 td->td_retval[0] = count - auio.uio_resid; 4332 fail: 4333 foffset_unlock(fp, foffset, 0); 4334 fdrop(fp, td); 4335 return (error); 4336 } 4337 4338 /* 4339 * Set the mode mask for creation of filesystem nodes. 4340 */ 4341 #ifndef _SYS_SYSPROTO_H_ 4342 struct umask_args { 4343 int newmask; 4344 }; 4345 #endif 4346 int 4347 sys_umask(struct thread *td, struct umask_args *uap) 4348 { 4349 struct pwddesc *pdp; 4350 4351 pdp = td->td_proc->p_pd; 4352 PWDDESC_XLOCK(pdp); 4353 td->td_retval[0] = pdp->pd_cmask; 4354 pdp->pd_cmask = uap->newmask & ALLPERMS; 4355 PWDDESC_XUNLOCK(pdp); 4356 return (0); 4357 } 4358 4359 /* 4360 * Void all references to file by ripping underlying filesystem away from 4361 * vnode. 
4362 */ 4363 #ifndef _SYS_SYSPROTO_H_ 4364 struct revoke_args { 4365 char *path; 4366 }; 4367 #endif 4368 int 4369 sys_revoke(struct thread *td, struct revoke_args *uap) 4370 { 4371 struct vnode *vp; 4372 struct vattr vattr; 4373 struct nameidata nd; 4374 int error; 4375 4376 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4377 uap->path); 4378 if ((error = namei(&nd)) != 0) 4379 return (error); 4380 vp = nd.ni_vp; 4381 NDFREE_PNBUF(&nd); 4382 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4383 error = EINVAL; 4384 goto out; 4385 } 4386 #ifdef MAC 4387 error = mac_vnode_check_revoke(td->td_ucred, vp); 4388 if (error != 0) 4389 goto out; 4390 #endif 4391 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4392 if (error != 0) 4393 goto out; 4394 if (td->td_ucred->cr_uid != vattr.va_uid) { 4395 error = priv_check(td, PRIV_VFS_ADMIN); 4396 if (error != 0) 4397 goto out; 4398 } 4399 if (devfs_usecount(vp) > 0) 4400 VOP_REVOKE(vp, REVOKEALL); 4401 out: 4402 vput(vp); 4403 return (error); 4404 } 4405 4406 /* 4407 * This variant of getvnode() allows O_PATH files. Caller should 4408 * ensure that returned file and vnode are only used for compatible 4409 * semantics. 4410 */ 4411 int 4412 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, 4413 struct file **fpp) 4414 { 4415 struct file *fp; 4416 int error; 4417 4418 error = fget_unlocked(td, fd, rightsp, &fp); 4419 if (error != 0) 4420 return (error); 4421 4422 /* 4423 * The file could be not of the vnode type, or it may be not 4424 * yet fully initialized, in which case the f_vnode pointer 4425 * may be set, but f_ops is still badfileops. E.g., 4426 * devfs_open() transiently create such situation to 4427 * facilitate csw d_fdopen(). 4428 * 4429 * Dupfdopen() handling in kern_openat() installs the 4430 * half-baked file into the process descriptor table, allowing 4431 * other thread to dereference it. Guard against the race by 4432 * checking f_ops. 
4433 */ 4434 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4435 fdrop(fp, td); 4436 *fpp = NULL; 4437 return (EINVAL); 4438 } 4439 4440 *fpp = fp; 4441 return (0); 4442 } 4443 4444 /* 4445 * Convert a user file descriptor to a kernel file entry and check 4446 * that, if it is a capability, the correct rights are present. 4447 * A reference on the file entry is held upon returning. 4448 */ 4449 int 4450 getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, 4451 struct file **fpp) 4452 { 4453 int error; 4454 4455 error = getvnode_path(td, fd, rightsp, fpp); 4456 if (__predict_false(error != 0)) 4457 return (error); 4458 4459 /* 4460 * Filter out O_PATH file descriptors, most getvnode() callers 4461 * do not call fo_ methods. 4462 */ 4463 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4464 fdrop(*fpp, td); 4465 *fpp = NULL; 4466 error = EBADF; 4467 } 4468 4469 return (error); 4470 } 4471 4472 /* 4473 * Get an (NFS) file handle. 4474 */ 4475 #ifndef _SYS_SYSPROTO_H_ 4476 struct lgetfh_args { 4477 char *fname; 4478 fhandle_t *fhp; 4479 }; 4480 #endif 4481 int 4482 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4483 { 4484 4485 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4486 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4487 } 4488 4489 #ifndef _SYS_SYSPROTO_H_ 4490 struct getfh_args { 4491 char *fname; 4492 fhandle_t *fhp; 4493 }; 4494 #endif 4495 int 4496 sys_getfh(struct thread *td, struct getfh_args *uap) 4497 { 4498 4499 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4500 uap->fhp, UIO_USERSPACE)); 4501 } 4502 4503 /* 4504 * syscall for the rpc.lockd to use to translate an open descriptor into 4505 * a NFS file handle. 4506 * 4507 * warning: do not remove the priv_check() call or this becomes one giant 4508 * security hole. 
4509 */ 4510 #ifndef _SYS_SYSPROTO_H_ 4511 struct getfhat_args { 4512 int fd; 4513 char *path; 4514 fhandle_t *fhp; 4515 int flags; 4516 }; 4517 #endif 4518 int 4519 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4520 { 4521 4522 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4523 uap->fhp, UIO_USERSPACE)); 4524 } 4525 4526 int 4527 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4528 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4529 { 4530 struct nameidata nd; 4531 fhandle_t fh; 4532 struct vnode *vp; 4533 int error; 4534 4535 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4536 return (EINVAL); 4537 error = priv_check(td, PRIV_VFS_GETFH); 4538 if (error != 0) 4539 return (error); 4540 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4541 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4542 fd); 4543 error = namei(&nd); 4544 if (error != 0) 4545 return (error); 4546 NDFREE_PNBUF(&nd); 4547 vp = nd.ni_vp; 4548 bzero(&fh, sizeof(fh)); 4549 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4550 error = VOP_VPTOFH(vp, &fh.fh_fid); 4551 vput(vp); 4552 if (error == 0) { 4553 if (fhseg == UIO_USERSPACE) 4554 error = copyout(&fh, fhp, sizeof (fh)); 4555 else 4556 memcpy(fhp, &fh, sizeof(fh)); 4557 } 4558 return (error); 4559 } 4560 4561 #ifndef _SYS_SYSPROTO_H_ 4562 struct fhlink_args { 4563 fhandle_t *fhp; 4564 const char *to; 4565 }; 4566 #endif 4567 int 4568 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4569 { 4570 4571 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4572 } 4573 4574 #ifndef _SYS_SYSPROTO_H_ 4575 struct fhlinkat_args { 4576 fhandle_t *fhp; 4577 int tofd; 4578 const char *to; 4579 }; 4580 #endif 4581 int 4582 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4583 { 4584 4585 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4586 } 4587 4588 static int 4589 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4590 enum uio_seg pathseg, fhandle_t *fhp) 4591 { 4592 fhandle_t fh; 4593 struct mount *mp; 4594 struct vnode *vp; 4595 int error; 4596 4597 error = priv_check(td, PRIV_VFS_GETFH); 4598 if (error != 0) 4599 return (error); 4600 error = copyin(fhp, &fh, sizeof(fh)); 4601 if (error != 0) 4602 return (error); 4603 do { 4604 bwillwrite(); 4605 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4606 return (ESTALE); 4607 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4608 vfs_unbusy(mp); 4609 if (error != 0) 4610 return (error); 4611 VOP_UNLOCK(vp); 4612 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4613 } while (error == EAGAIN || error == ERELOOKUP); 4614 return (error); 4615 } 4616 4617 #ifndef _SYS_SYSPROTO_H_ 4618 struct fhreadlink_args { 4619 fhandle_t *fhp; 4620 char *buf; 4621 size_t bufsize; 4622 }; 4623 #endif 4624 int 4625 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4626 { 4627 fhandle_t fh; 4628 struct mount *mp; 4629 struct vnode *vp; 4630 int error; 4631 4632 error = priv_check(td, PRIV_VFS_GETFH); 4633 if (error != 0) 4634 return (error); 4635 if (uap->bufsize > IOSIZE_MAX) 4636 return (EINVAL); 4637 error = copyin(uap->fhp, &fh, sizeof(fh)); 4638 if (error != 0) 4639 return (error); 4640 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4641 return (ESTALE); 4642 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4643 vfs_unbusy(mp); 4644 if (error != 0) 4645 return (error); 4646 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4647 vput(vp); 4648 return (error); 4649 } 4650 4651 /* 4652 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4653 * open descriptor. 4654 * 4655 * warning: do not remove the priv_check() call or this becomes one giant 4656 * security hole. 
4657 */ 4658 #ifndef _SYS_SYSPROTO_H_ 4659 struct fhopen_args { 4660 const struct fhandle *u_fhp; 4661 int flags; 4662 }; 4663 #endif 4664 int 4665 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4666 { 4667 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4668 } 4669 4670 int 4671 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4672 { 4673 struct mount *mp; 4674 struct vnode *vp; 4675 struct fhandle fhp; 4676 struct file *fp; 4677 int error, indx; 4678 bool named_attr; 4679 4680 error = priv_check(td, PRIV_VFS_FHOPEN); 4681 if (error != 0) 4682 return (error); 4683 4684 indx = -1; 4685 if ((flags & O_CREAT) != 0) 4686 return (EINVAL); 4687 error = openflags(&flags); 4688 if (error != 0) 4689 return (error); 4690 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4691 if (error != 0) 4692 return (error); 4693 /* find the mount point */ 4694 mp = vfs_busyfs(&fhp.fh_fsid); 4695 if (mp == NULL) 4696 return (ESTALE); 4697 /* now give me my vnode, it gets returned to me locked */ 4698 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4699 vfs_unbusy(mp); 4700 if (error != 0) 4701 return (error); 4702 4703 /* 4704 * Check to see if the file handle refers to a named attribute 4705 * directory or attribute. If it does, the O_NAMEDATTR flag 4706 * must have been specified. 4707 */ 4708 named_attr = (vn_irflag_read(vp) & 4709 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4710 if ((named_attr && (flags & O_NAMEDATTR) == 0) || 4711 (!named_attr && (flags & O_NAMEDATTR) != 0)) { 4712 vput(vp); 4713 return (ENOATTR); 4714 } 4715 4716 error = falloc_noinstall(td, &fp); 4717 if (error != 0) { 4718 vput(vp); 4719 return (error); 4720 } 4721 /* Set the flags early so the finit in devfs can pick them up. 
*/ 4722 fp->f_flag = flags & FMASK; 4723 4724 #ifdef INVARIANTS 4725 td->td_dupfd = -1; 4726 #endif 4727 error = vn_open_vnode(vp, flags, td->td_ucred, td, fp); 4728 if (error != 0) { 4729 KASSERT(fp->f_ops == &badfileops, 4730 ("VOP_OPEN in fhopen() set f_ops")); 4731 KASSERT(td->td_dupfd < 0, 4732 ("fhopen() encountered fdopen()")); 4733 4734 vput(vp); 4735 goto bad; 4736 } 4737 #ifdef INVARIANTS 4738 td->td_dupfd = 0; 4739 #endif 4740 finit_open(fp, vp, flags); 4741 VOP_UNLOCK(vp); 4742 if ((flags & O_TRUNC) != 0) { 4743 error = fo_truncate(fp, 0, td->td_ucred, td); 4744 if (error != 0) 4745 goto bad; 4746 } 4747 4748 error = finstall(td, fp, &indx, flags, NULL); 4749 bad: 4750 fdrop(fp, td); 4751 td->td_retval[0] = indx; 4752 return (error); 4753 } 4754 4755 /* 4756 * Stat an (NFS) file handle. 4757 */ 4758 #ifndef _SYS_SYSPROTO_H_ 4759 struct fhstat_args { 4760 struct fhandle *u_fhp; 4761 struct stat *sb; 4762 }; 4763 #endif 4764 int 4765 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4766 { 4767 struct stat sb; 4768 struct fhandle fh; 4769 int error; 4770 4771 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4772 if (error != 0) 4773 return (error); 4774 error = kern_fhstat(td, fh, &sb); 4775 if (error == 0) 4776 error = copyout(&sb, uap->sb, sizeof(sb)); 4777 return (error); 4778 } 4779 4780 int 4781 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4782 { 4783 struct mount *mp; 4784 struct vnode *vp; 4785 int error; 4786 4787 error = priv_check(td, PRIV_VFS_FHSTAT); 4788 if (error != 0) 4789 return (error); 4790 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4791 return (ESTALE); 4792 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4793 vfs_unbusy(mp); 4794 if (error != 0) 4795 return (error); 4796 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4797 vput(vp); 4798 return (error); 4799 } 4800 4801 /* 4802 * Implement fstatfs() for (NFS) file handles. 
4803 */ 4804 #ifndef _SYS_SYSPROTO_H_ 4805 struct fhstatfs_args { 4806 struct fhandle *u_fhp; 4807 struct statfs *buf; 4808 }; 4809 #endif 4810 int 4811 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4812 { 4813 struct statfs *sfp; 4814 fhandle_t fh; 4815 int error; 4816 4817 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4818 if (error != 0) 4819 return (error); 4820 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4821 error = kern_fhstatfs(td, fh, sfp); 4822 if (error == 0) 4823 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4824 free(sfp, M_STATFS); 4825 return (error); 4826 } 4827 4828 int 4829 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4830 { 4831 struct mount *mp; 4832 struct vnode *vp; 4833 int error; 4834 4835 error = priv_check(td, PRIV_VFS_FHSTATFS); 4836 if (error != 0) 4837 return (error); 4838 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4839 return (ESTALE); 4840 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4841 if (error != 0) { 4842 vfs_unbusy(mp); 4843 return (error); 4844 } 4845 vput(vp); 4846 error = prison_canseemount(td->td_ucred, mp); 4847 if (error != 0) 4848 goto out; 4849 #ifdef MAC 4850 error = mac_mount_check_stat(td->td_ucred, mp); 4851 if (error != 0) 4852 goto out; 4853 #endif 4854 error = VFS_STATFS(mp, buf); 4855 out: 4856 vfs_unbusy(mp); 4857 return (error); 4858 } 4859 4860 /* 4861 * Unlike madvise(2), we do not make a best effort to remember every 4862 * possible caching hint. Instead, we remember the last setting with 4863 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4864 * region of any current setting. 
4865 */ 4866 int 4867 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4868 int advice) 4869 { 4870 struct fadvise_info *fa, *new; 4871 struct file *fp; 4872 struct vnode *vp; 4873 off_t end; 4874 int error; 4875 4876 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4877 return (EINVAL); 4878 AUDIT_ARG_VALUE(advice); 4879 switch (advice) { 4880 case POSIX_FADV_SEQUENTIAL: 4881 case POSIX_FADV_RANDOM: 4882 case POSIX_FADV_NOREUSE: 4883 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4884 break; 4885 case POSIX_FADV_NORMAL: 4886 case POSIX_FADV_WILLNEED: 4887 case POSIX_FADV_DONTNEED: 4888 new = NULL; 4889 break; 4890 default: 4891 return (EINVAL); 4892 } 4893 /* XXX: CAP_POSIX_FADVISE? */ 4894 AUDIT_ARG_FD(fd); 4895 error = fget(td, fd, &cap_no_rights, &fp); 4896 if (error != 0) 4897 goto out; 4898 AUDIT_ARG_FILE(td->td_proc, fp); 4899 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4900 error = ESPIPE; 4901 goto out; 4902 } 4903 if (fp->f_type != DTYPE_VNODE) { 4904 error = ENODEV; 4905 goto out; 4906 } 4907 vp = fp->f_vnode; 4908 if (vp->v_type != VREG) { 4909 error = ENODEV; 4910 goto out; 4911 } 4912 if (len == 0) 4913 end = OFF_MAX; 4914 else 4915 end = offset + len - 1; 4916 switch (advice) { 4917 case POSIX_FADV_SEQUENTIAL: 4918 case POSIX_FADV_RANDOM: 4919 case POSIX_FADV_NOREUSE: 4920 /* 4921 * Try to merge any existing non-standard region with 4922 * this new region if possible, otherwise create a new 4923 * non-standard region for this request. 
4924 */ 4925 mtx_pool_lock(mtxpool_sleep, fp); 4926 fa = fp->f_advice; 4927 if (fa != NULL && fa->fa_advice == advice && 4928 ((fa->fa_start <= end && fa->fa_end >= offset) || 4929 (end != OFF_MAX && fa->fa_start == end + 1) || 4930 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4931 if (offset < fa->fa_start) 4932 fa->fa_start = offset; 4933 if (end > fa->fa_end) 4934 fa->fa_end = end; 4935 } else { 4936 new->fa_advice = advice; 4937 new->fa_start = offset; 4938 new->fa_end = end; 4939 fp->f_advice = new; 4940 new = fa; 4941 } 4942 mtx_pool_unlock(mtxpool_sleep, fp); 4943 break; 4944 case POSIX_FADV_NORMAL: 4945 /* 4946 * If a the "normal" region overlaps with an existing 4947 * non-standard region, trim or remove the 4948 * non-standard region. 4949 */ 4950 mtx_pool_lock(mtxpool_sleep, fp); 4951 fa = fp->f_advice; 4952 if (fa != NULL) { 4953 if (offset <= fa->fa_start && end >= fa->fa_end) { 4954 new = fa; 4955 fp->f_advice = NULL; 4956 } else if (offset <= fa->fa_start && 4957 end >= fa->fa_start) 4958 fa->fa_start = end + 1; 4959 else if (offset <= fa->fa_end && end >= fa->fa_end) 4960 fa->fa_end = offset - 1; 4961 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4962 /* 4963 * If the "normal" region is a middle 4964 * portion of the existing 4965 * non-standard region, just remove 4966 * the whole thing rather than picking 4967 * one side or the other to 4968 * preserve. 
4969 */ 4970 new = fa; 4971 fp->f_advice = NULL; 4972 } 4973 } 4974 mtx_pool_unlock(mtxpool_sleep, fp); 4975 break; 4976 case POSIX_FADV_WILLNEED: 4977 case POSIX_FADV_DONTNEED: 4978 error = VOP_ADVISE(vp, offset, end, advice); 4979 break; 4980 } 4981 out: 4982 if (fp != NULL) 4983 fdrop(fp, td); 4984 free(new, M_FADVISE); 4985 return (error); 4986 } 4987 4988 int 4989 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4990 { 4991 int error; 4992 4993 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4994 uap->advice); 4995 return (kern_posix_error(td, error)); 4996 } 4997 4998 int 4999 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 5000 off_t *outoffp, size_t len, unsigned int flags) 5001 { 5002 struct file *infp, *infp1, *outfp, *outfp1; 5003 struct vnode *invp, *outvp; 5004 int error; 5005 size_t retlen; 5006 void *rl_rcookie, *rl_wcookie; 5007 off_t inoff, outoff, savinoff, savoutoff; 5008 bool foffsets_locked; 5009 5010 infp = outfp = NULL; 5011 rl_rcookie = rl_wcookie = NULL; 5012 foffsets_locked = false; 5013 error = 0; 5014 retlen = 0; 5015 5016 if (flags != 0) { 5017 error = EINVAL; 5018 goto out; 5019 } 5020 if (len > SSIZE_MAX) 5021 /* 5022 * Although the len argument is size_t, the return argument 5023 * is ssize_t (which is signed). Therefore a size that won't 5024 * fit in ssize_t can't be returned. 5025 */ 5026 len = SSIZE_MAX; 5027 5028 /* Get the file structures for the file descriptors. */ 5029 error = fget_read(td, infd, 5030 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 5031 if (error != 0) 5032 goto out; 5033 if (infp->f_ops == &badfileops) { 5034 error = EBADF; 5035 goto out; 5036 } 5037 if (infp->f_vnode == NULL) { 5038 error = EINVAL; 5039 goto out; 5040 } 5041 error = fget_write(td, outfd, 5042 outoffp != NULL ? 
&cap_pwrite_rights : &cap_write_rights, &outfp); 5043 if (error != 0) 5044 goto out; 5045 if (outfp->f_ops == &badfileops) { 5046 error = EBADF; 5047 goto out; 5048 } 5049 if (outfp->f_vnode == NULL) { 5050 error = EINVAL; 5051 goto out; 5052 } 5053 5054 /* 5055 * Figure out which file offsets we're reading from and writing to. 5056 * If the offsets come from the file descriptions, we need to lock them, 5057 * and locking both offsets requires a loop to avoid deadlocks. 5058 */ 5059 infp1 = outfp1 = NULL; 5060 if (inoffp != NULL) 5061 inoff = *inoffp; 5062 else 5063 infp1 = infp; 5064 if (outoffp != NULL) 5065 outoff = *outoffp; 5066 else 5067 outfp1 = outfp; 5068 if (infp1 != NULL || outfp1 != NULL) { 5069 if (infp1 == outfp1) { 5070 /* 5071 * Overlapping ranges are not allowed. A more thorough 5072 * check appears below, but we must not lock the same 5073 * offset twice. 5074 */ 5075 error = EINVAL; 5076 goto out; 5077 } 5078 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5079 foffsets_locked = true; 5080 } 5081 savinoff = inoff; 5082 savoutoff = outoff; 5083 5084 invp = infp->f_vnode; 5085 outvp = outfp->f_vnode; 5086 /* Sanity check the f_flag bits. */ 5087 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5088 (infp->f_flag & FREAD) == 0) { 5089 error = EBADF; 5090 goto out; 5091 } 5092 5093 /* If len == 0, just return 0. */ 5094 if (len == 0) 5095 goto out; 5096 5097 /* 5098 * Make sure that the ranges we check and lock below are valid. Note 5099 * that len is clamped to SSIZE_MAX above. 5100 */ 5101 if (inoff < 0 || outoff < 0) { 5102 error = EINVAL; 5103 goto out; 5104 } 5105 5106 /* 5107 * If infp and outfp refer to the same file, the byte ranges cannot 5108 * overlap. 
5109 */ 5110 if (invp == outvp) { 5111 if ((inoff <= outoff && inoff + len > outoff) || 5112 (inoff > outoff && outoff + len > inoff)) { 5113 error = EINVAL; 5114 goto out; 5115 } 5116 rangelock_may_recurse(&invp->v_rl); 5117 } 5118 5119 /* Range lock the byte ranges for both invp and outvp. */ 5120 for (;;) { 5121 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5122 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5123 if (rl_rcookie != NULL) 5124 break; 5125 vn_rangelock_unlock(outvp, rl_wcookie); 5126 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5127 vn_rangelock_unlock(invp, rl_rcookie); 5128 } 5129 5130 retlen = len; 5131 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5132 flags, infp->f_cred, outfp->f_cred, td); 5133 out: 5134 if (rl_rcookie != NULL) 5135 vn_rangelock_unlock(invp, rl_rcookie); 5136 if (rl_wcookie != NULL) 5137 vn_rangelock_unlock(outvp, rl_wcookie); 5138 if (foffsets_locked) { 5139 if (error == EINTR || error == ERESTART) { 5140 inoff = savinoff; 5141 outoff = savoutoff; 5142 } 5143 if (inoffp == NULL) 5144 foffset_unlock(infp, inoff, 0); 5145 else 5146 *inoffp = inoff; 5147 if (outoffp == NULL) 5148 foffset_unlock(outfp, outoff, 0); 5149 else 5150 *outoffp = outoff; 5151 } 5152 if (outfp != NULL) 5153 fdrop(outfp, td); 5154 if (infp != NULL) 5155 fdrop(infp, td); 5156 td->td_retval[0] = retlen; 5157 return (error); 5158 } 5159 5160 int 5161 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5162 { 5163 off_t inoff, outoff, *inoffp, *outoffp; 5164 int error; 5165 5166 inoffp = outoffp = NULL; 5167 if (uap->inoffp != NULL) { 5168 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5169 if (error != 0) 5170 return (error); 5171 inoffp = &inoff; 5172 } 5173 if (uap->outoffp != NULL) { 5174 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5175 if (error != 0) 5176 return (error); 5177 outoffp = &outoff; 5178 } 5179 error = kern_copy_file_range(td, uap->infd, 
inoffp, uap->outfd, 5180 outoffp, uap->len, uap->flags); 5181 if (error == 0 && uap->inoffp != NULL) 5182 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5183 if (error == 0 && uap->outoffp != NULL) 5184 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5185 return (error); 5186 } 5187