1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #ifdef COMPAT_FREEBSD11 48 #include <sys/abi_compat.h> 49 #endif 50 #include <sys/bio.h> 51 #include <sys/buf.h> 52 #include <sys/capsicum.h> 53 #include <sys/disk.h> 54 #include <sys/malloc.h> 55 #include <sys/mount.h> 56 #include <sys/mutex.h> 57 #include <sys/sysproto.h> 58 #include <sys/namei.h> 59 #include <sys/filedesc.h> 60 #include <sys/kernel.h> 61 #include <sys/fcntl.h> 62 #include <sys/file.h> 63 #include <sys/filio.h> 64 #include <sys/limits.h> 65 #include <sys/linker.h> 66 #include <sys/rwlock.h> 67 #include <sys/sdt.h> 68 #include <sys/stat.h> 69 #include <sys/sx.h> 70 #include <sys/unistd.h> 71 #include <sys/vnode.h> 72 #include <sys/priv.h> 73 #include <sys/proc.h> 74 #include <sys/dirent.h> 75 #include <sys/jail.h> 76 #include <sys/syscallsubr.h> 77 #include <sys/sysctl.h> 78 #ifdef KTRACE 79 #include <sys/ktrace.h> 80 #endif 81 82 #include <machine/stdarg.h> 83 84 #include <security/audit/audit.h> 85 #include <security/mac/mac_framework.h> 86 87 #include <vm/vm.h> 88 #include <vm/vm_object.h> 89 #include <vm/vm_page.h> 90 #include <vm/uma.h> 91 92 #include <fs/devfs/devfs.h> 93 94 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 95 96 static int kern_chflagsat(struct thread *td, int fd, const char *path, 97 enum uio_seg pathseg, u_long flags, int atflag); 98 static int setfflags(struct thread *td, struct vnode *, u_long); 99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100 static int getutimens(const struct timespec *, enum uio_seg, 101 struct timespec *, int *); 102 static int setutimes(struct thread *td, struct vnode *, 103 const struct timespec *, int, int); 104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 105 struct thread *td); 106 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 107 enum uio_seg pathseg, fhandle_t *fhp); 108 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 109 size_t count, struct thread *td); 110 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 111 const char *path, enum uio_seg segflag); 112 113 static uint64_t 114 at2cnpflags(u_int at_flags, u_int mask) 115 { 116 uint64_t res; 117 118 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 119 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 120 121 res = 0; 122 at_flags &= mask; 123 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 124 res |= RBENEATH; 125 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 126 res |= FOLLOW; 127 /* NOFOLLOW is pseudo flag */ 128 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 129 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 130 FOLLOW; 131 } 132 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 133 res |= EMPTYPATH; 134 return (res); 135 } 136 137 int 138 kern_sync(struct thread *td) 139 { 140 struct mount *mp, *nmp; 141 int save; 142 143 mtx_lock(&mountlist_mtx); 144 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 145 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 146 nmp = TAILQ_NEXT(mp, mnt_list); 147 continue; 148 } 149 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 150 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 151 save = curthread_pflags_set(TDP_SYNCIO); 152 vfs_periodic(mp, MNT_NOWAIT); 153 VFS_SYNC(mp, MNT_NOWAIT); 154 curthread_pflags_restore(save); 155 vn_finished_write(mp); 156 } 157 mtx_lock(&mountlist_mtx); 158 nmp = TAILQ_NEXT(mp, mnt_list); 159 vfs_unbusy(mp); 160 } 161 mtx_unlock(&mountlist_mtx); 162 return (0); 163 } 164 165 /* 166 * Sync each mounted filesystem. 167 */ 168 #ifndef _SYS_SYSPROTO_H_ 169 struct sync_args { 170 int dummy; 171 }; 172 #endif 173 /* ARGSUSED */ 174 int 175 sys_sync(struct thread *td, struct sync_args *uap) 176 { 177 178 return (kern_sync(td)); 179 } 180 181 /* 182 * Change filesystem quotas. 183 */ 184 #ifndef _SYS_SYSPROTO_H_ 185 struct quotactl_args { 186 char *path; 187 int cmd; 188 int uid; 189 caddr_t arg; 190 }; 191 #endif 192 int 193 sys_quotactl(struct thread *td, struct quotactl_args *uap) 194 { 195 struct mount *mp; 196 struct nameidata nd; 197 int error; 198 bool mp_busy; 199 200 AUDIT_ARG_CMD(uap->cmd); 201 AUDIT_ARG_UID(uap->uid); 202 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 203 return (EPERM); 204 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 205 uap->path); 206 if ((error = namei(&nd)) != 0) 207 return (error); 208 NDFREE_PNBUF(&nd); 209 mp = nd.ni_vp->v_mount; 210 vfs_ref(mp); 211 vput(nd.ni_vp); 212 error = vfs_busy(mp, 0); 213 if (error != 0) { 214 vfs_rel(mp); 215 return (error); 216 } 217 mp_busy = true; 218 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 219 220 /* 221 * Since quota on/off operations typically need to open quota 222 * files, the implementation may need to unbusy the mount point 223 * before calling into namei. Otherwise, unmount might be 224 * started between two vfs_busy() invocations (first is ours, 225 * second is from mount point cross-walk code in lookup()), 226 * causing deadlock. 227 * 228 * Avoid unbusying mp if the implementation indicates it has 229 * already done so. 230 */ 231 if (mp_busy) 232 vfs_unbusy(mp); 233 vfs_rel(mp); 234 return (error); 235 } 236 237 /* 238 * Used by statfs conversion routines to scale the block size up if 239 * necessary so that all of the block counts are <= 'max_size'. Note 240 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 241 * value of 'n'. 242 */ 243 void 244 statfs_scale_blocks(struct statfs *sf, long max_size) 245 { 246 uint64_t count; 247 int shift; 248 249 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 250 251 /* 252 * Attempt to scale the block counts to give a more accurate 253 * overview to userland of the ratio of free space to used 254 * space. To do this, find the largest block count and compute 255 * a divisor that lets it fit into a signed integer <= max_size. 256 */ 257 if (sf->f_bavail < 0) 258 count = -sf->f_bavail; 259 else 260 count = sf->f_bavail; 261 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 262 if (count <= max_size) 263 return; 264 265 count >>= flsl(max_size); 266 shift = 0; 267 while (count > 0) { 268 shift++; 269 count >>=1; 270 } 271 272 sf->f_bsize <<= shift; 273 sf->f_blocks >>= shift; 274 sf->f_bfree >>= shift; 275 sf->f_bavail >>= shift; 276 } 277 278 static int 279 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 280 { 281 int error; 282 283 if (mp == NULL) 284 return (EBADF); 285 error = vfs_busy(mp, 0); 286 vfs_rel(mp); 287 if (error != 0) 288 return (error); 289 #ifdef MAC 290 error = mac_mount_check_stat(td->td_ucred, mp); 291 if (error != 0) 292 goto out; 293 #endif 294 error = VFS_STATFS(mp, buf); 295 if (error != 0) 296 goto out; 297 if (priv_check_cred_vfs_generation(td->td_ucred)) { 298 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 299 prison_enforce_statfs(td->td_ucred, mp, buf); 300 } 301 out: 302 vfs_unbusy(mp); 303 return (error); 304 } 305 306 /* 307 * Get filesystem statistics. 308 */ 309 #ifndef _SYS_SYSPROTO_H_ 310 struct statfs_args { 311 char *path; 312 struct statfs *buf; 313 }; 314 #endif 315 int 316 sys_statfs(struct thread *td, struct statfs_args *uap) 317 { 318 struct statfs *sfp; 319 int error; 320 321 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 322 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 323 if (error == 0) 324 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 325 free(sfp, M_STATFS); 326 return (error); 327 } 328 329 int 330 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 331 struct statfs *buf) 332 { 333 struct mount *mp; 334 struct nameidata nd; 335 int error; 336 337 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 338 error = namei(&nd); 339 if (error != 0) 340 return (error); 341 mp = vfs_ref_from_vp(nd.ni_vp); 342 NDFREE_NOTHING(&nd); 343 vrele(nd.ni_vp); 344 return (kern_do_statfs(td, mp, buf)); 345 } 346 347 /* 348 * Get filesystem statistics. 349 */ 350 #ifndef _SYS_SYSPROTO_H_ 351 struct fstatfs_args { 352 int fd; 353 struct statfs *buf; 354 }; 355 #endif 356 int 357 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 358 { 359 struct statfs *sfp; 360 int error; 361 362 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 363 error = kern_fstatfs(td, uap->fd, sfp); 364 if (error == 0) 365 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 366 free(sfp, M_STATFS); 367 return (error); 368 } 369 370 int 371 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 372 { 373 struct file *fp; 374 struct mount *mp; 375 struct vnode *vp; 376 int error; 377 378 AUDIT_ARG_FD(fd); 379 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 380 if (error != 0) 381 return (error); 382 vp = fp->f_vnode; 383 #ifdef AUDIT 384 if (AUDITING_TD(td)) { 385 vn_lock(vp, LK_SHARED | LK_RETRY); 386 AUDIT_ARG_VNODE1(vp); 387 VOP_UNLOCK(vp); 388 } 389 #endif 390 mp = vfs_ref_from_vp(vp); 391 fdrop(fp, td); 392 return (kern_do_statfs(td, mp, buf)); 393 } 394 395 /* 396 * Get statistics on all filesystems. 397 */ 398 #ifndef _SYS_SYSPROTO_H_ 399 struct getfsstat_args { 400 struct statfs *buf; 401 long bufsize; 402 int mode; 403 }; 404 #endif 405 int 406 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 407 { 408 size_t count; 409 int error; 410 411 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 412 return (EINVAL); 413 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 414 UIO_USERSPACE, uap->mode); 415 if (error == 0) 416 td->td_retval[0] = count; 417 return (error); 418 } 419 420 /* 421 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 422 * The caller is responsible for freeing memory which will be allocated 423 * in '*buf'. 424 */ 425 int 426 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 427 size_t *countp, enum uio_seg bufseg, int mode) 428 { 429 struct mount *mp, *nmp; 430 struct statfs *sfsp, *sp, *sptmp, *tofree; 431 size_t count, maxcount; 432 int error; 433 434 switch (mode) { 435 case MNT_WAIT: 436 case MNT_NOWAIT: 437 break; 438 default: 439 if (bufseg == UIO_SYSSPACE) 440 *buf = NULL; 441 return (EINVAL); 442 } 443 restart: 444 maxcount = bufsize / sizeof(struct statfs); 445 if (bufsize == 0) { 446 sfsp = NULL; 447 tofree = NULL; 448 } else if (bufseg == UIO_USERSPACE) { 449 sfsp = *buf; 450 tofree = NULL; 451 } else /* if (bufseg == UIO_SYSSPACE) */ { 452 count = 0; 453 mtx_lock(&mountlist_mtx); 454 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 455 count++; 456 } 457 mtx_unlock(&mountlist_mtx); 458 if (maxcount > count) 459 maxcount = count; 460 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 461 M_STATFS, M_WAITOK); 462 } 463 464 count = 0; 465 466 /* 467 * If there is no target buffer they only want the count. 468 * 469 * This could be TAILQ_FOREACH but it is open-coded to match the original 470 * code below. 471 */ 472 if (sfsp == NULL) { 473 mtx_lock(&mountlist_mtx); 474 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 475 if (prison_canseemount(td->td_ucred, mp) != 0) { 476 nmp = TAILQ_NEXT(mp, mnt_list); 477 continue; 478 } 479 #ifdef MAC 480 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 481 nmp = TAILQ_NEXT(mp, mnt_list); 482 continue; 483 } 484 #endif 485 count++; 486 nmp = TAILQ_NEXT(mp, mnt_list); 487 } 488 mtx_unlock(&mountlist_mtx); 489 *countp = count; 490 return (0); 491 } 492 493 /* 494 * They want the entire thing. 495 * 496 * Short-circuit the corner case of no room for anything, avoids 497 * relocking below. 498 */ 499 if (maxcount < 1) { 500 goto out; 501 } 502 503 mtx_lock(&mountlist_mtx); 504 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 505 if (prison_canseemount(td->td_ucred, mp) != 0) { 506 nmp = TAILQ_NEXT(mp, mnt_list); 507 continue; 508 } 509 #ifdef MAC 510 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 511 nmp = TAILQ_NEXT(mp, mnt_list); 512 continue; 513 } 514 #endif 515 if (mode == MNT_WAIT) { 516 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 517 /* 518 * If vfs_busy() failed, and MBF_NOWAIT 519 * wasn't passed, then the mp is gone. 520 * Furthermore, because of MBF_MNTLSTLOCK, 521 * the mountlist_mtx was dropped. We have 522 * no other choice than to start over. 523 */ 524 mtx_unlock(&mountlist_mtx); 525 free(tofree, M_STATFS); 526 goto restart; 527 } 528 } else { 529 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 530 nmp = TAILQ_NEXT(mp, mnt_list); 531 continue; 532 } 533 } 534 sp = &mp->mnt_stat; 535 /* 536 * If MNT_NOWAIT is specified, do not refresh 537 * the fsstat cache. 538 */ 539 if (mode != MNT_NOWAIT) { 540 error = VFS_STATFS(mp, sp); 541 if (error != 0) { 542 mtx_lock(&mountlist_mtx); 543 nmp = TAILQ_NEXT(mp, mnt_list); 544 vfs_unbusy(mp); 545 continue; 546 } 547 } 548 if (priv_check_cred_vfs_generation(td->td_ucred)) { 549 sptmp = malloc(sizeof(struct statfs), M_STATFS, 550 M_WAITOK); 551 *sptmp = *sp; 552 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 553 prison_enforce_statfs(td->td_ucred, mp, sptmp); 554 sp = sptmp; 555 } else 556 sptmp = NULL; 557 if (bufseg == UIO_SYSSPACE) { 558 bcopy(sp, sfsp, sizeof(*sp)); 559 free(sptmp, M_STATFS); 560 } else /* if (bufseg == UIO_USERSPACE) */ { 561 error = copyout(sp, sfsp, sizeof(*sp)); 562 free(sptmp, M_STATFS); 563 if (error != 0) { 564 vfs_unbusy(mp); 565 return (error); 566 } 567 } 568 sfsp++; 569 count++; 570 571 if (count == maxcount) { 572 vfs_unbusy(mp); 573 goto out; 574 } 575 576 mtx_lock(&mountlist_mtx); 577 nmp = TAILQ_NEXT(mp, mnt_list); 578 vfs_unbusy(mp); 579 } 580 mtx_unlock(&mountlist_mtx); 581 out: 582 *countp = count; 583 return (0); 584 } 585 586 #ifdef COMPAT_FREEBSD4 587 /* 588 * Get old format filesystem statistics. 589 */ 590 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 591 592 #ifndef _SYS_SYSPROTO_H_ 593 struct freebsd4_statfs_args { 594 char *path; 595 struct ostatfs *buf; 596 }; 597 #endif 598 int 599 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 600 { 601 struct ostatfs osb; 602 struct statfs *sfp; 603 int error; 604 605 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 606 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 607 if (error == 0) { 608 freebsd4_cvtstatfs(sfp, &osb); 609 error = copyout(&osb, uap->buf, sizeof(osb)); 610 } 611 free(sfp, M_STATFS); 612 return (error); 613 } 614 615 /* 616 * Get filesystem statistics. 617 */ 618 #ifndef _SYS_SYSPROTO_H_ 619 struct freebsd4_fstatfs_args { 620 int fd; 621 struct ostatfs *buf; 622 }; 623 #endif 624 int 625 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 626 { 627 struct ostatfs osb; 628 struct statfs *sfp; 629 int error; 630 631 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 632 error = kern_fstatfs(td, uap->fd, sfp); 633 if (error == 0) { 634 freebsd4_cvtstatfs(sfp, &osb); 635 error = copyout(&osb, uap->buf, sizeof(osb)); 636 } 637 free(sfp, M_STATFS); 638 return (error); 639 } 640 641 /* 642 * Get statistics on all filesystems. 643 */ 644 #ifndef _SYS_SYSPROTO_H_ 645 struct freebsd4_getfsstat_args { 646 struct ostatfs *buf; 647 long bufsize; 648 int mode; 649 }; 650 #endif 651 int 652 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 653 { 654 struct statfs *buf, *sp; 655 struct ostatfs osb; 656 size_t count, size; 657 int error; 658 659 if (uap->bufsize < 0) 660 return (EINVAL); 661 count = uap->bufsize / sizeof(struct ostatfs); 662 if (count > SIZE_MAX / sizeof(struct statfs)) 663 return (EINVAL); 664 size = count * sizeof(struct statfs); 665 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 666 uap->mode); 667 if (error == 0) 668 td->td_retval[0] = count; 669 if (size != 0) { 670 sp = buf; 671 while (count != 0 && error == 0) { 672 freebsd4_cvtstatfs(sp, &osb); 673 error = copyout(&osb, uap->buf, sizeof(osb)); 674 sp++; 675 uap->buf++; 676 count--; 677 } 678 free(buf, M_STATFS); 679 } 680 return (error); 681 } 682 683 /* 684 * Implement fstatfs() for (NFS) file handles. 685 */ 686 #ifndef _SYS_SYSPROTO_H_ 687 struct freebsd4_fhstatfs_args { 688 struct fhandle *u_fhp; 689 struct ostatfs *buf; 690 }; 691 #endif 692 int 693 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 694 { 695 struct ostatfs osb; 696 struct statfs *sfp; 697 fhandle_t fh; 698 int error; 699 700 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 701 if (error != 0) 702 return (error); 703 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 704 error = kern_fhstatfs(td, fh, sfp); 705 if (error == 0) { 706 freebsd4_cvtstatfs(sfp, &osb); 707 error = copyout(&osb, uap->buf, sizeof(osb)); 708 } 709 free(sfp, M_STATFS); 710 return (error); 711 } 712 713 /* 714 * Convert a new format statfs structure to an old format statfs structure. 715 */ 716 static void 717 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 718 { 719 720 statfs_scale_blocks(nsp, LONG_MAX); 721 bzero(osp, sizeof(*osp)); 722 osp->f_bsize = nsp->f_bsize; 723 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 724 osp->f_blocks = nsp->f_blocks; 725 osp->f_bfree = nsp->f_bfree; 726 osp->f_bavail = nsp->f_bavail; 727 osp->f_files = MIN(nsp->f_files, LONG_MAX); 728 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 729 osp->f_owner = nsp->f_owner; 730 osp->f_type = nsp->f_type; 731 osp->f_flags = nsp->f_flags; 732 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 733 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 734 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 735 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 736 strlcpy(osp->f_fstypename, nsp->f_fstypename, 737 MIN(MFSNAMELEN, OMFSNAMELEN)); 738 strlcpy(osp->f_mntonname, nsp->f_mntonname, 739 MIN(MNAMELEN, OMNAMELEN)); 740 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 741 MIN(MNAMELEN, OMNAMELEN)); 742 osp->f_fsid = nsp->f_fsid; 743 } 744 #endif /* COMPAT_FREEBSD4 */ 745 746 #if defined(COMPAT_FREEBSD11) 747 /* 748 * Get old format filesystem statistics. 749 */ 750 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 751 752 int 753 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 754 { 755 struct freebsd11_statfs osb; 756 struct statfs *sfp; 757 int error; 758 759 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 760 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 761 if (error == 0) { 762 freebsd11_cvtstatfs(sfp, &osb); 763 error = copyout(&osb, uap->buf, sizeof(osb)); 764 } 765 free(sfp, M_STATFS); 766 return (error); 767 } 768 769 /* 770 * Get filesystem statistics. 771 */ 772 int 773 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 774 { 775 struct freebsd11_statfs osb; 776 struct statfs *sfp; 777 int error; 778 779 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 780 error = kern_fstatfs(td, uap->fd, sfp); 781 if (error == 0) { 782 freebsd11_cvtstatfs(sfp, &osb); 783 error = copyout(&osb, uap->buf, sizeof(osb)); 784 } 785 free(sfp, M_STATFS); 786 return (error); 787 } 788 789 /* 790 * Get statistics on all filesystems. 791 */ 792 int 793 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 794 { 795 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 796 } 797 798 int 799 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 800 long bufsize, int mode) 801 { 802 struct freebsd11_statfs osb; 803 struct statfs *buf, *sp; 804 size_t count, size; 805 int error; 806 807 if (bufsize < 0) 808 return (EINVAL); 809 810 count = bufsize / sizeof(struct ostatfs); 811 size = count * sizeof(struct statfs); 812 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 813 if (error == 0) 814 td->td_retval[0] = count; 815 if (size > 0) { 816 sp = buf; 817 while (count > 0 && error == 0) { 818 freebsd11_cvtstatfs(sp, &osb); 819 error = copyout(&osb, ubuf, sizeof(osb)); 820 sp++; 821 ubuf++; 822 count--; 823 } 824 free(buf, M_STATFS); 825 } 826 return (error); 827 } 828 829 /* 830 * Implement fstatfs() for (NFS) file handles. 831 */ 832 int 833 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 834 { 835 struct freebsd11_statfs osb; 836 struct statfs *sfp; 837 fhandle_t fh; 838 int error; 839 840 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 841 if (error) 842 return (error); 843 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 844 error = kern_fhstatfs(td, fh, sfp); 845 if (error == 0) { 846 freebsd11_cvtstatfs(sfp, &osb); 847 error = copyout(&osb, uap->buf, sizeof(osb)); 848 } 849 free(sfp, M_STATFS); 850 return (error); 851 } 852 853 /* 854 * Convert a new format statfs structure to an old format statfs structure. 855 */ 856 static void 857 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 858 { 859 860 bzero(osp, sizeof(*osp)); 861 osp->f_version = FREEBSD11_STATFS_VERSION; 862 osp->f_type = nsp->f_type; 863 osp->f_flags = nsp->f_flags; 864 osp->f_bsize = nsp->f_bsize; 865 osp->f_iosize = nsp->f_iosize; 866 osp->f_blocks = nsp->f_blocks; 867 osp->f_bfree = nsp->f_bfree; 868 osp->f_bavail = nsp->f_bavail; 869 osp->f_files = nsp->f_files; 870 osp->f_ffree = nsp->f_ffree; 871 osp->f_syncwrites = nsp->f_syncwrites; 872 osp->f_asyncwrites = nsp->f_asyncwrites; 873 osp->f_syncreads = nsp->f_syncreads; 874 osp->f_asyncreads = nsp->f_asyncreads; 875 osp->f_namemax = nsp->f_namemax; 876 osp->f_owner = nsp->f_owner; 877 osp->f_fsid = nsp->f_fsid; 878 strlcpy(osp->f_fstypename, nsp->f_fstypename, 879 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 880 strlcpy(osp->f_mntonname, nsp->f_mntonname, 881 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 882 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 883 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 884 } 885 #endif /* COMPAT_FREEBSD11 */ 886 887 /* 888 * Change current working directory to a given file descriptor. 889 */ 890 #ifndef _SYS_SYSPROTO_H_ 891 struct fchdir_args { 892 int fd; 893 }; 894 #endif 895 int 896 sys_fchdir(struct thread *td, struct fchdir_args *uap) 897 { 898 struct vnode *vp, *tdp; 899 struct mount *mp; 900 struct file *fp; 901 int error; 902 903 AUDIT_ARG_FD(uap->fd); 904 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 905 &fp); 906 if (error != 0) 907 return (error); 908 vp = fp->f_vnode; 909 vrefact(vp); 910 fdrop(fp, td); 911 vn_lock(vp, LK_SHARED | LK_RETRY); 912 AUDIT_ARG_VNODE1(vp); 913 error = change_dir(vp, td); 914 while (!error && (mp = vp->v_mountedhere) != NULL) { 915 if (vfs_busy(mp, 0)) 916 continue; 917 error = VFS_ROOT(mp, LK_SHARED, &tdp); 918 vfs_unbusy(mp); 919 if (error != 0) 920 break; 921 vput(vp); 922 vp = tdp; 923 } 924 if (error != 0) { 925 vput(vp); 926 return (error); 927 } 928 VOP_UNLOCK(vp); 929 pwd_chdir(td, vp); 930 return (0); 931 } 932 933 /* 934 * Change current working directory (``.''). 935 */ 936 #ifndef _SYS_SYSPROTO_H_ 937 struct chdir_args { 938 char *path; 939 }; 940 #endif 941 int 942 sys_chdir(struct thread *td, struct chdir_args *uap) 943 { 944 945 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 946 } 947 948 int 949 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 950 { 951 struct nameidata nd; 952 int error; 953 954 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 955 pathseg, path); 956 if ((error = namei(&nd)) != 0) 957 return (error); 958 if ((error = change_dir(nd.ni_vp, td)) != 0) { 959 vput(nd.ni_vp); 960 NDFREE_NOTHING(&nd); 961 return (error); 962 } 963 VOP_UNLOCK(nd.ni_vp); 964 NDFREE_NOTHING(&nd); 965 pwd_chdir(td, nd.ni_vp); 966 return (0); 967 } 968 969 static int unprivileged_chroot = 0; 970 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 971 &unprivileged_chroot, 0, 972 "Unprivileged processes can use chroot(2)"); 973 /* 974 * Change notion of root (``/'') directory. 975 */ 976 #ifndef _SYS_SYSPROTO_H_ 977 struct chroot_args { 978 char *path; 979 }; 980 #endif 981 int 982 sys_chroot(struct thread *td, struct chroot_args *uap) 983 { 984 struct nameidata nd; 985 struct proc *p; 986 int error; 987 988 error = priv_check(td, PRIV_VFS_CHROOT); 989 if (error != 0) { 990 p = td->td_proc; 991 PROC_LOCK(p); 992 if (unprivileged_chroot == 0 || 993 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 994 PROC_UNLOCK(p); 995 return (error); 996 } 997 PROC_UNLOCK(p); 998 } 999 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1000 UIO_USERSPACE, uap->path); 1001 error = namei(&nd); 1002 if (error != 0) 1003 goto error; 1004 error = change_dir(nd.ni_vp, td); 1005 if (error != 0) 1006 goto e_vunlock; 1007 #ifdef MAC 1008 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 1009 if (error != 0) 1010 goto e_vunlock; 1011 #endif 1012 VOP_UNLOCK(nd.ni_vp); 1013 error = pwd_chroot(td, nd.ni_vp); 1014 vrele(nd.ni_vp); 1015 NDFREE_NOTHING(&nd); 1016 return (error); 1017 e_vunlock: 1018 vput(nd.ni_vp); 1019 error: 1020 NDFREE_NOTHING(&nd); 1021 return (error); 1022 } 1023 1024 /* 1025 * Common routine for chroot and chdir. Callers must provide a locked vnode 1026 * instance. 1027 */ 1028 int 1029 change_dir(struct vnode *vp, struct thread *td) 1030 { 1031 #ifdef MAC 1032 int error; 1033 #endif 1034 1035 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1036 if (vp->v_type != VDIR) 1037 return (ENOTDIR); 1038 #ifdef MAC 1039 error = mac_vnode_check_chdir(td->td_ucred, vp); 1040 if (error != 0) 1041 return (error); 1042 #endif 1043 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1044 } 1045 1046 static __inline void 1047 flags_to_rights(int flags, cap_rights_t *rightsp) 1048 { 1049 if (flags & O_EXEC) { 1050 cap_rights_set_one(rightsp, CAP_FEXECVE); 1051 if (flags & O_PATH) 1052 return; 1053 } else { 1054 switch ((flags & O_ACCMODE)) { 1055 case O_RDONLY: 1056 cap_rights_set_one(rightsp, CAP_READ); 1057 break; 1058 case O_RDWR: 1059 cap_rights_set_one(rightsp, CAP_READ); 1060 /* FALLTHROUGH */ 1061 case O_WRONLY: 1062 cap_rights_set_one(rightsp, CAP_WRITE); 1063 if (!(flags & (O_APPEND | O_TRUNC))) 1064 cap_rights_set_one(rightsp, CAP_SEEK); 1065 break; 1066 } 1067 } 1068 1069 if (flags & O_CREAT) 1070 cap_rights_set_one(rightsp, CAP_CREATE); 1071 1072 if (flags & O_TRUNC) 1073 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1074 1075 if (flags & (O_SYNC | O_FSYNC)) 1076 cap_rights_set_one(rightsp, CAP_FSYNC); 1077 1078 if (flags & (O_EXLOCK | O_SHLOCK)) 1079 cap_rights_set_one(rightsp, CAP_FLOCK); 1080 } 1081 1082 /* 1083 * Check permissions, allocate an open file structure, and call the device 1084 * open routine if any. 1085 */ 1086 #ifndef _SYS_SYSPROTO_H_ 1087 struct open_args { 1088 char *path; 1089 int flags; 1090 int mode; 1091 }; 1092 #endif 1093 int 1094 sys_open(struct thread *td, struct open_args *uap) 1095 { 1096 1097 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1098 uap->flags, uap->mode)); 1099 } 1100 1101 #ifndef _SYS_SYSPROTO_H_ 1102 struct openat_args { 1103 int fd; 1104 char *path; 1105 int flag; 1106 int mode; 1107 }; 1108 #endif 1109 int 1110 sys_openat(struct thread *td, struct openat_args *uap) 1111 { 1112 1113 AUDIT_ARG_FD(uap->fd); 1114 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1115 uap->mode)); 1116 } 1117 1118 int 1119 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1120 int flags, int mode) 1121 { 1122 struct proc *p = td->td_proc; 1123 struct filedesc *fdp; 1124 struct pwddesc *pdp; 1125 struct file *fp; 1126 struct vnode *vp; 1127 struct nameidata nd; 1128 cap_rights_t rights; 1129 int cmode, error, indx; 1130 1131 indx = -1; 1132 fdp = p->p_fd; 1133 pdp = p->p_pd; 1134 1135 AUDIT_ARG_FFLAGS(flags); 1136 AUDIT_ARG_MODE(mode); 1137 cap_rights_init_one(&rights, CAP_LOOKUP); 1138 flags_to_rights(flags, &rights); 1139 1140 /* 1141 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1142 * may be specified. On the other hand, for O_PATH any mode 1143 * except O_EXEC is ignored. 1144 */ 1145 if ((flags & O_PATH) != 0) { 1146 flags &= ~(O_CREAT | O_ACCMODE); 1147 } else if ((flags & O_EXEC) != 0) { 1148 if (flags & O_ACCMODE) 1149 return (EINVAL); 1150 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1151 return (EINVAL); 1152 } else { 1153 flags = FFLAGS(flags); 1154 } 1155 1156 /* 1157 * Allocate a file structure. The descriptor to reference it 1158 * is allocated and used by finstall_refed() below. 1159 */ 1160 error = falloc_noinstall(td, &fp); 1161 if (error != 0) 1162 return (error); 1163 /* Set the flags early so the finit in devfs can pick them up. */ 1164 fp->f_flag = flags & FMASK; 1165 cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; 1166 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS, 1167 pathseg, path, fd, &rights); 1168 td->td_dupfd = -1; /* XXX check for fdopen */ 1169 error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS, 1170 td->td_ucred, fp); 1171 if (error != 0) { 1172 /* 1173 * If the vn_open replaced the method vector, something 1174 * wonderous happened deep below and we just pass it up 1175 * pretending we know what we do. 1176 */ 1177 if (error == ENXIO && fp->f_ops != &badfileops) { 1178 MPASS((flags & O_PATH) == 0); 1179 goto success; 1180 } 1181 1182 /* 1183 * Handle special fdopen() case. bleh. 1184 * 1185 * Don't do this for relative (capability) lookups; we don't 1186 * understand exactly what would happen, and we don't think 1187 * that it ever should. 1188 */ 1189 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && 1190 (error == ENODEV || error == ENXIO) && 1191 td->td_dupfd >= 0) { 1192 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1193 &indx); 1194 if (error == 0) 1195 goto success; 1196 } 1197 1198 goto bad; 1199 } 1200 td->td_dupfd = 0; 1201 NDFREE_PNBUF(&nd); 1202 vp = nd.ni_vp; 1203 1204 /* 1205 * Store the vnode, for any f_type. Typically, the vnode use 1206 * count is decremented by direct call to vn_closefile() for 1207 * files that switched type in the cdevsw fdopen() method. 1208 */ 1209 fp->f_vnode = vp; 1210 1211 /* 1212 * If the file wasn't claimed by devfs bind it to the normal 1213 * vnode operations here. 1214 */ 1215 if (fp->f_ops == &badfileops) { 1216 KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, 1217 ("Unexpected fifo fp %p vp %p", fp, vp)); 1218 if ((flags & O_PATH) != 0) { 1219 finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED), 1220 DTYPE_VNODE, NULL, &path_fileops); 1221 } else { 1222 finit_vnode(fp, flags, NULL, &vnops); 1223 } 1224 } 1225 1226 VOP_UNLOCK(vp); 1227 if (flags & O_TRUNC) { 1228 error = fo_truncate(fp, 0, td->td_ucred, td); 1229 if (error != 0) 1230 goto bad; 1231 } 1232 success: 1233 /* 1234 * If we haven't already installed the FD (for dupfdopen), do so now. 1235 */ 1236 if (indx == -1) { 1237 struct filecaps *fcaps; 1238 1239 #ifdef CAPABILITIES 1240 if ((nd.ni_resflags & NIRES_STRICTREL) != 0) 1241 fcaps = &nd.ni_filecaps; 1242 else 1243 #endif 1244 fcaps = NULL; 1245 error = finstall_refed(td, fp, &indx, flags, fcaps); 1246 /* On success finstall_refed() consumes fcaps. */ 1247 if (error != 0) { 1248 goto bad; 1249 } 1250 } else { 1251 NDFREE_IOCTLCAPS(&nd); 1252 falloc_abort(td, fp); 1253 } 1254 1255 td->td_retval[0] = indx; 1256 return (0); 1257 bad: 1258 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1259 NDFREE_IOCTLCAPS(&nd); 1260 falloc_abort(td, fp); 1261 return (error); 1262 } 1263 1264 #ifdef COMPAT_43 1265 /* 1266 * Create a file. 1267 */ 1268 #ifndef _SYS_SYSPROTO_H_ 1269 struct ocreat_args { 1270 char *path; 1271 int mode; 1272 }; 1273 #endif 1274 int 1275 ocreat(struct thread *td, struct ocreat_args *uap) 1276 { 1277 1278 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1279 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1280 } 1281 #endif /* COMPAT_43 */ 1282 1283 /* 1284 * Create a special file. 1285 */ 1286 #ifndef _SYS_SYSPROTO_H_ 1287 struct mknodat_args { 1288 int fd; 1289 char *path; 1290 mode_t mode; 1291 dev_t dev; 1292 }; 1293 #endif 1294 int 1295 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1296 { 1297 1298 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1299 uap->dev)); 1300 } 1301 1302 #if defined(COMPAT_FREEBSD11) 1303 int 1304 freebsd11_mknod(struct thread *td, 1305 struct freebsd11_mknod_args *uap) 1306 { 1307 1308 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1309 uap->mode, uap->dev)); 1310 } 1311 1312 int 1313 freebsd11_mknodat(struct thread *td, 1314 struct freebsd11_mknodat_args *uap) 1315 { 1316 1317 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1318 uap->dev)); 1319 } 1320 #endif /* COMPAT_FREEBSD11 */ 1321 1322 int 1323 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1324 int mode, dev_t dev) 1325 { 1326 struct vnode *vp; 1327 struct mount *mp; 1328 struct vattr vattr; 1329 struct nameidata nd; 1330 int error, whiteout = 0; 1331 1332 AUDIT_ARG_MODE(mode); 1333 AUDIT_ARG_DEV(dev); 1334 switch (mode & S_IFMT) { 1335 case S_IFCHR: 1336 case S_IFBLK: 1337 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1338 if (error == 0 && dev == VNOVAL) 1339 error = EINVAL; 1340 break; 1341 case S_IFWHT: 1342 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1343 break; 1344 case S_IFIFO: 1345 if (dev == 0) 1346 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1347 /* FALLTHROUGH */ 1348 default: 1349 error = EINVAL; 1350 break; 1351 } 1352 if (error != 0) 1353 return (error); 1354 NDPREINIT(&nd); 1355 restart: 1356 bwillwrite(); 1357 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1358 NOCACHE, pathseg, path, fd, &cap_mknodat_rights); 1359 if ((error = namei(&nd)) != 0) 1360 return (error); 1361 vp = nd.ni_vp; 1362 if (vp != NULL) { 1363 NDFREE_PNBUF(&nd); 1364 if (vp == nd.ni_dvp) 1365 vrele(nd.ni_dvp); 1366 else 1367 vput(nd.ni_dvp); 1368 vrele(vp); 1369 return (EEXIST); 1370 } else { 1371 VATTR_NULL(&vattr); 1372 vattr.va_mode = (mode & ALLPERMS) & 1373 ~td->td_proc->p_pd->pd_cmask; 1374 vattr.va_rdev = dev; 1375 whiteout = 0; 1376 1377 switch (mode & S_IFMT) { 1378 case S_IFCHR: 1379 vattr.va_type = VCHR; 1380 break; 1381 case S_IFBLK: 1382 vattr.va_type = VBLK; 1383 break; 1384 case S_IFWHT: 1385 whiteout = 1; 1386 break; 1387 default: 1388 panic("kern_mknod: invalid mode"); 1389 } 1390 } 1391 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1392 NDFREE_PNBUF(&nd); 1393 vput(nd.ni_dvp); 1394 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1395 return (error); 1396 goto restart; 1397 } 1398 #ifdef MAC 1399 if (error == 0 && !whiteout) 1400 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1401 &nd.ni_cnd, &vattr); 1402 #endif 1403 if (error == 0) { 1404 if (whiteout) 1405 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1406 else { 1407 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1408 &nd.ni_cnd, &vattr); 1409 } 1410 } 1411 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1412 true); 1413 vn_finished_write(mp); 1414 NDFREE_PNBUF(&nd); 1415 if (error == ERELOOKUP) 1416 goto restart; 1417 return (error); 1418 } 1419 1420 /* 1421 * Create a named pipe. 1422 */ 1423 #ifndef _SYS_SYSPROTO_H_ 1424 struct mkfifo_args { 1425 char *path; 1426 int mode; 1427 }; 1428 #endif 1429 int 1430 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1431 { 1432 1433 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1434 uap->mode)); 1435 } 1436 1437 #ifndef _SYS_SYSPROTO_H_ 1438 struct mkfifoat_args { 1439 int fd; 1440 char *path; 1441 mode_t mode; 1442 }; 1443 #endif 1444 int 1445 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1446 { 1447 1448 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1449 uap->mode)); 1450 } 1451 1452 int 1453 kern_mkfifoat(struct thread *td, int fd, const char *path, 1454 enum uio_seg pathseg, int mode) 1455 { 1456 struct mount *mp; 1457 struct vattr vattr; 1458 struct nameidata nd; 1459 int error; 1460 1461 AUDIT_ARG_MODE(mode); 1462 NDPREINIT(&nd); 1463 restart: 1464 bwillwrite(); 1465 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1466 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights); 1467 if ((error = namei(&nd)) != 0) 1468 return (error); 1469 if (nd.ni_vp != NULL) { 1470 NDFREE_PNBUF(&nd); 1471 if (nd.ni_vp == nd.ni_dvp) 1472 vrele(nd.ni_dvp); 1473 else 1474 vput(nd.ni_dvp); 1475 vrele(nd.ni_vp); 1476 return (EEXIST); 1477 } 1478 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1479 NDFREE_PNBUF(&nd); 1480 vput(nd.ni_dvp); 1481 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1482 return (error); 1483 goto restart; 1484 } 1485 VATTR_NULL(&vattr); 1486 vattr.va_type = VFIFO; 1487 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1488 #ifdef MAC 1489 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1490 &vattr); 1491 if (error != 0) 1492 goto out; 1493 #endif 1494 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1495 #ifdef MAC 1496 out: 1497 #endif 1498 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1499 vn_finished_write(mp); 1500 NDFREE_PNBUF(&nd); 1501 if (error == ERELOOKUP) 1502 goto restart; 1503 return (error); 1504 } 1505 1506 /* 1507 * Make a hard file link. 1508 */ 1509 #ifndef _SYS_SYSPROTO_H_ 1510 struct link_args { 1511 char *path; 1512 char *link; 1513 }; 1514 #endif 1515 int 1516 sys_link(struct thread *td, struct link_args *uap) 1517 { 1518 1519 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1520 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1521 } 1522 1523 #ifndef _SYS_SYSPROTO_H_ 1524 struct linkat_args { 1525 int fd1; 1526 char *path1; 1527 int fd2; 1528 char *path2; 1529 int flag; 1530 }; 1531 #endif 1532 int 1533 sys_linkat(struct thread *td, struct linkat_args *uap) 1534 { 1535 1536 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1537 UIO_USERSPACE, uap->flag)); 1538 } 1539 1540 int hardlink_check_uid = 0; 1541 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1542 &hardlink_check_uid, 0, 1543 "Unprivileged processes cannot create hard links to files owned by other " 1544 "users"); 1545 static int hardlink_check_gid = 0; 1546 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1547 &hardlink_check_gid, 0, 1548 "Unprivileged processes cannot create hard links to files owned by other " 1549 "groups"); 1550 1551 static int 1552 can_hardlink(struct vnode *vp, struct ucred *cred) 1553 { 1554 struct vattr va; 1555 int error; 1556 1557 if (!hardlink_check_uid && !hardlink_check_gid) 1558 return (0); 1559 1560 error = VOP_GETATTR(vp, &va, cred); 1561 if (error != 0) 1562 return (error); 1563 1564 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1565 error = priv_check_cred(cred, PRIV_VFS_LINK); 1566 if (error != 0) 1567 return (error); 1568 } 1569 1570 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1571 error = priv_check_cred(cred, PRIV_VFS_LINK); 1572 if (error != 0) 1573 return (error); 1574 } 1575 1576 return (0); 1577 } 1578 1579 int 1580 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1581 const char *path2, enum uio_seg segflag, int flag) 1582 { 1583 struct nameidata nd; 1584 int error; 1585 1586 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1587 AT_EMPTY_PATH)) != 0) 1588 return (EINVAL); 1589 1590 NDPREINIT(&nd); 1591 do { 1592 bwillwrite(); 1593 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1594 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1595 segflag, path1, fd1, &cap_linkat_source_rights); 1596 if ((error = namei(&nd)) != 0) 1597 return (error); 1598 NDFREE_PNBUF(&nd); 1599 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1600 error = priv_check(td, PRIV_VFS_FHOPEN); 1601 if (error != 0) { 1602 vrele(nd.ni_vp); 1603 return (error); 1604 } 1605 } 1606 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1607 } while (error == EAGAIN || error == ERELOOKUP); 1608 return (error); 1609 } 1610 1611 static int 1612 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1613 enum uio_seg segflag) 1614 { 1615 struct nameidata nd; 1616 struct mount *mp; 1617 int error; 1618 1619 if (vp->v_type == VDIR) { 1620 vrele(vp); 1621 return (EPERM); /* POSIX */ 1622 } 1623 NDINIT_ATRIGHTS(&nd, CREATE, 1624 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1625 &cap_linkat_target_rights); 1626 if ((error = namei(&nd)) == 0) { 1627 if (nd.ni_vp != NULL) { 1628 NDFREE_PNBUF(&nd); 1629 if (nd.ni_dvp == nd.ni_vp) 1630 vrele(nd.ni_dvp); 1631 else 1632 vput(nd.ni_dvp); 1633 vrele(nd.ni_vp); 1634 vrele(vp); 1635 return (EEXIST); 1636 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1637 /* 1638 * Cross-device link. No need to recheck 1639 * vp->v_type, since it cannot change, except 1640 * to VBAD. 1641 */ 1642 NDFREE_PNBUF(&nd); 1643 vput(nd.ni_dvp); 1644 vrele(vp); 1645 return (EXDEV); 1646 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1647 error = can_hardlink(vp, td->td_ucred); 1648 #ifdef MAC 1649 if (error == 0) 1650 error = mac_vnode_check_link(td->td_ucred, 1651 nd.ni_dvp, vp, &nd.ni_cnd); 1652 #endif 1653 if (error != 0) { 1654 vput(vp); 1655 vput(nd.ni_dvp); 1656 NDFREE_PNBUF(&nd); 1657 return (error); 1658 } 1659 error = vn_start_write(vp, &mp, V_NOWAIT); 1660 if (error != 0) { 1661 vput(vp); 1662 vput(nd.ni_dvp); 1663 NDFREE_PNBUF(&nd); 1664 error = vn_start_write(NULL, &mp, 1665 V_XSLEEP | PCATCH); 1666 if (error != 0) 1667 return (error); 1668 return (EAGAIN); 1669 } 1670 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1671 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1672 vn_finished_write(mp); 1673 NDFREE_PNBUF(&nd); 1674 vp = NULL; 1675 } else { 1676 vput(nd.ni_dvp); 1677 NDFREE_PNBUF(&nd); 1678 vrele(vp); 1679 return (EAGAIN); 1680 } 1681 } 1682 if (vp != NULL) 1683 vrele(vp); 1684 return (error); 1685 } 1686 1687 /* 1688 * Make a symbolic link. 1689 */ 1690 #ifndef _SYS_SYSPROTO_H_ 1691 struct symlink_args { 1692 char *path; 1693 char *link; 1694 }; 1695 #endif 1696 int 1697 sys_symlink(struct thread *td, struct symlink_args *uap) 1698 { 1699 1700 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1701 UIO_USERSPACE)); 1702 } 1703 1704 #ifndef _SYS_SYSPROTO_H_ 1705 struct symlinkat_args { 1706 char *path; 1707 int fd; 1708 char *path2; 1709 }; 1710 #endif 1711 int 1712 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1713 { 1714 1715 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1716 UIO_USERSPACE)); 1717 } 1718 1719 int 1720 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1721 enum uio_seg segflg) 1722 { 1723 struct mount *mp; 1724 struct vattr vattr; 1725 const char *syspath; 1726 char *tmppath; 1727 struct nameidata nd; 1728 int error; 1729 1730 if (segflg == UIO_SYSSPACE) { 1731 syspath = path1; 1732 } else { 1733 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1734 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1735 goto out; 1736 syspath = tmppath; 1737 } 1738 AUDIT_ARG_TEXT(syspath); 1739 NDPREINIT(&nd); 1740 restart: 1741 bwillwrite(); 1742 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1743 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights); 1744 if ((error = namei(&nd)) != 0) 1745 goto out; 1746 if (nd.ni_vp) { 1747 NDFREE_PNBUF(&nd); 1748 if (nd.ni_vp == nd.ni_dvp) 1749 vrele(nd.ni_dvp); 1750 else 1751 vput(nd.ni_dvp); 1752 vrele(nd.ni_vp); 1753 nd.ni_vp = NULL; 1754 error = EEXIST; 1755 goto out; 1756 } 1757 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1758 NDFREE_PNBUF(&nd); 1759 vput(nd.ni_dvp); 1760 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1761 goto out; 1762 goto restart; 1763 } 1764 VATTR_NULL(&vattr); 1765 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1766 #ifdef MAC 1767 vattr.va_type = VLNK; 1768 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1769 &vattr); 1770 if (error != 0) 1771 goto out2; 1772 #endif 1773 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1774 #ifdef MAC 1775 out2: 1776 #endif 1777 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1778 vn_finished_write(mp); 1779 NDFREE_PNBUF(&nd); 1780 if (error == ERELOOKUP) 1781 goto restart; 1782 out: 1783 if (segflg != UIO_SYSSPACE) 1784 uma_zfree(namei_zone, tmppath); 1785 return (error); 1786 } 1787 1788 /* 1789 * Delete a whiteout from the filesystem. 1790 */ 1791 #ifndef _SYS_SYSPROTO_H_ 1792 struct undelete_args { 1793 char *path; 1794 }; 1795 #endif 1796 int 1797 sys_undelete(struct thread *td, struct undelete_args *uap) 1798 { 1799 struct mount *mp; 1800 struct nameidata nd; 1801 int error; 1802 1803 NDPREINIT(&nd); 1804 restart: 1805 bwillwrite(); 1806 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1807 UIO_USERSPACE, uap->path); 1808 error = namei(&nd); 1809 if (error != 0) 1810 return (error); 1811 1812 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1813 NDFREE_PNBUF(&nd); 1814 if (nd.ni_vp == nd.ni_dvp) 1815 vrele(nd.ni_dvp); 1816 else 1817 vput(nd.ni_dvp); 1818 if (nd.ni_vp) 1819 vrele(nd.ni_vp); 1820 return (EEXIST); 1821 } 1822 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1823 NDFREE_PNBUF(&nd); 1824 vput(nd.ni_dvp); 1825 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1826 return (error); 1827 goto restart; 1828 } 1829 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1830 NDFREE_PNBUF(&nd); 1831 vput(nd.ni_dvp); 1832 vn_finished_write(mp); 1833 if (error == ERELOOKUP) 1834 goto restart; 1835 return (error); 1836 } 1837 1838 /* 1839 * Delete a name from the filesystem. 1840 */ 1841 #ifndef _SYS_SYSPROTO_H_ 1842 struct unlink_args { 1843 char *path; 1844 }; 1845 #endif 1846 int 1847 sys_unlink(struct thread *td, struct unlink_args *uap) 1848 { 1849 1850 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1851 0, 0)); 1852 } 1853 1854 static int 1855 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1856 int flag, enum uio_seg pathseg, ino_t oldinum) 1857 { 1858 1859 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1860 return (EINVAL); 1861 1862 if ((flag & AT_REMOVEDIR) != 0) 1863 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1864 1865 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1866 } 1867 1868 #ifndef _SYS_SYSPROTO_H_ 1869 struct unlinkat_args { 1870 int fd; 1871 char *path; 1872 int flag; 1873 }; 1874 #endif 1875 int 1876 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1877 { 1878 1879 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1880 UIO_USERSPACE, 0)); 1881 } 1882 1883 #ifndef _SYS_SYSPROTO_H_ 1884 struct funlinkat_args { 1885 int dfd; 1886 const char *path; 1887 int fd; 1888 int flag; 1889 }; 1890 #endif 1891 int 1892 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1893 { 1894 1895 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1896 UIO_USERSPACE, 0)); 1897 } 1898 1899 int 1900 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1901 enum uio_seg pathseg, int flag, ino_t oldinum) 1902 { 1903 struct mount *mp; 1904 struct file *fp; 1905 struct vnode *vp; 1906 struct nameidata nd; 1907 struct stat sb; 1908 int error; 1909 1910 fp = NULL; 1911 if (fd != FD_NONE) { 1912 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1913 if (error != 0) 1914 return (error); 1915 } 1916 1917 NDPREINIT(&nd); 1918 restart: 1919 bwillwrite(); 1920 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1921 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1922 pathseg, path, dfd, &cap_unlinkat_rights); 1923 if ((error = namei(&nd)) != 0) { 1924 if (error == EINVAL) 1925 error = EPERM; 1926 goto fdout; 1927 } 1928 vp = nd.ni_vp; 1929 if (vp->v_type == VDIR && oldinum == 0) { 1930 error = EPERM; /* POSIX */ 1931 } else if (oldinum != 0 && 1932 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 1933 sb.st_ino != oldinum) { 1934 error = EIDRM; /* Identifier removed */ 1935 } else if (fp != NULL && fp->f_vnode != vp) { 1936 if (VN_IS_DOOMED(fp->f_vnode)) 1937 error = EBADF; 1938 else 1939 error = EDEADLK; 1940 } else { 1941 /* 1942 * The root of a mounted filesystem cannot be deleted. 1943 * 1944 * XXX: can this only be a VDIR case? 1945 */ 1946 if (vp->v_vflag & VV_ROOT) 1947 error = EBUSY; 1948 } 1949 if (error == 0) { 1950 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1951 NDFREE_PNBUF(&nd); 1952 vput(nd.ni_dvp); 1953 if (vp == nd.ni_dvp) 1954 vrele(vp); 1955 else 1956 vput(vp); 1957 if ((error = vn_start_write(NULL, &mp, 1958 V_XSLEEP | PCATCH)) != 0) { 1959 goto fdout; 1960 } 1961 goto restart; 1962 } 1963 #ifdef MAC 1964 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1965 &nd.ni_cnd); 1966 if (error != 0) 1967 goto out; 1968 #endif 1969 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1970 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1971 #ifdef MAC 1972 out: 1973 #endif 1974 vn_finished_write(mp); 1975 } 1976 NDFREE_PNBUF(&nd); 1977 vput(nd.ni_dvp); 1978 if (vp == nd.ni_dvp) 1979 vrele(vp); 1980 else 1981 vput(vp); 1982 if (error == ERELOOKUP) 1983 goto restart; 1984 fdout: 1985 if (fp != NULL) 1986 fdrop(fp, td); 1987 return (error); 1988 } 1989 1990 /* 1991 * Reposition read/write file offset. 1992 */ 1993 #ifndef _SYS_SYSPROTO_H_ 1994 struct lseek_args { 1995 int fd; 1996 int pad; 1997 off_t offset; 1998 int whence; 1999 }; 2000 #endif 2001 int 2002 sys_lseek(struct thread *td, struct lseek_args *uap) 2003 { 2004 2005 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2006 } 2007 2008 int 2009 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2010 { 2011 struct file *fp; 2012 int error; 2013 2014 AUDIT_ARG_FD(fd); 2015 error = fget(td, fd, &cap_seek_rights, &fp); 2016 if (error != 0) 2017 return (error); 2018 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2019 fo_seek(fp, offset, whence, td) : ESPIPE; 2020 fdrop(fp, td); 2021 return (error); 2022 } 2023 2024 #if defined(COMPAT_43) 2025 /* 2026 * Reposition read/write file offset. 2027 */ 2028 #ifndef _SYS_SYSPROTO_H_ 2029 struct olseek_args { 2030 int fd; 2031 long offset; 2032 int whence; 2033 }; 2034 #endif 2035 int 2036 olseek(struct thread *td, struct olseek_args *uap) 2037 { 2038 2039 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2040 } 2041 #endif /* COMPAT_43 */ 2042 2043 #if defined(COMPAT_FREEBSD6) 2044 /* Version with the 'pad' argument */ 2045 int 2046 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2047 { 2048 2049 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2050 } 2051 #endif 2052 2053 /* 2054 * Check access permissions using passed credentials. 2055 */ 2056 static int 2057 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2058 struct thread *td) 2059 { 2060 accmode_t accmode; 2061 int error; 2062 2063 /* Flags == 0 means only check for existence. */ 2064 if (user_flags == 0) 2065 return (0); 2066 2067 accmode = 0; 2068 if (user_flags & R_OK) 2069 accmode |= VREAD; 2070 if (user_flags & W_OK) 2071 accmode |= VWRITE; 2072 if (user_flags & X_OK) 2073 accmode |= VEXEC; 2074 #ifdef MAC 2075 error = mac_vnode_check_access(cred, vp, accmode); 2076 if (error != 0) 2077 return (error); 2078 #endif 2079 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2080 error = VOP_ACCESS(vp, accmode, cred, td); 2081 return (error); 2082 } 2083 2084 /* 2085 * Check access permissions using "real" credentials. 2086 */ 2087 #ifndef _SYS_SYSPROTO_H_ 2088 struct access_args { 2089 char *path; 2090 int amode; 2091 }; 2092 #endif 2093 int 2094 sys_access(struct thread *td, struct access_args *uap) 2095 { 2096 2097 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2098 0, uap->amode)); 2099 } 2100 2101 #ifndef _SYS_SYSPROTO_H_ 2102 struct faccessat_args { 2103 int dirfd; 2104 char *path; 2105 int amode; 2106 int flag; 2107 } 2108 #endif 2109 int 2110 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2111 { 2112 2113 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2114 uap->amode)); 2115 } 2116 2117 int 2118 kern_accessat(struct thread *td, int fd, const char *path, 2119 enum uio_seg pathseg, int flag, int amode) 2120 { 2121 struct ucred *cred, *usecred; 2122 struct vnode *vp; 2123 struct nameidata nd; 2124 int error; 2125 2126 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0) 2127 return (EINVAL); 2128 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2129 return (EINVAL); 2130 2131 /* 2132 * Create and modify a temporary credential instead of one that 2133 * is potentially shared (if we need one). 2134 */ 2135 cred = td->td_ucred; 2136 if ((flag & AT_EACCESS) == 0 && 2137 ((cred->cr_uid != cred->cr_ruid || 2138 cred->cr_rgid != cred->cr_groups[0]))) { 2139 usecred = crdup(cred); 2140 usecred->cr_uid = cred->cr_ruid; 2141 usecred->cr_groups[0] = cred->cr_rgid; 2142 td->td_ucred = usecred; 2143 } else 2144 usecred = cred; 2145 AUDIT_ARG_VALUE(amode); 2146 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2147 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | 2148 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2149 if ((error = namei(&nd)) != 0) 2150 goto out; 2151 vp = nd.ni_vp; 2152 2153 error = vn_access(vp, amode, usecred, td); 2154 NDFREE_NOTHING(&nd); 2155 vput(vp); 2156 out: 2157 if (usecred != cred) { 2158 td->td_ucred = cred; 2159 crfree(usecred); 2160 } 2161 return (error); 2162 } 2163 2164 /* 2165 * Check access permissions using "effective" credentials. 2166 */ 2167 #ifndef _SYS_SYSPROTO_H_ 2168 struct eaccess_args { 2169 char *path; 2170 int amode; 2171 }; 2172 #endif 2173 int 2174 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2175 { 2176 2177 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2178 AT_EACCESS, uap->amode)); 2179 } 2180 2181 #if defined(COMPAT_43) 2182 /* 2183 * Get file status; this version follows links. 2184 */ 2185 #ifndef _SYS_SYSPROTO_H_ 2186 struct ostat_args { 2187 char *path; 2188 struct ostat *ub; 2189 }; 2190 #endif 2191 int 2192 ostat(struct thread *td, struct ostat_args *uap) 2193 { 2194 struct stat sb; 2195 struct ostat osb; 2196 int error; 2197 2198 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2199 &sb, NULL); 2200 if (error != 0) 2201 return (error); 2202 cvtstat(&sb, &osb); 2203 return (copyout(&osb, uap->ub, sizeof (osb))); 2204 } 2205 2206 /* 2207 * Get file status; this version does not follow links. 2208 */ 2209 #ifndef _SYS_SYSPROTO_H_ 2210 struct olstat_args { 2211 char *path; 2212 struct ostat *ub; 2213 }; 2214 #endif 2215 int 2216 olstat(struct thread *td, struct olstat_args *uap) 2217 { 2218 struct stat sb; 2219 struct ostat osb; 2220 int error; 2221 2222 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2223 UIO_USERSPACE, &sb, NULL); 2224 if (error != 0) 2225 return (error); 2226 cvtstat(&sb, &osb); 2227 return (copyout(&osb, uap->ub, sizeof (osb))); 2228 } 2229 2230 /* 2231 * Convert from an old to a new stat structure. 2232 * XXX: many values are blindly truncated. 2233 */ 2234 void 2235 cvtstat(struct stat *st, struct ostat *ost) 2236 { 2237 2238 bzero(ost, sizeof(*ost)); 2239 ost->st_dev = st->st_dev; 2240 ost->st_ino = st->st_ino; 2241 ost->st_mode = st->st_mode; 2242 ost->st_nlink = st->st_nlink; 2243 ost->st_uid = st->st_uid; 2244 ost->st_gid = st->st_gid; 2245 ost->st_rdev = st->st_rdev; 2246 ost->st_size = MIN(st->st_size, INT32_MAX); 2247 ost->st_atim = st->st_atim; 2248 ost->st_mtim = st->st_mtim; 2249 ost->st_ctim = st->st_ctim; 2250 ost->st_blksize = st->st_blksize; 2251 ost->st_blocks = st->st_blocks; 2252 ost->st_flags = st->st_flags; 2253 ost->st_gen = st->st_gen; 2254 } 2255 #endif /* COMPAT_43 */ 2256 2257 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2258 int ino64_trunc_error; 2259 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2260 &ino64_trunc_error, 0, 2261 "Error on truncation of device, file or inode number, or link count"); 2262 2263 int 2264 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2265 { 2266 2267 ost->st_dev = st->st_dev; 2268 if (ost->st_dev != st->st_dev) { 2269 switch (ino64_trunc_error) { 2270 default: 2271 /* 2272 * Since dev_t is almost raw, don't clamp to the 2273 * maximum for case 2, but ignore the error. 2274 */ 2275 break; 2276 case 1: 2277 return (EOVERFLOW); 2278 } 2279 } 2280 ost->st_ino = st->st_ino; 2281 if (ost->st_ino != st->st_ino) { 2282 switch (ino64_trunc_error) { 2283 default: 2284 case 0: 2285 break; 2286 case 1: 2287 return (EOVERFLOW); 2288 case 2: 2289 ost->st_ino = UINT32_MAX; 2290 break; 2291 } 2292 } 2293 ost->st_mode = st->st_mode; 2294 ost->st_nlink = st->st_nlink; 2295 if (ost->st_nlink != st->st_nlink) { 2296 switch (ino64_trunc_error) { 2297 default: 2298 case 0: 2299 break; 2300 case 1: 2301 return (EOVERFLOW); 2302 case 2: 2303 ost->st_nlink = UINT16_MAX; 2304 break; 2305 } 2306 } 2307 ost->st_uid = st->st_uid; 2308 ost->st_gid = st->st_gid; 2309 ost->st_rdev = st->st_rdev; 2310 if (ost->st_rdev != st->st_rdev) { 2311 switch (ino64_trunc_error) { 2312 default: 2313 break; 2314 case 1: 2315 return (EOVERFLOW); 2316 } 2317 } 2318 ost->st_atim = st->st_atim; 2319 ost->st_mtim = st->st_mtim; 2320 ost->st_ctim = st->st_ctim; 2321 ost->st_size = st->st_size; 2322 ost->st_blocks = st->st_blocks; 2323 ost->st_blksize = st->st_blksize; 2324 ost->st_flags = st->st_flags; 2325 ost->st_gen = st->st_gen; 2326 ost->st_lspare = 0; 2327 ost->st_birthtim = st->st_birthtim; 2328 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2329 sizeof(*ost) - offsetof(struct freebsd11_stat, 2330 st_birthtim) - sizeof(ost->st_birthtim)); 2331 return (0); 2332 } 2333 2334 int 2335 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2336 { 2337 struct stat sb; 2338 struct freebsd11_stat osb; 2339 int error; 2340 2341 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2342 &sb, NULL); 2343 if (error != 0) 2344 return (error); 2345 error = freebsd11_cvtstat(&sb, &osb); 2346 if (error == 0) 2347 error = copyout(&osb, uap->ub, sizeof(osb)); 2348 return (error); 2349 } 2350 2351 int 2352 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2353 { 2354 struct stat sb; 2355 struct freebsd11_stat osb; 2356 int error; 2357 2358 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2359 UIO_USERSPACE, &sb, NULL); 2360 if (error != 0) 2361 return (error); 2362 error = freebsd11_cvtstat(&sb, &osb); 2363 if (error == 0) 2364 error = copyout(&osb, uap->ub, sizeof(osb)); 2365 return (error); 2366 } 2367 2368 int 2369 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2370 { 2371 struct fhandle fh; 2372 struct stat sb; 2373 struct freebsd11_stat osb; 2374 int error; 2375 2376 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2377 if (error != 0) 2378 return (error); 2379 error = kern_fhstat(td, fh, &sb); 2380 if (error != 0) 2381 return (error); 2382 error = freebsd11_cvtstat(&sb, &osb); 2383 if (error == 0) 2384 error = copyout(&osb, uap->sb, sizeof(osb)); 2385 return (error); 2386 } 2387 2388 int 2389 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2390 { 2391 struct stat sb; 2392 struct freebsd11_stat osb; 2393 int error; 2394 2395 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2396 UIO_USERSPACE, &sb, NULL); 2397 if (error != 0) 2398 return (error); 2399 error = freebsd11_cvtstat(&sb, &osb); 2400 if (error == 0) 2401 error = copyout(&osb, uap->buf, sizeof(osb)); 2402 return (error); 2403 } 2404 #endif /* COMPAT_FREEBSD11 */ 2405 2406 /* 2407 * Get file status 2408 */ 2409 #ifndef _SYS_SYSPROTO_H_ 2410 struct fstatat_args { 2411 int fd; 2412 char *path; 2413 struct stat *buf; 2414 int flag; 2415 } 2416 #endif 2417 int 2418 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2419 { 2420 struct stat sb; 2421 int error; 2422 2423 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2424 UIO_USERSPACE, &sb, NULL); 2425 if (error == 0) 2426 error = copyout(&sb, uap->buf, sizeof (sb)); 2427 return (error); 2428 } 2429 2430 int 2431 kern_statat(struct thread *td, int flag, int fd, const char *path, 2432 enum uio_seg pathseg, struct stat *sbp, 2433 void (*hook)(struct vnode *vp, struct stat *sbp)) 2434 { 2435 struct nameidata nd; 2436 int error; 2437 2438 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2439 AT_EMPTY_PATH)) != 0) 2440 return (EINVAL); 2441 2442 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2443 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2444 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2445 2446 if ((error = namei(&nd)) != 0) { 2447 if (error == ENOTDIR && 2448 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2449 error = kern_fstat(td, fd, sbp); 2450 return (error); 2451 } 2452 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2453 if (error == 0) { 2454 if (__predict_false(hook != NULL)) 2455 hook(nd.ni_vp, sbp); 2456 } 2457 NDFREE_NOTHING(&nd); 2458 vput(nd.ni_vp); 2459 #ifdef __STAT_TIME_T_EXT 2460 sbp->st_atim_ext = 0; 2461 sbp->st_mtim_ext = 0; 2462 sbp->st_ctim_ext = 0; 2463 sbp->st_btim_ext = 0; 2464 #endif 2465 #ifdef KTRACE 2466 if (KTRPOINT(td, KTR_STRUCT)) 2467 ktrstat_error(sbp, error); 2468 #endif 2469 return (error); 2470 } 2471 2472 #if defined(COMPAT_FREEBSD11) 2473 /* 2474 * Implementation of the NetBSD [l]stat() functions. 2475 */ 2476 int 2477 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2478 { 2479 struct freebsd11_stat sb11; 2480 int error; 2481 2482 error = freebsd11_cvtstat(sb, &sb11); 2483 if (error != 0) 2484 return (error); 2485 2486 bzero(nsb, sizeof(*nsb)); 2487 CP(sb11, *nsb, st_dev); 2488 CP(sb11, *nsb, st_ino); 2489 CP(sb11, *nsb, st_mode); 2490 CP(sb11, *nsb, st_nlink); 2491 CP(sb11, *nsb, st_uid); 2492 CP(sb11, *nsb, st_gid); 2493 CP(sb11, *nsb, st_rdev); 2494 CP(sb11, *nsb, st_atim); 2495 CP(sb11, *nsb, st_mtim); 2496 CP(sb11, *nsb, st_ctim); 2497 CP(sb11, *nsb, st_size); 2498 CP(sb11, *nsb, st_blocks); 2499 CP(sb11, *nsb, st_blksize); 2500 CP(sb11, *nsb, st_flags); 2501 CP(sb11, *nsb, st_gen); 2502 CP(sb11, *nsb, st_birthtim); 2503 return (0); 2504 } 2505 2506 #ifndef _SYS_SYSPROTO_H_ 2507 struct freebsd11_nstat_args { 2508 char *path; 2509 struct nstat *ub; 2510 }; 2511 #endif 2512 int 2513 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2514 { 2515 struct stat sb; 2516 struct nstat nsb; 2517 int error; 2518 2519 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2520 &sb, NULL); 2521 if (error != 0) 2522 return (error); 2523 error = freebsd11_cvtnstat(&sb, &nsb); 2524 if (error == 0) 2525 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2526 return (error); 2527 } 2528 2529 /* 2530 * NetBSD lstat. Get file status; this version does not follow links. 2531 */ 2532 #ifndef _SYS_SYSPROTO_H_ 2533 struct freebsd11_nlstat_args { 2534 char *path; 2535 struct nstat *ub; 2536 }; 2537 #endif 2538 int 2539 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2540 { 2541 struct stat sb; 2542 struct nstat nsb; 2543 int error; 2544 2545 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2546 UIO_USERSPACE, &sb, NULL); 2547 if (error != 0) 2548 return (error); 2549 error = freebsd11_cvtnstat(&sb, &nsb); 2550 if (error == 0) 2551 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2552 return (error); 2553 } 2554 #endif /* COMPAT_FREEBSD11 */ 2555 2556 /* 2557 * Get configurable pathname variables. 2558 */ 2559 #ifndef _SYS_SYSPROTO_H_ 2560 struct pathconf_args { 2561 char *path; 2562 int name; 2563 }; 2564 #endif 2565 int 2566 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2567 { 2568 long value; 2569 int error; 2570 2571 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2572 &value); 2573 if (error == 0) 2574 td->td_retval[0] = value; 2575 return (error); 2576 } 2577 2578 #ifndef _SYS_SYSPROTO_H_ 2579 struct lpathconf_args { 2580 char *path; 2581 int name; 2582 }; 2583 #endif 2584 int 2585 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2586 { 2587 long value; 2588 int error; 2589 2590 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2591 NOFOLLOW, &value); 2592 if (error == 0) 2593 td->td_retval[0] = value; 2594 return (error); 2595 } 2596 2597 int 2598 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2599 int name, u_long flags, long *valuep) 2600 { 2601 struct nameidata nd; 2602 int error; 2603 2604 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2605 pathseg, path); 2606 if ((error = namei(&nd)) != 0) 2607 return (error); 2608 NDFREE_NOTHING(&nd); 2609 2610 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2611 vput(nd.ni_vp); 2612 return (error); 2613 } 2614 2615 /* 2616 * Return target name of a symbolic link. 2617 */ 2618 #ifndef _SYS_SYSPROTO_H_ 2619 struct readlink_args { 2620 char *path; 2621 char *buf; 2622 size_t count; 2623 }; 2624 #endif 2625 int 2626 sys_readlink(struct thread *td, struct readlink_args *uap) 2627 { 2628 2629 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2630 uap->buf, UIO_USERSPACE, uap->count)); 2631 } 2632 #ifndef _SYS_SYSPROTO_H_ 2633 struct readlinkat_args { 2634 int fd; 2635 char *path; 2636 char *buf; 2637 size_t bufsize; 2638 }; 2639 #endif 2640 int 2641 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2642 { 2643 2644 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2645 uap->buf, UIO_USERSPACE, uap->bufsize)); 2646 } 2647 2648 int 2649 kern_readlinkat(struct thread *td, int fd, const char *path, 2650 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2651 { 2652 struct vnode *vp; 2653 struct nameidata nd; 2654 int error; 2655 2656 if (count > IOSIZE_MAX) 2657 return (EINVAL); 2658 2659 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2660 EMPTYPATH, pathseg, path, fd); 2661 2662 if ((error = namei(&nd)) != 0) 2663 return (error); 2664 NDFREE_NOTHING(&nd); 2665 vp = nd.ni_vp; 2666 2667 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2668 vput(vp); 2669 2670 return (error); 2671 } 2672 2673 /* 2674 * Helper function to readlink from a vnode 2675 */ 2676 static int 2677 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2678 struct thread *td) 2679 { 2680 struct iovec aiov; 2681 struct uio auio; 2682 int error; 2683 2684 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2685 #ifdef MAC 2686 error = mac_vnode_check_readlink(td->td_ucred, vp); 2687 if (error != 0) 2688 return (error); 2689 #endif 2690 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2691 return (EINVAL); 2692 2693 aiov.iov_base = buf; 2694 aiov.iov_len = count; 2695 auio.uio_iov = &aiov; 2696 auio.uio_iovcnt = 1; 2697 auio.uio_offset = 0; 2698 auio.uio_rw = UIO_READ; 2699 auio.uio_segflg = bufseg; 2700 auio.uio_td = td; 2701 auio.uio_resid = count; 2702 error = VOP_READLINK(vp, &auio, td->td_ucred); 2703 td->td_retval[0] = count - auio.uio_resid; 2704 return (error); 2705 } 2706 2707 /* 2708 * Common implementation code for chflags() and fchflags(). 2709 */ 2710 static int 2711 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2712 { 2713 struct mount *mp; 2714 struct vattr vattr; 2715 int error; 2716 2717 /* We can't support the value matching VNOVAL. */ 2718 if (flags == VNOVAL) 2719 return (EOPNOTSUPP); 2720 2721 /* 2722 * Prevent non-root users from setting flags on devices. When 2723 * a device is reused, users can retain ownership of the device 2724 * if they are allowed to set flags and programs assume that 2725 * chown can't fail when done as root. 2726 */ 2727 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2728 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2729 if (error != 0) 2730 return (error); 2731 } 2732 2733 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2734 return (error); 2735 VATTR_NULL(&vattr); 2736 vattr.va_flags = flags; 2737 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2738 #ifdef MAC 2739 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2740 if (error == 0) 2741 #endif 2742 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2743 VOP_UNLOCK(vp); 2744 vn_finished_write(mp); 2745 return (error); 2746 } 2747 2748 /* 2749 * Change flags of a file given a path name. 2750 */ 2751 #ifndef _SYS_SYSPROTO_H_ 2752 struct chflags_args { 2753 const char *path; 2754 u_long flags; 2755 }; 2756 #endif 2757 int 2758 sys_chflags(struct thread *td, struct chflags_args *uap) 2759 { 2760 2761 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2762 uap->flags, 0)); 2763 } 2764 2765 #ifndef _SYS_SYSPROTO_H_ 2766 struct chflagsat_args { 2767 int fd; 2768 const char *path; 2769 u_long flags; 2770 int atflag; 2771 } 2772 #endif 2773 int 2774 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2775 { 2776 2777 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2778 uap->flags, uap->atflag)); 2779 } 2780 2781 /* 2782 * Same as chflags() but doesn't follow symlinks. 2783 */ 2784 #ifndef _SYS_SYSPROTO_H_ 2785 struct lchflags_args { 2786 const char *path; 2787 u_long flags; 2788 }; 2789 #endif 2790 int 2791 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2792 { 2793 2794 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2795 uap->flags, AT_SYMLINK_NOFOLLOW)); 2796 } 2797 2798 static int 2799 kern_chflagsat(struct thread *td, int fd, const char *path, 2800 enum uio_seg pathseg, u_long flags, int atflag) 2801 { 2802 struct nameidata nd; 2803 int error; 2804 2805 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2806 AT_EMPTY_PATH)) != 0) 2807 return (EINVAL); 2808 2809 AUDIT_ARG_FFLAGS(flags); 2810 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2811 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2812 fd, &cap_fchflags_rights); 2813 if ((error = namei(&nd)) != 0) 2814 return (error); 2815 NDFREE_NOTHING(&nd); 2816 error = setfflags(td, nd.ni_vp, flags); 2817 vrele(nd.ni_vp); 2818 return (error); 2819 } 2820 2821 /* 2822 * Change flags of a file given a file descriptor. 2823 */ 2824 #ifndef _SYS_SYSPROTO_H_ 2825 struct fchflags_args { 2826 int fd; 2827 u_long flags; 2828 }; 2829 #endif 2830 int 2831 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2832 { 2833 struct file *fp; 2834 int error; 2835 2836 AUDIT_ARG_FD(uap->fd); 2837 AUDIT_ARG_FFLAGS(uap->flags); 2838 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2839 &fp); 2840 if (error != 0) 2841 return (error); 2842 #ifdef AUDIT 2843 if (AUDITING_TD(td)) { 2844 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2845 AUDIT_ARG_VNODE1(fp->f_vnode); 2846 VOP_UNLOCK(fp->f_vnode); 2847 } 2848 #endif 2849 error = setfflags(td, fp->f_vnode, uap->flags); 2850 fdrop(fp, td); 2851 return (error); 2852 } 2853 2854 /* 2855 * Common implementation code for chmod(), lchmod() and fchmod(). 2856 */ 2857 int 2858 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2859 { 2860 struct mount *mp; 2861 struct vattr vattr; 2862 int error; 2863 2864 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2865 return (error); 2866 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2867 VATTR_NULL(&vattr); 2868 vattr.va_mode = mode & ALLPERMS; 2869 #ifdef MAC 2870 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2871 if (error == 0) 2872 #endif 2873 error = VOP_SETATTR(vp, &vattr, cred); 2874 VOP_UNLOCK(vp); 2875 vn_finished_write(mp); 2876 return (error); 2877 } 2878 2879 /* 2880 * Change mode of a file given path name. 2881 */ 2882 #ifndef _SYS_SYSPROTO_H_ 2883 struct chmod_args { 2884 char *path; 2885 int mode; 2886 }; 2887 #endif 2888 int 2889 sys_chmod(struct thread *td, struct chmod_args *uap) 2890 { 2891 2892 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2893 uap->mode, 0)); 2894 } 2895 2896 #ifndef _SYS_SYSPROTO_H_ 2897 struct fchmodat_args { 2898 int dirfd; 2899 char *path; 2900 mode_t mode; 2901 int flag; 2902 } 2903 #endif 2904 int 2905 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2906 { 2907 2908 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2909 uap->mode, uap->flag)); 2910 } 2911 2912 /* 2913 * Change mode of a file given path name (don't follow links.) 2914 */ 2915 #ifndef _SYS_SYSPROTO_H_ 2916 struct lchmod_args { 2917 char *path; 2918 int mode; 2919 }; 2920 #endif 2921 int 2922 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2923 { 2924 2925 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2926 uap->mode, AT_SYMLINK_NOFOLLOW)); 2927 } 2928 2929 int 2930 kern_fchmodat(struct thread *td, int fd, const char *path, 2931 enum uio_seg pathseg, mode_t mode, int flag) 2932 { 2933 struct nameidata nd; 2934 int error; 2935 2936 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2937 AT_EMPTY_PATH)) != 0) 2938 return (EINVAL); 2939 2940 AUDIT_ARG_MODE(mode); 2941 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2942 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2943 fd, &cap_fchmod_rights); 2944 if ((error = namei(&nd)) != 0) 2945 return (error); 2946 NDFREE_NOTHING(&nd); 2947 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2948 vrele(nd.ni_vp); 2949 return (error); 2950 } 2951 2952 /* 2953 * Change mode of a file given a file descriptor. 2954 */ 2955 #ifndef _SYS_SYSPROTO_H_ 2956 struct fchmod_args { 2957 int fd; 2958 int mode; 2959 }; 2960 #endif 2961 int 2962 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2963 { 2964 struct file *fp; 2965 int error; 2966 2967 AUDIT_ARG_FD(uap->fd); 2968 AUDIT_ARG_MODE(uap->mode); 2969 2970 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2971 if (error != 0) 2972 return (error); 2973 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2974 fdrop(fp, td); 2975 return (error); 2976 } 2977 2978 /* 2979 * Common implementation for chown(), lchown(), and fchown() 2980 */ 2981 int 2982 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2983 gid_t gid) 2984 { 2985 struct mount *mp; 2986 struct vattr vattr; 2987 int error; 2988 2989 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2990 return (error); 2991 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2992 VATTR_NULL(&vattr); 2993 vattr.va_uid = uid; 2994 vattr.va_gid = gid; 2995 #ifdef MAC 2996 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2997 vattr.va_gid); 2998 if (error == 0) 2999 #endif 3000 error = VOP_SETATTR(vp, &vattr, cred); 3001 VOP_UNLOCK(vp); 3002 vn_finished_write(mp); 3003 return (error); 3004 } 3005 3006 /* 3007 * Set ownership given a path name. 3008 */ 3009 #ifndef _SYS_SYSPROTO_H_ 3010 struct chown_args { 3011 char *path; 3012 int uid; 3013 int gid; 3014 }; 3015 #endif 3016 int 3017 sys_chown(struct thread *td, struct chown_args *uap) 3018 { 3019 3020 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3021 uap->gid, 0)); 3022 } 3023 3024 #ifndef _SYS_SYSPROTO_H_ 3025 struct fchownat_args { 3026 int fd; 3027 const char * path; 3028 uid_t uid; 3029 gid_t gid; 3030 int flag; 3031 }; 3032 #endif 3033 int 3034 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3035 { 3036 3037 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3038 uap->gid, uap->flag)); 3039 } 3040 3041 int 3042 kern_fchownat(struct thread *td, int fd, const char *path, 3043 enum uio_seg pathseg, int uid, int gid, int flag) 3044 { 3045 struct nameidata nd; 3046 int error; 3047 3048 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3049 AT_EMPTY_PATH)) != 0) 3050 return (EINVAL); 3051 3052 AUDIT_ARG_OWNER(uid, gid); 3053 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3054 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3055 fd, &cap_fchown_rights); 3056 3057 if ((error = namei(&nd)) != 0) 3058 return (error); 3059 NDFREE_NOTHING(&nd); 3060 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3061 vrele(nd.ni_vp); 3062 return (error); 3063 } 3064 3065 /* 3066 * Set ownership given a path name, do not cross symlinks. 3067 */ 3068 #ifndef _SYS_SYSPROTO_H_ 3069 struct lchown_args { 3070 char *path; 3071 int uid; 3072 int gid; 3073 }; 3074 #endif 3075 int 3076 sys_lchown(struct thread *td, struct lchown_args *uap) 3077 { 3078 3079 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3080 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3081 } 3082 3083 /* 3084 * Set ownership given a file descriptor. 3085 */ 3086 #ifndef _SYS_SYSPROTO_H_ 3087 struct fchown_args { 3088 int fd; 3089 int uid; 3090 int gid; 3091 }; 3092 #endif 3093 int 3094 sys_fchown(struct thread *td, struct fchown_args *uap) 3095 { 3096 struct file *fp; 3097 int error; 3098 3099 AUDIT_ARG_FD(uap->fd); 3100 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3101 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3102 if (error != 0) 3103 return (error); 3104 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3105 fdrop(fp, td); 3106 return (error); 3107 } 3108 3109 /* 3110 * Common implementation code for utimes(), lutimes(), and futimes(). 3111 */ 3112 static int 3113 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3114 struct timespec *tsp) 3115 { 3116 struct timeval tv[2]; 3117 const struct timeval *tvp; 3118 int error; 3119 3120 if (usrtvp == NULL) { 3121 vfs_timestamp(&tsp[0]); 3122 tsp[1] = tsp[0]; 3123 } else { 3124 if (tvpseg == UIO_SYSSPACE) { 3125 tvp = usrtvp; 3126 } else { 3127 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3128 return (error); 3129 tvp = tv; 3130 } 3131 3132 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3133 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3134 return (EINVAL); 3135 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3136 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3137 } 3138 return (0); 3139 } 3140 3141 /* 3142 * Common implementation code for futimens(), utimensat(). 3143 */ 3144 #define UTIMENS_NULL 0x1 3145 #define UTIMENS_EXIT 0x2 3146 static int 3147 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3148 struct timespec *tsp, int *retflags) 3149 { 3150 struct timespec tsnow; 3151 int error; 3152 3153 vfs_timestamp(&tsnow); 3154 *retflags = 0; 3155 if (usrtsp == NULL) { 3156 tsp[0] = tsnow; 3157 tsp[1] = tsnow; 3158 *retflags |= UTIMENS_NULL; 3159 return (0); 3160 } 3161 if (tspseg == UIO_SYSSPACE) { 3162 tsp[0] = usrtsp[0]; 3163 tsp[1] = usrtsp[1]; 3164 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3165 return (error); 3166 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3167 *retflags |= UTIMENS_EXIT; 3168 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3169 *retflags |= UTIMENS_NULL; 3170 if (tsp[0].tv_nsec == UTIME_OMIT) 3171 tsp[0].tv_sec = VNOVAL; 3172 else if (tsp[0].tv_nsec == UTIME_NOW) 3173 tsp[0] = tsnow; 3174 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3175 return (EINVAL); 3176 if (tsp[1].tv_nsec == UTIME_OMIT) 3177 tsp[1].tv_sec = VNOVAL; 3178 else if (tsp[1].tv_nsec == UTIME_NOW) 3179 tsp[1] = tsnow; 3180 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3181 return (EINVAL); 3182 3183 return (0); 3184 } 3185 3186 /* 3187 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3188 * and utimensat(). 3189 */ 3190 static int 3191 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3192 int numtimes, int nullflag) 3193 { 3194 struct mount *mp; 3195 struct vattr vattr; 3196 int error; 3197 bool setbirthtime; 3198 3199 setbirthtime = false; 3200 vattr.va_birthtime.tv_sec = VNOVAL; 3201 vattr.va_birthtime.tv_nsec = 0; 3202 3203 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3204 return (error); 3205 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3206 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3207 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3208 setbirthtime = true; 3209 VATTR_NULL(&vattr); 3210 vattr.va_atime = ts[0]; 3211 vattr.va_mtime = ts[1]; 3212 if (setbirthtime) 3213 vattr.va_birthtime = ts[1]; 3214 if (numtimes > 2) 3215 vattr.va_birthtime = ts[2]; 3216 if (nullflag) 3217 vattr.va_vaflags |= VA_UTIMES_NULL; 3218 #ifdef MAC 3219 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3220 vattr.va_mtime); 3221 #endif 3222 if (error == 0) 3223 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3224 VOP_UNLOCK(vp); 3225 vn_finished_write(mp); 3226 return (error); 3227 } 3228 3229 /* 3230 * Set the access and modification times of a file. 3231 */ 3232 #ifndef _SYS_SYSPROTO_H_ 3233 struct utimes_args { 3234 char *path; 3235 struct timeval *tptr; 3236 }; 3237 #endif 3238 int 3239 sys_utimes(struct thread *td, struct utimes_args *uap) 3240 { 3241 3242 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3243 uap->tptr, UIO_USERSPACE)); 3244 } 3245 3246 #ifndef _SYS_SYSPROTO_H_ 3247 struct futimesat_args { 3248 int fd; 3249 const char * path; 3250 const struct timeval * times; 3251 }; 3252 #endif 3253 int 3254 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3255 { 3256 3257 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3258 uap->times, UIO_USERSPACE)); 3259 } 3260 3261 int 3262 kern_utimesat(struct thread *td, int fd, const char *path, 3263 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3264 { 3265 struct nameidata nd; 3266 struct timespec ts[2]; 3267 int error; 3268 3269 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3270 return (error); 3271 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3272 &cap_futimes_rights); 3273 3274 if ((error = namei(&nd)) != 0) 3275 return (error); 3276 NDFREE_NOTHING(&nd); 3277 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3278 vrele(nd.ni_vp); 3279 return (error); 3280 } 3281 3282 /* 3283 * Set the access and modification times of a file. 3284 */ 3285 #ifndef _SYS_SYSPROTO_H_ 3286 struct lutimes_args { 3287 char *path; 3288 struct timeval *tptr; 3289 }; 3290 #endif 3291 int 3292 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3293 { 3294 3295 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3296 UIO_USERSPACE)); 3297 } 3298 3299 int 3300 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3301 const struct timeval *tptr, enum uio_seg tptrseg) 3302 { 3303 struct timespec ts[2]; 3304 struct nameidata nd; 3305 int error; 3306 3307 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3308 return (error); 3309 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3310 if ((error = namei(&nd)) != 0) 3311 return (error); 3312 NDFREE_NOTHING(&nd); 3313 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3314 vrele(nd.ni_vp); 3315 return (error); 3316 } 3317 3318 /* 3319 * Set the access and modification times of a file. 3320 */ 3321 #ifndef _SYS_SYSPROTO_H_ 3322 struct futimes_args { 3323 int fd; 3324 struct timeval *tptr; 3325 }; 3326 #endif 3327 int 3328 sys_futimes(struct thread *td, struct futimes_args *uap) 3329 { 3330 3331 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3332 } 3333 3334 int 3335 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3336 enum uio_seg tptrseg) 3337 { 3338 struct timespec ts[2]; 3339 struct file *fp; 3340 int error; 3341 3342 AUDIT_ARG_FD(fd); 3343 error = getutimes(tptr, tptrseg, ts); 3344 if (error != 0) 3345 return (error); 3346 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3347 if (error != 0) 3348 return (error); 3349 #ifdef AUDIT 3350 if (AUDITING_TD(td)) { 3351 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3352 AUDIT_ARG_VNODE1(fp->f_vnode); 3353 VOP_UNLOCK(fp->f_vnode); 3354 } 3355 #endif 3356 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3357 fdrop(fp, td); 3358 return (error); 3359 } 3360 3361 int 3362 sys_futimens(struct thread *td, struct futimens_args *uap) 3363 { 3364 3365 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3366 } 3367 3368 int 3369 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3370 enum uio_seg tptrseg) 3371 { 3372 struct timespec ts[2]; 3373 struct file *fp; 3374 int error, flags; 3375 3376 AUDIT_ARG_FD(fd); 3377 error = getutimens(tptr, tptrseg, ts, &flags); 3378 if (error != 0) 3379 return (error); 3380 if (flags & UTIMENS_EXIT) 3381 return (0); 3382 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3383 if (error != 0) 3384 return (error); 3385 #ifdef AUDIT 3386 if (AUDITING_TD(td)) { 3387 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3388 AUDIT_ARG_VNODE1(fp->f_vnode); 3389 VOP_UNLOCK(fp->f_vnode); 3390 } 3391 #endif 3392 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3393 fdrop(fp, td); 3394 return (error); 3395 } 3396 3397 int 3398 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3399 { 3400 3401 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3402 uap->times, UIO_USERSPACE, uap->flag)); 3403 } 3404 3405 int 3406 kern_utimensat(struct thread *td, int fd, const char *path, 3407 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3408 int flag) 3409 { 3410 struct nameidata nd; 3411 struct timespec ts[2]; 3412 int error, flags; 3413 3414 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3415 AT_EMPTY_PATH)) != 0) 3416 return (EINVAL); 3417 3418 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3419 return (error); 3420 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3421 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3422 pathseg, path, fd, &cap_futimes_rights); 3423 if ((error = namei(&nd)) != 0) 3424 return (error); 3425 /* 3426 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3427 * POSIX states: 3428 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3429 * "Search permission is denied by a component of the path prefix." 3430 */ 3431 NDFREE_NOTHING(&nd); 3432 if ((flags & UTIMENS_EXIT) == 0) 3433 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3434 vrele(nd.ni_vp); 3435 return (error); 3436 } 3437 3438 /* 3439 * Truncate a file given its path name. 3440 */ 3441 #ifndef _SYS_SYSPROTO_H_ 3442 struct truncate_args { 3443 char *path; 3444 int pad; 3445 off_t length; 3446 }; 3447 #endif 3448 int 3449 sys_truncate(struct thread *td, struct truncate_args *uap) 3450 { 3451 3452 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3453 } 3454 3455 int 3456 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3457 off_t length) 3458 { 3459 struct mount *mp; 3460 struct vnode *vp; 3461 void *rl_cookie; 3462 struct nameidata nd; 3463 int error; 3464 3465 if (length < 0) 3466 return (EINVAL); 3467 NDPREINIT(&nd); 3468 retry: 3469 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3470 if ((error = namei(&nd)) != 0) 3471 return (error); 3472 vp = nd.ni_vp; 3473 NDFREE_NOTHING(&nd); 3474 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3475 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3476 vn_rangelock_unlock(vp, rl_cookie); 3477 vrele(vp); 3478 return (error); 3479 } 3480 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3481 if (vp->v_type == VDIR) { 3482 error = EISDIR; 3483 goto out; 3484 } 3485 #ifdef MAC 3486 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3487 if (error != 0) 3488 goto out; 3489 #endif 3490 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3491 if (error != 0) 3492 goto out; 3493 3494 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3495 out: 3496 VOP_UNLOCK(vp); 3497 vn_finished_write(mp); 3498 vn_rangelock_unlock(vp, rl_cookie); 3499 vrele(vp); 3500 if (error == ERELOOKUP) 3501 goto retry; 3502 return (error); 3503 } 3504 3505 #if defined(COMPAT_43) 3506 /* 3507 * Truncate a file given its path name. 3508 */ 3509 #ifndef _SYS_SYSPROTO_H_ 3510 struct otruncate_args { 3511 char *path; 3512 long length; 3513 }; 3514 #endif 3515 int 3516 otruncate(struct thread *td, struct otruncate_args *uap) 3517 { 3518 3519 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3520 } 3521 #endif /* COMPAT_43 */ 3522 3523 #if defined(COMPAT_FREEBSD6) 3524 /* Versions with the pad argument */ 3525 int 3526 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3527 { 3528 3529 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3530 } 3531 3532 int 3533 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3534 { 3535 3536 return (kern_ftruncate(td, uap->fd, uap->length)); 3537 } 3538 #endif 3539 3540 int 3541 kern_fsync(struct thread *td, int fd, bool fullsync) 3542 { 3543 struct vnode *vp; 3544 struct mount *mp; 3545 struct file *fp; 3546 int error; 3547 3548 AUDIT_ARG_FD(fd); 3549 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3550 if (error != 0) 3551 return (error); 3552 vp = fp->f_vnode; 3553 #if 0 3554 if (!fullsync) 3555 /* XXXKIB: compete outstanding aio writes */; 3556 #endif 3557 retry: 3558 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3559 if (error != 0) 3560 goto drop; 3561 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3562 AUDIT_ARG_VNODE1(vp); 3563 if (vp->v_object != NULL) { 3564 VM_OBJECT_WLOCK(vp->v_object); 3565 vm_object_page_clean(vp->v_object, 0, 0, 0); 3566 VM_OBJECT_WUNLOCK(vp->v_object); 3567 } 3568 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3569 VOP_UNLOCK(vp); 3570 vn_finished_write(mp); 3571 if (error == ERELOOKUP) 3572 goto retry; 3573 drop: 3574 fdrop(fp, td); 3575 return (error); 3576 } 3577 3578 /* 3579 * Sync an open file. 3580 */ 3581 #ifndef _SYS_SYSPROTO_H_ 3582 struct fsync_args { 3583 int fd; 3584 }; 3585 #endif 3586 int 3587 sys_fsync(struct thread *td, struct fsync_args *uap) 3588 { 3589 3590 return (kern_fsync(td, uap->fd, true)); 3591 } 3592 3593 int 3594 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3595 { 3596 3597 return (kern_fsync(td, uap->fd, false)); 3598 } 3599 3600 /* 3601 * Rename files. Source and destination must either both be directories, or 3602 * both not be directories. If target is a directory, it must be empty. 3603 */ 3604 #ifndef _SYS_SYSPROTO_H_ 3605 struct rename_args { 3606 char *from; 3607 char *to; 3608 }; 3609 #endif 3610 int 3611 sys_rename(struct thread *td, struct rename_args *uap) 3612 { 3613 3614 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3615 uap->to, UIO_USERSPACE)); 3616 } 3617 3618 #ifndef _SYS_SYSPROTO_H_ 3619 struct renameat_args { 3620 int oldfd; 3621 char *old; 3622 int newfd; 3623 char *new; 3624 }; 3625 #endif 3626 int 3627 sys_renameat(struct thread *td, struct renameat_args *uap) 3628 { 3629 3630 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3631 UIO_USERSPACE)); 3632 } 3633 3634 #ifdef MAC 3635 static int 3636 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3637 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3638 { 3639 int error; 3640 3641 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3642 AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights); 3643 if ((error = namei(fromnd)) != 0) 3644 return (error); 3645 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3646 fromnd->ni_vp, &fromnd->ni_cnd); 3647 VOP_UNLOCK(fromnd->ni_dvp); 3648 if (fromnd->ni_dvp != fromnd->ni_vp) 3649 VOP_UNLOCK(fromnd->ni_vp); 3650 if (error != 0) { 3651 NDFREE_PNBUF(fromnd); 3652 vrele(fromnd->ni_dvp); 3653 vrele(fromnd->ni_vp); 3654 if (fromnd->ni_startdir) 3655 vrele(fromnd->ni_startdir); 3656 } 3657 return (error); 3658 } 3659 #endif 3660 3661 int 3662 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3663 const char *new, enum uio_seg pathseg) 3664 { 3665 struct mount *mp = NULL; 3666 struct vnode *tvp, *fvp, *tdvp; 3667 struct nameidata fromnd, tond; 3668 uint64_t tondflags; 3669 int error; 3670 3671 again: 3672 bwillwrite(); 3673 #ifdef MAC 3674 if (mac_vnode_check_rename_from_enabled()) { 3675 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3676 &fromnd); 3677 if (error != 0) 3678 return (error); 3679 } else { 3680 #endif 3681 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3682 pathseg, old, oldfd, &cap_renameat_source_rights); 3683 if ((error = namei(&fromnd)) != 0) 3684 return (error); 3685 #ifdef MAC 3686 } 3687 #endif 3688 fvp = fromnd.ni_vp; 3689 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2; 3690 if (fromnd.ni_vp->v_type == VDIR) 3691 tondflags |= WILLBEDIR; 3692 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3693 &cap_renameat_target_rights); 3694 if ((error = namei(&tond)) != 0) { 3695 /* Translate error code for rename("dir1", "dir2/."). */ 3696 if (error == EISDIR && fvp->v_type == VDIR) 3697 error = EINVAL; 3698 NDFREE_PNBUF(&fromnd); 3699 vrele(fromnd.ni_dvp); 3700 vrele(fvp); 3701 goto out1; 3702 } 3703 tdvp = tond.ni_dvp; 3704 tvp = tond.ni_vp; 3705 error = vn_start_write(fvp, &mp, V_NOWAIT); 3706 if (error != 0) { 3707 NDFREE_PNBUF(&fromnd); 3708 NDFREE_PNBUF(&tond); 3709 if (tvp != NULL) 3710 vput(tvp); 3711 if (tdvp == tvp) 3712 vrele(tdvp); 3713 else 3714 vput(tdvp); 3715 vrele(fromnd.ni_dvp); 3716 vrele(fvp); 3717 vrele(tond.ni_startdir); 3718 if (fromnd.ni_startdir != NULL) 3719 vrele(fromnd.ni_startdir); 3720 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3721 if (error != 0) 3722 return (error); 3723 goto again; 3724 } 3725 if (tvp != NULL) { 3726 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3727 error = ENOTDIR; 3728 goto out; 3729 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3730 error = EISDIR; 3731 goto out; 3732 } 3733 #ifdef CAPABILITIES 3734 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3735 /* 3736 * If the target already exists we require CAP_UNLINKAT 3737 * from 'newfd', when newfd was used for the lookup. 3738 */ 3739 error = cap_check(&tond.ni_filecaps.fc_rights, 3740 &cap_unlinkat_rights); 3741 if (error != 0) 3742 goto out; 3743 } 3744 #endif 3745 } 3746 if (fvp == tdvp) { 3747 error = EINVAL; 3748 goto out; 3749 } 3750 /* 3751 * If the source is the same as the destination (that is, if they 3752 * are links to the same vnode), then there is nothing to do. 3753 */ 3754 if (fvp == tvp) 3755 error = ERESTART; 3756 #ifdef MAC 3757 else 3758 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3759 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3760 #endif 3761 out: 3762 if (error == 0) { 3763 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3764 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3765 NDFREE_PNBUF(&fromnd); 3766 NDFREE_PNBUF(&tond); 3767 } else { 3768 NDFREE_PNBUF(&fromnd); 3769 NDFREE_PNBUF(&tond); 3770 if (tvp != NULL) 3771 vput(tvp); 3772 if (tdvp == tvp) 3773 vrele(tdvp); 3774 else 3775 vput(tdvp); 3776 vrele(fromnd.ni_dvp); 3777 vrele(fvp); 3778 } 3779 vrele(tond.ni_startdir); 3780 vn_finished_write(mp); 3781 out1: 3782 if (fromnd.ni_startdir) 3783 vrele(fromnd.ni_startdir); 3784 if (error == ERESTART) 3785 return (0); 3786 if (error == ERELOOKUP) 3787 goto again; 3788 return (error); 3789 } 3790 3791 /* 3792 * Make a directory file. 3793 */ 3794 #ifndef _SYS_SYSPROTO_H_ 3795 struct mkdir_args { 3796 char *path; 3797 int mode; 3798 }; 3799 #endif 3800 int 3801 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3802 { 3803 3804 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3805 uap->mode)); 3806 } 3807 3808 #ifndef _SYS_SYSPROTO_H_ 3809 struct mkdirat_args { 3810 int fd; 3811 char *path; 3812 mode_t mode; 3813 }; 3814 #endif 3815 int 3816 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3817 { 3818 3819 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3820 } 3821 3822 int 3823 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3824 int mode) 3825 { 3826 struct mount *mp; 3827 struct vattr vattr; 3828 struct nameidata nd; 3829 int error; 3830 3831 AUDIT_ARG_MODE(mode); 3832 NDPREINIT(&nd); 3833 restart: 3834 bwillwrite(); 3835 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3836 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3837 segflg, path, fd, &cap_mkdirat_rights); 3838 if ((error = namei(&nd)) != 0) 3839 return (error); 3840 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3841 NDFREE_PNBUF(&nd); 3842 vput(nd.ni_dvp); 3843 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3844 return (error); 3845 goto restart; 3846 } 3847 VATTR_NULL(&vattr); 3848 vattr.va_type = VDIR; 3849 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3850 #ifdef MAC 3851 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3852 &vattr); 3853 if (error != 0) 3854 goto out; 3855 #endif 3856 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3857 #ifdef MAC 3858 out: 3859 #endif 3860 NDFREE_PNBUF(&nd); 3861 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3862 vn_finished_write(mp); 3863 if (error == ERELOOKUP) 3864 goto restart; 3865 return (error); 3866 } 3867 3868 /* 3869 * Remove a directory file. 3870 */ 3871 #ifndef _SYS_SYSPROTO_H_ 3872 struct rmdir_args { 3873 char *path; 3874 }; 3875 #endif 3876 int 3877 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3878 { 3879 3880 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3881 0)); 3882 } 3883 3884 int 3885 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3886 enum uio_seg pathseg, int flag) 3887 { 3888 struct mount *mp; 3889 struct vnode *vp; 3890 struct file *fp; 3891 struct nameidata nd; 3892 cap_rights_t rights; 3893 int error; 3894 3895 fp = NULL; 3896 if (fd != FD_NONE) { 3897 error = getvnode(td, fd, cap_rights_init_one(&rights, 3898 CAP_LOOKUP), &fp); 3899 if (error != 0) 3900 return (error); 3901 } 3902 3903 NDPREINIT(&nd); 3904 restart: 3905 bwillwrite(); 3906 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3907 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3908 pathseg, path, dfd, &cap_unlinkat_rights); 3909 if ((error = namei(&nd)) != 0) 3910 goto fdout; 3911 vp = nd.ni_vp; 3912 if (vp->v_type != VDIR) { 3913 error = ENOTDIR; 3914 goto out; 3915 } 3916 /* 3917 * No rmdir "." please. 3918 */ 3919 if (nd.ni_dvp == vp) { 3920 error = EINVAL; 3921 goto out; 3922 } 3923 /* 3924 * The root of a mounted filesystem cannot be deleted. 3925 */ 3926 if (vp->v_vflag & VV_ROOT) { 3927 error = EBUSY; 3928 goto out; 3929 } 3930 3931 if (fp != NULL && fp->f_vnode != vp) { 3932 if (VN_IS_DOOMED(fp->f_vnode)) 3933 error = EBADF; 3934 else 3935 error = EDEADLK; 3936 goto out; 3937 } 3938 3939 #ifdef MAC 3940 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3941 &nd.ni_cnd); 3942 if (error != 0) 3943 goto out; 3944 #endif 3945 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3946 NDFREE_PNBUF(&nd); 3947 vput(vp); 3948 if (nd.ni_dvp == vp) 3949 vrele(nd.ni_dvp); 3950 else 3951 vput(nd.ni_dvp); 3952 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3953 goto fdout; 3954 goto restart; 3955 } 3956 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3957 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3958 vn_finished_write(mp); 3959 out: 3960 NDFREE_PNBUF(&nd); 3961 vput(vp); 3962 if (nd.ni_dvp == vp) 3963 vrele(nd.ni_dvp); 3964 else 3965 vput(nd.ni_dvp); 3966 if (error == ERELOOKUP) 3967 goto restart; 3968 fdout: 3969 if (fp != NULL) 3970 fdrop(fp, td); 3971 return (error); 3972 } 3973 3974 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3975 int 3976 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3977 long *basep, void (*func)(struct freebsd11_dirent *)) 3978 { 3979 struct freebsd11_dirent dstdp; 3980 struct dirent *dp, *edp; 3981 char *dirbuf; 3982 off_t base; 3983 ssize_t resid, ucount; 3984 int error; 3985 3986 /* XXX arbitrary sanity limit on `count'. */ 3987 count = min(count, 64 * 1024); 3988 3989 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3990 3991 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3992 UIO_SYSSPACE); 3993 if (error != 0) 3994 goto done; 3995 if (basep != NULL) 3996 *basep = base; 3997 3998 ucount = 0; 3999 for (dp = (struct dirent *)dirbuf, 4000 edp = (struct dirent *)&dirbuf[count - resid]; 4001 ucount < count && dp < edp; ) { 4002 if (dp->d_reclen == 0) 4003 break; 4004 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4005 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4006 continue; 4007 dstdp.d_type = dp->d_type; 4008 dstdp.d_namlen = dp->d_namlen; 4009 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4010 if (dstdp.d_fileno != dp->d_fileno) { 4011 switch (ino64_trunc_error) { 4012 default: 4013 case 0: 4014 break; 4015 case 1: 4016 error = EOVERFLOW; 4017 goto done; 4018 case 2: 4019 dstdp.d_fileno = UINT32_MAX; 4020 break; 4021 } 4022 } 4023 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4024 ((dp->d_namlen + 1 + 3) &~ 3); 4025 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4026 bzero(dstdp.d_name + dstdp.d_namlen, 4027 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4028 dstdp.d_namlen); 4029 MPASS(dstdp.d_reclen <= dp->d_reclen); 4030 MPASS(ucount + dstdp.d_reclen <= count); 4031 if (func != NULL) 4032 func(&dstdp); 4033 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4034 if (error != 0) 4035 break; 4036 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4037 ucount += dstdp.d_reclen; 4038 } 4039 4040 done: 4041 free(dirbuf, M_TEMP); 4042 if (error == 0) 4043 td->td_retval[0] = ucount; 4044 return (error); 4045 } 4046 #endif /* COMPAT */ 4047 4048 #ifdef COMPAT_43 4049 static void 4050 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4051 { 4052 #if (BYTE_ORDER == LITTLE_ENDIAN) 4053 /* 4054 * The expected low byte of dp->d_namlen is our dp->d_type. 4055 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 4056 */ 4057 dp->d_type = dp->d_namlen; 4058 dp->d_namlen = 0; 4059 #else 4060 /* 4061 * The dp->d_type is the high byte of the expected dp->d_namlen, 4062 * so must be zero'ed. 4063 */ 4064 dp->d_type = 0; 4065 #endif 4066 } 4067 4068 /* 4069 * Read a block of directory entries in a filesystem independent format. 4070 */ 4071 #ifndef _SYS_SYSPROTO_H_ 4072 struct ogetdirentries_args { 4073 int fd; 4074 char *buf; 4075 u_int count; 4076 long *basep; 4077 }; 4078 #endif 4079 int 4080 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4081 { 4082 long loff; 4083 int error; 4084 4085 error = kern_ogetdirentries(td, uap, &loff); 4086 if (error == 0) 4087 error = copyout(&loff, uap->basep, sizeof(long)); 4088 return (error); 4089 } 4090 4091 int 4092 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4093 long *ploff) 4094 { 4095 long base; 4096 int error; 4097 4098 /* XXX arbitrary sanity limit on `count'. */ 4099 if (uap->count > 64 * 1024) 4100 return (EINVAL); 4101 4102 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4103 &base, ogetdirentries_cvt); 4104 4105 if (error == 0 && uap->basep != NULL) 4106 error = copyout(&base, uap->basep, sizeof(long)); 4107 4108 return (error); 4109 } 4110 #endif /* COMPAT_43 */ 4111 4112 #if defined(COMPAT_FREEBSD11) 4113 #ifndef _SYS_SYSPROTO_H_ 4114 struct freebsd11_getdirentries_args { 4115 int fd; 4116 char *buf; 4117 u_int count; 4118 long *basep; 4119 }; 4120 #endif 4121 int 4122 freebsd11_getdirentries(struct thread *td, 4123 struct freebsd11_getdirentries_args *uap) 4124 { 4125 long base; 4126 int error; 4127 4128 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4129 &base, NULL); 4130 4131 if (error == 0 && uap->basep != NULL) 4132 error = copyout(&base, uap->basep, sizeof(long)); 4133 return (error); 4134 } 4135 4136 int 4137 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4138 { 4139 struct freebsd11_getdirentries_args ap; 4140 4141 ap.fd = uap->fd; 4142 ap.buf = uap->buf; 4143 ap.count = uap->count; 4144 ap.basep = NULL; 4145 return (freebsd11_getdirentries(td, &ap)); 4146 } 4147 #endif /* COMPAT_FREEBSD11 */ 4148 4149 /* 4150 * Read a block of directory entries in a filesystem independent format. 4151 */ 4152 int 4153 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4154 { 4155 off_t base; 4156 int error; 4157 4158 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4159 NULL, UIO_USERSPACE); 4160 if (error != 0) 4161 return (error); 4162 if (uap->basep != NULL) 4163 error = copyout(&base, uap->basep, sizeof(off_t)); 4164 return (error); 4165 } 4166 4167 int 4168 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4169 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4170 { 4171 struct vnode *vp; 4172 struct file *fp; 4173 struct uio auio; 4174 struct iovec aiov; 4175 off_t loff; 4176 int error, eofflag; 4177 off_t foffset; 4178 4179 AUDIT_ARG_FD(fd); 4180 if (count > IOSIZE_MAX) 4181 return (EINVAL); 4182 auio.uio_resid = count; 4183 error = getvnode(td, fd, &cap_read_rights, &fp); 4184 if (error != 0) 4185 return (error); 4186 if ((fp->f_flag & FREAD) == 0) { 4187 fdrop(fp, td); 4188 return (EBADF); 4189 } 4190 vp = fp->f_vnode; 4191 foffset = foffset_lock(fp, 0); 4192 unionread: 4193 if (vp->v_type != VDIR) { 4194 error = EINVAL; 4195 goto fail; 4196 } 4197 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4198 error = ENOENT; 4199 goto fail; 4200 } 4201 aiov.iov_base = buf; 4202 aiov.iov_len = count; 4203 auio.uio_iov = &aiov; 4204 auio.uio_iovcnt = 1; 4205 auio.uio_rw = UIO_READ; 4206 auio.uio_segflg = bufseg; 4207 auio.uio_td = td; 4208 vn_lock(vp, LK_SHARED | LK_RETRY); 4209 AUDIT_ARG_VNODE1(vp); 4210 loff = auio.uio_offset = foffset; 4211 #ifdef MAC 4212 error = mac_vnode_check_readdir(td->td_ucred, vp); 4213 if (error == 0) 4214 #endif 4215 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4216 NULL); 4217 foffset = auio.uio_offset; 4218 if (error != 0) { 4219 VOP_UNLOCK(vp); 4220 goto fail; 4221 } 4222 if (count == auio.uio_resid && 4223 (vp->v_vflag & VV_ROOT) && 4224 (vp->v_mount->mnt_flag & MNT_UNION)) { 4225 struct vnode *tvp = vp; 4226 4227 vp = vp->v_mount->mnt_vnodecovered; 4228 VREF(vp); 4229 fp->f_vnode = vp; 4230 foffset = 0; 4231 vput(tvp); 4232 goto unionread; 4233 } 4234 VOP_UNLOCK(vp); 4235 *basep = loff; 4236 if (residp != NULL) 4237 *residp = auio.uio_resid; 4238 td->td_retval[0] = count - auio.uio_resid; 4239 fail: 4240 foffset_unlock(fp, foffset, 0); 4241 fdrop(fp, td); 4242 return (error); 4243 } 4244 4245 /* 4246 * Set the mode mask for creation of filesystem nodes. 4247 */ 4248 #ifndef _SYS_SYSPROTO_H_ 4249 struct umask_args { 4250 int newmask; 4251 }; 4252 #endif 4253 int 4254 sys_umask(struct thread *td, struct umask_args *uap) 4255 { 4256 struct pwddesc *pdp; 4257 4258 pdp = td->td_proc->p_pd; 4259 PWDDESC_XLOCK(pdp); 4260 td->td_retval[0] = pdp->pd_cmask; 4261 pdp->pd_cmask = uap->newmask & ALLPERMS; 4262 PWDDESC_XUNLOCK(pdp); 4263 return (0); 4264 } 4265 4266 /* 4267 * Void all references to file by ripping underlying filesystem away from 4268 * vnode. 4269 */ 4270 #ifndef _SYS_SYSPROTO_H_ 4271 struct revoke_args { 4272 char *path; 4273 }; 4274 #endif 4275 int 4276 sys_revoke(struct thread *td, struct revoke_args *uap) 4277 { 4278 struct vnode *vp; 4279 struct vattr vattr; 4280 struct nameidata nd; 4281 int error; 4282 4283 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4284 uap->path); 4285 if ((error = namei(&nd)) != 0) 4286 return (error); 4287 vp = nd.ni_vp; 4288 NDFREE_NOTHING(&nd); 4289 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4290 error = EINVAL; 4291 goto out; 4292 } 4293 #ifdef MAC 4294 error = mac_vnode_check_revoke(td->td_ucred, vp); 4295 if (error != 0) 4296 goto out; 4297 #endif 4298 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4299 if (error != 0) 4300 goto out; 4301 if (td->td_ucred->cr_uid != vattr.va_uid) { 4302 error = priv_check(td, PRIV_VFS_ADMIN); 4303 if (error != 0) 4304 goto out; 4305 } 4306 if (devfs_usecount(vp) > 0) 4307 VOP_REVOKE(vp, REVOKEALL); 4308 out: 4309 vput(vp); 4310 return (error); 4311 } 4312 4313 /* 4314 * This variant of getvnode() allows O_PATH files. Caller should 4315 * ensure that returned file and vnode are only used for compatible 4316 * semantics. 4317 */ 4318 int 4319 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4320 struct file **fpp) 4321 { 4322 struct file *fp; 4323 int error; 4324 4325 error = fget_unlocked(td, fd, rightsp, &fp); 4326 if (error != 0) 4327 return (error); 4328 4329 /* 4330 * The file could be not of the vnode type, or it may be not 4331 * yet fully initialized, in which case the f_vnode pointer 4332 * may be set, but f_ops is still badfileops. E.g., 4333 * devfs_open() transiently create such situation to 4334 * facilitate csw d_fdopen(). 4335 * 4336 * Dupfdopen() handling in kern_openat() installs the 4337 * half-baked file into the process descriptor table, allowing 4338 * other thread to dereference it. Guard against the race by 4339 * checking f_ops. 4340 */ 4341 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4342 fdrop(fp, td); 4343 *fpp = NULL; 4344 return (EINVAL); 4345 } 4346 4347 *fpp = fp; 4348 return (0); 4349 } 4350 4351 /* 4352 * Convert a user file descriptor to a kernel file entry and check 4353 * that, if it is a capability, the correct rights are present. 4354 * A reference on the file entry is held upon returning. 4355 */ 4356 int 4357 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4358 { 4359 int error; 4360 4361 error = getvnode_path(td, fd, rightsp, fpp); 4362 if (__predict_false(error != 0)) 4363 return (error); 4364 4365 /* 4366 * Filter out O_PATH file descriptors, most getvnode() callers 4367 * do not call fo_ methods. 4368 */ 4369 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4370 fdrop(*fpp, td); 4371 *fpp = NULL; 4372 error = EBADF; 4373 } 4374 4375 return (error); 4376 } 4377 4378 /* 4379 * Get an (NFS) file handle. 4380 */ 4381 #ifndef _SYS_SYSPROTO_H_ 4382 struct lgetfh_args { 4383 char *fname; 4384 fhandle_t *fhp; 4385 }; 4386 #endif 4387 int 4388 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4389 { 4390 4391 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4392 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4393 } 4394 4395 #ifndef _SYS_SYSPROTO_H_ 4396 struct getfh_args { 4397 char *fname; 4398 fhandle_t *fhp; 4399 }; 4400 #endif 4401 int 4402 sys_getfh(struct thread *td, struct getfh_args *uap) 4403 { 4404 4405 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4406 uap->fhp, UIO_USERSPACE)); 4407 } 4408 4409 /* 4410 * syscall for the rpc.lockd to use to translate an open descriptor into 4411 * a NFS file handle. 4412 * 4413 * warning: do not remove the priv_check() call or this becomes one giant 4414 * security hole. 4415 */ 4416 #ifndef _SYS_SYSPROTO_H_ 4417 struct getfhat_args { 4418 int fd; 4419 char *path; 4420 fhandle_t *fhp; 4421 int flags; 4422 }; 4423 #endif 4424 int 4425 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4426 { 4427 4428 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4429 uap->fhp, UIO_USERSPACE)); 4430 } 4431 4432 int 4433 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4434 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4435 { 4436 struct nameidata nd; 4437 fhandle_t fh; 4438 struct vnode *vp; 4439 int error; 4440 4441 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4442 return (EINVAL); 4443 error = priv_check(td, PRIV_VFS_GETFH); 4444 if (error != 0) 4445 return (error); 4446 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4447 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4448 fd); 4449 error = namei(&nd); 4450 if (error != 0) 4451 return (error); 4452 NDFREE_NOTHING(&nd); 4453 vp = nd.ni_vp; 4454 bzero(&fh, sizeof(fh)); 4455 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4456 error = VOP_VPTOFH(vp, &fh.fh_fid); 4457 vput(vp); 4458 if (error == 0) { 4459 if (fhseg == UIO_USERSPACE) 4460 error = copyout(&fh, fhp, sizeof (fh)); 4461 else 4462 memcpy(fhp, &fh, sizeof(fh)); 4463 } 4464 return (error); 4465 } 4466 4467 #ifndef _SYS_SYSPROTO_H_ 4468 struct fhlink_args { 4469 fhandle_t *fhp; 4470 const char *to; 4471 }; 4472 #endif 4473 int 4474 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4475 { 4476 4477 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4478 } 4479 4480 #ifndef _SYS_SYSPROTO_H_ 4481 struct fhlinkat_args { 4482 fhandle_t *fhp; 4483 int tofd; 4484 const char *to; 4485 }; 4486 #endif 4487 int 4488 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4489 { 4490 4491 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4492 } 4493 4494 static int 4495 kern_fhlinkat(struct thread *td, int fd, const char *path, 4496 enum uio_seg pathseg, fhandle_t *fhp) 4497 { 4498 fhandle_t fh; 4499 struct mount *mp; 4500 struct vnode *vp; 4501 int error; 4502 4503 error = priv_check(td, PRIV_VFS_GETFH); 4504 if (error != 0) 4505 return (error); 4506 error = copyin(fhp, &fh, sizeof(fh)); 4507 if (error != 0) 4508 return (error); 4509 do { 4510 bwillwrite(); 4511 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4512 return (ESTALE); 4513 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4514 vfs_unbusy(mp); 4515 if (error != 0) 4516 return (error); 4517 VOP_UNLOCK(vp); 4518 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4519 } while (error == EAGAIN || error == ERELOOKUP); 4520 return (error); 4521 } 4522 4523 #ifndef _SYS_SYSPROTO_H_ 4524 struct fhreadlink_args { 4525 fhandle_t *fhp; 4526 char *buf; 4527 size_t bufsize; 4528 }; 4529 #endif 4530 int 4531 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4532 { 4533 fhandle_t fh; 4534 struct mount *mp; 4535 struct vnode *vp; 4536 int error; 4537 4538 error = priv_check(td, PRIV_VFS_GETFH); 4539 if (error != 0) 4540 return (error); 4541 if (uap->bufsize > IOSIZE_MAX) 4542 return (EINVAL); 4543 error = copyin(uap->fhp, &fh, sizeof(fh)); 4544 if (error != 0) 4545 return (error); 4546 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4547 return (ESTALE); 4548 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4549 vfs_unbusy(mp); 4550 if (error != 0) 4551 return (error); 4552 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4553 vput(vp); 4554 return (error); 4555 } 4556 4557 /* 4558 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4559 * open descriptor. 4560 * 4561 * warning: do not remove the priv_check() call or this becomes one giant 4562 * security hole. 4563 */ 4564 #ifndef _SYS_SYSPROTO_H_ 4565 struct fhopen_args { 4566 const struct fhandle *u_fhp; 4567 int flags; 4568 }; 4569 #endif 4570 int 4571 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4572 { 4573 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4574 } 4575 4576 int 4577 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4578 { 4579 struct mount *mp; 4580 struct vnode *vp; 4581 struct fhandle fhp; 4582 struct file *fp; 4583 int fmode, error; 4584 int indx; 4585 4586 error = priv_check(td, PRIV_VFS_FHOPEN); 4587 if (error != 0) 4588 return (error); 4589 indx = -1; 4590 fmode = FFLAGS(flags); 4591 /* why not allow a non-read/write open for our lockd? */ 4592 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4593 return (EINVAL); 4594 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4595 if (error != 0) 4596 return(error); 4597 /* find the mount point */ 4598 mp = vfs_busyfs(&fhp.fh_fsid); 4599 if (mp == NULL) 4600 return (ESTALE); 4601 /* now give me my vnode, it gets returned to me locked */ 4602 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4603 vfs_unbusy(mp); 4604 if (error != 0) 4605 return (error); 4606 4607 error = falloc_noinstall(td, &fp); 4608 if (error != 0) { 4609 vput(vp); 4610 return (error); 4611 } 4612 /* 4613 * An extra reference on `fp' has been held for us by 4614 * falloc_noinstall(). 4615 */ 4616 4617 #ifdef INVARIANTS 4618 td->td_dupfd = -1; 4619 #endif 4620 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4621 if (error != 0) { 4622 KASSERT(fp->f_ops == &badfileops, 4623 ("VOP_OPEN in fhopen() set f_ops")); 4624 KASSERT(td->td_dupfd < 0, 4625 ("fhopen() encountered fdopen()")); 4626 4627 vput(vp); 4628 goto bad; 4629 } 4630 #ifdef INVARIANTS 4631 td->td_dupfd = 0; 4632 #endif 4633 fp->f_vnode = vp; 4634 finit_vnode(fp, fmode, NULL, &vnops); 4635 VOP_UNLOCK(vp); 4636 if ((fmode & O_TRUNC) != 0) { 4637 error = fo_truncate(fp, 0, td->td_ucred, td); 4638 if (error != 0) 4639 goto bad; 4640 } 4641 4642 error = finstall(td, fp, &indx, fmode, NULL); 4643 bad: 4644 fdrop(fp, td); 4645 td->td_retval[0] = indx; 4646 return (error); 4647 } 4648 4649 /* 4650 * Stat an (NFS) file handle. 4651 */ 4652 #ifndef _SYS_SYSPROTO_H_ 4653 struct fhstat_args { 4654 struct fhandle *u_fhp; 4655 struct stat *sb; 4656 }; 4657 #endif 4658 int 4659 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4660 { 4661 struct stat sb; 4662 struct fhandle fh; 4663 int error; 4664 4665 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4666 if (error != 0) 4667 return (error); 4668 error = kern_fhstat(td, fh, &sb); 4669 if (error == 0) 4670 error = copyout(&sb, uap->sb, sizeof(sb)); 4671 return (error); 4672 } 4673 4674 int 4675 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4676 { 4677 struct mount *mp; 4678 struct vnode *vp; 4679 int error; 4680 4681 error = priv_check(td, PRIV_VFS_FHSTAT); 4682 if (error != 0) 4683 return (error); 4684 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4685 return (ESTALE); 4686 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4687 vfs_unbusy(mp); 4688 if (error != 0) 4689 return (error); 4690 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4691 vput(vp); 4692 return (error); 4693 } 4694 4695 /* 4696 * Implement fstatfs() for (NFS) file handles. 4697 */ 4698 #ifndef _SYS_SYSPROTO_H_ 4699 struct fhstatfs_args { 4700 struct fhandle *u_fhp; 4701 struct statfs *buf; 4702 }; 4703 #endif 4704 int 4705 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4706 { 4707 struct statfs *sfp; 4708 fhandle_t fh; 4709 int error; 4710 4711 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4712 if (error != 0) 4713 return (error); 4714 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4715 error = kern_fhstatfs(td, fh, sfp); 4716 if (error == 0) 4717 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4718 free(sfp, M_STATFS); 4719 return (error); 4720 } 4721 4722 int 4723 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4724 { 4725 struct mount *mp; 4726 struct vnode *vp; 4727 int error; 4728 4729 error = priv_check(td, PRIV_VFS_FHSTATFS); 4730 if (error != 0) 4731 return (error); 4732 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4733 return (ESTALE); 4734 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4735 if (error != 0) { 4736 vfs_unbusy(mp); 4737 return (error); 4738 } 4739 vput(vp); 4740 error = prison_canseemount(td->td_ucred, mp); 4741 if (error != 0) 4742 goto out; 4743 #ifdef MAC 4744 error = mac_mount_check_stat(td->td_ucred, mp); 4745 if (error != 0) 4746 goto out; 4747 #endif 4748 error = VFS_STATFS(mp, buf); 4749 out: 4750 vfs_unbusy(mp); 4751 return (error); 4752 } 4753 4754 /* 4755 * Unlike madvise(2), we do not make a best effort to remember every 4756 * possible caching hint. Instead, we remember the last setting with 4757 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4758 * region of any current setting. 4759 */ 4760 int 4761 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4762 int advice) 4763 { 4764 struct fadvise_info *fa, *new; 4765 struct file *fp; 4766 struct vnode *vp; 4767 off_t end; 4768 int error; 4769 4770 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4771 return (EINVAL); 4772 AUDIT_ARG_VALUE(advice); 4773 switch (advice) { 4774 case POSIX_FADV_SEQUENTIAL: 4775 case POSIX_FADV_RANDOM: 4776 case POSIX_FADV_NOREUSE: 4777 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4778 break; 4779 case POSIX_FADV_NORMAL: 4780 case POSIX_FADV_WILLNEED: 4781 case POSIX_FADV_DONTNEED: 4782 new = NULL; 4783 break; 4784 default: 4785 return (EINVAL); 4786 } 4787 /* XXX: CAP_POSIX_FADVISE? */ 4788 AUDIT_ARG_FD(fd); 4789 error = fget(td, fd, &cap_no_rights, &fp); 4790 if (error != 0) 4791 goto out; 4792 AUDIT_ARG_FILE(td->td_proc, fp); 4793 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4794 error = ESPIPE; 4795 goto out; 4796 } 4797 if (fp->f_type != DTYPE_VNODE) { 4798 error = ENODEV; 4799 goto out; 4800 } 4801 vp = fp->f_vnode; 4802 if (vp->v_type != VREG) { 4803 error = ENODEV; 4804 goto out; 4805 } 4806 if (len == 0) 4807 end = OFF_MAX; 4808 else 4809 end = offset + len - 1; 4810 switch (advice) { 4811 case POSIX_FADV_SEQUENTIAL: 4812 case POSIX_FADV_RANDOM: 4813 case POSIX_FADV_NOREUSE: 4814 /* 4815 * Try to merge any existing non-standard region with 4816 * this new region if possible, otherwise create a new 4817 * non-standard region for this request. 4818 */ 4819 mtx_pool_lock(mtxpool_sleep, fp); 4820 fa = fp->f_advice; 4821 if (fa != NULL && fa->fa_advice == advice && 4822 ((fa->fa_start <= end && fa->fa_end >= offset) || 4823 (end != OFF_MAX && fa->fa_start == end + 1) || 4824 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4825 if (offset < fa->fa_start) 4826 fa->fa_start = offset; 4827 if (end > fa->fa_end) 4828 fa->fa_end = end; 4829 } else { 4830 new->fa_advice = advice; 4831 new->fa_start = offset; 4832 new->fa_end = end; 4833 fp->f_advice = new; 4834 new = fa; 4835 } 4836 mtx_pool_unlock(mtxpool_sleep, fp); 4837 break; 4838 case POSIX_FADV_NORMAL: 4839 /* 4840 * If a the "normal" region overlaps with an existing 4841 * non-standard region, trim or remove the 4842 * non-standard region. 4843 */ 4844 mtx_pool_lock(mtxpool_sleep, fp); 4845 fa = fp->f_advice; 4846 if (fa != NULL) { 4847 if (offset <= fa->fa_start && end >= fa->fa_end) { 4848 new = fa; 4849 fp->f_advice = NULL; 4850 } else if (offset <= fa->fa_start && 4851 end >= fa->fa_start) 4852 fa->fa_start = end + 1; 4853 else if (offset <= fa->fa_end && end >= fa->fa_end) 4854 fa->fa_end = offset - 1; 4855 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4856 /* 4857 * If the "normal" region is a middle 4858 * portion of the existing 4859 * non-standard region, just remove 4860 * the whole thing rather than picking 4861 * one side or the other to 4862 * preserve. 4863 */ 4864 new = fa; 4865 fp->f_advice = NULL; 4866 } 4867 } 4868 mtx_pool_unlock(mtxpool_sleep, fp); 4869 break; 4870 case POSIX_FADV_WILLNEED: 4871 case POSIX_FADV_DONTNEED: 4872 error = VOP_ADVISE(vp, offset, end, advice); 4873 break; 4874 } 4875 out: 4876 if (fp != NULL) 4877 fdrop(fp, td); 4878 free(new, M_FADVISE); 4879 return (error); 4880 } 4881 4882 int 4883 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4884 { 4885 int error; 4886 4887 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4888 uap->advice); 4889 return (kern_posix_error(td, error)); 4890 } 4891 4892 int 4893 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4894 off_t *outoffp, size_t len, unsigned int flags) 4895 { 4896 struct file *infp, *outfp; 4897 struct vnode *invp, *outvp; 4898 int error; 4899 size_t retlen; 4900 void *rl_rcookie, *rl_wcookie; 4901 off_t savinoff, savoutoff; 4902 4903 infp = outfp = NULL; 4904 rl_rcookie = rl_wcookie = NULL; 4905 savinoff = -1; 4906 error = 0; 4907 retlen = 0; 4908 4909 if (flags != 0) { 4910 error = EINVAL; 4911 goto out; 4912 } 4913 if (len > SSIZE_MAX) 4914 /* 4915 * Although the len argument is size_t, the return argument 4916 * is ssize_t (which is signed). Therefore a size that won't 4917 * fit in ssize_t can't be returned. 4918 */ 4919 len = SSIZE_MAX; 4920 4921 /* Get the file structures for the file descriptors. */ 4922 error = fget_read(td, infd, &cap_read_rights, &infp); 4923 if (error != 0) 4924 goto out; 4925 if (infp->f_ops == &badfileops) { 4926 error = EBADF; 4927 goto out; 4928 } 4929 if (infp->f_vnode == NULL) { 4930 error = EINVAL; 4931 goto out; 4932 } 4933 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4934 if (error != 0) 4935 goto out; 4936 if (outfp->f_ops == &badfileops) { 4937 error = EBADF; 4938 goto out; 4939 } 4940 if (outfp->f_vnode == NULL) { 4941 error = EINVAL; 4942 goto out; 4943 } 4944 4945 /* Set the offset pointers to the correct place. */ 4946 if (inoffp == NULL) 4947 inoffp = &infp->f_offset; 4948 if (outoffp == NULL) 4949 outoffp = &outfp->f_offset; 4950 savinoff = *inoffp; 4951 savoutoff = *outoffp; 4952 4953 invp = infp->f_vnode; 4954 outvp = outfp->f_vnode; 4955 /* Sanity check the f_flag bits. */ 4956 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4957 (infp->f_flag & FREAD) == 0) { 4958 error = EBADF; 4959 goto out; 4960 } 4961 4962 /* If len == 0, just return 0. */ 4963 if (len == 0) 4964 goto out; 4965 4966 /* 4967 * If infp and outfp refer to the same file, the byte ranges cannot 4968 * overlap. 4969 */ 4970 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4971 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4972 savinoff))) { 4973 error = EINVAL; 4974 goto out; 4975 } 4976 4977 /* Range lock the byte ranges for both invp and outvp. */ 4978 for (;;) { 4979 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4980 len); 4981 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4982 len); 4983 if (rl_rcookie != NULL) 4984 break; 4985 vn_rangelock_unlock(outvp, rl_wcookie); 4986 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4987 vn_rangelock_unlock(invp, rl_rcookie); 4988 } 4989 4990 retlen = len; 4991 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4992 flags, infp->f_cred, outfp->f_cred, td); 4993 out: 4994 if (rl_rcookie != NULL) 4995 vn_rangelock_unlock(invp, rl_rcookie); 4996 if (rl_wcookie != NULL) 4997 vn_rangelock_unlock(outvp, rl_wcookie); 4998 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4999 *inoffp = savinoff; 5000 *outoffp = savoutoff; 5001 } 5002 if (outfp != NULL) 5003 fdrop(outfp, td); 5004 if (infp != NULL) 5005 fdrop(infp, td); 5006 td->td_retval[0] = retlen; 5007 return (error); 5008 } 5009 5010 int 5011 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5012 { 5013 off_t inoff, outoff, *inoffp, *outoffp; 5014 int error; 5015 5016 inoffp = outoffp = NULL; 5017 if (uap->inoffp != NULL) { 5018 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5019 if (error != 0) 5020 return (error); 5021 inoffp = &inoff; 5022 } 5023 if (uap->outoffp != NULL) { 5024 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5025 if (error != 0) 5026 return (error); 5027 outoffp = &outoff; 5028 } 5029 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5030 outoffp, uap->len, uap->flags); 5031 if (error == 0 && uap->inoffp != NULL) 5032 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5033 if (error == 0 && uap->outoffp != NULL) 5034 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5035 return (error); 5036 } 5037