1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capsicum.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <ufs/ufs/quota.h> 91 92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94 SDT_PROVIDER_DEFINE(vfs); 95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 97 98 static int kern_chflagsat(struct thread *td, int fd, const char *path, 99 enum uio_seg pathseg, u_long flags, int atflag); 100 static int setfflags(struct thread *td, struct vnode *, u_long); 101 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 102 static int getutimens(const struct timespec *, enum uio_seg, 103 struct timespec *, int *); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 
106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 109 enum uio_seg pathseg, fhandle_t *fhp); 110 static int kern_getfhat(struct thread *td, int flags, int fd, 111 const char *path, enum uio_seg pathseg, fhandle_t *fhp); 112 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 113 size_t count, struct thread *td); 114 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 115 const char *path, enum uio_seg segflag); 116 117 /* 118 * Sync each mounted filesystem. 119 */ 120 #ifndef _SYS_SYSPROTO_H_ 121 struct sync_args { 122 int dummy; 123 }; 124 #endif 125 /* ARGSUSED */ 126 int 127 sys_sync(struct thread *td, struct sync_args *uap) 128 { 129 struct mount *mp, *nmp; 130 int save; 131 132 mtx_lock(&mountlist_mtx); 133 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 134 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 135 nmp = TAILQ_NEXT(mp, mnt_list); 136 continue; 137 } 138 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 139 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 140 save = curthread_pflags_set(TDP_SYNCIO); 141 vfs_msync(mp, MNT_NOWAIT); 142 VFS_SYNC(mp, MNT_NOWAIT); 143 curthread_pflags_restore(save); 144 vn_finished_write(mp); 145 } 146 mtx_lock(&mountlist_mtx); 147 nmp = TAILQ_NEXT(mp, mnt_list); 148 vfs_unbusy(mp); 149 } 150 mtx_unlock(&mountlist_mtx); 151 return (0); 152 } 153 154 /* 155 * Change filesystem quotas. 
156 */ 157 #ifndef _SYS_SYSPROTO_H_ 158 struct quotactl_args { 159 char *path; 160 int cmd; 161 int uid; 162 caddr_t arg; 163 }; 164 #endif 165 int 166 sys_quotactl(struct thread *td, struct quotactl_args *uap) 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 202 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 203 vfs_unbusy(mp); 204 return (error); 205 } 206 207 /* 208 * Used by statfs conversion routines to scale the block size up if 209 * necessary so that all of the block counts are <= 'max_size'. Note 210 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 211 * value of 'n'. 
212 */ 213 void 214 statfs_scale_blocks(struct statfs *sf, long max_size) 215 { 216 uint64_t count; 217 int shift; 218 219 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 220 221 /* 222 * Attempt to scale the block counts to give a more accurate 223 * overview to userland of the ratio of free space to used 224 * space. To do this, find the largest block count and compute 225 * a divisor that lets it fit into a signed integer <= max_size. 226 */ 227 if (sf->f_bavail < 0) 228 count = -sf->f_bavail; 229 else 230 count = sf->f_bavail; 231 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 232 if (count <= max_size) 233 return; 234 235 count >>= flsl(max_size); 236 shift = 0; 237 while (count > 0) { 238 shift++; 239 count >>=1; 240 } 241 242 sf->f_bsize <<= shift; 243 sf->f_blocks >>= shift; 244 sf->f_bfree >>= shift; 245 sf->f_bavail >>= shift; 246 } 247 248 static int 249 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 250 { 251 int error; 252 253 if (mp == NULL) 254 return (EBADF); 255 error = vfs_busy(mp, 0); 256 vfs_rel(mp); 257 if (error != 0) 258 return (error); 259 #ifdef MAC 260 error = mac_mount_check_stat(td->td_ucred, mp); 261 if (error != 0) 262 goto out; 263 #endif 264 error = VFS_STATFS(mp, buf); 265 if (error != 0) 266 goto out; 267 if (priv_check(td, PRIV_VFS_GENERATION)) { 268 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 269 prison_enforce_statfs(td->td_ucred, mp, buf); 270 } 271 out: 272 vfs_unbusy(mp); 273 return (error); 274 } 275 276 /* 277 * Get filesystem statistics. 
278 */ 279 #ifndef _SYS_SYSPROTO_H_ 280 struct statfs_args { 281 char *path; 282 struct statfs *buf; 283 }; 284 #endif 285 int 286 sys_statfs(struct thread *td, struct statfs_args *uap) 287 { 288 struct statfs *sfp; 289 int error; 290 291 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 292 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 293 if (error == 0) 294 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 295 free(sfp, M_STATFS); 296 return (error); 297 } 298 299 int 300 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 301 struct statfs *buf) 302 { 303 struct mount *mp; 304 struct nameidata nd; 305 int error; 306 307 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 308 pathseg, path, td); 309 error = namei(&nd); 310 if (error != 0) 311 return (error); 312 mp = nd.ni_vp->v_mount; 313 vfs_ref(mp); 314 NDFREE(&nd, NDF_ONLY_PNBUF); 315 vput(nd.ni_vp); 316 return (kern_do_statfs(td, mp, buf)); 317 } 318 319 /* 320 * Get filesystem statistics. 
321 */ 322 #ifndef _SYS_SYSPROTO_H_ 323 struct fstatfs_args { 324 int fd; 325 struct statfs *buf; 326 }; 327 #endif 328 int 329 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 330 { 331 struct statfs *sfp; 332 int error; 333 334 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 335 error = kern_fstatfs(td, uap->fd, sfp); 336 if (error == 0) 337 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 338 free(sfp, M_STATFS); 339 return (error); 340 } 341 342 int 343 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 344 { 345 struct file *fp; 346 struct mount *mp; 347 struct vnode *vp; 348 int error; 349 350 AUDIT_ARG_FD(fd); 351 error = getvnode(td, fd, &cap_fstatfs_rights, &fp); 352 if (error != 0) 353 return (error); 354 vp = fp->f_vnode; 355 vn_lock(vp, LK_SHARED | LK_RETRY); 356 #ifdef AUDIT 357 AUDIT_ARG_VNODE1(vp); 358 #endif 359 mp = vp->v_mount; 360 if (mp != NULL) 361 vfs_ref(mp); 362 VOP_UNLOCK(vp, 0); 363 fdrop(fp, td); 364 return (kern_do_statfs(td, mp, buf)); 365 } 366 367 /* 368 * Get statistics on all filesystems. 369 */ 370 #ifndef _SYS_SYSPROTO_H_ 371 struct getfsstat_args { 372 struct statfs *buf; 373 long bufsize; 374 int mode; 375 }; 376 #endif 377 int 378 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 379 { 380 size_t count; 381 int error; 382 383 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 384 return (EINVAL); 385 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 386 UIO_USERSPACE, uap->mode); 387 if (error == 0) 388 td->td_retval[0] = count; 389 return (error); 390 } 391 392 /* 393 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 394 * The caller is responsible for freeing memory which will be allocated 395 * in '*buf'. 
396 */ 397 int 398 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 399 size_t *countp, enum uio_seg bufseg, int mode) 400 { 401 struct mount *mp, *nmp; 402 struct statfs *sfsp, *sp, *sptmp, *tofree; 403 size_t count, maxcount; 404 int error; 405 406 switch (mode) { 407 case MNT_WAIT: 408 case MNT_NOWAIT: 409 break; 410 default: 411 if (bufseg == UIO_SYSSPACE) 412 *buf = NULL; 413 return (EINVAL); 414 } 415 restart: 416 maxcount = bufsize / sizeof(struct statfs); 417 if (bufsize == 0) { 418 sfsp = NULL; 419 tofree = NULL; 420 } else if (bufseg == UIO_USERSPACE) { 421 sfsp = *buf; 422 tofree = NULL; 423 } else /* if (bufseg == UIO_SYSSPACE) */ { 424 count = 0; 425 mtx_lock(&mountlist_mtx); 426 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 427 count++; 428 } 429 mtx_unlock(&mountlist_mtx); 430 if (maxcount > count) 431 maxcount = count; 432 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 433 M_STATFS, M_WAITOK); 434 } 435 count = 0; 436 mtx_lock(&mountlist_mtx); 437 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 438 if (prison_canseemount(td->td_ucred, mp) != 0) { 439 nmp = TAILQ_NEXT(mp, mnt_list); 440 continue; 441 } 442 #ifdef MAC 443 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 444 nmp = TAILQ_NEXT(mp, mnt_list); 445 continue; 446 } 447 #endif 448 if (mode == MNT_WAIT) { 449 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 450 /* 451 * If vfs_busy() failed, and MBF_NOWAIT 452 * wasn't passed, then the mp is gone. 453 * Furthermore, because of MBF_MNTLSTLOCK, 454 * the mountlist_mtx was dropped. We have 455 * no other choice than to start over. 456 */ 457 mtx_unlock(&mountlist_mtx); 458 free(tofree, M_STATFS); 459 goto restart; 460 } 461 } else { 462 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 463 nmp = TAILQ_NEXT(mp, mnt_list); 464 continue; 465 } 466 } 467 if (sfsp != NULL && count < maxcount) { 468 sp = &mp->mnt_stat; 469 /* 470 * If MNT_NOWAIT is specified, do not refresh 471 * the fsstat cache. 
472 */ 473 if (mode != MNT_NOWAIT) { 474 error = VFS_STATFS(mp, sp); 475 if (error != 0) { 476 mtx_lock(&mountlist_mtx); 477 nmp = TAILQ_NEXT(mp, mnt_list); 478 vfs_unbusy(mp); 479 continue; 480 } 481 } 482 if (priv_check(td, PRIV_VFS_GENERATION)) { 483 sptmp = malloc(sizeof(struct statfs), M_STATFS, 484 M_WAITOK); 485 *sptmp = *sp; 486 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 487 prison_enforce_statfs(td->td_ucred, mp, sptmp); 488 sp = sptmp; 489 } else 490 sptmp = NULL; 491 if (bufseg == UIO_SYSSPACE) { 492 bcopy(sp, sfsp, sizeof(*sp)); 493 free(sptmp, M_STATFS); 494 } else /* if (bufseg == UIO_USERSPACE) */ { 495 error = copyout(sp, sfsp, sizeof(*sp)); 496 free(sptmp, M_STATFS); 497 if (error != 0) { 498 vfs_unbusy(mp); 499 return (error); 500 } 501 } 502 sfsp++; 503 } 504 count++; 505 mtx_lock(&mountlist_mtx); 506 nmp = TAILQ_NEXT(mp, mnt_list); 507 vfs_unbusy(mp); 508 } 509 mtx_unlock(&mountlist_mtx); 510 if (sfsp != NULL && count > maxcount) 511 *countp = maxcount; 512 else 513 *countp = count; 514 return (0); 515 } 516 517 #ifdef COMPAT_FREEBSD4 518 /* 519 * Get old format filesystem statistics. 520 */ 521 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 522 523 #ifndef _SYS_SYSPROTO_H_ 524 struct freebsd4_statfs_args { 525 char *path; 526 struct ostatfs *buf; 527 }; 528 #endif 529 int 530 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 531 { 532 struct ostatfs osb; 533 struct statfs *sfp; 534 int error; 535 536 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 537 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 538 if (error == 0) { 539 freebsd4_cvtstatfs(sfp, &osb); 540 error = copyout(&osb, uap->buf, sizeof(osb)); 541 } 542 free(sfp, M_STATFS); 543 return (error); 544 } 545 546 /* 547 * Get filesystem statistics. 
548 */ 549 #ifndef _SYS_SYSPROTO_H_ 550 struct freebsd4_fstatfs_args { 551 int fd; 552 struct ostatfs *buf; 553 }; 554 #endif 555 int 556 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 557 { 558 struct ostatfs osb; 559 struct statfs *sfp; 560 int error; 561 562 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 563 error = kern_fstatfs(td, uap->fd, sfp); 564 if (error == 0) { 565 freebsd4_cvtstatfs(sfp, &osb); 566 error = copyout(&osb, uap->buf, sizeof(osb)); 567 } 568 free(sfp, M_STATFS); 569 return (error); 570 } 571 572 /* 573 * Get statistics on all filesystems. 574 */ 575 #ifndef _SYS_SYSPROTO_H_ 576 struct freebsd4_getfsstat_args { 577 struct ostatfs *buf; 578 long bufsize; 579 int mode; 580 }; 581 #endif 582 int 583 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 584 { 585 struct statfs *buf, *sp; 586 struct ostatfs osb; 587 size_t count, size; 588 int error; 589 590 if (uap->bufsize < 0) 591 return (EINVAL); 592 count = uap->bufsize / sizeof(struct ostatfs); 593 if (count > SIZE_MAX / sizeof(struct statfs)) 594 return (EINVAL); 595 size = count * sizeof(struct statfs); 596 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 597 uap->mode); 598 if (error == 0) 599 td->td_retval[0] = count; 600 if (size != 0) { 601 sp = buf; 602 while (count != 0 && error == 0) { 603 freebsd4_cvtstatfs(sp, &osb); 604 error = copyout(&osb, uap->buf, sizeof(osb)); 605 sp++; 606 uap->buf++; 607 count--; 608 } 609 free(buf, M_STATFS); 610 } 611 return (error); 612 } 613 614 /* 615 * Implement fstatfs() for (NFS) file handles. 
616 */ 617 #ifndef _SYS_SYSPROTO_H_ 618 struct freebsd4_fhstatfs_args { 619 struct fhandle *u_fhp; 620 struct ostatfs *buf; 621 }; 622 #endif 623 int 624 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 625 { 626 struct ostatfs osb; 627 struct statfs *sfp; 628 fhandle_t fh; 629 int error; 630 631 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 632 if (error != 0) 633 return (error); 634 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 635 error = kern_fhstatfs(td, fh, sfp); 636 if (error == 0) { 637 freebsd4_cvtstatfs(sfp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 } 640 free(sfp, M_STATFS); 641 return (error); 642 } 643 644 /* 645 * Convert a new format statfs structure to an old format statfs structure. 646 */ 647 static void 648 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 649 { 650 651 statfs_scale_blocks(nsp, LONG_MAX); 652 bzero(osp, sizeof(*osp)); 653 osp->f_bsize = nsp->f_bsize; 654 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 655 osp->f_blocks = nsp->f_blocks; 656 osp->f_bfree = nsp->f_bfree; 657 osp->f_bavail = nsp->f_bavail; 658 osp->f_files = MIN(nsp->f_files, LONG_MAX); 659 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 660 osp->f_owner = nsp->f_owner; 661 osp->f_type = nsp->f_type; 662 osp->f_flags = nsp->f_flags; 663 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 664 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 665 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 666 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 667 strlcpy(osp->f_fstypename, nsp->f_fstypename, 668 MIN(MFSNAMELEN, OMFSNAMELEN)); 669 strlcpy(osp->f_mntonname, nsp->f_mntonname, 670 MIN(MNAMELEN, OMNAMELEN)); 671 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 672 MIN(MNAMELEN, OMNAMELEN)); 673 osp->f_fsid = nsp->f_fsid; 674 } 675 #endif /* COMPAT_FREEBSD4 */ 676 677 #if defined(COMPAT_FREEBSD11) 678 /* 679 * Get old format filesystem statistics. 
680 */ 681 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 682 683 int 684 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 685 { 686 struct freebsd11_statfs osb; 687 struct statfs *sfp; 688 int error; 689 690 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 691 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 692 if (error == 0) { 693 freebsd11_cvtstatfs(sfp, &osb); 694 error = copyout(&osb, uap->buf, sizeof(osb)); 695 } 696 free(sfp, M_STATFS); 697 return (error); 698 } 699 700 /* 701 * Get filesystem statistics. 702 */ 703 int 704 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 705 { 706 struct freebsd11_statfs osb; 707 struct statfs *sfp; 708 int error; 709 710 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 711 error = kern_fstatfs(td, uap->fd, sfp); 712 if (error == 0) { 713 freebsd11_cvtstatfs(sfp, &osb); 714 error = copyout(&osb, uap->buf, sizeof(osb)); 715 } 716 free(sfp, M_STATFS); 717 return (error); 718 } 719 720 /* 721 * Get statistics on all filesystems. 722 */ 723 int 724 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 725 { 726 struct freebsd11_statfs osb; 727 struct statfs *buf, *sp; 728 size_t count, size; 729 int error; 730 731 count = uap->bufsize / sizeof(struct ostatfs); 732 size = count * sizeof(struct statfs); 733 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 734 uap->mode); 735 if (error == 0) 736 td->td_retval[0] = count; 737 if (size > 0) { 738 sp = buf; 739 while (count > 0 && error == 0) { 740 freebsd11_cvtstatfs(sp, &osb); 741 error = copyout(&osb, uap->buf, sizeof(osb)); 742 sp++; 743 uap->buf++; 744 count--; 745 } 746 free(buf, M_STATFS); 747 } 748 return (error); 749 } 750 751 /* 752 * Implement fstatfs() for (NFS) file handles. 
753 */ 754 int 755 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 756 { 757 struct freebsd11_statfs osb; 758 struct statfs *sfp; 759 fhandle_t fh; 760 int error; 761 762 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 763 if (error) 764 return (error); 765 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 766 error = kern_fhstatfs(td, fh, sfp); 767 if (error == 0) { 768 freebsd11_cvtstatfs(sfp, &osb); 769 error = copyout(&osb, uap->buf, sizeof(osb)); 770 } 771 free(sfp, M_STATFS); 772 return (error); 773 } 774 775 /* 776 * Convert a new format statfs structure to an old format statfs structure. 777 */ 778 static void 779 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 780 { 781 782 bzero(osp, sizeof(*osp)); 783 osp->f_version = FREEBSD11_STATFS_VERSION; 784 osp->f_type = nsp->f_type; 785 osp->f_flags = nsp->f_flags; 786 osp->f_bsize = nsp->f_bsize; 787 osp->f_iosize = nsp->f_iosize; 788 osp->f_blocks = nsp->f_blocks; 789 osp->f_bfree = nsp->f_bfree; 790 osp->f_bavail = nsp->f_bavail; 791 osp->f_files = nsp->f_files; 792 osp->f_ffree = nsp->f_ffree; 793 osp->f_syncwrites = nsp->f_syncwrites; 794 osp->f_asyncwrites = nsp->f_asyncwrites; 795 osp->f_syncreads = nsp->f_syncreads; 796 osp->f_asyncreads = nsp->f_asyncreads; 797 osp->f_namemax = nsp->f_namemax; 798 osp->f_owner = nsp->f_owner; 799 osp->f_fsid = nsp->f_fsid; 800 strlcpy(osp->f_fstypename, nsp->f_fstypename, 801 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 802 strlcpy(osp->f_mntonname, nsp->f_mntonname, 803 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 804 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 805 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 806 } 807 #endif /* COMPAT_FREEBSD11 */ 808 809 /* 810 * Change current working directory to a given file descriptor. 
811 */ 812 #ifndef _SYS_SYSPROTO_H_ 813 struct fchdir_args { 814 int fd; 815 }; 816 #endif 817 int 818 sys_fchdir(struct thread *td, struct fchdir_args *uap) 819 { 820 struct vnode *vp, *tdp; 821 struct mount *mp; 822 struct file *fp; 823 int error; 824 825 AUDIT_ARG_FD(uap->fd); 826 error = getvnode(td, uap->fd, &cap_fchdir_rights, 827 &fp); 828 if (error != 0) 829 return (error); 830 vp = fp->f_vnode; 831 vrefact(vp); 832 fdrop(fp, td); 833 vn_lock(vp, LK_SHARED | LK_RETRY); 834 AUDIT_ARG_VNODE1(vp); 835 error = change_dir(vp, td); 836 while (!error && (mp = vp->v_mountedhere) != NULL) { 837 if (vfs_busy(mp, 0)) 838 continue; 839 error = VFS_ROOT(mp, LK_SHARED, &tdp); 840 vfs_unbusy(mp); 841 if (error != 0) 842 break; 843 vput(vp); 844 vp = tdp; 845 } 846 if (error != 0) { 847 vput(vp); 848 return (error); 849 } 850 VOP_UNLOCK(vp, 0); 851 pwd_chdir(td, vp); 852 return (0); 853 } 854 855 /* 856 * Change current working directory (``.''). 857 */ 858 #ifndef _SYS_SYSPROTO_H_ 859 struct chdir_args { 860 char *path; 861 }; 862 #endif 863 int 864 sys_chdir(struct thread *td, struct chdir_args *uap) 865 { 866 867 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 868 } 869 870 int 871 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 872 { 873 struct nameidata nd; 874 int error; 875 876 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 877 pathseg, path, td); 878 if ((error = namei(&nd)) != 0) 879 return (error); 880 if ((error = change_dir(nd.ni_vp, td)) != 0) { 881 vput(nd.ni_vp); 882 NDFREE(&nd, NDF_ONLY_PNBUF); 883 return (error); 884 } 885 VOP_UNLOCK(nd.ni_vp, 0); 886 NDFREE(&nd, NDF_ONLY_PNBUF); 887 pwd_chdir(td, nd.ni_vp); 888 return (0); 889 } 890 891 /* 892 * Change notion of root (``/'') directory. 
893 */ 894 #ifndef _SYS_SYSPROTO_H_ 895 struct chroot_args { 896 char *path; 897 }; 898 #endif 899 int 900 sys_chroot(struct thread *td, struct chroot_args *uap) 901 { 902 struct nameidata nd; 903 int error; 904 905 error = priv_check(td, PRIV_VFS_CHROOT); 906 if (error != 0) 907 return (error); 908 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 909 UIO_USERSPACE, uap->path, td); 910 error = namei(&nd); 911 if (error != 0) 912 goto error; 913 error = change_dir(nd.ni_vp, td); 914 if (error != 0) 915 goto e_vunlock; 916 #ifdef MAC 917 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 918 if (error != 0) 919 goto e_vunlock; 920 #endif 921 VOP_UNLOCK(nd.ni_vp, 0); 922 error = pwd_chroot(td, nd.ni_vp); 923 vrele(nd.ni_vp); 924 NDFREE(&nd, NDF_ONLY_PNBUF); 925 return (error); 926 e_vunlock: 927 vput(nd.ni_vp); 928 error: 929 NDFREE(&nd, NDF_ONLY_PNBUF); 930 return (error); 931 } 932 933 /* 934 * Common routine for chroot and chdir. Callers must provide a locked vnode 935 * instance. 
936 */ 937 int 938 change_dir(struct vnode *vp, struct thread *td) 939 { 940 #ifdef MAC 941 int error; 942 #endif 943 944 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 945 if (vp->v_type != VDIR) 946 return (ENOTDIR); 947 #ifdef MAC 948 error = mac_vnode_check_chdir(td->td_ucred, vp); 949 if (error != 0) 950 return (error); 951 #endif 952 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 953 } 954 955 static __inline void 956 flags_to_rights(int flags, cap_rights_t *rightsp) 957 { 958 959 if (flags & O_EXEC) { 960 cap_rights_set(rightsp, CAP_FEXECVE); 961 } else { 962 switch ((flags & O_ACCMODE)) { 963 case O_RDONLY: 964 cap_rights_set(rightsp, CAP_READ); 965 break; 966 case O_RDWR: 967 cap_rights_set(rightsp, CAP_READ); 968 /* FALLTHROUGH */ 969 case O_WRONLY: 970 cap_rights_set(rightsp, CAP_WRITE); 971 if (!(flags & (O_APPEND | O_TRUNC))) 972 cap_rights_set(rightsp, CAP_SEEK); 973 break; 974 } 975 } 976 977 if (flags & O_CREAT) 978 cap_rights_set(rightsp, CAP_CREATE); 979 980 if (flags & O_TRUNC) 981 cap_rights_set(rightsp, CAP_FTRUNCATE); 982 983 if (flags & (O_SYNC | O_FSYNC)) 984 cap_rights_set(rightsp, CAP_FSYNC); 985 986 if (flags & (O_EXLOCK | O_SHLOCK)) 987 cap_rights_set(rightsp, CAP_FLOCK); 988 } 989 990 /* 991 * Check permissions, allocate an open file structure, and call the device 992 * open routine if any. 
993 */ 994 #ifndef _SYS_SYSPROTO_H_ 995 struct open_args { 996 char *path; 997 int flags; 998 int mode; 999 }; 1000 #endif 1001 int 1002 sys_open(struct thread *td, struct open_args *uap) 1003 { 1004 1005 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1006 uap->flags, uap->mode)); 1007 } 1008 1009 #ifndef _SYS_SYSPROTO_H_ 1010 struct openat_args { 1011 int fd; 1012 char *path; 1013 int flag; 1014 int mode; 1015 }; 1016 #endif 1017 int 1018 sys_openat(struct thread *td, struct openat_args *uap) 1019 { 1020 1021 AUDIT_ARG_FD(uap->fd); 1022 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1023 uap->mode)); 1024 } 1025 1026 int 1027 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1028 int flags, int mode) 1029 { 1030 struct proc *p = td->td_proc; 1031 struct filedesc *fdp = p->p_fd; 1032 struct file *fp; 1033 struct vnode *vp; 1034 struct nameidata nd; 1035 cap_rights_t rights; 1036 int cmode, error, indx; 1037 1038 indx = -1; 1039 1040 AUDIT_ARG_FFLAGS(flags); 1041 AUDIT_ARG_MODE(mode); 1042 cap_rights_init(&rights, CAP_LOOKUP); 1043 flags_to_rights(flags, &rights); 1044 /* 1045 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1046 * may be specified. 1047 */ 1048 if (flags & O_EXEC) { 1049 if (flags & O_ACCMODE) 1050 return (EINVAL); 1051 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1052 return (EINVAL); 1053 } else { 1054 flags = FFLAGS(flags); 1055 } 1056 1057 /* 1058 * Allocate a file structure. The descriptor to reference it 1059 * is allocated and set by finstall() below. 1060 */ 1061 error = falloc_noinstall(td, &fp); 1062 if (error != 0) 1063 return (error); 1064 /* 1065 * An extra reference on `fp' has been held for us by 1066 * falloc_noinstall(). 1067 */ 1068 /* Set the flags early so the finit in devfs can pick them up. 
*/ 1069 fp->f_flag = flags & FMASK; 1070 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1071 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1072 &rights, td); 1073 td->td_dupfd = -1; /* XXX check for fdopen */ 1074 error = vn_open(&nd, &flags, cmode, fp); 1075 if (error != 0) { 1076 /* 1077 * If the vn_open replaced the method vector, something 1078 * wonderous happened deep below and we just pass it up 1079 * pretending we know what we do. 1080 */ 1081 if (error == ENXIO && fp->f_ops != &badfileops) 1082 goto success; 1083 1084 /* 1085 * Handle special fdopen() case. bleh. 1086 * 1087 * Don't do this for relative (capability) lookups; we don't 1088 * understand exactly what would happen, and we don't think 1089 * that it ever should. 1090 */ 1091 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1092 (error == ENODEV || error == ENXIO) && 1093 td->td_dupfd >= 0) { 1094 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1095 &indx); 1096 if (error == 0) 1097 goto success; 1098 } 1099 1100 goto bad; 1101 } 1102 td->td_dupfd = 0; 1103 NDFREE(&nd, NDF_ONLY_PNBUF); 1104 vp = nd.ni_vp; 1105 1106 /* 1107 * Store the vnode, for any f_type. Typically, the vnode use 1108 * count is decremented by direct call to vn_closefile() for 1109 * files that switched type in the cdevsw fdopen() method. 1110 */ 1111 fp->f_vnode = vp; 1112 /* 1113 * If the file wasn't claimed by devfs bind it to the normal 1114 * vnode operations here. 1115 */ 1116 if (fp->f_ops == &badfileops) { 1117 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1118 fp->f_seqcount = 1; 1119 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1120 DTYPE_VNODE, vp, &vnops); 1121 } 1122 1123 VOP_UNLOCK(vp, 0); 1124 if (flags & O_TRUNC) { 1125 error = fo_truncate(fp, 0, td->td_ucred, td); 1126 if (error != 0) 1127 goto bad; 1128 } 1129 success: 1130 /* 1131 * If we haven't already installed the FD (for dupfdopen), do so now. 
1132 */ 1133 if (indx == -1) { 1134 struct filecaps *fcaps; 1135 1136 #ifdef CAPABILITIES 1137 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1138 fcaps = &nd.ni_filecaps; 1139 else 1140 #endif 1141 fcaps = NULL; 1142 error = finstall(td, fp, &indx, flags, fcaps); 1143 /* On success finstall() consumes fcaps. */ 1144 if (error != 0) { 1145 filecaps_free(&nd.ni_filecaps); 1146 goto bad; 1147 } 1148 } else { 1149 filecaps_free(&nd.ni_filecaps); 1150 } 1151 1152 /* 1153 * Release our private reference, leaving the one associated with 1154 * the descriptor table intact. 1155 */ 1156 fdrop(fp, td); 1157 td->td_retval[0] = indx; 1158 return (0); 1159 bad: 1160 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1161 fdrop(fp, td); 1162 return (error); 1163 } 1164 1165 #ifdef COMPAT_43 1166 /* 1167 * Create a file. 1168 */ 1169 #ifndef _SYS_SYSPROTO_H_ 1170 struct ocreat_args { 1171 char *path; 1172 int mode; 1173 }; 1174 #endif 1175 int 1176 ocreat(struct thread *td, struct ocreat_args *uap) 1177 { 1178 1179 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1180 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1181 } 1182 #endif /* COMPAT_43 */ 1183 1184 /* 1185 * Create a special file. 
 */
#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/*
 * mknodat(2) entry point: forwards the system call arguments to
 * kern_mknodat() with the path taken from user space.
 */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

#if defined(COMPAT_FREEBSD11)
/*
 * FreeBSD 11 compatible mknod(2): same as mknodat with the lookup
 * anchored at AT_FDCWD.
 */
int
freebsd11_mknod(struct thread *td,
    struct freebsd11_mknod_args *uap)
{

	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode, uap->dev));
}

/*
 * FreeBSD 11 compatible mknodat(2): forwards the arguments unchanged to
 * kern_mknodat().
 */
int
freebsd11_mknodat(struct thread *td,
    struct freebsd11_mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}
#endif /* COMPAT_FREEBSD11 */

/*
 * Common implementation of the mknod family.
 *
 * Creates the character device, block device or whiteout entry named by
 * path, looked up relative to fd.  An S_IFIFO request with dev == 0 is
 * handed to kern_mkfifoat(); any other file type yields EINVAL.  Device
 * nodes require PRIV_VFS_MKNOD_DEV (and a dev other than VNOVAL),
 * whiteouts require PRIV_VFS_MKNOD_WHT.
 *
 * Returns 0 on success, EEXIST when the name already exists, or the
 * error reported by the privilege check, lookup, MAC check or VOP.
 */
int
kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int mode, dev_t dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Validate the requested type and check privilege before any lookup. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, &cap_mknodat_rights,
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: drop everything namei() returned. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else {
		VATTR_NULL(&vattr);
		/* Permission bits only, filtered through the process umask. */
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Every other type was rejected by the first switch. */
			panic("kern_mknod: invalid mode");
		}
	}
	/*
	 * If the non-blocking vn_start_write() fails, release the lookup
	 * state, call it again in V_XSLEEP mode (presumably sleeping until
	 * writes are permitted) and redo the lookup from scratch.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is skipped for whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode is not handed back to the caller. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a named pipe.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point: creates the FIFO relative to AT_FDCWD.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point: creates the FIFO relative to uap->fd.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Common implementation of mkfifo(2) and mkfifoat(2).
 *
 * Creates a VFIFO node named by path, looked up relative to fd, with
 * the permission bits of mode filtered through the process umask.
 * Returns 0 on success, EEXIST when the name already exists, or the
 * error reported by the lookup, MAC check or VOP_MKNOD().
 */
int
kern_mkfifoat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights,
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: drop everything namei() returned. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	/*
	 * If the non-blocking vn_start_write() fails, release the lookup
	 * state, retry it in V_XSLEEP mode and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	/* The new vnode is not handed back to the caller. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point: both paths are resolved relative to AT_FDCWD and
 * the source lookup uses FOLLOW.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point.  Flag bits other than AT_SYMLINK_FOLLOW and
 * AT_BENEATH are rejected with EINVAL; the accepted bits are translated
 * into the FOLLOW/NOFOLLOW and BENEATH lookup flags.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{
	int flag;

	flag = uap->flag;
	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_BENEATH)) != 0)
		return (EINVAL);

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, ((flag & AT_SYMLINK_FOLLOW) != 0 ? FOLLOW :
	    NOFOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0)));
}

/* Tunables consulted by can_hardlink(); both default to off. */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether cred may create a hard link to vp.
 *
 * Returns 0 immediately when both hardlink_check_uid and
 * hardlink_check_gid are off.  Otherwise the attributes of vp are
 * fetched and PRIV_VFS_LINK is required when the enabled uid check finds
 * a different owner, or the enabled gid check finds that cred is not a
 * member of the file's group.  Returns 0 when linking is allowed, or the
 * error from VOP_GETATTR() or priv_check_cred().
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Common implementation of link(2) and linkat(2).
 *
 * Looks up the source (path1 relative to fd1, using the lookup flags in
 * follow) and passes the resulting vnode to kern_linkat_vp().  The
 * whole sequence, including the source lookup, is repeated for as long
 * as kern_linkat_vp() returns EAGAIN.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
    const char *path2, enum uio_seg segflag, int follow)
{
	struct nameidata nd;
	int error;

	do {
		bwillwrite();
		NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag,
		    path1, fd1, &cap_linkat_source_rights, td);
		if ((error = namei(&nd)) != 0)
			return (error);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
	} while (error == EAGAIN);
	return (error);
}

/*
 * Create the link name path (relative to fd) referring to vp.
 *
 * Every path through this function releases vp (vrele() or vput()), so
 * the caller must not use vp afterwards.  Returns EPERM when vp is a
 * directory, EEXIST when the target name exists, EXDEV when the target
 * directory is on a different mount than vp, and EAGAIN when the caller
 * should redo the whole operation (vn_lock() failed, or the V_XSLEEP
 * vn_start_write() call completed after the non-blocking one failed).
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights, td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* Target name already exists. */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
			/* vp is locked from here on: release it with vput(). */
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Drop all vnodes, retry vn_start_write() in
				 * V_XSLEEP mode and ask the caller to start
				 * over.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			/* Only unlock here; the reference is dropped below. */
			VOP_UNLOCK(vp, 0);
			vput(nd.ni_dvp);
			vn_finished_write(mp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
		} else {
			/* vn_lock() failed: have the caller retry. */
			vput(nd.ni_dvp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vrele(vp);
			return (EAGAIN);
		}
	}
	vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1581 */ 1582 #ifndef _SYS_SYSPROTO_H_ 1583 struct symlink_args { 1584 char *path; 1585 char *link; 1586 }; 1587 #endif 1588 int 1589 sys_symlink(struct thread *td, struct symlink_args *uap) 1590 { 1591 1592 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1593 UIO_USERSPACE)); 1594 } 1595 1596 #ifndef _SYS_SYSPROTO_H_ 1597 struct symlinkat_args { 1598 char *path; 1599 int fd; 1600 char *path2; 1601 }; 1602 #endif 1603 int 1604 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1605 { 1606 1607 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1608 UIO_USERSPACE)); 1609 } 1610 1611 int 1612 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1613 enum uio_seg segflg) 1614 { 1615 struct mount *mp; 1616 struct vattr vattr; 1617 const char *syspath; 1618 char *tmppath; 1619 struct nameidata nd; 1620 int error; 1621 1622 if (segflg == UIO_SYSSPACE) { 1623 syspath = path1; 1624 } else { 1625 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1626 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1627 goto out; 1628 syspath = tmppath; 1629 } 1630 AUDIT_ARG_TEXT(syspath); 1631 restart: 1632 bwillwrite(); 1633 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1634 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1635 td); 1636 if ((error = namei(&nd)) != 0) 1637 goto out; 1638 if (nd.ni_vp) { 1639 NDFREE(&nd, NDF_ONLY_PNBUF); 1640 if (nd.ni_vp == nd.ni_dvp) 1641 vrele(nd.ni_dvp); 1642 else 1643 vput(nd.ni_dvp); 1644 vrele(nd.ni_vp); 1645 error = EEXIST; 1646 goto out; 1647 } 1648 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1649 NDFREE(&nd, NDF_ONLY_PNBUF); 1650 vput(nd.ni_dvp); 1651 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1652 goto out; 1653 goto restart; 1654 } 1655 VATTR_NULL(&vattr); 1656 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1657 #ifdef MAC 1658 vattr.va_type = VLNK; 1659 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 
&nd.ni_cnd, 1660 &vattr); 1661 if (error != 0) 1662 goto out2; 1663 #endif 1664 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1665 if (error == 0) 1666 vput(nd.ni_vp); 1667 #ifdef MAC 1668 out2: 1669 #endif 1670 NDFREE(&nd, NDF_ONLY_PNBUF); 1671 vput(nd.ni_dvp); 1672 vn_finished_write(mp); 1673 out: 1674 if (segflg != UIO_SYSSPACE) 1675 uma_zfree(namei_zone, tmppath); 1676 return (error); 1677 } 1678 1679 /* 1680 * Delete a whiteout from the filesystem. 1681 */ 1682 #ifndef _SYS_SYSPROTO_H_ 1683 struct undelete_args { 1684 char *path; 1685 }; 1686 #endif 1687 int 1688 sys_undelete(struct thread *td, struct undelete_args *uap) 1689 { 1690 struct mount *mp; 1691 struct nameidata nd; 1692 int error; 1693 1694 restart: 1695 bwillwrite(); 1696 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1697 UIO_USERSPACE, uap->path, td); 1698 error = namei(&nd); 1699 if (error != 0) 1700 return (error); 1701 1702 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1703 NDFREE(&nd, NDF_ONLY_PNBUF); 1704 if (nd.ni_vp == nd.ni_dvp) 1705 vrele(nd.ni_dvp); 1706 else 1707 vput(nd.ni_dvp); 1708 if (nd.ni_vp) 1709 vrele(nd.ni_vp); 1710 return (EEXIST); 1711 } 1712 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1713 NDFREE(&nd, NDF_ONLY_PNBUF); 1714 vput(nd.ni_dvp); 1715 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1716 return (error); 1717 goto restart; 1718 } 1719 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1720 NDFREE(&nd, NDF_ONLY_PNBUF); 1721 vput(nd.ni_dvp); 1722 vn_finished_write(mp); 1723 return (error); 1724 } 1725 1726 /* 1727 * Delete a name from the filesystem. 
1728 */ 1729 #ifndef _SYS_SYSPROTO_H_ 1730 struct unlink_args { 1731 char *path; 1732 }; 1733 #endif 1734 int 1735 sys_unlink(struct thread *td, struct unlink_args *uap) 1736 { 1737 1738 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1739 0, 0)); 1740 } 1741 1742 static int 1743 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1744 int flag, enum uio_seg pathseg, ino_t oldinum) 1745 { 1746 1747 if ((flag & ~AT_REMOVEDIR) != 0) 1748 return (EINVAL); 1749 1750 if ((flag & AT_REMOVEDIR) != 0) 1751 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1752 1753 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1754 } 1755 1756 #ifndef _SYS_SYSPROTO_H_ 1757 struct unlinkat_args { 1758 int fd; 1759 char *path; 1760 int flag; 1761 }; 1762 #endif 1763 int 1764 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1765 { 1766 1767 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1768 UIO_USERSPACE, 0)); 1769 } 1770 1771 #ifndef _SYS_SYSPROTO_H_ 1772 struct funlinkat_args { 1773 int dfd; 1774 const char *path; 1775 int fd; 1776 int flag; 1777 }; 1778 #endif 1779 int 1780 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1781 { 1782 1783 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1784 UIO_USERSPACE, 0)); 1785 } 1786 1787 int 1788 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1789 enum uio_seg pathseg, int flag, ino_t oldinum) 1790 { 1791 struct mount *mp; 1792 struct file *fp; 1793 struct vnode *vp; 1794 struct nameidata nd; 1795 struct stat sb; 1796 int error; 1797 1798 fp = NULL; 1799 if (fd != FD_NONE) { 1800 error = getvnode(td, fd, &cap_no_rights, &fp); 1801 if (error != 0) 1802 return (error); 1803 } 1804 1805 restart: 1806 bwillwrite(); 1807 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1808 ((flag & AT_BENEATH) != 0 ? 
BENEATH : 0), 1809 pathseg, path, dfd, &cap_unlinkat_rights, td); 1810 if ((error = namei(&nd)) != 0) { 1811 if (error == EINVAL) 1812 error = EPERM; 1813 goto fdout; 1814 } 1815 vp = nd.ni_vp; 1816 if (vp->v_type == VDIR && oldinum == 0) { 1817 error = EPERM; /* POSIX */ 1818 } else if (oldinum != 0 && 1819 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1820 sb.st_ino != oldinum) { 1821 error = EIDRM; /* Identifier removed */ 1822 } else if (fp != NULL && fp->f_vnode != vp) { 1823 if ((fp->f_vnode->v_iflag & VI_DOOMED) != 0) 1824 error = EBADF; 1825 else 1826 error = EDEADLK; 1827 } else { 1828 /* 1829 * The root of a mounted filesystem cannot be deleted. 1830 * 1831 * XXX: can this only be a VDIR case? 1832 */ 1833 if (vp->v_vflag & VV_ROOT) 1834 error = EBUSY; 1835 } 1836 if (error == 0) { 1837 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1838 NDFREE(&nd, NDF_ONLY_PNBUF); 1839 vput(nd.ni_dvp); 1840 if (vp == nd.ni_dvp) 1841 vrele(vp); 1842 else 1843 vput(vp); 1844 if ((error = vn_start_write(NULL, &mp, 1845 V_XSLEEP | PCATCH)) != 0) { 1846 goto fdout; 1847 } 1848 goto restart; 1849 } 1850 #ifdef MAC 1851 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1852 &nd.ni_cnd); 1853 if (error != 0) 1854 goto out; 1855 #endif 1856 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1857 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1858 #ifdef MAC 1859 out: 1860 #endif 1861 vn_finished_write(mp); 1862 } 1863 NDFREE(&nd, NDF_ONLY_PNBUF); 1864 vput(nd.ni_dvp); 1865 if (vp == nd.ni_dvp) 1866 vrele(vp); 1867 else 1868 vput(vp); 1869 fdout: 1870 if (fp != NULL) 1871 fdrop(fp, td); 1872 return (error); 1873 } 1874 1875 /* 1876 * Reposition read/write file offset. 
1877 */ 1878 #ifndef _SYS_SYSPROTO_H_ 1879 struct lseek_args { 1880 int fd; 1881 int pad; 1882 off_t offset; 1883 int whence; 1884 }; 1885 #endif 1886 int 1887 sys_lseek(struct thread *td, struct lseek_args *uap) 1888 { 1889 1890 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1891 } 1892 1893 int 1894 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1895 { 1896 struct file *fp; 1897 int error; 1898 1899 AUDIT_ARG_FD(fd); 1900 error = fget(td, fd, &cap_seek_rights, &fp); 1901 if (error != 0) 1902 return (error); 1903 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1904 fo_seek(fp, offset, whence, td) : ESPIPE; 1905 fdrop(fp, td); 1906 return (error); 1907 } 1908 1909 #if defined(COMPAT_43) 1910 /* 1911 * Reposition read/write file offset. 1912 */ 1913 #ifndef _SYS_SYSPROTO_H_ 1914 struct olseek_args { 1915 int fd; 1916 long offset; 1917 int whence; 1918 }; 1919 #endif 1920 int 1921 olseek(struct thread *td, struct olseek_args *uap) 1922 { 1923 1924 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1925 } 1926 #endif /* COMPAT_43 */ 1927 1928 #if defined(COMPAT_FREEBSD6) 1929 /* Version with the 'pad' argument */ 1930 int 1931 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1932 { 1933 1934 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1935 } 1936 #endif 1937 1938 /* 1939 * Check access permissions using passed credentials. 1940 */ 1941 static int 1942 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1943 struct thread *td) 1944 { 1945 accmode_t accmode; 1946 int error; 1947 1948 /* Flags == 0 means only check for existence. 
*/ 1949 if (user_flags == 0) 1950 return (0); 1951 1952 accmode = 0; 1953 if (user_flags & R_OK) 1954 accmode |= VREAD; 1955 if (user_flags & W_OK) 1956 accmode |= VWRITE; 1957 if (user_flags & X_OK) 1958 accmode |= VEXEC; 1959 #ifdef MAC 1960 error = mac_vnode_check_access(cred, vp, accmode); 1961 if (error != 0) 1962 return (error); 1963 #endif 1964 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1965 error = VOP_ACCESS(vp, accmode, cred, td); 1966 return (error); 1967 } 1968 1969 /* 1970 * Check access permissions using "real" credentials. 1971 */ 1972 #ifndef _SYS_SYSPROTO_H_ 1973 struct access_args { 1974 char *path; 1975 int amode; 1976 }; 1977 #endif 1978 int 1979 sys_access(struct thread *td, struct access_args *uap) 1980 { 1981 1982 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1983 0, uap->amode)); 1984 } 1985 1986 #ifndef _SYS_SYSPROTO_H_ 1987 struct faccessat_args { 1988 int dirfd; 1989 char *path; 1990 int amode; 1991 int flag; 1992 } 1993 #endif 1994 int 1995 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1996 { 1997 1998 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1999 uap->amode)); 2000 } 2001 2002 int 2003 kern_accessat(struct thread *td, int fd, const char *path, 2004 enum uio_seg pathseg, int flag, int amode) 2005 { 2006 struct ucred *cred, *usecred; 2007 struct vnode *vp; 2008 struct nameidata nd; 2009 int error; 2010 2011 if ((flag & ~(AT_EACCESS | AT_BENEATH)) != 0) 2012 return (EINVAL); 2013 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2014 return (EINVAL); 2015 2016 /* 2017 * Create and modify a temporary credential instead of one that 2018 * is potentially shared (if we need one). 
2019 */ 2020 cred = td->td_ucred; 2021 if ((flag & AT_EACCESS) == 0 && 2022 ((cred->cr_uid != cred->cr_ruid || 2023 cred->cr_rgid != cred->cr_groups[0]))) { 2024 usecred = crdup(cred); 2025 usecred->cr_uid = cred->cr_ruid; 2026 usecred->cr_groups[0] = cred->cr_rgid; 2027 td->td_ucred = usecred; 2028 } else 2029 usecred = cred; 2030 AUDIT_ARG_VALUE(amode); 2031 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2032 AUDITVNODE1 | ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 2033 pathseg, path, fd, &cap_fstat_rights, td); 2034 if ((error = namei(&nd)) != 0) 2035 goto out; 2036 vp = nd.ni_vp; 2037 2038 error = vn_access(vp, amode, usecred, td); 2039 NDFREE(&nd, NDF_ONLY_PNBUF); 2040 vput(vp); 2041 out: 2042 if (usecred != cred) { 2043 td->td_ucred = cred; 2044 crfree(usecred); 2045 } 2046 return (error); 2047 } 2048 2049 /* 2050 * Check access permissions using "effective" credentials. 2051 */ 2052 #ifndef _SYS_SYSPROTO_H_ 2053 struct eaccess_args { 2054 char *path; 2055 int amode; 2056 }; 2057 #endif 2058 int 2059 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2060 { 2061 2062 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2063 AT_EACCESS, uap->amode)); 2064 } 2065 2066 #if defined(COMPAT_43) 2067 /* 2068 * Get file status; this version follows links. 2069 */ 2070 #ifndef _SYS_SYSPROTO_H_ 2071 struct ostat_args { 2072 char *path; 2073 struct ostat *ub; 2074 }; 2075 #endif 2076 int 2077 ostat(struct thread *td, struct ostat_args *uap) 2078 { 2079 struct stat sb; 2080 struct ostat osb; 2081 int error; 2082 2083 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2084 &sb, NULL); 2085 if (error != 0) 2086 return (error); 2087 cvtstat(&sb, &osb); 2088 return (copyout(&osb, uap->ub, sizeof (osb))); 2089 } 2090 2091 /* 2092 * Get file status; this version does not follow links. 
2093 */ 2094 #ifndef _SYS_SYSPROTO_H_ 2095 struct olstat_args { 2096 char *path; 2097 struct ostat *ub; 2098 }; 2099 #endif 2100 int 2101 olstat(struct thread *td, struct olstat_args *uap) 2102 { 2103 struct stat sb; 2104 struct ostat osb; 2105 int error; 2106 2107 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2108 UIO_USERSPACE, &sb, NULL); 2109 if (error != 0) 2110 return (error); 2111 cvtstat(&sb, &osb); 2112 return (copyout(&osb, uap->ub, sizeof (osb))); 2113 } 2114 2115 /* 2116 * Convert from an old to a new stat structure. 2117 * XXX: many values are blindly truncated. 2118 */ 2119 void 2120 cvtstat(struct stat *st, struct ostat *ost) 2121 { 2122 2123 bzero(ost, sizeof(*ost)); 2124 ost->st_dev = st->st_dev; 2125 ost->st_ino = st->st_ino; 2126 ost->st_mode = st->st_mode; 2127 ost->st_nlink = st->st_nlink; 2128 ost->st_uid = st->st_uid; 2129 ost->st_gid = st->st_gid; 2130 ost->st_rdev = st->st_rdev; 2131 ost->st_size = MIN(st->st_size, INT32_MAX); 2132 ost->st_atim = st->st_atim; 2133 ost->st_mtim = st->st_mtim; 2134 ost->st_ctim = st->st_ctim; 2135 ost->st_blksize = st->st_blksize; 2136 ost->st_blocks = st->st_blocks; 2137 ost->st_flags = st->st_flags; 2138 ost->st_gen = st->st_gen; 2139 } 2140 #endif /* COMPAT_43 */ 2141 2142 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2143 int ino64_trunc_error; 2144 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2145 &ino64_trunc_error, 0, 2146 "Error on truncation of device, file or inode number, or link count"); 2147 2148 int 2149 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2150 { 2151 2152 ost->st_dev = st->st_dev; 2153 if (ost->st_dev != st->st_dev) { 2154 switch (ino64_trunc_error) { 2155 default: 2156 /* 2157 * Since dev_t is almost raw, don't clamp to the 2158 * maximum for case 2, but ignore the error. 
2159 */ 2160 break; 2161 case 1: 2162 return (EOVERFLOW); 2163 } 2164 } 2165 ost->st_ino = st->st_ino; 2166 if (ost->st_ino != st->st_ino) { 2167 switch (ino64_trunc_error) { 2168 default: 2169 case 0: 2170 break; 2171 case 1: 2172 return (EOVERFLOW); 2173 case 2: 2174 ost->st_ino = UINT32_MAX; 2175 break; 2176 } 2177 } 2178 ost->st_mode = st->st_mode; 2179 ost->st_nlink = st->st_nlink; 2180 if (ost->st_nlink != st->st_nlink) { 2181 switch (ino64_trunc_error) { 2182 default: 2183 case 0: 2184 break; 2185 case 1: 2186 return (EOVERFLOW); 2187 case 2: 2188 ost->st_nlink = UINT16_MAX; 2189 break; 2190 } 2191 } 2192 ost->st_uid = st->st_uid; 2193 ost->st_gid = st->st_gid; 2194 ost->st_rdev = st->st_rdev; 2195 if (ost->st_rdev != st->st_rdev) { 2196 switch (ino64_trunc_error) { 2197 default: 2198 break; 2199 case 1: 2200 return (EOVERFLOW); 2201 } 2202 } 2203 ost->st_atim = st->st_atim; 2204 ost->st_mtim = st->st_mtim; 2205 ost->st_ctim = st->st_ctim; 2206 ost->st_size = st->st_size; 2207 ost->st_blocks = st->st_blocks; 2208 ost->st_blksize = st->st_blksize; 2209 ost->st_flags = st->st_flags; 2210 ost->st_gen = st->st_gen; 2211 ost->st_lspare = 0; 2212 ost->st_birthtim = st->st_birthtim; 2213 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2214 sizeof(*ost) - offsetof(struct freebsd11_stat, 2215 st_birthtim) - sizeof(ost->st_birthtim)); 2216 return (0); 2217 } 2218 2219 int 2220 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2221 { 2222 struct stat sb; 2223 struct freebsd11_stat osb; 2224 int error; 2225 2226 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2227 &sb, NULL); 2228 if (error != 0) 2229 return (error); 2230 error = freebsd11_cvtstat(&sb, &osb); 2231 if (error == 0) 2232 error = copyout(&osb, uap->ub, sizeof(osb)); 2233 return (error); 2234 } 2235 2236 int 2237 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2238 { 2239 struct stat sb; 2240 struct freebsd11_stat osb; 2241 int error; 2242 2243 
error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2244 UIO_USERSPACE, &sb, NULL); 2245 if (error != 0) 2246 return (error); 2247 error = freebsd11_cvtstat(&sb, &osb); 2248 if (error == 0) 2249 error = copyout(&osb, uap->ub, sizeof(osb)); 2250 return (error); 2251 } 2252 2253 int 2254 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2255 { 2256 struct fhandle fh; 2257 struct stat sb; 2258 struct freebsd11_stat osb; 2259 int error; 2260 2261 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2262 if (error != 0) 2263 return (error); 2264 error = kern_fhstat(td, fh, &sb); 2265 if (error != 0) 2266 return (error); 2267 error = freebsd11_cvtstat(&sb, &osb); 2268 if (error == 0) 2269 error = copyout(&osb, uap->sb, sizeof(osb)); 2270 return (error); 2271 } 2272 2273 int 2274 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2275 { 2276 struct stat sb; 2277 struct freebsd11_stat osb; 2278 int error; 2279 2280 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2281 UIO_USERSPACE, &sb, NULL); 2282 if (error != 0) 2283 return (error); 2284 error = freebsd11_cvtstat(&sb, &osb); 2285 if (error == 0) 2286 error = copyout(&osb, uap->buf, sizeof(osb)); 2287 return (error); 2288 } 2289 #endif /* COMPAT_FREEBSD11 */ 2290 2291 /* 2292 * Get file status 2293 */ 2294 #ifndef _SYS_SYSPROTO_H_ 2295 struct fstatat_args { 2296 int fd; 2297 char *path; 2298 struct stat *buf; 2299 int flag; 2300 } 2301 #endif 2302 int 2303 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2304 { 2305 struct stat sb; 2306 int error; 2307 2308 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2309 UIO_USERSPACE, &sb, NULL); 2310 if (error == 0) 2311 error = copyout(&sb, uap->buf, sizeof (sb)); 2312 return (error); 2313 } 2314 2315 int 2316 kern_statat(struct thread *td, int flag, int fd, const char *path, 2317 enum uio_seg pathseg, struct stat *sbp, 2318 void (*hook)(struct vnode *vp, struct stat *sbp)) 2319 { 2320 struct nameidata 
nd; 2321 int error; 2322 2323 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2324 return (EINVAL); 2325 2326 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) != 0 ? 2327 NOFOLLOW : FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | 2328 LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2329 &cap_fstat_rights, td); 2330 2331 if ((error = namei(&nd)) != 0) 2332 return (error); 2333 error = vn_stat(nd.ni_vp, sbp, td->td_ucred, NOCRED, td); 2334 if (error == 0) { 2335 SDT_PROBE2(vfs, , stat, mode, path, sbp->st_mode); 2336 if (S_ISREG(sbp->st_mode)) 2337 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2338 if (__predict_false(hook != NULL)) 2339 hook(nd.ni_vp, sbp); 2340 } 2341 NDFREE(&nd, NDF_ONLY_PNBUF); 2342 vput(nd.ni_vp); 2343 if (error != 0) 2344 return (error); 2345 #ifdef __STAT_TIME_T_EXT 2346 sbp->st_atim_ext = 0; 2347 sbp->st_mtim_ext = 0; 2348 sbp->st_ctim_ext = 0; 2349 sbp->st_btim_ext = 0; 2350 #endif 2351 #ifdef KTRACE 2352 if (KTRPOINT(td, KTR_STRUCT)) 2353 ktrstat(sbp); 2354 #endif 2355 return (0); 2356 } 2357 2358 #if defined(COMPAT_FREEBSD11) 2359 /* 2360 * Implementation of the NetBSD [l]stat() functions. 
2361 */ 2362 void 2363 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2364 { 2365 2366 bzero(nsb, sizeof(*nsb)); 2367 nsb->st_dev = sb->st_dev; 2368 nsb->st_ino = sb->st_ino; 2369 nsb->st_mode = sb->st_mode; 2370 nsb->st_nlink = sb->st_nlink; 2371 nsb->st_uid = sb->st_uid; 2372 nsb->st_gid = sb->st_gid; 2373 nsb->st_rdev = sb->st_rdev; 2374 nsb->st_atim = sb->st_atim; 2375 nsb->st_mtim = sb->st_mtim; 2376 nsb->st_ctim = sb->st_ctim; 2377 nsb->st_size = sb->st_size; 2378 nsb->st_blocks = sb->st_blocks; 2379 nsb->st_blksize = sb->st_blksize; 2380 nsb->st_flags = sb->st_flags; 2381 nsb->st_gen = sb->st_gen; 2382 nsb->st_birthtim = sb->st_birthtim; 2383 } 2384 2385 #ifndef _SYS_SYSPROTO_H_ 2386 struct freebsd11_nstat_args { 2387 char *path; 2388 struct nstat *ub; 2389 }; 2390 #endif 2391 int 2392 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2393 { 2394 struct stat sb; 2395 struct nstat nsb; 2396 int error; 2397 2398 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2399 &sb, NULL); 2400 if (error != 0) 2401 return (error); 2402 freebsd11_cvtnstat(&sb, &nsb); 2403 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2404 } 2405 2406 /* 2407 * NetBSD lstat. Get file status; this version does not follow links. 2408 */ 2409 #ifndef _SYS_SYSPROTO_H_ 2410 struct freebsd11_nlstat_args { 2411 char *path; 2412 struct nstat *ub; 2413 }; 2414 #endif 2415 int 2416 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2417 { 2418 struct stat sb; 2419 struct nstat nsb; 2420 int error; 2421 2422 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2423 UIO_USERSPACE, &sb, NULL); 2424 if (error != 0) 2425 return (error); 2426 freebsd11_cvtnstat(&sb, &nsb); 2427 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2428 } 2429 #endif /* COMPAT_FREEBSD11 */ 2430 2431 /* 2432 * Get configurable pathname variables. 
2433 */ 2434 #ifndef _SYS_SYSPROTO_H_ 2435 struct pathconf_args { 2436 char *path; 2437 int name; 2438 }; 2439 #endif 2440 int 2441 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2442 { 2443 long value; 2444 int error; 2445 2446 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2447 &value); 2448 if (error == 0) 2449 td->td_retval[0] = value; 2450 return (error); 2451 } 2452 2453 #ifndef _SYS_SYSPROTO_H_ 2454 struct lpathconf_args { 2455 char *path; 2456 int name; 2457 }; 2458 #endif 2459 int 2460 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2461 { 2462 long value; 2463 int error; 2464 2465 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2466 NOFOLLOW, &value); 2467 if (error == 0) 2468 td->td_retval[0] = value; 2469 return (error); 2470 } 2471 2472 int 2473 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2474 int name, u_long flags, long *valuep) 2475 { 2476 struct nameidata nd; 2477 int error; 2478 2479 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2480 pathseg, path, td); 2481 if ((error = namei(&nd)) != 0) 2482 return (error); 2483 NDFREE(&nd, NDF_ONLY_PNBUF); 2484 2485 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2486 vput(nd.ni_vp); 2487 return (error); 2488 } 2489 2490 /* 2491 * Return target name of a symbolic link. 
 */
#ifndef _SYS_SYSPROTO_H_
struct readlink_args {
	char	*path;
	char	*buf;
	size_t	count;
};
#endif
/*
 * readlink(2) entry point: the link is looked up relative to AT_FDCWD.
 */
int
sys_readlink(struct thread *td, struct readlink_args *uap)
{

	return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->buf, UIO_USERSPACE, uap->count));
}
#ifndef _SYS_SYSPROTO_H_
struct readlinkat_args {
	int	fd;
	char	*path;
	char	*buf;
	size_t	bufsize;
};
#endif
/*
 * readlinkat(2) entry point: the link is looked up relative to uap->fd.
 */
int
sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
{

	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->buf, UIO_USERSPACE, uap->bufsize));
}

/*
 * Common implementation of readlink(2) and readlinkat(2).
 *
 * Looks up path relative to fd without following a trailing symbolic
 * link and reads its target into buf through kern_readlink_vp().
 * Returns EINVAL when count exceeds IOSIZE_MAX, or the error from the
 * lookup or kern_readlink_vp().
 */
int
kern_readlinkat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count)
{
	struct vnode *vp;
	struct nameidata nd;
	int error;

	if (count > IOSIZE_MAX)
		return (EINVAL);

	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, td);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	error = kern_readlink_vp(vp, buf, bufseg, count, td);
	vput(vp);

	return (error);
}

/*
 * Helper function to readlink from a vnode
 *
 * vp must be locked by the caller (asserted below).  Returns EINVAL
 * when vp is neither a VLNK nor flagged VV_READLINK.  The number of
 * bytes transferred is stored in td->td_retval[0].
 */
static int
kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count,
    struct thread *td)
{
	struct iovec aiov;
	struct uio auio;
	int error;

	ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked");
#ifdef MAC
	error = mac_vnode_check_readlink(td->td_ucred, vp);
	if (error != 0)
		return (error);
#endif
	if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0)
		return (EINVAL);

	/* Describe the destination buffer as a single-segment read. */
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = bufseg;
	auio.uio_td = td;
	auio.uio_resid = count;
	error = VOP_READLINK(vp, &auio, td->td_ucred);
	/* Set even when VOP_READLINK() reports an error. */
	td->td_retval[0] = count - auio.uio_resid;
	return (error);
}

/*
 * Common implementation code for chflags() and fchflags().
 *
 * Sets the file flags of vp to flags through VOP_SETATTR().  vp is
 * locked exclusively and unlocked again inside this function.
 */
static int
setfflags(struct thread *td, struct vnode *vp, u_long flags)
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	/* We can't support the value matching VNOVAL. */
	if (flags == VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
		if (error != 0)
			return (error);
	}

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_flags = flags;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	return (error);
}

/*
 * Change flags of a file given a path name.
2625 */ 2626 #ifndef _SYS_SYSPROTO_H_ 2627 struct chflags_args { 2628 const char *path; 2629 u_long flags; 2630 }; 2631 #endif 2632 int 2633 sys_chflags(struct thread *td, struct chflags_args *uap) 2634 { 2635 2636 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2637 uap->flags, 0)); 2638 } 2639 2640 #ifndef _SYS_SYSPROTO_H_ 2641 struct chflagsat_args { 2642 int fd; 2643 const char *path; 2644 u_long flags; 2645 int atflag; 2646 } 2647 #endif 2648 int 2649 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2650 { 2651 2652 if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2653 return (EINVAL); 2654 2655 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2656 uap->flags, uap->atflag)); 2657 } 2658 2659 /* 2660 * Same as chflags() but doesn't follow symlinks. 2661 */ 2662 #ifndef _SYS_SYSPROTO_H_ 2663 struct lchflags_args { 2664 const char *path; 2665 u_long flags; 2666 }; 2667 #endif 2668 int 2669 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2670 { 2671 2672 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2673 uap->flags, AT_SYMLINK_NOFOLLOW)); 2674 } 2675 2676 static int 2677 kern_chflagsat(struct thread *td, int fd, const char *path, 2678 enum uio_seg pathseg, u_long flags, int atflag) 2679 { 2680 struct nameidata nd; 2681 int error, follow; 2682 2683 AUDIT_ARG_FFLAGS(flags); 2684 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2685 follow |= (atflag & AT_BENEATH) != 0 ? BENEATH : 0; 2686 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2687 &cap_fchflags_rights, td); 2688 if ((error = namei(&nd)) != 0) 2689 return (error); 2690 NDFREE(&nd, NDF_ONLY_PNBUF); 2691 error = setfflags(td, nd.ni_vp, flags); 2692 vrele(nd.ni_vp); 2693 return (error); 2694 } 2695 2696 /* 2697 * Change flags of a file given a file descriptor. 
2698 */ 2699 #ifndef _SYS_SYSPROTO_H_ 2700 struct fchflags_args { 2701 int fd; 2702 u_long flags; 2703 }; 2704 #endif 2705 int 2706 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2707 { 2708 struct file *fp; 2709 int error; 2710 2711 AUDIT_ARG_FD(uap->fd); 2712 AUDIT_ARG_FFLAGS(uap->flags); 2713 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2714 &fp); 2715 if (error != 0) 2716 return (error); 2717 #ifdef AUDIT 2718 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2719 AUDIT_ARG_VNODE1(fp->f_vnode); 2720 VOP_UNLOCK(fp->f_vnode, 0); 2721 #endif 2722 error = setfflags(td, fp->f_vnode, uap->flags); 2723 fdrop(fp, td); 2724 return (error); 2725 } 2726 2727 /* 2728 * Common implementation code for chmod(), lchmod() and fchmod(). 2729 */ 2730 int 2731 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2732 { 2733 struct mount *mp; 2734 struct vattr vattr; 2735 int error; 2736 2737 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2738 return (error); 2739 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2740 VATTR_NULL(&vattr); 2741 vattr.va_mode = mode & ALLPERMS; 2742 #ifdef MAC 2743 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2744 if (error == 0) 2745 #endif 2746 error = VOP_SETATTR(vp, &vattr, cred); 2747 VOP_UNLOCK(vp, 0); 2748 vn_finished_write(mp); 2749 return (error); 2750 } 2751 2752 /* 2753 * Change mode of a file given path name. 
2754 */ 2755 #ifndef _SYS_SYSPROTO_H_ 2756 struct chmod_args { 2757 char *path; 2758 int mode; 2759 }; 2760 #endif 2761 int 2762 sys_chmod(struct thread *td, struct chmod_args *uap) 2763 { 2764 2765 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2766 uap->mode, 0)); 2767 } 2768 2769 #ifndef _SYS_SYSPROTO_H_ 2770 struct fchmodat_args { 2771 int dirfd; 2772 char *path; 2773 mode_t mode; 2774 int flag; 2775 } 2776 #endif 2777 int 2778 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2779 { 2780 2781 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2782 return (EINVAL); 2783 2784 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2785 uap->mode, uap->flag)); 2786 } 2787 2788 /* 2789 * Change mode of a file given path name (don't follow links.) 2790 */ 2791 #ifndef _SYS_SYSPROTO_H_ 2792 struct lchmod_args { 2793 char *path; 2794 int mode; 2795 }; 2796 #endif 2797 int 2798 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2799 { 2800 2801 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2802 uap->mode, AT_SYMLINK_NOFOLLOW)); 2803 } 2804 2805 int 2806 kern_fchmodat(struct thread *td, int fd, const char *path, 2807 enum uio_seg pathseg, mode_t mode, int flag) 2808 { 2809 struct nameidata nd; 2810 int error, follow; 2811 2812 AUDIT_ARG_MODE(mode); 2813 follow = (flag & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : FOLLOW; 2814 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2815 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2816 &cap_fchmod_rights, td); 2817 if ((error = namei(&nd)) != 0) 2818 return (error); 2819 NDFREE(&nd, NDF_ONLY_PNBUF); 2820 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2821 vrele(nd.ni_vp); 2822 return (error); 2823 } 2824 2825 /* 2826 * Change mode of a file given a file descriptor. 
2827 */ 2828 #ifndef _SYS_SYSPROTO_H_ 2829 struct fchmod_args { 2830 int fd; 2831 int mode; 2832 }; 2833 #endif 2834 int 2835 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2836 { 2837 struct file *fp; 2838 int error; 2839 2840 AUDIT_ARG_FD(uap->fd); 2841 AUDIT_ARG_MODE(uap->mode); 2842 2843 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2844 if (error != 0) 2845 return (error); 2846 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2847 fdrop(fp, td); 2848 return (error); 2849 } 2850 2851 /* 2852 * Common implementation for chown(), lchown(), and fchown() 2853 */ 2854 int 2855 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2856 gid_t gid) 2857 { 2858 struct mount *mp; 2859 struct vattr vattr; 2860 int error; 2861 2862 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2863 return (error); 2864 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2865 VATTR_NULL(&vattr); 2866 vattr.va_uid = uid; 2867 vattr.va_gid = gid; 2868 #ifdef MAC 2869 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2870 vattr.va_gid); 2871 if (error == 0) 2872 #endif 2873 error = VOP_SETATTR(vp, &vattr, cred); 2874 VOP_UNLOCK(vp, 0); 2875 vn_finished_write(mp); 2876 return (error); 2877 } 2878 2879 /* 2880 * Set ownership given a path name. 
2881 */ 2882 #ifndef _SYS_SYSPROTO_H_ 2883 struct chown_args { 2884 char *path; 2885 int uid; 2886 int gid; 2887 }; 2888 #endif 2889 int 2890 sys_chown(struct thread *td, struct chown_args *uap) 2891 { 2892 2893 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2894 uap->gid, 0)); 2895 } 2896 2897 #ifndef _SYS_SYSPROTO_H_ 2898 struct fchownat_args { 2899 int fd; 2900 const char * path; 2901 uid_t uid; 2902 gid_t gid; 2903 int flag; 2904 }; 2905 #endif 2906 int 2907 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2908 { 2909 2910 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2911 return (EINVAL); 2912 2913 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2914 uap->gid, uap->flag)); 2915 } 2916 2917 int 2918 kern_fchownat(struct thread *td, int fd, const char *path, 2919 enum uio_seg pathseg, int uid, int gid, int flag) 2920 { 2921 struct nameidata nd; 2922 int error, follow; 2923 2924 AUDIT_ARG_OWNER(uid, gid); 2925 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2926 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2927 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2928 &cap_fchown_rights, td); 2929 2930 if ((error = namei(&nd)) != 0) 2931 return (error); 2932 NDFREE(&nd, NDF_ONLY_PNBUF); 2933 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2934 vrele(nd.ni_vp); 2935 return (error); 2936 } 2937 2938 /* 2939 * Set ownership given a path name, do not cross symlinks. 2940 */ 2941 #ifndef _SYS_SYSPROTO_H_ 2942 struct lchown_args { 2943 char *path; 2944 int uid; 2945 int gid; 2946 }; 2947 #endif 2948 int 2949 sys_lchown(struct thread *td, struct lchown_args *uap) 2950 { 2951 2952 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2953 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2954 } 2955 2956 /* 2957 * Set ownership given a file descriptor. 
2958 */ 2959 #ifndef _SYS_SYSPROTO_H_ 2960 struct fchown_args { 2961 int fd; 2962 int uid; 2963 int gid; 2964 }; 2965 #endif 2966 int 2967 sys_fchown(struct thread *td, struct fchown_args *uap) 2968 { 2969 struct file *fp; 2970 int error; 2971 2972 AUDIT_ARG_FD(uap->fd); 2973 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2974 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 2975 if (error != 0) 2976 return (error); 2977 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2978 fdrop(fp, td); 2979 return (error); 2980 } 2981 2982 /* 2983 * Common implementation code for utimes(), lutimes(), and futimes(). 2984 */ 2985 static int 2986 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 2987 struct timespec *tsp) 2988 { 2989 struct timeval tv[2]; 2990 const struct timeval *tvp; 2991 int error; 2992 2993 if (usrtvp == NULL) { 2994 vfs_timestamp(&tsp[0]); 2995 tsp[1] = tsp[0]; 2996 } else { 2997 if (tvpseg == UIO_SYSSPACE) { 2998 tvp = usrtvp; 2999 } else { 3000 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3001 return (error); 3002 tvp = tv; 3003 } 3004 3005 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3006 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3007 return (EINVAL); 3008 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3009 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3010 } 3011 return (0); 3012 } 3013 3014 /* 3015 * Common implementation code for futimens(), utimensat(). 
3016 */ 3017 #define UTIMENS_NULL 0x1 3018 #define UTIMENS_EXIT 0x2 3019 static int 3020 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3021 struct timespec *tsp, int *retflags) 3022 { 3023 struct timespec tsnow; 3024 int error; 3025 3026 vfs_timestamp(&tsnow); 3027 *retflags = 0; 3028 if (usrtsp == NULL) { 3029 tsp[0] = tsnow; 3030 tsp[1] = tsnow; 3031 *retflags |= UTIMENS_NULL; 3032 return (0); 3033 } 3034 if (tspseg == UIO_SYSSPACE) { 3035 tsp[0] = usrtsp[0]; 3036 tsp[1] = usrtsp[1]; 3037 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3038 return (error); 3039 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3040 *retflags |= UTIMENS_EXIT; 3041 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3042 *retflags |= UTIMENS_NULL; 3043 if (tsp[0].tv_nsec == UTIME_OMIT) 3044 tsp[0].tv_sec = VNOVAL; 3045 else if (tsp[0].tv_nsec == UTIME_NOW) 3046 tsp[0] = tsnow; 3047 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3048 return (EINVAL); 3049 if (tsp[1].tv_nsec == UTIME_OMIT) 3050 tsp[1].tv_sec = VNOVAL; 3051 else if (tsp[1].tv_nsec == UTIME_NOW) 3052 tsp[1] = tsnow; 3053 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3054 return (EINVAL); 3055 3056 return (0); 3057 } 3058 3059 /* 3060 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3061 * and utimensat(). 
3062 */ 3063 static int 3064 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3065 int numtimes, int nullflag) 3066 { 3067 struct mount *mp; 3068 struct vattr vattr; 3069 int error, setbirthtime; 3070 3071 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3072 return (error); 3073 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3074 setbirthtime = 0; 3075 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3076 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3077 setbirthtime = 1; 3078 VATTR_NULL(&vattr); 3079 vattr.va_atime = ts[0]; 3080 vattr.va_mtime = ts[1]; 3081 if (setbirthtime) 3082 vattr.va_birthtime = ts[1]; 3083 if (numtimes > 2) 3084 vattr.va_birthtime = ts[2]; 3085 if (nullflag) 3086 vattr.va_vaflags |= VA_UTIMES_NULL; 3087 #ifdef MAC 3088 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3089 vattr.va_mtime); 3090 #endif 3091 if (error == 0) 3092 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3093 VOP_UNLOCK(vp, 0); 3094 vn_finished_write(mp); 3095 return (error); 3096 } 3097 3098 /* 3099 * Set the access and modification times of a file. 
3100 */ 3101 #ifndef _SYS_SYSPROTO_H_ 3102 struct utimes_args { 3103 char *path; 3104 struct timeval *tptr; 3105 }; 3106 #endif 3107 int 3108 sys_utimes(struct thread *td, struct utimes_args *uap) 3109 { 3110 3111 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3112 uap->tptr, UIO_USERSPACE)); 3113 } 3114 3115 #ifndef _SYS_SYSPROTO_H_ 3116 struct futimesat_args { 3117 int fd; 3118 const char * path; 3119 const struct timeval * times; 3120 }; 3121 #endif 3122 int 3123 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3124 { 3125 3126 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3127 uap->times, UIO_USERSPACE)); 3128 } 3129 3130 int 3131 kern_utimesat(struct thread *td, int fd, const char *path, 3132 enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) 3133 { 3134 struct nameidata nd; 3135 struct timespec ts[2]; 3136 int error; 3137 3138 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3139 return (error); 3140 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3141 &cap_futimes_rights, td); 3142 3143 if ((error = namei(&nd)) != 0) 3144 return (error); 3145 NDFREE(&nd, NDF_ONLY_PNBUF); 3146 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3147 vrele(nd.ni_vp); 3148 return (error); 3149 } 3150 3151 /* 3152 * Set the access and modification times of a file. 
3153 */ 3154 #ifndef _SYS_SYSPROTO_H_ 3155 struct lutimes_args { 3156 char *path; 3157 struct timeval *tptr; 3158 }; 3159 #endif 3160 int 3161 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3162 { 3163 3164 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3165 UIO_USERSPACE)); 3166 } 3167 3168 int 3169 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3170 struct timeval *tptr, enum uio_seg tptrseg) 3171 { 3172 struct timespec ts[2]; 3173 struct nameidata nd; 3174 int error; 3175 3176 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3177 return (error); 3178 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3179 if ((error = namei(&nd)) != 0) 3180 return (error); 3181 NDFREE(&nd, NDF_ONLY_PNBUF); 3182 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3183 vrele(nd.ni_vp); 3184 return (error); 3185 } 3186 3187 /* 3188 * Set the access and modification times of a file. 3189 */ 3190 #ifndef _SYS_SYSPROTO_H_ 3191 struct futimes_args { 3192 int fd; 3193 struct timeval *tptr; 3194 }; 3195 #endif 3196 int 3197 sys_futimes(struct thread *td, struct futimes_args *uap) 3198 { 3199 3200 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3201 } 3202 3203 int 3204 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3205 enum uio_seg tptrseg) 3206 { 3207 struct timespec ts[2]; 3208 struct file *fp; 3209 int error; 3210 3211 AUDIT_ARG_FD(fd); 3212 error = getutimes(tptr, tptrseg, ts); 3213 if (error != 0) 3214 return (error); 3215 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3216 if (error != 0) 3217 return (error); 3218 #ifdef AUDIT 3219 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3220 AUDIT_ARG_VNODE1(fp->f_vnode); 3221 VOP_UNLOCK(fp->f_vnode, 0); 3222 #endif 3223 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3224 fdrop(fp, td); 3225 return (error); 3226 } 3227 3228 int 3229 sys_futimens(struct thread *td, struct futimens_args *uap) 3230 { 3231 3232 return (kern_futimens(td, 
uap->fd, uap->times, UIO_USERSPACE)); 3233 } 3234 3235 int 3236 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3237 enum uio_seg tptrseg) 3238 { 3239 struct timespec ts[2]; 3240 struct file *fp; 3241 int error, flags; 3242 3243 AUDIT_ARG_FD(fd); 3244 error = getutimens(tptr, tptrseg, ts, &flags); 3245 if (error != 0) 3246 return (error); 3247 if (flags & UTIMENS_EXIT) 3248 return (0); 3249 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3250 if (error != 0) 3251 return (error); 3252 #ifdef AUDIT 3253 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3254 AUDIT_ARG_VNODE1(fp->f_vnode); 3255 VOP_UNLOCK(fp->f_vnode, 0); 3256 #endif 3257 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3258 fdrop(fp, td); 3259 return (error); 3260 } 3261 3262 int 3263 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3264 { 3265 3266 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3267 uap->times, UIO_USERSPACE, uap->flag)); 3268 } 3269 3270 int 3271 kern_utimensat(struct thread *td, int fd, const char *path, 3272 enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, 3273 int flag) 3274 { 3275 struct nameidata nd; 3276 struct timespec ts[2]; 3277 int error, flags; 3278 3279 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 3280 return (EINVAL); 3281 3282 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3283 return (error); 3284 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3285 FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | AUDITVNODE1, 3286 pathseg, path, fd, &cap_futimes_rights, td); 3287 if ((error = namei(&nd)) != 0) 3288 return (error); 3289 /* 3290 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3291 * POSIX states: 3292 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3293 * "Search permission is denied by a component of the path prefix." 
3294 */ 3295 NDFREE(&nd, NDF_ONLY_PNBUF); 3296 if ((flags & UTIMENS_EXIT) == 0) 3297 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3298 vrele(nd.ni_vp); 3299 return (error); 3300 } 3301 3302 /* 3303 * Truncate a file given its path name. 3304 */ 3305 #ifndef _SYS_SYSPROTO_H_ 3306 struct truncate_args { 3307 char *path; 3308 int pad; 3309 off_t length; 3310 }; 3311 #endif 3312 int 3313 sys_truncate(struct thread *td, struct truncate_args *uap) 3314 { 3315 3316 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3317 } 3318 3319 int 3320 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3321 off_t length) 3322 { 3323 struct mount *mp; 3324 struct vnode *vp; 3325 void *rl_cookie; 3326 struct vattr vattr; 3327 struct nameidata nd; 3328 int error; 3329 3330 if (length < 0) 3331 return(EINVAL); 3332 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3333 if ((error = namei(&nd)) != 0) 3334 return (error); 3335 vp = nd.ni_vp; 3336 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3337 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3338 vn_rangelock_unlock(vp, rl_cookie); 3339 vrele(vp); 3340 return (error); 3341 } 3342 NDFREE(&nd, NDF_ONLY_PNBUF); 3343 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3344 if (vp->v_type == VDIR) 3345 error = EISDIR; 3346 #ifdef MAC 3347 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3348 } 3349 #endif 3350 else if ((error = vn_writechk(vp)) == 0 && 3351 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3352 VATTR_NULL(&vattr); 3353 vattr.va_size = length; 3354 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3355 } 3356 VOP_UNLOCK(vp, 0); 3357 vn_finished_write(mp); 3358 vn_rangelock_unlock(vp, rl_cookie); 3359 vrele(vp); 3360 return (error); 3361 } 3362 3363 #if defined(COMPAT_43) 3364 /* 3365 * Truncate a file given its path name. 
3366 */ 3367 #ifndef _SYS_SYSPROTO_H_ 3368 struct otruncate_args { 3369 char *path; 3370 long length; 3371 }; 3372 #endif 3373 int 3374 otruncate(struct thread *td, struct otruncate_args *uap) 3375 { 3376 3377 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3378 } 3379 #endif /* COMPAT_43 */ 3380 3381 #if defined(COMPAT_FREEBSD6) 3382 /* Versions with the pad argument */ 3383 int 3384 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3385 { 3386 3387 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3388 } 3389 3390 int 3391 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3392 { 3393 3394 return (kern_ftruncate(td, uap->fd, uap->length)); 3395 } 3396 #endif 3397 3398 int 3399 kern_fsync(struct thread *td, int fd, bool fullsync) 3400 { 3401 struct vnode *vp; 3402 struct mount *mp; 3403 struct file *fp; 3404 int error, lock_flags; 3405 3406 AUDIT_ARG_FD(fd); 3407 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3408 if (error != 0) 3409 return (error); 3410 vp = fp->f_vnode; 3411 #if 0 3412 if (!fullsync) 3413 /* XXXKIB: compete outstanding aio writes */; 3414 #endif 3415 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3416 if (error != 0) 3417 goto drop; 3418 if (MNT_SHARED_WRITES(mp) || 3419 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3420 lock_flags = LK_SHARED; 3421 } else { 3422 lock_flags = LK_EXCLUSIVE; 3423 } 3424 vn_lock(vp, lock_flags | LK_RETRY); 3425 AUDIT_ARG_VNODE1(vp); 3426 if (vp->v_object != NULL) { 3427 VM_OBJECT_WLOCK(vp->v_object); 3428 vm_object_page_clean(vp->v_object, 0, 0, 0); 3429 VM_OBJECT_WUNLOCK(vp->v_object); 3430 } 3431 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3432 VOP_UNLOCK(vp, 0); 3433 vn_finished_write(mp); 3434 drop: 3435 fdrop(fp, td); 3436 return (error); 3437 } 3438 3439 /* 3440 * Sync an open file. 
3441 */ 3442 #ifndef _SYS_SYSPROTO_H_ 3443 struct fsync_args { 3444 int fd; 3445 }; 3446 #endif 3447 int 3448 sys_fsync(struct thread *td, struct fsync_args *uap) 3449 { 3450 3451 return (kern_fsync(td, uap->fd, true)); 3452 } 3453 3454 int 3455 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3456 { 3457 3458 return (kern_fsync(td, uap->fd, false)); 3459 } 3460 3461 /* 3462 * Rename files. Source and destination must either both be directories, or 3463 * both not be directories. If target is a directory, it must be empty. 3464 */ 3465 #ifndef _SYS_SYSPROTO_H_ 3466 struct rename_args { 3467 char *from; 3468 char *to; 3469 }; 3470 #endif 3471 int 3472 sys_rename(struct thread *td, struct rename_args *uap) 3473 { 3474 3475 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3476 uap->to, UIO_USERSPACE)); 3477 } 3478 3479 #ifndef _SYS_SYSPROTO_H_ 3480 struct renameat_args { 3481 int oldfd; 3482 char *old; 3483 int newfd; 3484 char *new; 3485 }; 3486 #endif 3487 int 3488 sys_renameat(struct thread *td, struct renameat_args *uap) 3489 { 3490 3491 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3492 UIO_USERSPACE)); 3493 } 3494 3495 int 3496 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3497 const char *new, enum uio_seg pathseg) 3498 { 3499 struct mount *mp = NULL; 3500 struct vnode *tvp, *fvp, *tdvp; 3501 struct nameidata fromnd, tond; 3502 int error; 3503 3504 again: 3505 bwillwrite(); 3506 #ifdef MAC 3507 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3508 AUDITVNODE1, pathseg, old, oldfd, 3509 &cap_renameat_source_rights, td); 3510 #else 3511 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3512 pathseg, old, oldfd, 3513 &cap_renameat_source_rights, td); 3514 #endif 3515 3516 if ((error = namei(&fromnd)) != 0) 3517 return (error); 3518 #ifdef MAC 3519 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3520 fromnd.ni_vp, &fromnd.ni_cnd); 3521 
VOP_UNLOCK(fromnd.ni_dvp, 0); 3522 if (fromnd.ni_dvp != fromnd.ni_vp) 3523 VOP_UNLOCK(fromnd.ni_vp, 0); 3524 #endif 3525 fvp = fromnd.ni_vp; 3526 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3527 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3528 &cap_renameat_target_rights, td); 3529 if (fromnd.ni_vp->v_type == VDIR) 3530 tond.ni_cnd.cn_flags |= WILLBEDIR; 3531 if ((error = namei(&tond)) != 0) { 3532 /* Translate error code for rename("dir1", "dir2/."). */ 3533 if (error == EISDIR && fvp->v_type == VDIR) 3534 error = EINVAL; 3535 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3536 vrele(fromnd.ni_dvp); 3537 vrele(fvp); 3538 goto out1; 3539 } 3540 tdvp = tond.ni_dvp; 3541 tvp = tond.ni_vp; 3542 error = vn_start_write(fvp, &mp, V_NOWAIT); 3543 if (error != 0) { 3544 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3545 NDFREE(&tond, NDF_ONLY_PNBUF); 3546 if (tvp != NULL) 3547 vput(tvp); 3548 if (tdvp == tvp) 3549 vrele(tdvp); 3550 else 3551 vput(tdvp); 3552 vrele(fromnd.ni_dvp); 3553 vrele(fvp); 3554 vrele(tond.ni_startdir); 3555 if (fromnd.ni_startdir != NULL) 3556 vrele(fromnd.ni_startdir); 3557 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3558 if (error != 0) 3559 return (error); 3560 goto again; 3561 } 3562 if (tvp != NULL) { 3563 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3564 error = ENOTDIR; 3565 goto out; 3566 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3567 error = EISDIR; 3568 goto out; 3569 } 3570 #ifdef CAPABILITIES 3571 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3572 /* 3573 * If the target already exists we require CAP_UNLINKAT 3574 * from 'newfd', when newfd was used for the lookup. 
3575 */ 3576 error = cap_check(&tond.ni_filecaps.fc_rights, 3577 &cap_unlinkat_rights); 3578 if (error != 0) 3579 goto out; 3580 } 3581 #endif 3582 } 3583 if (fvp == tdvp) { 3584 error = EINVAL; 3585 goto out; 3586 } 3587 /* 3588 * If the source is the same as the destination (that is, if they 3589 * are links to the same vnode), then there is nothing to do. 3590 */ 3591 if (fvp == tvp) 3592 error = -1; 3593 #ifdef MAC 3594 else 3595 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3596 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3597 #endif 3598 out: 3599 if (error == 0) { 3600 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3601 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3602 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3603 NDFREE(&tond, NDF_ONLY_PNBUF); 3604 } else { 3605 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3606 NDFREE(&tond, NDF_ONLY_PNBUF); 3607 if (tvp != NULL) 3608 vput(tvp); 3609 if (tdvp == tvp) 3610 vrele(tdvp); 3611 else 3612 vput(tdvp); 3613 vrele(fromnd.ni_dvp); 3614 vrele(fvp); 3615 } 3616 vrele(tond.ni_startdir); 3617 vn_finished_write(mp); 3618 out1: 3619 if (fromnd.ni_startdir) 3620 vrele(fromnd.ni_startdir); 3621 if (error == -1) 3622 return (0); 3623 return (error); 3624 } 3625 3626 /* 3627 * Make a directory file. 
3628 */ 3629 #ifndef _SYS_SYSPROTO_H_ 3630 struct mkdir_args { 3631 char *path; 3632 int mode; 3633 }; 3634 #endif 3635 int 3636 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3637 { 3638 3639 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3640 uap->mode)); 3641 } 3642 3643 #ifndef _SYS_SYSPROTO_H_ 3644 struct mkdirat_args { 3645 int fd; 3646 char *path; 3647 mode_t mode; 3648 }; 3649 #endif 3650 int 3651 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3652 { 3653 3654 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3655 } 3656 3657 int 3658 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3659 int mode) 3660 { 3661 struct mount *mp; 3662 struct vnode *vp; 3663 struct vattr vattr; 3664 struct nameidata nd; 3665 int error; 3666 3667 AUDIT_ARG_MODE(mode); 3668 restart: 3669 bwillwrite(); 3670 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3671 NOCACHE, segflg, path, fd, &cap_mkdirat_rights, 3672 td); 3673 nd.ni_cnd.cn_flags |= WILLBEDIR; 3674 if ((error = namei(&nd)) != 0) 3675 return (error); 3676 vp = nd.ni_vp; 3677 if (vp != NULL) { 3678 NDFREE(&nd, NDF_ONLY_PNBUF); 3679 /* 3680 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3681 * the strange behaviour of leaving the vnode unlocked 3682 * if the target is the same vnode as the parent. 
3683 */ 3684 if (vp == nd.ni_dvp) 3685 vrele(nd.ni_dvp); 3686 else 3687 vput(nd.ni_dvp); 3688 vrele(vp); 3689 return (EEXIST); 3690 } 3691 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3692 NDFREE(&nd, NDF_ONLY_PNBUF); 3693 vput(nd.ni_dvp); 3694 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3695 return (error); 3696 goto restart; 3697 } 3698 VATTR_NULL(&vattr); 3699 vattr.va_type = VDIR; 3700 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3701 #ifdef MAC 3702 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3703 &vattr); 3704 if (error != 0) 3705 goto out; 3706 #endif 3707 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3708 #ifdef MAC 3709 out: 3710 #endif 3711 NDFREE(&nd, NDF_ONLY_PNBUF); 3712 vput(nd.ni_dvp); 3713 if (error == 0) 3714 vput(nd.ni_vp); 3715 vn_finished_write(mp); 3716 return (error); 3717 } 3718 3719 /* 3720 * Remove a directory file. 3721 */ 3722 #ifndef _SYS_SYSPROTO_H_ 3723 struct rmdir_args { 3724 char *path; 3725 }; 3726 #endif 3727 int 3728 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3729 { 3730 3731 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3732 0)); 3733 } 3734 3735 int 3736 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3737 enum uio_seg pathseg, int flag) 3738 { 3739 struct mount *mp; 3740 struct vnode *vp; 3741 struct file *fp; 3742 struct nameidata nd; 3743 cap_rights_t rights; 3744 int error; 3745 3746 fp = NULL; 3747 if (fd != FD_NONE) { 3748 error = getvnode(td, fd, cap_rights_init(&rights, CAP_LOOKUP), 3749 &fp); 3750 if (error != 0) 3751 return (error); 3752 } 3753 3754 restart: 3755 bwillwrite(); 3756 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3757 ((flag & AT_BENEATH) != 0 ? 
BENEATH : 0), 3758 pathseg, path, dfd, &cap_unlinkat_rights, td); 3759 if ((error = namei(&nd)) != 0) 3760 goto fdout; 3761 vp = nd.ni_vp; 3762 if (vp->v_type != VDIR) { 3763 error = ENOTDIR; 3764 goto out; 3765 } 3766 /* 3767 * No rmdir "." please. 3768 */ 3769 if (nd.ni_dvp == vp) { 3770 error = EINVAL; 3771 goto out; 3772 } 3773 /* 3774 * The root of a mounted filesystem cannot be deleted. 3775 */ 3776 if (vp->v_vflag & VV_ROOT) { 3777 error = EBUSY; 3778 goto out; 3779 } 3780 3781 if (fp != NULL && fp->f_vnode != vp) { 3782 if ((fp->f_vnode->v_iflag & VI_DOOMED) != 0) 3783 error = EBADF; 3784 else 3785 error = EDEADLK; 3786 goto out; 3787 } 3788 3789 #ifdef MAC 3790 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3791 &nd.ni_cnd); 3792 if (error != 0) 3793 goto out; 3794 #endif 3795 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3796 NDFREE(&nd, NDF_ONLY_PNBUF); 3797 vput(vp); 3798 if (nd.ni_dvp == vp) 3799 vrele(nd.ni_dvp); 3800 else 3801 vput(nd.ni_dvp); 3802 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3803 goto fdout; 3804 goto restart; 3805 } 3806 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3807 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3808 vn_finished_write(mp); 3809 out: 3810 NDFREE(&nd, NDF_ONLY_PNBUF); 3811 vput(vp); 3812 if (nd.ni_dvp == vp) 3813 vrele(nd.ni_dvp); 3814 else 3815 vput(nd.ni_dvp); 3816 fdout: 3817 if (fp != NULL) 3818 fdrop(fp, td); 3819 return (error); 3820 } 3821 3822 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3823 int 3824 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3825 long *basep, void (*func)(struct freebsd11_dirent *)) 3826 { 3827 struct freebsd11_dirent dstdp; 3828 struct dirent *dp, *edp; 3829 char *dirbuf; 3830 off_t base; 3831 ssize_t resid, ucount; 3832 int error; 3833 3834 /* XXX arbitrary sanity limit on `count'. 
*/ 3835 count = min(count, 64 * 1024); 3836 3837 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3838 3839 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3840 UIO_SYSSPACE); 3841 if (error != 0) 3842 goto done; 3843 if (basep != NULL) 3844 *basep = base; 3845 3846 ucount = 0; 3847 for (dp = (struct dirent *)dirbuf, 3848 edp = (struct dirent *)&dirbuf[count - resid]; 3849 ucount < count && dp < edp; ) { 3850 if (dp->d_reclen == 0) 3851 break; 3852 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3853 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3854 continue; 3855 dstdp.d_type = dp->d_type; 3856 dstdp.d_namlen = dp->d_namlen; 3857 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3858 if (dstdp.d_fileno != dp->d_fileno) { 3859 switch (ino64_trunc_error) { 3860 default: 3861 case 0: 3862 break; 3863 case 1: 3864 error = EOVERFLOW; 3865 goto done; 3866 case 2: 3867 dstdp.d_fileno = UINT32_MAX; 3868 break; 3869 } 3870 } 3871 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 3872 ((dp->d_namlen + 1 + 3) &~ 3); 3873 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 3874 bzero(dstdp.d_name + dstdp.d_namlen, 3875 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 3876 dstdp.d_namlen); 3877 MPASS(dstdp.d_reclen <= dp->d_reclen); 3878 MPASS(ucount + dstdp.d_reclen <= count); 3879 if (func != NULL) 3880 func(&dstdp); 3881 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 3882 if (error != 0) 3883 break; 3884 dp = (struct dirent *)((char *)dp + dp->d_reclen); 3885 ucount += dstdp.d_reclen; 3886 } 3887 3888 done: 3889 free(dirbuf, M_TEMP); 3890 if (error == 0) 3891 td->td_retval[0] = ucount; 3892 return (error); 3893 } 3894 #endif /* COMPAT */ 3895 3896 #ifdef COMPAT_43 3897 static void 3898 ogetdirentries_cvt(struct freebsd11_dirent *dp) 3899 { 3900 #if (BYTE_ORDER == LITTLE_ENDIAN) 3901 /* 3902 * The expected low byte of dp->d_namlen is our dp->d_type. 3903 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
3904 */ 3905 dp->d_type = dp->d_namlen; 3906 dp->d_namlen = 0; 3907 #else 3908 /* 3909 * The dp->d_type is the high byte of the expected dp->d_namlen, 3910 * so must be zero'ed. 3911 */ 3912 dp->d_type = 0; 3913 #endif 3914 } 3915 3916 /* 3917 * Read a block of directory entries in a filesystem independent format. 3918 */ 3919 #ifndef _SYS_SYSPROTO_H_ 3920 struct ogetdirentries_args { 3921 int fd; 3922 char *buf; 3923 u_int count; 3924 long *basep; 3925 }; 3926 #endif 3927 int 3928 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3929 { 3930 long loff; 3931 int error; 3932 3933 error = kern_ogetdirentries(td, uap, &loff); 3934 if (error == 0) 3935 error = copyout(&loff, uap->basep, sizeof(long)); 3936 return (error); 3937 } 3938 3939 int 3940 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3941 long *ploff) 3942 { 3943 long base; 3944 int error; 3945 3946 /* XXX arbitrary sanity limit on `count'. */ 3947 if (uap->count > 64 * 1024) 3948 return (EINVAL); 3949 3950 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 3951 &base, ogetdirentries_cvt); 3952 3953 if (error == 0 && uap->basep != NULL) 3954 error = copyout(&base, uap->basep, sizeof(long)); 3955 3956 return (error); 3957 } 3958 #endif /* COMPAT_43 */ 3959 3960 #if defined(COMPAT_FREEBSD11) 3961 #ifndef _SYS_SYSPROTO_H_ 3962 struct freebsd11_getdirentries_args { 3963 int fd; 3964 char *buf; 3965 u_int count; 3966 long *basep; 3967 }; 3968 #endif 3969 int 3970 freebsd11_getdirentries(struct thread *td, 3971 struct freebsd11_getdirentries_args *uap) 3972 { 3973 long base; 3974 int error; 3975 3976 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 3977 &base, NULL); 3978 3979 if (error == 0 && uap->basep != NULL) 3980 error = copyout(&base, uap->basep, sizeof(long)); 3981 return (error); 3982 } 3983 3984 int 3985 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 3986 { 3987 struct 
freebsd11_getdirentries_args ap; 3988 3989 ap.fd = uap->fd; 3990 ap.buf = uap->buf; 3991 ap.count = uap->count; 3992 ap.basep = NULL; 3993 return (freebsd11_getdirentries(td, &ap)); 3994 } 3995 #endif /* COMPAT_FREEBSD11 */ 3996 3997 /* 3998 * Read a block of directory entries in a filesystem independent format. 3999 */ 4000 int 4001 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4002 { 4003 off_t base; 4004 int error; 4005 4006 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4007 NULL, UIO_USERSPACE); 4008 if (error != 0) 4009 return (error); 4010 if (uap->basep != NULL) 4011 error = copyout(&base, uap->basep, sizeof(off_t)); 4012 return (error); 4013 } 4014 4015 int 4016 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4017 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4018 { 4019 struct vnode *vp; 4020 struct file *fp; 4021 struct uio auio; 4022 struct iovec aiov; 4023 off_t loff; 4024 int error, eofflag; 4025 off_t foffset; 4026 4027 AUDIT_ARG_FD(fd); 4028 if (count > IOSIZE_MAX) 4029 return (EINVAL); 4030 auio.uio_resid = count; 4031 error = getvnode(td, fd, &cap_read_rights, &fp); 4032 if (error != 0) 4033 return (error); 4034 if ((fp->f_flag & FREAD) == 0) { 4035 fdrop(fp, td); 4036 return (EBADF); 4037 } 4038 vp = fp->f_vnode; 4039 foffset = foffset_lock(fp, 0); 4040 unionread: 4041 if (vp->v_type != VDIR) { 4042 error = EINVAL; 4043 goto fail; 4044 } 4045 aiov.iov_base = buf; 4046 aiov.iov_len = count; 4047 auio.uio_iov = &aiov; 4048 auio.uio_iovcnt = 1; 4049 auio.uio_rw = UIO_READ; 4050 auio.uio_segflg = bufseg; 4051 auio.uio_td = td; 4052 vn_lock(vp, LK_SHARED | LK_RETRY); 4053 AUDIT_ARG_VNODE1(vp); 4054 loff = auio.uio_offset = foffset; 4055 #ifdef MAC 4056 error = mac_vnode_check_readdir(td->td_ucred, vp); 4057 if (error == 0) 4058 #endif 4059 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4060 NULL); 4061 foffset = auio.uio_offset; 4062 if (error != 0) { 4063 
VOP_UNLOCK(vp, 0); 4064 goto fail; 4065 } 4066 if (count == auio.uio_resid && 4067 (vp->v_vflag & VV_ROOT) && 4068 (vp->v_mount->mnt_flag & MNT_UNION)) { 4069 struct vnode *tvp = vp; 4070 4071 vp = vp->v_mount->mnt_vnodecovered; 4072 VREF(vp); 4073 fp->f_vnode = vp; 4074 fp->f_data = vp; 4075 foffset = 0; 4076 vput(tvp); 4077 goto unionread; 4078 } 4079 VOP_UNLOCK(vp, 0); 4080 *basep = loff; 4081 if (residp != NULL) 4082 *residp = auio.uio_resid; 4083 td->td_retval[0] = count - auio.uio_resid; 4084 fail: 4085 foffset_unlock(fp, foffset, 0); 4086 fdrop(fp, td); 4087 return (error); 4088 } 4089 4090 /* 4091 * Set the mode mask for creation of filesystem nodes. 4092 */ 4093 #ifndef _SYS_SYSPROTO_H_ 4094 struct umask_args { 4095 int newmask; 4096 }; 4097 #endif 4098 int 4099 sys_umask(struct thread *td, struct umask_args *uap) 4100 { 4101 struct filedesc *fdp; 4102 4103 fdp = td->td_proc->p_fd; 4104 FILEDESC_XLOCK(fdp); 4105 td->td_retval[0] = fdp->fd_cmask; 4106 fdp->fd_cmask = uap->newmask & ALLPERMS; 4107 FILEDESC_XUNLOCK(fdp); 4108 return (0); 4109 } 4110 4111 /* 4112 * Void all references to file by ripping underlying filesystem away from 4113 * vnode. 
4114 */ 4115 #ifndef _SYS_SYSPROTO_H_ 4116 struct revoke_args { 4117 char *path; 4118 }; 4119 #endif 4120 int 4121 sys_revoke(struct thread *td, struct revoke_args *uap) 4122 { 4123 struct vnode *vp; 4124 struct vattr vattr; 4125 struct nameidata nd; 4126 int error; 4127 4128 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4129 uap->path, td); 4130 if ((error = namei(&nd)) != 0) 4131 return (error); 4132 vp = nd.ni_vp; 4133 NDFREE(&nd, NDF_ONLY_PNBUF); 4134 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4135 error = EINVAL; 4136 goto out; 4137 } 4138 #ifdef MAC 4139 error = mac_vnode_check_revoke(td->td_ucred, vp); 4140 if (error != 0) 4141 goto out; 4142 #endif 4143 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4144 if (error != 0) 4145 goto out; 4146 if (td->td_ucred->cr_uid != vattr.va_uid) { 4147 error = priv_check(td, PRIV_VFS_ADMIN); 4148 if (error != 0) 4149 goto out; 4150 } 4151 if (vcount(vp) > 1) 4152 VOP_REVOKE(vp, REVOKEALL); 4153 out: 4154 vput(vp); 4155 return (error); 4156 } 4157 4158 /* 4159 * Convert a user file descriptor to a kernel file entry and check that, if it 4160 * is a capability, the correct rights are present. A reference on the file 4161 * entry is held upon returning. 4162 */ 4163 int 4164 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4165 { 4166 struct file *fp; 4167 int error; 4168 4169 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4170 if (error != 0) 4171 return (error); 4172 4173 /* 4174 * The file could be not of the vnode type, or it may be not 4175 * yet fully initialized, in which case the f_vnode pointer 4176 * may be set, but f_ops is still badfileops. E.g., 4177 * devfs_open() transiently create such situation to 4178 * facilitate csw d_fdopen(). 4179 * 4180 * Dupfdopen() handling in kern_openat() installs the 4181 * half-baked file into the process descriptor table, allowing 4182 * other thread to dereference it. 
Guard against the race by 4183 * checking f_ops. 4184 */ 4185 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4186 fdrop(fp, td); 4187 return (EINVAL); 4188 } 4189 *fpp = fp; 4190 return (0); 4191 } 4192 4193 4194 /* 4195 * Get an (NFS) file handle. 4196 */ 4197 #ifndef _SYS_SYSPROTO_H_ 4198 struct lgetfh_args { 4199 char *fname; 4200 fhandle_t *fhp; 4201 }; 4202 #endif 4203 int 4204 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4205 { 4206 4207 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4208 UIO_USERSPACE, uap->fhp)); 4209 } 4210 4211 #ifndef _SYS_SYSPROTO_H_ 4212 struct getfh_args { 4213 char *fname; 4214 fhandle_t *fhp; 4215 }; 4216 #endif 4217 int 4218 sys_getfh(struct thread *td, struct getfh_args *uap) 4219 { 4220 4221 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4222 uap->fhp)); 4223 } 4224 4225 /* 4226 * syscall for the rpc.lockd to use to translate an open descriptor into 4227 * a NFS file handle. 4228 * 4229 * warning: do not remove the priv_check() call or this becomes one giant 4230 * security hole. 4231 */ 4232 #ifndef _SYS_SYSPROTO_H_ 4233 struct getfhat_args { 4234 int fd; 4235 char *path; 4236 fhandle_t *fhp; 4237 int flags; 4238 }; 4239 #endif 4240 int 4241 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4242 { 4243 4244 if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 4245 return (EINVAL); 4246 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4247 uap->fhp)); 4248 } 4249 4250 static int 4251 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4252 enum uio_seg pathseg, fhandle_t *fhp) 4253 { 4254 struct nameidata nd; 4255 fhandle_t fh; 4256 struct vnode *vp; 4257 int error; 4258 4259 error = priv_check(td, PRIV_VFS_GETFH); 4260 if (error != 0) 4261 return (error); 4262 NDINIT_AT(&nd, LOOKUP, ((flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 4263 FOLLOW) | ((flags & AT_BENEATH) != 0 ? 
BENEATH : 0) | LOCKLEAF | 4264 AUDITVNODE1, pathseg, path, fd, td); 4265 error = namei(&nd); 4266 if (error != 0) 4267 return (error); 4268 NDFREE(&nd, NDF_ONLY_PNBUF); 4269 vp = nd.ni_vp; 4270 bzero(&fh, sizeof(fh)); 4271 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4272 error = VOP_VPTOFH(vp, &fh.fh_fid); 4273 vput(vp); 4274 if (error == 0) 4275 error = copyout(&fh, fhp, sizeof (fh)); 4276 return (error); 4277 } 4278 4279 #ifndef _SYS_SYSPROTO_H_ 4280 struct fhlink_args { 4281 fhandle_t *fhp; 4282 const char *to; 4283 }; 4284 #endif 4285 int 4286 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4287 { 4288 4289 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4290 } 4291 4292 #ifndef _SYS_SYSPROTO_H_ 4293 struct fhlinkat_args { 4294 fhandle_t *fhp; 4295 int tofd; 4296 const char *to; 4297 }; 4298 #endif 4299 int 4300 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4301 { 4302 4303 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4304 } 4305 4306 static int 4307 kern_fhlinkat(struct thread *td, int fd, const char *path, 4308 enum uio_seg pathseg, fhandle_t *fhp) 4309 { 4310 fhandle_t fh; 4311 struct mount *mp; 4312 struct vnode *vp; 4313 int error; 4314 4315 error = priv_check(td, PRIV_VFS_GETFH); 4316 if (error != 0) 4317 return (error); 4318 error = copyin(fhp, &fh, sizeof(fh)); 4319 if (error != 0) 4320 return (error); 4321 do { 4322 bwillwrite(); 4323 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4324 return (ESTALE); 4325 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4326 vfs_unbusy(mp); 4327 if (error != 0) 4328 return (error); 4329 VOP_UNLOCK(vp, 0); 4330 } while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN); 4331 return (error); 4332 } 4333 4334 #ifndef _SYS_SYSPROTO_H_ 4335 struct fhreadlink_args { 4336 fhandle_t *fhp; 4337 char *buf; 4338 size_t bufsize; 4339 }; 4340 #endif 4341 int 4342 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4343 { 4344 fhandle_t 
fh; 4345 struct mount *mp; 4346 struct vnode *vp; 4347 int error; 4348 4349 error = priv_check(td, PRIV_VFS_GETFH); 4350 if (error != 0) 4351 return (error); 4352 if (uap->bufsize > IOSIZE_MAX) 4353 return (EINVAL); 4354 error = copyin(uap->fhp, &fh, sizeof(fh)); 4355 if (error != 0) 4356 return (error); 4357 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4358 return (ESTALE); 4359 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4360 vfs_unbusy(mp); 4361 if (error != 0) 4362 return (error); 4363 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4364 vput(vp); 4365 return (error); 4366 } 4367 4368 /* 4369 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4370 * open descriptor. 4371 * 4372 * warning: do not remove the priv_check() call or this becomes one giant 4373 * security hole. 4374 */ 4375 #ifndef _SYS_SYSPROTO_H_ 4376 struct fhopen_args { 4377 const struct fhandle *u_fhp; 4378 int flags; 4379 }; 4380 #endif 4381 int 4382 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4383 { 4384 struct mount *mp; 4385 struct vnode *vp; 4386 struct fhandle fhp; 4387 struct file *fp; 4388 int fmode, error; 4389 int indx; 4390 4391 error = priv_check(td, PRIV_VFS_FHOPEN); 4392 if (error != 0) 4393 return (error); 4394 indx = -1; 4395 fmode = FFLAGS(uap->flags); 4396 /* why not allow a non-read/write open for our lockd? 
*/ 4397 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4398 return (EINVAL); 4399 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4400 if (error != 0) 4401 return(error); 4402 /* find the mount point */ 4403 mp = vfs_busyfs(&fhp.fh_fsid); 4404 if (mp == NULL) 4405 return (ESTALE); 4406 /* now give me my vnode, it gets returned to me locked */ 4407 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4408 vfs_unbusy(mp); 4409 if (error != 0) 4410 return (error); 4411 4412 error = falloc_noinstall(td, &fp); 4413 if (error != 0) { 4414 vput(vp); 4415 return (error); 4416 } 4417 /* 4418 * An extra reference on `fp' has been held for us by 4419 * falloc_noinstall(). 4420 */ 4421 4422 #ifdef INVARIANTS 4423 td->td_dupfd = -1; 4424 #endif 4425 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4426 if (error != 0) { 4427 KASSERT(fp->f_ops == &badfileops, 4428 ("VOP_OPEN in fhopen() set f_ops")); 4429 KASSERT(td->td_dupfd < 0, 4430 ("fhopen() encountered fdopen()")); 4431 4432 vput(vp); 4433 goto bad; 4434 } 4435 #ifdef INVARIANTS 4436 td->td_dupfd = 0; 4437 #endif 4438 fp->f_vnode = vp; 4439 fp->f_seqcount = 1; 4440 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4441 &vnops); 4442 VOP_UNLOCK(vp, 0); 4443 if ((fmode & O_TRUNC) != 0) { 4444 error = fo_truncate(fp, 0, td->td_ucred, td); 4445 if (error != 0) 4446 goto bad; 4447 } 4448 4449 error = finstall(td, fp, &indx, fmode, NULL); 4450 bad: 4451 fdrop(fp, td); 4452 td->td_retval[0] = indx; 4453 return (error); 4454 } 4455 4456 /* 4457 * Stat an (NFS) file handle. 
4458 */ 4459 #ifndef _SYS_SYSPROTO_H_ 4460 struct fhstat_args { 4461 struct fhandle *u_fhp; 4462 struct stat *sb; 4463 }; 4464 #endif 4465 int 4466 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4467 { 4468 struct stat sb; 4469 struct fhandle fh; 4470 int error; 4471 4472 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4473 if (error != 0) 4474 return (error); 4475 error = kern_fhstat(td, fh, &sb); 4476 if (error == 0) 4477 error = copyout(&sb, uap->sb, sizeof(sb)); 4478 return (error); 4479 } 4480 4481 int 4482 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4483 { 4484 struct mount *mp; 4485 struct vnode *vp; 4486 int error; 4487 4488 error = priv_check(td, PRIV_VFS_FHSTAT); 4489 if (error != 0) 4490 return (error); 4491 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4492 return (ESTALE); 4493 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4494 vfs_unbusy(mp); 4495 if (error != 0) 4496 return (error); 4497 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4498 vput(vp); 4499 return (error); 4500 } 4501 4502 /* 4503 * Implement fstatfs() for (NFS) file handles. 
4504 */ 4505 #ifndef _SYS_SYSPROTO_H_ 4506 struct fhstatfs_args { 4507 struct fhandle *u_fhp; 4508 struct statfs *buf; 4509 }; 4510 #endif 4511 int 4512 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4513 { 4514 struct statfs *sfp; 4515 fhandle_t fh; 4516 int error; 4517 4518 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4519 if (error != 0) 4520 return (error); 4521 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4522 error = kern_fhstatfs(td, fh, sfp); 4523 if (error == 0) 4524 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4525 free(sfp, M_STATFS); 4526 return (error); 4527 } 4528 4529 int 4530 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4531 { 4532 struct mount *mp; 4533 struct vnode *vp; 4534 int error; 4535 4536 error = priv_check(td, PRIV_VFS_FHSTATFS); 4537 if (error != 0) 4538 return (error); 4539 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4540 return (ESTALE); 4541 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4542 if (error != 0) { 4543 vfs_unbusy(mp); 4544 return (error); 4545 } 4546 vput(vp); 4547 error = prison_canseemount(td->td_ucred, mp); 4548 if (error != 0) 4549 goto out; 4550 #ifdef MAC 4551 error = mac_mount_check_stat(td->td_ucred, mp); 4552 if (error != 0) 4553 goto out; 4554 #endif 4555 error = VFS_STATFS(mp, buf); 4556 out: 4557 vfs_unbusy(mp); 4558 return (error); 4559 } 4560 4561 int 4562 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4563 { 4564 struct file *fp; 4565 struct mount *mp; 4566 struct vnode *vp; 4567 off_t olen, ooffset; 4568 int error; 4569 #ifdef AUDIT 4570 int audited_vnode1 = 0; 4571 #endif 4572 4573 AUDIT_ARG_FD(fd); 4574 if (offset < 0 || len <= 0) 4575 return (EINVAL); 4576 /* Check for wrap. 
*/ 4577 if (offset > OFF_MAX - len) 4578 return (EFBIG); 4579 AUDIT_ARG_FD(fd); 4580 error = fget(td, fd, &cap_pwrite_rights, &fp); 4581 if (error != 0) 4582 return (error); 4583 AUDIT_ARG_FILE(td->td_proc, fp); 4584 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4585 error = ESPIPE; 4586 goto out; 4587 } 4588 if ((fp->f_flag & FWRITE) == 0) { 4589 error = EBADF; 4590 goto out; 4591 } 4592 if (fp->f_type != DTYPE_VNODE) { 4593 error = ENODEV; 4594 goto out; 4595 } 4596 vp = fp->f_vnode; 4597 if (vp->v_type != VREG) { 4598 error = ENODEV; 4599 goto out; 4600 } 4601 4602 /* Allocating blocks may take a long time, so iterate. */ 4603 for (;;) { 4604 olen = len; 4605 ooffset = offset; 4606 4607 bwillwrite(); 4608 mp = NULL; 4609 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4610 if (error != 0) 4611 break; 4612 error = vn_lock(vp, LK_EXCLUSIVE); 4613 if (error != 0) { 4614 vn_finished_write(mp); 4615 break; 4616 } 4617 #ifdef AUDIT 4618 if (!audited_vnode1) { 4619 AUDIT_ARG_VNODE1(vp); 4620 audited_vnode1 = 1; 4621 } 4622 #endif 4623 #ifdef MAC 4624 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4625 if (error == 0) 4626 #endif 4627 error = VOP_ALLOCATE(vp, &offset, &len); 4628 VOP_UNLOCK(vp, 0); 4629 vn_finished_write(mp); 4630 4631 if (olen + ooffset != offset + len) { 4632 panic("offset + len changed from %jx/%jx to %jx/%jx", 4633 ooffset, olen, offset, len); 4634 } 4635 if (error != 0 || len == 0) 4636 break; 4637 KASSERT(olen > len, ("Iteration did not make progress?")); 4638 maybe_yield(); 4639 } 4640 out: 4641 fdrop(fp, td); 4642 return (error); 4643 } 4644 4645 int 4646 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4647 { 4648 int error; 4649 4650 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4651 return (kern_posix_error(td, error)); 4652 } 4653 4654 /* 4655 * Unlike madvise(2), we do not make a best effort to remember every 4656 * possible caching hint. 
Instead, we remember the last setting with 4657 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4658 * region of any current setting. 4659 */ 4660 int 4661 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4662 int advice) 4663 { 4664 struct fadvise_info *fa, *new; 4665 struct file *fp; 4666 struct vnode *vp; 4667 off_t end; 4668 int error; 4669 4670 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4671 return (EINVAL); 4672 AUDIT_ARG_VALUE(advice); 4673 switch (advice) { 4674 case POSIX_FADV_SEQUENTIAL: 4675 case POSIX_FADV_RANDOM: 4676 case POSIX_FADV_NOREUSE: 4677 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4678 break; 4679 case POSIX_FADV_NORMAL: 4680 case POSIX_FADV_WILLNEED: 4681 case POSIX_FADV_DONTNEED: 4682 new = NULL; 4683 break; 4684 default: 4685 return (EINVAL); 4686 } 4687 /* XXX: CAP_POSIX_FADVISE? */ 4688 AUDIT_ARG_FD(fd); 4689 error = fget(td, fd, &cap_no_rights, &fp); 4690 if (error != 0) 4691 goto out; 4692 AUDIT_ARG_FILE(td->td_proc, fp); 4693 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4694 error = ESPIPE; 4695 goto out; 4696 } 4697 if (fp->f_type != DTYPE_VNODE) { 4698 error = ENODEV; 4699 goto out; 4700 } 4701 vp = fp->f_vnode; 4702 if (vp->v_type != VREG) { 4703 error = ENODEV; 4704 goto out; 4705 } 4706 if (len == 0) 4707 end = OFF_MAX; 4708 else 4709 end = offset + len - 1; 4710 switch (advice) { 4711 case POSIX_FADV_SEQUENTIAL: 4712 case POSIX_FADV_RANDOM: 4713 case POSIX_FADV_NOREUSE: 4714 /* 4715 * Try to merge any existing non-standard region with 4716 * this new region if possible, otherwise create a new 4717 * non-standard region for this request. 
4718 */ 4719 mtx_pool_lock(mtxpool_sleep, fp); 4720 fa = fp->f_advice; 4721 if (fa != NULL && fa->fa_advice == advice && 4722 ((fa->fa_start <= end && fa->fa_end >= offset) || 4723 (end != OFF_MAX && fa->fa_start == end + 1) || 4724 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4725 if (offset < fa->fa_start) 4726 fa->fa_start = offset; 4727 if (end > fa->fa_end) 4728 fa->fa_end = end; 4729 } else { 4730 new->fa_advice = advice; 4731 new->fa_start = offset; 4732 new->fa_end = end; 4733 fp->f_advice = new; 4734 new = fa; 4735 } 4736 mtx_pool_unlock(mtxpool_sleep, fp); 4737 break; 4738 case POSIX_FADV_NORMAL: 4739 /* 4740 * If a the "normal" region overlaps with an existing 4741 * non-standard region, trim or remove the 4742 * non-standard region. 4743 */ 4744 mtx_pool_lock(mtxpool_sleep, fp); 4745 fa = fp->f_advice; 4746 if (fa != NULL) { 4747 if (offset <= fa->fa_start && end >= fa->fa_end) { 4748 new = fa; 4749 fp->f_advice = NULL; 4750 } else if (offset <= fa->fa_start && 4751 end >= fa->fa_start) 4752 fa->fa_start = end + 1; 4753 else if (offset <= fa->fa_end && end >= fa->fa_end) 4754 fa->fa_end = offset - 1; 4755 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4756 /* 4757 * If the "normal" region is a middle 4758 * portion of the existing 4759 * non-standard region, just remove 4760 * the whole thing rather than picking 4761 * one side or the other to 4762 * preserve. 
4763 */ 4764 new = fa; 4765 fp->f_advice = NULL; 4766 } 4767 } 4768 mtx_pool_unlock(mtxpool_sleep, fp); 4769 break; 4770 case POSIX_FADV_WILLNEED: 4771 case POSIX_FADV_DONTNEED: 4772 error = VOP_ADVISE(vp, offset, end, advice); 4773 break; 4774 } 4775 out: 4776 if (fp != NULL) 4777 fdrop(fp, td); 4778 free(new, M_FADVISE); 4779 return (error); 4780 } 4781 4782 int 4783 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4784 { 4785 int error; 4786 4787 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4788 uap->advice); 4789 return (kern_posix_error(td, error)); 4790 } 4791 4792 int 4793 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4794 off_t *outoffp, size_t len, unsigned int flags) 4795 { 4796 struct file *infp, *outfp; 4797 struct vnode *invp, *outvp; 4798 int error; 4799 size_t retlen; 4800 void *rl_rcookie, *rl_wcookie; 4801 off_t savinoff, savoutoff; 4802 4803 infp = outfp = NULL; 4804 rl_rcookie = rl_wcookie = NULL; 4805 savinoff = -1; 4806 error = 0; 4807 retlen = 0; 4808 4809 if (flags != 0) { 4810 error = EINVAL; 4811 goto out; 4812 } 4813 if (len > SSIZE_MAX) 4814 /* 4815 * Although the len argument is size_t, the return argument 4816 * is ssize_t (which is signed). Therefore a size that won't 4817 * fit in ssize_t can't be returned. 4818 */ 4819 len = SSIZE_MAX; 4820 4821 /* Get the file structures for the file descriptors. */ 4822 error = fget_read(td, infd, &cap_read_rights, &infp); 4823 if (error != 0) 4824 goto out; 4825 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4826 if (error != 0) 4827 goto out; 4828 4829 /* Set the offset pointers to the correct place. */ 4830 if (inoffp == NULL) 4831 inoffp = &infp->f_offset; 4832 if (outoffp == NULL) 4833 outoffp = &outfp->f_offset; 4834 savinoff = *inoffp; 4835 savoutoff = *outoffp; 4836 4837 invp = infp->f_vnode; 4838 outvp = outfp->f_vnode; 4839 /* Sanity check the f_flag bits. 
*/ 4840 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4841 (infp->f_flag & FREAD) == 0 || invp == outvp) { 4842 error = EBADF; 4843 goto out; 4844 } 4845 4846 /* If len == 0, just return 0. */ 4847 if (len == 0) 4848 goto out; 4849 4850 /* Range lock the byte ranges for both invp and outvp. */ 4851 for (;;) { 4852 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4853 len); 4854 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4855 len); 4856 if (rl_rcookie != NULL) 4857 break; 4858 vn_rangelock_unlock(outvp, rl_wcookie); 4859 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4860 vn_rangelock_unlock(invp, rl_rcookie); 4861 } 4862 4863 retlen = len; 4864 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4865 flags, infp->f_cred, outfp->f_cred, td); 4866 out: 4867 if (rl_rcookie != NULL) 4868 vn_rangelock_unlock(invp, rl_rcookie); 4869 if (rl_wcookie != NULL) 4870 vn_rangelock_unlock(outvp, rl_wcookie); 4871 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4872 *inoffp = savinoff; 4873 *outoffp = savoutoff; 4874 } 4875 if (outfp != NULL) 4876 fdrop(outfp, td); 4877 if (infp != NULL) 4878 fdrop(infp, td); 4879 td->td_retval[0] = retlen; 4880 return (error); 4881 } 4882 4883 int 4884 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 4885 { 4886 off_t inoff, outoff, *inoffp, *outoffp; 4887 int error; 4888 4889 inoffp = outoffp = NULL; 4890 if (uap->inoffp != NULL) { 4891 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 4892 if (error != 0) 4893 return (error); 4894 inoffp = &inoff; 4895 } 4896 if (uap->outoffp != NULL) { 4897 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 4898 if (error != 0) 4899 return (error); 4900 outoffp = &outoff; 4901 } 4902 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 4903 outoffp, uap->len, uap->flags); 4904 if (error == 0 && uap->inoffp != NULL) 4905 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 4906 if (error == 
0 && uap->outoffp != NULL) 4907 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 4908 return (error); 4909 } 4910