1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capsicum.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <ufs/ufs/quota.h> 91 92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94 SDT_PROVIDER_DEFINE(vfs); 95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 97 98 static int kern_chflagsat(struct thread *td, int fd, const char *path, 99 enum uio_seg pathseg, u_long flags, int atflag); 100 static int setfflags(struct thread *td, struct vnode *, u_long); 101 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 102 static int getutimens(const struct timespec *, enum uio_seg, 103 struct timespec *, int *); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * Sync each mounted filesystem. 111 */ 112 #ifndef _SYS_SYSPROTO_H_ 113 struct sync_args { 114 int dummy; 115 }; 116 #endif 117 /* ARGSUSED */ 118 int 119 sys_sync(struct thread *td, struct sync_args *uap) 120 { 121 struct mount *mp, *nmp; 122 int save; 123 124 mtx_lock(&mountlist_mtx); 125 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 126 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 127 nmp = TAILQ_NEXT(mp, mnt_list); 128 continue; 129 } 130 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 131 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 132 save = curthread_pflags_set(TDP_SYNCIO); 133 vfs_msync(mp, MNT_NOWAIT); 134 VFS_SYNC(mp, MNT_NOWAIT); 135 curthread_pflags_restore(save); 136 vn_finished_write(mp); 137 } 138 mtx_lock(&mountlist_mtx); 139 nmp = TAILQ_NEXT(mp, mnt_list); 140 vfs_unbusy(mp); 141 } 142 mtx_unlock(&mountlist_mtx); 143 return (0); 144 } 145 146 /* 147 * Change filesystem quotas. 148 */ 149 #ifndef _SYS_SYSPROTO_H_ 150 struct quotactl_args { 151 char *path; 152 int cmd; 153 int uid; 154 caddr_t arg; 155 }; 156 #endif 157 int 158 sys_quotactl(struct thread *td, struct quotactl_args *uap) 159 { 160 struct mount *mp; 161 struct nameidata nd; 162 int error; 163 164 AUDIT_ARG_CMD(uap->cmd); 165 AUDIT_ARG_UID(uap->uid); 166 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 167 return (EPERM); 168 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 169 uap->path, td); 170 if ((error = namei(&nd)) != 0) 171 return (error); 172 NDFREE(&nd, NDF_ONLY_PNBUF); 173 mp = nd.ni_vp->v_mount; 174 vfs_ref(mp); 175 vput(nd.ni_vp); 176 error = vfs_busy(mp, 0); 177 vfs_rel(mp); 178 if (error != 0) 179 return (error); 180 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 181 182 /* 183 * Since quota on operation typically needs to open quota 184 * file, the Q_QUOTAON handler needs to unbusy the mount point 185 * before calling into namei. Otherwise, unmount might be 186 * started between two vfs_busy() invocations (first is our, 187 * second is from mount point cross-walk code in lookup()), 188 * causing deadlock. 189 * 190 * Require that Q_QUOTAON handles the vfs_busy() reference on 191 * its own, always returning with ubusied mount point. 192 */ 193 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 194 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 195 vfs_unbusy(mp); 196 return (error); 197 } 198 199 /* 200 * Used by statfs conversion routines to scale the block size up if 201 * necessary so that all of the block counts are <= 'max_size'. Note 202 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 203 * value of 'n'. 204 */ 205 void 206 statfs_scale_blocks(struct statfs *sf, long max_size) 207 { 208 uint64_t count; 209 int shift; 210 211 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 212 213 /* 214 * Attempt to scale the block counts to give a more accurate 215 * overview to userland of the ratio of free space to used 216 * space. To do this, find the largest block count and compute 217 * a divisor that lets it fit into a signed integer <= max_size. 218 */ 219 if (sf->f_bavail < 0) 220 count = -sf->f_bavail; 221 else 222 count = sf->f_bavail; 223 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 224 if (count <= max_size) 225 return; 226 227 count >>= flsl(max_size); 228 shift = 0; 229 while (count > 0) { 230 shift++; 231 count >>=1; 232 } 233 234 sf->f_bsize <<= shift; 235 sf->f_blocks >>= shift; 236 sf->f_bfree >>= shift; 237 sf->f_bavail >>= shift; 238 } 239 240 static int 241 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 242 { 243 struct statfs *sp; 244 int error; 245 246 if (mp == NULL) 247 return (EBADF); 248 error = vfs_busy(mp, 0); 249 vfs_rel(mp); 250 if (error != 0) 251 return (error); 252 #ifdef MAC 253 error = mac_mount_check_stat(td->td_ucred, mp); 254 if (error != 0) 255 goto out; 256 #endif 257 /* 258 * Set these in case the underlying filesystem fails to do so. 259 */ 260 sp = &mp->mnt_stat; 261 sp->f_version = STATFS_VERSION; 262 sp->f_namemax = NAME_MAX; 263 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 264 error = VFS_STATFS(mp, sp); 265 if (error != 0) 266 goto out; 267 *buf = *sp; 268 if (priv_check(td, PRIV_VFS_GENERATION)) { 269 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 270 prison_enforce_statfs(td->td_ucred, mp, buf); 271 } 272 out: 273 vfs_unbusy(mp); 274 return (error); 275 } 276 277 /* 278 * Get filesystem statistics. 279 */ 280 #ifndef _SYS_SYSPROTO_H_ 281 struct statfs_args { 282 char *path; 283 struct statfs *buf; 284 }; 285 #endif 286 int 287 sys_statfs(struct thread *td, struct statfs_args *uap) 288 { 289 struct statfs *sfp; 290 int error; 291 292 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 293 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 294 if (error == 0) 295 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 296 free(sfp, M_STATFS); 297 return (error); 298 } 299 300 int 301 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 302 struct statfs *buf) 303 { 304 struct mount *mp; 305 struct nameidata nd; 306 int error; 307 308 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 309 pathseg, path, td); 310 error = namei(&nd); 311 if (error != 0) 312 return (error); 313 mp = nd.ni_vp->v_mount; 314 vfs_ref(mp); 315 NDFREE(&nd, NDF_ONLY_PNBUF); 316 vput(nd.ni_vp); 317 return (kern_do_statfs(td, mp, buf)); 318 } 319 320 /* 321 * Get filesystem statistics. 322 */ 323 #ifndef _SYS_SYSPROTO_H_ 324 struct fstatfs_args { 325 int fd; 326 struct statfs *buf; 327 }; 328 #endif 329 int 330 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 331 { 332 struct statfs *sfp; 333 int error; 334 335 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 336 error = kern_fstatfs(td, uap->fd, sfp); 337 if (error == 0) 338 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 339 free(sfp, M_STATFS); 340 return (error); 341 } 342 343 int 344 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 345 { 346 struct file *fp; 347 struct mount *mp; 348 struct vnode *vp; 349 int error; 350 351 AUDIT_ARG_FD(fd); 352 error = getvnode(td, fd, &cap_fstatfs_rights, &fp); 353 if (error != 0) 354 return (error); 355 vp = fp->f_vnode; 356 vn_lock(vp, LK_SHARED | LK_RETRY); 357 #ifdef AUDIT 358 AUDIT_ARG_VNODE1(vp); 359 #endif 360 mp = vp->v_mount; 361 if (mp != NULL) 362 vfs_ref(mp); 363 VOP_UNLOCK(vp, 0); 364 fdrop(fp, td); 365 return (kern_do_statfs(td, mp, buf)); 366 } 367 368 /* 369 * Get statistics on all filesystems. 370 */ 371 #ifndef _SYS_SYSPROTO_H_ 372 struct getfsstat_args { 373 struct statfs *buf; 374 long bufsize; 375 int mode; 376 }; 377 #endif 378 int 379 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 380 { 381 size_t count; 382 int error; 383 384 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 385 return (EINVAL); 386 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 387 UIO_USERSPACE, uap->mode); 388 if (error == 0) 389 td->td_retval[0] = count; 390 return (error); 391 } 392 393 /* 394 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 395 * The caller is responsible for freeing memory which will be allocated 396 * in '*buf'. 397 */ 398 int 399 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 400 size_t *countp, enum uio_seg bufseg, int mode) 401 { 402 struct mount *mp, *nmp; 403 struct statfs *sfsp, *sp, *sptmp, *tofree; 404 size_t count, maxcount; 405 int error; 406 407 switch (mode) { 408 case MNT_WAIT: 409 case MNT_NOWAIT: 410 break; 411 default: 412 if (bufseg == UIO_SYSSPACE) 413 *buf = NULL; 414 return (EINVAL); 415 } 416 restart: 417 maxcount = bufsize / sizeof(struct statfs); 418 if (bufsize == 0) { 419 sfsp = NULL; 420 tofree = NULL; 421 } else if (bufseg == UIO_USERSPACE) { 422 sfsp = *buf; 423 tofree = NULL; 424 } else /* if (bufseg == UIO_SYSSPACE) */ { 425 count = 0; 426 mtx_lock(&mountlist_mtx); 427 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 428 count++; 429 } 430 mtx_unlock(&mountlist_mtx); 431 if (maxcount > count) 432 maxcount = count; 433 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 434 M_STATFS, M_WAITOK); 435 } 436 count = 0; 437 mtx_lock(&mountlist_mtx); 438 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 439 if (prison_canseemount(td->td_ucred, mp) != 0) { 440 nmp = TAILQ_NEXT(mp, mnt_list); 441 continue; 442 } 443 #ifdef MAC 444 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 445 nmp = TAILQ_NEXT(mp, mnt_list); 446 continue; 447 } 448 #endif 449 if (mode == MNT_WAIT) { 450 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 451 /* 452 * If vfs_busy() failed, and MBF_NOWAIT 453 * wasn't passed, then the mp is gone. 454 * Furthermore, because of MBF_MNTLSTLOCK, 455 * the mountlist_mtx was dropped. We have 456 * no other choice than to start over. 457 */ 458 mtx_unlock(&mountlist_mtx); 459 free(tofree, M_STATFS); 460 goto restart; 461 } 462 } else { 463 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 464 nmp = TAILQ_NEXT(mp, mnt_list); 465 continue; 466 } 467 } 468 if (sfsp != NULL && count < maxcount) { 469 sp = &mp->mnt_stat; 470 /* 471 * Set these in case the underlying filesystem 472 * fails to do so. 473 */ 474 sp->f_version = STATFS_VERSION; 475 sp->f_namemax = NAME_MAX; 476 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 477 /* 478 * If MNT_NOWAIT is specified, do not refresh 479 * the fsstat cache. 480 */ 481 if (mode != MNT_NOWAIT) { 482 error = VFS_STATFS(mp, sp); 483 if (error != 0) { 484 mtx_lock(&mountlist_mtx); 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 vfs_unbusy(mp); 487 continue; 488 } 489 } 490 if (priv_check(td, PRIV_VFS_GENERATION)) { 491 sptmp = malloc(sizeof(struct statfs), M_STATFS, 492 M_WAITOK); 493 *sptmp = *sp; 494 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 495 prison_enforce_statfs(td->td_ucred, mp, sptmp); 496 sp = sptmp; 497 } else 498 sptmp = NULL; 499 if (bufseg == UIO_SYSSPACE) { 500 bcopy(sp, sfsp, sizeof(*sp)); 501 free(sptmp, M_STATFS); 502 } else /* if (bufseg == UIO_USERSPACE) */ { 503 error = copyout(sp, sfsp, sizeof(*sp)); 504 free(sptmp, M_STATFS); 505 if (error != 0) { 506 vfs_unbusy(mp); 507 return (error); 508 } 509 } 510 sfsp++; 511 } 512 count++; 513 mtx_lock(&mountlist_mtx); 514 nmp = TAILQ_NEXT(mp, mnt_list); 515 vfs_unbusy(mp); 516 } 517 mtx_unlock(&mountlist_mtx); 518 if (sfsp != NULL && count > maxcount) 519 *countp = maxcount; 520 else 521 *countp = count; 522 return (0); 523 } 524 525 #ifdef COMPAT_FREEBSD4 526 /* 527 * Get old format filesystem statistics. 528 */ 529 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 530 531 #ifndef _SYS_SYSPROTO_H_ 532 struct freebsd4_statfs_args { 533 char *path; 534 struct ostatfs *buf; 535 }; 536 #endif 537 int 538 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 539 { 540 struct ostatfs osb; 541 struct statfs *sfp; 542 int error; 543 544 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 545 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 546 if (error == 0) { 547 freebsd4_cvtstatfs(sfp, &osb); 548 error = copyout(&osb, uap->buf, sizeof(osb)); 549 } 550 free(sfp, M_STATFS); 551 return (error); 552 } 553 554 /* 555 * Get filesystem statistics. 556 */ 557 #ifndef _SYS_SYSPROTO_H_ 558 struct freebsd4_fstatfs_args { 559 int fd; 560 struct ostatfs *buf; 561 }; 562 #endif 563 int 564 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 565 { 566 struct ostatfs osb; 567 struct statfs *sfp; 568 int error; 569 570 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 571 error = kern_fstatfs(td, uap->fd, sfp); 572 if (error == 0) { 573 freebsd4_cvtstatfs(sfp, &osb); 574 error = copyout(&osb, uap->buf, sizeof(osb)); 575 } 576 free(sfp, M_STATFS); 577 return (error); 578 } 579 580 /* 581 * Get statistics on all filesystems. 582 */ 583 #ifndef _SYS_SYSPROTO_H_ 584 struct freebsd4_getfsstat_args { 585 struct ostatfs *buf; 586 long bufsize; 587 int mode; 588 }; 589 #endif 590 int 591 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 592 { 593 struct statfs *buf, *sp; 594 struct ostatfs osb; 595 size_t count, size; 596 int error; 597 598 if (uap->bufsize < 0) 599 return (EINVAL); 600 count = uap->bufsize / sizeof(struct ostatfs); 601 if (count > SIZE_MAX / sizeof(struct statfs)) 602 return (EINVAL); 603 size = count * sizeof(struct statfs); 604 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 605 uap->mode); 606 if (error == 0) 607 td->td_retval[0] = count; 608 if (size != 0) { 609 sp = buf; 610 while (count != 0 && error == 0) { 611 freebsd4_cvtstatfs(sp, &osb); 612 error = copyout(&osb, uap->buf, sizeof(osb)); 613 sp++; 614 uap->buf++; 615 count--; 616 } 617 free(buf, M_STATFS); 618 } 619 return (error); 620 } 621 622 /* 623 * Implement fstatfs() for (NFS) file handles. 624 */ 625 #ifndef _SYS_SYSPROTO_H_ 626 struct freebsd4_fhstatfs_args { 627 struct fhandle *u_fhp; 628 struct ostatfs *buf; 629 }; 630 #endif 631 int 632 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 633 { 634 struct ostatfs osb; 635 struct statfs *sfp; 636 fhandle_t fh; 637 int error; 638 639 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 640 if (error != 0) 641 return (error); 642 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 643 error = kern_fhstatfs(td, fh, sfp); 644 if (error == 0) { 645 freebsd4_cvtstatfs(sfp, &osb); 646 error = copyout(&osb, uap->buf, sizeof(osb)); 647 } 648 free(sfp, M_STATFS); 649 return (error); 650 } 651 652 /* 653 * Convert a new format statfs structure to an old format statfs structure. 654 */ 655 static void 656 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 657 { 658 659 statfs_scale_blocks(nsp, LONG_MAX); 660 bzero(osp, sizeof(*osp)); 661 osp->f_bsize = nsp->f_bsize; 662 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 663 osp->f_blocks = nsp->f_blocks; 664 osp->f_bfree = nsp->f_bfree; 665 osp->f_bavail = nsp->f_bavail; 666 osp->f_files = MIN(nsp->f_files, LONG_MAX); 667 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 668 osp->f_owner = nsp->f_owner; 669 osp->f_type = nsp->f_type; 670 osp->f_flags = nsp->f_flags; 671 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 672 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 673 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 674 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 675 strlcpy(osp->f_fstypename, nsp->f_fstypename, 676 MIN(MFSNAMELEN, OMFSNAMELEN)); 677 strlcpy(osp->f_mntonname, nsp->f_mntonname, 678 MIN(MNAMELEN, OMNAMELEN)); 679 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 680 MIN(MNAMELEN, OMNAMELEN)); 681 osp->f_fsid = nsp->f_fsid; 682 } 683 #endif /* COMPAT_FREEBSD4 */ 684 685 #if defined(COMPAT_FREEBSD11) 686 /* 687 * Get old format filesystem statistics. 688 */ 689 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 690 691 int 692 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 693 { 694 struct freebsd11_statfs osb; 695 struct statfs *sfp; 696 int error; 697 698 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 699 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 700 if (error == 0) { 701 freebsd11_cvtstatfs(sfp, &osb); 702 error = copyout(&osb, uap->buf, sizeof(osb)); 703 } 704 free(sfp, M_STATFS); 705 return (error); 706 } 707 708 /* 709 * Get filesystem statistics. 710 */ 711 int 712 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 713 { 714 struct freebsd11_statfs osb; 715 struct statfs *sfp; 716 int error; 717 718 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 719 error = kern_fstatfs(td, uap->fd, sfp); 720 if (error == 0) { 721 freebsd11_cvtstatfs(sfp, &osb); 722 error = copyout(&osb, uap->buf, sizeof(osb)); 723 } 724 free(sfp, M_STATFS); 725 return (error); 726 } 727 728 /* 729 * Get statistics on all filesystems. 730 */ 731 int 732 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 733 { 734 struct freebsd11_statfs osb; 735 struct statfs *buf, *sp; 736 size_t count, size; 737 int error; 738 739 count = uap->bufsize / sizeof(struct ostatfs); 740 size = count * sizeof(struct statfs); 741 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 742 uap->mode); 743 if (error == 0) 744 td->td_retval[0] = count; 745 if (size > 0) { 746 sp = buf; 747 while (count > 0 && error == 0) { 748 freebsd11_cvtstatfs(sp, &osb); 749 error = copyout(&osb, uap->buf, sizeof(osb)); 750 sp++; 751 uap->buf++; 752 count--; 753 } 754 free(buf, M_STATFS); 755 } 756 return (error); 757 } 758 759 /* 760 * Implement fstatfs() for (NFS) file handles. 761 */ 762 int 763 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 764 { 765 struct freebsd11_statfs osb; 766 struct statfs *sfp; 767 fhandle_t fh; 768 int error; 769 770 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 771 if (error) 772 return (error); 773 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 774 error = kern_fhstatfs(td, fh, sfp); 775 if (error == 0) { 776 freebsd11_cvtstatfs(sfp, &osb); 777 error = copyout(&osb, uap->buf, sizeof(osb)); 778 } 779 free(sfp, M_STATFS); 780 return (error); 781 } 782 783 /* 784 * Convert a new format statfs structure to an old format statfs structure. 785 */ 786 static void 787 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 788 { 789 790 bzero(osp, sizeof(*osp)); 791 osp->f_version = FREEBSD11_STATFS_VERSION; 792 osp->f_type = nsp->f_type; 793 osp->f_flags = nsp->f_flags; 794 osp->f_bsize = nsp->f_bsize; 795 osp->f_iosize = nsp->f_iosize; 796 osp->f_blocks = nsp->f_blocks; 797 osp->f_bfree = nsp->f_bfree; 798 osp->f_bavail = nsp->f_bavail; 799 osp->f_files = nsp->f_files; 800 osp->f_ffree = nsp->f_ffree; 801 osp->f_syncwrites = nsp->f_syncwrites; 802 osp->f_asyncwrites = nsp->f_asyncwrites; 803 osp->f_syncreads = nsp->f_syncreads; 804 osp->f_asyncreads = nsp->f_asyncreads; 805 osp->f_namemax = nsp->f_namemax; 806 osp->f_owner = nsp->f_owner; 807 osp->f_fsid = nsp->f_fsid; 808 strlcpy(osp->f_fstypename, nsp->f_fstypename, 809 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 810 strlcpy(osp->f_mntonname, nsp->f_mntonname, 811 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 812 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 813 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 814 } 815 #endif /* COMPAT_FREEBSD11 */ 816 817 /* 818 * Change current working directory to a given file descriptor. 819 */ 820 #ifndef _SYS_SYSPROTO_H_ 821 struct fchdir_args { 822 int fd; 823 }; 824 #endif 825 int 826 sys_fchdir(struct thread *td, struct fchdir_args *uap) 827 { 828 struct vnode *vp, *tdp; 829 struct mount *mp; 830 struct file *fp; 831 int error; 832 833 AUDIT_ARG_FD(uap->fd); 834 error = getvnode(td, uap->fd, &cap_fchdir_rights, 835 &fp); 836 if (error != 0) 837 return (error); 838 vp = fp->f_vnode; 839 vrefact(vp); 840 fdrop(fp, td); 841 vn_lock(vp, LK_SHARED | LK_RETRY); 842 AUDIT_ARG_VNODE1(vp); 843 error = change_dir(vp, td); 844 while (!error && (mp = vp->v_mountedhere) != NULL) { 845 if (vfs_busy(mp, 0)) 846 continue; 847 error = VFS_ROOT(mp, LK_SHARED, &tdp); 848 vfs_unbusy(mp); 849 if (error != 0) 850 break; 851 vput(vp); 852 vp = tdp; 853 } 854 if (error != 0) { 855 vput(vp); 856 return (error); 857 } 858 VOP_UNLOCK(vp, 0); 859 pwd_chdir(td, vp); 860 return (0); 861 } 862 863 /* 864 * Change current working directory (``.''). 865 */ 866 #ifndef _SYS_SYSPROTO_H_ 867 struct chdir_args { 868 char *path; 869 }; 870 #endif 871 int 872 sys_chdir(struct thread *td, struct chdir_args *uap) 873 { 874 875 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 876 } 877 878 int 879 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 880 { 881 struct nameidata nd; 882 int error; 883 884 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 885 pathseg, path, td); 886 if ((error = namei(&nd)) != 0) 887 return (error); 888 if ((error = change_dir(nd.ni_vp, td)) != 0) { 889 vput(nd.ni_vp); 890 NDFREE(&nd, NDF_ONLY_PNBUF); 891 return (error); 892 } 893 VOP_UNLOCK(nd.ni_vp, 0); 894 NDFREE(&nd, NDF_ONLY_PNBUF); 895 pwd_chdir(td, nd.ni_vp); 896 return (0); 897 } 898 899 /* 900 * Change notion of root (``/'') directory. 901 */ 902 #ifndef _SYS_SYSPROTO_H_ 903 struct chroot_args { 904 char *path; 905 }; 906 #endif 907 int 908 sys_chroot(struct thread *td, struct chroot_args *uap) 909 { 910 struct nameidata nd; 911 int error; 912 913 error = priv_check(td, PRIV_VFS_CHROOT); 914 if (error != 0) 915 return (error); 916 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 917 UIO_USERSPACE, uap->path, td); 918 error = namei(&nd); 919 if (error != 0) 920 goto error; 921 error = change_dir(nd.ni_vp, td); 922 if (error != 0) 923 goto e_vunlock; 924 #ifdef MAC 925 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 926 if (error != 0) 927 goto e_vunlock; 928 #endif 929 VOP_UNLOCK(nd.ni_vp, 0); 930 error = pwd_chroot(td, nd.ni_vp); 931 vrele(nd.ni_vp); 932 NDFREE(&nd, NDF_ONLY_PNBUF); 933 return (error); 934 e_vunlock: 935 vput(nd.ni_vp); 936 error: 937 NDFREE(&nd, NDF_ONLY_PNBUF); 938 return (error); 939 } 940 941 /* 942 * Common routine for chroot and chdir. Callers must provide a locked vnode 943 * instance. 944 */ 945 int 946 change_dir(struct vnode *vp, struct thread *td) 947 { 948 #ifdef MAC 949 int error; 950 #endif 951 952 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 953 if (vp->v_type != VDIR) 954 return (ENOTDIR); 955 #ifdef MAC 956 error = mac_vnode_check_chdir(td->td_ucred, vp); 957 if (error != 0) 958 return (error); 959 #endif 960 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 961 } 962 963 static __inline void 964 flags_to_rights(int flags, cap_rights_t *rightsp) 965 { 966 967 if (flags & O_EXEC) { 968 cap_rights_set(rightsp, CAP_FEXECVE); 969 } else { 970 switch ((flags & O_ACCMODE)) { 971 case O_RDONLY: 972 cap_rights_set(rightsp, CAP_READ); 973 break; 974 case O_RDWR: 975 cap_rights_set(rightsp, CAP_READ); 976 /* FALLTHROUGH */ 977 case O_WRONLY: 978 cap_rights_set(rightsp, CAP_WRITE); 979 if (!(flags & (O_APPEND | O_TRUNC))) 980 cap_rights_set(rightsp, CAP_SEEK); 981 break; 982 } 983 } 984 985 if (flags & O_CREAT) 986 cap_rights_set(rightsp, CAP_CREATE); 987 988 if (flags & O_TRUNC) 989 cap_rights_set(rightsp, CAP_FTRUNCATE); 990 991 if (flags & (O_SYNC | O_FSYNC)) 992 cap_rights_set(rightsp, CAP_FSYNC); 993 994 if (flags & (O_EXLOCK | O_SHLOCK)) 995 cap_rights_set(rightsp, CAP_FLOCK); 996 } 997 998 /* 999 * Check permissions, allocate an open file structure, and call the device 1000 * open routine if any. 1001 */ 1002 #ifndef _SYS_SYSPROTO_H_ 1003 struct open_args { 1004 char *path; 1005 int flags; 1006 int mode; 1007 }; 1008 #endif 1009 int 1010 sys_open(struct thread *td, struct open_args *uap) 1011 { 1012 1013 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1014 uap->flags, uap->mode)); 1015 } 1016 1017 #ifndef _SYS_SYSPROTO_H_ 1018 struct openat_args { 1019 int fd; 1020 char *path; 1021 int flag; 1022 int mode; 1023 }; 1024 #endif 1025 int 1026 sys_openat(struct thread *td, struct openat_args *uap) 1027 { 1028 1029 AUDIT_ARG_FD(uap->fd); 1030 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1031 uap->mode)); 1032 } 1033 1034 int 1035 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1036 int flags, int mode) 1037 { 1038 struct proc *p = td->td_proc; 1039 struct filedesc *fdp = p->p_fd; 1040 struct file *fp; 1041 struct vnode *vp; 1042 struct nameidata nd; 1043 cap_rights_t rights; 1044 int cmode, error, indx; 1045 1046 indx = -1; 1047 1048 AUDIT_ARG_FFLAGS(flags); 1049 AUDIT_ARG_MODE(mode); 1050 cap_rights_init(&rights, CAP_LOOKUP); 1051 flags_to_rights(flags, &rights); 1052 /* 1053 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1054 * may be specified. 1055 */ 1056 if (flags & O_EXEC) { 1057 if (flags & O_ACCMODE) 1058 return (EINVAL); 1059 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1060 return (EINVAL); 1061 } else { 1062 flags = FFLAGS(flags); 1063 } 1064 1065 /* 1066 * Allocate a file structure. The descriptor to reference it 1067 * is allocated and set by finstall() below. 1068 */ 1069 error = falloc_noinstall(td, &fp); 1070 if (error != 0) 1071 return (error); 1072 /* 1073 * An extra reference on `fp' has been held for us by 1074 * falloc_noinstall(). 1075 */ 1076 /* Set the flags early so the finit in devfs can pick them up. */ 1077 fp->f_flag = flags & FMASK; 1078 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1079 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1080 &rights, td); 1081 td->td_dupfd = -1; /* XXX check for fdopen */ 1082 error = vn_open(&nd, &flags, cmode, fp); 1083 if (error != 0) { 1084 /* 1085 * If the vn_open replaced the method vector, something 1086 * wonderous happened deep below and we just pass it up 1087 * pretending we know what we do. 1088 */ 1089 if (error == ENXIO && fp->f_ops != &badfileops) 1090 goto success; 1091 1092 /* 1093 * Handle special fdopen() case. bleh. 1094 * 1095 * Don't do this for relative (capability) lookups; we don't 1096 * understand exactly what would happen, and we don't think 1097 * that it ever should. 1098 */ 1099 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1100 (error == ENODEV || error == ENXIO) && 1101 td->td_dupfd >= 0) { 1102 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1103 &indx); 1104 if (error == 0) 1105 goto success; 1106 } 1107 1108 goto bad; 1109 } 1110 td->td_dupfd = 0; 1111 NDFREE(&nd, NDF_ONLY_PNBUF); 1112 vp = nd.ni_vp; 1113 1114 /* 1115 * Store the vnode, for any f_type. Typically, the vnode use 1116 * count is decremented by direct call to vn_closefile() for 1117 * files that switched type in the cdevsw fdopen() method. 1118 */ 1119 fp->f_vnode = vp; 1120 /* 1121 * If the file wasn't claimed by devfs bind it to the normal 1122 * vnode operations here. 1123 */ 1124 if (fp->f_ops == &badfileops) { 1125 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1126 fp->f_seqcount = 1; 1127 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1128 DTYPE_VNODE, vp, &vnops); 1129 } 1130 1131 VOP_UNLOCK(vp, 0); 1132 if (flags & O_TRUNC) { 1133 error = fo_truncate(fp, 0, td->td_ucred, td); 1134 if (error != 0) 1135 goto bad; 1136 } 1137 success: 1138 /* 1139 * If we haven't already installed the FD (for dupfdopen), do so now. 1140 */ 1141 if (indx == -1) { 1142 struct filecaps *fcaps; 1143 1144 #ifdef CAPABILITIES 1145 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1146 fcaps = &nd.ni_filecaps; 1147 else 1148 #endif 1149 fcaps = NULL; 1150 error = finstall(td, fp, &indx, flags, fcaps); 1151 /* On success finstall() consumes fcaps. */ 1152 if (error != 0) { 1153 filecaps_free(&nd.ni_filecaps); 1154 goto bad; 1155 } 1156 } else { 1157 filecaps_free(&nd.ni_filecaps); 1158 } 1159 1160 /* 1161 * Release our private reference, leaving the one associated with 1162 * the descriptor table intact. 1163 */ 1164 fdrop(fp, td); 1165 td->td_retval[0] = indx; 1166 return (0); 1167 bad: 1168 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1169 fdrop(fp, td); 1170 return (error); 1171 } 1172 1173 #ifdef COMPAT_43 1174 /* 1175 * Create a file. 1176 */ 1177 #ifndef _SYS_SYSPROTO_H_ 1178 struct ocreat_args { 1179 char *path; 1180 int mode; 1181 }; 1182 #endif 1183 int 1184 ocreat(struct thread *td, struct ocreat_args *uap) 1185 { 1186 1187 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1188 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1189 } 1190 #endif /* COMPAT_43 */ 1191 1192 /* 1193 * Create a special file. 1194 */ 1195 #ifndef _SYS_SYSPROTO_H_ 1196 struct mknodat_args { 1197 int fd; 1198 char *path; 1199 mode_t mode; 1200 dev_t dev; 1201 }; 1202 #endif 1203 int 1204 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1205 { 1206 1207 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1208 uap->dev)); 1209 } 1210 1211 #if defined(COMPAT_FREEBSD11) 1212 int 1213 freebsd11_mknod(struct thread *td, 1214 struct freebsd11_mknod_args *uap) 1215 { 1216 1217 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1218 uap->mode, uap->dev)); 1219 } 1220 1221 int 1222 freebsd11_mknodat(struct thread *td, 1223 struct freebsd11_mknodat_args *uap) 1224 { 1225 1226 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1227 uap->dev)); 1228 } 1229 #endif /* COMPAT_FREEBSD11 */ 1230 1231 int 1232 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1233 int mode, dev_t dev) 1234 { 1235 struct vnode *vp; 1236 struct mount *mp; 1237 struct vattr vattr; 1238 struct nameidata nd; 1239 int error, whiteout = 0; 1240 1241 AUDIT_ARG_MODE(mode); 1242 AUDIT_ARG_DEV(dev); 1243 switch (mode & S_IFMT) { 1244 case S_IFCHR: 1245 case S_IFBLK: 1246 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1247 if (error == 0 && dev == VNOVAL) 1248 error = EINVAL; 1249 break; 1250 case S_IFWHT: 1251 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1252 break; 1253 case S_IFIFO: 1254 if (dev == 0) 1255 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1256 /* FALLTHROUGH */ 1257 default: 1258 error = EINVAL; 1259 break; 1260 } 1261 if (error != 0) 1262 return (error); 1263 restart: 1264 bwillwrite(); 1265 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1266 NOCACHE, pathseg, path, fd, &cap_mknodat_rights, 1267 td); 1268 if ((error = namei(&nd)) != 0) 1269 return (error); 1270 vp = nd.ni_vp; 1271 if (vp != NULL) { 1272 NDFREE(&nd, NDF_ONLY_PNBUF); 1273 if (vp == nd.ni_dvp) 1274 vrele(nd.ni_dvp); 1275 else 1276 vput(nd.ni_dvp); 1277 vrele(vp); 1278 return (EEXIST); 1279 } else { 1280 VATTR_NULL(&vattr); 1281 vattr.va_mode = (mode & ALLPERMS) & 1282 ~td->td_proc->p_fd->fd_cmask; 1283 vattr.va_rdev = dev; 1284 whiteout = 0; 1285 1286 switch (mode & S_IFMT) { 1287 case S_IFCHR: 1288 vattr.va_type = VCHR; 1289 break; 1290 case S_IFBLK: 1291 vattr.va_type = VBLK; 1292 break; 1293 case S_IFWHT: 1294 whiteout = 1; 1295 break; 1296 default: 1297 panic("kern_mknod: invalid mode"); 1298 } 1299 } 1300 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1301 NDFREE(&nd, NDF_ONLY_PNBUF); 1302 vput(nd.ni_dvp); 1303 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1304 return (error); 1305 goto restart; 1306 } 1307 #ifdef MAC 1308 if (error == 0 && !whiteout) 1309 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1310 &nd.ni_cnd, &vattr); 1311 #endif 1312 if (error == 0) { 1313 if (whiteout) 1314 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1315 else { 1316 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1317 &nd.ni_cnd, &vattr); 1318 if (error == 0) 1319 vput(nd.ni_vp); 1320 } 1321 } 1322 NDFREE(&nd, NDF_ONLY_PNBUF); 1323 vput(nd.ni_dvp); 1324 vn_finished_write(mp); 1325 return (error); 1326 } 1327 1328 /* 1329 * Create a named pipe. 1330 */ 1331 #ifndef _SYS_SYSPROTO_H_ 1332 struct mkfifo_args { 1333 char *path; 1334 int mode; 1335 }; 1336 #endif 1337 int 1338 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1339 { 1340 1341 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1342 uap->mode)); 1343 } 1344 1345 #ifndef _SYS_SYSPROTO_H_ 1346 struct mkfifoat_args { 1347 int fd; 1348 char *path; 1349 mode_t mode; 1350 }; 1351 #endif 1352 int 1353 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1354 { 1355 1356 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1357 uap->mode)); 1358 } 1359 1360 int 1361 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1362 int mode) 1363 { 1364 struct mount *mp; 1365 struct vattr vattr; 1366 struct nameidata nd; 1367 int error; 1368 1369 AUDIT_ARG_MODE(mode); 1370 restart: 1371 bwillwrite(); 1372 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1373 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights, 1374 td); 1375 if ((error = namei(&nd)) != 0) 1376 return (error); 1377 if (nd.ni_vp != NULL) { 1378 NDFREE(&nd, NDF_ONLY_PNBUF); 1379 if (nd.ni_vp == nd.ni_dvp) 1380 vrele(nd.ni_dvp); 1381 else 1382 vput(nd.ni_dvp); 1383 vrele(nd.ni_vp); 1384 return (EEXIST); 1385 } 1386 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1387 NDFREE(&nd, NDF_ONLY_PNBUF); 1388 vput(nd.ni_dvp); 1389 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1390 return (error); 1391 goto restart; 1392 } 1393 VATTR_NULL(&vattr); 1394 vattr.va_type = VFIFO; 1395 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1396 #ifdef MAC 1397 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1398 &vattr); 1399 if (error != 0) 1400 goto out; 1401 #endif 1402 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1403 if (error == 0) 1404 vput(nd.ni_vp); 1405 #ifdef MAC 1406 out: 1407 #endif 1408 vput(nd.ni_dvp); 1409 vn_finished_write(mp); 1410 NDFREE(&nd, NDF_ONLY_PNBUF); 1411 return (error); 1412 } 1413 1414 /* 1415 * Make a hard file link. 1416 */ 1417 #ifndef _SYS_SYSPROTO_H_ 1418 struct link_args { 1419 char *path; 1420 char *link; 1421 }; 1422 #endif 1423 int 1424 sys_link(struct thread *td, struct link_args *uap) 1425 { 1426 1427 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1428 UIO_USERSPACE, FOLLOW)); 1429 } 1430 1431 #ifndef _SYS_SYSPROTO_H_ 1432 struct linkat_args { 1433 int fd1; 1434 char *path1; 1435 int fd2; 1436 char *path2; 1437 int flag; 1438 }; 1439 #endif 1440 int 1441 sys_linkat(struct thread *td, struct linkat_args *uap) 1442 { 1443 int flag; 1444 1445 flag = uap->flag; 1446 if (flag & ~AT_SYMLINK_FOLLOW) 1447 return (EINVAL); 1448 1449 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1450 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1451 } 1452 1453 int hardlink_check_uid = 0; 1454 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1455 &hardlink_check_uid, 0, 1456 "Unprivileged processes cannot create hard links to files owned by other " 1457 "users"); 1458 static int hardlink_check_gid = 0; 1459 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1460 &hardlink_check_gid, 0, 1461 "Unprivileged processes cannot create hard links to files owned by other " 1462 "groups"); 1463 1464 static int 1465 can_hardlink(struct vnode *vp, struct ucred *cred) 1466 { 1467 struct vattr va; 1468 int error; 1469 1470 if (!hardlink_check_uid && !hardlink_check_gid) 1471 return (0); 1472 1473 error = VOP_GETATTR(vp, &va, cred); 1474 if (error != 0) 1475 return (error); 1476 1477 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1478 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1479 if (error != 0) 1480 return (error); 1481 } 1482 1483 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1484 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1485 if (error != 0) 1486 return (error); 1487 } 1488 1489 return (0); 1490 } 1491 1492 int 1493 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1494 enum uio_seg segflg, int follow) 1495 { 1496 struct vnode *vp; 1497 struct mount *mp; 1498 struct nameidata nd; 1499 int error; 1500 1501 again: 1502 bwillwrite(); 1503 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1504 &cap_linkat_source_rights, td); 1505 1506 if ((error = namei(&nd)) != 0) 1507 return (error); 1508 NDFREE(&nd, NDF_ONLY_PNBUF); 1509 vp = nd.ni_vp; 1510 if (vp->v_type == VDIR) { 1511 vrele(vp); 1512 return (EPERM); /* POSIX */ 1513 } 1514 NDINIT_ATRIGHTS(&nd, CREATE, 1515 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1516 &cap_linkat_target_rights, td); 1517 if ((error = namei(&nd)) == 0) { 1518 if (nd.ni_vp != NULL) { 1519 NDFREE(&nd, NDF_ONLY_PNBUF); 1520 if (nd.ni_dvp == nd.ni_vp) 1521 vrele(nd.ni_dvp); 1522 else 1523 vput(nd.ni_dvp); 1524 vrele(nd.ni_vp); 1525 vrele(vp); 1526 return (EEXIST); 1527 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1528 /* 1529 * Cross-device link. No need to recheck 1530 * vp->v_type, since it cannot change, except 1531 * to VBAD. 1532 */ 1533 NDFREE(&nd, NDF_ONLY_PNBUF); 1534 vput(nd.ni_dvp); 1535 vrele(vp); 1536 return (EXDEV); 1537 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1538 error = can_hardlink(vp, td->td_ucred); 1539 #ifdef MAC 1540 if (error == 0) 1541 error = mac_vnode_check_link(td->td_ucred, 1542 nd.ni_dvp, vp, &nd.ni_cnd); 1543 #endif 1544 if (error != 0) { 1545 vput(vp); 1546 vput(nd.ni_dvp); 1547 NDFREE(&nd, NDF_ONLY_PNBUF); 1548 return (error); 1549 } 1550 error = vn_start_write(vp, &mp, V_NOWAIT); 1551 if (error != 0) { 1552 vput(vp); 1553 vput(nd.ni_dvp); 1554 NDFREE(&nd, NDF_ONLY_PNBUF); 1555 error = vn_start_write(NULL, &mp, 1556 V_XSLEEP | PCATCH); 1557 if (error != 0) 1558 return (error); 1559 goto again; 1560 } 1561 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1562 VOP_UNLOCK(vp, 0); 1563 vput(nd.ni_dvp); 1564 vn_finished_write(mp); 1565 NDFREE(&nd, NDF_ONLY_PNBUF); 1566 } else { 1567 vput(nd.ni_dvp); 1568 NDFREE(&nd, NDF_ONLY_PNBUF); 1569 vrele(vp); 1570 goto again; 1571 } 1572 } 1573 vrele(vp); 1574 return (error); 1575 } 1576 1577 /* 1578 * Make a symbolic link. 1579 */ 1580 #ifndef _SYS_SYSPROTO_H_ 1581 struct symlink_args { 1582 char *path; 1583 char *link; 1584 }; 1585 #endif 1586 int 1587 sys_symlink(struct thread *td, struct symlink_args *uap) 1588 { 1589 1590 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1591 UIO_USERSPACE)); 1592 } 1593 1594 #ifndef _SYS_SYSPROTO_H_ 1595 struct symlinkat_args { 1596 char *path; 1597 int fd; 1598 char *path2; 1599 }; 1600 #endif 1601 int 1602 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1603 { 1604 1605 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1606 UIO_USERSPACE)); 1607 } 1608 1609 int 1610 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1611 enum uio_seg segflg) 1612 { 1613 struct mount *mp; 1614 struct vattr vattr; 1615 char *syspath; 1616 struct nameidata nd; 1617 int error; 1618 1619 if (segflg == UIO_SYSSPACE) { 1620 syspath = path1; 1621 } else { 1622 syspath = uma_zalloc(namei_zone, M_WAITOK); 1623 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1624 goto out; 1625 } 1626 AUDIT_ARG_TEXT(syspath); 1627 restart: 1628 bwillwrite(); 1629 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1630 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1631 td); 1632 if ((error = namei(&nd)) != 0) 1633 goto out; 1634 if (nd.ni_vp) { 1635 NDFREE(&nd, NDF_ONLY_PNBUF); 1636 if (nd.ni_vp == nd.ni_dvp) 1637 vrele(nd.ni_dvp); 1638 else 1639 vput(nd.ni_dvp); 1640 vrele(nd.ni_vp); 1641 error = EEXIST; 1642 goto out; 1643 } 1644 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1645 NDFREE(&nd, NDF_ONLY_PNBUF); 1646 vput(nd.ni_dvp); 1647 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1648 goto out; 1649 goto restart; 1650 } 1651 VATTR_NULL(&vattr); 1652 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1653 #ifdef MAC 1654 vattr.va_type = VLNK; 1655 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1656 &vattr); 1657 if (error != 0) 1658 goto out2; 1659 #endif 1660 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1661 if (error == 0) 1662 vput(nd.ni_vp); 1663 #ifdef MAC 1664 out2: 1665 #endif 1666 NDFREE(&nd, NDF_ONLY_PNBUF); 1667 vput(nd.ni_dvp); 1668 vn_finished_write(mp); 1669 out: 1670 if (segflg != UIO_SYSSPACE) 1671 uma_zfree(namei_zone, syspath); 1672 return (error); 1673 } 1674 1675 /* 1676 * Delete a whiteout from the filesystem. 1677 */ 1678 #ifndef _SYS_SYSPROTO_H_ 1679 struct undelete_args { 1680 char *path; 1681 }; 1682 #endif 1683 int 1684 sys_undelete(struct thread *td, struct undelete_args *uap) 1685 { 1686 struct mount *mp; 1687 struct nameidata nd; 1688 int error; 1689 1690 restart: 1691 bwillwrite(); 1692 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1693 UIO_USERSPACE, uap->path, td); 1694 error = namei(&nd); 1695 if (error != 0) 1696 return (error); 1697 1698 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1699 NDFREE(&nd, NDF_ONLY_PNBUF); 1700 if (nd.ni_vp == nd.ni_dvp) 1701 vrele(nd.ni_dvp); 1702 else 1703 vput(nd.ni_dvp); 1704 if (nd.ni_vp) 1705 vrele(nd.ni_vp); 1706 return (EEXIST); 1707 } 1708 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1709 NDFREE(&nd, NDF_ONLY_PNBUF); 1710 vput(nd.ni_dvp); 1711 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1712 return (error); 1713 goto restart; 1714 } 1715 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1716 NDFREE(&nd, NDF_ONLY_PNBUF); 1717 vput(nd.ni_dvp); 1718 vn_finished_write(mp); 1719 return (error); 1720 } 1721 1722 /* 1723 * Delete a name from the filesystem. 1724 */ 1725 #ifndef _SYS_SYSPROTO_H_ 1726 struct unlink_args { 1727 char *path; 1728 }; 1729 #endif 1730 int 1731 sys_unlink(struct thread *td, struct unlink_args *uap) 1732 { 1733 1734 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1735 } 1736 1737 #ifndef _SYS_SYSPROTO_H_ 1738 struct unlinkat_args { 1739 int fd; 1740 char *path; 1741 int flag; 1742 }; 1743 #endif 1744 int 1745 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1746 { 1747 int flag = uap->flag; 1748 int fd = uap->fd; 1749 char *path = uap->path; 1750 1751 if (flag & ~AT_REMOVEDIR) 1752 return (EINVAL); 1753 1754 if (flag & AT_REMOVEDIR) 1755 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1756 else 1757 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1758 } 1759 1760 int 1761 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1762 ino_t oldinum) 1763 { 1764 struct mount *mp; 1765 struct vnode *vp; 1766 struct nameidata nd; 1767 struct stat sb; 1768 int error; 1769 1770 restart: 1771 bwillwrite(); 1772 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1773 pathseg, path, fd, &cap_unlinkat_rights, td); 1774 if ((error = namei(&nd)) != 0) 1775 return (error == EINVAL ? EPERM : error); 1776 vp = nd.ni_vp; 1777 if (vp->v_type == VDIR && oldinum == 0) { 1778 error = EPERM; /* POSIX */ 1779 } else if (oldinum != 0 && 1780 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1781 sb.st_ino != oldinum) { 1782 error = EIDRM; /* Identifier removed */ 1783 } else { 1784 /* 1785 * The root of a mounted filesystem cannot be deleted. 1786 * 1787 * XXX: can this only be a VDIR case? 1788 */ 1789 if (vp->v_vflag & VV_ROOT) 1790 error = EBUSY; 1791 } 1792 if (error == 0) { 1793 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1794 NDFREE(&nd, NDF_ONLY_PNBUF); 1795 vput(nd.ni_dvp); 1796 if (vp == nd.ni_dvp) 1797 vrele(vp); 1798 else 1799 vput(vp); 1800 if ((error = vn_start_write(NULL, &mp, 1801 V_XSLEEP | PCATCH)) != 0) 1802 return (error); 1803 goto restart; 1804 } 1805 #ifdef MAC 1806 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1807 &nd.ni_cnd); 1808 if (error != 0) 1809 goto out; 1810 #endif 1811 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1812 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1813 #ifdef MAC 1814 out: 1815 #endif 1816 vn_finished_write(mp); 1817 } 1818 NDFREE(&nd, NDF_ONLY_PNBUF); 1819 vput(nd.ni_dvp); 1820 if (vp == nd.ni_dvp) 1821 vrele(vp); 1822 else 1823 vput(vp); 1824 return (error); 1825 } 1826 1827 /* 1828 * Reposition read/write file offset. 1829 */ 1830 #ifndef _SYS_SYSPROTO_H_ 1831 struct lseek_args { 1832 int fd; 1833 int pad; 1834 off_t offset; 1835 int whence; 1836 }; 1837 #endif 1838 int 1839 sys_lseek(struct thread *td, struct lseek_args *uap) 1840 { 1841 1842 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1843 } 1844 1845 int 1846 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1847 { 1848 struct file *fp; 1849 int error; 1850 1851 AUDIT_ARG_FD(fd); 1852 error = fget(td, fd, &cap_seek_rights, &fp); 1853 if (error != 0) 1854 return (error); 1855 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1856 fo_seek(fp, offset, whence, td) : ESPIPE; 1857 fdrop(fp, td); 1858 return (error); 1859 } 1860 1861 #if defined(COMPAT_43) 1862 /* 1863 * Reposition read/write file offset. 1864 */ 1865 #ifndef _SYS_SYSPROTO_H_ 1866 struct olseek_args { 1867 int fd; 1868 long offset; 1869 int whence; 1870 }; 1871 #endif 1872 int 1873 olseek(struct thread *td, struct olseek_args *uap) 1874 { 1875 1876 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1877 } 1878 #endif /* COMPAT_43 */ 1879 1880 #if defined(COMPAT_FREEBSD6) 1881 /* Version with the 'pad' argument */ 1882 int 1883 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1884 { 1885 1886 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1887 } 1888 #endif 1889 1890 /* 1891 * Check access permissions using passed credentials. 1892 */ 1893 static int 1894 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1895 struct thread *td) 1896 { 1897 accmode_t accmode; 1898 int error; 1899 1900 /* Flags == 0 means only check for existence. */ 1901 if (user_flags == 0) 1902 return (0); 1903 1904 accmode = 0; 1905 if (user_flags & R_OK) 1906 accmode |= VREAD; 1907 if (user_flags & W_OK) 1908 accmode |= VWRITE; 1909 if (user_flags & X_OK) 1910 accmode |= VEXEC; 1911 #ifdef MAC 1912 error = mac_vnode_check_access(cred, vp, accmode); 1913 if (error != 0) 1914 return (error); 1915 #endif 1916 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1917 error = VOP_ACCESS(vp, accmode, cred, td); 1918 return (error); 1919 } 1920 1921 /* 1922 * Check access permissions using "real" credentials. 1923 */ 1924 #ifndef _SYS_SYSPROTO_H_ 1925 struct access_args { 1926 char *path; 1927 int amode; 1928 }; 1929 #endif 1930 int 1931 sys_access(struct thread *td, struct access_args *uap) 1932 { 1933 1934 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1935 0, uap->amode)); 1936 } 1937 1938 #ifndef _SYS_SYSPROTO_H_ 1939 struct faccessat_args { 1940 int dirfd; 1941 char *path; 1942 int amode; 1943 int flag; 1944 } 1945 #endif 1946 int 1947 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1948 { 1949 1950 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1951 uap->amode)); 1952 } 1953 1954 int 1955 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1956 int flag, int amode) 1957 { 1958 struct ucred *cred, *usecred; 1959 struct vnode *vp; 1960 struct nameidata nd; 1961 int error; 1962 1963 if (flag & ~AT_EACCESS) 1964 return (EINVAL); 1965 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1966 return (EINVAL); 1967 1968 /* 1969 * Create and modify a temporary credential instead of one that 1970 * is potentially shared (if we need one). 1971 */ 1972 cred = td->td_ucred; 1973 if ((flag & AT_EACCESS) == 0 && 1974 ((cred->cr_uid != cred->cr_ruid || 1975 cred->cr_rgid != cred->cr_groups[0]))) { 1976 usecred = crdup(cred); 1977 usecred->cr_uid = cred->cr_ruid; 1978 usecred->cr_groups[0] = cred->cr_rgid; 1979 td->td_ucred = usecred; 1980 } else 1981 usecred = cred; 1982 AUDIT_ARG_VALUE(amode); 1983 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1984 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights, 1985 td); 1986 if ((error = namei(&nd)) != 0) 1987 goto out; 1988 vp = nd.ni_vp; 1989 1990 error = vn_access(vp, amode, usecred, td); 1991 NDFREE(&nd, NDF_ONLY_PNBUF); 1992 vput(vp); 1993 out: 1994 if (usecred != cred) { 1995 td->td_ucred = cred; 1996 crfree(usecred); 1997 } 1998 return (error); 1999 } 2000 2001 /* 2002 * Check access permissions using "effective" credentials. 2003 */ 2004 #ifndef _SYS_SYSPROTO_H_ 2005 struct eaccess_args { 2006 char *path; 2007 int amode; 2008 }; 2009 #endif 2010 int 2011 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2012 { 2013 2014 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2015 AT_EACCESS, uap->amode)); 2016 } 2017 2018 #if defined(COMPAT_43) 2019 /* 2020 * Get file status; this version follows links. 2021 */ 2022 #ifndef _SYS_SYSPROTO_H_ 2023 struct ostat_args { 2024 char *path; 2025 struct ostat *ub; 2026 }; 2027 #endif 2028 int 2029 ostat(struct thread *td, struct ostat_args *uap) 2030 { 2031 struct stat sb; 2032 struct ostat osb; 2033 int error; 2034 2035 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2036 &sb, NULL); 2037 if (error != 0) 2038 return (error); 2039 cvtstat(&sb, &osb); 2040 return (copyout(&osb, uap->ub, sizeof (osb))); 2041 } 2042 2043 /* 2044 * Get file status; this version does not follow links. 2045 */ 2046 #ifndef _SYS_SYSPROTO_H_ 2047 struct olstat_args { 2048 char *path; 2049 struct ostat *ub; 2050 }; 2051 #endif 2052 int 2053 olstat(struct thread *td, struct olstat_args *uap) 2054 { 2055 struct stat sb; 2056 struct ostat osb; 2057 int error; 2058 2059 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2060 UIO_USERSPACE, &sb, NULL); 2061 if (error != 0) 2062 return (error); 2063 cvtstat(&sb, &osb); 2064 return (copyout(&osb, uap->ub, sizeof (osb))); 2065 } 2066 2067 /* 2068 * Convert from an old to a new stat structure. 2069 * XXX: many values are blindly truncated. 2070 */ 2071 void 2072 cvtstat(struct stat *st, struct ostat *ost) 2073 { 2074 2075 bzero(ost, sizeof(*ost)); 2076 ost->st_dev = st->st_dev; 2077 ost->st_ino = st->st_ino; 2078 ost->st_mode = st->st_mode; 2079 ost->st_nlink = st->st_nlink; 2080 ost->st_uid = st->st_uid; 2081 ost->st_gid = st->st_gid; 2082 ost->st_rdev = st->st_rdev; 2083 ost->st_size = MIN(st->st_size, INT32_MAX); 2084 ost->st_atim = st->st_atim; 2085 ost->st_mtim = st->st_mtim; 2086 ost->st_ctim = st->st_ctim; 2087 ost->st_blksize = st->st_blksize; 2088 ost->st_blocks = st->st_blocks; 2089 ost->st_flags = st->st_flags; 2090 ost->st_gen = st->st_gen; 2091 } 2092 #endif /* COMPAT_43 */ 2093 2094 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2095 int ino64_trunc_error; 2096 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2097 &ino64_trunc_error, 0, 2098 "Error on truncation of device, file or inode number, or link count"); 2099 2100 int 2101 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2102 { 2103 2104 ost->st_dev = st->st_dev; 2105 if (ost->st_dev != st->st_dev) { 2106 switch (ino64_trunc_error) { 2107 default: 2108 /* 2109 * Since dev_t is almost raw, don't clamp to the 2110 * maximum for case 2, but ignore the error. 2111 */ 2112 break; 2113 case 1: 2114 return (EOVERFLOW); 2115 } 2116 } 2117 ost->st_ino = st->st_ino; 2118 if (ost->st_ino != st->st_ino) { 2119 switch (ino64_trunc_error) { 2120 default: 2121 case 0: 2122 break; 2123 case 1: 2124 return (EOVERFLOW); 2125 case 2: 2126 ost->st_ino = UINT32_MAX; 2127 break; 2128 } 2129 } 2130 ost->st_mode = st->st_mode; 2131 ost->st_nlink = st->st_nlink; 2132 if (ost->st_nlink != st->st_nlink) { 2133 switch (ino64_trunc_error) { 2134 default: 2135 case 0: 2136 break; 2137 case 1: 2138 return (EOVERFLOW); 2139 case 2: 2140 ost->st_nlink = UINT16_MAX; 2141 break; 2142 } 2143 } 2144 ost->st_uid = st->st_uid; 2145 ost->st_gid = st->st_gid; 2146 ost->st_rdev = st->st_rdev; 2147 if (ost->st_rdev != st->st_rdev) { 2148 switch (ino64_trunc_error) { 2149 default: 2150 break; 2151 case 1: 2152 return (EOVERFLOW); 2153 } 2154 } 2155 ost->st_atim = st->st_atim; 2156 ost->st_mtim = st->st_mtim; 2157 ost->st_ctim = st->st_ctim; 2158 ost->st_size = st->st_size; 2159 ost->st_blocks = st->st_blocks; 2160 ost->st_blksize = st->st_blksize; 2161 ost->st_flags = st->st_flags; 2162 ost->st_gen = st->st_gen; 2163 ost->st_lspare = 0; 2164 ost->st_birthtim = st->st_birthtim; 2165 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2166 sizeof(*ost) - offsetof(struct freebsd11_stat, 2167 st_birthtim) - sizeof(ost->st_birthtim)); 2168 return (0); 2169 } 2170 2171 int 2172 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2173 { 2174 struct stat sb; 2175 struct freebsd11_stat osb; 2176 int error; 2177 2178 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2179 &sb, NULL); 2180 if (error != 0) 2181 return (error); 2182 error = freebsd11_cvtstat(&sb, &osb); 2183 if (error == 0) 2184 error = copyout(&osb, uap->ub, sizeof(osb)); 2185 return (error); 2186 } 2187 2188 int 2189 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2190 { 2191 struct stat sb; 2192 struct freebsd11_stat osb; 2193 int error; 2194 2195 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2196 UIO_USERSPACE, &sb, NULL); 2197 if (error != 0) 2198 return (error); 2199 error = freebsd11_cvtstat(&sb, &osb); 2200 if (error == 0) 2201 error = copyout(&osb, uap->ub, sizeof(osb)); 2202 return (error); 2203 } 2204 2205 int 2206 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2207 { 2208 struct fhandle fh; 2209 struct stat sb; 2210 struct freebsd11_stat osb; 2211 int error; 2212 2213 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2214 if (error != 0) 2215 return (error); 2216 error = kern_fhstat(td, fh, &sb); 2217 if (error != 0) 2218 return (error); 2219 error = freebsd11_cvtstat(&sb, &osb); 2220 if (error == 0) 2221 error = copyout(&osb, uap->sb, sizeof(osb)); 2222 return (error); 2223 } 2224 2225 int 2226 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2227 { 2228 struct stat sb; 2229 struct freebsd11_stat osb; 2230 int error; 2231 2232 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2233 UIO_USERSPACE, &sb, NULL); 2234 if (error != 0) 2235 return (error); 2236 error = freebsd11_cvtstat(&sb, &osb); 2237 if (error == 0) 2238 error = copyout(&osb, uap->buf, sizeof(osb)); 2239 return (error); 2240 } 2241 #endif /* COMPAT_FREEBSD11 */ 2242 2243 /* 2244 * Get file status 2245 */ 2246 #ifndef _SYS_SYSPROTO_H_ 2247 struct fstatat_args { 2248 int fd; 2249 char *path; 2250 struct stat *buf; 2251 int flag; 2252 } 2253 #endif 2254 int 2255 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2256 { 2257 struct stat sb; 2258 int error; 2259 2260 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2261 UIO_USERSPACE, &sb, NULL); 2262 if (error == 0) 2263 error = copyout(&sb, uap->buf, sizeof (sb)); 2264 return (error); 2265 } 2266 2267 int 2268 kern_statat(struct thread *td, int flag, int fd, char *path, 2269 enum uio_seg pathseg, struct stat *sbp, 2270 void (*hook)(struct vnode *vp, struct stat *sbp)) 2271 { 2272 struct nameidata nd; 2273 struct stat sb; 2274 int error; 2275 2276 if (flag & ~AT_SYMLINK_NOFOLLOW) 2277 return (EINVAL); 2278 2279 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2280 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2281 &cap_fstat_rights, td); 2282 2283 if ((error = namei(&nd)) != 0) 2284 return (error); 2285 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2286 if (error == 0) { 2287 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2288 if (S_ISREG(sb.st_mode)) 2289 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2290 if (__predict_false(hook != NULL)) 2291 hook(nd.ni_vp, &sb); 2292 } 2293 NDFREE(&nd, NDF_ONLY_PNBUF); 2294 vput(nd.ni_vp); 2295 if (error != 0) 2296 return (error); 2297 #ifdef __STAT_TIME_T_EXT 2298 sb.st_atim_ext = 0; 2299 sb.st_mtim_ext = 0; 2300 sb.st_ctim_ext = 0; 2301 sb.st_btim_ext = 0; 2302 #endif 2303 *sbp = sb; 2304 #ifdef KTRACE 2305 if (KTRPOINT(td, KTR_STRUCT)) 2306 ktrstat(&sb); 2307 #endif 2308 return (0); 2309 } 2310 2311 #if defined(COMPAT_FREEBSD11) 2312 /* 2313 * Implementation of the NetBSD [l]stat() functions. 2314 */ 2315 void 2316 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2317 { 2318 2319 bzero(nsb, sizeof(*nsb)); 2320 nsb->st_dev = sb->st_dev; 2321 nsb->st_ino = sb->st_ino; 2322 nsb->st_mode = sb->st_mode; 2323 nsb->st_nlink = sb->st_nlink; 2324 nsb->st_uid = sb->st_uid; 2325 nsb->st_gid = sb->st_gid; 2326 nsb->st_rdev = sb->st_rdev; 2327 nsb->st_atim = sb->st_atim; 2328 nsb->st_mtim = sb->st_mtim; 2329 nsb->st_ctim = sb->st_ctim; 2330 nsb->st_size = sb->st_size; 2331 nsb->st_blocks = sb->st_blocks; 2332 nsb->st_blksize = sb->st_blksize; 2333 nsb->st_flags = sb->st_flags; 2334 nsb->st_gen = sb->st_gen; 2335 nsb->st_birthtim = sb->st_birthtim; 2336 } 2337 2338 #ifndef _SYS_SYSPROTO_H_ 2339 struct freebsd11_nstat_args { 2340 char *path; 2341 struct nstat *ub; 2342 }; 2343 #endif 2344 int 2345 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2346 { 2347 struct stat sb; 2348 struct nstat nsb; 2349 int error; 2350 2351 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2352 &sb, NULL); 2353 if (error != 0) 2354 return (error); 2355 freebsd11_cvtnstat(&sb, &nsb); 2356 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2357 } 2358 2359 /* 2360 * NetBSD lstat. Get file status; this version does not follow links. 2361 */ 2362 #ifndef _SYS_SYSPROTO_H_ 2363 struct freebsd11_nlstat_args { 2364 char *path; 2365 struct nstat *ub; 2366 }; 2367 #endif 2368 int 2369 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2370 { 2371 struct stat sb; 2372 struct nstat nsb; 2373 int error; 2374 2375 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2376 UIO_USERSPACE, &sb, NULL); 2377 if (error != 0) 2378 return (error); 2379 freebsd11_cvtnstat(&sb, &nsb); 2380 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2381 } 2382 #endif /* COMPAT_FREEBSD11 */ 2383 2384 /* 2385 * Get configurable pathname variables. 2386 */ 2387 #ifndef _SYS_SYSPROTO_H_ 2388 struct pathconf_args { 2389 char *path; 2390 int name; 2391 }; 2392 #endif 2393 int 2394 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2395 { 2396 long value; 2397 int error; 2398 2399 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2400 &value); 2401 if (error == 0) 2402 td->td_retval[0] = value; 2403 return (error); 2404 } 2405 2406 #ifndef _SYS_SYSPROTO_H_ 2407 struct lpathconf_args { 2408 char *path; 2409 int name; 2410 }; 2411 #endif 2412 int 2413 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2414 { 2415 long value; 2416 int error; 2417 2418 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2419 NOFOLLOW, &value); 2420 if (error == 0) 2421 td->td_retval[0] = value; 2422 return (error); 2423 } 2424 2425 int 2426 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2427 u_long flags, long *valuep) 2428 { 2429 struct nameidata nd; 2430 int error; 2431 2432 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2433 pathseg, path, td); 2434 if ((error = namei(&nd)) != 0) 2435 return (error); 2436 NDFREE(&nd, NDF_ONLY_PNBUF); 2437 2438 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2439 vput(nd.ni_vp); 2440 return (error); 2441 } 2442 2443 /* 2444 * Return target name of a symbolic link. 2445 */ 2446 #ifndef _SYS_SYSPROTO_H_ 2447 struct readlink_args { 2448 char *path; 2449 char *buf; 2450 size_t count; 2451 }; 2452 #endif 2453 int 2454 sys_readlink(struct thread *td, struct readlink_args *uap) 2455 { 2456 2457 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2458 uap->buf, UIO_USERSPACE, uap->count)); 2459 } 2460 #ifndef _SYS_SYSPROTO_H_ 2461 struct readlinkat_args { 2462 int fd; 2463 char *path; 2464 char *buf; 2465 size_t bufsize; 2466 }; 2467 #endif 2468 int 2469 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2470 { 2471 2472 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2473 uap->buf, UIO_USERSPACE, uap->bufsize)); 2474 } 2475 2476 int 2477 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2478 char *buf, enum uio_seg bufseg, size_t count) 2479 { 2480 struct vnode *vp; 2481 struct iovec aiov; 2482 struct uio auio; 2483 struct nameidata nd; 2484 int error; 2485 2486 if (count > IOSIZE_MAX) 2487 return (EINVAL); 2488 2489 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2490 pathseg, path, fd, td); 2491 2492 if ((error = namei(&nd)) != 0) 2493 return (error); 2494 NDFREE(&nd, NDF_ONLY_PNBUF); 2495 vp = nd.ni_vp; 2496 #ifdef MAC 2497 error = mac_vnode_check_readlink(td->td_ucred, vp); 2498 if (error != 0) { 2499 vput(vp); 2500 return (error); 2501 } 2502 #endif 2503 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2504 error = EINVAL; 2505 else { 2506 aiov.iov_base = buf; 2507 aiov.iov_len = count; 2508 auio.uio_iov = &aiov; 2509 auio.uio_iovcnt = 1; 2510 auio.uio_offset = 0; 2511 auio.uio_rw = UIO_READ; 2512 auio.uio_segflg = bufseg; 2513 auio.uio_td = td; 2514 auio.uio_resid = count; 2515 error = VOP_READLINK(vp, &auio, td->td_ucred); 2516 td->td_retval[0] = count - auio.uio_resid; 2517 } 2518 vput(vp); 2519 return (error); 2520 } 2521 2522 /* 2523 * Common implementation code for chflags() and fchflags(). 2524 */ 2525 static int 2526 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2527 { 2528 struct mount *mp; 2529 struct vattr vattr; 2530 int error; 2531 2532 /* We can't support the value matching VNOVAL. */ 2533 if (flags == VNOVAL) 2534 return (EOPNOTSUPP); 2535 2536 /* 2537 * Prevent non-root users from setting flags on devices. When 2538 * a device is reused, users can retain ownership of the device 2539 * if they are allowed to set flags and programs assume that 2540 * chown can't fail when done as root. 2541 */ 2542 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2543 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2544 if (error != 0) 2545 return (error); 2546 } 2547 2548 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2549 return (error); 2550 VATTR_NULL(&vattr); 2551 vattr.va_flags = flags; 2552 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2553 #ifdef MAC 2554 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2555 if (error == 0) 2556 #endif 2557 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2558 VOP_UNLOCK(vp, 0); 2559 vn_finished_write(mp); 2560 return (error); 2561 } 2562 2563 /* 2564 * Change flags of a file given a path name. 2565 */ 2566 #ifndef _SYS_SYSPROTO_H_ 2567 struct chflags_args { 2568 const char *path; 2569 u_long flags; 2570 }; 2571 #endif 2572 int 2573 sys_chflags(struct thread *td, struct chflags_args *uap) 2574 { 2575 2576 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2577 uap->flags, 0)); 2578 } 2579 2580 #ifndef _SYS_SYSPROTO_H_ 2581 struct chflagsat_args { 2582 int fd; 2583 const char *path; 2584 u_long flags; 2585 int atflag; 2586 } 2587 #endif 2588 int 2589 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2590 { 2591 int fd = uap->fd; 2592 const char *path = uap->path; 2593 u_long flags = uap->flags; 2594 int atflag = uap->atflag; 2595 2596 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2597 return (EINVAL); 2598 2599 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2600 } 2601 2602 /* 2603 * Same as chflags() but doesn't follow symlinks. 2604 */ 2605 #ifndef _SYS_SYSPROTO_H_ 2606 struct lchflags_args { 2607 const char *path; 2608 u_long flags; 2609 }; 2610 #endif 2611 int 2612 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2613 { 2614 2615 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2616 uap->flags, AT_SYMLINK_NOFOLLOW)); 2617 } 2618 2619 static int 2620 kern_chflagsat(struct thread *td, int fd, const char *path, 2621 enum uio_seg pathseg, u_long flags, int atflag) 2622 { 2623 struct nameidata nd; 2624 int error, follow; 2625 2626 AUDIT_ARG_FFLAGS(flags); 2627 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2628 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2629 &cap_fchflags_rights, td); 2630 if ((error = namei(&nd)) != 0) 2631 return (error); 2632 NDFREE(&nd, NDF_ONLY_PNBUF); 2633 error = setfflags(td, nd.ni_vp, flags); 2634 vrele(nd.ni_vp); 2635 return (error); 2636 } 2637 2638 /* 2639 * Change flags of a file given a file descriptor. 2640 */ 2641 #ifndef _SYS_SYSPROTO_H_ 2642 struct fchflags_args { 2643 int fd; 2644 u_long flags; 2645 }; 2646 #endif 2647 int 2648 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2649 { 2650 struct file *fp; 2651 int error; 2652 2653 AUDIT_ARG_FD(uap->fd); 2654 AUDIT_ARG_FFLAGS(uap->flags); 2655 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2656 &fp); 2657 if (error != 0) 2658 return (error); 2659 #ifdef AUDIT 2660 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2661 AUDIT_ARG_VNODE1(fp->f_vnode); 2662 VOP_UNLOCK(fp->f_vnode, 0); 2663 #endif 2664 error = setfflags(td, fp->f_vnode, uap->flags); 2665 fdrop(fp, td); 2666 return (error); 2667 } 2668 2669 /* 2670 * Common implementation code for chmod(), lchmod() and fchmod(). 2671 */ 2672 int 2673 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2674 { 2675 struct mount *mp; 2676 struct vattr vattr; 2677 int error; 2678 2679 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2680 return (error); 2681 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2682 VATTR_NULL(&vattr); 2683 vattr.va_mode = mode & ALLPERMS; 2684 #ifdef MAC 2685 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2686 if (error == 0) 2687 #endif 2688 error = VOP_SETATTR(vp, &vattr, cred); 2689 VOP_UNLOCK(vp, 0); 2690 vn_finished_write(mp); 2691 return (error); 2692 } 2693 2694 /* 2695 * Change mode of a file given path name. 2696 */ 2697 #ifndef _SYS_SYSPROTO_H_ 2698 struct chmod_args { 2699 char *path; 2700 int mode; 2701 }; 2702 #endif 2703 int 2704 sys_chmod(struct thread *td, struct chmod_args *uap) 2705 { 2706 2707 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2708 uap->mode, 0)); 2709 } 2710 2711 #ifndef _SYS_SYSPROTO_H_ 2712 struct fchmodat_args { 2713 int dirfd; 2714 char *path; 2715 mode_t mode; 2716 int flag; 2717 } 2718 #endif 2719 int 2720 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2721 { 2722 int flag = uap->flag; 2723 int fd = uap->fd; 2724 char *path = uap->path; 2725 mode_t mode = uap->mode; 2726 2727 if (flag & ~AT_SYMLINK_NOFOLLOW) 2728 return (EINVAL); 2729 2730 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2731 } 2732 2733 /* 2734 * Change mode of a file given path name (don't follow links.) 2735 */ 2736 #ifndef _SYS_SYSPROTO_H_ 2737 struct lchmod_args { 2738 char *path; 2739 int mode; 2740 }; 2741 #endif 2742 int 2743 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2744 { 2745 2746 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2747 uap->mode, AT_SYMLINK_NOFOLLOW)); 2748 } 2749 2750 int 2751 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2752 mode_t mode, int flag) 2753 { 2754 struct nameidata nd; 2755 int error, follow; 2756 2757 AUDIT_ARG_MODE(mode); 2758 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2759 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2760 &cap_fchmod_rights, td); 2761 if ((error = namei(&nd)) != 0) 2762 return (error); 2763 NDFREE(&nd, NDF_ONLY_PNBUF); 2764 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2765 vrele(nd.ni_vp); 2766 return (error); 2767 } 2768 2769 /* 2770 * Change mode of a file given a file descriptor. 2771 */ 2772 #ifndef _SYS_SYSPROTO_H_ 2773 struct fchmod_args { 2774 int fd; 2775 int mode; 2776 }; 2777 #endif 2778 int 2779 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2780 { 2781 struct file *fp; 2782 int error; 2783 2784 AUDIT_ARG_FD(uap->fd); 2785 AUDIT_ARG_MODE(uap->mode); 2786 2787 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2788 if (error != 0) 2789 return (error); 2790 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2791 fdrop(fp, td); 2792 return (error); 2793 } 2794 2795 /* 2796 * Common implementation for chown(), lchown(), and fchown() 2797 */ 2798 int 2799 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2800 gid_t gid) 2801 { 2802 struct mount *mp; 2803 struct vattr vattr; 2804 int error; 2805 2806 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2807 return (error); 2808 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2809 VATTR_NULL(&vattr); 2810 vattr.va_uid = uid; 2811 vattr.va_gid = gid; 2812 #ifdef MAC 2813 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2814 vattr.va_gid); 2815 if (error == 0) 2816 #endif 2817 error = VOP_SETATTR(vp, &vattr, cred); 2818 VOP_UNLOCK(vp, 0); 2819 vn_finished_write(mp); 2820 return (error); 2821 } 2822 2823 /* 2824 * Set ownership given a path name. 2825 */ 2826 #ifndef _SYS_SYSPROTO_H_ 2827 struct chown_args { 2828 char *path; 2829 int uid; 2830 int gid; 2831 }; 2832 #endif 2833 int 2834 sys_chown(struct thread *td, struct chown_args *uap) 2835 { 2836 2837 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2838 uap->gid, 0)); 2839 } 2840 2841 #ifndef _SYS_SYSPROTO_H_ 2842 struct fchownat_args { 2843 int fd; 2844 const char * path; 2845 uid_t uid; 2846 gid_t gid; 2847 int flag; 2848 }; 2849 #endif 2850 int 2851 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2852 { 2853 int flag; 2854 2855 flag = uap->flag; 2856 if (flag & ~AT_SYMLINK_NOFOLLOW) 2857 return (EINVAL); 2858 2859 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2860 uap->gid, uap->flag)); 2861 } 2862 2863 int 2864 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2865 int uid, int gid, int flag) 2866 { 2867 struct nameidata nd; 2868 int error, follow; 2869 2870 AUDIT_ARG_OWNER(uid, gid); 2871 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2872 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2873 &cap_fchown_rights, td); 2874 2875 if ((error = namei(&nd)) != 0) 2876 return (error); 2877 NDFREE(&nd, NDF_ONLY_PNBUF); 2878 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2879 vrele(nd.ni_vp); 2880 return (error); 2881 } 2882 2883 /* 2884 * Set ownership given a path name, do not cross symlinks. 2885 */ 2886 #ifndef _SYS_SYSPROTO_H_ 2887 struct lchown_args { 2888 char *path; 2889 int uid; 2890 int gid; 2891 }; 2892 #endif 2893 int 2894 sys_lchown(struct thread *td, struct lchown_args *uap) 2895 { 2896 2897 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2898 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2899 } 2900 2901 /* 2902 * Set ownership given a file descriptor. 2903 */ 2904 #ifndef _SYS_SYSPROTO_H_ 2905 struct fchown_args { 2906 int fd; 2907 int uid; 2908 int gid; 2909 }; 2910 #endif 2911 int 2912 sys_fchown(struct thread *td, struct fchown_args *uap) 2913 { 2914 struct file *fp; 2915 int error; 2916 2917 AUDIT_ARG_FD(uap->fd); 2918 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2919 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 2920 if (error != 0) 2921 return (error); 2922 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2923 fdrop(fp, td); 2924 return (error); 2925 } 2926 2927 /* 2928 * Common implementation code for utimes(), lutimes(), and futimes(). 2929 */ 2930 static int 2931 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 2932 struct timespec *tsp) 2933 { 2934 struct timeval tv[2]; 2935 const struct timeval *tvp; 2936 int error; 2937 2938 if (usrtvp == NULL) { 2939 vfs_timestamp(&tsp[0]); 2940 tsp[1] = tsp[0]; 2941 } else { 2942 if (tvpseg == UIO_SYSSPACE) { 2943 tvp = usrtvp; 2944 } else { 2945 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2946 return (error); 2947 tvp = tv; 2948 } 2949 2950 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2951 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2952 return (EINVAL); 2953 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2954 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2955 } 2956 return (0); 2957 } 2958 2959 /* 2960 * Common implementation code for futimens(), utimensat(). 2961 */ 2962 #define UTIMENS_NULL 0x1 2963 #define UTIMENS_EXIT 0x2 2964 static int 2965 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2966 struct timespec *tsp, int *retflags) 2967 { 2968 struct timespec tsnow; 2969 int error; 2970 2971 vfs_timestamp(&tsnow); 2972 *retflags = 0; 2973 if (usrtsp == NULL) { 2974 tsp[0] = tsnow; 2975 tsp[1] = tsnow; 2976 *retflags |= UTIMENS_NULL; 2977 return (0); 2978 } 2979 if (tspseg == UIO_SYSSPACE) { 2980 tsp[0] = usrtsp[0]; 2981 tsp[1] = usrtsp[1]; 2982 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2983 return (error); 2984 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2985 *retflags |= UTIMENS_EXIT; 2986 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2987 *retflags |= UTIMENS_NULL; 2988 if (tsp[0].tv_nsec == UTIME_OMIT) 2989 tsp[0].tv_sec = VNOVAL; 2990 else if (tsp[0].tv_nsec == UTIME_NOW) 2991 tsp[0] = tsnow; 2992 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2993 return (EINVAL); 2994 if (tsp[1].tv_nsec == UTIME_OMIT) 2995 tsp[1].tv_sec = VNOVAL; 2996 else if (tsp[1].tv_nsec == UTIME_NOW) 2997 tsp[1] = tsnow; 2998 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2999 return (EINVAL); 3000 3001 return (0); 3002 } 3003 3004 /* 3005 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3006 * and utimensat(). 3007 */ 3008 static int 3009 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3010 int numtimes, int nullflag) 3011 { 3012 struct mount *mp; 3013 struct vattr vattr; 3014 int error, setbirthtime; 3015 3016 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3017 return (error); 3018 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3019 setbirthtime = 0; 3020 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3021 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3022 setbirthtime = 1; 3023 VATTR_NULL(&vattr); 3024 vattr.va_atime = ts[0]; 3025 vattr.va_mtime = ts[1]; 3026 if (setbirthtime) 3027 vattr.va_birthtime = ts[1]; 3028 if (numtimes > 2) 3029 vattr.va_birthtime = ts[2]; 3030 if (nullflag) 3031 vattr.va_vaflags |= VA_UTIMES_NULL; 3032 #ifdef MAC 3033 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3034 vattr.va_mtime); 3035 #endif 3036 if (error == 0) 3037 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3038 VOP_UNLOCK(vp, 0); 3039 vn_finished_write(mp); 3040 return (error); 3041 } 3042 3043 /* 3044 * Set the access and modification times of a file. 3045 */ 3046 #ifndef _SYS_SYSPROTO_H_ 3047 struct utimes_args { 3048 char *path; 3049 struct timeval *tptr; 3050 }; 3051 #endif 3052 int 3053 sys_utimes(struct thread *td, struct utimes_args *uap) 3054 { 3055 3056 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3057 uap->tptr, UIO_USERSPACE)); 3058 } 3059 3060 #ifndef _SYS_SYSPROTO_H_ 3061 struct futimesat_args { 3062 int fd; 3063 const char * path; 3064 const struct timeval * times; 3065 }; 3066 #endif 3067 int 3068 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3069 { 3070 3071 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3072 uap->times, UIO_USERSPACE)); 3073 } 3074 3075 int 3076 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3077 struct timeval *tptr, enum uio_seg tptrseg) 3078 { 3079 struct nameidata nd; 3080 struct timespec ts[2]; 3081 int error; 3082 3083 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3084 return (error); 3085 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3086 &cap_futimes_rights, td); 3087 3088 if ((error = namei(&nd)) != 0) 3089 return (error); 3090 NDFREE(&nd, NDF_ONLY_PNBUF); 3091 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3092 vrele(nd.ni_vp); 3093 return (error); 3094 } 3095 3096 /* 3097 * Set the access and modification times of a file. 3098 */ 3099 #ifndef _SYS_SYSPROTO_H_ 3100 struct lutimes_args { 3101 char *path; 3102 struct timeval *tptr; 3103 }; 3104 #endif 3105 int 3106 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3107 { 3108 3109 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3110 UIO_USERSPACE)); 3111 } 3112 3113 int 3114 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3115 struct timeval *tptr, enum uio_seg tptrseg) 3116 { 3117 struct timespec ts[2]; 3118 struct nameidata nd; 3119 int error; 3120 3121 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3122 return (error); 3123 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3124 if ((error = namei(&nd)) != 0) 3125 return (error); 3126 NDFREE(&nd, NDF_ONLY_PNBUF); 3127 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3128 vrele(nd.ni_vp); 3129 return (error); 3130 } 3131 3132 /* 3133 * Set the access and modification times of a file. 3134 */ 3135 #ifndef _SYS_SYSPROTO_H_ 3136 struct futimes_args { 3137 int fd; 3138 struct timeval *tptr; 3139 }; 3140 #endif 3141 int 3142 sys_futimes(struct thread *td, struct futimes_args *uap) 3143 { 3144 3145 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3146 } 3147 3148 int 3149 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3150 enum uio_seg tptrseg) 3151 { 3152 struct timespec ts[2]; 3153 struct file *fp; 3154 int error; 3155 3156 AUDIT_ARG_FD(fd); 3157 error = getutimes(tptr, tptrseg, ts); 3158 if (error != 0) 3159 return (error); 3160 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3161 if (error != 0) 3162 return (error); 3163 #ifdef AUDIT 3164 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3165 AUDIT_ARG_VNODE1(fp->f_vnode); 3166 VOP_UNLOCK(fp->f_vnode, 0); 3167 #endif 3168 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3169 fdrop(fp, td); 3170 return (error); 3171 } 3172 3173 int 3174 sys_futimens(struct thread *td, struct futimens_args *uap) 3175 { 3176 3177 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3178 } 3179 3180 int 3181 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3182 enum uio_seg tptrseg) 3183 { 3184 struct timespec ts[2]; 3185 struct file *fp; 3186 int error, flags; 3187 3188 AUDIT_ARG_FD(fd); 3189 error = getutimens(tptr, tptrseg, ts, &flags); 3190 if (error != 0) 3191 return (error); 3192 if (flags & UTIMENS_EXIT) 3193 return (0); 3194 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3195 if (error != 0) 3196 return (error); 3197 #ifdef AUDIT 3198 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3199 AUDIT_ARG_VNODE1(fp->f_vnode); 3200 VOP_UNLOCK(fp->f_vnode, 0); 3201 #endif 3202 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3203 fdrop(fp, td); 3204 return (error); 3205 } 3206 3207 int 3208 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3209 { 3210 3211 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3212 uap->times, UIO_USERSPACE, uap->flag)); 3213 } 3214 3215 int 3216 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3217 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3218 { 3219 struct nameidata nd; 3220 struct timespec ts[2]; 3221 int error, flags; 3222 3223 if (flag & ~AT_SYMLINK_NOFOLLOW) 3224 return (EINVAL); 3225 3226 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3227 return (error); 3228 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3229 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3230 &cap_futimes_rights, td); 3231 if ((error = namei(&nd)) != 0) 3232 return (error); 3233 /* 3234 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3235 * POSIX states: 3236 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3237 * "Search permission is denied by a component of the path prefix." 3238 */ 3239 NDFREE(&nd, NDF_ONLY_PNBUF); 3240 if ((flags & UTIMENS_EXIT) == 0) 3241 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3242 vrele(nd.ni_vp); 3243 return (error); 3244 } 3245 3246 /* 3247 * Truncate a file given its path name. 3248 */ 3249 #ifndef _SYS_SYSPROTO_H_ 3250 struct truncate_args { 3251 char *path; 3252 int pad; 3253 off_t length; 3254 }; 3255 #endif 3256 int 3257 sys_truncate(struct thread *td, struct truncate_args *uap) 3258 { 3259 3260 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3261 } 3262 3263 int 3264 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3265 { 3266 struct mount *mp; 3267 struct vnode *vp; 3268 void *rl_cookie; 3269 struct vattr vattr; 3270 struct nameidata nd; 3271 int error; 3272 3273 if (length < 0) 3274 return(EINVAL); 3275 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3276 if ((error = namei(&nd)) != 0) 3277 return (error); 3278 vp = nd.ni_vp; 3279 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3280 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3281 vn_rangelock_unlock(vp, rl_cookie); 3282 vrele(vp); 3283 return (error); 3284 } 3285 NDFREE(&nd, NDF_ONLY_PNBUF); 3286 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3287 if (vp->v_type == VDIR) 3288 error = EISDIR; 3289 #ifdef MAC 3290 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3291 } 3292 #endif 3293 else if ((error = vn_writechk(vp)) == 0 && 3294 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3295 VATTR_NULL(&vattr); 3296 vattr.va_size = length; 3297 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3298 } 3299 VOP_UNLOCK(vp, 0); 3300 vn_finished_write(mp); 3301 vn_rangelock_unlock(vp, rl_cookie); 3302 vrele(vp); 3303 return (error); 3304 } 3305 3306 #if defined(COMPAT_43) 3307 /* 3308 * Truncate a file given its path name. 3309 */ 3310 #ifndef _SYS_SYSPROTO_H_ 3311 struct otruncate_args { 3312 char *path; 3313 long length; 3314 }; 3315 #endif 3316 int 3317 otruncate(struct thread *td, struct otruncate_args *uap) 3318 { 3319 3320 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3321 } 3322 #endif /* COMPAT_43 */ 3323 3324 #if defined(COMPAT_FREEBSD6) 3325 /* Versions with the pad argument */ 3326 int 3327 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3328 { 3329 3330 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3331 } 3332 3333 int 3334 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3335 { 3336 3337 return (kern_ftruncate(td, uap->fd, uap->length)); 3338 } 3339 #endif 3340 3341 int 3342 kern_fsync(struct thread *td, int fd, bool fullsync) 3343 { 3344 struct vnode *vp; 3345 struct mount *mp; 3346 struct file *fp; 3347 int error, lock_flags; 3348 3349 AUDIT_ARG_FD(fd); 3350 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3351 if (error != 0) 3352 return (error); 3353 vp = fp->f_vnode; 3354 #if 0 3355 if (!fullsync) 3356 /* XXXKIB: compete outstanding aio writes */; 3357 #endif 3358 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3359 if (error != 0) 3360 goto drop; 3361 if (MNT_SHARED_WRITES(mp) || 3362 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3363 lock_flags = LK_SHARED; 3364 } else { 3365 lock_flags = LK_EXCLUSIVE; 3366 } 3367 vn_lock(vp, lock_flags | LK_RETRY); 3368 AUDIT_ARG_VNODE1(vp); 3369 if (vp->v_object != NULL) { 3370 VM_OBJECT_WLOCK(vp->v_object); 3371 vm_object_page_clean(vp->v_object, 0, 0, 0); 3372 VM_OBJECT_WUNLOCK(vp->v_object); 3373 } 3374 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3375 VOP_UNLOCK(vp, 0); 3376 vn_finished_write(mp); 3377 drop: 3378 fdrop(fp, td); 3379 return (error); 3380 } 3381 3382 /* 3383 * Sync an open file. 3384 */ 3385 #ifndef _SYS_SYSPROTO_H_ 3386 struct fsync_args { 3387 int fd; 3388 }; 3389 #endif 3390 int 3391 sys_fsync(struct thread *td, struct fsync_args *uap) 3392 { 3393 3394 return (kern_fsync(td, uap->fd, true)); 3395 } 3396 3397 int 3398 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3399 { 3400 3401 return (kern_fsync(td, uap->fd, false)); 3402 } 3403 3404 /* 3405 * Rename files. Source and destination must either both be directories, or 3406 * both not be directories. If target is a directory, it must be empty. 3407 */ 3408 #ifndef _SYS_SYSPROTO_H_ 3409 struct rename_args { 3410 char *from; 3411 char *to; 3412 }; 3413 #endif 3414 int 3415 sys_rename(struct thread *td, struct rename_args *uap) 3416 { 3417 3418 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3419 uap->to, UIO_USERSPACE)); 3420 } 3421 3422 #ifndef _SYS_SYSPROTO_H_ 3423 struct renameat_args { 3424 int oldfd; 3425 char *old; 3426 int newfd; 3427 char *new; 3428 }; 3429 #endif 3430 int 3431 sys_renameat(struct thread *td, struct renameat_args *uap) 3432 { 3433 3434 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3435 UIO_USERSPACE)); 3436 } 3437 3438 int 3439 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3440 enum uio_seg pathseg) 3441 { 3442 struct mount *mp = NULL; 3443 struct vnode *tvp, *fvp, *tdvp; 3444 struct nameidata fromnd, tond; 3445 int error; 3446 3447 again: 3448 bwillwrite(); 3449 #ifdef MAC 3450 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3451 AUDITVNODE1, pathseg, old, oldfd, 3452 &cap_renameat_source_rights, td); 3453 #else 3454 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3455 pathseg, old, oldfd, 3456 &cap_renameat_source_rights, td); 3457 #endif 3458 3459 if ((error = namei(&fromnd)) != 0) 3460 return (error); 3461 #ifdef MAC 3462 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3463 fromnd.ni_vp, &fromnd.ni_cnd); 3464 VOP_UNLOCK(fromnd.ni_dvp, 0); 3465 if (fromnd.ni_dvp != fromnd.ni_vp) 3466 VOP_UNLOCK(fromnd.ni_vp, 0); 3467 #endif 3468 fvp = fromnd.ni_vp; 3469 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3470 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3471 &cap_renameat_target_rights, td); 3472 if (fromnd.ni_vp->v_type == VDIR) 3473 tond.ni_cnd.cn_flags |= WILLBEDIR; 3474 if ((error = namei(&tond)) != 0) { 3475 /* Translate error code for rename("dir1", "dir2/."). */ 3476 if (error == EISDIR && fvp->v_type == VDIR) 3477 error = EINVAL; 3478 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3479 vrele(fromnd.ni_dvp); 3480 vrele(fvp); 3481 goto out1; 3482 } 3483 tdvp = tond.ni_dvp; 3484 tvp = tond.ni_vp; 3485 error = vn_start_write(fvp, &mp, V_NOWAIT); 3486 if (error != 0) { 3487 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3488 NDFREE(&tond, NDF_ONLY_PNBUF); 3489 if (tvp != NULL) 3490 vput(tvp); 3491 if (tdvp == tvp) 3492 vrele(tdvp); 3493 else 3494 vput(tdvp); 3495 vrele(fromnd.ni_dvp); 3496 vrele(fvp); 3497 vrele(tond.ni_startdir); 3498 if (fromnd.ni_startdir != NULL) 3499 vrele(fromnd.ni_startdir); 3500 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3501 if (error != 0) 3502 return (error); 3503 goto again; 3504 } 3505 if (tvp != NULL) { 3506 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3507 error = ENOTDIR; 3508 goto out; 3509 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3510 error = EISDIR; 3511 goto out; 3512 } 3513 #ifdef CAPABILITIES 3514 if (newfd != AT_FDCWD) { 3515 /* 3516 * If the target already exists we require CAP_UNLINKAT 3517 * from 'newfd'. 3518 */ 3519 error = cap_check(&tond.ni_filecaps.fc_rights, 3520 &cap_unlinkat_rights); 3521 if (error != 0) 3522 goto out; 3523 } 3524 #endif 3525 } 3526 if (fvp == tdvp) { 3527 error = EINVAL; 3528 goto out; 3529 } 3530 /* 3531 * If the source is the same as the destination (that is, if they 3532 * are links to the same vnode), then there is nothing to do. 3533 */ 3534 if (fvp == tvp) 3535 error = -1; 3536 #ifdef MAC 3537 else 3538 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3539 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3540 #endif 3541 out: 3542 if (error == 0) { 3543 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3544 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3545 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3546 NDFREE(&tond, NDF_ONLY_PNBUF); 3547 } else { 3548 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3549 NDFREE(&tond, NDF_ONLY_PNBUF); 3550 if (tvp != NULL) 3551 vput(tvp); 3552 if (tdvp == tvp) 3553 vrele(tdvp); 3554 else 3555 vput(tdvp); 3556 vrele(fromnd.ni_dvp); 3557 vrele(fvp); 3558 } 3559 vrele(tond.ni_startdir); 3560 vn_finished_write(mp); 3561 out1: 3562 if (fromnd.ni_startdir) 3563 vrele(fromnd.ni_startdir); 3564 if (error == -1) 3565 return (0); 3566 return (error); 3567 } 3568 3569 /* 3570 * Make a directory file. 3571 */ 3572 #ifndef _SYS_SYSPROTO_H_ 3573 struct mkdir_args { 3574 char *path; 3575 int mode; 3576 }; 3577 #endif 3578 int 3579 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3580 { 3581 3582 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3583 uap->mode)); 3584 } 3585 3586 #ifndef _SYS_SYSPROTO_H_ 3587 struct mkdirat_args { 3588 int fd; 3589 char *path; 3590 mode_t mode; 3591 }; 3592 #endif 3593 int 3594 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3595 { 3596 3597 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3598 } 3599 3600 int 3601 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3602 int mode) 3603 { 3604 struct mount *mp; 3605 struct vnode *vp; 3606 struct vattr vattr; 3607 struct nameidata nd; 3608 int error; 3609 3610 AUDIT_ARG_MODE(mode); 3611 restart: 3612 bwillwrite(); 3613 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3614 NOCACHE, segflg, path, fd, &cap_mkdirat_rights, 3615 td); 3616 nd.ni_cnd.cn_flags |= WILLBEDIR; 3617 if ((error = namei(&nd)) != 0) 3618 return (error); 3619 vp = nd.ni_vp; 3620 if (vp != NULL) { 3621 NDFREE(&nd, NDF_ONLY_PNBUF); 3622 /* 3623 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3624 * the strange behaviour of leaving the vnode unlocked 3625 * if the target is the same vnode as the parent. 3626 */ 3627 if (vp == nd.ni_dvp) 3628 vrele(nd.ni_dvp); 3629 else 3630 vput(nd.ni_dvp); 3631 vrele(vp); 3632 return (EEXIST); 3633 } 3634 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3635 NDFREE(&nd, NDF_ONLY_PNBUF); 3636 vput(nd.ni_dvp); 3637 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3638 return (error); 3639 goto restart; 3640 } 3641 VATTR_NULL(&vattr); 3642 vattr.va_type = VDIR; 3643 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3644 #ifdef MAC 3645 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3646 &vattr); 3647 if (error != 0) 3648 goto out; 3649 #endif 3650 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3651 #ifdef MAC 3652 out: 3653 #endif 3654 NDFREE(&nd, NDF_ONLY_PNBUF); 3655 vput(nd.ni_dvp); 3656 if (error == 0) 3657 vput(nd.ni_vp); 3658 vn_finished_write(mp); 3659 return (error); 3660 } 3661 3662 /* 3663 * Remove a directory file. 3664 */ 3665 #ifndef _SYS_SYSPROTO_H_ 3666 struct rmdir_args { 3667 char *path; 3668 }; 3669 #endif 3670 int 3671 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3672 { 3673 3674 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3675 } 3676 3677 int 3678 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3679 { 3680 struct mount *mp; 3681 struct vnode *vp; 3682 struct nameidata nd; 3683 int error; 3684 3685 restart: 3686 bwillwrite(); 3687 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3688 pathseg, path, fd, &cap_unlinkat_rights, td); 3689 if ((error = namei(&nd)) != 0) 3690 return (error); 3691 vp = nd.ni_vp; 3692 if (vp->v_type != VDIR) { 3693 error = ENOTDIR; 3694 goto out; 3695 } 3696 /* 3697 * No rmdir "." please. 3698 */ 3699 if (nd.ni_dvp == vp) { 3700 error = EINVAL; 3701 goto out; 3702 } 3703 /* 3704 * The root of a mounted filesystem cannot be deleted. 3705 */ 3706 if (vp->v_vflag & VV_ROOT) { 3707 error = EBUSY; 3708 goto out; 3709 } 3710 #ifdef MAC 3711 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3712 &nd.ni_cnd); 3713 if (error != 0) 3714 goto out; 3715 #endif 3716 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3717 NDFREE(&nd, NDF_ONLY_PNBUF); 3718 vput(vp); 3719 if (nd.ni_dvp == vp) 3720 vrele(nd.ni_dvp); 3721 else 3722 vput(nd.ni_dvp); 3723 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3724 return (error); 3725 goto restart; 3726 } 3727 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3728 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3729 vn_finished_write(mp); 3730 out: 3731 NDFREE(&nd, NDF_ONLY_PNBUF); 3732 vput(vp); 3733 if (nd.ni_dvp == vp) 3734 vrele(nd.ni_dvp); 3735 else 3736 vput(nd.ni_dvp); 3737 return (error); 3738 } 3739 3740 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3741 int 3742 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3743 long *basep, void (*func)(struct freebsd11_dirent *)) 3744 { 3745 struct freebsd11_dirent dstdp; 3746 struct dirent *dp, *edp; 3747 char *dirbuf; 3748 off_t base; 3749 ssize_t resid, ucount; 3750 int error; 3751 3752 /* XXX arbitrary sanity limit on `count'. */ 3753 count = min(count, 64 * 1024); 3754 3755 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3756 3757 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3758 UIO_SYSSPACE); 3759 if (error != 0) 3760 goto done; 3761 if (basep != NULL) 3762 *basep = base; 3763 3764 ucount = 0; 3765 for (dp = (struct dirent *)dirbuf, 3766 edp = (struct dirent *)&dirbuf[count - resid]; 3767 ucount < count && dp < edp; ) { 3768 if (dp->d_reclen == 0) 3769 break; 3770 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3771 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3772 continue; 3773 dstdp.d_type = dp->d_type; 3774 dstdp.d_namlen = dp->d_namlen; 3775 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3776 if (dstdp.d_fileno != dp->d_fileno) { 3777 switch (ino64_trunc_error) { 3778 default: 3779 case 0: 3780 break; 3781 case 1: 3782 error = EOVERFLOW; 3783 goto done; 3784 case 2: 3785 dstdp.d_fileno = UINT32_MAX; 3786 break; 3787 } 3788 } 3789 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 3790 ((dp->d_namlen + 1 + 3) &~ 3); 3791 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 3792 bzero(dstdp.d_name + dstdp.d_namlen, 3793 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 3794 dstdp.d_namlen); 3795 MPASS(dstdp.d_reclen <= dp->d_reclen); 3796 MPASS(ucount + dstdp.d_reclen <= count); 3797 if (func != NULL) 3798 func(&dstdp); 3799 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 3800 if (error != 0) 3801 break; 3802 dp = (struct dirent *)((char *)dp + dp->d_reclen); 3803 ucount += dstdp.d_reclen; 3804 } 3805 3806 done: 3807 free(dirbuf, M_TEMP); 3808 if (error == 0) 3809 td->td_retval[0] = ucount; 3810 return (error); 3811 } 3812 #endif /* COMPAT */ 3813 3814 #ifdef COMPAT_43 3815 static void 3816 ogetdirentries_cvt(struct freebsd11_dirent *dp) 3817 { 3818 #if (BYTE_ORDER == LITTLE_ENDIAN) 3819 /* 3820 * The expected low byte of dp->d_namlen is our dp->d_type. 3821 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 3822 */ 3823 dp->d_type = dp->d_namlen; 3824 dp->d_namlen = 0; 3825 #else 3826 /* 3827 * The dp->d_type is the high byte of the expected dp->d_namlen, 3828 * so must be zero'ed. 3829 */ 3830 dp->d_type = 0; 3831 #endif 3832 } 3833 3834 /* 3835 * Read a block of directory entries in a filesystem independent format. 3836 */ 3837 #ifndef _SYS_SYSPROTO_H_ 3838 struct ogetdirentries_args { 3839 int fd; 3840 char *buf; 3841 u_int count; 3842 long *basep; 3843 }; 3844 #endif 3845 int 3846 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3847 { 3848 long loff; 3849 int error; 3850 3851 error = kern_ogetdirentries(td, uap, &loff); 3852 if (error == 0) 3853 error = copyout(&loff, uap->basep, sizeof(long)); 3854 return (error); 3855 } 3856 3857 int 3858 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3859 long *ploff) 3860 { 3861 long base; 3862 int error; 3863 3864 /* XXX arbitrary sanity limit on `count'. */ 3865 if (uap->count > 64 * 1024) 3866 return (EINVAL); 3867 3868 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 3869 &base, ogetdirentries_cvt); 3870 3871 if (error == 0 && uap->basep != NULL) 3872 error = copyout(&base, uap->basep, sizeof(long)); 3873 3874 return (error); 3875 } 3876 #endif /* COMPAT_43 */ 3877 3878 #if defined(COMPAT_FREEBSD11) 3879 #ifndef _SYS_SYSPROTO_H_ 3880 struct freebsd11_getdirentries_args { 3881 int fd; 3882 char *buf; 3883 u_int count; 3884 long *basep; 3885 }; 3886 #endif 3887 int 3888 freebsd11_getdirentries(struct thread *td, 3889 struct freebsd11_getdirentries_args *uap) 3890 { 3891 long base; 3892 int error; 3893 3894 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 3895 &base, NULL); 3896 3897 if (error == 0 && uap->basep != NULL) 3898 error = copyout(&base, uap->basep, sizeof(long)); 3899 return (error); 3900 } 3901 3902 int 3903 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 3904 { 3905 struct freebsd11_getdirentries_args ap; 3906 3907 ap.fd = uap->fd; 3908 ap.buf = uap->buf; 3909 ap.count = uap->count; 3910 ap.basep = NULL; 3911 return (freebsd11_getdirentries(td, &ap)); 3912 } 3913 #endif /* COMPAT_FREEBSD11 */ 3914 3915 /* 3916 * Read a block of directory entries in a filesystem independent format. 3917 */ 3918 int 3919 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 3920 { 3921 off_t base; 3922 int error; 3923 3924 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3925 NULL, UIO_USERSPACE); 3926 if (error != 0) 3927 return (error); 3928 if (uap->basep != NULL) 3929 error = copyout(&base, uap->basep, sizeof(off_t)); 3930 return (error); 3931 } 3932 3933 int 3934 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 3935 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 3936 { 3937 struct vnode *vp; 3938 struct file *fp; 3939 struct uio auio; 3940 struct iovec aiov; 3941 off_t loff; 3942 int error, eofflag; 3943 off_t foffset; 3944 3945 AUDIT_ARG_FD(fd); 3946 if (count > IOSIZE_MAX) 3947 return (EINVAL); 3948 auio.uio_resid = count; 3949 error = getvnode(td, fd, &cap_read_rights, &fp); 3950 if (error != 0) 3951 return (error); 3952 if ((fp->f_flag & FREAD) == 0) { 3953 fdrop(fp, td); 3954 return (EBADF); 3955 } 3956 vp = fp->f_vnode; 3957 foffset = foffset_lock(fp, 0); 3958 unionread: 3959 if (vp->v_type != VDIR) { 3960 error = EINVAL; 3961 goto fail; 3962 } 3963 aiov.iov_base = buf; 3964 aiov.iov_len = count; 3965 auio.uio_iov = &aiov; 3966 auio.uio_iovcnt = 1; 3967 auio.uio_rw = UIO_READ; 3968 auio.uio_segflg = bufseg; 3969 auio.uio_td = td; 3970 vn_lock(vp, LK_SHARED | LK_RETRY); 3971 AUDIT_ARG_VNODE1(vp); 3972 loff = auio.uio_offset = foffset; 3973 #ifdef MAC 3974 error = mac_vnode_check_readdir(td->td_ucred, vp); 3975 if (error == 0) 3976 #endif 3977 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3978 NULL); 3979 foffset = auio.uio_offset; 3980 if (error != 0) { 3981 VOP_UNLOCK(vp, 0); 3982 goto fail; 3983 } 3984 if (count == auio.uio_resid && 3985 (vp->v_vflag & VV_ROOT) && 3986 (vp->v_mount->mnt_flag & MNT_UNION)) { 3987 struct vnode *tvp = vp; 3988 3989 vp = vp->v_mount->mnt_vnodecovered; 3990 VREF(vp); 3991 fp->f_vnode = vp; 3992 fp->f_data = vp; 3993 foffset = 0; 3994 vput(tvp); 3995 goto unionread; 3996 } 3997 VOP_UNLOCK(vp, 0); 3998 *basep = loff; 3999 if (residp != NULL) 4000 *residp = auio.uio_resid; 4001 td->td_retval[0] = count - auio.uio_resid; 4002 fail: 4003 foffset_unlock(fp, foffset, 0); 4004 fdrop(fp, td); 4005 return (error); 4006 } 4007 4008 /* 4009 * Set the mode mask for creation of filesystem nodes. 4010 */ 4011 #ifndef _SYS_SYSPROTO_H_ 4012 struct umask_args { 4013 int newmask; 4014 }; 4015 #endif 4016 int 4017 sys_umask(struct thread *td, struct umask_args *uap) 4018 { 4019 struct filedesc *fdp; 4020 4021 fdp = td->td_proc->p_fd; 4022 FILEDESC_XLOCK(fdp); 4023 td->td_retval[0] = fdp->fd_cmask; 4024 fdp->fd_cmask = uap->newmask & ALLPERMS; 4025 FILEDESC_XUNLOCK(fdp); 4026 return (0); 4027 } 4028 4029 /* 4030 * Void all references to file by ripping underlying filesystem away from 4031 * vnode. 4032 */ 4033 #ifndef _SYS_SYSPROTO_H_ 4034 struct revoke_args { 4035 char *path; 4036 }; 4037 #endif 4038 int 4039 sys_revoke(struct thread *td, struct revoke_args *uap) 4040 { 4041 struct vnode *vp; 4042 struct vattr vattr; 4043 struct nameidata nd; 4044 int error; 4045 4046 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4047 uap->path, td); 4048 if ((error = namei(&nd)) != 0) 4049 return (error); 4050 vp = nd.ni_vp; 4051 NDFREE(&nd, NDF_ONLY_PNBUF); 4052 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4053 error = EINVAL; 4054 goto out; 4055 } 4056 #ifdef MAC 4057 error = mac_vnode_check_revoke(td->td_ucred, vp); 4058 if (error != 0) 4059 goto out; 4060 #endif 4061 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4062 if (error != 0) 4063 goto out; 4064 if (td->td_ucred->cr_uid != vattr.va_uid) { 4065 error = priv_check(td, PRIV_VFS_ADMIN); 4066 if (error != 0) 4067 goto out; 4068 } 4069 if (vcount(vp) > 1) 4070 VOP_REVOKE(vp, REVOKEALL); 4071 out: 4072 vput(vp); 4073 return (error); 4074 } 4075 4076 /* 4077 * Convert a user file descriptor to a kernel file entry and check that, if it 4078 * is a capability, the correct rights are present. A reference on the file 4079 * entry is held upon returning. 4080 */ 4081 int 4082 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4083 { 4084 struct file *fp; 4085 int error; 4086 4087 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4088 if (error != 0) 4089 return (error); 4090 4091 /* 4092 * The file could be not of the vnode type, or it may be not 4093 * yet fully initialized, in which case the f_vnode pointer 4094 * may be set, but f_ops is still badfileops. E.g., 4095 * devfs_open() transiently create such situation to 4096 * facilitate csw d_fdopen(). 4097 * 4098 * Dupfdopen() handling in kern_openat() installs the 4099 * half-baked file into the process descriptor table, allowing 4100 * other thread to dereference it. Guard against the race by 4101 * checking f_ops. 4102 */ 4103 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4104 fdrop(fp, td); 4105 return (EINVAL); 4106 } 4107 *fpp = fp; 4108 return (0); 4109 } 4110 4111 4112 /* 4113 * Get an (NFS) file handle. 4114 */ 4115 #ifndef _SYS_SYSPROTO_H_ 4116 struct lgetfh_args { 4117 char *fname; 4118 fhandle_t *fhp; 4119 }; 4120 #endif 4121 int 4122 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4123 { 4124 struct nameidata nd; 4125 fhandle_t fh; 4126 struct vnode *vp; 4127 int error; 4128 4129 error = priv_check(td, PRIV_VFS_GETFH); 4130 if (error != 0) 4131 return (error); 4132 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4133 uap->fname, td); 4134 error = namei(&nd); 4135 if (error != 0) 4136 return (error); 4137 NDFREE(&nd, NDF_ONLY_PNBUF); 4138 vp = nd.ni_vp; 4139 bzero(&fh, sizeof(fh)); 4140 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4141 error = VOP_VPTOFH(vp, &fh.fh_fid); 4142 vput(vp); 4143 if (error == 0) 4144 error = copyout(&fh, uap->fhp, sizeof (fh)); 4145 return (error); 4146 } 4147 4148 #ifndef _SYS_SYSPROTO_H_ 4149 struct getfh_args { 4150 char *fname; 4151 fhandle_t *fhp; 4152 }; 4153 #endif 4154 int 4155 sys_getfh(struct thread *td, struct getfh_args *uap) 4156 { 4157 struct nameidata nd; 4158 fhandle_t fh; 4159 struct vnode *vp; 4160 int error; 4161 4162 error = priv_check(td, PRIV_VFS_GETFH); 4163 if (error != 0) 4164 return (error); 4165 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4166 uap->fname, td); 4167 error = namei(&nd); 4168 if (error != 0) 4169 return (error); 4170 NDFREE(&nd, NDF_ONLY_PNBUF); 4171 vp = nd.ni_vp; 4172 bzero(&fh, sizeof(fh)); 4173 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4174 error = VOP_VPTOFH(vp, &fh.fh_fid); 4175 vput(vp); 4176 if (error == 0) 4177 error = copyout(&fh, uap->fhp, sizeof (fh)); 4178 return (error); 4179 } 4180 4181 /* 4182 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4183 * open descriptor. 4184 * 4185 * warning: do not remove the priv_check() call or this becomes one giant 4186 * security hole. 4187 */ 4188 #ifndef _SYS_SYSPROTO_H_ 4189 struct fhopen_args { 4190 const struct fhandle *u_fhp; 4191 int flags; 4192 }; 4193 #endif 4194 int 4195 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4196 { 4197 struct mount *mp; 4198 struct vnode *vp; 4199 struct fhandle fhp; 4200 struct file *fp; 4201 int fmode, error; 4202 int indx; 4203 4204 error = priv_check(td, PRIV_VFS_FHOPEN); 4205 if (error != 0) 4206 return (error); 4207 indx = -1; 4208 fmode = FFLAGS(uap->flags); 4209 /* why not allow a non-read/write open for our lockd? */ 4210 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4211 return (EINVAL); 4212 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4213 if (error != 0) 4214 return(error); 4215 /* find the mount point */ 4216 mp = vfs_busyfs(&fhp.fh_fsid); 4217 if (mp == NULL) 4218 return (ESTALE); 4219 /* now give me my vnode, it gets returned to me locked */ 4220 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4221 vfs_unbusy(mp); 4222 if (error != 0) 4223 return (error); 4224 4225 error = falloc_noinstall(td, &fp); 4226 if (error != 0) { 4227 vput(vp); 4228 return (error); 4229 } 4230 /* 4231 * An extra reference on `fp' has been held for us by 4232 * falloc_noinstall(). 4233 */ 4234 4235 #ifdef INVARIANTS 4236 td->td_dupfd = -1; 4237 #endif 4238 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4239 if (error != 0) { 4240 KASSERT(fp->f_ops == &badfileops, 4241 ("VOP_OPEN in fhopen() set f_ops")); 4242 KASSERT(td->td_dupfd < 0, 4243 ("fhopen() encountered fdopen()")); 4244 4245 vput(vp); 4246 goto bad; 4247 } 4248 #ifdef INVARIANTS 4249 td->td_dupfd = 0; 4250 #endif 4251 fp->f_vnode = vp; 4252 fp->f_seqcount = 1; 4253 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4254 &vnops); 4255 VOP_UNLOCK(vp, 0); 4256 if ((fmode & O_TRUNC) != 0) { 4257 error = fo_truncate(fp, 0, td->td_ucred, td); 4258 if (error != 0) 4259 goto bad; 4260 } 4261 4262 error = finstall(td, fp, &indx, fmode, NULL); 4263 bad: 4264 fdrop(fp, td); 4265 td->td_retval[0] = indx; 4266 return (error); 4267 } 4268 4269 /* 4270 * Stat an (NFS) file handle. 4271 */ 4272 #ifndef _SYS_SYSPROTO_H_ 4273 struct fhstat_args { 4274 struct fhandle *u_fhp; 4275 struct stat *sb; 4276 }; 4277 #endif 4278 int 4279 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4280 { 4281 struct stat sb; 4282 struct fhandle fh; 4283 int error; 4284 4285 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4286 if (error != 0) 4287 return (error); 4288 error = kern_fhstat(td, fh, &sb); 4289 if (error == 0) 4290 error = copyout(&sb, uap->sb, sizeof(sb)); 4291 return (error); 4292 } 4293 4294 int 4295 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4296 { 4297 struct mount *mp; 4298 struct vnode *vp; 4299 int error; 4300 4301 error = priv_check(td, PRIV_VFS_FHSTAT); 4302 if (error != 0) 4303 return (error); 4304 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4305 return (ESTALE); 4306 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4307 vfs_unbusy(mp); 4308 if (error != 0) 4309 return (error); 4310 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4311 vput(vp); 4312 return (error); 4313 } 4314 4315 /* 4316 * Implement fstatfs() for (NFS) file handles. 4317 */ 4318 #ifndef _SYS_SYSPROTO_H_ 4319 struct fhstatfs_args { 4320 struct fhandle *u_fhp; 4321 struct statfs *buf; 4322 }; 4323 #endif 4324 int 4325 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4326 { 4327 struct statfs *sfp; 4328 fhandle_t fh; 4329 int error; 4330 4331 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4332 if (error != 0) 4333 return (error); 4334 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4335 error = kern_fhstatfs(td, fh, sfp); 4336 if (error == 0) 4337 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4338 free(sfp, M_STATFS); 4339 return (error); 4340 } 4341 4342 int 4343 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4344 { 4345 struct statfs *sp; 4346 struct mount *mp; 4347 struct vnode *vp; 4348 int error; 4349 4350 error = priv_check(td, PRIV_VFS_FHSTATFS); 4351 if (error != 0) 4352 return (error); 4353 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4354 return (ESTALE); 4355 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4356 if (error != 0) { 4357 vfs_unbusy(mp); 4358 return (error); 4359 } 4360 vput(vp); 4361 error = prison_canseemount(td->td_ucred, mp); 4362 if (error != 0) 4363 goto out; 4364 #ifdef MAC 4365 error = mac_mount_check_stat(td->td_ucred, mp); 4366 if (error != 0) 4367 goto out; 4368 #endif 4369 /* 4370 * Set these in case the underlying filesystem fails to do so. 4371 */ 4372 sp = &mp->mnt_stat; 4373 sp->f_version = STATFS_VERSION; 4374 sp->f_namemax = NAME_MAX; 4375 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4376 error = VFS_STATFS(mp, sp); 4377 if (error == 0) 4378 *buf = *sp; 4379 out: 4380 vfs_unbusy(mp); 4381 return (error); 4382 } 4383 4384 int 4385 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4386 { 4387 struct file *fp; 4388 struct mount *mp; 4389 struct vnode *vp; 4390 off_t olen, ooffset; 4391 int error; 4392 #ifdef AUDIT 4393 int audited_vnode1 = 0; 4394 #endif 4395 4396 AUDIT_ARG_FD(fd); 4397 if (offset < 0 || len <= 0) 4398 return (EINVAL); 4399 /* Check for wrap. */ 4400 if (offset > OFF_MAX - len) 4401 return (EFBIG); 4402 AUDIT_ARG_FD(fd); 4403 error = fget(td, fd, &cap_pwrite_rights, &fp); 4404 if (error != 0) 4405 return (error); 4406 AUDIT_ARG_FILE(td->td_proc, fp); 4407 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4408 error = ESPIPE; 4409 goto out; 4410 } 4411 if ((fp->f_flag & FWRITE) == 0) { 4412 error = EBADF; 4413 goto out; 4414 } 4415 if (fp->f_type != DTYPE_VNODE) { 4416 error = ENODEV; 4417 goto out; 4418 } 4419 vp = fp->f_vnode; 4420 if (vp->v_type != VREG) { 4421 error = ENODEV; 4422 goto out; 4423 } 4424 4425 /* Allocating blocks may take a long time, so iterate. */ 4426 for (;;) { 4427 olen = len; 4428 ooffset = offset; 4429 4430 bwillwrite(); 4431 mp = NULL; 4432 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4433 if (error != 0) 4434 break; 4435 error = vn_lock(vp, LK_EXCLUSIVE); 4436 if (error != 0) { 4437 vn_finished_write(mp); 4438 break; 4439 } 4440 #ifdef AUDIT 4441 if (!audited_vnode1) { 4442 AUDIT_ARG_VNODE1(vp); 4443 audited_vnode1 = 1; 4444 } 4445 #endif 4446 #ifdef MAC 4447 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4448 if (error == 0) 4449 #endif 4450 error = VOP_ALLOCATE(vp, &offset, &len); 4451 VOP_UNLOCK(vp, 0); 4452 vn_finished_write(mp); 4453 4454 if (olen + ooffset != offset + len) { 4455 panic("offset + len changed from %jx/%jx to %jx/%jx", 4456 ooffset, olen, offset, len); 4457 } 4458 if (error != 0 || len == 0) 4459 break; 4460 KASSERT(olen > len, ("Iteration did not make progress?")); 4461 maybe_yield(); 4462 } 4463 out: 4464 fdrop(fp, td); 4465 return (error); 4466 } 4467 4468 int 4469 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4470 { 4471 int error; 4472 4473 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4474 return (kern_posix_error(td, error)); 4475 } 4476 4477 /* 4478 * Unlike madvise(2), we do not make a best effort to remember every 4479 * possible caching hint. Instead, we remember the last setting with 4480 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4481 * region of any current setting. 4482 */ 4483 int 4484 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4485 int advice) 4486 { 4487 struct fadvise_info *fa, *new; 4488 struct file *fp; 4489 struct vnode *vp; 4490 off_t end; 4491 int error; 4492 4493 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4494 return (EINVAL); 4495 AUDIT_ARG_VALUE(advice); 4496 switch (advice) { 4497 case POSIX_FADV_SEQUENTIAL: 4498 case POSIX_FADV_RANDOM: 4499 case POSIX_FADV_NOREUSE: 4500 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4501 break; 4502 case POSIX_FADV_NORMAL: 4503 case POSIX_FADV_WILLNEED: 4504 case POSIX_FADV_DONTNEED: 4505 new = NULL; 4506 break; 4507 default: 4508 return (EINVAL); 4509 } 4510 /* XXX: CAP_POSIX_FADVISE? */ 4511 AUDIT_ARG_FD(fd); 4512 error = fget(td, fd, &cap_no_rights, &fp); 4513 if (error != 0) 4514 goto out; 4515 AUDIT_ARG_FILE(td->td_proc, fp); 4516 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4517 error = ESPIPE; 4518 goto out; 4519 } 4520 if (fp->f_type != DTYPE_VNODE) { 4521 error = ENODEV; 4522 goto out; 4523 } 4524 vp = fp->f_vnode; 4525 if (vp->v_type != VREG) { 4526 error = ENODEV; 4527 goto out; 4528 } 4529 if (len == 0) 4530 end = OFF_MAX; 4531 else 4532 end = offset + len - 1; 4533 switch (advice) { 4534 case POSIX_FADV_SEQUENTIAL: 4535 case POSIX_FADV_RANDOM: 4536 case POSIX_FADV_NOREUSE: 4537 /* 4538 * Try to merge any existing non-standard region with 4539 * this new region if possible, otherwise create a new 4540 * non-standard region for this request. 4541 */ 4542 mtx_pool_lock(mtxpool_sleep, fp); 4543 fa = fp->f_advice; 4544 if (fa != NULL && fa->fa_advice == advice && 4545 ((fa->fa_start <= end && fa->fa_end >= offset) || 4546 (end != OFF_MAX && fa->fa_start == end + 1) || 4547 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4548 if (offset < fa->fa_start) 4549 fa->fa_start = offset; 4550 if (end > fa->fa_end) 4551 fa->fa_end = end; 4552 } else { 4553 new->fa_advice = advice; 4554 new->fa_start = offset; 4555 new->fa_end = end; 4556 fp->f_advice = new; 4557 new = fa; 4558 } 4559 mtx_pool_unlock(mtxpool_sleep, fp); 4560 break; 4561 case POSIX_FADV_NORMAL: 4562 /* 4563 * If a the "normal" region overlaps with an existing 4564 * non-standard region, trim or remove the 4565 * non-standard region. 4566 */ 4567 mtx_pool_lock(mtxpool_sleep, fp); 4568 fa = fp->f_advice; 4569 if (fa != NULL) { 4570 if (offset <= fa->fa_start && end >= fa->fa_end) { 4571 new = fa; 4572 fp->f_advice = NULL; 4573 } else if (offset <= fa->fa_start && 4574 end >= fa->fa_start) 4575 fa->fa_start = end + 1; 4576 else if (offset <= fa->fa_end && end >= fa->fa_end) 4577 fa->fa_end = offset - 1; 4578 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4579 /* 4580 * If the "normal" region is a middle 4581 * portion of the existing 4582 * non-standard region, just remove 4583 * the whole thing rather than picking 4584 * one side or the other to 4585 * preserve. 4586 */ 4587 new = fa; 4588 fp->f_advice = NULL; 4589 } 4590 } 4591 mtx_pool_unlock(mtxpool_sleep, fp); 4592 break; 4593 case POSIX_FADV_WILLNEED: 4594 case POSIX_FADV_DONTNEED: 4595 error = VOP_ADVISE(vp, offset, end, advice); 4596 break; 4597 } 4598 out: 4599 if (fp != NULL) 4600 fdrop(fp, td); 4601 free(new, M_FADVISE); 4602 return (error); 4603 } 4604 4605 int 4606 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4607 { 4608 int error; 4609 4610 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4611 uap->advice); 4612 return (kern_posix_error(td, error)); 4613 } 4614