1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 static int 248 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 249 { 250 struct statfs *sp; 251 int error; 252 253 if (mp == NULL) 254 return (EBADF); 255 error = vfs_busy(mp, 0); 256 vfs_rel(mp); 257 if (error != 0) 258 return (error); 259 #ifdef MAC 260 error = mac_mount_check_stat(td->td_ucred, mp); 261 if (error != 0) 262 goto out; 263 #endif 264 /* 265 * Set these in case the underlying filesystem fails to do so. 266 */ 267 sp = &mp->mnt_stat; 268 sp->f_version = STATFS_VERSION; 269 sp->f_namemax = NAME_MAX; 270 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 271 error = VFS_STATFS(mp, sp); 272 if (error != 0) 273 goto out; 274 *buf = *sp; 275 if (priv_check(td, PRIV_VFS_GENERATION)) { 276 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 277 prison_enforce_statfs(td->td_ucred, mp, buf); 278 } 279 out: 280 vfs_unbusy(mp); 281 return (error); 282 } 283 284 /* 285 * Get filesystem statistics. 286 */ 287 #ifndef _SYS_SYSPROTO_H_ 288 struct statfs_args { 289 char *path; 290 struct statfs *buf; 291 }; 292 #endif 293 int 294 sys_statfs(td, uap) 295 struct thread *td; 296 register struct statfs_args /* { 297 char *path; 298 struct statfs *buf; 299 } */ *uap; 300 { 301 struct statfs sf; 302 int error; 303 304 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 305 if (error == 0) 306 error = copyout(&sf, uap->buf, sizeof(sf)); 307 return (error); 308 } 309 310 int 311 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 312 struct statfs *buf) 313 { 314 struct mount *mp; 315 struct nameidata nd; 316 int error; 317 318 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 319 pathseg, path, td); 320 error = namei(&nd); 321 if (error != 0) 322 return (error); 323 mp = nd.ni_vp->v_mount; 324 vfs_ref(mp); 325 NDFREE(&nd, NDF_ONLY_PNBUF); 326 vput(nd.ni_vp); 327 return (kern_do_statfs(td, mp, buf)); 328 } 329 330 /* 331 * Get filesystem statistics. 332 */ 333 #ifndef _SYS_SYSPROTO_H_ 334 struct fstatfs_args { 335 int fd; 336 struct statfs *buf; 337 }; 338 #endif 339 int 340 sys_fstatfs(td, uap) 341 struct thread *td; 342 register struct fstatfs_args /* { 343 int fd; 344 struct statfs *buf; 345 } */ *uap; 346 { 347 struct statfs sf; 348 int error; 349 350 error = kern_fstatfs(td, uap->fd, &sf); 351 if (error == 0) 352 error = copyout(&sf, uap->buf, sizeof(sf)); 353 return (error); 354 } 355 356 int 357 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 358 { 359 struct file *fp; 360 struct mount *mp; 361 struct vnode *vp; 362 cap_rights_t rights; 363 int error; 364 365 AUDIT_ARG_FD(fd); 366 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 367 if (error != 0) 368 return (error); 369 vp = fp->f_vnode; 370 vn_lock(vp, LK_SHARED | LK_RETRY); 371 #ifdef AUDIT 372 AUDIT_ARG_VNODE1(vp); 373 #endif 374 mp = vp->v_mount; 375 if (mp != NULL) 376 vfs_ref(mp); 377 VOP_UNLOCK(vp, 0); 378 fdrop(fp, td); 379 return (kern_do_statfs(td, mp, buf)); 380 } 381 382 /* 383 * Get statistics on all filesystems. 384 */ 385 #ifndef _SYS_SYSPROTO_H_ 386 struct getfsstat_args { 387 struct statfs *buf; 388 long bufsize; 389 int mode; 390 }; 391 #endif 392 int 393 sys_getfsstat(td, uap) 394 struct thread *td; 395 register struct getfsstat_args /* { 396 struct statfs *buf; 397 long bufsize; 398 int mode; 399 } */ *uap; 400 { 401 size_t count; 402 int error; 403 404 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 405 return (EINVAL); 406 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 407 UIO_USERSPACE, uap->mode); 408 if (error == 0) 409 td->td_retval[0] = count; 410 return (error); 411 } 412 413 /* 414 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 415 * The caller is responsible for freeing memory which will be allocated 416 * in '*buf'. 417 */ 418 int 419 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 420 size_t *countp, enum uio_seg bufseg, int mode) 421 { 422 struct mount *mp, *nmp; 423 struct statfs *sfsp, *sp, sb, *tofree; 424 size_t count, maxcount; 425 int error; 426 427 switch (mode) { 428 case MNT_WAIT: 429 case MNT_NOWAIT: 430 break; 431 default: 432 if (bufseg == UIO_SYSSPACE) 433 *buf = NULL; 434 return (EINVAL); 435 } 436 restart: 437 maxcount = bufsize / sizeof(struct statfs); 438 if (bufsize == 0) { 439 sfsp = NULL; 440 tofree = NULL; 441 } else if (bufseg == UIO_USERSPACE) { 442 sfsp = *buf; 443 tofree = NULL; 444 } else /* if (bufseg == UIO_SYSSPACE) */ { 445 count = 0; 446 mtx_lock(&mountlist_mtx); 447 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 448 count++; 449 } 450 mtx_unlock(&mountlist_mtx); 451 if (maxcount > count) 452 maxcount = count; 453 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 454 M_TEMP, M_WAITOK); 455 } 456 count = 0; 457 mtx_lock(&mountlist_mtx); 458 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 459 if (prison_canseemount(td->td_ucred, mp) != 0) { 460 nmp = TAILQ_NEXT(mp, mnt_list); 461 continue; 462 } 463 #ifdef MAC 464 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 465 nmp = TAILQ_NEXT(mp, mnt_list); 466 continue; 467 } 468 #endif 469 if (mode == MNT_WAIT) { 470 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 471 /* 472 * If vfs_busy() failed, and MBF_NOWAIT 473 * wasn't passed, then the mp is gone. 474 * Furthermore, because of MBF_MNTLSTLOCK, 475 * the mountlist_mtx was dropped. We have 476 * no other choice than to start over. 477 */ 478 mtx_unlock(&mountlist_mtx); 479 free(tofree, M_TEMP); 480 goto restart; 481 } 482 } else { 483 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 continue; 486 } 487 } 488 if (sfsp && count < maxcount) { 489 sp = &mp->mnt_stat; 490 /* 491 * Set these in case the underlying filesystem 492 * fails to do so. 493 */ 494 sp->f_version = STATFS_VERSION; 495 sp->f_namemax = NAME_MAX; 496 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 497 /* 498 * If MNT_NOWAIT is specified, do not refresh 499 * the fsstat cache. 500 */ 501 if (mode != MNT_NOWAIT) { 502 error = VFS_STATFS(mp, sp); 503 if (error != 0) { 504 mtx_lock(&mountlist_mtx); 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 vfs_unbusy(mp); 507 continue; 508 } 509 } 510 if (priv_check(td, PRIV_VFS_GENERATION)) { 511 bcopy(sp, &sb, sizeof(sb)); 512 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 513 prison_enforce_statfs(td->td_ucred, mp, &sb); 514 sp = &sb; 515 } 516 if (bufseg == UIO_SYSSPACE) 517 bcopy(sp, sfsp, sizeof(*sp)); 518 else /* if (bufseg == UIO_USERSPACE) */ { 519 error = copyout(sp, sfsp, sizeof(*sp)); 520 if (error != 0) { 521 vfs_unbusy(mp); 522 return (error); 523 } 524 } 525 sfsp++; 526 } 527 count++; 528 mtx_lock(&mountlist_mtx); 529 nmp = TAILQ_NEXT(mp, mnt_list); 530 vfs_unbusy(mp); 531 } 532 mtx_unlock(&mountlist_mtx); 533 if (sfsp && count > maxcount) 534 *countp = maxcount; 535 else 536 *countp = count; 537 return (0); 538 } 539 540 #ifdef COMPAT_FREEBSD4 541 /* 542 * Get old format filesystem statistics. 543 */ 544 static void cvtstatfs(struct statfs *, struct ostatfs *); 545 546 #ifndef _SYS_SYSPROTO_H_ 547 struct freebsd4_statfs_args { 548 char *path; 549 struct ostatfs *buf; 550 }; 551 #endif 552 int 553 freebsd4_statfs(td, uap) 554 struct thread *td; 555 struct freebsd4_statfs_args /* { 556 char *path; 557 struct ostatfs *buf; 558 } */ *uap; 559 { 560 struct ostatfs osb; 561 struct statfs sf; 562 int error; 563 564 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 565 if (error != 0) 566 return (error); 567 cvtstatfs(&sf, &osb); 568 return (copyout(&osb, uap->buf, sizeof(osb))); 569 } 570 571 /* 572 * Get filesystem statistics. 573 */ 574 #ifndef _SYS_SYSPROTO_H_ 575 struct freebsd4_fstatfs_args { 576 int fd; 577 struct ostatfs *buf; 578 }; 579 #endif 580 int 581 freebsd4_fstatfs(td, uap) 582 struct thread *td; 583 struct freebsd4_fstatfs_args /* { 584 int fd; 585 struct ostatfs *buf; 586 } */ *uap; 587 { 588 struct ostatfs osb; 589 struct statfs sf; 590 int error; 591 592 error = kern_fstatfs(td, uap->fd, &sf); 593 if (error != 0) 594 return (error); 595 cvtstatfs(&sf, &osb); 596 return (copyout(&osb, uap->buf, sizeof(osb))); 597 } 598 599 /* 600 * Get statistics on all filesystems. 601 */ 602 #ifndef _SYS_SYSPROTO_H_ 603 struct freebsd4_getfsstat_args { 604 struct ostatfs *buf; 605 long bufsize; 606 int mode; 607 }; 608 #endif 609 int 610 freebsd4_getfsstat(td, uap) 611 struct thread *td; 612 register struct freebsd4_getfsstat_args /* { 613 struct ostatfs *buf; 614 long bufsize; 615 int mode; 616 } */ *uap; 617 { 618 struct statfs *buf, *sp; 619 struct ostatfs osb; 620 size_t count, size; 621 int error; 622 623 if (uap->bufsize < 0) 624 return (EINVAL); 625 count = uap->bufsize / sizeof(struct ostatfs); 626 if (count > SIZE_MAX / sizeof(struct statfs)) 627 return (EINVAL); 628 size = count * sizeof(struct statfs); 629 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 630 uap->mode); 631 td->td_retval[0] = count; 632 if (size != 0) { 633 sp = buf; 634 while (count != 0 && error == 0) { 635 cvtstatfs(sp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 sp++; 638 uap->buf++; 639 count--; 640 } 641 free(buf, M_TEMP); 642 } 643 return (error); 644 } 645 646 /* 647 * Implement fstatfs() for (NFS) file handles. 648 */ 649 #ifndef _SYS_SYSPROTO_H_ 650 struct freebsd4_fhstatfs_args { 651 struct fhandle *u_fhp; 652 struct ostatfs *buf; 653 }; 654 #endif 655 int 656 freebsd4_fhstatfs(td, uap) 657 struct thread *td; 658 struct freebsd4_fhstatfs_args /* { 659 struct fhandle *u_fhp; 660 struct ostatfs *buf; 661 } */ *uap; 662 { 663 struct ostatfs osb; 664 struct statfs sf; 665 fhandle_t fh; 666 int error; 667 668 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 669 if (error != 0) 670 return (error); 671 error = kern_fhstatfs(td, fh, &sf); 672 if (error != 0) 673 return (error); 674 cvtstatfs(&sf, &osb); 675 return (copyout(&osb, uap->buf, sizeof(osb))); 676 } 677 678 /* 679 * Convert a new format statfs structure to an old format statfs structure. 680 */ 681 static void 682 cvtstatfs(nsp, osp) 683 struct statfs *nsp; 684 struct ostatfs *osp; 685 { 686 687 statfs_scale_blocks(nsp, LONG_MAX); 688 bzero(osp, sizeof(*osp)); 689 osp->f_bsize = nsp->f_bsize; 690 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 691 osp->f_blocks = nsp->f_blocks; 692 osp->f_bfree = nsp->f_bfree; 693 osp->f_bavail = nsp->f_bavail; 694 osp->f_files = MIN(nsp->f_files, LONG_MAX); 695 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 696 osp->f_owner = nsp->f_owner; 697 osp->f_type = nsp->f_type; 698 osp->f_flags = nsp->f_flags; 699 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 700 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 701 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 702 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 703 strlcpy(osp->f_fstypename, nsp->f_fstypename, 704 MIN(MFSNAMELEN, OMFSNAMELEN)); 705 strlcpy(osp->f_mntonname, nsp->f_mntonname, 706 MIN(MNAMELEN, OMNAMELEN)); 707 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 osp->f_fsid = nsp->f_fsid; 710 } 711 #endif /* COMPAT_FREEBSD4 */ 712 713 /* 714 * Change current working directory to a given file descriptor. 715 */ 716 #ifndef _SYS_SYSPROTO_H_ 717 struct fchdir_args { 718 int fd; 719 }; 720 #endif 721 int 722 sys_fchdir(td, uap) 723 struct thread *td; 724 struct fchdir_args /* { 725 int fd; 726 } */ *uap; 727 { 728 struct vnode *vp, *tdp; 729 struct mount *mp; 730 struct file *fp; 731 cap_rights_t rights; 732 int error; 733 734 AUDIT_ARG_FD(uap->fd); 735 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 736 &fp); 737 if (error != 0) 738 return (error); 739 vp = fp->f_vnode; 740 vrefact(vp); 741 fdrop(fp, td); 742 vn_lock(vp, LK_SHARED | LK_RETRY); 743 AUDIT_ARG_VNODE1(vp); 744 error = change_dir(vp, td); 745 while (!error && (mp = vp->v_mountedhere) != NULL) { 746 if (vfs_busy(mp, 0)) 747 continue; 748 error = VFS_ROOT(mp, LK_SHARED, &tdp); 749 vfs_unbusy(mp); 750 if (error != 0) 751 break; 752 vput(vp); 753 vp = tdp; 754 } 755 if (error != 0) { 756 vput(vp); 757 return (error); 758 } 759 VOP_UNLOCK(vp, 0); 760 pwd_chdir(td, vp); 761 return (0); 762 } 763 764 /* 765 * Change current working directory (``.''). 766 */ 767 #ifndef _SYS_SYSPROTO_H_ 768 struct chdir_args { 769 char *path; 770 }; 771 #endif 772 int 773 sys_chdir(td, uap) 774 struct thread *td; 775 struct chdir_args /* { 776 char *path; 777 } */ *uap; 778 { 779 780 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 781 } 782 783 int 784 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 785 { 786 struct nameidata nd; 787 int error; 788 789 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 790 pathseg, path, td); 791 if ((error = namei(&nd)) != 0) 792 return (error); 793 if ((error = change_dir(nd.ni_vp, td)) != 0) { 794 vput(nd.ni_vp); 795 NDFREE(&nd, NDF_ONLY_PNBUF); 796 return (error); 797 } 798 VOP_UNLOCK(nd.ni_vp, 0); 799 NDFREE(&nd, NDF_ONLY_PNBUF); 800 pwd_chdir(td, nd.ni_vp); 801 return (0); 802 } 803 804 /* 805 * Change notion of root (``/'') directory. 806 */ 807 #ifndef _SYS_SYSPROTO_H_ 808 struct chroot_args { 809 char *path; 810 }; 811 #endif 812 int 813 sys_chroot(td, uap) 814 struct thread *td; 815 struct chroot_args /* { 816 char *path; 817 } */ *uap; 818 { 819 struct nameidata nd; 820 int error; 821 822 error = priv_check(td, PRIV_VFS_CHROOT); 823 if (error != 0) 824 return (error); 825 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 826 UIO_USERSPACE, uap->path, td); 827 error = namei(&nd); 828 if (error != 0) 829 goto error; 830 error = change_dir(nd.ni_vp, td); 831 if (error != 0) 832 goto e_vunlock; 833 #ifdef MAC 834 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 835 if (error != 0) 836 goto e_vunlock; 837 #endif 838 VOP_UNLOCK(nd.ni_vp, 0); 839 error = pwd_chroot(td, nd.ni_vp); 840 vrele(nd.ni_vp); 841 NDFREE(&nd, NDF_ONLY_PNBUF); 842 return (error); 843 e_vunlock: 844 vput(nd.ni_vp); 845 error: 846 NDFREE(&nd, NDF_ONLY_PNBUF); 847 return (error); 848 } 849 850 /* 851 * Common routine for chroot and chdir. Callers must provide a locked vnode 852 * instance. 853 */ 854 int 855 change_dir(vp, td) 856 struct vnode *vp; 857 struct thread *td; 858 { 859 #ifdef MAC 860 int error; 861 #endif 862 863 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 864 if (vp->v_type != VDIR) 865 return (ENOTDIR); 866 #ifdef MAC 867 error = mac_vnode_check_chdir(td->td_ucred, vp); 868 if (error != 0) 869 return (error); 870 #endif 871 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 872 } 873 874 static __inline void 875 flags_to_rights(int flags, cap_rights_t *rightsp) 876 { 877 878 if (flags & O_EXEC) { 879 cap_rights_set(rightsp, CAP_FEXECVE); 880 } else { 881 switch ((flags & O_ACCMODE)) { 882 case O_RDONLY: 883 cap_rights_set(rightsp, CAP_READ); 884 break; 885 case O_RDWR: 886 cap_rights_set(rightsp, CAP_READ); 887 /* FALLTHROUGH */ 888 case O_WRONLY: 889 cap_rights_set(rightsp, CAP_WRITE); 890 if (!(flags & (O_APPEND | O_TRUNC))) 891 cap_rights_set(rightsp, CAP_SEEK); 892 break; 893 } 894 } 895 896 if (flags & O_CREAT) 897 cap_rights_set(rightsp, CAP_CREATE); 898 899 if (flags & O_TRUNC) 900 cap_rights_set(rightsp, CAP_FTRUNCATE); 901 902 if (flags & (O_SYNC | O_FSYNC)) 903 cap_rights_set(rightsp, CAP_FSYNC); 904 905 if (flags & (O_EXLOCK | O_SHLOCK)) 906 cap_rights_set(rightsp, CAP_FLOCK); 907 } 908 909 /* 910 * Check permissions, allocate an open file structure, and call the device 911 * open routine if any. 912 */ 913 #ifndef _SYS_SYSPROTO_H_ 914 struct open_args { 915 char *path; 916 int flags; 917 int mode; 918 }; 919 #endif 920 int 921 sys_open(td, uap) 922 struct thread *td; 923 register struct open_args /* { 924 char *path; 925 int flags; 926 int mode; 927 } */ *uap; 928 { 929 930 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 931 uap->flags, uap->mode)); 932 } 933 934 #ifndef _SYS_SYSPROTO_H_ 935 struct openat_args { 936 int fd; 937 char *path; 938 int flag; 939 int mode; 940 }; 941 #endif 942 int 943 sys_openat(struct thread *td, struct openat_args *uap) 944 { 945 946 AUDIT_ARG_FD(uap->fd); 947 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 948 uap->mode)); 949 } 950 951 int 952 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 953 int flags, int mode) 954 { 955 struct proc *p = td->td_proc; 956 struct filedesc *fdp = p->p_fd; 957 struct file *fp; 958 struct vnode *vp; 959 struct nameidata nd; 960 cap_rights_t rights; 961 int cmode, error, indx; 962 963 indx = -1; 964 965 AUDIT_ARG_FFLAGS(flags); 966 AUDIT_ARG_MODE(mode); 967 cap_rights_init(&rights, CAP_LOOKUP); 968 flags_to_rights(flags, &rights); 969 /* 970 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 971 * may be specified. 972 */ 973 if (flags & O_EXEC) { 974 if (flags & O_ACCMODE) 975 return (EINVAL); 976 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 977 return (EINVAL); 978 } else { 979 flags = FFLAGS(flags); 980 } 981 982 /* 983 * Allocate a file structure. The descriptor to reference it 984 * is allocated and set by finstall() below. 985 */ 986 error = falloc_noinstall(td, &fp); 987 if (error != 0) 988 return (error); 989 /* 990 * An extra reference on `fp' has been held for us by 991 * falloc_noinstall(). 992 */ 993 /* Set the flags early so the finit in devfs can pick them up. */ 994 fp->f_flag = flags & FMASK; 995 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 996 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 997 &rights, td); 998 td->td_dupfd = -1; /* XXX check for fdopen */ 999 error = vn_open(&nd, &flags, cmode, fp); 1000 if (error != 0) { 1001 /* 1002 * If the vn_open replaced the method vector, something 1003 * wonderous happened deep below and we just pass it up 1004 * pretending we know what we do. 1005 */ 1006 if (error == ENXIO && fp->f_ops != &badfileops) 1007 goto success; 1008 1009 /* 1010 * Handle special fdopen() case. bleh. 1011 * 1012 * Don't do this for relative (capability) lookups; we don't 1013 * understand exactly what would happen, and we don't think 1014 * that it ever should. 1015 */ 1016 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1017 (error == ENODEV || error == ENXIO) && 1018 td->td_dupfd >= 0) { 1019 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1020 &indx); 1021 if (error == 0) 1022 goto success; 1023 } 1024 1025 goto bad; 1026 } 1027 td->td_dupfd = 0; 1028 NDFREE(&nd, NDF_ONLY_PNBUF); 1029 vp = nd.ni_vp; 1030 1031 /* 1032 * Store the vnode, for any f_type. Typically, the vnode use 1033 * count is decremented by direct call to vn_closefile() for 1034 * files that switched type in the cdevsw fdopen() method. 1035 */ 1036 fp->f_vnode = vp; 1037 /* 1038 * If the file wasn't claimed by devfs bind it to the normal 1039 * vnode operations here. 1040 */ 1041 if (fp->f_ops == &badfileops) { 1042 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1043 fp->f_seqcount = 1; 1044 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1045 DTYPE_VNODE, vp, &vnops); 1046 } 1047 1048 VOP_UNLOCK(vp, 0); 1049 if (flags & O_TRUNC) { 1050 error = fo_truncate(fp, 0, td->td_ucred, td); 1051 if (error != 0) 1052 goto bad; 1053 } 1054 success: 1055 /* 1056 * If we haven't already installed the FD (for dupfdopen), do so now. 1057 */ 1058 if (indx == -1) { 1059 struct filecaps *fcaps; 1060 1061 #ifdef CAPABILITIES 1062 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1063 fcaps = &nd.ni_filecaps; 1064 else 1065 #endif 1066 fcaps = NULL; 1067 error = finstall(td, fp, &indx, flags, fcaps); 1068 /* On success finstall() consumes fcaps. */ 1069 if (error != 0) { 1070 filecaps_free(&nd.ni_filecaps); 1071 goto bad; 1072 } 1073 } else { 1074 filecaps_free(&nd.ni_filecaps); 1075 } 1076 1077 /* 1078 * Release our private reference, leaving the one associated with 1079 * the descriptor table intact. 1080 */ 1081 fdrop(fp, td); 1082 td->td_retval[0] = indx; 1083 return (0); 1084 bad: 1085 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1086 fdrop(fp, td); 1087 return (error); 1088 } 1089 1090 #ifdef COMPAT_43 1091 /* 1092 * Create a file. 1093 */ 1094 #ifndef _SYS_SYSPROTO_H_ 1095 struct ocreat_args { 1096 char *path; 1097 int mode; 1098 }; 1099 #endif 1100 int 1101 ocreat(td, uap) 1102 struct thread *td; 1103 register struct ocreat_args /* { 1104 char *path; 1105 int mode; 1106 } */ *uap; 1107 { 1108 1109 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1110 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1111 } 1112 #endif /* COMPAT_43 */ 1113 1114 /* 1115 * Create a special file. 1116 */ 1117 #ifndef _SYS_SYSPROTO_H_ 1118 struct mknod_args { 1119 char *path; 1120 int mode; 1121 int dev; 1122 }; 1123 #endif 1124 int 1125 sys_mknod(td, uap) 1126 struct thread *td; 1127 register struct mknod_args /* { 1128 char *path; 1129 int mode; 1130 int dev; 1131 } */ *uap; 1132 { 1133 1134 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1135 uap->mode, uap->dev)); 1136 } 1137 1138 #ifndef _SYS_SYSPROTO_H_ 1139 struct mknodat_args { 1140 int fd; 1141 char *path; 1142 mode_t mode; 1143 dev_t dev; 1144 }; 1145 #endif 1146 int 1147 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1148 { 1149 1150 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1151 uap->dev)); 1152 } 1153 1154 int 1155 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1156 int mode, int dev) 1157 { 1158 struct vnode *vp; 1159 struct mount *mp; 1160 struct vattr vattr; 1161 struct nameidata nd; 1162 cap_rights_t rights; 1163 int error, whiteout = 0; 1164 1165 AUDIT_ARG_MODE(mode); 1166 AUDIT_ARG_DEV(dev); 1167 switch (mode & S_IFMT) { 1168 case S_IFCHR: 1169 case S_IFBLK: 1170 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1171 if (error == 0 && dev == VNOVAL) 1172 error = EINVAL; 1173 break; 1174 case S_IFMT: 1175 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1176 break; 1177 case S_IFWHT: 1178 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1179 break; 1180 case S_IFIFO: 1181 if (dev == 0) 1182 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1183 /* FALLTHROUGH */ 1184 default: 1185 error = EINVAL; 1186 break; 1187 } 1188 if (error != 0) 1189 return (error); 1190 restart: 1191 bwillwrite(); 1192 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1193 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1194 td); 1195 if ((error = namei(&nd)) != 0) 1196 return (error); 1197 vp = nd.ni_vp; 1198 if (vp != NULL) { 1199 NDFREE(&nd, NDF_ONLY_PNBUF); 1200 if (vp == nd.ni_dvp) 1201 vrele(nd.ni_dvp); 1202 else 1203 vput(nd.ni_dvp); 1204 vrele(vp); 1205 return (EEXIST); 1206 } else { 1207 VATTR_NULL(&vattr); 1208 vattr.va_mode = (mode & ALLPERMS) & 1209 ~td->td_proc->p_fd->fd_cmask; 1210 vattr.va_rdev = dev; 1211 whiteout = 0; 1212 1213 switch (mode & S_IFMT) { 1214 case S_IFMT: /* used by badsect to flag bad sectors */ 1215 vattr.va_type = VBAD; 1216 break; 1217 case S_IFCHR: 1218 vattr.va_type = VCHR; 1219 break; 1220 case S_IFBLK: 1221 vattr.va_type = VBLK; 1222 break; 1223 case S_IFWHT: 1224 whiteout = 1; 1225 break; 1226 default: 1227 panic("kern_mknod: invalid mode"); 1228 } 1229 } 1230 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1231 NDFREE(&nd, NDF_ONLY_PNBUF); 1232 vput(nd.ni_dvp); 1233 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1234 return (error); 1235 goto restart; 1236 } 1237 #ifdef MAC 1238 if (error == 0 && !whiteout) 1239 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1240 &nd.ni_cnd, &vattr); 1241 #endif 1242 if (error == 0) { 1243 if (whiteout) 1244 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1245 else { 1246 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1247 &nd.ni_cnd, &vattr); 1248 if (error == 0) 1249 vput(nd.ni_vp); 1250 } 1251 } 1252 NDFREE(&nd, NDF_ONLY_PNBUF); 1253 vput(nd.ni_dvp); 1254 vn_finished_write(mp); 1255 return (error); 1256 } 1257 1258 /* 1259 * Create a named pipe. 1260 */ 1261 #ifndef _SYS_SYSPROTO_H_ 1262 struct mkfifo_args { 1263 char *path; 1264 int mode; 1265 }; 1266 #endif 1267 int 1268 sys_mkfifo(td, uap) 1269 struct thread *td; 1270 register struct mkfifo_args /* { 1271 char *path; 1272 int mode; 1273 } */ *uap; 1274 { 1275 1276 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1277 uap->mode)); 1278 } 1279 1280 #ifndef _SYS_SYSPROTO_H_ 1281 struct mkfifoat_args { 1282 int fd; 1283 char *path; 1284 mode_t mode; 1285 }; 1286 #endif 1287 int 1288 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1289 { 1290 1291 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1292 uap->mode)); 1293 } 1294 1295 int 1296 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1297 int mode) 1298 { 1299 struct mount *mp; 1300 struct vattr vattr; 1301 struct nameidata nd; 1302 cap_rights_t rights; 1303 int error; 1304 1305 AUDIT_ARG_MODE(mode); 1306 restart: 1307 bwillwrite(); 1308 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1309 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1310 td); 1311 if ((error = namei(&nd)) != 0) 1312 return (error); 1313 if (nd.ni_vp != NULL) { 1314 NDFREE(&nd, NDF_ONLY_PNBUF); 1315 if (nd.ni_vp == nd.ni_dvp) 1316 vrele(nd.ni_dvp); 1317 else 1318 vput(nd.ni_dvp); 1319 vrele(nd.ni_vp); 1320 return (EEXIST); 1321 } 1322 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1323 NDFREE(&nd, NDF_ONLY_PNBUF); 1324 vput(nd.ni_dvp); 1325 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1326 return (error); 1327 goto restart; 1328 } 1329 VATTR_NULL(&vattr); 1330 vattr.va_type = VFIFO; 1331 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1332 #ifdef MAC 1333 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1334 &vattr); 1335 if (error != 0) 1336 goto out; 1337 #endif 1338 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1339 if (error == 0) 1340 vput(nd.ni_vp); 1341 #ifdef MAC 1342 out: 1343 #endif 1344 vput(nd.ni_dvp); 1345 vn_finished_write(mp); 1346 NDFREE(&nd, NDF_ONLY_PNBUF); 1347 return (error); 1348 } 1349 1350 /* 1351 * Make a hard file link. 1352 */ 1353 #ifndef _SYS_SYSPROTO_H_ 1354 struct link_args { 1355 char *path; 1356 char *link; 1357 }; 1358 #endif 1359 int 1360 sys_link(td, uap) 1361 struct thread *td; 1362 register struct link_args /* { 1363 char *path; 1364 char *link; 1365 } */ *uap; 1366 { 1367 1368 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1369 UIO_USERSPACE, FOLLOW)); 1370 } 1371 1372 #ifndef _SYS_SYSPROTO_H_ 1373 struct linkat_args { 1374 int fd1; 1375 char *path1; 1376 int fd2; 1377 char *path2; 1378 int flag; 1379 }; 1380 #endif 1381 int 1382 sys_linkat(struct thread *td, struct linkat_args *uap) 1383 { 1384 int flag; 1385 1386 flag = uap->flag; 1387 if (flag & ~AT_SYMLINK_FOLLOW) 1388 return (EINVAL); 1389 1390 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1391 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1392 } 1393 1394 int hardlink_check_uid = 0; 1395 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1396 &hardlink_check_uid, 0, 1397 "Unprivileged processes cannot create hard links to files owned by other " 1398 "users"); 1399 static int hardlink_check_gid = 0; 1400 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1401 &hardlink_check_gid, 0, 1402 "Unprivileged processes cannot create hard links to files owned by other " 1403 "groups"); 1404 1405 static int 1406 can_hardlink(struct vnode *vp, struct ucred *cred) 1407 { 1408 struct vattr va; 1409 int error; 1410 1411 if (!hardlink_check_uid && !hardlink_check_gid) 1412 return (0); 1413 1414 error = VOP_GETATTR(vp, &va, cred); 1415 if (error != 0) 1416 return (error); 1417 1418 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1419 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1420 if (error != 0) 1421 return (error); 1422 } 1423 1424 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1425 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1426 if (error != 0) 1427 return (error); 1428 } 1429 1430 return (0); 1431 } 1432 1433 int 1434 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1435 enum uio_seg segflg, int follow) 1436 { 1437 struct vnode *vp; 1438 struct mount *mp; 1439 struct nameidata nd; 1440 cap_rights_t rights; 1441 int error; 1442 1443 again: 1444 bwillwrite(); 1445 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1446 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1447 1448 if ((error = namei(&nd)) != 0) 1449 return (error); 1450 NDFREE(&nd, NDF_ONLY_PNBUF); 1451 vp = nd.ni_vp; 1452 if (vp->v_type == VDIR) { 1453 vrele(vp); 1454 return (EPERM); /* POSIX */ 1455 } 1456 NDINIT_ATRIGHTS(&nd, CREATE, 1457 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1458 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1459 if ((error = namei(&nd)) == 0) { 1460 if (nd.ni_vp != NULL) { 1461 NDFREE(&nd, NDF_ONLY_PNBUF); 1462 if (nd.ni_dvp == nd.ni_vp) 1463 vrele(nd.ni_dvp); 1464 else 1465 vput(nd.ni_dvp); 1466 vrele(nd.ni_vp); 1467 vrele(vp); 1468 return (EEXIST); 1469 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1470 /* 1471 * Cross-device link. No need to recheck 1472 * vp->v_type, since it cannot change, except 1473 * to VBAD. 1474 */ 1475 NDFREE(&nd, NDF_ONLY_PNBUF); 1476 vput(nd.ni_dvp); 1477 vrele(vp); 1478 return (EXDEV); 1479 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1480 error = can_hardlink(vp, td->td_ucred); 1481 #ifdef MAC 1482 if (error == 0) 1483 error = mac_vnode_check_link(td->td_ucred, 1484 nd.ni_dvp, vp, &nd.ni_cnd); 1485 #endif 1486 if (error != 0) { 1487 vput(vp); 1488 vput(nd.ni_dvp); 1489 NDFREE(&nd, NDF_ONLY_PNBUF); 1490 return (error); 1491 } 1492 error = vn_start_write(vp, &mp, V_NOWAIT); 1493 if (error != 0) { 1494 vput(vp); 1495 vput(nd.ni_dvp); 1496 NDFREE(&nd, NDF_ONLY_PNBUF); 1497 error = vn_start_write(NULL, &mp, 1498 V_XSLEEP | PCATCH); 1499 if (error != 0) 1500 return (error); 1501 goto again; 1502 } 1503 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1504 VOP_UNLOCK(vp, 0); 1505 vput(nd.ni_dvp); 1506 vn_finished_write(mp); 1507 NDFREE(&nd, NDF_ONLY_PNBUF); 1508 } else { 1509 vput(nd.ni_dvp); 1510 NDFREE(&nd, NDF_ONLY_PNBUF); 1511 vrele(vp); 1512 goto again; 1513 } 1514 } 1515 vrele(vp); 1516 return (error); 1517 } 1518 1519 /* 1520 * Make a symbolic link. 1521 */ 1522 #ifndef _SYS_SYSPROTO_H_ 1523 struct symlink_args { 1524 char *path; 1525 char *link; 1526 }; 1527 #endif 1528 int 1529 sys_symlink(td, uap) 1530 struct thread *td; 1531 register struct symlink_args /* { 1532 char *path; 1533 char *link; 1534 } */ *uap; 1535 { 1536 1537 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1538 UIO_USERSPACE)); 1539 } 1540 1541 #ifndef _SYS_SYSPROTO_H_ 1542 struct symlinkat_args { 1543 char *path; 1544 int fd; 1545 char *path2; 1546 }; 1547 #endif 1548 int 1549 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1550 { 1551 1552 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1553 UIO_USERSPACE)); 1554 } 1555 1556 int 1557 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1558 enum uio_seg segflg) 1559 { 1560 struct mount *mp; 1561 struct vattr vattr; 1562 char *syspath; 1563 struct nameidata nd; 1564 int error; 1565 cap_rights_t rights; 1566 1567 if (segflg == UIO_SYSSPACE) { 1568 syspath = path1; 1569 } else { 1570 syspath = uma_zalloc(namei_zone, M_WAITOK); 1571 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1572 goto out; 1573 } 1574 AUDIT_ARG_TEXT(syspath); 1575 restart: 1576 bwillwrite(); 1577 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1578 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1579 td); 1580 if ((error = namei(&nd)) != 0) 1581 goto out; 1582 if (nd.ni_vp) { 1583 NDFREE(&nd, NDF_ONLY_PNBUF); 1584 if (nd.ni_vp == nd.ni_dvp) 1585 vrele(nd.ni_dvp); 1586 else 1587 vput(nd.ni_dvp); 1588 vrele(nd.ni_vp); 1589 error = EEXIST; 1590 goto out; 1591 } 1592 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1593 NDFREE(&nd, NDF_ONLY_PNBUF); 1594 vput(nd.ni_dvp); 1595 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1596 goto out; 1597 goto restart; 1598 } 1599 VATTR_NULL(&vattr); 1600 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1601 #ifdef MAC 1602 vattr.va_type = VLNK; 1603 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1604 &vattr); 1605 if (error != 0) 1606 goto out2; 1607 #endif 1608 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1609 if (error == 0) 1610 vput(nd.ni_vp); 1611 #ifdef MAC 1612 out2: 1613 #endif 1614 NDFREE(&nd, NDF_ONLY_PNBUF); 1615 vput(nd.ni_dvp); 1616 vn_finished_write(mp); 1617 out: 1618 if (segflg != UIO_SYSSPACE) 1619 uma_zfree(namei_zone, syspath); 1620 return (error); 1621 } 1622 1623 /* 1624 * Delete a whiteout from the filesystem. 1625 */ 1626 int 1627 sys_undelete(td, uap) 1628 struct thread *td; 1629 register struct undelete_args /* { 1630 char *path; 1631 } */ *uap; 1632 { 1633 struct mount *mp; 1634 struct nameidata nd; 1635 int error; 1636 1637 restart: 1638 bwillwrite(); 1639 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1640 UIO_USERSPACE, uap->path, td); 1641 error = namei(&nd); 1642 if (error != 0) 1643 return (error); 1644 1645 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1646 NDFREE(&nd, NDF_ONLY_PNBUF); 1647 if (nd.ni_vp == nd.ni_dvp) 1648 vrele(nd.ni_dvp); 1649 else 1650 vput(nd.ni_dvp); 1651 if (nd.ni_vp) 1652 vrele(nd.ni_vp); 1653 return (EEXIST); 1654 } 1655 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1656 NDFREE(&nd, NDF_ONLY_PNBUF); 1657 vput(nd.ni_dvp); 1658 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1659 return (error); 1660 goto restart; 1661 } 1662 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1663 NDFREE(&nd, NDF_ONLY_PNBUF); 1664 vput(nd.ni_dvp); 1665 vn_finished_write(mp); 1666 return (error); 1667 } 1668 1669 /* 1670 * Delete a name from the filesystem. 1671 */ 1672 #ifndef _SYS_SYSPROTO_H_ 1673 struct unlink_args { 1674 char *path; 1675 }; 1676 #endif 1677 int 1678 sys_unlink(td, uap) 1679 struct thread *td; 1680 struct unlink_args /* { 1681 char *path; 1682 } */ *uap; 1683 { 1684 1685 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1686 } 1687 1688 #ifndef _SYS_SYSPROTO_H_ 1689 struct unlinkat_args { 1690 int fd; 1691 char *path; 1692 int flag; 1693 }; 1694 #endif 1695 int 1696 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1697 { 1698 int flag = uap->flag; 1699 int fd = uap->fd; 1700 char *path = uap->path; 1701 1702 if (flag & ~AT_REMOVEDIR) 1703 return (EINVAL); 1704 1705 if (flag & AT_REMOVEDIR) 1706 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1707 else 1708 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1709 } 1710 1711 int 1712 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1713 ino_t oldinum) 1714 { 1715 struct mount *mp; 1716 struct vnode *vp; 1717 struct nameidata nd; 1718 struct stat sb; 1719 cap_rights_t rights; 1720 int error; 1721 1722 restart: 1723 bwillwrite(); 1724 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1725 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1726 if ((error = namei(&nd)) != 0) 1727 return (error == EINVAL ? EPERM : error); 1728 vp = nd.ni_vp; 1729 if (vp->v_type == VDIR && oldinum == 0) { 1730 error = EPERM; /* POSIX */ 1731 } else if (oldinum != 0 && 1732 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1733 sb.st_ino != oldinum) { 1734 error = EIDRM; /* Identifier removed */ 1735 } else { 1736 /* 1737 * The root of a mounted filesystem cannot be deleted. 1738 * 1739 * XXX: can this only be a VDIR case? 1740 */ 1741 if (vp->v_vflag & VV_ROOT) 1742 error = EBUSY; 1743 } 1744 if (error == 0) { 1745 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1746 NDFREE(&nd, NDF_ONLY_PNBUF); 1747 vput(nd.ni_dvp); 1748 if (vp == nd.ni_dvp) 1749 vrele(vp); 1750 else 1751 vput(vp); 1752 if ((error = vn_start_write(NULL, &mp, 1753 V_XSLEEP | PCATCH)) != 0) 1754 return (error); 1755 goto restart; 1756 } 1757 #ifdef MAC 1758 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1759 &nd.ni_cnd); 1760 if (error != 0) 1761 goto out; 1762 #endif 1763 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1764 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1765 #ifdef MAC 1766 out: 1767 #endif 1768 vn_finished_write(mp); 1769 } 1770 NDFREE(&nd, NDF_ONLY_PNBUF); 1771 vput(nd.ni_dvp); 1772 if (vp == nd.ni_dvp) 1773 vrele(vp); 1774 else 1775 vput(vp); 1776 return (error); 1777 } 1778 1779 /* 1780 * Reposition read/write file offset. 1781 */ 1782 #ifndef _SYS_SYSPROTO_H_ 1783 struct lseek_args { 1784 int fd; 1785 int pad; 1786 off_t offset; 1787 int whence; 1788 }; 1789 #endif 1790 int 1791 sys_lseek(td, uap) 1792 struct thread *td; 1793 register struct lseek_args /* { 1794 int fd; 1795 int pad; 1796 off_t offset; 1797 int whence; 1798 } */ *uap; 1799 { 1800 struct file *fp; 1801 cap_rights_t rights; 1802 int error; 1803 1804 AUDIT_ARG_FD(uap->fd); 1805 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1806 if (error != 0) 1807 return (error); 1808 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1809 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1810 fdrop(fp, td); 1811 return (error); 1812 } 1813 1814 #if defined(COMPAT_43) 1815 /* 1816 * Reposition read/write file offset. 1817 */ 1818 #ifndef _SYS_SYSPROTO_H_ 1819 struct olseek_args { 1820 int fd; 1821 long offset; 1822 int whence; 1823 }; 1824 #endif 1825 int 1826 olseek(td, uap) 1827 struct thread *td; 1828 register struct olseek_args /* { 1829 int fd; 1830 long offset; 1831 int whence; 1832 } */ *uap; 1833 { 1834 struct lseek_args /* { 1835 int fd; 1836 int pad; 1837 off_t offset; 1838 int whence; 1839 } */ nuap; 1840 1841 nuap.fd = uap->fd; 1842 nuap.offset = uap->offset; 1843 nuap.whence = uap->whence; 1844 return (sys_lseek(td, &nuap)); 1845 } 1846 #endif /* COMPAT_43 */ 1847 1848 #if defined(COMPAT_FREEBSD6) 1849 /* Version with the 'pad' argument */ 1850 int 1851 freebsd6_lseek(td, uap) 1852 struct thread *td; 1853 register struct freebsd6_lseek_args *uap; 1854 { 1855 struct lseek_args ouap; 1856 1857 ouap.fd = uap->fd; 1858 ouap.offset = uap->offset; 1859 ouap.whence = uap->whence; 1860 return (sys_lseek(td, &ouap)); 1861 } 1862 #endif 1863 1864 /* 1865 * Check access permissions using passed credentials. 1866 */ 1867 static int 1868 vn_access(vp, user_flags, cred, td) 1869 struct vnode *vp; 1870 int user_flags; 1871 struct ucred *cred; 1872 struct thread *td; 1873 { 1874 accmode_t accmode; 1875 int error; 1876 1877 /* Flags == 0 means only check for existence. */ 1878 if (user_flags == 0) 1879 return (0); 1880 1881 accmode = 0; 1882 if (user_flags & R_OK) 1883 accmode |= VREAD; 1884 if (user_flags & W_OK) 1885 accmode |= VWRITE; 1886 if (user_flags & X_OK) 1887 accmode |= VEXEC; 1888 #ifdef MAC 1889 error = mac_vnode_check_access(cred, vp, accmode); 1890 if (error != 0) 1891 return (error); 1892 #endif 1893 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1894 error = VOP_ACCESS(vp, accmode, cred, td); 1895 return (error); 1896 } 1897 1898 /* 1899 * Check access permissions using "real" credentials. 1900 */ 1901 #ifndef _SYS_SYSPROTO_H_ 1902 struct access_args { 1903 char *path; 1904 int amode; 1905 }; 1906 #endif 1907 int 1908 sys_access(td, uap) 1909 struct thread *td; 1910 register struct access_args /* { 1911 char *path; 1912 int amode; 1913 } */ *uap; 1914 { 1915 1916 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1917 0, uap->amode)); 1918 } 1919 1920 #ifndef _SYS_SYSPROTO_H_ 1921 struct faccessat_args { 1922 int dirfd; 1923 char *path; 1924 int amode; 1925 int flag; 1926 } 1927 #endif 1928 int 1929 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1930 { 1931 1932 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1933 uap->amode)); 1934 } 1935 1936 int 1937 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1938 int flag, int amode) 1939 { 1940 struct ucred *cred, *usecred; 1941 struct vnode *vp; 1942 struct nameidata nd; 1943 cap_rights_t rights; 1944 int error; 1945 1946 if (flag & ~AT_EACCESS) 1947 return (EINVAL); 1948 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1949 return (EINVAL); 1950 1951 /* 1952 * Create and modify a temporary credential instead of one that 1953 * is potentially shared (if we need one). 1954 */ 1955 cred = td->td_ucred; 1956 if ((flag & AT_EACCESS) == 0 && 1957 ((cred->cr_uid != cred->cr_ruid || 1958 cred->cr_rgid != cred->cr_groups[0]))) { 1959 usecred = crdup(cred); 1960 usecred->cr_uid = cred->cr_ruid; 1961 usecred->cr_groups[0] = cred->cr_rgid; 1962 td->td_ucred = usecred; 1963 } else 1964 usecred = cred; 1965 AUDIT_ARG_VALUE(amode); 1966 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1967 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1968 td); 1969 if ((error = namei(&nd)) != 0) 1970 goto out; 1971 vp = nd.ni_vp; 1972 1973 error = vn_access(vp, amode, usecred, td); 1974 NDFREE(&nd, NDF_ONLY_PNBUF); 1975 vput(vp); 1976 out: 1977 if (usecred != cred) { 1978 td->td_ucred = cred; 1979 crfree(usecred); 1980 } 1981 return (error); 1982 } 1983 1984 /* 1985 * Check access permissions using "effective" credentials. 1986 */ 1987 #ifndef _SYS_SYSPROTO_H_ 1988 struct eaccess_args { 1989 char *path; 1990 int amode; 1991 }; 1992 #endif 1993 int 1994 sys_eaccess(td, uap) 1995 struct thread *td; 1996 register struct eaccess_args /* { 1997 char *path; 1998 int amode; 1999 } */ *uap; 2000 { 2001 2002 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2003 AT_EACCESS, uap->amode)); 2004 } 2005 2006 #if defined(COMPAT_43) 2007 /* 2008 * Get file status; this version follows links. 2009 */ 2010 #ifndef _SYS_SYSPROTO_H_ 2011 struct ostat_args { 2012 char *path; 2013 struct ostat *ub; 2014 }; 2015 #endif 2016 int 2017 ostat(td, uap) 2018 struct thread *td; 2019 register struct ostat_args /* { 2020 char *path; 2021 struct ostat *ub; 2022 } */ *uap; 2023 { 2024 struct stat sb; 2025 struct ostat osb; 2026 int error; 2027 2028 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2029 &sb, NULL); 2030 if (error != 0) 2031 return (error); 2032 cvtstat(&sb, &osb); 2033 return (copyout(&osb, uap->ub, sizeof (osb))); 2034 } 2035 2036 /* 2037 * Get file status; this version does not follow links. 2038 */ 2039 #ifndef _SYS_SYSPROTO_H_ 2040 struct olstat_args { 2041 char *path; 2042 struct ostat *ub; 2043 }; 2044 #endif 2045 int 2046 olstat(td, uap) 2047 struct thread *td; 2048 register struct olstat_args /* { 2049 char *path; 2050 struct ostat *ub; 2051 } */ *uap; 2052 { 2053 struct stat sb; 2054 struct ostat osb; 2055 int error; 2056 2057 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2058 UIO_USERSPACE, &sb, NULL); 2059 if (error != 0) 2060 return (error); 2061 cvtstat(&sb, &osb); 2062 return (copyout(&osb, uap->ub, sizeof (osb))); 2063 } 2064 2065 /* 2066 * Convert from an old to a new stat structure. 2067 */ 2068 void 2069 cvtstat(st, ost) 2070 struct stat *st; 2071 struct ostat *ost; 2072 { 2073 2074 bzero(ost, sizeof(*ost)); 2075 ost->st_dev = st->st_dev; 2076 ost->st_ino = st->st_ino; 2077 ost->st_mode = st->st_mode; 2078 ost->st_nlink = st->st_nlink; 2079 ost->st_uid = st->st_uid; 2080 ost->st_gid = st->st_gid; 2081 ost->st_rdev = st->st_rdev; 2082 if (st->st_size < (quad_t)1 << 32) 2083 ost->st_size = st->st_size; 2084 else 2085 ost->st_size = -2; 2086 ost->st_atim = st->st_atim; 2087 ost->st_mtim = st->st_mtim; 2088 ost->st_ctim = st->st_ctim; 2089 ost->st_blksize = st->st_blksize; 2090 ost->st_blocks = st->st_blocks; 2091 ost->st_flags = st->st_flags; 2092 ost->st_gen = st->st_gen; 2093 } 2094 #endif /* COMPAT_43 */ 2095 2096 /* 2097 * Get file status; this version follows links. 2098 */ 2099 #ifndef _SYS_SYSPROTO_H_ 2100 struct stat_args { 2101 char *path; 2102 struct stat *ub; 2103 }; 2104 #endif 2105 int 2106 sys_stat(td, uap) 2107 struct thread *td; 2108 register struct stat_args /* { 2109 char *path; 2110 struct stat *ub; 2111 } */ *uap; 2112 { 2113 struct stat sb; 2114 int error; 2115 2116 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2117 &sb, NULL); 2118 if (error == 0) 2119 error = copyout(&sb, uap->ub, sizeof (sb)); 2120 return (error); 2121 } 2122 2123 #ifndef _SYS_SYSPROTO_H_ 2124 struct fstatat_args { 2125 int fd; 2126 char *path; 2127 struct stat *buf; 2128 int flag; 2129 } 2130 #endif 2131 int 2132 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2133 { 2134 struct stat sb; 2135 int error; 2136 2137 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2138 UIO_USERSPACE, &sb, NULL); 2139 if (error == 0) 2140 error = copyout(&sb, uap->buf, sizeof (sb)); 2141 return (error); 2142 } 2143 2144 int 2145 kern_statat(struct thread *td, int flag, int fd, char *path, 2146 enum uio_seg pathseg, struct stat *sbp, 2147 void (*hook)(struct vnode *vp, struct stat *sbp)) 2148 { 2149 struct nameidata nd; 2150 struct stat sb; 2151 cap_rights_t rights; 2152 int error; 2153 2154 if (flag & ~AT_SYMLINK_NOFOLLOW) 2155 return (EINVAL); 2156 2157 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2158 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2159 cap_rights_init(&rights, CAP_FSTAT), td); 2160 2161 if ((error = namei(&nd)) != 0) 2162 return (error); 2163 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2164 if (error == 0) { 2165 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2166 if (S_ISREG(sb.st_mode)) 2167 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2168 if (__predict_false(hook != NULL)) 2169 hook(nd.ni_vp, &sb); 2170 } 2171 NDFREE(&nd, NDF_ONLY_PNBUF); 2172 vput(nd.ni_vp); 2173 if (error != 0) 2174 return (error); 2175 *sbp = sb; 2176 #ifdef KTRACE 2177 if (KTRPOINT(td, KTR_STRUCT)) 2178 ktrstat(&sb); 2179 #endif 2180 return (0); 2181 } 2182 2183 /* 2184 * Get file status; this version does not follow links. 2185 */ 2186 #ifndef _SYS_SYSPROTO_H_ 2187 struct lstat_args { 2188 char *path; 2189 struct stat *ub; 2190 }; 2191 #endif 2192 int 2193 sys_lstat(td, uap) 2194 struct thread *td; 2195 register struct lstat_args /* { 2196 char *path; 2197 struct stat *ub; 2198 } */ *uap; 2199 { 2200 struct stat sb; 2201 int error; 2202 2203 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2204 UIO_USERSPACE, &sb, NULL); 2205 if (error == 0) 2206 error = copyout(&sb, uap->ub, sizeof (sb)); 2207 return (error); 2208 } 2209 2210 /* 2211 * Implementation of the NetBSD [l]stat() functions. 2212 */ 2213 void 2214 cvtnstat(sb, nsb) 2215 struct stat *sb; 2216 struct nstat *nsb; 2217 { 2218 2219 bzero(nsb, sizeof *nsb); 2220 nsb->st_dev = sb->st_dev; 2221 nsb->st_ino = sb->st_ino; 2222 nsb->st_mode = sb->st_mode; 2223 nsb->st_nlink = sb->st_nlink; 2224 nsb->st_uid = sb->st_uid; 2225 nsb->st_gid = sb->st_gid; 2226 nsb->st_rdev = sb->st_rdev; 2227 nsb->st_atim = sb->st_atim; 2228 nsb->st_mtim = sb->st_mtim; 2229 nsb->st_ctim = sb->st_ctim; 2230 nsb->st_size = sb->st_size; 2231 nsb->st_blocks = sb->st_blocks; 2232 nsb->st_blksize = sb->st_blksize; 2233 nsb->st_flags = sb->st_flags; 2234 nsb->st_gen = sb->st_gen; 2235 nsb->st_birthtim = sb->st_birthtim; 2236 } 2237 2238 #ifndef _SYS_SYSPROTO_H_ 2239 struct nstat_args { 2240 char *path; 2241 struct nstat *ub; 2242 }; 2243 #endif 2244 int 2245 sys_nstat(td, uap) 2246 struct thread *td; 2247 register struct nstat_args /* { 2248 char *path; 2249 struct nstat *ub; 2250 } */ *uap; 2251 { 2252 struct stat sb; 2253 struct nstat nsb; 2254 int error; 2255 2256 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2257 &sb, NULL); 2258 if (error != 0) 2259 return (error); 2260 cvtnstat(&sb, &nsb); 2261 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2262 } 2263 2264 /* 2265 * NetBSD lstat. Get file status; this version does not follow links. 2266 */ 2267 #ifndef _SYS_SYSPROTO_H_ 2268 struct lstat_args { 2269 char *path; 2270 struct stat *ub; 2271 }; 2272 #endif 2273 int 2274 sys_nlstat(td, uap) 2275 struct thread *td; 2276 register struct nlstat_args /* { 2277 char *path; 2278 struct nstat *ub; 2279 } */ *uap; 2280 { 2281 struct stat sb; 2282 struct nstat nsb; 2283 int error; 2284 2285 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2286 UIO_USERSPACE, &sb, NULL); 2287 if (error != 0) 2288 return (error); 2289 cvtnstat(&sb, &nsb); 2290 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2291 } 2292 2293 /* 2294 * Get configurable pathname variables. 2295 */ 2296 #ifndef _SYS_SYSPROTO_H_ 2297 struct pathconf_args { 2298 char *path; 2299 int name; 2300 }; 2301 #endif 2302 int 2303 sys_pathconf(td, uap) 2304 struct thread *td; 2305 register struct pathconf_args /* { 2306 char *path; 2307 int name; 2308 } */ *uap; 2309 { 2310 2311 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2312 } 2313 2314 #ifndef _SYS_SYSPROTO_H_ 2315 struct lpathconf_args { 2316 char *path; 2317 int name; 2318 }; 2319 #endif 2320 int 2321 sys_lpathconf(td, uap) 2322 struct thread *td; 2323 register struct lpathconf_args /* { 2324 char *path; 2325 int name; 2326 } */ *uap; 2327 { 2328 2329 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2330 NOFOLLOW)); 2331 } 2332 2333 int 2334 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2335 u_long flags) 2336 { 2337 struct nameidata nd; 2338 int error; 2339 2340 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2341 pathseg, path, td); 2342 if ((error = namei(&nd)) != 0) 2343 return (error); 2344 NDFREE(&nd, NDF_ONLY_PNBUF); 2345 2346 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2347 vput(nd.ni_vp); 2348 return (error); 2349 } 2350 2351 /* 2352 * Return target name of a symbolic link. 2353 */ 2354 #ifndef _SYS_SYSPROTO_H_ 2355 struct readlink_args { 2356 char *path; 2357 char *buf; 2358 size_t count; 2359 }; 2360 #endif 2361 int 2362 sys_readlink(td, uap) 2363 struct thread *td; 2364 register struct readlink_args /* { 2365 char *path; 2366 char *buf; 2367 size_t count; 2368 } */ *uap; 2369 { 2370 2371 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2372 uap->buf, UIO_USERSPACE, uap->count)); 2373 } 2374 #ifndef _SYS_SYSPROTO_H_ 2375 struct readlinkat_args { 2376 int fd; 2377 char *path; 2378 char *buf; 2379 size_t bufsize; 2380 }; 2381 #endif 2382 int 2383 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2384 { 2385 2386 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2387 uap->buf, UIO_USERSPACE, uap->bufsize)); 2388 } 2389 2390 int 2391 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2392 char *buf, enum uio_seg bufseg, size_t count) 2393 { 2394 struct vnode *vp; 2395 struct iovec aiov; 2396 struct uio auio; 2397 struct nameidata nd; 2398 int error; 2399 2400 if (count > IOSIZE_MAX) 2401 return (EINVAL); 2402 2403 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2404 pathseg, path, fd, td); 2405 2406 if ((error = namei(&nd)) != 0) 2407 return (error); 2408 NDFREE(&nd, NDF_ONLY_PNBUF); 2409 vp = nd.ni_vp; 2410 #ifdef MAC 2411 error = mac_vnode_check_readlink(td->td_ucred, vp); 2412 if (error != 0) { 2413 vput(vp); 2414 return (error); 2415 } 2416 #endif 2417 if (vp->v_type != VLNK) 2418 error = EINVAL; 2419 else { 2420 aiov.iov_base = buf; 2421 aiov.iov_len = count; 2422 auio.uio_iov = &aiov; 2423 auio.uio_iovcnt = 1; 2424 auio.uio_offset = 0; 2425 auio.uio_rw = UIO_READ; 2426 auio.uio_segflg = bufseg; 2427 auio.uio_td = td; 2428 auio.uio_resid = count; 2429 error = VOP_READLINK(vp, &auio, td->td_ucred); 2430 td->td_retval[0] = count - auio.uio_resid; 2431 } 2432 vput(vp); 2433 return (error); 2434 } 2435 2436 /* 2437 * Common implementation code for chflags() and fchflags(). 2438 */ 2439 static int 2440 setfflags(td, vp, flags) 2441 struct thread *td; 2442 struct vnode *vp; 2443 u_long flags; 2444 { 2445 struct mount *mp; 2446 struct vattr vattr; 2447 int error; 2448 2449 /* We can't support the value matching VNOVAL. */ 2450 if (flags == VNOVAL) 2451 return (EOPNOTSUPP); 2452 2453 /* 2454 * Prevent non-root users from setting flags on devices. When 2455 * a device is reused, users can retain ownership of the device 2456 * if they are allowed to set flags and programs assume that 2457 * chown can't fail when done as root. 2458 */ 2459 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2460 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2461 if (error != 0) 2462 return (error); 2463 } 2464 2465 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2466 return (error); 2467 VATTR_NULL(&vattr); 2468 vattr.va_flags = flags; 2469 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2470 #ifdef MAC 2471 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2472 if (error == 0) 2473 #endif 2474 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2475 VOP_UNLOCK(vp, 0); 2476 vn_finished_write(mp); 2477 return (error); 2478 } 2479 2480 /* 2481 * Change flags of a file given a path name. 2482 */ 2483 #ifndef _SYS_SYSPROTO_H_ 2484 struct chflags_args { 2485 const char *path; 2486 u_long flags; 2487 }; 2488 #endif 2489 int 2490 sys_chflags(td, uap) 2491 struct thread *td; 2492 register struct chflags_args /* { 2493 const char *path; 2494 u_long flags; 2495 } */ *uap; 2496 { 2497 2498 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2499 uap->flags, 0)); 2500 } 2501 2502 #ifndef _SYS_SYSPROTO_H_ 2503 struct chflagsat_args { 2504 int fd; 2505 const char *path; 2506 u_long flags; 2507 int atflag; 2508 } 2509 #endif 2510 int 2511 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2512 { 2513 int fd = uap->fd; 2514 const char *path = uap->path; 2515 u_long flags = uap->flags; 2516 int atflag = uap->atflag; 2517 2518 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2519 return (EINVAL); 2520 2521 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2522 } 2523 2524 /* 2525 * Same as chflags() but doesn't follow symlinks. 2526 */ 2527 int 2528 sys_lchflags(td, uap) 2529 struct thread *td; 2530 register struct lchflags_args /* { 2531 const char *path; 2532 u_long flags; 2533 } */ *uap; 2534 { 2535 2536 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2537 uap->flags, AT_SYMLINK_NOFOLLOW)); 2538 } 2539 2540 static int 2541 kern_chflagsat(struct thread *td, int fd, const char *path, 2542 enum uio_seg pathseg, u_long flags, int atflag) 2543 { 2544 struct nameidata nd; 2545 cap_rights_t rights; 2546 int error, follow; 2547 2548 AUDIT_ARG_FFLAGS(flags); 2549 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2550 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2551 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2552 if ((error = namei(&nd)) != 0) 2553 return (error); 2554 NDFREE(&nd, NDF_ONLY_PNBUF); 2555 error = setfflags(td, nd.ni_vp, flags); 2556 vrele(nd.ni_vp); 2557 return (error); 2558 } 2559 2560 /* 2561 * Change flags of a file given a file descriptor. 2562 */ 2563 #ifndef _SYS_SYSPROTO_H_ 2564 struct fchflags_args { 2565 int fd; 2566 u_long flags; 2567 }; 2568 #endif 2569 int 2570 sys_fchflags(td, uap) 2571 struct thread *td; 2572 register struct fchflags_args /* { 2573 int fd; 2574 u_long flags; 2575 } */ *uap; 2576 { 2577 struct file *fp; 2578 cap_rights_t rights; 2579 int error; 2580 2581 AUDIT_ARG_FD(uap->fd); 2582 AUDIT_ARG_FFLAGS(uap->flags); 2583 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2584 &fp); 2585 if (error != 0) 2586 return (error); 2587 #ifdef AUDIT 2588 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2589 AUDIT_ARG_VNODE1(fp->f_vnode); 2590 VOP_UNLOCK(fp->f_vnode, 0); 2591 #endif 2592 error = setfflags(td, fp->f_vnode, uap->flags); 2593 fdrop(fp, td); 2594 return (error); 2595 } 2596 2597 /* 2598 * Common implementation code for chmod(), lchmod() and fchmod(). 2599 */ 2600 int 2601 setfmode(td, cred, vp, mode) 2602 struct thread *td; 2603 struct ucred *cred; 2604 struct vnode *vp; 2605 int mode; 2606 { 2607 struct mount *mp; 2608 struct vattr vattr; 2609 int error; 2610 2611 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2612 return (error); 2613 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2614 VATTR_NULL(&vattr); 2615 vattr.va_mode = mode & ALLPERMS; 2616 #ifdef MAC 2617 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2618 if (error == 0) 2619 #endif 2620 error = VOP_SETATTR(vp, &vattr, cred); 2621 VOP_UNLOCK(vp, 0); 2622 vn_finished_write(mp); 2623 return (error); 2624 } 2625 2626 /* 2627 * Change mode of a file given path name. 2628 */ 2629 #ifndef _SYS_SYSPROTO_H_ 2630 struct chmod_args { 2631 char *path; 2632 int mode; 2633 }; 2634 #endif 2635 int 2636 sys_chmod(td, uap) 2637 struct thread *td; 2638 register struct chmod_args /* { 2639 char *path; 2640 int mode; 2641 } */ *uap; 2642 { 2643 2644 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2645 uap->mode, 0)); 2646 } 2647 2648 #ifndef _SYS_SYSPROTO_H_ 2649 struct fchmodat_args { 2650 int dirfd; 2651 char *path; 2652 mode_t mode; 2653 int flag; 2654 } 2655 #endif 2656 int 2657 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2658 { 2659 int flag = uap->flag; 2660 int fd = uap->fd; 2661 char *path = uap->path; 2662 mode_t mode = uap->mode; 2663 2664 if (flag & ~AT_SYMLINK_NOFOLLOW) 2665 return (EINVAL); 2666 2667 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2668 } 2669 2670 /* 2671 * Change mode of a file given path name (don't follow links.) 2672 */ 2673 #ifndef _SYS_SYSPROTO_H_ 2674 struct lchmod_args { 2675 char *path; 2676 int mode; 2677 }; 2678 #endif 2679 int 2680 sys_lchmod(td, uap) 2681 struct thread *td; 2682 register struct lchmod_args /* { 2683 char *path; 2684 int mode; 2685 } */ *uap; 2686 { 2687 2688 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2689 uap->mode, AT_SYMLINK_NOFOLLOW)); 2690 } 2691 2692 int 2693 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2694 mode_t mode, int flag) 2695 { 2696 struct nameidata nd; 2697 cap_rights_t rights; 2698 int error, follow; 2699 2700 AUDIT_ARG_MODE(mode); 2701 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2702 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2703 cap_rights_init(&rights, CAP_FCHMOD), td); 2704 if ((error = namei(&nd)) != 0) 2705 return (error); 2706 NDFREE(&nd, NDF_ONLY_PNBUF); 2707 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2708 vrele(nd.ni_vp); 2709 return (error); 2710 } 2711 2712 /* 2713 * Change mode of a file given a file descriptor. 2714 */ 2715 #ifndef _SYS_SYSPROTO_H_ 2716 struct fchmod_args { 2717 int fd; 2718 int mode; 2719 }; 2720 #endif 2721 int 2722 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2723 { 2724 struct file *fp; 2725 cap_rights_t rights; 2726 int error; 2727 2728 AUDIT_ARG_FD(uap->fd); 2729 AUDIT_ARG_MODE(uap->mode); 2730 2731 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2732 if (error != 0) 2733 return (error); 2734 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2735 fdrop(fp, td); 2736 return (error); 2737 } 2738 2739 /* 2740 * Common implementation for chown(), lchown(), and fchown() 2741 */ 2742 int 2743 setfown(td, cred, vp, uid, gid) 2744 struct thread *td; 2745 struct ucred *cred; 2746 struct vnode *vp; 2747 uid_t uid; 2748 gid_t gid; 2749 { 2750 struct mount *mp; 2751 struct vattr vattr; 2752 int error; 2753 2754 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2755 return (error); 2756 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2757 VATTR_NULL(&vattr); 2758 vattr.va_uid = uid; 2759 vattr.va_gid = gid; 2760 #ifdef MAC 2761 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2762 vattr.va_gid); 2763 if (error == 0) 2764 #endif 2765 error = VOP_SETATTR(vp, &vattr, cred); 2766 VOP_UNLOCK(vp, 0); 2767 vn_finished_write(mp); 2768 return (error); 2769 } 2770 2771 /* 2772 * Set ownership given a path name. 2773 */ 2774 #ifndef _SYS_SYSPROTO_H_ 2775 struct chown_args { 2776 char *path; 2777 int uid; 2778 int gid; 2779 }; 2780 #endif 2781 int 2782 sys_chown(td, uap) 2783 struct thread *td; 2784 register struct chown_args /* { 2785 char *path; 2786 int uid; 2787 int gid; 2788 } */ *uap; 2789 { 2790 2791 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2792 uap->gid, 0)); 2793 } 2794 2795 #ifndef _SYS_SYSPROTO_H_ 2796 struct fchownat_args { 2797 int fd; 2798 const char * path; 2799 uid_t uid; 2800 gid_t gid; 2801 int flag; 2802 }; 2803 #endif 2804 int 2805 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2806 { 2807 int flag; 2808 2809 flag = uap->flag; 2810 if (flag & ~AT_SYMLINK_NOFOLLOW) 2811 return (EINVAL); 2812 2813 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2814 uap->gid, uap->flag)); 2815 } 2816 2817 int 2818 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2819 int uid, int gid, int flag) 2820 { 2821 struct nameidata nd; 2822 cap_rights_t rights; 2823 int error, follow; 2824 2825 AUDIT_ARG_OWNER(uid, gid); 2826 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2827 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2828 cap_rights_init(&rights, CAP_FCHOWN), td); 2829 2830 if ((error = namei(&nd)) != 0) 2831 return (error); 2832 NDFREE(&nd, NDF_ONLY_PNBUF); 2833 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2834 vrele(nd.ni_vp); 2835 return (error); 2836 } 2837 2838 /* 2839 * Set ownership given a path name, do not cross symlinks. 2840 */ 2841 #ifndef _SYS_SYSPROTO_H_ 2842 struct lchown_args { 2843 char *path; 2844 int uid; 2845 int gid; 2846 }; 2847 #endif 2848 int 2849 sys_lchown(td, uap) 2850 struct thread *td; 2851 register struct lchown_args /* { 2852 char *path; 2853 int uid; 2854 int gid; 2855 } */ *uap; 2856 { 2857 2858 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2859 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2860 } 2861 2862 /* 2863 * Set ownership given a file descriptor. 2864 */ 2865 #ifndef _SYS_SYSPROTO_H_ 2866 struct fchown_args { 2867 int fd; 2868 int uid; 2869 int gid; 2870 }; 2871 #endif 2872 int 2873 sys_fchown(td, uap) 2874 struct thread *td; 2875 register struct fchown_args /* { 2876 int fd; 2877 int uid; 2878 int gid; 2879 } */ *uap; 2880 { 2881 struct file *fp; 2882 cap_rights_t rights; 2883 int error; 2884 2885 AUDIT_ARG_FD(uap->fd); 2886 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2887 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2888 if (error != 0) 2889 return (error); 2890 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2891 fdrop(fp, td); 2892 return (error); 2893 } 2894 2895 /* 2896 * Common implementation code for utimes(), lutimes(), and futimes(). 2897 */ 2898 static int 2899 getutimes(usrtvp, tvpseg, tsp) 2900 const struct timeval *usrtvp; 2901 enum uio_seg tvpseg; 2902 struct timespec *tsp; 2903 { 2904 struct timeval tv[2]; 2905 const struct timeval *tvp; 2906 int error; 2907 2908 if (usrtvp == NULL) { 2909 vfs_timestamp(&tsp[0]); 2910 tsp[1] = tsp[0]; 2911 } else { 2912 if (tvpseg == UIO_SYSSPACE) { 2913 tvp = usrtvp; 2914 } else { 2915 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2916 return (error); 2917 tvp = tv; 2918 } 2919 2920 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2921 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2922 return (EINVAL); 2923 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2924 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2925 } 2926 return (0); 2927 } 2928 2929 /* 2930 * Common implementation code for futimens(), utimensat(). 2931 */ 2932 #define UTIMENS_NULL 0x1 2933 #define UTIMENS_EXIT 0x2 2934 static int 2935 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2936 struct timespec *tsp, int *retflags) 2937 { 2938 struct timespec tsnow; 2939 int error; 2940 2941 vfs_timestamp(&tsnow); 2942 *retflags = 0; 2943 if (usrtsp == NULL) { 2944 tsp[0] = tsnow; 2945 tsp[1] = tsnow; 2946 *retflags |= UTIMENS_NULL; 2947 return (0); 2948 } 2949 if (tspseg == UIO_SYSSPACE) { 2950 tsp[0] = usrtsp[0]; 2951 tsp[1] = usrtsp[1]; 2952 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2953 return (error); 2954 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2955 *retflags |= UTIMENS_EXIT; 2956 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2957 *retflags |= UTIMENS_NULL; 2958 if (tsp[0].tv_nsec == UTIME_OMIT) 2959 tsp[0].tv_sec = VNOVAL; 2960 else if (tsp[0].tv_nsec == UTIME_NOW) 2961 tsp[0] = tsnow; 2962 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2963 return (EINVAL); 2964 if (tsp[1].tv_nsec == UTIME_OMIT) 2965 tsp[1].tv_sec = VNOVAL; 2966 else if (tsp[1].tv_nsec == UTIME_NOW) 2967 tsp[1] = tsnow; 2968 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2969 return (EINVAL); 2970 2971 return (0); 2972 } 2973 2974 /* 2975 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2976 * and utimensat(). 2977 */ 2978 static int 2979 setutimes(td, vp, ts, numtimes, nullflag) 2980 struct thread *td; 2981 struct vnode *vp; 2982 const struct timespec *ts; 2983 int numtimes; 2984 int nullflag; 2985 { 2986 struct mount *mp; 2987 struct vattr vattr; 2988 int error, setbirthtime; 2989 2990 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2991 return (error); 2992 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2993 setbirthtime = 0; 2994 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2995 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2996 setbirthtime = 1; 2997 VATTR_NULL(&vattr); 2998 vattr.va_atime = ts[0]; 2999 vattr.va_mtime = ts[1]; 3000 if (setbirthtime) 3001 vattr.va_birthtime = ts[1]; 3002 if (numtimes > 2) 3003 vattr.va_birthtime = ts[2]; 3004 if (nullflag) 3005 vattr.va_vaflags |= VA_UTIMES_NULL; 3006 #ifdef MAC 3007 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3008 vattr.va_mtime); 3009 #endif 3010 if (error == 0) 3011 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3012 VOP_UNLOCK(vp, 0); 3013 vn_finished_write(mp); 3014 return (error); 3015 } 3016 3017 /* 3018 * Set the access and modification times of a file. 3019 */ 3020 #ifndef _SYS_SYSPROTO_H_ 3021 struct utimes_args { 3022 char *path; 3023 struct timeval *tptr; 3024 }; 3025 #endif 3026 int 3027 sys_utimes(td, uap) 3028 struct thread *td; 3029 register struct utimes_args /* { 3030 char *path; 3031 struct timeval *tptr; 3032 } */ *uap; 3033 { 3034 3035 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3036 uap->tptr, UIO_USERSPACE)); 3037 } 3038 3039 #ifndef _SYS_SYSPROTO_H_ 3040 struct futimesat_args { 3041 int fd; 3042 const char * path; 3043 const struct timeval * times; 3044 }; 3045 #endif 3046 int 3047 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3048 { 3049 3050 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3051 uap->times, UIO_USERSPACE)); 3052 } 3053 3054 int 3055 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3056 struct timeval *tptr, enum uio_seg tptrseg) 3057 { 3058 struct nameidata nd; 3059 struct timespec ts[2]; 3060 cap_rights_t rights; 3061 int error; 3062 3063 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3064 return (error); 3065 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3066 cap_rights_init(&rights, CAP_FUTIMES), td); 3067 3068 if ((error = namei(&nd)) != 0) 3069 return (error); 3070 NDFREE(&nd, NDF_ONLY_PNBUF); 3071 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3072 vrele(nd.ni_vp); 3073 return (error); 3074 } 3075 3076 /* 3077 * Set the access and modification times of a file. 3078 */ 3079 #ifndef _SYS_SYSPROTO_H_ 3080 struct lutimes_args { 3081 char *path; 3082 struct timeval *tptr; 3083 }; 3084 #endif 3085 int 3086 sys_lutimes(td, uap) 3087 struct thread *td; 3088 register struct lutimes_args /* { 3089 char *path; 3090 struct timeval *tptr; 3091 } */ *uap; 3092 { 3093 3094 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3095 UIO_USERSPACE)); 3096 } 3097 3098 int 3099 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3100 struct timeval *tptr, enum uio_seg tptrseg) 3101 { 3102 struct timespec ts[2]; 3103 struct nameidata nd; 3104 int error; 3105 3106 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3107 return (error); 3108 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3109 if ((error = namei(&nd)) != 0) 3110 return (error); 3111 NDFREE(&nd, NDF_ONLY_PNBUF); 3112 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3113 vrele(nd.ni_vp); 3114 return (error); 3115 } 3116 3117 /* 3118 * Set the access and modification times of a file. 3119 */ 3120 #ifndef _SYS_SYSPROTO_H_ 3121 struct futimes_args { 3122 int fd; 3123 struct timeval *tptr; 3124 }; 3125 #endif 3126 int 3127 sys_futimes(td, uap) 3128 struct thread *td; 3129 register struct futimes_args /* { 3130 int fd; 3131 struct timeval *tptr; 3132 } */ *uap; 3133 { 3134 3135 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3136 } 3137 3138 int 3139 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3140 enum uio_seg tptrseg) 3141 { 3142 struct timespec ts[2]; 3143 struct file *fp; 3144 cap_rights_t rights; 3145 int error; 3146 3147 AUDIT_ARG_FD(fd); 3148 error = getutimes(tptr, tptrseg, ts); 3149 if (error != 0) 3150 return (error); 3151 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3152 if (error != 0) 3153 return (error); 3154 #ifdef AUDIT 3155 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3156 AUDIT_ARG_VNODE1(fp->f_vnode); 3157 VOP_UNLOCK(fp->f_vnode, 0); 3158 #endif 3159 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3160 fdrop(fp, td); 3161 return (error); 3162 } 3163 3164 int 3165 sys_futimens(struct thread *td, struct futimens_args *uap) 3166 { 3167 3168 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3169 } 3170 3171 int 3172 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3173 enum uio_seg tptrseg) 3174 { 3175 struct timespec ts[2]; 3176 struct file *fp; 3177 cap_rights_t rights; 3178 int error, flags; 3179 3180 AUDIT_ARG_FD(fd); 3181 error = getutimens(tptr, tptrseg, ts, &flags); 3182 if (error != 0) 3183 return (error); 3184 if (flags & UTIMENS_EXIT) 3185 return (0); 3186 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3187 if (error != 0) 3188 return (error); 3189 #ifdef AUDIT 3190 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3191 AUDIT_ARG_VNODE1(fp->f_vnode); 3192 VOP_UNLOCK(fp->f_vnode, 0); 3193 #endif 3194 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3195 fdrop(fp, td); 3196 return (error); 3197 } 3198 3199 int 3200 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3201 { 3202 3203 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3204 uap->times, UIO_USERSPACE, uap->flag)); 3205 } 3206 3207 int 3208 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3209 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3210 { 3211 struct nameidata nd; 3212 struct timespec ts[2]; 3213 cap_rights_t rights; 3214 int error, flags; 3215 3216 if (flag & ~AT_SYMLINK_NOFOLLOW) 3217 return (EINVAL); 3218 3219 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3220 return (error); 3221 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3222 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3223 cap_rights_init(&rights, CAP_FUTIMES), td); 3224 if ((error = namei(&nd)) != 0) 3225 return (error); 3226 /* 3227 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3228 * POSIX states: 3229 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3230 * "Search permission is denied by a component of the path prefix." 3231 */ 3232 NDFREE(&nd, NDF_ONLY_PNBUF); 3233 if ((flags & UTIMENS_EXIT) == 0) 3234 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3235 vrele(nd.ni_vp); 3236 return (error); 3237 } 3238 3239 /* 3240 * Truncate a file given its path name. 3241 */ 3242 #ifndef _SYS_SYSPROTO_H_ 3243 struct truncate_args { 3244 char *path; 3245 int pad; 3246 off_t length; 3247 }; 3248 #endif 3249 int 3250 sys_truncate(td, uap) 3251 struct thread *td; 3252 register struct truncate_args /* { 3253 char *path; 3254 int pad; 3255 off_t length; 3256 } */ *uap; 3257 { 3258 3259 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3260 } 3261 3262 int 3263 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3264 { 3265 struct mount *mp; 3266 struct vnode *vp; 3267 void *rl_cookie; 3268 struct vattr vattr; 3269 struct nameidata nd; 3270 int error; 3271 3272 if (length < 0) 3273 return(EINVAL); 3274 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3275 if ((error = namei(&nd)) != 0) 3276 return (error); 3277 vp = nd.ni_vp; 3278 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3279 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3280 vn_rangelock_unlock(vp, rl_cookie); 3281 vrele(vp); 3282 return (error); 3283 } 3284 NDFREE(&nd, NDF_ONLY_PNBUF); 3285 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3286 if (vp->v_type == VDIR) 3287 error = EISDIR; 3288 #ifdef MAC 3289 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3290 } 3291 #endif 3292 else if ((error = vn_writechk(vp)) == 0 && 3293 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3294 VATTR_NULL(&vattr); 3295 vattr.va_size = length; 3296 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3297 } 3298 VOP_UNLOCK(vp, 0); 3299 vn_finished_write(mp); 3300 vn_rangelock_unlock(vp, rl_cookie); 3301 vrele(vp); 3302 return (error); 3303 } 3304 3305 #if defined(COMPAT_43) 3306 /* 3307 * Truncate a file given its path name. 3308 */ 3309 #ifndef _SYS_SYSPROTO_H_ 3310 struct otruncate_args { 3311 char *path; 3312 long length; 3313 }; 3314 #endif 3315 int 3316 otruncate(td, uap) 3317 struct thread *td; 3318 register struct otruncate_args /* { 3319 char *path; 3320 long length; 3321 } */ *uap; 3322 { 3323 struct truncate_args /* { 3324 char *path; 3325 int pad; 3326 off_t length; 3327 } */ nuap; 3328 3329 nuap.path = uap->path; 3330 nuap.length = uap->length; 3331 return (sys_truncate(td, &nuap)); 3332 } 3333 #endif /* COMPAT_43 */ 3334 3335 #if defined(COMPAT_FREEBSD6) 3336 /* Versions with the pad argument */ 3337 int 3338 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3339 { 3340 struct truncate_args ouap; 3341 3342 ouap.path = uap->path; 3343 ouap.length = uap->length; 3344 return (sys_truncate(td, &ouap)); 3345 } 3346 3347 int 3348 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3349 { 3350 struct ftruncate_args ouap; 3351 3352 ouap.fd = uap->fd; 3353 ouap.length = uap->length; 3354 return (sys_ftruncate(td, &ouap)); 3355 } 3356 #endif 3357 3358 int 3359 kern_fsync(struct thread *td, int fd, bool fullsync) 3360 { 3361 struct vnode *vp; 3362 struct mount *mp; 3363 struct file *fp; 3364 cap_rights_t rights; 3365 int error, lock_flags; 3366 3367 AUDIT_ARG_FD(fd); 3368 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3369 if (error != 0) 3370 return (error); 3371 vp = fp->f_vnode; 3372 #if 0 3373 if (!fullsync) 3374 /* XXXKIB: compete outstanding aio writes */; 3375 #endif 3376 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3377 if (error != 0) 3378 goto drop; 3379 if (MNT_SHARED_WRITES(mp) || 3380 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3381 lock_flags = LK_SHARED; 3382 } else { 3383 lock_flags = LK_EXCLUSIVE; 3384 } 3385 vn_lock(vp, lock_flags | LK_RETRY); 3386 AUDIT_ARG_VNODE1(vp); 3387 if (vp->v_object != NULL) { 3388 VM_OBJECT_WLOCK(vp->v_object); 3389 vm_object_page_clean(vp->v_object, 0, 0, 0); 3390 VM_OBJECT_WUNLOCK(vp->v_object); 3391 } 3392 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3393 VOP_UNLOCK(vp, 0); 3394 vn_finished_write(mp); 3395 drop: 3396 fdrop(fp, td); 3397 return (error); 3398 } 3399 3400 /* 3401 * Sync an open file. 3402 */ 3403 #ifndef _SYS_SYSPROTO_H_ 3404 struct fsync_args { 3405 int fd; 3406 }; 3407 #endif 3408 int 3409 sys_fsync(struct thread *td, struct fsync_args *uap) 3410 { 3411 3412 return (kern_fsync(td, uap->fd, true)); 3413 } 3414 3415 int 3416 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3417 { 3418 3419 return (kern_fsync(td, uap->fd, false)); 3420 } 3421 3422 /* 3423 * Rename files. Source and destination must either both be directories, or 3424 * both not be directories. If target is a directory, it must be empty. 3425 */ 3426 #ifndef _SYS_SYSPROTO_H_ 3427 struct rename_args { 3428 char *from; 3429 char *to; 3430 }; 3431 #endif 3432 int 3433 sys_rename(td, uap) 3434 struct thread *td; 3435 register struct rename_args /* { 3436 char *from; 3437 char *to; 3438 } */ *uap; 3439 { 3440 3441 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3442 uap->to, UIO_USERSPACE)); 3443 } 3444 3445 #ifndef _SYS_SYSPROTO_H_ 3446 struct renameat_args { 3447 int oldfd; 3448 char *old; 3449 int newfd; 3450 char *new; 3451 }; 3452 #endif 3453 int 3454 sys_renameat(struct thread *td, struct renameat_args *uap) 3455 { 3456 3457 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3458 UIO_USERSPACE)); 3459 } 3460 3461 int 3462 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3463 enum uio_seg pathseg) 3464 { 3465 struct mount *mp = NULL; 3466 struct vnode *tvp, *fvp, *tdvp; 3467 struct nameidata fromnd, tond; 3468 cap_rights_t rights; 3469 int error; 3470 3471 again: 3472 bwillwrite(); 3473 #ifdef MAC 3474 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3475 AUDITVNODE1, pathseg, old, oldfd, 3476 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3477 #else 3478 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3479 pathseg, old, oldfd, 3480 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3481 #endif 3482 3483 if ((error = namei(&fromnd)) != 0) 3484 return (error); 3485 #ifdef MAC 3486 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3487 fromnd.ni_vp, &fromnd.ni_cnd); 3488 VOP_UNLOCK(fromnd.ni_dvp, 0); 3489 if (fromnd.ni_dvp != fromnd.ni_vp) 3490 VOP_UNLOCK(fromnd.ni_vp, 0); 3491 #endif 3492 fvp = fromnd.ni_vp; 3493 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3494 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3495 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3496 if (fromnd.ni_vp->v_type == VDIR) 3497 tond.ni_cnd.cn_flags |= WILLBEDIR; 3498 if ((error = namei(&tond)) != 0) { 3499 /* Translate error code for rename("dir1", "dir2/."). */ 3500 if (error == EISDIR && fvp->v_type == VDIR) 3501 error = EINVAL; 3502 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3503 vrele(fromnd.ni_dvp); 3504 vrele(fvp); 3505 goto out1; 3506 } 3507 tdvp = tond.ni_dvp; 3508 tvp = tond.ni_vp; 3509 error = vn_start_write(fvp, &mp, V_NOWAIT); 3510 if (error != 0) { 3511 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3512 NDFREE(&tond, NDF_ONLY_PNBUF); 3513 if (tvp != NULL) 3514 vput(tvp); 3515 if (tdvp == tvp) 3516 vrele(tdvp); 3517 else 3518 vput(tdvp); 3519 vrele(fromnd.ni_dvp); 3520 vrele(fvp); 3521 vrele(tond.ni_startdir); 3522 if (fromnd.ni_startdir != NULL) 3523 vrele(fromnd.ni_startdir); 3524 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3525 if (error != 0) 3526 return (error); 3527 goto again; 3528 } 3529 if (tvp != NULL) { 3530 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3531 error = ENOTDIR; 3532 goto out; 3533 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3534 error = EISDIR; 3535 goto out; 3536 } 3537 #ifdef CAPABILITIES 3538 if (newfd != AT_FDCWD) { 3539 /* 3540 * If the target already exists we require CAP_UNLINKAT 3541 * from 'newfd'. 3542 */ 3543 error = cap_check(&tond.ni_filecaps.fc_rights, 3544 cap_rights_init(&rights, CAP_UNLINKAT)); 3545 if (error != 0) 3546 goto out; 3547 } 3548 #endif 3549 } 3550 if (fvp == tdvp) { 3551 error = EINVAL; 3552 goto out; 3553 } 3554 /* 3555 * If the source is the same as the destination (that is, if they 3556 * are links to the same vnode), then there is nothing to do. 3557 */ 3558 if (fvp == tvp) 3559 error = -1; 3560 #ifdef MAC 3561 else 3562 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3563 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3564 #endif 3565 out: 3566 if (error == 0) { 3567 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3568 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3569 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3570 NDFREE(&tond, NDF_ONLY_PNBUF); 3571 } else { 3572 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3573 NDFREE(&tond, NDF_ONLY_PNBUF); 3574 if (tvp != NULL) 3575 vput(tvp); 3576 if (tdvp == tvp) 3577 vrele(tdvp); 3578 else 3579 vput(tdvp); 3580 vrele(fromnd.ni_dvp); 3581 vrele(fvp); 3582 } 3583 vrele(tond.ni_startdir); 3584 vn_finished_write(mp); 3585 out1: 3586 if (fromnd.ni_startdir) 3587 vrele(fromnd.ni_startdir); 3588 if (error == -1) 3589 return (0); 3590 return (error); 3591 } 3592 3593 /* 3594 * Make a directory file. 3595 */ 3596 #ifndef _SYS_SYSPROTO_H_ 3597 struct mkdir_args { 3598 char *path; 3599 int mode; 3600 }; 3601 #endif 3602 int 3603 sys_mkdir(td, uap) 3604 struct thread *td; 3605 register struct mkdir_args /* { 3606 char *path; 3607 int mode; 3608 } */ *uap; 3609 { 3610 3611 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3612 uap->mode)); 3613 } 3614 3615 #ifndef _SYS_SYSPROTO_H_ 3616 struct mkdirat_args { 3617 int fd; 3618 char *path; 3619 mode_t mode; 3620 }; 3621 #endif 3622 int 3623 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3624 { 3625 3626 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3627 } 3628 3629 int 3630 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3631 int mode) 3632 { 3633 struct mount *mp; 3634 struct vnode *vp; 3635 struct vattr vattr; 3636 struct nameidata nd; 3637 cap_rights_t rights; 3638 int error; 3639 3640 AUDIT_ARG_MODE(mode); 3641 restart: 3642 bwillwrite(); 3643 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3644 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3645 td); 3646 nd.ni_cnd.cn_flags |= WILLBEDIR; 3647 if ((error = namei(&nd)) != 0) 3648 return (error); 3649 vp = nd.ni_vp; 3650 if (vp != NULL) { 3651 NDFREE(&nd, NDF_ONLY_PNBUF); 3652 /* 3653 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3654 * the strange behaviour of leaving the vnode unlocked 3655 * if the target is the same vnode as the parent. 3656 */ 3657 if (vp == nd.ni_dvp) 3658 vrele(nd.ni_dvp); 3659 else 3660 vput(nd.ni_dvp); 3661 vrele(vp); 3662 return (EEXIST); 3663 } 3664 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3665 NDFREE(&nd, NDF_ONLY_PNBUF); 3666 vput(nd.ni_dvp); 3667 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3668 return (error); 3669 goto restart; 3670 } 3671 VATTR_NULL(&vattr); 3672 vattr.va_type = VDIR; 3673 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3674 #ifdef MAC 3675 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3676 &vattr); 3677 if (error != 0) 3678 goto out; 3679 #endif 3680 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3681 #ifdef MAC 3682 out: 3683 #endif 3684 NDFREE(&nd, NDF_ONLY_PNBUF); 3685 vput(nd.ni_dvp); 3686 if (error == 0) 3687 vput(nd.ni_vp); 3688 vn_finished_write(mp); 3689 return (error); 3690 } 3691 3692 /* 3693 * Remove a directory file. 3694 */ 3695 #ifndef _SYS_SYSPROTO_H_ 3696 struct rmdir_args { 3697 char *path; 3698 }; 3699 #endif 3700 int 3701 sys_rmdir(td, uap) 3702 struct thread *td; 3703 struct rmdir_args /* { 3704 char *path; 3705 } */ *uap; 3706 { 3707 3708 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3709 } 3710 3711 int 3712 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3713 { 3714 struct mount *mp; 3715 struct vnode *vp; 3716 struct nameidata nd; 3717 cap_rights_t rights; 3718 int error; 3719 3720 restart: 3721 bwillwrite(); 3722 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3723 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3724 if ((error = namei(&nd)) != 0) 3725 return (error); 3726 vp = nd.ni_vp; 3727 if (vp->v_type != VDIR) { 3728 error = ENOTDIR; 3729 goto out; 3730 } 3731 /* 3732 * No rmdir "." please. 3733 */ 3734 if (nd.ni_dvp == vp) { 3735 error = EINVAL; 3736 goto out; 3737 } 3738 /* 3739 * The root of a mounted filesystem cannot be deleted. 3740 */ 3741 if (vp->v_vflag & VV_ROOT) { 3742 error = EBUSY; 3743 goto out; 3744 } 3745 #ifdef MAC 3746 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3747 &nd.ni_cnd); 3748 if (error != 0) 3749 goto out; 3750 #endif 3751 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3752 NDFREE(&nd, NDF_ONLY_PNBUF); 3753 vput(vp); 3754 if (nd.ni_dvp == vp) 3755 vrele(nd.ni_dvp); 3756 else 3757 vput(nd.ni_dvp); 3758 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3759 return (error); 3760 goto restart; 3761 } 3762 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3763 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3764 vn_finished_write(mp); 3765 out: 3766 NDFREE(&nd, NDF_ONLY_PNBUF); 3767 vput(vp); 3768 if (nd.ni_dvp == vp) 3769 vrele(nd.ni_dvp); 3770 else 3771 vput(nd.ni_dvp); 3772 return (error); 3773 } 3774 3775 #ifdef COMPAT_43 3776 /* 3777 * Read a block of directory entries in a filesystem independent format. 3778 */ 3779 #ifndef _SYS_SYSPROTO_H_ 3780 struct ogetdirentries_args { 3781 int fd; 3782 char *buf; 3783 u_int count; 3784 long *basep; 3785 }; 3786 #endif 3787 int 3788 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3789 { 3790 long loff; 3791 int error; 3792 3793 error = kern_ogetdirentries(td, uap, &loff); 3794 if (error == 0) 3795 error = copyout(&loff, uap->basep, sizeof(long)); 3796 return (error); 3797 } 3798 3799 int 3800 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3801 long *ploff) 3802 { 3803 struct vnode *vp; 3804 struct file *fp; 3805 struct uio auio, kuio; 3806 struct iovec aiov, kiov; 3807 struct dirent *dp, *edp; 3808 cap_rights_t rights; 3809 caddr_t dirbuf; 3810 int error, eofflag, readcnt; 3811 long loff; 3812 off_t foffset; 3813 3814 /* XXX arbitrary sanity limit on `count'. */ 3815 if (uap->count > 64 * 1024) 3816 return (EINVAL); 3817 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3818 if (error != 0) 3819 return (error); 3820 if ((fp->f_flag & FREAD) == 0) { 3821 fdrop(fp, td); 3822 return (EBADF); 3823 } 3824 vp = fp->f_vnode; 3825 foffset = foffset_lock(fp, 0); 3826 unionread: 3827 if (vp->v_type != VDIR) { 3828 foffset_unlock(fp, foffset, 0); 3829 fdrop(fp, td); 3830 return (EINVAL); 3831 } 3832 aiov.iov_base = uap->buf; 3833 aiov.iov_len = uap->count; 3834 auio.uio_iov = &aiov; 3835 auio.uio_iovcnt = 1; 3836 auio.uio_rw = UIO_READ; 3837 auio.uio_segflg = UIO_USERSPACE; 3838 auio.uio_td = td; 3839 auio.uio_resid = uap->count; 3840 vn_lock(vp, LK_SHARED | LK_RETRY); 3841 loff = auio.uio_offset = foffset; 3842 #ifdef MAC 3843 error = mac_vnode_check_readdir(td->td_ucred, vp); 3844 if (error != 0) { 3845 VOP_UNLOCK(vp, 0); 3846 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3847 fdrop(fp, td); 3848 return (error); 3849 } 3850 #endif 3851 # if (BYTE_ORDER != LITTLE_ENDIAN) 3852 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3853 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3854 NULL, NULL); 3855 foffset = auio.uio_offset; 3856 } else 3857 # endif 3858 { 3859 kuio = auio; 3860 kuio.uio_iov = &kiov; 3861 kuio.uio_segflg = UIO_SYSSPACE; 3862 kiov.iov_len = uap->count; 3863 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3864 kiov.iov_base = dirbuf; 3865 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3866 NULL, NULL); 3867 foffset = kuio.uio_offset; 3868 if (error == 0) { 3869 readcnt = uap->count - kuio.uio_resid; 3870 edp = (struct dirent *)&dirbuf[readcnt]; 3871 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3872 # if (BYTE_ORDER == LITTLE_ENDIAN) 3873 /* 3874 * The expected low byte of 3875 * dp->d_namlen is our dp->d_type. 3876 * The high MBZ byte of dp->d_namlen 3877 * is our dp->d_namlen. 3878 */ 3879 dp->d_type = dp->d_namlen; 3880 dp->d_namlen = 0; 3881 # else 3882 /* 3883 * The dp->d_type is the high byte 3884 * of the expected dp->d_namlen, 3885 * so must be zero'ed. 3886 */ 3887 dp->d_type = 0; 3888 # endif 3889 if (dp->d_reclen > 0) { 3890 dp = (struct dirent *) 3891 ((char *)dp + dp->d_reclen); 3892 } else { 3893 error = EIO; 3894 break; 3895 } 3896 } 3897 if (dp >= edp) 3898 error = uiomove(dirbuf, readcnt, &auio); 3899 } 3900 free(dirbuf, M_TEMP); 3901 } 3902 if (error != 0) { 3903 VOP_UNLOCK(vp, 0); 3904 foffset_unlock(fp, foffset, 0); 3905 fdrop(fp, td); 3906 return (error); 3907 } 3908 if (uap->count == auio.uio_resid && 3909 (vp->v_vflag & VV_ROOT) && 3910 (vp->v_mount->mnt_flag & MNT_UNION)) { 3911 struct vnode *tvp = vp; 3912 vp = vp->v_mount->mnt_vnodecovered; 3913 VREF(vp); 3914 fp->f_vnode = vp; 3915 fp->f_data = vp; 3916 foffset = 0; 3917 vput(tvp); 3918 goto unionread; 3919 } 3920 VOP_UNLOCK(vp, 0); 3921 foffset_unlock(fp, foffset, 0); 3922 fdrop(fp, td); 3923 td->td_retval[0] = uap->count - auio.uio_resid; 3924 if (error == 0) 3925 *ploff = loff; 3926 return (error); 3927 } 3928 #endif /* COMPAT_43 */ 3929 3930 /* 3931 * Read a block of directory entries in a filesystem independent format. 3932 */ 3933 #ifndef _SYS_SYSPROTO_H_ 3934 struct getdirentries_args { 3935 int fd; 3936 char *buf; 3937 u_int count; 3938 long *basep; 3939 }; 3940 #endif 3941 int 3942 sys_getdirentries(td, uap) 3943 struct thread *td; 3944 register struct getdirentries_args /* { 3945 int fd; 3946 char *buf; 3947 u_int count; 3948 long *basep; 3949 } */ *uap; 3950 { 3951 long base; 3952 int error; 3953 3954 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3955 NULL, UIO_USERSPACE); 3956 if (error != 0) 3957 return (error); 3958 if (uap->basep != NULL) 3959 error = copyout(&base, uap->basep, sizeof(long)); 3960 return (error); 3961 } 3962 3963 int 3964 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3965 long *basep, ssize_t *residp, enum uio_seg bufseg) 3966 { 3967 struct vnode *vp; 3968 struct file *fp; 3969 struct uio auio; 3970 struct iovec aiov; 3971 cap_rights_t rights; 3972 long loff; 3973 int error, eofflag; 3974 off_t foffset; 3975 3976 AUDIT_ARG_FD(fd); 3977 if (count > IOSIZE_MAX) 3978 return (EINVAL); 3979 auio.uio_resid = count; 3980 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3981 if (error != 0) 3982 return (error); 3983 if ((fp->f_flag & FREAD) == 0) { 3984 fdrop(fp, td); 3985 return (EBADF); 3986 } 3987 vp = fp->f_vnode; 3988 foffset = foffset_lock(fp, 0); 3989 unionread: 3990 if (vp->v_type != VDIR) { 3991 error = EINVAL; 3992 goto fail; 3993 } 3994 aiov.iov_base = buf; 3995 aiov.iov_len = count; 3996 auio.uio_iov = &aiov; 3997 auio.uio_iovcnt = 1; 3998 auio.uio_rw = UIO_READ; 3999 auio.uio_segflg = bufseg; 4000 auio.uio_td = td; 4001 vn_lock(vp, LK_SHARED | LK_RETRY); 4002 AUDIT_ARG_VNODE1(vp); 4003 loff = auio.uio_offset = foffset; 4004 #ifdef MAC 4005 error = mac_vnode_check_readdir(td->td_ucred, vp); 4006 if (error == 0) 4007 #endif 4008 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4009 NULL); 4010 foffset = auio.uio_offset; 4011 if (error != 0) { 4012 VOP_UNLOCK(vp, 0); 4013 goto fail; 4014 } 4015 if (count == auio.uio_resid && 4016 (vp->v_vflag & VV_ROOT) && 4017 (vp->v_mount->mnt_flag & MNT_UNION)) { 4018 struct vnode *tvp = vp; 4019 4020 vp = vp->v_mount->mnt_vnodecovered; 4021 VREF(vp); 4022 fp->f_vnode = vp; 4023 fp->f_data = vp; 4024 foffset = 0; 4025 vput(tvp); 4026 goto unionread; 4027 } 4028 VOP_UNLOCK(vp, 0); 4029 *basep = loff; 4030 if (residp != NULL) 4031 *residp = auio.uio_resid; 4032 td->td_retval[0] = count - auio.uio_resid; 4033 fail: 4034 foffset_unlock(fp, foffset, 0); 4035 fdrop(fp, td); 4036 return (error); 4037 } 4038 4039 #ifndef _SYS_SYSPROTO_H_ 4040 struct getdents_args { 4041 int fd; 4042 char *buf; 4043 size_t count; 4044 }; 4045 #endif 4046 int 4047 sys_getdents(td, uap) 4048 struct thread *td; 4049 register struct getdents_args /* { 4050 int fd; 4051 char *buf; 4052 u_int count; 4053 } */ *uap; 4054 { 4055 struct getdirentries_args ap; 4056 4057 ap.fd = uap->fd; 4058 ap.buf = uap->buf; 4059 ap.count = uap->count; 4060 ap.basep = NULL; 4061 return (sys_getdirentries(td, &ap)); 4062 } 4063 4064 /* 4065 * Set the mode mask for creation of filesystem nodes. 4066 */ 4067 #ifndef _SYS_SYSPROTO_H_ 4068 struct umask_args { 4069 int newmask; 4070 }; 4071 #endif 4072 int 4073 sys_umask(td, uap) 4074 struct thread *td; 4075 struct umask_args /* { 4076 int newmask; 4077 } */ *uap; 4078 { 4079 struct filedesc *fdp; 4080 4081 fdp = td->td_proc->p_fd; 4082 FILEDESC_XLOCK(fdp); 4083 td->td_retval[0] = fdp->fd_cmask; 4084 fdp->fd_cmask = uap->newmask & ALLPERMS; 4085 FILEDESC_XUNLOCK(fdp); 4086 return (0); 4087 } 4088 4089 /* 4090 * Void all references to file by ripping underlying filesystem away from 4091 * vnode. 4092 */ 4093 #ifndef _SYS_SYSPROTO_H_ 4094 struct revoke_args { 4095 char *path; 4096 }; 4097 #endif 4098 int 4099 sys_revoke(td, uap) 4100 struct thread *td; 4101 register struct revoke_args /* { 4102 char *path; 4103 } */ *uap; 4104 { 4105 struct vnode *vp; 4106 struct vattr vattr; 4107 struct nameidata nd; 4108 int error; 4109 4110 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4111 uap->path, td); 4112 if ((error = namei(&nd)) != 0) 4113 return (error); 4114 vp = nd.ni_vp; 4115 NDFREE(&nd, NDF_ONLY_PNBUF); 4116 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4117 error = EINVAL; 4118 goto out; 4119 } 4120 #ifdef MAC 4121 error = mac_vnode_check_revoke(td->td_ucred, vp); 4122 if (error != 0) 4123 goto out; 4124 #endif 4125 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4126 if (error != 0) 4127 goto out; 4128 if (td->td_ucred->cr_uid != vattr.va_uid) { 4129 error = priv_check(td, PRIV_VFS_ADMIN); 4130 if (error != 0) 4131 goto out; 4132 } 4133 if (vcount(vp) > 1) 4134 VOP_REVOKE(vp, REVOKEALL); 4135 out: 4136 vput(vp); 4137 return (error); 4138 } 4139 4140 /* 4141 * Convert a user file descriptor to a kernel file entry and check that, if it 4142 * is a capability, the correct rights are present. A reference on the file 4143 * entry is held upon returning. 4144 */ 4145 int 4146 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4147 { 4148 struct file *fp; 4149 int error; 4150 4151 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4152 if (error != 0) 4153 return (error); 4154 4155 /* 4156 * The file could be not of the vnode type, or it may be not 4157 * yet fully initialized, in which case the f_vnode pointer 4158 * may be set, but f_ops is still badfileops. E.g., 4159 * devfs_open() transiently create such situation to 4160 * facilitate csw d_fdopen(). 4161 * 4162 * Dupfdopen() handling in kern_openat() installs the 4163 * half-baked file into the process descriptor table, allowing 4164 * other thread to dereference it. Guard against the race by 4165 * checking f_ops. 4166 */ 4167 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4168 fdrop(fp, td); 4169 return (EINVAL); 4170 } 4171 *fpp = fp; 4172 return (0); 4173 } 4174 4175 4176 /* 4177 * Get an (NFS) file handle. 4178 */ 4179 #ifndef _SYS_SYSPROTO_H_ 4180 struct lgetfh_args { 4181 char *fname; 4182 fhandle_t *fhp; 4183 }; 4184 #endif 4185 int 4186 sys_lgetfh(td, uap) 4187 struct thread *td; 4188 register struct lgetfh_args *uap; 4189 { 4190 struct nameidata nd; 4191 fhandle_t fh; 4192 register struct vnode *vp; 4193 int error; 4194 4195 error = priv_check(td, PRIV_VFS_GETFH); 4196 if (error != 0) 4197 return (error); 4198 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4199 uap->fname, td); 4200 error = namei(&nd); 4201 if (error != 0) 4202 return (error); 4203 NDFREE(&nd, NDF_ONLY_PNBUF); 4204 vp = nd.ni_vp; 4205 bzero(&fh, sizeof(fh)); 4206 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4207 error = VOP_VPTOFH(vp, &fh.fh_fid); 4208 vput(vp); 4209 if (error == 0) 4210 error = copyout(&fh, uap->fhp, sizeof (fh)); 4211 return (error); 4212 } 4213 4214 #ifndef _SYS_SYSPROTO_H_ 4215 struct getfh_args { 4216 char *fname; 4217 fhandle_t *fhp; 4218 }; 4219 #endif 4220 int 4221 sys_getfh(td, uap) 4222 struct thread *td; 4223 register struct getfh_args *uap; 4224 { 4225 struct nameidata nd; 4226 fhandle_t fh; 4227 register struct vnode *vp; 4228 int error; 4229 4230 error = priv_check(td, PRIV_VFS_GETFH); 4231 if (error != 0) 4232 return (error); 4233 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4234 uap->fname, td); 4235 error = namei(&nd); 4236 if (error != 0) 4237 return (error); 4238 NDFREE(&nd, NDF_ONLY_PNBUF); 4239 vp = nd.ni_vp; 4240 bzero(&fh, sizeof(fh)); 4241 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4242 error = VOP_VPTOFH(vp, &fh.fh_fid); 4243 vput(vp); 4244 if (error == 0) 4245 error = copyout(&fh, uap->fhp, sizeof (fh)); 4246 return (error); 4247 } 4248 4249 /* 4250 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4251 * open descriptor. 4252 * 4253 * warning: do not remove the priv_check() call or this becomes one giant 4254 * security hole. 4255 */ 4256 #ifndef _SYS_SYSPROTO_H_ 4257 struct fhopen_args { 4258 const struct fhandle *u_fhp; 4259 int flags; 4260 }; 4261 #endif 4262 int 4263 sys_fhopen(td, uap) 4264 struct thread *td; 4265 struct fhopen_args /* { 4266 const struct fhandle *u_fhp; 4267 int flags; 4268 } */ *uap; 4269 { 4270 struct mount *mp; 4271 struct vnode *vp; 4272 struct fhandle fhp; 4273 struct file *fp; 4274 int fmode, error; 4275 int indx; 4276 4277 error = priv_check(td, PRIV_VFS_FHOPEN); 4278 if (error != 0) 4279 return (error); 4280 indx = -1; 4281 fmode = FFLAGS(uap->flags); 4282 /* why not allow a non-read/write open for our lockd? */ 4283 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4284 return (EINVAL); 4285 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4286 if (error != 0) 4287 return(error); 4288 /* find the mount point */ 4289 mp = vfs_busyfs(&fhp.fh_fsid); 4290 if (mp == NULL) 4291 return (ESTALE); 4292 /* now give me my vnode, it gets returned to me locked */ 4293 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4294 vfs_unbusy(mp); 4295 if (error != 0) 4296 return (error); 4297 4298 error = falloc_noinstall(td, &fp); 4299 if (error != 0) { 4300 vput(vp); 4301 return (error); 4302 } 4303 /* 4304 * An extra reference on `fp' has been held for us by 4305 * falloc_noinstall(). 4306 */ 4307 4308 #ifdef INVARIANTS 4309 td->td_dupfd = -1; 4310 #endif 4311 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4312 if (error != 0) { 4313 KASSERT(fp->f_ops == &badfileops, 4314 ("VOP_OPEN in fhopen() set f_ops")); 4315 KASSERT(td->td_dupfd < 0, 4316 ("fhopen() encountered fdopen()")); 4317 4318 vput(vp); 4319 goto bad; 4320 } 4321 #ifdef INVARIANTS 4322 td->td_dupfd = 0; 4323 #endif 4324 fp->f_vnode = vp; 4325 fp->f_seqcount = 1; 4326 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4327 &vnops); 4328 VOP_UNLOCK(vp, 0); 4329 if ((fmode & O_TRUNC) != 0) { 4330 error = fo_truncate(fp, 0, td->td_ucred, td); 4331 if (error != 0) 4332 goto bad; 4333 } 4334 4335 error = finstall(td, fp, &indx, fmode, NULL); 4336 bad: 4337 fdrop(fp, td); 4338 td->td_retval[0] = indx; 4339 return (error); 4340 } 4341 4342 /* 4343 * Stat an (NFS) file handle. 4344 */ 4345 #ifndef _SYS_SYSPROTO_H_ 4346 struct fhstat_args { 4347 struct fhandle *u_fhp; 4348 struct stat *sb; 4349 }; 4350 #endif 4351 int 4352 sys_fhstat(td, uap) 4353 struct thread *td; 4354 register struct fhstat_args /* { 4355 struct fhandle *u_fhp; 4356 struct stat *sb; 4357 } */ *uap; 4358 { 4359 struct stat sb; 4360 struct fhandle fh; 4361 int error; 4362 4363 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4364 if (error != 0) 4365 return (error); 4366 error = kern_fhstat(td, fh, &sb); 4367 if (error == 0) 4368 error = copyout(&sb, uap->sb, sizeof(sb)); 4369 return (error); 4370 } 4371 4372 int 4373 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4374 { 4375 struct mount *mp; 4376 struct vnode *vp; 4377 int error; 4378 4379 error = priv_check(td, PRIV_VFS_FHSTAT); 4380 if (error != 0) 4381 return (error); 4382 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4383 return (ESTALE); 4384 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4385 vfs_unbusy(mp); 4386 if (error != 0) 4387 return (error); 4388 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4389 vput(vp); 4390 return (error); 4391 } 4392 4393 /* 4394 * Implement fstatfs() for (NFS) file handles. 4395 */ 4396 #ifndef _SYS_SYSPROTO_H_ 4397 struct fhstatfs_args { 4398 struct fhandle *u_fhp; 4399 struct statfs *buf; 4400 }; 4401 #endif 4402 int 4403 sys_fhstatfs(td, uap) 4404 struct thread *td; 4405 struct fhstatfs_args /* { 4406 struct fhandle *u_fhp; 4407 struct statfs *buf; 4408 } */ *uap; 4409 { 4410 struct statfs sf; 4411 fhandle_t fh; 4412 int error; 4413 4414 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4415 if (error != 0) 4416 return (error); 4417 error = kern_fhstatfs(td, fh, &sf); 4418 if (error != 0) 4419 return (error); 4420 return (copyout(&sf, uap->buf, sizeof(sf))); 4421 } 4422 4423 int 4424 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4425 { 4426 struct statfs *sp; 4427 struct mount *mp; 4428 struct vnode *vp; 4429 int error; 4430 4431 error = priv_check(td, PRIV_VFS_FHSTATFS); 4432 if (error != 0) 4433 return (error); 4434 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4435 return (ESTALE); 4436 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4437 if (error != 0) { 4438 vfs_unbusy(mp); 4439 return (error); 4440 } 4441 vput(vp); 4442 error = prison_canseemount(td->td_ucred, mp); 4443 if (error != 0) 4444 goto out; 4445 #ifdef MAC 4446 error = mac_mount_check_stat(td->td_ucred, mp); 4447 if (error != 0) 4448 goto out; 4449 #endif 4450 /* 4451 * Set these in case the underlying filesystem fails to do so. 4452 */ 4453 sp = &mp->mnt_stat; 4454 sp->f_version = STATFS_VERSION; 4455 sp->f_namemax = NAME_MAX; 4456 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4457 error = VFS_STATFS(mp, sp); 4458 if (error == 0) 4459 *buf = *sp; 4460 out: 4461 vfs_unbusy(mp); 4462 return (error); 4463 } 4464 4465 int 4466 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4467 { 4468 struct file *fp; 4469 struct mount *mp; 4470 struct vnode *vp; 4471 cap_rights_t rights; 4472 off_t olen, ooffset; 4473 int error; 4474 4475 if (offset < 0 || len <= 0) 4476 return (EINVAL); 4477 /* Check for wrap. */ 4478 if (offset > OFF_MAX - len) 4479 return (EFBIG); 4480 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4481 if (error != 0) 4482 return (error); 4483 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4484 error = ESPIPE; 4485 goto out; 4486 } 4487 if ((fp->f_flag & FWRITE) == 0) { 4488 error = EBADF; 4489 goto out; 4490 } 4491 if (fp->f_type != DTYPE_VNODE) { 4492 error = ENODEV; 4493 goto out; 4494 } 4495 vp = fp->f_vnode; 4496 if (vp->v_type != VREG) { 4497 error = ENODEV; 4498 goto out; 4499 } 4500 4501 /* Allocating blocks may take a long time, so iterate. */ 4502 for (;;) { 4503 olen = len; 4504 ooffset = offset; 4505 4506 bwillwrite(); 4507 mp = NULL; 4508 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4509 if (error != 0) 4510 break; 4511 error = vn_lock(vp, LK_EXCLUSIVE); 4512 if (error != 0) { 4513 vn_finished_write(mp); 4514 break; 4515 } 4516 #ifdef MAC 4517 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4518 if (error == 0) 4519 #endif 4520 error = VOP_ALLOCATE(vp, &offset, &len); 4521 VOP_UNLOCK(vp, 0); 4522 vn_finished_write(mp); 4523 4524 if (olen + ooffset != offset + len) { 4525 panic("offset + len changed from %jx/%jx to %jx/%jx", 4526 ooffset, olen, offset, len); 4527 } 4528 if (error != 0 || len == 0) 4529 break; 4530 KASSERT(olen > len, ("Iteration did not make progress?")); 4531 maybe_yield(); 4532 } 4533 out: 4534 fdrop(fp, td); 4535 return (error); 4536 } 4537 4538 int 4539 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4540 { 4541 int error; 4542 4543 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4544 return (kern_posix_error(td, error)); 4545 } 4546 4547 /* 4548 * Unlike madvise(2), we do not make a best effort to remember every 4549 * possible caching hint. Instead, we remember the last setting with 4550 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4551 * region of any current setting. 4552 */ 4553 int 4554 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4555 int advice) 4556 { 4557 struct fadvise_info *fa, *new; 4558 struct file *fp; 4559 struct vnode *vp; 4560 cap_rights_t rights; 4561 off_t end; 4562 int error; 4563 4564 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4565 return (EINVAL); 4566 switch (advice) { 4567 case POSIX_FADV_SEQUENTIAL: 4568 case POSIX_FADV_RANDOM: 4569 case POSIX_FADV_NOREUSE: 4570 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4571 break; 4572 case POSIX_FADV_NORMAL: 4573 case POSIX_FADV_WILLNEED: 4574 case POSIX_FADV_DONTNEED: 4575 new = NULL; 4576 break; 4577 default: 4578 return (EINVAL); 4579 } 4580 /* XXX: CAP_POSIX_FADVISE? */ 4581 error = fget(td, fd, cap_rights_init(&rights), &fp); 4582 if (error != 0) 4583 goto out; 4584 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4585 error = ESPIPE; 4586 goto out; 4587 } 4588 if (fp->f_type != DTYPE_VNODE) { 4589 error = ENODEV; 4590 goto out; 4591 } 4592 vp = fp->f_vnode; 4593 if (vp->v_type != VREG) { 4594 error = ENODEV; 4595 goto out; 4596 } 4597 if (len == 0) 4598 end = OFF_MAX; 4599 else 4600 end = offset + len - 1; 4601 switch (advice) { 4602 case POSIX_FADV_SEQUENTIAL: 4603 case POSIX_FADV_RANDOM: 4604 case POSIX_FADV_NOREUSE: 4605 /* 4606 * Try to merge any existing non-standard region with 4607 * this new region if possible, otherwise create a new 4608 * non-standard region for this request. 4609 */ 4610 mtx_pool_lock(mtxpool_sleep, fp); 4611 fa = fp->f_advice; 4612 if (fa != NULL && fa->fa_advice == advice && 4613 ((fa->fa_start <= end && fa->fa_end >= offset) || 4614 (end != OFF_MAX && fa->fa_start == end + 1) || 4615 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4616 if (offset < fa->fa_start) 4617 fa->fa_start = offset; 4618 if (end > fa->fa_end) 4619 fa->fa_end = end; 4620 } else { 4621 new->fa_advice = advice; 4622 new->fa_start = offset; 4623 new->fa_end = end; 4624 fp->f_advice = new; 4625 new = fa; 4626 } 4627 mtx_pool_unlock(mtxpool_sleep, fp); 4628 break; 4629 case POSIX_FADV_NORMAL: 4630 /* 4631 * If a the "normal" region overlaps with an existing 4632 * non-standard region, trim or remove the 4633 * non-standard region. 4634 */ 4635 mtx_pool_lock(mtxpool_sleep, fp); 4636 fa = fp->f_advice; 4637 if (fa != NULL) { 4638 if (offset <= fa->fa_start && end >= fa->fa_end) { 4639 new = fa; 4640 fp->f_advice = NULL; 4641 } else if (offset <= fa->fa_start && 4642 end >= fa->fa_start) 4643 fa->fa_start = end + 1; 4644 else if (offset <= fa->fa_end && end >= fa->fa_end) 4645 fa->fa_end = offset - 1; 4646 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4647 /* 4648 * If the "normal" region is a middle 4649 * portion of the existing 4650 * non-standard region, just remove 4651 * the whole thing rather than picking 4652 * one side or the other to 4653 * preserve. 4654 */ 4655 new = fa; 4656 fp->f_advice = NULL; 4657 } 4658 } 4659 mtx_pool_unlock(mtxpool_sleep, fp); 4660 break; 4661 case POSIX_FADV_WILLNEED: 4662 case POSIX_FADV_DONTNEED: 4663 error = VOP_ADVISE(vp, offset, end, advice); 4664 break; 4665 } 4666 out: 4667 if (fp != NULL) 4668 fdrop(fp, td); 4669 free(new, M_FADVISE); 4670 return (error); 4671 } 4672 4673 int 4674 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4675 { 4676 int error; 4677 4678 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4679 uap->advice); 4680 return (kern_posix_error(td, error)); 4681 } 4682