1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const 
struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 
149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 
211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 /* 248 * Get filesystem statistics. 249 */ 250 #ifndef _SYS_SYSPROTO_H_ 251 struct statfs_args { 252 char *path; 253 struct statfs *buf; 254 }; 255 #endif 256 int 257 sys_statfs(td, uap) 258 struct thread *td; 259 register struct statfs_args /* { 260 char *path; 261 struct statfs *buf; 262 } */ *uap; 263 { 264 struct statfs sf; 265 int error; 266 267 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 268 if (error == 0) 269 error = copyout(&sf, uap->buf, sizeof(sf)); 270 return (error); 271 } 272 273 int 274 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 275 struct statfs *buf) 276 { 277 struct mount *mp; 278 struct statfs *sp, sb; 279 struct nameidata nd; 280 int error; 281 282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 283 pathseg, path, td); 284 error = namei(&nd); 285 if (error != 0) 286 return (error); 287 mp = nd.ni_vp->v_mount; 288 vfs_ref(mp); 289 NDFREE(&nd, NDF_ONLY_PNBUF); 290 vput(nd.ni_vp); 291 error = vfs_busy(mp, 0); 292 vfs_rel(mp); 293 if (error != 0) 294 return (error); 295 #ifdef MAC 296 error = mac_mount_check_stat(td->td_ucred, 
mp); 297 if (error != 0) 298 goto out; 299 #endif 300 /* 301 * Set these in case the underlying filesystem fails to do so. 302 */ 303 sp = &mp->mnt_stat; 304 sp->f_version = STATFS_VERSION; 305 sp->f_namemax = NAME_MAX; 306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 307 error = VFS_STATFS(mp, sp); 308 if (error != 0) 309 goto out; 310 if (priv_check(td, PRIV_VFS_GENERATION)) { 311 bcopy(sp, &sb, sizeof(sb)); 312 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 313 prison_enforce_statfs(td->td_ucred, mp, &sb); 314 sp = &sb; 315 } 316 *buf = *sp; 317 out: 318 vfs_unbusy(mp); 319 return (error); 320 } 321 322 /* 323 * Get filesystem statistics. 324 */ 325 #ifndef _SYS_SYSPROTO_H_ 326 struct fstatfs_args { 327 int fd; 328 struct statfs *buf; 329 }; 330 #endif 331 int 332 sys_fstatfs(td, uap) 333 struct thread *td; 334 register struct fstatfs_args /* { 335 int fd; 336 struct statfs *buf; 337 } */ *uap; 338 { 339 struct statfs sf; 340 int error; 341 342 error = kern_fstatfs(td, uap->fd, &sf); 343 if (error == 0) 344 error = copyout(&sf, uap->buf, sizeof(sf)); 345 return (error); 346 } 347 348 int 349 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 350 { 351 struct file *fp; 352 struct mount *mp; 353 struct statfs *sp, sb; 354 struct vnode *vp; 355 cap_rights_t rights; 356 int error; 357 358 AUDIT_ARG_FD(fd); 359 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 360 if (error != 0) 361 return (error); 362 vp = fp->f_vnode; 363 vn_lock(vp, LK_SHARED | LK_RETRY); 364 #ifdef AUDIT 365 AUDIT_ARG_VNODE1(vp); 366 #endif 367 mp = vp->v_mount; 368 if (mp) 369 vfs_ref(mp); 370 VOP_UNLOCK(vp, 0); 371 fdrop(fp, td); 372 if (mp == NULL) { 373 error = EBADF; 374 goto out; 375 } 376 error = vfs_busy(mp, 0); 377 vfs_rel(mp); 378 if (error != 0) 379 return (error); 380 #ifdef MAC 381 error = mac_mount_check_stat(td->td_ucred, mp); 382 if (error != 0) 383 goto out; 384 #endif 385 /* 386 * Set these in case the underlying filesystem fails to do so. 
387 */ 388 sp = &mp->mnt_stat; 389 sp->f_version = STATFS_VERSION; 390 sp->f_namemax = NAME_MAX; 391 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 392 error = VFS_STATFS(mp, sp); 393 if (error != 0) 394 goto out; 395 if (priv_check(td, PRIV_VFS_GENERATION)) { 396 bcopy(sp, &sb, sizeof(sb)); 397 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 398 prison_enforce_statfs(td->td_ucred, mp, &sb); 399 sp = &sb; 400 } 401 *buf = *sp; 402 out: 403 if (mp) 404 vfs_unbusy(mp); 405 return (error); 406 } 407 408 /* 409 * Get statistics on all filesystems. 410 */ 411 #ifndef _SYS_SYSPROTO_H_ 412 struct getfsstat_args { 413 struct statfs *buf; 414 long bufsize; 415 int flags; 416 }; 417 #endif 418 int 419 sys_getfsstat(td, uap) 420 struct thread *td; 421 register struct getfsstat_args /* { 422 struct statfs *buf; 423 long bufsize; 424 int flags; 425 } */ *uap; 426 { 427 size_t count; 428 int error; 429 430 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 431 return (EINVAL); 432 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 433 UIO_USERSPACE, uap->flags); 434 if (error == 0) 435 td->td_retval[0] = count; 436 return (error); 437 } 438 439 /* 440 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 441 * The caller is responsible for freeing memory which will be allocated 442 * in '*buf'. 
443 */ 444 int 445 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 446 size_t *countp, enum uio_seg bufseg, int flags) 447 { 448 struct mount *mp, *nmp; 449 struct statfs *sfsp, *sp, sb; 450 size_t count, maxcount; 451 int error; 452 453 maxcount = bufsize / sizeof(struct statfs); 454 if (bufsize == 0) 455 sfsp = NULL; 456 else if (bufseg == UIO_USERSPACE) 457 sfsp = *buf; 458 else /* if (bufseg == UIO_SYSSPACE) */ { 459 count = 0; 460 mtx_lock(&mountlist_mtx); 461 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 462 count++; 463 } 464 mtx_unlock(&mountlist_mtx); 465 if (maxcount > count) 466 maxcount = count; 467 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 468 M_WAITOK); 469 } 470 count = 0; 471 mtx_lock(&mountlist_mtx); 472 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 473 if (prison_canseemount(td->td_ucred, mp) != 0) { 474 nmp = TAILQ_NEXT(mp, mnt_list); 475 continue; 476 } 477 #ifdef MAC 478 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 479 nmp = TAILQ_NEXT(mp, mnt_list); 480 continue; 481 } 482 #endif 483 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 continue; 486 } 487 if (sfsp && count < maxcount) { 488 sp = &mp->mnt_stat; 489 /* 490 * Set these in case the underlying filesystem 491 * fails to do so. 492 */ 493 sp->f_version = STATFS_VERSION; 494 sp->f_namemax = NAME_MAX; 495 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 496 /* 497 * If MNT_NOWAIT or MNT_LAZY is specified, do not 498 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 499 * overrides MNT_WAIT. 
500 */ 501 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 502 (flags & MNT_WAIT)) && 503 (error = VFS_STATFS(mp, sp))) { 504 mtx_lock(&mountlist_mtx); 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 vfs_unbusy(mp); 507 continue; 508 } 509 if (priv_check(td, PRIV_VFS_GENERATION)) { 510 bcopy(sp, &sb, sizeof(sb)); 511 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 512 prison_enforce_statfs(td->td_ucred, mp, &sb); 513 sp = &sb; 514 } 515 if (bufseg == UIO_SYSSPACE) 516 bcopy(sp, sfsp, sizeof(*sp)); 517 else /* if (bufseg == UIO_USERSPACE) */ { 518 error = copyout(sp, sfsp, sizeof(*sp)); 519 if (error != 0) { 520 vfs_unbusy(mp); 521 return (error); 522 } 523 } 524 sfsp++; 525 } 526 count++; 527 mtx_lock(&mountlist_mtx); 528 nmp = TAILQ_NEXT(mp, mnt_list); 529 vfs_unbusy(mp); 530 } 531 mtx_unlock(&mountlist_mtx); 532 if (sfsp && count > maxcount) 533 *countp = maxcount; 534 else 535 *countp = count; 536 return (0); 537 } 538 539 #ifdef COMPAT_FREEBSD4 540 /* 541 * Get old format filesystem statistics. 542 */ 543 static void cvtstatfs(struct statfs *, struct ostatfs *); 544 545 #ifndef _SYS_SYSPROTO_H_ 546 struct freebsd4_statfs_args { 547 char *path; 548 struct ostatfs *buf; 549 }; 550 #endif 551 int 552 freebsd4_statfs(td, uap) 553 struct thread *td; 554 struct freebsd4_statfs_args /* { 555 char *path; 556 struct ostatfs *buf; 557 } */ *uap; 558 { 559 struct ostatfs osb; 560 struct statfs sf; 561 int error; 562 563 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 564 if (error != 0) 565 return (error); 566 cvtstatfs(&sf, &osb); 567 return (copyout(&osb, uap->buf, sizeof(osb))); 568 } 569 570 /* 571 * Get filesystem statistics. 
572 */ 573 #ifndef _SYS_SYSPROTO_H_ 574 struct freebsd4_fstatfs_args { 575 int fd; 576 struct ostatfs *buf; 577 }; 578 #endif 579 int 580 freebsd4_fstatfs(td, uap) 581 struct thread *td; 582 struct freebsd4_fstatfs_args /* { 583 int fd; 584 struct ostatfs *buf; 585 } */ *uap; 586 { 587 struct ostatfs osb; 588 struct statfs sf; 589 int error; 590 591 error = kern_fstatfs(td, uap->fd, &sf); 592 if (error != 0) 593 return (error); 594 cvtstatfs(&sf, &osb); 595 return (copyout(&osb, uap->buf, sizeof(osb))); 596 } 597 598 /* 599 * Get statistics on all filesystems. 600 */ 601 #ifndef _SYS_SYSPROTO_H_ 602 struct freebsd4_getfsstat_args { 603 struct ostatfs *buf; 604 long bufsize; 605 int flags; 606 }; 607 #endif 608 int 609 freebsd4_getfsstat(td, uap) 610 struct thread *td; 611 register struct freebsd4_getfsstat_args /* { 612 struct ostatfs *buf; 613 long bufsize; 614 int flags; 615 } */ *uap; 616 { 617 struct statfs *buf, *sp; 618 struct ostatfs osb; 619 size_t count, size; 620 int error; 621 622 if (uap->bufsize < 0) 623 return (EINVAL); 624 count = uap->bufsize / sizeof(struct ostatfs); 625 if (count > SIZE_MAX / sizeof(struct statfs)) 626 return (EINVAL); 627 size = count * sizeof(struct statfs); 628 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 629 uap->flags); 630 td->td_retval[0] = count; 631 if (size != 0) { 632 sp = buf; 633 while (count != 0 && error == 0) { 634 cvtstatfs(sp, &osb); 635 error = copyout(&osb, uap->buf, sizeof(osb)); 636 sp++; 637 uap->buf++; 638 count--; 639 } 640 free(buf, M_TEMP); 641 } 642 return (error); 643 } 644 645 /* 646 * Implement fstatfs() for (NFS) file handles. 
647 */ 648 #ifndef _SYS_SYSPROTO_H_ 649 struct freebsd4_fhstatfs_args { 650 struct fhandle *u_fhp; 651 struct ostatfs *buf; 652 }; 653 #endif 654 int 655 freebsd4_fhstatfs(td, uap) 656 struct thread *td; 657 struct freebsd4_fhstatfs_args /* { 658 struct fhandle *u_fhp; 659 struct ostatfs *buf; 660 } */ *uap; 661 { 662 struct ostatfs osb; 663 struct statfs sf; 664 fhandle_t fh; 665 int error; 666 667 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 668 if (error != 0) 669 return (error); 670 error = kern_fhstatfs(td, fh, &sf); 671 if (error != 0) 672 return (error); 673 cvtstatfs(&sf, &osb); 674 return (copyout(&osb, uap->buf, sizeof(osb))); 675 } 676 677 /* 678 * Convert a new format statfs structure to an old format statfs structure. 679 */ 680 static void 681 cvtstatfs(nsp, osp) 682 struct statfs *nsp; 683 struct ostatfs *osp; 684 { 685 686 statfs_scale_blocks(nsp, LONG_MAX); 687 bzero(osp, sizeof(*osp)); 688 osp->f_bsize = nsp->f_bsize; 689 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 690 osp->f_blocks = nsp->f_blocks; 691 osp->f_bfree = nsp->f_bfree; 692 osp->f_bavail = nsp->f_bavail; 693 osp->f_files = MIN(nsp->f_files, LONG_MAX); 694 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 695 osp->f_owner = nsp->f_owner; 696 osp->f_type = nsp->f_type; 697 osp->f_flags = nsp->f_flags; 698 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 699 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 700 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 701 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 702 strlcpy(osp->f_fstypename, nsp->f_fstypename, 703 MIN(MFSNAMELEN, OMFSNAMELEN)); 704 strlcpy(osp->f_mntonname, nsp->f_mntonname, 705 MIN(MNAMELEN, OMNAMELEN)); 706 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 707 MIN(MNAMELEN, OMNAMELEN)); 708 osp->f_fsid = nsp->f_fsid; 709 } 710 #endif /* COMPAT_FREEBSD4 */ 711 712 /* 713 * Change current working directory to a given file descriptor. 
714 */ 715 #ifndef _SYS_SYSPROTO_H_ 716 struct fchdir_args { 717 int fd; 718 }; 719 #endif 720 int 721 sys_fchdir(td, uap) 722 struct thread *td; 723 struct fchdir_args /* { 724 int fd; 725 } */ *uap; 726 { 727 struct vnode *vp, *tdp; 728 struct mount *mp; 729 struct file *fp; 730 cap_rights_t rights; 731 int error; 732 733 AUDIT_ARG_FD(uap->fd); 734 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 735 &fp); 736 if (error != 0) 737 return (error); 738 vp = fp->f_vnode; 739 VREF(vp); 740 fdrop(fp, td); 741 vn_lock(vp, LK_SHARED | LK_RETRY); 742 AUDIT_ARG_VNODE1(vp); 743 error = change_dir(vp, td); 744 while (!error && (mp = vp->v_mountedhere) != NULL) { 745 if (vfs_busy(mp, 0)) 746 continue; 747 error = VFS_ROOT(mp, LK_SHARED, &tdp); 748 vfs_unbusy(mp); 749 if (error != 0) 750 break; 751 vput(vp); 752 vp = tdp; 753 } 754 if (error != 0) { 755 vput(vp); 756 return (error); 757 } 758 VOP_UNLOCK(vp, 0); 759 pwd_chdir(td, vp); 760 return (0); 761 } 762 763 /* 764 * Change current working directory (``.''). 765 */ 766 #ifndef _SYS_SYSPROTO_H_ 767 struct chdir_args { 768 char *path; 769 }; 770 #endif 771 int 772 sys_chdir(td, uap) 773 struct thread *td; 774 struct chdir_args /* { 775 char *path; 776 } */ *uap; 777 { 778 779 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 780 } 781 782 int 783 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 784 { 785 struct nameidata nd; 786 int error; 787 788 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 789 pathseg, path, td); 790 if ((error = namei(&nd)) != 0) 791 return (error); 792 if ((error = change_dir(nd.ni_vp, td)) != 0) { 793 vput(nd.ni_vp); 794 NDFREE(&nd, NDF_ONLY_PNBUF); 795 return (error); 796 } 797 VOP_UNLOCK(nd.ni_vp, 0); 798 NDFREE(&nd, NDF_ONLY_PNBUF); 799 pwd_chdir(td, nd.ni_vp); 800 return (0); 801 } 802 803 /* 804 * Change notion of root (``/'') directory. 
805 */ 806 #ifndef _SYS_SYSPROTO_H_ 807 struct chroot_args { 808 char *path; 809 }; 810 #endif 811 int 812 sys_chroot(td, uap) 813 struct thread *td; 814 struct chroot_args /* { 815 char *path; 816 } */ *uap; 817 { 818 struct nameidata nd; 819 int error; 820 821 error = priv_check(td, PRIV_VFS_CHROOT); 822 if (error != 0) 823 return (error); 824 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 825 UIO_USERSPACE, uap->path, td); 826 error = namei(&nd); 827 if (error != 0) 828 goto error; 829 error = change_dir(nd.ni_vp, td); 830 if (error != 0) 831 goto e_vunlock; 832 #ifdef MAC 833 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 834 if (error != 0) 835 goto e_vunlock; 836 #endif 837 VOP_UNLOCK(nd.ni_vp, 0); 838 error = pwd_chroot(td, nd.ni_vp); 839 vrele(nd.ni_vp); 840 NDFREE(&nd, NDF_ONLY_PNBUF); 841 return (error); 842 e_vunlock: 843 vput(nd.ni_vp); 844 error: 845 NDFREE(&nd, NDF_ONLY_PNBUF); 846 return (error); 847 } 848 849 /* 850 * Common routine for chroot and chdir. Callers must provide a locked vnode 851 * instance. 
852 */ 853 int 854 change_dir(vp, td) 855 struct vnode *vp; 856 struct thread *td; 857 { 858 #ifdef MAC 859 int error; 860 #endif 861 862 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 863 if (vp->v_type != VDIR) 864 return (ENOTDIR); 865 #ifdef MAC 866 error = mac_vnode_check_chdir(td->td_ucred, vp); 867 if (error != 0) 868 return (error); 869 #endif 870 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 871 } 872 873 static __inline void 874 flags_to_rights(int flags, cap_rights_t *rightsp) 875 { 876 877 if (flags & O_EXEC) { 878 cap_rights_set(rightsp, CAP_FEXECVE); 879 } else { 880 switch ((flags & O_ACCMODE)) { 881 case O_RDONLY: 882 cap_rights_set(rightsp, CAP_READ); 883 break; 884 case O_RDWR: 885 cap_rights_set(rightsp, CAP_READ); 886 /* FALLTHROUGH */ 887 case O_WRONLY: 888 cap_rights_set(rightsp, CAP_WRITE); 889 if (!(flags & (O_APPEND | O_TRUNC))) 890 cap_rights_set(rightsp, CAP_SEEK); 891 break; 892 } 893 } 894 895 if (flags & O_CREAT) 896 cap_rights_set(rightsp, CAP_CREATE); 897 898 if (flags & O_TRUNC) 899 cap_rights_set(rightsp, CAP_FTRUNCATE); 900 901 if (flags & (O_SYNC | O_FSYNC)) 902 cap_rights_set(rightsp, CAP_FSYNC); 903 904 if (flags & (O_EXLOCK | O_SHLOCK)) 905 cap_rights_set(rightsp, CAP_FLOCK); 906 } 907 908 /* 909 * Check permissions, allocate an open file structure, and call the device 910 * open routine if any. 
911 */ 912 #ifndef _SYS_SYSPROTO_H_ 913 struct open_args { 914 char *path; 915 int flags; 916 int mode; 917 }; 918 #endif 919 int 920 sys_open(td, uap) 921 struct thread *td; 922 register struct open_args /* { 923 char *path; 924 int flags; 925 int mode; 926 } */ *uap; 927 { 928 929 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 930 uap->flags, uap->mode)); 931 } 932 933 #ifndef _SYS_SYSPROTO_H_ 934 struct openat_args { 935 int fd; 936 char *path; 937 int flag; 938 int mode; 939 }; 940 #endif 941 int 942 sys_openat(struct thread *td, struct openat_args *uap) 943 { 944 945 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 946 uap->mode)); 947 } 948 949 int 950 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 951 int flags, int mode) 952 { 953 struct proc *p = td->td_proc; 954 struct filedesc *fdp = p->p_fd; 955 struct file *fp; 956 struct vnode *vp; 957 struct nameidata nd; 958 cap_rights_t rights; 959 int cmode, error, indx; 960 961 indx = -1; 962 963 AUDIT_ARG_FFLAGS(flags); 964 AUDIT_ARG_MODE(mode); 965 /* XXX: audit dirfd */ 966 cap_rights_init(&rights, CAP_LOOKUP); 967 flags_to_rights(flags, &rights); 968 /* 969 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 970 * may be specified. 971 */ 972 if (flags & O_EXEC) { 973 if (flags & O_ACCMODE) 974 return (EINVAL); 975 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 976 return (EINVAL); 977 } else { 978 flags = FFLAGS(flags); 979 } 980 981 /* 982 * Allocate a file structure. The descriptor to reference it 983 * is allocated and set by finstall() below. 984 */ 985 error = falloc_noinstall(td, &fp); 986 if (error != 0) 987 return (error); 988 /* 989 * An extra reference on `fp' has been held for us by 990 * falloc_noinstall(). 991 */ 992 /* Set the flags early so the finit in devfs can pick them up. 
*/ 993 fp->f_flag = flags & FMASK; 994 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 995 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 996 &rights, td); 997 td->td_dupfd = -1; /* XXX check for fdopen */ 998 error = vn_open(&nd, &flags, cmode, fp); 999 if (error != 0) { 1000 /* 1001 * If the vn_open replaced the method vector, something 1002 * wonderous happened deep below and we just pass it up 1003 * pretending we know what we do. 1004 */ 1005 if (error == ENXIO && fp->f_ops != &badfileops) 1006 goto success; 1007 1008 /* 1009 * Handle special fdopen() case. bleh. 1010 * 1011 * Don't do this for relative (capability) lookups; we don't 1012 * understand exactly what would happen, and we don't think 1013 * that it ever should. 1014 */ 1015 if (nd.ni_strictrelative == 0 && 1016 (error == ENODEV || error == ENXIO) && 1017 td->td_dupfd >= 0) { 1018 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1019 &indx); 1020 if (error == 0) 1021 goto success; 1022 } 1023 1024 goto bad; 1025 } 1026 td->td_dupfd = 0; 1027 NDFREE(&nd, NDF_ONLY_PNBUF); 1028 vp = nd.ni_vp; 1029 1030 /* 1031 * Store the vnode, for any f_type. Typically, the vnode use 1032 * count is decremented by direct call to vn_closefile() for 1033 * files that switched type in the cdevsw fdopen() method. 1034 */ 1035 fp->f_vnode = vp; 1036 /* 1037 * If the file wasn't claimed by devfs bind it to the normal 1038 * vnode operations here. 1039 */ 1040 if (fp->f_ops == &badfileops) { 1041 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1042 fp->f_seqcount = 1; 1043 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1044 DTYPE_VNODE, vp, &vnops); 1045 } 1046 1047 VOP_UNLOCK(vp, 0); 1048 if (flags & O_TRUNC) { 1049 error = fo_truncate(fp, 0, td->td_ucred, td); 1050 if (error != 0) 1051 goto bad; 1052 } 1053 success: 1054 /* 1055 * If we haven't already installed the FD (for dupfdopen), do so now. 
1056 */ 1057 if (indx == -1) { 1058 struct filecaps *fcaps; 1059 1060 #ifdef CAPABILITIES 1061 if (nd.ni_strictrelative == 1) 1062 fcaps = &nd.ni_filecaps; 1063 else 1064 #endif 1065 fcaps = NULL; 1066 error = finstall(td, fp, &indx, flags, fcaps); 1067 /* On success finstall() consumes fcaps. */ 1068 if (error != 0) { 1069 filecaps_free(&nd.ni_filecaps); 1070 goto bad; 1071 } 1072 } else { 1073 filecaps_free(&nd.ni_filecaps); 1074 } 1075 1076 /* 1077 * Release our private reference, leaving the one associated with 1078 * the descriptor table intact. 1079 */ 1080 fdrop(fp, td); 1081 td->td_retval[0] = indx; 1082 return (0); 1083 bad: 1084 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1085 fdrop(fp, td); 1086 return (error); 1087 } 1088 1089 #ifdef COMPAT_43 1090 /* 1091 * Create a file. 1092 */ 1093 #ifndef _SYS_SYSPROTO_H_ 1094 struct ocreat_args { 1095 char *path; 1096 int mode; 1097 }; 1098 #endif 1099 int 1100 ocreat(td, uap) 1101 struct thread *td; 1102 register struct ocreat_args /* { 1103 char *path; 1104 int mode; 1105 } */ *uap; 1106 { 1107 1108 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1109 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1110 } 1111 #endif /* COMPAT_43 */ 1112 1113 /* 1114 * Create a special file. 
1115 */ 1116 #ifndef _SYS_SYSPROTO_H_ 1117 struct mknod_args { 1118 char *path; 1119 int mode; 1120 int dev; 1121 }; 1122 #endif 1123 int 1124 sys_mknod(td, uap) 1125 struct thread *td; 1126 register struct mknod_args /* { 1127 char *path; 1128 int mode; 1129 int dev; 1130 } */ *uap; 1131 { 1132 1133 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1134 uap->mode, uap->dev)); 1135 } 1136 1137 #ifndef _SYS_SYSPROTO_H_ 1138 struct mknodat_args { 1139 int fd; 1140 char *path; 1141 mode_t mode; 1142 dev_t dev; 1143 }; 1144 #endif 1145 int 1146 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1147 { 1148 1149 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1150 uap->dev)); 1151 } 1152 1153 int 1154 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1155 int mode, int dev) 1156 { 1157 struct vnode *vp; 1158 struct mount *mp; 1159 struct vattr vattr; 1160 struct nameidata nd; 1161 cap_rights_t rights; 1162 int error, whiteout = 0; 1163 1164 AUDIT_ARG_MODE(mode); 1165 AUDIT_ARG_DEV(dev); 1166 switch (mode & S_IFMT) { 1167 case S_IFCHR: 1168 case S_IFBLK: 1169 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1170 break; 1171 case S_IFMT: 1172 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1173 break; 1174 case S_IFWHT: 1175 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1176 break; 1177 case S_IFIFO: 1178 if (dev == 0) 1179 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1180 /* FALLTHROUGH */ 1181 default: 1182 error = EINVAL; 1183 break; 1184 } 1185 if (error != 0) 1186 return (error); 1187 restart: 1188 bwillwrite(); 1189 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1190 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1191 td); 1192 if ((error = namei(&nd)) != 0) 1193 return (error); 1194 vp = nd.ni_vp; 1195 if (vp != NULL) { 1196 NDFREE(&nd, NDF_ONLY_PNBUF); 1197 if (vp == nd.ni_dvp) 1198 vrele(nd.ni_dvp); 1199 else 1200 vput(nd.ni_dvp); 1201 vrele(vp); 1202 return 
(EEXIST); 1203 } else { 1204 VATTR_NULL(&vattr); 1205 vattr.va_mode = (mode & ALLPERMS) & 1206 ~td->td_proc->p_fd->fd_cmask; 1207 vattr.va_rdev = dev; 1208 whiteout = 0; 1209 1210 switch (mode & S_IFMT) { 1211 case S_IFMT: /* used by badsect to flag bad sectors */ 1212 vattr.va_type = VBAD; 1213 break; 1214 case S_IFCHR: 1215 vattr.va_type = VCHR; 1216 break; 1217 case S_IFBLK: 1218 vattr.va_type = VBLK; 1219 break; 1220 case S_IFWHT: 1221 whiteout = 1; 1222 break; 1223 default: 1224 panic("kern_mknod: invalid mode"); 1225 } 1226 } 1227 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1228 NDFREE(&nd, NDF_ONLY_PNBUF); 1229 vput(nd.ni_dvp); 1230 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1231 return (error); 1232 goto restart; 1233 } 1234 #ifdef MAC 1235 if (error == 0 && !whiteout) 1236 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1237 &nd.ni_cnd, &vattr); 1238 #endif 1239 if (error == 0) { 1240 if (whiteout) 1241 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1242 else { 1243 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1244 &nd.ni_cnd, &vattr); 1245 if (error == 0) 1246 vput(nd.ni_vp); 1247 } 1248 } 1249 NDFREE(&nd, NDF_ONLY_PNBUF); 1250 vput(nd.ni_dvp); 1251 vn_finished_write(mp); 1252 return (error); 1253 } 1254 1255 /* 1256 * Create a named pipe. 
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point.  Forwards to kern_mkfifoat() with AT_FDCWD as the
 * directory descriptor and a user-space path.
 */
int
sys_mkfifo(td, uap)
	struct thread *td;
	register struct mkfifo_args /* {
		char *path;
		int mode;
	} */ *uap;
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point.  Same as sys_mkfifo() but the caller supplies the
 * directory descriptor.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO node named by path (looked up relative to fd).
 *
 * Returns EEXIST when the lookup finds an existing vnode, the error from
 * namei(), the MAC check or VOP_MKNOD() on failure, and 0 on success.
 * The mode is restricted to ALLPERMS and then masked with the process's
 * file creation mask (fd_cmask).
 */
int
kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: drop everything namei() returned. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The non-blocking attempt failed.  Release the parent
		 * directory, retry with V_XSLEEP | PCATCH and no vnode, and
		 * then redo the lookup from the top.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point.  Both names use AT_FDCWD as the directory descriptor
 * and the source lookup is done with FOLLOW.
 */
int
sys_link(td, uap)
	struct thread *td;
	register struct link_args /* {
		char *path;
		char *link;
	} */ *uap;
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point.  Any flag bit other than AT_SYMLINK_FOLLOW yields
 * EINVAL; AT_SYMLINK_FOLLOW selects FOLLOW for the source lookup, otherwise
 * NOFOLLOW is used.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{
	int flag;

	flag = uap->flag;
	if (flag & ~AT_SYMLINK_FOLLOW)
		return (EINVAL);

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
}

/* Tunables consulted by can_hardlink(); both default to 0 (check disabled). */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether cred may create a hard link to vp.
 *
 * Returns 0 at once when neither hardlink_check_uid nor hardlink_check_gid
 * is set.  Otherwise the attributes of vp are fetched and, for each enabled
 * check that cred does not satisfy (cr_uid differs from the owner, or cred
 * is not a member of the file's group), PRIV_VFS_LINK is required.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create the name path2 (relative to fd2) as a hard link to the existing
 * file path1 (relative to fd1).  "follow" is OR-ed into the flags of the
 * source lookup.
 *
 * Errors produced here: EPERM when the source is a directory, EEXIST when
 * the target name already exists, EXDEV when source and target directory
 * are on different mounts.  Errors from namei(), can_hardlink(), the MAC
 * check, vn_start_write() and VOP_LINK() are passed through.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
    enum uio_seg segflg, int follow)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

again:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
	    cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	/* nd is reused for the lookup of the new name. */
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
	    cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			NDFREE(&nd, NDF_ONLY_PNBUF);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Drop both vnodes, retry the write start
				 * with V_XSLEEP | PCATCH, then restart both
				 * lookups.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH);
				if (error != 0)
					return (error);
				goto again;
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_UNLOCK(vp, 0);
			vput(nd.ni_dvp);
			vn_finished_write(mp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
		} else {
			/* vn_lock() on the source failed: start over. */
			vput(nd.ni_dvp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vrele(vp);
			goto again;
		}
	}
	vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1518 */ 1519 #ifndef _SYS_SYSPROTO_H_ 1520 struct symlink_args { 1521 char *path; 1522 char *link; 1523 }; 1524 #endif 1525 int 1526 sys_symlink(td, uap) 1527 struct thread *td; 1528 register struct symlink_args /* { 1529 char *path; 1530 char *link; 1531 } */ *uap; 1532 { 1533 1534 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1535 UIO_USERSPACE)); 1536 } 1537 1538 #ifndef _SYS_SYSPROTO_H_ 1539 struct symlinkat_args { 1540 char *path; 1541 int fd; 1542 char *path2; 1543 }; 1544 #endif 1545 int 1546 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1547 { 1548 1549 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1550 UIO_USERSPACE)); 1551 } 1552 1553 int 1554 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1555 enum uio_seg segflg) 1556 { 1557 struct mount *mp; 1558 struct vattr vattr; 1559 char *syspath; 1560 struct nameidata nd; 1561 int error; 1562 cap_rights_t rights; 1563 1564 if (segflg == UIO_SYSSPACE) { 1565 syspath = path1; 1566 } else { 1567 syspath = uma_zalloc(namei_zone, M_WAITOK); 1568 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1569 goto out; 1570 } 1571 AUDIT_ARG_TEXT(syspath); 1572 restart: 1573 bwillwrite(); 1574 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1575 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1576 td); 1577 if ((error = namei(&nd)) != 0) 1578 goto out; 1579 if (nd.ni_vp) { 1580 NDFREE(&nd, NDF_ONLY_PNBUF); 1581 if (nd.ni_vp == nd.ni_dvp) 1582 vrele(nd.ni_dvp); 1583 else 1584 vput(nd.ni_dvp); 1585 vrele(nd.ni_vp); 1586 error = EEXIST; 1587 goto out; 1588 } 1589 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1590 NDFREE(&nd, NDF_ONLY_PNBUF); 1591 vput(nd.ni_dvp); 1592 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1593 goto out; 1594 goto restart; 1595 } 1596 VATTR_NULL(&vattr); 1597 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1598 #ifdef MAC 1599 vattr.va_type = VLNK; 1600 
error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1601 &vattr); 1602 if (error != 0) 1603 goto out2; 1604 #endif 1605 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1606 if (error == 0) 1607 vput(nd.ni_vp); 1608 #ifdef MAC 1609 out2: 1610 #endif 1611 NDFREE(&nd, NDF_ONLY_PNBUF); 1612 vput(nd.ni_dvp); 1613 vn_finished_write(mp); 1614 out: 1615 if (segflg != UIO_SYSSPACE) 1616 uma_zfree(namei_zone, syspath); 1617 return (error); 1618 } 1619 1620 /* 1621 * Delete a whiteout from the filesystem. 1622 */ 1623 int 1624 sys_undelete(td, uap) 1625 struct thread *td; 1626 register struct undelete_args /* { 1627 char *path; 1628 } */ *uap; 1629 { 1630 struct mount *mp; 1631 struct nameidata nd; 1632 int error; 1633 1634 restart: 1635 bwillwrite(); 1636 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1637 UIO_USERSPACE, uap->path, td); 1638 error = namei(&nd); 1639 if (error != 0) 1640 return (error); 1641 1642 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1643 NDFREE(&nd, NDF_ONLY_PNBUF); 1644 if (nd.ni_vp == nd.ni_dvp) 1645 vrele(nd.ni_dvp); 1646 else 1647 vput(nd.ni_dvp); 1648 if (nd.ni_vp) 1649 vrele(nd.ni_vp); 1650 return (EEXIST); 1651 } 1652 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1653 NDFREE(&nd, NDF_ONLY_PNBUF); 1654 vput(nd.ni_dvp); 1655 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1656 return (error); 1657 goto restart; 1658 } 1659 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1660 NDFREE(&nd, NDF_ONLY_PNBUF); 1661 vput(nd.ni_dvp); 1662 vn_finished_write(mp); 1663 return (error); 1664 } 1665 1666 /* 1667 * Delete a name from the filesystem. 
 */
#ifndef _SYS_SYSPROTO_H_
struct unlink_args {
	char	*path;
};
#endif
/*
 * unlink(2) entry point.  Uses AT_FDCWD and passes oldinum == 0, so no
 * inode-number comparison is requested from kern_unlinkat().
 */
int
sys_unlink(td, uap)
	struct thread *td;
	struct unlink_args /* {
		char *path;
	} */ *uap;
{

	return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
}

#ifndef _SYS_SYSPROTO_H_
struct unlinkat_args {
	int	fd;
	char	*path;
	int	flag;
};
#endif
/*
 * unlinkat(2) entry point.  Any flag bit other than AT_REMOVEDIR yields
 * EINVAL.  With AT_REMOVEDIR the request goes to kern_rmdirat(), otherwise
 * to kern_unlinkat().
 */
int
sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
{
	int flag = uap->flag;
	int fd = uap->fd;
	char *path = uap->path;

	if (flag & ~AT_REMOVEDIR)
		return (EINVAL);

	if (flag & AT_REMOVEDIR)
		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
	else
		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
}

/*
 * Remove the name path (looked up relative to fd).
 *
 * oldinum == 0: a directory is refused with EPERM.
 * oldinum != 0: the vnode is stat'ed and EIDRM is returned when its inode
 * number differs from oldinum; if vn_stat() itself fails its error is kept
 * unless the VV_ROOT test below replaces it with EBUSY.
 *
 * An EINVAL result from namei() is reported to the caller as EPERM.
 */
int
kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    ino_t oldinum)
{
	struct mount *mp;
	struct vnode *vp;
	struct nameidata nd;
	struct stat sb;
	cap_rights_t rights;
	int error;

restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
	if ((error = namei(&nd)) != 0)
		return (error == EINVAL ? EPERM : error);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR && oldinum == 0) {
		error = EPERM;		/* POSIX */
	} else if (oldinum != 0 &&
		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
		  sb.st_ino != oldinum) {
			error = EIDRM;	/* Identifier removed */
	} else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			/*
			 * The non-blocking attempt failed.  Release both
			 * vnodes, retry with V_XSLEEP | PCATCH, then redo
			 * the lookup.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error != 0)
			goto out;
#endif
		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/* Common exit: release the path buffer, the parent and the leaf. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	return (error);
}

/*
 * Reposition read/write file offset.
 */
#ifndef _SYS_SYSPROTO_H_
struct lseek_args {
	int	fd;
	int	pad;
	off_t	offset;
	int	whence;
};
#endif
/*
 * The descriptor is looked up with the CAP_SEEK right.  ESPIPE is returned
 * when the file's ops do not carry DFLAG_SEEKABLE; otherwise the result of
 * fo_seek() is returned.  uap->pad is not read.
 */
int
sys_lseek(td, uap)
	struct thread *td;
	register struct lseek_args /* {
		int fd;
		int pad;
		off_t offset;
		int whence;
	} */ *uap;
{
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(uap->fd);
	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
	if (error != 0)
		return (error);
	error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
	    fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
	fdrop(fp, td);
	return (error);
}

#if defined(COMPAT_43)
/*
 * Reposition read/write file offset.
1814 */ 1815 #ifndef _SYS_SYSPROTO_H_ 1816 struct olseek_args { 1817 int fd; 1818 long offset; 1819 int whence; 1820 }; 1821 #endif 1822 int 1823 olseek(td, uap) 1824 struct thread *td; 1825 register struct olseek_args /* { 1826 int fd; 1827 long offset; 1828 int whence; 1829 } */ *uap; 1830 { 1831 struct lseek_args /* { 1832 int fd; 1833 int pad; 1834 off_t offset; 1835 int whence; 1836 } */ nuap; 1837 1838 nuap.fd = uap->fd; 1839 nuap.offset = uap->offset; 1840 nuap.whence = uap->whence; 1841 return (sys_lseek(td, &nuap)); 1842 } 1843 #endif /* COMPAT_43 */ 1844 1845 #if defined(COMPAT_FREEBSD6) 1846 /* Version with the 'pad' argument */ 1847 int 1848 freebsd6_lseek(td, uap) 1849 struct thread *td; 1850 register struct freebsd6_lseek_args *uap; 1851 { 1852 struct lseek_args ouap; 1853 1854 ouap.fd = uap->fd; 1855 ouap.offset = uap->offset; 1856 ouap.whence = uap->whence; 1857 return (sys_lseek(td, &ouap)); 1858 } 1859 #endif 1860 1861 /* 1862 * Check access permissions using passed credentials. 1863 */ 1864 static int 1865 vn_access(vp, user_flags, cred, td) 1866 struct vnode *vp; 1867 int user_flags; 1868 struct ucred *cred; 1869 struct thread *td; 1870 { 1871 accmode_t accmode; 1872 int error; 1873 1874 /* Flags == 0 means only check for existence. */ 1875 if (user_flags == 0) 1876 return (0); 1877 1878 accmode = 0; 1879 if (user_flags & R_OK) 1880 accmode |= VREAD; 1881 if (user_flags & W_OK) 1882 accmode |= VWRITE; 1883 if (user_flags & X_OK) 1884 accmode |= VEXEC; 1885 #ifdef MAC 1886 error = mac_vnode_check_access(cred, vp, accmode); 1887 if (error != 0) 1888 return (error); 1889 #endif 1890 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1891 error = VOP_ACCESS(vp, accmode, cred, td); 1892 return (error); 1893 } 1894 1895 /* 1896 * Check access permissions using "real" credentials. 
1897 */ 1898 #ifndef _SYS_SYSPROTO_H_ 1899 struct access_args { 1900 char *path; 1901 int amode; 1902 }; 1903 #endif 1904 int 1905 sys_access(td, uap) 1906 struct thread *td; 1907 register struct access_args /* { 1908 char *path; 1909 int amode; 1910 } */ *uap; 1911 { 1912 1913 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1914 0, uap->amode)); 1915 } 1916 1917 #ifndef _SYS_SYSPROTO_H_ 1918 struct faccessat_args { 1919 int dirfd; 1920 char *path; 1921 int amode; 1922 int flag; 1923 } 1924 #endif 1925 int 1926 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1927 { 1928 1929 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1930 uap->amode)); 1931 } 1932 1933 int 1934 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1935 int flag, int amode) 1936 { 1937 struct ucred *cred, *usecred; 1938 struct vnode *vp; 1939 struct nameidata nd; 1940 cap_rights_t rights; 1941 int error; 1942 1943 if (flag & ~AT_EACCESS) 1944 return (EINVAL); 1945 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1946 return (EINVAL); 1947 1948 /* 1949 * Create and modify a temporary credential instead of one that 1950 * is potentially shared (if we need one). 
1951 */ 1952 cred = td->td_ucred; 1953 if ((flag & AT_EACCESS) == 0 && 1954 ((cred->cr_uid != cred->cr_ruid || 1955 cred->cr_rgid != cred->cr_groups[0]))) { 1956 usecred = crdup(cred); 1957 usecred->cr_uid = cred->cr_ruid; 1958 usecred->cr_groups[0] = cred->cr_rgid; 1959 td->td_ucred = usecred; 1960 } else 1961 usecred = cred; 1962 AUDIT_ARG_VALUE(amode); 1963 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1964 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1965 td); 1966 if ((error = namei(&nd)) != 0) 1967 goto out; 1968 vp = nd.ni_vp; 1969 1970 error = vn_access(vp, amode, usecred, td); 1971 NDFREE(&nd, NDF_ONLY_PNBUF); 1972 vput(vp); 1973 out: 1974 if (usecred != cred) { 1975 td->td_ucred = cred; 1976 crfree(usecred); 1977 } 1978 return (error); 1979 } 1980 1981 /* 1982 * Check access permissions using "effective" credentials. 1983 */ 1984 #ifndef _SYS_SYSPROTO_H_ 1985 struct eaccess_args { 1986 char *path; 1987 int amode; 1988 }; 1989 #endif 1990 int 1991 sys_eaccess(td, uap) 1992 struct thread *td; 1993 register struct eaccess_args /* { 1994 char *path; 1995 int amode; 1996 } */ *uap; 1997 { 1998 1999 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2000 AT_EACCESS, uap->amode)); 2001 } 2002 2003 #if defined(COMPAT_43) 2004 /* 2005 * Get file status; this version follows links. 2006 */ 2007 #ifndef _SYS_SYSPROTO_H_ 2008 struct ostat_args { 2009 char *path; 2010 struct ostat *ub; 2011 }; 2012 #endif 2013 int 2014 ostat(td, uap) 2015 struct thread *td; 2016 register struct ostat_args /* { 2017 char *path; 2018 struct ostat *ub; 2019 } */ *uap; 2020 { 2021 struct stat sb; 2022 struct ostat osb; 2023 int error; 2024 2025 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2026 &sb, NULL); 2027 if (error != 0) 2028 return (error); 2029 cvtstat(&sb, &osb); 2030 return (copyout(&osb, uap->ub, sizeof (osb))); 2031 } 2032 2033 /* 2034 * Get file status; this version does not follow links. 
2035 */ 2036 #ifndef _SYS_SYSPROTO_H_ 2037 struct olstat_args { 2038 char *path; 2039 struct ostat *ub; 2040 }; 2041 #endif 2042 int 2043 olstat(td, uap) 2044 struct thread *td; 2045 register struct olstat_args /* { 2046 char *path; 2047 struct ostat *ub; 2048 } */ *uap; 2049 { 2050 struct stat sb; 2051 struct ostat osb; 2052 int error; 2053 2054 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2055 UIO_USERSPACE, &sb, NULL); 2056 if (error != 0) 2057 return (error); 2058 cvtstat(&sb, &osb); 2059 return (copyout(&osb, uap->ub, sizeof (osb))); 2060 } 2061 2062 /* 2063 * Convert from an old to a new stat structure. 2064 */ 2065 void 2066 cvtstat(st, ost) 2067 struct stat *st; 2068 struct ostat *ost; 2069 { 2070 2071 ost->st_dev = st->st_dev; 2072 ost->st_ino = st->st_ino; 2073 ost->st_mode = st->st_mode; 2074 ost->st_nlink = st->st_nlink; 2075 ost->st_uid = st->st_uid; 2076 ost->st_gid = st->st_gid; 2077 ost->st_rdev = st->st_rdev; 2078 if (st->st_size < (quad_t)1 << 32) 2079 ost->st_size = st->st_size; 2080 else 2081 ost->st_size = -2; 2082 ost->st_atim = st->st_atim; 2083 ost->st_mtim = st->st_mtim; 2084 ost->st_ctim = st->st_ctim; 2085 ost->st_blksize = st->st_blksize; 2086 ost->st_blocks = st->st_blocks; 2087 ost->st_flags = st->st_flags; 2088 ost->st_gen = st->st_gen; 2089 } 2090 #endif /* COMPAT_43 */ 2091 2092 /* 2093 * Get file status; this version follows links. 
2094 */ 2095 #ifndef _SYS_SYSPROTO_H_ 2096 struct stat_args { 2097 char *path; 2098 struct stat *ub; 2099 }; 2100 #endif 2101 int 2102 sys_stat(td, uap) 2103 struct thread *td; 2104 register struct stat_args /* { 2105 char *path; 2106 struct stat *ub; 2107 } */ *uap; 2108 { 2109 struct stat sb; 2110 int error; 2111 2112 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2113 &sb, NULL); 2114 if (error == 0) 2115 error = copyout(&sb, uap->ub, sizeof (sb)); 2116 return (error); 2117 } 2118 2119 #ifndef _SYS_SYSPROTO_H_ 2120 struct fstatat_args { 2121 int fd; 2122 char *path; 2123 struct stat *buf; 2124 int flag; 2125 } 2126 #endif 2127 int 2128 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2129 { 2130 struct stat sb; 2131 int error; 2132 2133 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2134 UIO_USERSPACE, &sb, NULL); 2135 if (error == 0) 2136 error = copyout(&sb, uap->buf, sizeof (sb)); 2137 return (error); 2138 } 2139 2140 int 2141 kern_statat(struct thread *td, int flag, int fd, char *path, 2142 enum uio_seg pathseg, struct stat *sbp, 2143 void (*hook)(struct vnode *vp, struct stat *sbp)) 2144 { 2145 struct nameidata nd; 2146 struct stat sb; 2147 cap_rights_t rights; 2148 int error; 2149 2150 if (flag & ~AT_SYMLINK_NOFOLLOW) 2151 return (EINVAL); 2152 2153 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2154 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2155 cap_rights_init(&rights, CAP_FSTAT), td); 2156 2157 if ((error = namei(&nd)) != 0) 2158 return (error); 2159 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2160 if (error == 0) { 2161 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2162 if (S_ISREG(sb.st_mode)) 2163 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2164 if (__predict_false(hook != NULL)) 2165 hook(nd.ni_vp, &sb); 2166 } 2167 NDFREE(&nd, NDF_ONLY_PNBUF); 2168 vput(nd.ni_vp); 2169 if (error != 0) 2170 return (error); 2171 *sbp = sb; 2172 #ifdef KTRACE 2173 if (KTRPOINT(td, KTR_STRUCT)) 2174 ktrstat(&sb); 2175 #endif 2176 return (0); 2177 } 2178 2179 /* 2180 * Get file status; this version does not follow links. 2181 */ 2182 #ifndef _SYS_SYSPROTO_H_ 2183 struct lstat_args { 2184 char *path; 2185 struct stat *ub; 2186 }; 2187 #endif 2188 int 2189 sys_lstat(td, uap) 2190 struct thread *td; 2191 register struct lstat_args /* { 2192 char *path; 2193 struct stat *ub; 2194 } */ *uap; 2195 { 2196 struct stat sb; 2197 int error; 2198 2199 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2200 UIO_USERSPACE, &sb, NULL); 2201 if (error == 0) 2202 error = copyout(&sb, uap->ub, sizeof (sb)); 2203 return (error); 2204 } 2205 2206 /* 2207 * Implementation of the NetBSD [l]stat() functions. 
2208 */ 2209 void 2210 cvtnstat(sb, nsb) 2211 struct stat *sb; 2212 struct nstat *nsb; 2213 { 2214 2215 bzero(nsb, sizeof *nsb); 2216 nsb->st_dev = sb->st_dev; 2217 nsb->st_ino = sb->st_ino; 2218 nsb->st_mode = sb->st_mode; 2219 nsb->st_nlink = sb->st_nlink; 2220 nsb->st_uid = sb->st_uid; 2221 nsb->st_gid = sb->st_gid; 2222 nsb->st_rdev = sb->st_rdev; 2223 nsb->st_atim = sb->st_atim; 2224 nsb->st_mtim = sb->st_mtim; 2225 nsb->st_ctim = sb->st_ctim; 2226 nsb->st_size = sb->st_size; 2227 nsb->st_blocks = sb->st_blocks; 2228 nsb->st_blksize = sb->st_blksize; 2229 nsb->st_flags = sb->st_flags; 2230 nsb->st_gen = sb->st_gen; 2231 nsb->st_birthtim = sb->st_birthtim; 2232 } 2233 2234 #ifndef _SYS_SYSPROTO_H_ 2235 struct nstat_args { 2236 char *path; 2237 struct nstat *ub; 2238 }; 2239 #endif 2240 int 2241 sys_nstat(td, uap) 2242 struct thread *td; 2243 register struct nstat_args /* { 2244 char *path; 2245 struct nstat *ub; 2246 } */ *uap; 2247 { 2248 struct stat sb; 2249 struct nstat nsb; 2250 int error; 2251 2252 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2253 &sb, NULL); 2254 if (error != 0) 2255 return (error); 2256 cvtnstat(&sb, &nsb); 2257 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2258 } 2259 2260 /* 2261 * NetBSD lstat. Get file status; this version does not follow links. 2262 */ 2263 #ifndef _SYS_SYSPROTO_H_ 2264 struct lstat_args { 2265 char *path; 2266 struct stat *ub; 2267 }; 2268 #endif 2269 int 2270 sys_nlstat(td, uap) 2271 struct thread *td; 2272 register struct nlstat_args /* { 2273 char *path; 2274 struct nstat *ub; 2275 } */ *uap; 2276 { 2277 struct stat sb; 2278 struct nstat nsb; 2279 int error; 2280 2281 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2282 UIO_USERSPACE, &sb, NULL); 2283 if (error != 0) 2284 return (error); 2285 cvtnstat(&sb, &nsb); 2286 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2287 } 2288 2289 /* 2290 * Get configurable pathname variables. 
2291 */ 2292 #ifndef _SYS_SYSPROTO_H_ 2293 struct pathconf_args { 2294 char *path; 2295 int name; 2296 }; 2297 #endif 2298 int 2299 sys_pathconf(td, uap) 2300 struct thread *td; 2301 register struct pathconf_args /* { 2302 char *path; 2303 int name; 2304 } */ *uap; 2305 { 2306 2307 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2308 } 2309 2310 #ifndef _SYS_SYSPROTO_H_ 2311 struct lpathconf_args { 2312 char *path; 2313 int name; 2314 }; 2315 #endif 2316 int 2317 sys_lpathconf(td, uap) 2318 struct thread *td; 2319 register struct lpathconf_args /* { 2320 char *path; 2321 int name; 2322 } */ *uap; 2323 { 2324 2325 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2326 NOFOLLOW)); 2327 } 2328 2329 int 2330 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2331 u_long flags) 2332 { 2333 struct nameidata nd; 2334 int error; 2335 2336 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2337 pathseg, path, td); 2338 if ((error = namei(&nd)) != 0) 2339 return (error); 2340 NDFREE(&nd, NDF_ONLY_PNBUF); 2341 2342 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2343 vput(nd.ni_vp); 2344 return (error); 2345 } 2346 2347 /* 2348 * Return target name of a symbolic link. 
 */
#ifndef _SYS_SYSPROTO_H_
struct readlink_args {
	char	*path;
	char	*buf;
	size_t	count;
};
#endif
/*
 * readlink(2) entry point.  Uses AT_FDCWD; both the path and the result
 * buffer are user-space addresses.
 */
int
sys_readlink(td, uap)
	struct thread *td;
	register struct readlink_args /* {
		char *path;
		char *buf;
		size_t count;
	} */ *uap;
{

	return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->buf, UIO_USERSPACE, uap->count));
}
#ifndef _SYS_SYSPROTO_H_
struct readlinkat_args {
	int	fd;
	char	*path;
	char	*buf;
	size_t	bufsize;
};
#endif
/*
 * readlinkat(2) entry point.  Same as sys_readlink() but the caller
 * supplies the directory descriptor.
 */
int
sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
{

	return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->buf, UIO_USERSPACE, uap->bufsize));
}

/*
 * Read the contents of the symbolic link path (looked up relative to fd,
 * with NOFOLLOW) into buf, which holds up to count bytes and lives in the
 * address space named by bufseg.
 *
 * Returns EINVAL when count exceeds IOSIZE_MAX or when the vnode is not a
 * VLNK.  When VOP_READLINK() is reached, td->td_retval[0] is set to the
 * number of bytes it transferred (count minus the residual), whether or
 * not it reported an error.
 */
int
kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    char *buf, enum uio_seg bufseg, size_t count)
{
	struct vnode *vp;
	struct iovec aiov;
	struct uio auio;
	struct nameidata nd;
	int error;

	if (count > IOSIZE_MAX)
		return (EINVAL);

	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, td);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
#ifdef MAC
	error = mac_vnode_check_readlink(td->td_ucred, vp);
	if (error != 0) {
		vput(vp);
		return (error);
	}
#endif
	if (vp->v_type != VLNK)
		error = EINVAL;
	else {
		/* Single-segment read of up to count bytes from offset 0. */
		aiov.iov_base = buf;
		aiov.iov_len = count;
		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = 0;
		auio.uio_rw = UIO_READ;
		auio.uio_segflg = bufseg;
		auio.uio_td = td;
		auio.uio_resid = count;
		error = VOP_READLINK(vp, &auio, td->td_ucred);
		td->td_retval[0] = count - auio.uio_resid;
	}
	vput(vp);
	return (error);
}

/*
 * Common implementation code for chflags() and fchflags().
 */
/*
 * Sets va_flags of vp to flags via VOP_SETATTR().  The vnode is locked
 * exclusively here and unlocked again before returning; the caller keeps
 * its own reference.
 */
static int
setfflags(td, vp, flags)
	struct thread *td;
	struct vnode *vp;
	u_long flags;
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	/* We can't support the value matching VNOVAL. */
	if (flags == VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
		if (error != 0)
			return (error);
	}

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_flags = flags;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	return (error);
}

/*
 * Change flags of a file given a path name.
2478 */ 2479 #ifndef _SYS_SYSPROTO_H_ 2480 struct chflags_args { 2481 const char *path; 2482 u_long flags; 2483 }; 2484 #endif 2485 int 2486 sys_chflags(td, uap) 2487 struct thread *td; 2488 register struct chflags_args /* { 2489 const char *path; 2490 u_long flags; 2491 } */ *uap; 2492 { 2493 2494 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2495 uap->flags, 0)); 2496 } 2497 2498 #ifndef _SYS_SYSPROTO_H_ 2499 struct chflagsat_args { 2500 int fd; 2501 const char *path; 2502 u_long flags; 2503 int atflag; 2504 } 2505 #endif 2506 int 2507 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2508 { 2509 int fd = uap->fd; 2510 const char *path = uap->path; 2511 u_long flags = uap->flags; 2512 int atflag = uap->atflag; 2513 2514 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2515 return (EINVAL); 2516 2517 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2518 } 2519 2520 /* 2521 * Same as chflags() but doesn't follow symlinks. 2522 */ 2523 int 2524 sys_lchflags(td, uap) 2525 struct thread *td; 2526 register struct lchflags_args /* { 2527 const char *path; 2528 u_long flags; 2529 } */ *uap; 2530 { 2531 2532 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2533 uap->flags, AT_SYMLINK_NOFOLLOW)); 2534 } 2535 2536 static int 2537 kern_chflagsat(struct thread *td, int fd, const char *path, 2538 enum uio_seg pathseg, u_long flags, int atflag) 2539 { 2540 struct nameidata nd; 2541 cap_rights_t rights; 2542 int error, follow; 2543 2544 AUDIT_ARG_FFLAGS(flags); 2545 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2546 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2547 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2548 if ((error = namei(&nd)) != 0) 2549 return (error); 2550 NDFREE(&nd, NDF_ONLY_PNBUF); 2551 error = setfflags(td, nd.ni_vp, flags); 2552 vrele(nd.ni_vp); 2553 return (error); 2554 } 2555 2556 /* 2557 * Change flags of a file given a file descriptor. 
 */
#ifndef _SYS_SYSPROTO_H_
struct fchflags_args {
	int	fd;
	u_long	flags;
};
#endif
/*
 * The descriptor is resolved with getvnode() and the CAP_FCHFLAGS right,
 * then the flags are applied to its vnode with setfflags().
 */
int
sys_fchflags(td, uap)
	struct thread *td;
	register struct fchflags_args /* {
		int fd;
		u_long flags;
	} */ *uap;
{
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_FFLAGS(uap->flags);
	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS),
	    &fp);
	if (error != 0)
		return (error);
#ifdef AUDIT
	/* The vnode is locked (shared) only around the audit record. */
	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
	AUDIT_ARG_VNODE1(fp->f_vnode);
	VOP_UNLOCK(fp->f_vnode, 0);
#endif
	error = setfflags(td, fp->f_vnode, uap->flags);
	fdrop(fp, td);
	return (error);
}

/*
 * Common implementation code for chmod(), lchmod() and fchmod().
 */
/*
 * Sets the mode of vp to (mode & ALLPERMS) via VOP_SETATTR() using cred.
 * The vnode is locked exclusively here and unlocked again before
 * returning.  The td argument is not referenced in this function.
 */
int
setfmode(td, cred, vp, mode)
	struct thread *td;
	struct ucred *cred;
	struct vnode *vp;
	int mode;
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VATTR_NULL(&vattr);
	vattr.va_mode = mode & ALLPERMS;
#ifdef MAC
	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	return (error);
}

/*
 * Change mode of a file given path name.
2624 */ 2625 #ifndef _SYS_SYSPROTO_H_ 2626 struct chmod_args { 2627 char *path; 2628 int mode; 2629 }; 2630 #endif 2631 int 2632 sys_chmod(td, uap) 2633 struct thread *td; 2634 register struct chmod_args /* { 2635 char *path; 2636 int mode; 2637 } */ *uap; 2638 { 2639 2640 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2641 uap->mode, 0)); 2642 } 2643 2644 #ifndef _SYS_SYSPROTO_H_ 2645 struct fchmodat_args { 2646 int dirfd; 2647 char *path; 2648 mode_t mode; 2649 int flag; 2650 } 2651 #endif 2652 int 2653 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2654 { 2655 int flag = uap->flag; 2656 int fd = uap->fd; 2657 char *path = uap->path; 2658 mode_t mode = uap->mode; 2659 2660 if (flag & ~AT_SYMLINK_NOFOLLOW) 2661 return (EINVAL); 2662 2663 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2664 } 2665 2666 /* 2667 * Change mode of a file given path name (don't follow links.) 2668 */ 2669 #ifndef _SYS_SYSPROTO_H_ 2670 struct lchmod_args { 2671 char *path; 2672 int mode; 2673 }; 2674 #endif 2675 int 2676 sys_lchmod(td, uap) 2677 struct thread *td; 2678 register struct lchmod_args /* { 2679 char *path; 2680 int mode; 2681 } */ *uap; 2682 { 2683 2684 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2685 uap->mode, AT_SYMLINK_NOFOLLOW)); 2686 } 2687 2688 int 2689 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2690 mode_t mode, int flag) 2691 { 2692 struct nameidata nd; 2693 cap_rights_t rights; 2694 int error, follow; 2695 2696 AUDIT_ARG_MODE(mode); 2697 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2698 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2699 cap_rights_init(&rights, CAP_FCHMOD), td); 2700 if ((error = namei(&nd)) != 0) 2701 return (error); 2702 NDFREE(&nd, NDF_ONLY_PNBUF); 2703 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2704 vrele(nd.ni_vp); 2705 return (error); 2706 } 2707 2708 /* 2709 * Change mode of a file given a file descriptor. 2710 */ 2711 #ifndef _SYS_SYSPROTO_H_ 2712 struct fchmod_args { 2713 int fd; 2714 int mode; 2715 }; 2716 #endif 2717 int 2718 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2719 { 2720 struct file *fp; 2721 cap_rights_t rights; 2722 int error; 2723 2724 AUDIT_ARG_FD(uap->fd); 2725 AUDIT_ARG_MODE(uap->mode); 2726 2727 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2728 if (error != 0) 2729 return (error); 2730 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2731 fdrop(fp, td); 2732 return (error); 2733 } 2734 2735 /* 2736 * Common implementation for chown(), lchown(), and fchown() 2737 */ 2738 int 2739 setfown(td, cred, vp, uid, gid) 2740 struct thread *td; 2741 struct ucred *cred; 2742 struct vnode *vp; 2743 uid_t uid; 2744 gid_t gid; 2745 { 2746 struct mount *mp; 2747 struct vattr vattr; 2748 int error; 2749 2750 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2751 return (error); 2752 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2753 VATTR_NULL(&vattr); 2754 vattr.va_uid = uid; 2755 vattr.va_gid = gid; 2756 #ifdef MAC 2757 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2758 vattr.va_gid); 2759 if (error == 0) 2760 #endif 2761 error = VOP_SETATTR(vp, &vattr, cred); 2762 VOP_UNLOCK(vp, 0); 2763 vn_finished_write(mp); 2764 return (error); 2765 } 2766 2767 /* 2768 * Set ownership given a path name. 
2769 */ 2770 #ifndef _SYS_SYSPROTO_H_ 2771 struct chown_args { 2772 char *path; 2773 int uid; 2774 int gid; 2775 }; 2776 #endif 2777 int 2778 sys_chown(td, uap) 2779 struct thread *td; 2780 register struct chown_args /* { 2781 char *path; 2782 int uid; 2783 int gid; 2784 } */ *uap; 2785 { 2786 2787 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2788 uap->gid, 0)); 2789 } 2790 2791 #ifndef _SYS_SYSPROTO_H_ 2792 struct fchownat_args { 2793 int fd; 2794 const char * path; 2795 uid_t uid; 2796 gid_t gid; 2797 int flag; 2798 }; 2799 #endif 2800 int 2801 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2802 { 2803 int flag; 2804 2805 flag = uap->flag; 2806 if (flag & ~AT_SYMLINK_NOFOLLOW) 2807 return (EINVAL); 2808 2809 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2810 uap->gid, uap->flag)); 2811 } 2812 2813 int 2814 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2815 int uid, int gid, int flag) 2816 { 2817 struct nameidata nd; 2818 cap_rights_t rights; 2819 int error, follow; 2820 2821 AUDIT_ARG_OWNER(uid, gid); 2822 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2823 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2824 cap_rights_init(&rights, CAP_FCHOWN), td); 2825 2826 if ((error = namei(&nd)) != 0) 2827 return (error); 2828 NDFREE(&nd, NDF_ONLY_PNBUF); 2829 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2830 vrele(nd.ni_vp); 2831 return (error); 2832 } 2833 2834 /* 2835 * Set ownership given a path name, do not cross symlinks. 
2836 */ 2837 #ifndef _SYS_SYSPROTO_H_ 2838 struct lchown_args { 2839 char *path; 2840 int uid; 2841 int gid; 2842 }; 2843 #endif 2844 int 2845 sys_lchown(td, uap) 2846 struct thread *td; 2847 register struct lchown_args /* { 2848 char *path; 2849 int uid; 2850 int gid; 2851 } */ *uap; 2852 { 2853 2854 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2855 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2856 } 2857 2858 /* 2859 * Set ownership given a file descriptor. 2860 */ 2861 #ifndef _SYS_SYSPROTO_H_ 2862 struct fchown_args { 2863 int fd; 2864 int uid; 2865 int gid; 2866 }; 2867 #endif 2868 int 2869 sys_fchown(td, uap) 2870 struct thread *td; 2871 register struct fchown_args /* { 2872 int fd; 2873 int uid; 2874 int gid; 2875 } */ *uap; 2876 { 2877 struct file *fp; 2878 cap_rights_t rights; 2879 int error; 2880 2881 AUDIT_ARG_FD(uap->fd); 2882 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2883 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2884 if (error != 0) 2885 return (error); 2886 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2887 fdrop(fp, td); 2888 return (error); 2889 } 2890 2891 /* 2892 * Common implementation code for utimes(), lutimes(), and futimes(). 
2893 */ 2894 static int 2895 getutimes(usrtvp, tvpseg, tsp) 2896 const struct timeval *usrtvp; 2897 enum uio_seg tvpseg; 2898 struct timespec *tsp; 2899 { 2900 struct timeval tv[2]; 2901 const struct timeval *tvp; 2902 int error; 2903 2904 if (usrtvp == NULL) { 2905 vfs_timestamp(&tsp[0]); 2906 tsp[1] = tsp[0]; 2907 } else { 2908 if (tvpseg == UIO_SYSSPACE) { 2909 tvp = usrtvp; 2910 } else { 2911 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2912 return (error); 2913 tvp = tv; 2914 } 2915 2916 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2917 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2918 return (EINVAL); 2919 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2920 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2921 } 2922 return (0); 2923 } 2924 2925 /* 2926 * Common implementation code for futimens(), utimensat(). 2927 */ 2928 #define UTIMENS_NULL 0x1 2929 #define UTIMENS_EXIT 0x2 2930 static int 2931 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2932 struct timespec *tsp, int *retflags) 2933 { 2934 struct timespec tsnow; 2935 int error; 2936 2937 vfs_timestamp(&tsnow); 2938 *retflags = 0; 2939 if (usrtsp == NULL) { 2940 tsp[0] = tsnow; 2941 tsp[1] = tsnow; 2942 *retflags |= UTIMENS_NULL; 2943 return (0); 2944 } 2945 if (tspseg == UIO_SYSSPACE) { 2946 tsp[0] = usrtsp[0]; 2947 tsp[1] = usrtsp[1]; 2948 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2949 return (error); 2950 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2951 *retflags |= UTIMENS_EXIT; 2952 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2953 *retflags |= UTIMENS_NULL; 2954 if (tsp[0].tv_nsec == UTIME_OMIT) 2955 tsp[0].tv_sec = VNOVAL; 2956 else if (tsp[0].tv_nsec == UTIME_NOW) 2957 tsp[0] = tsnow; 2958 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2959 return (EINVAL); 2960 if (tsp[1].tv_nsec == UTIME_OMIT) 2961 tsp[1].tv_sec = VNOVAL; 2962 else if (tsp[1].tv_nsec == UTIME_NOW) 2963 tsp[1] = tsnow; 2964 else if 
(tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2965 return (EINVAL); 2966 2967 return (0); 2968 } 2969 2970 /* 2971 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2972 * and utimensat(). 2973 */ 2974 static int 2975 setutimes(td, vp, ts, numtimes, nullflag) 2976 struct thread *td; 2977 struct vnode *vp; 2978 const struct timespec *ts; 2979 int numtimes; 2980 int nullflag; 2981 { 2982 struct mount *mp; 2983 struct vattr vattr; 2984 int error, setbirthtime; 2985 2986 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2987 return (error); 2988 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2989 setbirthtime = 0; 2990 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2991 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2992 setbirthtime = 1; 2993 VATTR_NULL(&vattr); 2994 vattr.va_atime = ts[0]; 2995 vattr.va_mtime = ts[1]; 2996 if (setbirthtime) 2997 vattr.va_birthtime = ts[1]; 2998 if (numtimes > 2) 2999 vattr.va_birthtime = ts[2]; 3000 if (nullflag) 3001 vattr.va_vaflags |= VA_UTIMES_NULL; 3002 #ifdef MAC 3003 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3004 vattr.va_mtime); 3005 #endif 3006 if (error == 0) 3007 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3008 VOP_UNLOCK(vp, 0); 3009 vn_finished_write(mp); 3010 return (error); 3011 } 3012 3013 /* 3014 * Set the access and modification times of a file. 
3015 */ 3016 #ifndef _SYS_SYSPROTO_H_ 3017 struct utimes_args { 3018 char *path; 3019 struct timeval *tptr; 3020 }; 3021 #endif 3022 int 3023 sys_utimes(td, uap) 3024 struct thread *td; 3025 register struct utimes_args /* { 3026 char *path; 3027 struct timeval *tptr; 3028 } */ *uap; 3029 { 3030 3031 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3032 uap->tptr, UIO_USERSPACE)); 3033 } 3034 3035 #ifndef _SYS_SYSPROTO_H_ 3036 struct futimesat_args { 3037 int fd; 3038 const char * path; 3039 const struct timeval * times; 3040 }; 3041 #endif 3042 int 3043 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3044 { 3045 3046 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3047 uap->times, UIO_USERSPACE)); 3048 } 3049 3050 int 3051 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3052 struct timeval *tptr, enum uio_seg tptrseg) 3053 { 3054 struct nameidata nd; 3055 struct timespec ts[2]; 3056 cap_rights_t rights; 3057 int error; 3058 3059 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3060 return (error); 3061 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3062 cap_rights_init(&rights, CAP_FUTIMES), td); 3063 3064 if ((error = namei(&nd)) != 0) 3065 return (error); 3066 NDFREE(&nd, NDF_ONLY_PNBUF); 3067 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3068 vrele(nd.ni_vp); 3069 return (error); 3070 } 3071 3072 /* 3073 * Set the access and modification times of a file. 
3074 */ 3075 #ifndef _SYS_SYSPROTO_H_ 3076 struct lutimes_args { 3077 char *path; 3078 struct timeval *tptr; 3079 }; 3080 #endif 3081 int 3082 sys_lutimes(td, uap) 3083 struct thread *td; 3084 register struct lutimes_args /* { 3085 char *path; 3086 struct timeval *tptr; 3087 } */ *uap; 3088 { 3089 3090 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3091 UIO_USERSPACE)); 3092 } 3093 3094 int 3095 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3096 struct timeval *tptr, enum uio_seg tptrseg) 3097 { 3098 struct timespec ts[2]; 3099 struct nameidata nd; 3100 int error; 3101 3102 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3103 return (error); 3104 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3105 if ((error = namei(&nd)) != 0) 3106 return (error); 3107 NDFREE(&nd, NDF_ONLY_PNBUF); 3108 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3109 vrele(nd.ni_vp); 3110 return (error); 3111 } 3112 3113 /* 3114 * Set the access and modification times of a file. 
3115 */ 3116 #ifndef _SYS_SYSPROTO_H_ 3117 struct futimes_args { 3118 int fd; 3119 struct timeval *tptr; 3120 }; 3121 #endif 3122 int 3123 sys_futimes(td, uap) 3124 struct thread *td; 3125 register struct futimes_args /* { 3126 int fd; 3127 struct timeval *tptr; 3128 } */ *uap; 3129 { 3130 3131 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3132 } 3133 3134 int 3135 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3136 enum uio_seg tptrseg) 3137 { 3138 struct timespec ts[2]; 3139 struct file *fp; 3140 cap_rights_t rights; 3141 int error; 3142 3143 AUDIT_ARG_FD(fd); 3144 error = getutimes(tptr, tptrseg, ts); 3145 if (error != 0) 3146 return (error); 3147 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3148 if (error != 0) 3149 return (error); 3150 #ifdef AUDIT 3151 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3152 AUDIT_ARG_VNODE1(fp->f_vnode); 3153 VOP_UNLOCK(fp->f_vnode, 0); 3154 #endif 3155 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3156 fdrop(fp, td); 3157 return (error); 3158 } 3159 3160 int 3161 sys_futimens(struct thread *td, struct futimens_args *uap) 3162 { 3163 3164 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3165 } 3166 3167 int 3168 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3169 enum uio_seg tptrseg) 3170 { 3171 struct timespec ts[2]; 3172 struct file *fp; 3173 cap_rights_t rights; 3174 int error, flags; 3175 3176 AUDIT_ARG_FD(fd); 3177 error = getutimens(tptr, tptrseg, ts, &flags); 3178 if (error != 0) 3179 return (error); 3180 if (flags & UTIMENS_EXIT) 3181 return (0); 3182 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3183 if (error != 0) 3184 return (error); 3185 #ifdef AUDIT 3186 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3187 AUDIT_ARG_VNODE1(fp->f_vnode); 3188 VOP_UNLOCK(fp->f_vnode, 0); 3189 #endif 3190 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3191 fdrop(fp, td); 3192 return (error); 3193 } 3194 
3195 int 3196 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3197 { 3198 3199 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3200 uap->times, UIO_USERSPACE, uap->flag)); 3201 } 3202 3203 int 3204 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3205 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3206 { 3207 struct nameidata nd; 3208 struct timespec ts[2]; 3209 cap_rights_t rights; 3210 int error, flags; 3211 3212 if (flag & ~AT_SYMLINK_NOFOLLOW) 3213 return (EINVAL); 3214 3215 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3216 return (error); 3217 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3218 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3219 cap_rights_init(&rights, CAP_FUTIMES), td); 3220 if ((error = namei(&nd)) != 0) 3221 return (error); 3222 /* 3223 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3224 * POSIX states: 3225 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3226 * "Search permission is denied by a component of the path prefix." 3227 */ 3228 NDFREE(&nd, NDF_ONLY_PNBUF); 3229 if ((flags & UTIMENS_EXIT) == 0) 3230 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3231 vrele(nd.ni_vp); 3232 return (error); 3233 } 3234 3235 /* 3236 * Truncate a file given its path name. 
3237 */ 3238 #ifndef _SYS_SYSPROTO_H_ 3239 struct truncate_args { 3240 char *path; 3241 int pad; 3242 off_t length; 3243 }; 3244 #endif 3245 int 3246 sys_truncate(td, uap) 3247 struct thread *td; 3248 register struct truncate_args /* { 3249 char *path; 3250 int pad; 3251 off_t length; 3252 } */ *uap; 3253 { 3254 3255 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3256 } 3257 3258 int 3259 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3260 { 3261 struct mount *mp; 3262 struct vnode *vp; 3263 void *rl_cookie; 3264 struct vattr vattr; 3265 struct nameidata nd; 3266 int error; 3267 3268 if (length < 0) 3269 return(EINVAL); 3270 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3271 if ((error = namei(&nd)) != 0) 3272 return (error); 3273 vp = nd.ni_vp; 3274 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3275 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3276 vn_rangelock_unlock(vp, rl_cookie); 3277 vrele(vp); 3278 return (error); 3279 } 3280 NDFREE(&nd, NDF_ONLY_PNBUF); 3281 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3282 if (vp->v_type == VDIR) 3283 error = EISDIR; 3284 #ifdef MAC 3285 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3286 } 3287 #endif 3288 else if ((error = vn_writechk(vp)) == 0 && 3289 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3290 VATTR_NULL(&vattr); 3291 vattr.va_size = length; 3292 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3293 } 3294 VOP_UNLOCK(vp, 0); 3295 vn_finished_write(mp); 3296 vn_rangelock_unlock(vp, rl_cookie); 3297 vrele(vp); 3298 return (error); 3299 } 3300 3301 #if defined(COMPAT_43) 3302 /* 3303 * Truncate a file given its path name. 
3304 */ 3305 #ifndef _SYS_SYSPROTO_H_ 3306 struct otruncate_args { 3307 char *path; 3308 long length; 3309 }; 3310 #endif 3311 int 3312 otruncate(td, uap) 3313 struct thread *td; 3314 register struct otruncate_args /* { 3315 char *path; 3316 long length; 3317 } */ *uap; 3318 { 3319 struct truncate_args /* { 3320 char *path; 3321 int pad; 3322 off_t length; 3323 } */ nuap; 3324 3325 nuap.path = uap->path; 3326 nuap.length = uap->length; 3327 return (sys_truncate(td, &nuap)); 3328 } 3329 #endif /* COMPAT_43 */ 3330 3331 #if defined(COMPAT_FREEBSD6) 3332 /* Versions with the pad argument */ 3333 int 3334 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3335 { 3336 struct truncate_args ouap; 3337 3338 ouap.path = uap->path; 3339 ouap.length = uap->length; 3340 return (sys_truncate(td, &ouap)); 3341 } 3342 3343 int 3344 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3345 { 3346 struct ftruncate_args ouap; 3347 3348 ouap.fd = uap->fd; 3349 ouap.length = uap->length; 3350 return (sys_ftruncate(td, &ouap)); 3351 } 3352 #endif 3353 3354 /* 3355 * Sync an open file. 
3356 */ 3357 #ifndef _SYS_SYSPROTO_H_ 3358 struct fsync_args { 3359 int fd; 3360 }; 3361 #endif 3362 int 3363 sys_fsync(td, uap) 3364 struct thread *td; 3365 struct fsync_args /* { 3366 int fd; 3367 } */ *uap; 3368 { 3369 struct vnode *vp; 3370 struct mount *mp; 3371 struct file *fp; 3372 cap_rights_t rights; 3373 int error, lock_flags; 3374 3375 AUDIT_ARG_FD(uap->fd); 3376 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3377 if (error != 0) 3378 return (error); 3379 vp = fp->f_vnode; 3380 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3381 if (error != 0) 3382 goto drop; 3383 if (MNT_SHARED_WRITES(mp) || 3384 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3385 lock_flags = LK_SHARED; 3386 } else { 3387 lock_flags = LK_EXCLUSIVE; 3388 } 3389 vn_lock(vp, lock_flags | LK_RETRY); 3390 AUDIT_ARG_VNODE1(vp); 3391 if (vp->v_object != NULL) { 3392 VM_OBJECT_WLOCK(vp->v_object); 3393 vm_object_page_clean(vp->v_object, 0, 0, 0); 3394 VM_OBJECT_WUNLOCK(vp->v_object); 3395 } 3396 error = VOP_FSYNC(vp, MNT_WAIT, td); 3397 3398 VOP_UNLOCK(vp, 0); 3399 vn_finished_write(mp); 3400 drop: 3401 fdrop(fp, td); 3402 return (error); 3403 } 3404 3405 /* 3406 * Rename files. Source and destination must either both be directories, or 3407 * both not be directories. If target is a directory, it must be empty. 
3408 */ 3409 #ifndef _SYS_SYSPROTO_H_ 3410 struct rename_args { 3411 char *from; 3412 char *to; 3413 }; 3414 #endif 3415 int 3416 sys_rename(td, uap) 3417 struct thread *td; 3418 register struct rename_args /* { 3419 char *from; 3420 char *to; 3421 } */ *uap; 3422 { 3423 3424 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3425 uap->to, UIO_USERSPACE)); 3426 } 3427 3428 #ifndef _SYS_SYSPROTO_H_ 3429 struct renameat_args { 3430 int oldfd; 3431 char *old; 3432 int newfd; 3433 char *new; 3434 }; 3435 #endif 3436 int 3437 sys_renameat(struct thread *td, struct renameat_args *uap) 3438 { 3439 3440 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3441 UIO_USERSPACE)); 3442 } 3443 3444 int 3445 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3446 enum uio_seg pathseg) 3447 { 3448 struct mount *mp = NULL; 3449 struct vnode *tvp, *fvp, *tdvp; 3450 struct nameidata fromnd, tond; 3451 cap_rights_t rights; 3452 int error; 3453 3454 again: 3455 bwillwrite(); 3456 #ifdef MAC 3457 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3458 AUDITVNODE1, pathseg, old, oldfd, 3459 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3460 #else 3461 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3462 pathseg, old, oldfd, 3463 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3464 #endif 3465 3466 if ((error = namei(&fromnd)) != 0) 3467 return (error); 3468 #ifdef MAC 3469 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3470 fromnd.ni_vp, &fromnd.ni_cnd); 3471 VOP_UNLOCK(fromnd.ni_dvp, 0); 3472 if (fromnd.ni_dvp != fromnd.ni_vp) 3473 VOP_UNLOCK(fromnd.ni_vp, 0); 3474 #endif 3475 fvp = fromnd.ni_vp; 3476 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3477 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3478 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3479 if (fromnd.ni_vp->v_type == VDIR) 3480 tond.ni_cnd.cn_flags |= WILLBEDIR; 3481 if ((error = 
namei(&tond)) != 0) { 3482 /* Translate error code for rename("dir1", "dir2/."). */ 3483 if (error == EISDIR && fvp->v_type == VDIR) 3484 error = EINVAL; 3485 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3486 vrele(fromnd.ni_dvp); 3487 vrele(fvp); 3488 goto out1; 3489 } 3490 tdvp = tond.ni_dvp; 3491 tvp = tond.ni_vp; 3492 error = vn_start_write(fvp, &mp, V_NOWAIT); 3493 if (error != 0) { 3494 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3495 NDFREE(&tond, NDF_ONLY_PNBUF); 3496 if (tvp != NULL) 3497 vput(tvp); 3498 if (tdvp == tvp) 3499 vrele(tdvp); 3500 else 3501 vput(tdvp); 3502 vrele(fromnd.ni_dvp); 3503 vrele(fvp); 3504 vrele(tond.ni_startdir); 3505 if (fromnd.ni_startdir != NULL) 3506 vrele(fromnd.ni_startdir); 3507 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3508 if (error != 0) 3509 return (error); 3510 goto again; 3511 } 3512 if (tvp != NULL) { 3513 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3514 error = ENOTDIR; 3515 goto out; 3516 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3517 error = EISDIR; 3518 goto out; 3519 } 3520 #ifdef CAPABILITIES 3521 if (newfd != AT_FDCWD) { 3522 /* 3523 * If the target already exists we require CAP_UNLINKAT 3524 * from 'newfd'. 3525 */ 3526 error = cap_check(&tond.ni_filecaps.fc_rights, 3527 cap_rights_init(&rights, CAP_UNLINKAT)); 3528 if (error != 0) 3529 goto out; 3530 } 3531 #endif 3532 } 3533 if (fvp == tdvp) { 3534 error = EINVAL; 3535 goto out; 3536 } 3537 /* 3538 * If the source is the same as the destination (that is, if they 3539 * are links to the same vnode), then there is nothing to do. 
3540 */ 3541 if (fvp == tvp) 3542 error = -1; 3543 #ifdef MAC 3544 else 3545 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3546 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3547 #endif 3548 out: 3549 if (error == 0) { 3550 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3551 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3552 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3553 NDFREE(&tond, NDF_ONLY_PNBUF); 3554 } else { 3555 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3556 NDFREE(&tond, NDF_ONLY_PNBUF); 3557 if (tvp != NULL) 3558 vput(tvp); 3559 if (tdvp == tvp) 3560 vrele(tdvp); 3561 else 3562 vput(tdvp); 3563 vrele(fromnd.ni_dvp); 3564 vrele(fvp); 3565 } 3566 vrele(tond.ni_startdir); 3567 vn_finished_write(mp); 3568 out1: 3569 if (fromnd.ni_startdir) 3570 vrele(fromnd.ni_startdir); 3571 if (error == -1) 3572 return (0); 3573 return (error); 3574 } 3575 3576 /* 3577 * Make a directory file. 3578 */ 3579 #ifndef _SYS_SYSPROTO_H_ 3580 struct mkdir_args { 3581 char *path; 3582 int mode; 3583 }; 3584 #endif 3585 int 3586 sys_mkdir(td, uap) 3587 struct thread *td; 3588 register struct mkdir_args /* { 3589 char *path; 3590 int mode; 3591 } */ *uap; 3592 { 3593 3594 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3595 uap->mode)); 3596 } 3597 3598 #ifndef _SYS_SYSPROTO_H_ 3599 struct mkdirat_args { 3600 int fd; 3601 char *path; 3602 mode_t mode; 3603 }; 3604 #endif 3605 int 3606 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3607 { 3608 3609 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3610 } 3611 3612 int 3613 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3614 int mode) 3615 { 3616 struct mount *mp; 3617 struct vnode *vp; 3618 struct vattr vattr; 3619 struct nameidata nd; 3620 cap_rights_t rights; 3621 int error; 3622 3623 AUDIT_ARG_MODE(mode); 3624 restart: 3625 bwillwrite(); 3626 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3627 NOCACHE, segflg, path, fd, 
cap_rights_init(&rights, CAP_MKDIRAT), 3628 td); 3629 nd.ni_cnd.cn_flags |= WILLBEDIR; 3630 if ((error = namei(&nd)) != 0) 3631 return (error); 3632 vp = nd.ni_vp; 3633 if (vp != NULL) { 3634 NDFREE(&nd, NDF_ONLY_PNBUF); 3635 /* 3636 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3637 * the strange behaviour of leaving the vnode unlocked 3638 * if the target is the same vnode as the parent. 3639 */ 3640 if (vp == nd.ni_dvp) 3641 vrele(nd.ni_dvp); 3642 else 3643 vput(nd.ni_dvp); 3644 vrele(vp); 3645 return (EEXIST); 3646 } 3647 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3648 NDFREE(&nd, NDF_ONLY_PNBUF); 3649 vput(nd.ni_dvp); 3650 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3651 return (error); 3652 goto restart; 3653 } 3654 VATTR_NULL(&vattr); 3655 vattr.va_type = VDIR; 3656 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3657 #ifdef MAC 3658 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3659 &vattr); 3660 if (error != 0) 3661 goto out; 3662 #endif 3663 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3664 #ifdef MAC 3665 out: 3666 #endif 3667 NDFREE(&nd, NDF_ONLY_PNBUF); 3668 vput(nd.ni_dvp); 3669 if (error == 0) 3670 vput(nd.ni_vp); 3671 vn_finished_write(mp); 3672 return (error); 3673 } 3674 3675 /* 3676 * Remove a directory file. 
3677 */ 3678 #ifndef _SYS_SYSPROTO_H_ 3679 struct rmdir_args { 3680 char *path; 3681 }; 3682 #endif 3683 int 3684 sys_rmdir(td, uap) 3685 struct thread *td; 3686 struct rmdir_args /* { 3687 char *path; 3688 } */ *uap; 3689 { 3690 3691 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3692 } 3693 3694 int 3695 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3696 { 3697 struct mount *mp; 3698 struct vnode *vp; 3699 struct nameidata nd; 3700 cap_rights_t rights; 3701 int error; 3702 3703 restart: 3704 bwillwrite(); 3705 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3706 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3707 if ((error = namei(&nd)) != 0) 3708 return (error); 3709 vp = nd.ni_vp; 3710 if (vp->v_type != VDIR) { 3711 error = ENOTDIR; 3712 goto out; 3713 } 3714 /* 3715 * No rmdir "." please. 3716 */ 3717 if (nd.ni_dvp == vp) { 3718 error = EINVAL; 3719 goto out; 3720 } 3721 /* 3722 * The root of a mounted filesystem cannot be deleted. 3723 */ 3724 if (vp->v_vflag & VV_ROOT) { 3725 error = EBUSY; 3726 goto out; 3727 } 3728 #ifdef MAC 3729 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3730 &nd.ni_cnd); 3731 if (error != 0) 3732 goto out; 3733 #endif 3734 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3735 NDFREE(&nd, NDF_ONLY_PNBUF); 3736 vput(vp); 3737 if (nd.ni_dvp == vp) 3738 vrele(nd.ni_dvp); 3739 else 3740 vput(nd.ni_dvp); 3741 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3742 return (error); 3743 goto restart; 3744 } 3745 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3746 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3747 vn_finished_write(mp); 3748 out: 3749 NDFREE(&nd, NDF_ONLY_PNBUF); 3750 vput(vp); 3751 if (nd.ni_dvp == vp) 3752 vrele(nd.ni_dvp); 3753 else 3754 vput(nd.ni_dvp); 3755 return (error); 3756 } 3757 3758 #ifdef COMPAT_43 3759 /* 3760 * Read a block of directory entries in a filesystem independent format. 
3761 */ 3762 #ifndef _SYS_SYSPROTO_H_ 3763 struct ogetdirentries_args { 3764 int fd; 3765 char *buf; 3766 u_int count; 3767 long *basep; 3768 }; 3769 #endif 3770 int 3771 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3772 { 3773 long loff; 3774 int error; 3775 3776 error = kern_ogetdirentries(td, uap, &loff); 3777 if (error == 0) 3778 error = copyout(&loff, uap->basep, sizeof(long)); 3779 return (error); 3780 } 3781 3782 int 3783 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3784 long *ploff) 3785 { 3786 struct vnode *vp; 3787 struct file *fp; 3788 struct uio auio, kuio; 3789 struct iovec aiov, kiov; 3790 struct dirent *dp, *edp; 3791 cap_rights_t rights; 3792 caddr_t dirbuf; 3793 int error, eofflag, readcnt; 3794 long loff; 3795 off_t foffset; 3796 3797 /* XXX arbitrary sanity limit on `count'. */ 3798 if (uap->count > 64 * 1024) 3799 return (EINVAL); 3800 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3801 if (error != 0) 3802 return (error); 3803 if ((fp->f_flag & FREAD) == 0) { 3804 fdrop(fp, td); 3805 return (EBADF); 3806 } 3807 vp = fp->f_vnode; 3808 foffset = foffset_lock(fp, 0); 3809 unionread: 3810 if (vp->v_type != VDIR) { 3811 foffset_unlock(fp, foffset, 0); 3812 fdrop(fp, td); 3813 return (EINVAL); 3814 } 3815 aiov.iov_base = uap->buf; 3816 aiov.iov_len = uap->count; 3817 auio.uio_iov = &aiov; 3818 auio.uio_iovcnt = 1; 3819 auio.uio_rw = UIO_READ; 3820 auio.uio_segflg = UIO_USERSPACE; 3821 auio.uio_td = td; 3822 auio.uio_resid = uap->count; 3823 vn_lock(vp, LK_SHARED | LK_RETRY); 3824 loff = auio.uio_offset = foffset; 3825 #ifdef MAC 3826 error = mac_vnode_check_readdir(td->td_ucred, vp); 3827 if (error != 0) { 3828 VOP_UNLOCK(vp, 0); 3829 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3830 fdrop(fp, td); 3831 return (error); 3832 } 3833 #endif 3834 # if (BYTE_ORDER != LITTLE_ENDIAN) 3835 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3836 error = VOP_READDIR(vp, &auio, fp->f_cred, 
&eofflag, 3837 NULL, NULL); 3838 foffset = auio.uio_offset; 3839 } else 3840 # endif 3841 { 3842 kuio = auio; 3843 kuio.uio_iov = &kiov; 3844 kuio.uio_segflg = UIO_SYSSPACE; 3845 kiov.iov_len = uap->count; 3846 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3847 kiov.iov_base = dirbuf; 3848 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3849 NULL, NULL); 3850 foffset = kuio.uio_offset; 3851 if (error == 0) { 3852 readcnt = uap->count - kuio.uio_resid; 3853 edp = (struct dirent *)&dirbuf[readcnt]; 3854 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3855 # if (BYTE_ORDER == LITTLE_ENDIAN) 3856 /* 3857 * The expected low byte of 3858 * dp->d_namlen is our dp->d_type. 3859 * The high MBZ byte of dp->d_namlen 3860 * is our dp->d_namlen. 3861 */ 3862 dp->d_type = dp->d_namlen; 3863 dp->d_namlen = 0; 3864 # else 3865 /* 3866 * The dp->d_type is the high byte 3867 * of the expected dp->d_namlen, 3868 * so must be zero'ed. 3869 */ 3870 dp->d_type = 0; 3871 # endif 3872 if (dp->d_reclen > 0) { 3873 dp = (struct dirent *) 3874 ((char *)dp + dp->d_reclen); 3875 } else { 3876 error = EIO; 3877 break; 3878 } 3879 } 3880 if (dp >= edp) 3881 error = uiomove(dirbuf, readcnt, &auio); 3882 } 3883 free(dirbuf, M_TEMP); 3884 } 3885 if (error != 0) { 3886 VOP_UNLOCK(vp, 0); 3887 foffset_unlock(fp, foffset, 0); 3888 fdrop(fp, td); 3889 return (error); 3890 } 3891 if (uap->count == auio.uio_resid && 3892 (vp->v_vflag & VV_ROOT) && 3893 (vp->v_mount->mnt_flag & MNT_UNION)) { 3894 struct vnode *tvp = vp; 3895 vp = vp->v_mount->mnt_vnodecovered; 3896 VREF(vp); 3897 fp->f_vnode = vp; 3898 fp->f_data = vp; 3899 foffset = 0; 3900 vput(tvp); 3901 goto unionread; 3902 } 3903 VOP_UNLOCK(vp, 0); 3904 foffset_unlock(fp, foffset, 0); 3905 fdrop(fp, td); 3906 td->td_retval[0] = uap->count - auio.uio_resid; 3907 if (error == 0) 3908 *ploff = loff; 3909 return (error); 3910 } 3911 #endif /* COMPAT_43 */ 3912 3913 /* 3914 * Read a block of directory entries in a filesystem independent format. 
3915 */ 3916 #ifndef _SYS_SYSPROTO_H_ 3917 struct getdirentries_args { 3918 int fd; 3919 char *buf; 3920 u_int count; 3921 long *basep; 3922 }; 3923 #endif 3924 int 3925 sys_getdirentries(td, uap) 3926 struct thread *td; 3927 register struct getdirentries_args /* { 3928 int fd; 3929 char *buf; 3930 u_int count; 3931 long *basep; 3932 } */ *uap; 3933 { 3934 long base; 3935 int error; 3936 3937 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3938 NULL, UIO_USERSPACE); 3939 if (error != 0) 3940 return (error); 3941 if (uap->basep != NULL) 3942 error = copyout(&base, uap->basep, sizeof(long)); 3943 return (error); 3944 } 3945 3946 int 3947 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3948 long *basep, ssize_t *residp, enum uio_seg bufseg) 3949 { 3950 struct vnode *vp; 3951 struct file *fp; 3952 struct uio auio; 3953 struct iovec aiov; 3954 cap_rights_t rights; 3955 long loff; 3956 int error, eofflag; 3957 off_t foffset; 3958 3959 AUDIT_ARG_FD(fd); 3960 if (count > IOSIZE_MAX) 3961 return (EINVAL); 3962 auio.uio_resid = count; 3963 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3964 if (error != 0) 3965 return (error); 3966 if ((fp->f_flag & FREAD) == 0) { 3967 fdrop(fp, td); 3968 return (EBADF); 3969 } 3970 vp = fp->f_vnode; 3971 foffset = foffset_lock(fp, 0); 3972 unionread: 3973 if (vp->v_type != VDIR) { 3974 error = EINVAL; 3975 goto fail; 3976 } 3977 aiov.iov_base = buf; 3978 aiov.iov_len = count; 3979 auio.uio_iov = &aiov; 3980 auio.uio_iovcnt = 1; 3981 auio.uio_rw = UIO_READ; 3982 auio.uio_segflg = bufseg; 3983 auio.uio_td = td; 3984 vn_lock(vp, LK_SHARED | LK_RETRY); 3985 AUDIT_ARG_VNODE1(vp); 3986 loff = auio.uio_offset = foffset; 3987 #ifdef MAC 3988 error = mac_vnode_check_readdir(td->td_ucred, vp); 3989 if (error == 0) 3990 #endif 3991 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3992 NULL); 3993 foffset = auio.uio_offset; 3994 if (error != 0) { 3995 VOP_UNLOCK(vp, 0); 3996 goto 
fail; 3997 } 3998 if (count == auio.uio_resid && 3999 (vp->v_vflag & VV_ROOT) && 4000 (vp->v_mount->mnt_flag & MNT_UNION)) { 4001 struct vnode *tvp = vp; 4002 4003 vp = vp->v_mount->mnt_vnodecovered; 4004 VREF(vp); 4005 fp->f_vnode = vp; 4006 fp->f_data = vp; 4007 foffset = 0; 4008 vput(tvp); 4009 goto unionread; 4010 } 4011 VOP_UNLOCK(vp, 0); 4012 *basep = loff; 4013 if (residp != NULL) 4014 *residp = auio.uio_resid; 4015 td->td_retval[0] = count - auio.uio_resid; 4016 fail: 4017 foffset_unlock(fp, foffset, 0); 4018 fdrop(fp, td); 4019 return (error); 4020 } 4021 4022 #ifndef _SYS_SYSPROTO_H_ 4023 struct getdents_args { 4024 int fd; 4025 char *buf; 4026 size_t count; 4027 }; 4028 #endif 4029 int 4030 sys_getdents(td, uap) 4031 struct thread *td; 4032 register struct getdents_args /* { 4033 int fd; 4034 char *buf; 4035 u_int count; 4036 } */ *uap; 4037 { 4038 struct getdirentries_args ap; 4039 4040 ap.fd = uap->fd; 4041 ap.buf = uap->buf; 4042 ap.count = uap->count; 4043 ap.basep = NULL; 4044 return (sys_getdirentries(td, &ap)); 4045 } 4046 4047 /* 4048 * Set the mode mask for creation of filesystem nodes. 4049 */ 4050 #ifndef _SYS_SYSPROTO_H_ 4051 struct umask_args { 4052 int newmask; 4053 }; 4054 #endif 4055 int 4056 sys_umask(td, uap) 4057 struct thread *td; 4058 struct umask_args /* { 4059 int newmask; 4060 } */ *uap; 4061 { 4062 struct filedesc *fdp; 4063 4064 fdp = td->td_proc->p_fd; 4065 FILEDESC_XLOCK(fdp); 4066 td->td_retval[0] = fdp->fd_cmask; 4067 fdp->fd_cmask = uap->newmask & ALLPERMS; 4068 FILEDESC_XUNLOCK(fdp); 4069 return (0); 4070 } 4071 4072 /* 4073 * Void all references to file by ripping underlying filesystem away from 4074 * vnode. 
4075 */ 4076 #ifndef _SYS_SYSPROTO_H_ 4077 struct revoke_args { 4078 char *path; 4079 }; 4080 #endif 4081 int 4082 sys_revoke(td, uap) 4083 struct thread *td; 4084 register struct revoke_args /* { 4085 char *path; 4086 } */ *uap; 4087 { 4088 struct vnode *vp; 4089 struct vattr vattr; 4090 struct nameidata nd; 4091 int error; 4092 4093 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4094 uap->path, td); 4095 if ((error = namei(&nd)) != 0) 4096 return (error); 4097 vp = nd.ni_vp; 4098 NDFREE(&nd, NDF_ONLY_PNBUF); 4099 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4100 error = EINVAL; 4101 goto out; 4102 } 4103 #ifdef MAC 4104 error = mac_vnode_check_revoke(td->td_ucred, vp); 4105 if (error != 0) 4106 goto out; 4107 #endif 4108 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4109 if (error != 0) 4110 goto out; 4111 if (td->td_ucred->cr_uid != vattr.va_uid) { 4112 error = priv_check(td, PRIV_VFS_ADMIN); 4113 if (error != 0) 4114 goto out; 4115 } 4116 if (vcount(vp) > 1) 4117 VOP_REVOKE(vp, REVOKEALL); 4118 out: 4119 vput(vp); 4120 return (error); 4121 } 4122 4123 /* 4124 * Convert a user file descriptor to a kernel file entry and check that, if it 4125 * is a capability, the correct rights are present. A reference on the file 4126 * entry is held upon returning. 4127 */ 4128 int 4129 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4130 { 4131 struct file *fp; 4132 int error; 4133 4134 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4135 if (error != 0) 4136 return (error); 4137 4138 /* 4139 * The file could be not of the vnode type, or it may be not 4140 * yet fully initialized, in which case the f_vnode pointer 4141 * may be set, but f_ops is still badfileops. E.g., 4142 * devfs_open() transiently create such situation to 4143 * facilitate csw d_fdopen(). 
4144 * 4145 * Dupfdopen() handling in kern_openat() installs the 4146 * half-baked file into the process descriptor table, allowing 4147 * other thread to dereference it. Guard against the race by 4148 * checking f_ops. 4149 */ 4150 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4151 fdrop(fp, td); 4152 return (EINVAL); 4153 } 4154 *fpp = fp; 4155 return (0); 4156 } 4157 4158 4159 /* 4160 * Get an (NFS) file handle. 4161 */ 4162 #ifndef _SYS_SYSPROTO_H_ 4163 struct lgetfh_args { 4164 char *fname; 4165 fhandle_t *fhp; 4166 }; 4167 #endif 4168 int 4169 sys_lgetfh(td, uap) 4170 struct thread *td; 4171 register struct lgetfh_args *uap; 4172 { 4173 struct nameidata nd; 4174 fhandle_t fh; 4175 register struct vnode *vp; 4176 int error; 4177 4178 error = priv_check(td, PRIV_VFS_GETFH); 4179 if (error != 0) 4180 return (error); 4181 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4182 uap->fname, td); 4183 error = namei(&nd); 4184 if (error != 0) 4185 return (error); 4186 NDFREE(&nd, NDF_ONLY_PNBUF); 4187 vp = nd.ni_vp; 4188 bzero(&fh, sizeof(fh)); 4189 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4190 error = VOP_VPTOFH(vp, &fh.fh_fid); 4191 vput(vp); 4192 if (error == 0) 4193 error = copyout(&fh, uap->fhp, sizeof (fh)); 4194 return (error); 4195 } 4196 4197 #ifndef _SYS_SYSPROTO_H_ 4198 struct getfh_args { 4199 char *fname; 4200 fhandle_t *fhp; 4201 }; 4202 #endif 4203 int 4204 sys_getfh(td, uap) 4205 struct thread *td; 4206 register struct getfh_args *uap; 4207 { 4208 struct nameidata nd; 4209 fhandle_t fh; 4210 register struct vnode *vp; 4211 int error; 4212 4213 error = priv_check(td, PRIV_VFS_GETFH); 4214 if (error != 0) 4215 return (error); 4216 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4217 uap->fname, td); 4218 error = namei(&nd); 4219 if (error != 0) 4220 return (error); 4221 NDFREE(&nd, NDF_ONLY_PNBUF); 4222 vp = nd.ni_vp; 4223 bzero(&fh, sizeof(fh)); 4224 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4225 error 
= VOP_VPTOFH(vp, &fh.fh_fid); 4226 vput(vp); 4227 if (error == 0) 4228 error = copyout(&fh, uap->fhp, sizeof (fh)); 4229 return (error); 4230 } 4231 4232 /* 4233 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4234 * open descriptor. 4235 * 4236 * warning: do not remove the priv_check() call or this becomes one giant 4237 * security hole. 4238 */ 4239 #ifndef _SYS_SYSPROTO_H_ 4240 struct fhopen_args { 4241 const struct fhandle *u_fhp; 4242 int flags; 4243 }; 4244 #endif 4245 int 4246 sys_fhopen(td, uap) 4247 struct thread *td; 4248 struct fhopen_args /* { 4249 const struct fhandle *u_fhp; 4250 int flags; 4251 } */ *uap; 4252 { 4253 struct mount *mp; 4254 struct vnode *vp; 4255 struct fhandle fhp; 4256 struct file *fp; 4257 int fmode, error; 4258 int indx; 4259 4260 error = priv_check(td, PRIV_VFS_FHOPEN); 4261 if (error != 0) 4262 return (error); 4263 indx = -1; 4264 fmode = FFLAGS(uap->flags); 4265 /* why not allow a non-read/write open for our lockd? */ 4266 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4267 return (EINVAL); 4268 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4269 if (error != 0) 4270 return(error); 4271 /* find the mount point */ 4272 mp = vfs_busyfs(&fhp.fh_fsid); 4273 if (mp == NULL) 4274 return (ESTALE); 4275 /* now give me my vnode, it gets returned to me locked */ 4276 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4277 vfs_unbusy(mp); 4278 if (error != 0) 4279 return (error); 4280 4281 error = falloc_noinstall(td, &fp); 4282 if (error != 0) { 4283 vput(vp); 4284 return (error); 4285 } 4286 /* 4287 * An extra reference on `fp' has been held for us by 4288 * falloc_noinstall(). 
4289 */ 4290 4291 #ifdef INVARIANTS 4292 td->td_dupfd = -1; 4293 #endif 4294 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4295 if (error != 0) { 4296 KASSERT(fp->f_ops == &badfileops, 4297 ("VOP_OPEN in fhopen() set f_ops")); 4298 KASSERT(td->td_dupfd < 0, 4299 ("fhopen() encountered fdopen()")); 4300 4301 vput(vp); 4302 goto bad; 4303 } 4304 #ifdef INVARIANTS 4305 td->td_dupfd = 0; 4306 #endif 4307 fp->f_vnode = vp; 4308 fp->f_seqcount = 1; 4309 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4310 &vnops); 4311 VOP_UNLOCK(vp, 0); 4312 if ((fmode & O_TRUNC) != 0) { 4313 error = fo_truncate(fp, 0, td->td_ucred, td); 4314 if (error != 0) 4315 goto bad; 4316 } 4317 4318 error = finstall(td, fp, &indx, fmode, NULL); 4319 bad: 4320 fdrop(fp, td); 4321 td->td_retval[0] = indx; 4322 return (error); 4323 } 4324 4325 /* 4326 * Stat an (NFS) file handle. 4327 */ 4328 #ifndef _SYS_SYSPROTO_H_ 4329 struct fhstat_args { 4330 struct fhandle *u_fhp; 4331 struct stat *sb; 4332 }; 4333 #endif 4334 int 4335 sys_fhstat(td, uap) 4336 struct thread *td; 4337 register struct fhstat_args /* { 4338 struct fhandle *u_fhp; 4339 struct stat *sb; 4340 } */ *uap; 4341 { 4342 struct stat sb; 4343 struct fhandle fh; 4344 int error; 4345 4346 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4347 if (error != 0) 4348 return (error); 4349 error = kern_fhstat(td, fh, &sb); 4350 if (error == 0) 4351 error = copyout(&sb, uap->sb, sizeof(sb)); 4352 return (error); 4353 } 4354 4355 int 4356 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4357 { 4358 struct mount *mp; 4359 struct vnode *vp; 4360 int error; 4361 4362 error = priv_check(td, PRIV_VFS_FHSTAT); 4363 if (error != 0) 4364 return (error); 4365 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4366 return (ESTALE); 4367 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4368 vfs_unbusy(mp); 4369 if (error != 0) 4370 return (error); 4371 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4372 vput(vp); 
4373 return (error); 4374 } 4375 4376 /* 4377 * Implement fstatfs() for (NFS) file handles. 4378 */ 4379 #ifndef _SYS_SYSPROTO_H_ 4380 struct fhstatfs_args { 4381 struct fhandle *u_fhp; 4382 struct statfs *buf; 4383 }; 4384 #endif 4385 int 4386 sys_fhstatfs(td, uap) 4387 struct thread *td; 4388 struct fhstatfs_args /* { 4389 struct fhandle *u_fhp; 4390 struct statfs *buf; 4391 } */ *uap; 4392 { 4393 struct statfs sf; 4394 fhandle_t fh; 4395 int error; 4396 4397 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4398 if (error != 0) 4399 return (error); 4400 error = kern_fhstatfs(td, fh, &sf); 4401 if (error != 0) 4402 return (error); 4403 return (copyout(&sf, uap->buf, sizeof(sf))); 4404 } 4405 4406 int 4407 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4408 { 4409 struct statfs *sp; 4410 struct mount *mp; 4411 struct vnode *vp; 4412 int error; 4413 4414 error = priv_check(td, PRIV_VFS_FHSTATFS); 4415 if (error != 0) 4416 return (error); 4417 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4418 return (ESTALE); 4419 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4420 if (error != 0) { 4421 vfs_unbusy(mp); 4422 return (error); 4423 } 4424 vput(vp); 4425 error = prison_canseemount(td->td_ucred, mp); 4426 if (error != 0) 4427 goto out; 4428 #ifdef MAC 4429 error = mac_mount_check_stat(td->td_ucred, mp); 4430 if (error != 0) 4431 goto out; 4432 #endif 4433 /* 4434 * Set these in case the underlying filesystem fails to do so. 
4435 */ 4436 sp = &mp->mnt_stat; 4437 sp->f_version = STATFS_VERSION; 4438 sp->f_namemax = NAME_MAX; 4439 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4440 error = VFS_STATFS(mp, sp); 4441 if (error == 0) 4442 *buf = *sp; 4443 out: 4444 vfs_unbusy(mp); 4445 return (error); 4446 } 4447 4448 int 4449 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4450 { 4451 struct file *fp; 4452 struct mount *mp; 4453 struct vnode *vp; 4454 cap_rights_t rights; 4455 off_t olen, ooffset; 4456 int error; 4457 4458 if (offset < 0 || len <= 0) 4459 return (EINVAL); 4460 /* Check for wrap. */ 4461 if (offset > OFF_MAX - len) 4462 return (EFBIG); 4463 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4464 if (error != 0) 4465 return (error); 4466 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4467 error = ESPIPE; 4468 goto out; 4469 } 4470 if ((fp->f_flag & FWRITE) == 0) { 4471 error = EBADF; 4472 goto out; 4473 } 4474 if (fp->f_type != DTYPE_VNODE) { 4475 error = ENODEV; 4476 goto out; 4477 } 4478 vp = fp->f_vnode; 4479 if (vp->v_type != VREG) { 4480 error = ENODEV; 4481 goto out; 4482 } 4483 4484 /* Allocating blocks may take a long time, so iterate. 
*/ 4485 for (;;) { 4486 olen = len; 4487 ooffset = offset; 4488 4489 bwillwrite(); 4490 mp = NULL; 4491 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4492 if (error != 0) 4493 break; 4494 error = vn_lock(vp, LK_EXCLUSIVE); 4495 if (error != 0) { 4496 vn_finished_write(mp); 4497 break; 4498 } 4499 #ifdef MAC 4500 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4501 if (error == 0) 4502 #endif 4503 error = VOP_ALLOCATE(vp, &offset, &len); 4504 VOP_UNLOCK(vp, 0); 4505 vn_finished_write(mp); 4506 4507 if (olen + ooffset != offset + len) { 4508 panic("offset + len changed from %jx/%jx to %jx/%jx", 4509 ooffset, olen, offset, len); 4510 } 4511 if (error != 0 || len == 0) 4512 break; 4513 KASSERT(olen > len, ("Iteration did not make progress?")); 4514 maybe_yield(); 4515 } 4516 out: 4517 fdrop(fp, td); 4518 return (error); 4519 } 4520 4521 int 4522 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4523 { 4524 int error; 4525 4526 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4527 return (kern_posix_error(td, error)); 4528 } 4529 4530 /* 4531 * Unlike madvise(2), we do not make a best effort to remember every 4532 * possible caching hint. Instead, we remember the last setting with 4533 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4534 * region of any current setting. 
4535 */ 4536 int 4537 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4538 int advice) 4539 { 4540 struct fadvise_info *fa, *new; 4541 struct file *fp; 4542 struct vnode *vp; 4543 cap_rights_t rights; 4544 off_t end; 4545 int error; 4546 4547 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4548 return (EINVAL); 4549 switch (advice) { 4550 case POSIX_FADV_SEQUENTIAL: 4551 case POSIX_FADV_RANDOM: 4552 case POSIX_FADV_NOREUSE: 4553 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4554 break; 4555 case POSIX_FADV_NORMAL: 4556 case POSIX_FADV_WILLNEED: 4557 case POSIX_FADV_DONTNEED: 4558 new = NULL; 4559 break; 4560 default: 4561 return (EINVAL); 4562 } 4563 /* XXX: CAP_POSIX_FADVISE? */ 4564 error = fget(td, fd, cap_rights_init(&rights), &fp); 4565 if (error != 0) 4566 goto out; 4567 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4568 error = ESPIPE; 4569 goto out; 4570 } 4571 if (fp->f_type != DTYPE_VNODE) { 4572 error = ENODEV; 4573 goto out; 4574 } 4575 vp = fp->f_vnode; 4576 if (vp->v_type != VREG) { 4577 error = ENODEV; 4578 goto out; 4579 } 4580 if (len == 0) 4581 end = OFF_MAX; 4582 else 4583 end = offset + len - 1; 4584 switch (advice) { 4585 case POSIX_FADV_SEQUENTIAL: 4586 case POSIX_FADV_RANDOM: 4587 case POSIX_FADV_NOREUSE: 4588 /* 4589 * Try to merge any existing non-standard region with 4590 * this new region if possible, otherwise create a new 4591 * non-standard region for this request. 
4592 */ 4593 mtx_pool_lock(mtxpool_sleep, fp); 4594 fa = fp->f_advice; 4595 if (fa != NULL && fa->fa_advice == advice && 4596 ((fa->fa_start <= end && fa->fa_end >= offset) || 4597 (end != OFF_MAX && fa->fa_start == end + 1) || 4598 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4599 if (offset < fa->fa_start) 4600 fa->fa_start = offset; 4601 if (end > fa->fa_end) 4602 fa->fa_end = end; 4603 } else { 4604 new->fa_advice = advice; 4605 new->fa_start = offset; 4606 new->fa_end = end; 4607 fp->f_advice = new; 4608 new = fa; 4609 } 4610 mtx_pool_unlock(mtxpool_sleep, fp); 4611 break; 4612 case POSIX_FADV_NORMAL: 4613 /* 4614 * If a the "normal" region overlaps with an existing 4615 * non-standard region, trim or remove the 4616 * non-standard region. 4617 */ 4618 mtx_pool_lock(mtxpool_sleep, fp); 4619 fa = fp->f_advice; 4620 if (fa != NULL) { 4621 if (offset <= fa->fa_start && end >= fa->fa_end) { 4622 new = fa; 4623 fp->f_advice = NULL; 4624 } else if (offset <= fa->fa_start && 4625 end >= fa->fa_start) 4626 fa->fa_start = end + 1; 4627 else if (offset <= fa->fa_end && end >= fa->fa_end) 4628 fa->fa_end = offset - 1; 4629 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4630 /* 4631 * If the "normal" region is a middle 4632 * portion of the existing 4633 * non-standard region, just remove 4634 * the whole thing rather than picking 4635 * one side or the other to 4636 * preserve. 
4637 */ 4638 new = fa; 4639 fp->f_advice = NULL; 4640 } 4641 } 4642 mtx_pool_unlock(mtxpool_sleep, fp); 4643 break; 4644 case POSIX_FADV_WILLNEED: 4645 case POSIX_FADV_DONTNEED: 4646 error = VOP_ADVISE(vp, offset, end, advice); 4647 break; 4648 } 4649 out: 4650 if (fp != NULL) 4651 fdrop(fp, td); 4652 free(new, M_FADVISE); 4653 return (error); 4654 } 4655 4656 int 4657 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4658 { 4659 int error; 4660 4661 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4662 uap->advice); 4663 return (kern_posix_error(td, error)); 4664 } 4665