1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 /* 248 * Get filesystem statistics. 249 */ 250 #ifndef _SYS_SYSPROTO_H_ 251 struct statfs_args { 252 char *path; 253 struct statfs *buf; 254 }; 255 #endif 256 int 257 sys_statfs(td, uap) 258 struct thread *td; 259 register struct statfs_args /* { 260 char *path; 261 struct statfs *buf; 262 } */ *uap; 263 { 264 struct statfs sf; 265 int error; 266 267 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 268 if (error == 0) 269 error = copyout(&sf, uap->buf, sizeof(sf)); 270 return (error); 271 } 272 273 int 274 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 275 struct statfs *buf) 276 { 277 struct mount *mp; 278 struct statfs *sp, sb; 279 struct nameidata nd; 280 int error; 281 282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 283 pathseg, path, td); 284 error = namei(&nd); 285 if (error != 0) 286 return (error); 287 mp = nd.ni_vp->v_mount; 288 vfs_ref(mp); 289 NDFREE(&nd, NDF_ONLY_PNBUF); 290 vput(nd.ni_vp); 291 error = vfs_busy(mp, 0); 292 vfs_rel(mp); 293 if (error != 0) 294 return (error); 295 #ifdef MAC 296 error = mac_mount_check_stat(td->td_ucred, mp); 297 if (error != 0) 298 goto out; 299 #endif 300 /* 301 * Set these in case the underlying filesystem fails to do so. 302 */ 303 sp = &mp->mnt_stat; 304 sp->f_version = STATFS_VERSION; 305 sp->f_namemax = NAME_MAX; 306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 307 error = VFS_STATFS(mp, sp); 308 if (error != 0) 309 goto out; 310 if (priv_check(td, PRIV_VFS_GENERATION)) { 311 bcopy(sp, &sb, sizeof(sb)); 312 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 313 prison_enforce_statfs(td->td_ucred, mp, &sb); 314 sp = &sb; 315 } 316 *buf = *sp; 317 out: 318 vfs_unbusy(mp); 319 return (error); 320 } 321 322 /* 323 * Get filesystem statistics. 324 */ 325 #ifndef _SYS_SYSPROTO_H_ 326 struct fstatfs_args { 327 int fd; 328 struct statfs *buf; 329 }; 330 #endif 331 int 332 sys_fstatfs(td, uap) 333 struct thread *td; 334 register struct fstatfs_args /* { 335 int fd; 336 struct statfs *buf; 337 } */ *uap; 338 { 339 struct statfs sf; 340 int error; 341 342 error = kern_fstatfs(td, uap->fd, &sf); 343 if (error == 0) 344 error = copyout(&sf, uap->buf, sizeof(sf)); 345 return (error); 346 } 347 348 int 349 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 350 { 351 struct file *fp; 352 struct mount *mp; 353 struct statfs *sp, sb; 354 struct vnode *vp; 355 cap_rights_t rights; 356 int error; 357 358 AUDIT_ARG_FD(fd); 359 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 360 if (error != 0) 361 return (error); 362 vp = fp->f_vnode; 363 vn_lock(vp, LK_SHARED | LK_RETRY); 364 #ifdef AUDIT 365 AUDIT_ARG_VNODE1(vp); 366 #endif 367 mp = vp->v_mount; 368 if (mp) 369 vfs_ref(mp); 370 VOP_UNLOCK(vp, 0); 371 fdrop(fp, td); 372 if (mp == NULL) { 373 error = EBADF; 374 goto out; 375 } 376 error = vfs_busy(mp, 0); 377 vfs_rel(mp); 378 if (error != 0) 379 return (error); 380 #ifdef MAC 381 error = mac_mount_check_stat(td->td_ucred, mp); 382 if (error != 0) 383 goto out; 384 #endif 385 /* 386 * Set these in case the underlying filesystem fails to do so. 387 */ 388 sp = &mp->mnt_stat; 389 sp->f_version = STATFS_VERSION; 390 sp->f_namemax = NAME_MAX; 391 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 392 error = VFS_STATFS(mp, sp); 393 if (error != 0) 394 goto out; 395 if (priv_check(td, PRIV_VFS_GENERATION)) { 396 bcopy(sp, &sb, sizeof(sb)); 397 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 398 prison_enforce_statfs(td->td_ucred, mp, &sb); 399 sp = &sb; 400 } 401 *buf = *sp; 402 out: 403 if (mp) 404 vfs_unbusy(mp); 405 return (error); 406 } 407 408 /* 409 * Get statistics on all filesystems. 410 */ 411 #ifndef _SYS_SYSPROTO_H_ 412 struct getfsstat_args { 413 struct statfs *buf; 414 long bufsize; 415 int flags; 416 }; 417 #endif 418 int 419 sys_getfsstat(td, uap) 420 struct thread *td; 421 register struct getfsstat_args /* { 422 struct statfs *buf; 423 long bufsize; 424 int flags; 425 } */ *uap; 426 { 427 size_t count; 428 int error; 429 430 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 431 return (EINVAL); 432 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 433 UIO_USERSPACE, uap->flags); 434 if (error == 0) 435 td->td_retval[0] = count; 436 return (error); 437 } 438 439 /* 440 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 441 * The caller is responsible for freeing memory which will be allocated 442 * in '*buf'. 443 */ 444 int 445 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 446 size_t *countp, enum uio_seg bufseg, int flags) 447 { 448 struct mount *mp, *nmp; 449 struct statfs *sfsp, *sp, sb; 450 size_t count, maxcount; 451 int error; 452 453 maxcount = bufsize / sizeof(struct statfs); 454 if (bufsize == 0) 455 sfsp = NULL; 456 else if (bufseg == UIO_USERSPACE) 457 sfsp = *buf; 458 else /* if (bufseg == UIO_SYSSPACE) */ { 459 count = 0; 460 mtx_lock(&mountlist_mtx); 461 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 462 count++; 463 } 464 mtx_unlock(&mountlist_mtx); 465 if (maxcount > count) 466 maxcount = count; 467 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 468 M_WAITOK); 469 } 470 count = 0; 471 mtx_lock(&mountlist_mtx); 472 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 473 if (prison_canseemount(td->td_ucred, mp) != 0) { 474 nmp = TAILQ_NEXT(mp, mnt_list); 475 continue; 476 } 477 #ifdef MAC 478 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 479 nmp = TAILQ_NEXT(mp, mnt_list); 480 continue; 481 } 482 #endif 483 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 continue; 486 } 487 if (sfsp && count < maxcount) { 488 sp = &mp->mnt_stat; 489 /* 490 * Set these in case the underlying filesystem 491 * fails to do so. 492 */ 493 sp->f_version = STATFS_VERSION; 494 sp->f_namemax = NAME_MAX; 495 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 496 /* 497 * If MNT_NOWAIT or MNT_LAZY is specified, do not 498 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 499 * overrides MNT_WAIT. 500 */ 501 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 502 (flags & MNT_WAIT)) && 503 (error = VFS_STATFS(mp, sp))) { 504 mtx_lock(&mountlist_mtx); 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 vfs_unbusy(mp); 507 continue; 508 } 509 if (priv_check(td, PRIV_VFS_GENERATION)) { 510 bcopy(sp, &sb, sizeof(sb)); 511 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 512 prison_enforce_statfs(td->td_ucred, mp, &sb); 513 sp = &sb; 514 } 515 if (bufseg == UIO_SYSSPACE) 516 bcopy(sp, sfsp, sizeof(*sp)); 517 else /* if (bufseg == UIO_USERSPACE) */ { 518 error = copyout(sp, sfsp, sizeof(*sp)); 519 if (error != 0) { 520 vfs_unbusy(mp); 521 return (error); 522 } 523 } 524 sfsp++; 525 } 526 count++; 527 mtx_lock(&mountlist_mtx); 528 nmp = TAILQ_NEXT(mp, mnt_list); 529 vfs_unbusy(mp); 530 } 531 mtx_unlock(&mountlist_mtx); 532 if (sfsp && count > maxcount) 533 *countp = maxcount; 534 else 535 *countp = count; 536 return (0); 537 } 538 539 #ifdef COMPAT_FREEBSD4 540 /* 541 * Get old format filesystem statistics. 542 */ 543 static void cvtstatfs(struct statfs *, struct ostatfs *); 544 545 #ifndef _SYS_SYSPROTO_H_ 546 struct freebsd4_statfs_args { 547 char *path; 548 struct ostatfs *buf; 549 }; 550 #endif 551 int 552 freebsd4_statfs(td, uap) 553 struct thread *td; 554 struct freebsd4_statfs_args /* { 555 char *path; 556 struct ostatfs *buf; 557 } */ *uap; 558 { 559 struct ostatfs osb; 560 struct statfs sf; 561 int error; 562 563 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 564 if (error != 0) 565 return (error); 566 cvtstatfs(&sf, &osb); 567 return (copyout(&osb, uap->buf, sizeof(osb))); 568 } 569 570 /* 571 * Get filesystem statistics. 572 */ 573 #ifndef _SYS_SYSPROTO_H_ 574 struct freebsd4_fstatfs_args { 575 int fd; 576 struct ostatfs *buf; 577 }; 578 #endif 579 int 580 freebsd4_fstatfs(td, uap) 581 struct thread *td; 582 struct freebsd4_fstatfs_args /* { 583 int fd; 584 struct ostatfs *buf; 585 } */ *uap; 586 { 587 struct ostatfs osb; 588 struct statfs sf; 589 int error; 590 591 error = kern_fstatfs(td, uap->fd, &sf); 592 if (error != 0) 593 return (error); 594 cvtstatfs(&sf, &osb); 595 return (copyout(&osb, uap->buf, sizeof(osb))); 596 } 597 598 /* 599 * Get statistics on all filesystems. 600 */ 601 #ifndef _SYS_SYSPROTO_H_ 602 struct freebsd4_getfsstat_args { 603 struct ostatfs *buf; 604 long bufsize; 605 int flags; 606 }; 607 #endif 608 int 609 freebsd4_getfsstat(td, uap) 610 struct thread *td; 611 register struct freebsd4_getfsstat_args /* { 612 struct ostatfs *buf; 613 long bufsize; 614 int flags; 615 } */ *uap; 616 { 617 struct statfs *buf, *sp; 618 struct ostatfs osb; 619 size_t count, size; 620 int error; 621 622 if (uap->bufsize < 0) 623 return (EINVAL); 624 count = uap->bufsize / sizeof(struct ostatfs); 625 if (count > SIZE_MAX / sizeof(struct statfs)) 626 return (EINVAL); 627 size = count * sizeof(struct statfs); 628 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 629 uap->flags); 630 td->td_retval[0] = count; 631 if (size != 0) { 632 sp = buf; 633 while (count != 0 && error == 0) { 634 cvtstatfs(sp, &osb); 635 error = copyout(&osb, uap->buf, sizeof(osb)); 636 sp++; 637 uap->buf++; 638 count--; 639 } 640 free(buf, M_TEMP); 641 } 642 return (error); 643 } 644 645 /* 646 * Implement fstatfs() for (NFS) file handles. 647 */ 648 #ifndef _SYS_SYSPROTO_H_ 649 struct freebsd4_fhstatfs_args { 650 struct fhandle *u_fhp; 651 struct ostatfs *buf; 652 }; 653 #endif 654 int 655 freebsd4_fhstatfs(td, uap) 656 struct thread *td; 657 struct freebsd4_fhstatfs_args /* { 658 struct fhandle *u_fhp; 659 struct ostatfs *buf; 660 } */ *uap; 661 { 662 struct ostatfs osb; 663 struct statfs sf; 664 fhandle_t fh; 665 int error; 666 667 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 668 if (error != 0) 669 return (error); 670 error = kern_fhstatfs(td, fh, &sf); 671 if (error != 0) 672 return (error); 673 cvtstatfs(&sf, &osb); 674 return (copyout(&osb, uap->buf, sizeof(osb))); 675 } 676 677 /* 678 * Convert a new format statfs structure to an old format statfs structure. 679 */ 680 static void 681 cvtstatfs(nsp, osp) 682 struct statfs *nsp; 683 struct ostatfs *osp; 684 { 685 686 statfs_scale_blocks(nsp, LONG_MAX); 687 bzero(osp, sizeof(*osp)); 688 osp->f_bsize = nsp->f_bsize; 689 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 690 osp->f_blocks = nsp->f_blocks; 691 osp->f_bfree = nsp->f_bfree; 692 osp->f_bavail = nsp->f_bavail; 693 osp->f_files = MIN(nsp->f_files, LONG_MAX); 694 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 695 osp->f_owner = nsp->f_owner; 696 osp->f_type = nsp->f_type; 697 osp->f_flags = nsp->f_flags; 698 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 699 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 700 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 701 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 702 strlcpy(osp->f_fstypename, nsp->f_fstypename, 703 MIN(MFSNAMELEN, OMFSNAMELEN)); 704 strlcpy(osp->f_mntonname, nsp->f_mntonname, 705 MIN(MNAMELEN, OMNAMELEN)); 706 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 707 MIN(MNAMELEN, OMNAMELEN)); 708 osp->f_fsid = nsp->f_fsid; 709 } 710 #endif /* COMPAT_FREEBSD4 */ 711 712 /* 713 * Change current working directory to a given file descriptor. 714 */ 715 #ifndef _SYS_SYSPROTO_H_ 716 struct fchdir_args { 717 int fd; 718 }; 719 #endif 720 int 721 sys_fchdir(td, uap) 722 struct thread *td; 723 struct fchdir_args /* { 724 int fd; 725 } */ *uap; 726 { 727 struct vnode *vp, *tdp; 728 struct mount *mp; 729 struct file *fp; 730 cap_rights_t rights; 731 int error; 732 733 AUDIT_ARG_FD(uap->fd); 734 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 735 &fp); 736 if (error != 0) 737 return (error); 738 vp = fp->f_vnode; 739 VREF(vp); 740 fdrop(fp, td); 741 vn_lock(vp, LK_SHARED | LK_RETRY); 742 AUDIT_ARG_VNODE1(vp); 743 error = change_dir(vp, td); 744 while (!error && (mp = vp->v_mountedhere) != NULL) { 745 if (vfs_busy(mp, 0)) 746 continue; 747 error = VFS_ROOT(mp, LK_SHARED, &tdp); 748 vfs_unbusy(mp); 749 if (error != 0) 750 break; 751 vput(vp); 752 vp = tdp; 753 } 754 if (error != 0) { 755 vput(vp); 756 return (error); 757 } 758 VOP_UNLOCK(vp, 0); 759 pwd_chdir(td, vp); 760 return (0); 761 } 762 763 /* 764 * Change current working directory (``.''). 765 */ 766 #ifndef _SYS_SYSPROTO_H_ 767 struct chdir_args { 768 char *path; 769 }; 770 #endif 771 int 772 sys_chdir(td, uap) 773 struct thread *td; 774 struct chdir_args /* { 775 char *path; 776 } */ *uap; 777 { 778 779 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 780 } 781 782 int 783 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 784 { 785 struct nameidata nd; 786 int error; 787 788 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 789 pathseg, path, td); 790 if ((error = namei(&nd)) != 0) 791 return (error); 792 if ((error = change_dir(nd.ni_vp, td)) != 0) { 793 vput(nd.ni_vp); 794 NDFREE(&nd, NDF_ONLY_PNBUF); 795 return (error); 796 } 797 VOP_UNLOCK(nd.ni_vp, 0); 798 NDFREE(&nd, NDF_ONLY_PNBUF); 799 pwd_chdir(td, nd.ni_vp); 800 return (0); 801 } 802 803 /* 804 * Change notion of root (``/'') directory. 805 */ 806 #ifndef _SYS_SYSPROTO_H_ 807 struct chroot_args { 808 char *path; 809 }; 810 #endif 811 int 812 sys_chroot(td, uap) 813 struct thread *td; 814 struct chroot_args /* { 815 char *path; 816 } */ *uap; 817 { 818 struct nameidata nd; 819 int error; 820 821 error = priv_check(td, PRIV_VFS_CHROOT); 822 if (error != 0) 823 return (error); 824 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 825 UIO_USERSPACE, uap->path, td); 826 error = namei(&nd); 827 if (error != 0) 828 goto error; 829 error = change_dir(nd.ni_vp, td); 830 if (error != 0) 831 goto e_vunlock; 832 #ifdef MAC 833 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 834 if (error != 0) 835 goto e_vunlock; 836 #endif 837 VOP_UNLOCK(nd.ni_vp, 0); 838 error = pwd_chroot(td, nd.ni_vp); 839 vrele(nd.ni_vp); 840 NDFREE(&nd, NDF_ONLY_PNBUF); 841 return (error); 842 e_vunlock: 843 vput(nd.ni_vp); 844 error: 845 NDFREE(&nd, NDF_ONLY_PNBUF); 846 return (error); 847 } 848 849 /* 850 * Common routine for chroot and chdir. Callers must provide a locked vnode 851 * instance. 852 */ 853 int 854 change_dir(vp, td) 855 struct vnode *vp; 856 struct thread *td; 857 { 858 #ifdef MAC 859 int error; 860 #endif 861 862 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 863 if (vp->v_type != VDIR) 864 return (ENOTDIR); 865 #ifdef MAC 866 error = mac_vnode_check_chdir(td->td_ucred, vp); 867 if (error != 0) 868 return (error); 869 #endif 870 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 871 } 872 873 static __inline void 874 flags_to_rights(int flags, cap_rights_t *rightsp) 875 { 876 877 if (flags & O_EXEC) { 878 cap_rights_set(rightsp, CAP_FEXECVE); 879 } else { 880 switch ((flags & O_ACCMODE)) { 881 case O_RDONLY: 882 cap_rights_set(rightsp, CAP_READ); 883 break; 884 case O_RDWR: 885 cap_rights_set(rightsp, CAP_READ); 886 /* FALLTHROUGH */ 887 case O_WRONLY: 888 cap_rights_set(rightsp, CAP_WRITE); 889 if (!(flags & (O_APPEND | O_TRUNC))) 890 cap_rights_set(rightsp, CAP_SEEK); 891 break; 892 } 893 } 894 895 if (flags & O_CREAT) 896 cap_rights_set(rightsp, CAP_CREATE); 897 898 if (flags & O_TRUNC) 899 cap_rights_set(rightsp, CAP_FTRUNCATE); 900 901 if (flags & (O_SYNC | O_FSYNC)) 902 cap_rights_set(rightsp, CAP_FSYNC); 903 904 if (flags & (O_EXLOCK | O_SHLOCK)) 905 cap_rights_set(rightsp, CAP_FLOCK); 906 } 907 908 /* 909 * Check permissions, allocate an open file structure, and call the device 910 * open routine if any. 911 */ 912 #ifndef _SYS_SYSPROTO_H_ 913 struct open_args { 914 char *path; 915 int flags; 916 int mode; 917 }; 918 #endif 919 int 920 sys_open(td, uap) 921 struct thread *td; 922 register struct open_args /* { 923 char *path; 924 int flags; 925 int mode; 926 } */ *uap; 927 { 928 929 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 930 uap->flags, uap->mode)); 931 } 932 933 #ifndef _SYS_SYSPROTO_H_ 934 struct openat_args { 935 int fd; 936 char *path; 937 int flag; 938 int mode; 939 }; 940 #endif 941 int 942 sys_openat(struct thread *td, struct openat_args *uap) 943 { 944 945 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 946 uap->mode)); 947 } 948 949 int 950 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 951 int flags, int mode) 952 { 953 struct proc *p = td->td_proc; 954 struct filedesc *fdp = p->p_fd; 955 struct file *fp; 956 struct vnode *vp; 957 struct nameidata nd; 958 cap_rights_t rights; 959 int cmode, error, indx; 960 961 indx = -1; 962 963 AUDIT_ARG_FFLAGS(flags); 964 AUDIT_ARG_MODE(mode); 965 /* XXX: audit dirfd */ 966 cap_rights_init(&rights, CAP_LOOKUP); 967 flags_to_rights(flags, &rights); 968 /* 969 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 970 * may be specified. 971 */ 972 if (flags & O_EXEC) { 973 if (flags & O_ACCMODE) 974 return (EINVAL); 975 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 976 return (EINVAL); 977 } else { 978 flags = FFLAGS(flags); 979 } 980 981 /* 982 * Allocate a file structure. The descriptor to reference it 983 * is allocated and set by finstall() below. 984 */ 985 error = falloc_noinstall(td, &fp); 986 if (error != 0) 987 return (error); 988 /* 989 * An extra reference on `fp' has been held for us by 990 * falloc_noinstall(). 991 */ 992 /* Set the flags early so the finit in devfs can pick them up. */ 993 fp->f_flag = flags & FMASK; 994 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 995 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 996 &rights, td); 997 td->td_dupfd = -1; /* XXX check for fdopen */ 998 error = vn_open(&nd, &flags, cmode, fp); 999 if (error != 0) { 1000 /* 1001 * If the vn_open replaced the method vector, something 1002 * wonderous happened deep below and we just pass it up 1003 * pretending we know what we do. 1004 */ 1005 if (error == ENXIO && fp->f_ops != &badfileops) 1006 goto success; 1007 1008 /* 1009 * Handle special fdopen() case. bleh. 1010 * 1011 * Don't do this for relative (capability) lookups; we don't 1012 * understand exactly what would happen, and we don't think 1013 * that it ever should. 1014 */ 1015 if (nd.ni_strictrelative == 0 && 1016 (error == ENODEV || error == ENXIO) && 1017 td->td_dupfd >= 0) { 1018 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1019 &indx); 1020 if (error == 0) 1021 goto success; 1022 } 1023 1024 goto bad; 1025 } 1026 td->td_dupfd = 0; 1027 NDFREE(&nd, NDF_ONLY_PNBUF); 1028 vp = nd.ni_vp; 1029 1030 /* 1031 * Store the vnode, for any f_type. Typically, the vnode use 1032 * count is decremented by direct call to vn_closefile() for 1033 * files that switched type in the cdevsw fdopen() method. 1034 */ 1035 fp->f_vnode = vp; 1036 /* 1037 * If the file wasn't claimed by devfs bind it to the normal 1038 * vnode operations here. 1039 */ 1040 if (fp->f_ops == &badfileops) { 1041 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1042 fp->f_seqcount = 1; 1043 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1044 DTYPE_VNODE, vp, &vnops); 1045 } 1046 1047 VOP_UNLOCK(vp, 0); 1048 if (flags & O_TRUNC) { 1049 error = fo_truncate(fp, 0, td->td_ucred, td); 1050 if (error != 0) 1051 goto bad; 1052 } 1053 success: 1054 /* 1055 * If we haven't already installed the FD (for dupfdopen), do so now. 1056 */ 1057 if (indx == -1) { 1058 struct filecaps *fcaps; 1059 1060 #ifdef CAPABILITIES 1061 if (nd.ni_strictrelative == 1) 1062 fcaps = &nd.ni_filecaps; 1063 else 1064 #endif 1065 fcaps = NULL; 1066 error = finstall(td, fp, &indx, flags, fcaps); 1067 /* On success finstall() consumes fcaps. */ 1068 if (error != 0) { 1069 filecaps_free(&nd.ni_filecaps); 1070 goto bad; 1071 } 1072 } else { 1073 filecaps_free(&nd.ni_filecaps); 1074 } 1075 1076 /* 1077 * Release our private reference, leaving the one associated with 1078 * the descriptor table intact. 1079 */ 1080 fdrop(fp, td); 1081 td->td_retval[0] = indx; 1082 return (0); 1083 bad: 1084 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1085 fdrop(fp, td); 1086 return (error); 1087 } 1088 1089 #ifdef COMPAT_43 1090 /* 1091 * Create a file. 1092 */ 1093 #ifndef _SYS_SYSPROTO_H_ 1094 struct ocreat_args { 1095 char *path; 1096 int mode; 1097 }; 1098 #endif 1099 int 1100 ocreat(td, uap) 1101 struct thread *td; 1102 register struct ocreat_args /* { 1103 char *path; 1104 int mode; 1105 } */ *uap; 1106 { 1107 1108 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1109 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1110 } 1111 #endif /* COMPAT_43 */ 1112 1113 /* 1114 * Create a special file. 1115 */ 1116 #ifndef _SYS_SYSPROTO_H_ 1117 struct mknod_args { 1118 char *path; 1119 int mode; 1120 int dev; 1121 }; 1122 #endif 1123 int 1124 sys_mknod(td, uap) 1125 struct thread *td; 1126 register struct mknod_args /* { 1127 char *path; 1128 int mode; 1129 int dev; 1130 } */ *uap; 1131 { 1132 1133 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1134 uap->mode, uap->dev)); 1135 } 1136 1137 #ifndef _SYS_SYSPROTO_H_ 1138 struct mknodat_args { 1139 int fd; 1140 char *path; 1141 mode_t mode; 1142 dev_t dev; 1143 }; 1144 #endif 1145 int 1146 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1147 { 1148 1149 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1150 uap->dev)); 1151 } 1152 1153 int 1154 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1155 int mode, int dev) 1156 { 1157 struct vnode *vp; 1158 struct mount *mp; 1159 struct vattr vattr; 1160 struct nameidata nd; 1161 cap_rights_t rights; 1162 int error, whiteout = 0; 1163 1164 AUDIT_ARG_MODE(mode); 1165 AUDIT_ARG_DEV(dev); 1166 switch (mode & S_IFMT) { 1167 case S_IFCHR: 1168 case S_IFBLK: 1169 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1170 break; 1171 case S_IFMT: 1172 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1173 break; 1174 case S_IFWHT: 1175 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1176 break; 1177 case S_IFIFO: 1178 if (dev == 0) 1179 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1180 /* FALLTHROUGH */ 1181 default: 1182 error = EINVAL; 1183 break; 1184 } 1185 if (error != 0) 1186 return (error); 1187 restart: 1188 bwillwrite(); 1189 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1190 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1191 td); 1192 if ((error = namei(&nd)) != 0) 1193 return (error); 1194 vp = nd.ni_vp; 1195 if (vp != NULL) { 1196 NDFREE(&nd, NDF_ONLY_PNBUF); 1197 if (vp == nd.ni_dvp) 1198 vrele(nd.ni_dvp); 1199 else 1200 vput(nd.ni_dvp); 1201 vrele(vp); 1202 return (EEXIST); 1203 } else { 1204 VATTR_NULL(&vattr); 1205 vattr.va_mode = (mode & ALLPERMS) & 1206 ~td->td_proc->p_fd->fd_cmask; 1207 vattr.va_rdev = dev; 1208 whiteout = 0; 1209 1210 switch (mode & S_IFMT) { 1211 case S_IFMT: /* used by badsect to flag bad sectors */ 1212 vattr.va_type = VBAD; 1213 break; 1214 case S_IFCHR: 1215 vattr.va_type = VCHR; 1216 break; 1217 case S_IFBLK: 1218 vattr.va_type = VBLK; 1219 break; 1220 case S_IFWHT: 1221 whiteout = 1; 1222 break; 1223 default: 1224 panic("kern_mknod: invalid mode"); 1225 } 1226 } 1227 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1228 NDFREE(&nd, NDF_ONLY_PNBUF); 1229 vput(nd.ni_dvp); 1230 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1231 return (error); 1232 goto restart; 1233 } 1234 #ifdef MAC 1235 if (error == 0 && !whiteout) 1236 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1237 &nd.ni_cnd, &vattr); 1238 #endif 1239 if (error == 0) { 1240 if (whiteout) 1241 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1242 else { 1243 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1244 &nd.ni_cnd, &vattr); 1245 if (error == 0) 1246 vput(nd.ni_vp); 1247 } 1248 } 1249 NDFREE(&nd, NDF_ONLY_PNBUF); 1250 vput(nd.ni_dvp); 1251 vn_finished_write(mp); 1252 return (error); 1253 } 1254 1255 /* 1256 * Create a named pipe. 1257 */ 1258 #ifndef _SYS_SYSPROTO_H_ 1259 struct mkfifo_args { 1260 char *path; 1261 int mode; 1262 }; 1263 #endif 1264 int 1265 sys_mkfifo(td, uap) 1266 struct thread *td; 1267 register struct mkfifo_args /* { 1268 char *path; 1269 int mode; 1270 } */ *uap; 1271 { 1272 1273 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1274 uap->mode)); 1275 } 1276 1277 #ifndef _SYS_SYSPROTO_H_ 1278 struct mkfifoat_args { 1279 int fd; 1280 char *path; 1281 mode_t mode; 1282 }; 1283 #endif 1284 int 1285 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1286 { 1287 1288 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1289 uap->mode)); 1290 } 1291 1292 int 1293 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1294 int mode) 1295 { 1296 struct mount *mp; 1297 struct vattr vattr; 1298 struct nameidata nd; 1299 cap_rights_t rights; 1300 int error; 1301 1302 AUDIT_ARG_MODE(mode); 1303 restart: 1304 bwillwrite(); 1305 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1306 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1307 td); 1308 if ((error = namei(&nd)) != 0) 1309 return (error); 1310 if (nd.ni_vp != NULL) { 1311 NDFREE(&nd, NDF_ONLY_PNBUF); 1312 if (nd.ni_vp == nd.ni_dvp) 1313 vrele(nd.ni_dvp); 1314 else 1315 vput(nd.ni_dvp); 1316 vrele(nd.ni_vp); 1317 return (EEXIST); 1318 } 1319 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1320 NDFREE(&nd, NDF_ONLY_PNBUF); 1321 vput(nd.ni_dvp); 1322 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1323 return (error); 1324 goto restart; 1325 } 1326 VATTR_NULL(&vattr); 1327 vattr.va_type = VFIFO; 1328 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1329 #ifdef MAC 1330 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1331 &vattr); 1332 if (error != 0) 1333 goto out; 1334 #endif 1335 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1336 if (error == 0) 1337 vput(nd.ni_vp); 1338 #ifdef MAC 1339 out: 1340 #endif 1341 vput(nd.ni_dvp); 1342 vn_finished_write(mp); 1343 NDFREE(&nd, NDF_ONLY_PNBUF); 1344 return (error); 1345 } 1346 1347 /* 1348 * Make a hard file link. 1349 */ 1350 #ifndef _SYS_SYSPROTO_H_ 1351 struct link_args { 1352 char *path; 1353 char *link; 1354 }; 1355 #endif 1356 int 1357 sys_link(td, uap) 1358 struct thread *td; 1359 register struct link_args /* { 1360 char *path; 1361 char *link; 1362 } */ *uap; 1363 { 1364 1365 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1366 UIO_USERSPACE, FOLLOW)); 1367 } 1368 1369 #ifndef _SYS_SYSPROTO_H_ 1370 struct linkat_args { 1371 int fd1; 1372 char *path1; 1373 int fd2; 1374 char *path2; 1375 int flag; 1376 }; 1377 #endif 1378 int 1379 sys_linkat(struct thread *td, struct linkat_args *uap) 1380 { 1381 int flag; 1382 1383 flag = uap->flag; 1384 if (flag & ~AT_SYMLINK_FOLLOW) 1385 return (EINVAL); 1386 1387 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1388 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1389 } 1390 1391 int hardlink_check_uid = 0; 1392 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1393 &hardlink_check_uid, 0, 1394 "Unprivileged processes cannot create hard links to files owned by other " 1395 "users"); 1396 static int hardlink_check_gid = 0; 1397 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1398 &hardlink_check_gid, 0, 1399 "Unprivileged processes cannot create hard links to files owned by other " 1400 "groups"); 1401 1402 static int 1403 can_hardlink(struct vnode *vp, struct ucred *cred) 1404 { 1405 struct vattr va; 1406 int error; 1407 1408 if (!hardlink_check_uid && !hardlink_check_gid) 1409 return (0); 1410 1411 error = VOP_GETATTR(vp, &va, cred); 1412 if (error != 0) 1413 return (error); 1414 1415 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1416 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1417 if (error != 0) 1418 return (error); 1419 } 1420 1421 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1422 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1423 if (error != 0) 1424 return (error); 1425 } 1426 1427 return (0); 1428 } 1429 1430 int 1431 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1432 enum uio_seg segflg, int follow) 1433 { 1434 struct vnode *vp; 1435 struct mount *mp; 1436 struct nameidata nd; 1437 cap_rights_t rights; 1438 int error; 1439 1440 again: 1441 bwillwrite(); 1442 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1443 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1444 1445 if ((error = namei(&nd)) != 0) 1446 return (error); 1447 NDFREE(&nd, NDF_ONLY_PNBUF); 1448 vp = nd.ni_vp; 1449 if (vp->v_type == VDIR) { 1450 vrele(vp); 1451 return (EPERM); /* POSIX */ 1452 } 1453 NDINIT_ATRIGHTS(&nd, CREATE, 1454 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1455 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1456 if ((error = namei(&nd)) == 0) { 1457 if (nd.ni_vp != NULL) { 1458 NDFREE(&nd, NDF_ONLY_PNBUF); 1459 if (nd.ni_dvp == nd.ni_vp) 1460 vrele(nd.ni_dvp); 1461 else 1462 vput(nd.ni_dvp); 1463 vrele(nd.ni_vp); 1464 vrele(vp); 1465 return (EEXIST); 1466 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1467 /* 1468 * Cross-device link. No need to recheck 1469 * vp->v_type, since it cannot change, except 1470 * to VBAD. 1471 */ 1472 NDFREE(&nd, NDF_ONLY_PNBUF); 1473 vput(nd.ni_dvp); 1474 vrele(vp); 1475 return (EXDEV); 1476 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1477 error = can_hardlink(vp, td->td_ucred); 1478 #ifdef MAC 1479 if (error == 0) 1480 error = mac_vnode_check_link(td->td_ucred, 1481 nd.ni_dvp, vp, &nd.ni_cnd); 1482 #endif 1483 if (error != 0) { 1484 vput(vp); 1485 vput(nd.ni_dvp); 1486 NDFREE(&nd, NDF_ONLY_PNBUF); 1487 return (error); 1488 } 1489 error = vn_start_write(vp, &mp, V_NOWAIT); 1490 if (error != 0) { 1491 vput(vp); 1492 vput(nd.ni_dvp); 1493 NDFREE(&nd, NDF_ONLY_PNBUF); 1494 error = vn_start_write(NULL, &mp, 1495 V_XSLEEP | PCATCH); 1496 if (error != 0) 1497 return (error); 1498 goto again; 1499 } 1500 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1501 VOP_UNLOCK(vp, 0); 1502 vput(nd.ni_dvp); 1503 vn_finished_write(mp); 1504 NDFREE(&nd, NDF_ONLY_PNBUF); 1505 } else { 1506 vput(nd.ni_dvp); 1507 NDFREE(&nd, NDF_ONLY_PNBUF); 1508 vrele(vp); 1509 goto again; 1510 } 1511 } 1512 vrele(vp); 1513 return (error); 1514 } 1515 1516 /* 1517 * Make a symbolic link. 1518 */ 1519 #ifndef _SYS_SYSPROTO_H_ 1520 struct symlink_args { 1521 char *path; 1522 char *link; 1523 }; 1524 #endif 1525 int 1526 sys_symlink(td, uap) 1527 struct thread *td; 1528 register struct symlink_args /* { 1529 char *path; 1530 char *link; 1531 } */ *uap; 1532 { 1533 1534 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1535 UIO_USERSPACE)); 1536 } 1537 1538 #ifndef _SYS_SYSPROTO_H_ 1539 struct symlinkat_args { 1540 char *path; 1541 int fd; 1542 char *path2; 1543 }; 1544 #endif 1545 int 1546 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1547 { 1548 1549 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1550 UIO_USERSPACE)); 1551 } 1552 1553 int 1554 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1555 enum uio_seg segflg) 1556 { 1557 struct mount *mp; 1558 struct vattr vattr; 1559 char *syspath; 1560 struct nameidata nd; 1561 int error; 1562 cap_rights_t rights; 1563 1564 if (segflg == UIO_SYSSPACE) { 1565 syspath = path1; 1566 } else { 1567 syspath = uma_zalloc(namei_zone, M_WAITOK); 1568 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1569 goto out; 1570 } 1571 AUDIT_ARG_TEXT(syspath); 1572 restart: 1573 bwillwrite(); 1574 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1575 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1576 td); 1577 if ((error = namei(&nd)) != 0) 1578 goto out; 1579 if (nd.ni_vp) { 1580 NDFREE(&nd, NDF_ONLY_PNBUF); 1581 if (nd.ni_vp == nd.ni_dvp) 1582 vrele(nd.ni_dvp); 1583 else 1584 vput(nd.ni_dvp); 1585 vrele(nd.ni_vp); 1586 error = EEXIST; 1587 goto out; 1588 } 1589 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1590 NDFREE(&nd, NDF_ONLY_PNBUF); 1591 vput(nd.ni_dvp); 1592 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1593 goto out; 1594 goto restart; 1595 } 1596 VATTR_NULL(&vattr); 1597 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1598 #ifdef MAC 1599 vattr.va_type = VLNK; 1600 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1601 &vattr); 1602 if (error != 0) 1603 goto out2; 1604 #endif 1605 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1606 if (error == 0) 1607 vput(nd.ni_vp); 1608 #ifdef MAC 1609 out2: 1610 #endif 1611 NDFREE(&nd, NDF_ONLY_PNBUF); 1612 vput(nd.ni_dvp); 1613 vn_finished_write(mp); 1614 out: 1615 if (segflg != UIO_SYSSPACE) 1616 uma_zfree(namei_zone, syspath); 1617 return (error); 1618 } 1619 1620 /* 1621 * Delete a whiteout from the filesystem. 1622 */ 1623 int 1624 sys_undelete(td, uap) 1625 struct thread *td; 1626 register struct undelete_args /* { 1627 char *path; 1628 } */ *uap; 1629 { 1630 struct mount *mp; 1631 struct nameidata nd; 1632 int error; 1633 1634 restart: 1635 bwillwrite(); 1636 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1637 UIO_USERSPACE, uap->path, td); 1638 error = namei(&nd); 1639 if (error != 0) 1640 return (error); 1641 1642 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1643 NDFREE(&nd, NDF_ONLY_PNBUF); 1644 if (nd.ni_vp == nd.ni_dvp) 1645 vrele(nd.ni_dvp); 1646 else 1647 vput(nd.ni_dvp); 1648 if (nd.ni_vp) 1649 vrele(nd.ni_vp); 1650 return (EEXIST); 1651 } 1652 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1653 NDFREE(&nd, NDF_ONLY_PNBUF); 1654 vput(nd.ni_dvp); 1655 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1656 return (error); 1657 goto restart; 1658 } 1659 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1660 NDFREE(&nd, NDF_ONLY_PNBUF); 1661 vput(nd.ni_dvp); 1662 vn_finished_write(mp); 1663 return (error); 1664 } 1665 1666 /* 1667 * Delete a name from the filesystem. 1668 */ 1669 #ifndef _SYS_SYSPROTO_H_ 1670 struct unlink_args { 1671 char *path; 1672 }; 1673 #endif 1674 int 1675 sys_unlink(td, uap) 1676 struct thread *td; 1677 struct unlink_args /* { 1678 char *path; 1679 } */ *uap; 1680 { 1681 1682 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1683 } 1684 1685 #ifndef _SYS_SYSPROTO_H_ 1686 struct unlinkat_args { 1687 int fd; 1688 char *path; 1689 int flag; 1690 }; 1691 #endif 1692 int 1693 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1694 { 1695 int flag = uap->flag; 1696 int fd = uap->fd; 1697 char *path = uap->path; 1698 1699 if (flag & ~AT_REMOVEDIR) 1700 return (EINVAL); 1701 1702 if (flag & AT_REMOVEDIR) 1703 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1704 else 1705 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1706 } 1707 1708 int 1709 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1710 ino_t oldinum) 1711 { 1712 struct mount *mp; 1713 struct vnode *vp; 1714 struct nameidata nd; 1715 struct stat sb; 1716 cap_rights_t rights; 1717 int error; 1718 1719 restart: 1720 bwillwrite(); 1721 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1722 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1723 if ((error = namei(&nd)) != 0) 1724 return (error == EINVAL ? EPERM : error); 1725 vp = nd.ni_vp; 1726 if (vp->v_type == VDIR && oldinum == 0) { 1727 error = EPERM; /* POSIX */ 1728 } else if (oldinum != 0 && 1729 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1730 sb.st_ino != oldinum) { 1731 error = EIDRM; /* Identifier removed */ 1732 } else { 1733 /* 1734 * The root of a mounted filesystem cannot be deleted. 1735 * 1736 * XXX: can this only be a VDIR case? 1737 */ 1738 if (vp->v_vflag & VV_ROOT) 1739 error = EBUSY; 1740 } 1741 if (error == 0) { 1742 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1743 NDFREE(&nd, NDF_ONLY_PNBUF); 1744 vput(nd.ni_dvp); 1745 if (vp == nd.ni_dvp) 1746 vrele(vp); 1747 else 1748 vput(vp); 1749 if ((error = vn_start_write(NULL, &mp, 1750 V_XSLEEP | PCATCH)) != 0) 1751 return (error); 1752 goto restart; 1753 } 1754 #ifdef MAC 1755 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1756 &nd.ni_cnd); 1757 if (error != 0) 1758 goto out; 1759 #endif 1760 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1761 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1762 #ifdef MAC 1763 out: 1764 #endif 1765 vn_finished_write(mp); 1766 } 1767 NDFREE(&nd, NDF_ONLY_PNBUF); 1768 vput(nd.ni_dvp); 1769 if (vp == nd.ni_dvp) 1770 vrele(vp); 1771 else 1772 vput(vp); 1773 return (error); 1774 } 1775 1776 /* 1777 * Reposition read/write file offset. 1778 */ 1779 #ifndef _SYS_SYSPROTO_H_ 1780 struct lseek_args { 1781 int fd; 1782 int pad; 1783 off_t offset; 1784 int whence; 1785 }; 1786 #endif 1787 int 1788 sys_lseek(td, uap) 1789 struct thread *td; 1790 register struct lseek_args /* { 1791 int fd; 1792 int pad; 1793 off_t offset; 1794 int whence; 1795 } */ *uap; 1796 { 1797 struct file *fp; 1798 cap_rights_t rights; 1799 int error; 1800 1801 AUDIT_ARG_FD(uap->fd); 1802 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1803 if (error != 0) 1804 return (error); 1805 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1806 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1807 fdrop(fp, td); 1808 return (error); 1809 } 1810 1811 #if defined(COMPAT_43) 1812 /* 1813 * Reposition read/write file offset. 1814 */ 1815 #ifndef _SYS_SYSPROTO_H_ 1816 struct olseek_args { 1817 int fd; 1818 long offset; 1819 int whence; 1820 }; 1821 #endif 1822 int 1823 olseek(td, uap) 1824 struct thread *td; 1825 register struct olseek_args /* { 1826 int fd; 1827 long offset; 1828 int whence; 1829 } */ *uap; 1830 { 1831 struct lseek_args /* { 1832 int fd; 1833 int pad; 1834 off_t offset; 1835 int whence; 1836 } */ nuap; 1837 1838 nuap.fd = uap->fd; 1839 nuap.offset = uap->offset; 1840 nuap.whence = uap->whence; 1841 return (sys_lseek(td, &nuap)); 1842 } 1843 #endif /* COMPAT_43 */ 1844 1845 #if defined(COMPAT_FREEBSD6) 1846 /* Version with the 'pad' argument */ 1847 int 1848 freebsd6_lseek(td, uap) 1849 struct thread *td; 1850 register struct freebsd6_lseek_args *uap; 1851 { 1852 struct lseek_args ouap; 1853 1854 ouap.fd = uap->fd; 1855 ouap.offset = uap->offset; 1856 ouap.whence = uap->whence; 1857 return (sys_lseek(td, &ouap)); 1858 } 1859 #endif 1860 1861 /* 1862 * Check access permissions using passed credentials. 1863 */ 1864 static int 1865 vn_access(vp, user_flags, cred, td) 1866 struct vnode *vp; 1867 int user_flags; 1868 struct ucred *cred; 1869 struct thread *td; 1870 { 1871 accmode_t accmode; 1872 int error; 1873 1874 /* Flags == 0 means only check for existence. */ 1875 if (user_flags == 0) 1876 return (0); 1877 1878 accmode = 0; 1879 if (user_flags & R_OK) 1880 accmode |= VREAD; 1881 if (user_flags & W_OK) 1882 accmode |= VWRITE; 1883 if (user_flags & X_OK) 1884 accmode |= VEXEC; 1885 #ifdef MAC 1886 error = mac_vnode_check_access(cred, vp, accmode); 1887 if (error != 0) 1888 return (error); 1889 #endif 1890 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1891 error = VOP_ACCESS(vp, accmode, cred, td); 1892 return (error); 1893 } 1894 1895 /* 1896 * Check access permissions using "real" credentials. 1897 */ 1898 #ifndef _SYS_SYSPROTO_H_ 1899 struct access_args { 1900 char *path; 1901 int amode; 1902 }; 1903 #endif 1904 int 1905 sys_access(td, uap) 1906 struct thread *td; 1907 register struct access_args /* { 1908 char *path; 1909 int amode; 1910 } */ *uap; 1911 { 1912 1913 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1914 0, uap->amode)); 1915 } 1916 1917 #ifndef _SYS_SYSPROTO_H_ 1918 struct faccessat_args { 1919 int dirfd; 1920 char *path; 1921 int amode; 1922 int flag; 1923 } 1924 #endif 1925 int 1926 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1927 { 1928 1929 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1930 uap->amode)); 1931 } 1932 1933 int 1934 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1935 int flag, int amode) 1936 { 1937 struct ucred *cred, *usecred; 1938 struct vnode *vp; 1939 struct nameidata nd; 1940 cap_rights_t rights; 1941 int error; 1942 1943 if (flag & ~AT_EACCESS) 1944 return (EINVAL); 1945 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1946 return (EINVAL); 1947 1948 /* 1949 * Create and modify a temporary credential instead of one that 1950 * is potentially shared (if we need one). 1951 */ 1952 cred = td->td_ucred; 1953 if ((flag & AT_EACCESS) == 0 && 1954 ((cred->cr_uid != cred->cr_ruid || 1955 cred->cr_rgid != cred->cr_groups[0]))) { 1956 usecred = crdup(cred); 1957 usecred->cr_uid = cred->cr_ruid; 1958 usecred->cr_groups[0] = cred->cr_rgid; 1959 td->td_ucred = usecred; 1960 } else 1961 usecred = cred; 1962 AUDIT_ARG_VALUE(amode); 1963 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1964 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1965 td); 1966 if ((error = namei(&nd)) != 0) 1967 goto out; 1968 vp = nd.ni_vp; 1969 1970 error = vn_access(vp, amode, usecred, td); 1971 NDFREE(&nd, NDF_ONLY_PNBUF); 1972 vput(vp); 1973 out: 1974 if (usecred != cred) { 1975 td->td_ucred = cred; 1976 crfree(usecred); 1977 } 1978 return (error); 1979 } 1980 1981 /* 1982 * Check access permissions using "effective" credentials. 1983 */ 1984 #ifndef _SYS_SYSPROTO_H_ 1985 struct eaccess_args { 1986 char *path; 1987 int amode; 1988 }; 1989 #endif 1990 int 1991 sys_eaccess(td, uap) 1992 struct thread *td; 1993 register struct eaccess_args /* { 1994 char *path; 1995 int amode; 1996 } */ *uap; 1997 { 1998 1999 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2000 AT_EACCESS, uap->amode)); 2001 } 2002 2003 #if defined(COMPAT_43) 2004 /* 2005 * Get file status; this version follows links. 2006 */ 2007 #ifndef _SYS_SYSPROTO_H_ 2008 struct ostat_args { 2009 char *path; 2010 struct ostat *ub; 2011 }; 2012 #endif 2013 int 2014 ostat(td, uap) 2015 struct thread *td; 2016 register struct ostat_args /* { 2017 char *path; 2018 struct ostat *ub; 2019 } */ *uap; 2020 { 2021 struct stat sb; 2022 struct ostat osb; 2023 int error; 2024 2025 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2026 &sb, NULL); 2027 if (error != 0) 2028 return (error); 2029 cvtstat(&sb, &osb); 2030 return (copyout(&osb, uap->ub, sizeof (osb))); 2031 } 2032 2033 /* 2034 * Get file status; this version does not follow links. 2035 */ 2036 #ifndef _SYS_SYSPROTO_H_ 2037 struct olstat_args { 2038 char *path; 2039 struct ostat *ub; 2040 }; 2041 #endif 2042 int 2043 olstat(td, uap) 2044 struct thread *td; 2045 register struct olstat_args /* { 2046 char *path; 2047 struct ostat *ub; 2048 } */ *uap; 2049 { 2050 struct stat sb; 2051 struct ostat osb; 2052 int error; 2053 2054 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2055 UIO_USERSPACE, &sb, NULL); 2056 if (error != 0) 2057 return (error); 2058 cvtstat(&sb, &osb); 2059 return (copyout(&osb, uap->ub, sizeof (osb))); 2060 } 2061 2062 /* 2063 * Convert from an old to a new stat structure. 2064 */ 2065 void 2066 cvtstat(st, ost) 2067 struct stat *st; 2068 struct ostat *ost; 2069 { 2070 2071 bzero(ost, sizeof(*ost)); 2072 ost->st_dev = st->st_dev; 2073 ost->st_ino = st->st_ino; 2074 ost->st_mode = st->st_mode; 2075 ost->st_nlink = st->st_nlink; 2076 ost->st_uid = st->st_uid; 2077 ost->st_gid = st->st_gid; 2078 ost->st_rdev = st->st_rdev; 2079 if (st->st_size < (quad_t)1 << 32) 2080 ost->st_size = st->st_size; 2081 else 2082 ost->st_size = -2; 2083 ost->st_atim = st->st_atim; 2084 ost->st_mtim = st->st_mtim; 2085 ost->st_ctim = st->st_ctim; 2086 ost->st_blksize = st->st_blksize; 2087 ost->st_blocks = st->st_blocks; 2088 ost->st_flags = st->st_flags; 2089 ost->st_gen = st->st_gen; 2090 } 2091 #endif /* COMPAT_43 */ 2092 2093 /* 2094 * Get file status; this version follows links. 2095 */ 2096 #ifndef _SYS_SYSPROTO_H_ 2097 struct stat_args { 2098 char *path; 2099 struct stat *ub; 2100 }; 2101 #endif 2102 int 2103 sys_stat(td, uap) 2104 struct thread *td; 2105 register struct stat_args /* { 2106 char *path; 2107 struct stat *ub; 2108 } */ *uap; 2109 { 2110 struct stat sb; 2111 int error; 2112 2113 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2114 &sb, NULL); 2115 if (error == 0) 2116 error = copyout(&sb, uap->ub, sizeof (sb)); 2117 return (error); 2118 } 2119 2120 #ifndef _SYS_SYSPROTO_H_ 2121 struct fstatat_args { 2122 int fd; 2123 char *path; 2124 struct stat *buf; 2125 int flag; 2126 } 2127 #endif 2128 int 2129 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2130 { 2131 struct stat sb; 2132 int error; 2133 2134 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2135 UIO_USERSPACE, &sb, NULL); 2136 if (error == 0) 2137 error = copyout(&sb, uap->buf, sizeof (sb)); 2138 return (error); 2139 } 2140 2141 int 2142 kern_statat(struct thread *td, int flag, int fd, char *path, 2143 enum uio_seg pathseg, struct stat *sbp, 2144 void (*hook)(struct vnode *vp, struct stat *sbp)) 2145 { 2146 struct nameidata nd; 2147 struct stat sb; 2148 cap_rights_t rights; 2149 int error; 2150 2151 if (flag & ~AT_SYMLINK_NOFOLLOW) 2152 return (EINVAL); 2153 2154 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2155 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2156 cap_rights_init(&rights, CAP_FSTAT), td); 2157 2158 if ((error = namei(&nd)) != 0) 2159 return (error); 2160 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2161 if (error == 0) { 2162 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2163 if (S_ISREG(sb.st_mode)) 2164 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2165 if (__predict_false(hook != NULL)) 2166 hook(nd.ni_vp, &sb); 2167 } 2168 NDFREE(&nd, NDF_ONLY_PNBUF); 2169 vput(nd.ni_vp); 2170 if (error != 0) 2171 return (error); 2172 *sbp = sb; 2173 #ifdef KTRACE 2174 if (KTRPOINT(td, KTR_STRUCT)) 2175 ktrstat(&sb); 2176 #endif 2177 return (0); 2178 } 2179 2180 /* 2181 * Get file status; this version does not follow links. 2182 */ 2183 #ifndef _SYS_SYSPROTO_H_ 2184 struct lstat_args { 2185 char *path; 2186 struct stat *ub; 2187 }; 2188 #endif 2189 int 2190 sys_lstat(td, uap) 2191 struct thread *td; 2192 register struct lstat_args /* { 2193 char *path; 2194 struct stat *ub; 2195 } */ *uap; 2196 { 2197 struct stat sb; 2198 int error; 2199 2200 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2201 UIO_USERSPACE, &sb, NULL); 2202 if (error == 0) 2203 error = copyout(&sb, uap->ub, sizeof (sb)); 2204 return (error); 2205 } 2206 2207 /* 2208 * Implementation of the NetBSD [l]stat() functions. 2209 */ 2210 void 2211 cvtnstat(sb, nsb) 2212 struct stat *sb; 2213 struct nstat *nsb; 2214 { 2215 2216 bzero(nsb, sizeof *nsb); 2217 nsb->st_dev = sb->st_dev; 2218 nsb->st_ino = sb->st_ino; 2219 nsb->st_mode = sb->st_mode; 2220 nsb->st_nlink = sb->st_nlink; 2221 nsb->st_uid = sb->st_uid; 2222 nsb->st_gid = sb->st_gid; 2223 nsb->st_rdev = sb->st_rdev; 2224 nsb->st_atim = sb->st_atim; 2225 nsb->st_mtim = sb->st_mtim; 2226 nsb->st_ctim = sb->st_ctim; 2227 nsb->st_size = sb->st_size; 2228 nsb->st_blocks = sb->st_blocks; 2229 nsb->st_blksize = sb->st_blksize; 2230 nsb->st_flags = sb->st_flags; 2231 nsb->st_gen = sb->st_gen; 2232 nsb->st_birthtim = sb->st_birthtim; 2233 } 2234 2235 #ifndef _SYS_SYSPROTO_H_ 2236 struct nstat_args { 2237 char *path; 2238 struct nstat *ub; 2239 }; 2240 #endif 2241 int 2242 sys_nstat(td, uap) 2243 struct thread *td; 2244 register struct nstat_args /* { 2245 char *path; 2246 struct nstat *ub; 2247 } */ *uap; 2248 { 2249 struct stat sb; 2250 struct nstat nsb; 2251 int error; 2252 2253 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2254 &sb, NULL); 2255 if (error != 0) 2256 return (error); 2257 cvtnstat(&sb, &nsb); 2258 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2259 } 2260 2261 /* 2262 * NetBSD lstat. Get file status; this version does not follow links. 2263 */ 2264 #ifndef _SYS_SYSPROTO_H_ 2265 struct lstat_args { 2266 char *path; 2267 struct stat *ub; 2268 }; 2269 #endif 2270 int 2271 sys_nlstat(td, uap) 2272 struct thread *td; 2273 register struct nlstat_args /* { 2274 char *path; 2275 struct nstat *ub; 2276 } */ *uap; 2277 { 2278 struct stat sb; 2279 struct nstat nsb; 2280 int error; 2281 2282 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2283 UIO_USERSPACE, &sb, NULL); 2284 if (error != 0) 2285 return (error); 2286 cvtnstat(&sb, &nsb); 2287 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2288 } 2289 2290 /* 2291 * Get configurable pathname variables. 2292 */ 2293 #ifndef _SYS_SYSPROTO_H_ 2294 struct pathconf_args { 2295 char *path; 2296 int name; 2297 }; 2298 #endif 2299 int 2300 sys_pathconf(td, uap) 2301 struct thread *td; 2302 register struct pathconf_args /* { 2303 char *path; 2304 int name; 2305 } */ *uap; 2306 { 2307 2308 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2309 } 2310 2311 #ifndef _SYS_SYSPROTO_H_ 2312 struct lpathconf_args { 2313 char *path; 2314 int name; 2315 }; 2316 #endif 2317 int 2318 sys_lpathconf(td, uap) 2319 struct thread *td; 2320 register struct lpathconf_args /* { 2321 char *path; 2322 int name; 2323 } */ *uap; 2324 { 2325 2326 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2327 NOFOLLOW)); 2328 } 2329 2330 int 2331 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2332 u_long flags) 2333 { 2334 struct nameidata nd; 2335 int error; 2336 2337 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2338 pathseg, path, td); 2339 if ((error = namei(&nd)) != 0) 2340 return (error); 2341 NDFREE(&nd, NDF_ONLY_PNBUF); 2342 2343 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2344 vput(nd.ni_vp); 2345 return (error); 2346 } 2347 2348 /* 2349 * Return target name of a symbolic link. 2350 */ 2351 #ifndef _SYS_SYSPROTO_H_ 2352 struct readlink_args { 2353 char *path; 2354 char *buf; 2355 size_t count; 2356 }; 2357 #endif 2358 int 2359 sys_readlink(td, uap) 2360 struct thread *td; 2361 register struct readlink_args /* { 2362 char *path; 2363 char *buf; 2364 size_t count; 2365 } */ *uap; 2366 { 2367 2368 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2369 uap->buf, UIO_USERSPACE, uap->count)); 2370 } 2371 #ifndef _SYS_SYSPROTO_H_ 2372 struct readlinkat_args { 2373 int fd; 2374 char *path; 2375 char *buf; 2376 size_t bufsize; 2377 }; 2378 #endif 2379 int 2380 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2381 { 2382 2383 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2384 uap->buf, UIO_USERSPACE, uap->bufsize)); 2385 } 2386 2387 int 2388 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2389 char *buf, enum uio_seg bufseg, size_t count) 2390 { 2391 struct vnode *vp; 2392 struct iovec aiov; 2393 struct uio auio; 2394 struct nameidata nd; 2395 int error; 2396 2397 if (count > IOSIZE_MAX) 2398 return (EINVAL); 2399 2400 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2401 pathseg, path, fd, td); 2402 2403 if ((error = namei(&nd)) != 0) 2404 return (error); 2405 NDFREE(&nd, NDF_ONLY_PNBUF); 2406 vp = nd.ni_vp; 2407 #ifdef MAC 2408 error = mac_vnode_check_readlink(td->td_ucred, vp); 2409 if (error != 0) { 2410 vput(vp); 2411 return (error); 2412 } 2413 #endif 2414 if (vp->v_type != VLNK) 2415 error = EINVAL; 2416 else { 2417 aiov.iov_base = buf; 2418 aiov.iov_len = count; 2419 auio.uio_iov = &aiov; 2420 auio.uio_iovcnt = 1; 2421 auio.uio_offset = 0; 2422 auio.uio_rw = UIO_READ; 2423 auio.uio_segflg = bufseg; 2424 auio.uio_td = td; 2425 auio.uio_resid = count; 2426 error = VOP_READLINK(vp, &auio, td->td_ucred); 2427 td->td_retval[0] = count - auio.uio_resid; 2428 } 2429 vput(vp); 2430 return (error); 2431 } 2432 2433 /* 2434 * Common implementation code for chflags() and fchflags(). 2435 */ 2436 static int 2437 setfflags(td, vp, flags) 2438 struct thread *td; 2439 struct vnode *vp; 2440 u_long flags; 2441 { 2442 struct mount *mp; 2443 struct vattr vattr; 2444 int error; 2445 2446 /* We can't support the value matching VNOVAL. */ 2447 if (flags == VNOVAL) 2448 return (EOPNOTSUPP); 2449 2450 /* 2451 * Prevent non-root users from setting flags on devices. When 2452 * a device is reused, users can retain ownership of the device 2453 * if they are allowed to set flags and programs assume that 2454 * chown can't fail when done as root. 2455 */ 2456 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2457 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2458 if (error != 0) 2459 return (error); 2460 } 2461 2462 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2463 return (error); 2464 VATTR_NULL(&vattr); 2465 vattr.va_flags = flags; 2466 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2467 #ifdef MAC 2468 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2469 if (error == 0) 2470 #endif 2471 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2472 VOP_UNLOCK(vp, 0); 2473 vn_finished_write(mp); 2474 return (error); 2475 } 2476 2477 /* 2478 * Change flags of a file given a path name. 2479 */ 2480 #ifndef _SYS_SYSPROTO_H_ 2481 struct chflags_args { 2482 const char *path; 2483 u_long flags; 2484 }; 2485 #endif 2486 int 2487 sys_chflags(td, uap) 2488 struct thread *td; 2489 register struct chflags_args /* { 2490 const char *path; 2491 u_long flags; 2492 } */ *uap; 2493 { 2494 2495 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2496 uap->flags, 0)); 2497 } 2498 2499 #ifndef _SYS_SYSPROTO_H_ 2500 struct chflagsat_args { 2501 int fd; 2502 const char *path; 2503 u_long flags; 2504 int atflag; 2505 } 2506 #endif 2507 int 2508 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2509 { 2510 int fd = uap->fd; 2511 const char *path = uap->path; 2512 u_long flags = uap->flags; 2513 int atflag = uap->atflag; 2514 2515 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2516 return (EINVAL); 2517 2518 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2519 } 2520 2521 /* 2522 * Same as chflags() but doesn't follow symlinks. 2523 */ 2524 int 2525 sys_lchflags(td, uap) 2526 struct thread *td; 2527 register struct lchflags_args /* { 2528 const char *path; 2529 u_long flags; 2530 } */ *uap; 2531 { 2532 2533 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2534 uap->flags, AT_SYMLINK_NOFOLLOW)); 2535 } 2536 2537 static int 2538 kern_chflagsat(struct thread *td, int fd, const char *path, 2539 enum uio_seg pathseg, u_long flags, int atflag) 2540 { 2541 struct nameidata nd; 2542 cap_rights_t rights; 2543 int error, follow; 2544 2545 AUDIT_ARG_FFLAGS(flags); 2546 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2547 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2548 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2549 if ((error = namei(&nd)) != 0) 2550 return (error); 2551 NDFREE(&nd, NDF_ONLY_PNBUF); 2552 error = setfflags(td, nd.ni_vp, flags); 2553 vrele(nd.ni_vp); 2554 return (error); 2555 } 2556 2557 /* 2558 * Change flags of a file given a file descriptor. 2559 */ 2560 #ifndef _SYS_SYSPROTO_H_ 2561 struct fchflags_args { 2562 int fd; 2563 u_long flags; 2564 }; 2565 #endif 2566 int 2567 sys_fchflags(td, uap) 2568 struct thread *td; 2569 register struct fchflags_args /* { 2570 int fd; 2571 u_long flags; 2572 } */ *uap; 2573 { 2574 struct file *fp; 2575 cap_rights_t rights; 2576 int error; 2577 2578 AUDIT_ARG_FD(uap->fd); 2579 AUDIT_ARG_FFLAGS(uap->flags); 2580 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2581 &fp); 2582 if (error != 0) 2583 return (error); 2584 #ifdef AUDIT 2585 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2586 AUDIT_ARG_VNODE1(fp->f_vnode); 2587 VOP_UNLOCK(fp->f_vnode, 0); 2588 #endif 2589 error = setfflags(td, fp->f_vnode, uap->flags); 2590 fdrop(fp, td); 2591 return (error); 2592 } 2593 2594 /* 2595 * Common implementation code for chmod(), lchmod() and fchmod(). 2596 */ 2597 int 2598 setfmode(td, cred, vp, mode) 2599 struct thread *td; 2600 struct ucred *cred; 2601 struct vnode *vp; 2602 int mode; 2603 { 2604 struct mount *mp; 2605 struct vattr vattr; 2606 int error; 2607 2608 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2609 return (error); 2610 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2611 VATTR_NULL(&vattr); 2612 vattr.va_mode = mode & ALLPERMS; 2613 #ifdef MAC 2614 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2615 if (error == 0) 2616 #endif 2617 error = VOP_SETATTR(vp, &vattr, cred); 2618 VOP_UNLOCK(vp, 0); 2619 vn_finished_write(mp); 2620 return (error); 2621 } 2622 2623 /* 2624 * Change mode of a file given path name. 2625 */ 2626 #ifndef _SYS_SYSPROTO_H_ 2627 struct chmod_args { 2628 char *path; 2629 int mode; 2630 }; 2631 #endif 2632 int 2633 sys_chmod(td, uap) 2634 struct thread *td; 2635 register struct chmod_args /* { 2636 char *path; 2637 int mode; 2638 } */ *uap; 2639 { 2640 2641 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2642 uap->mode, 0)); 2643 } 2644 2645 #ifndef _SYS_SYSPROTO_H_ 2646 struct fchmodat_args { 2647 int dirfd; 2648 char *path; 2649 mode_t mode; 2650 int flag; 2651 } 2652 #endif 2653 int 2654 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2655 { 2656 int flag = uap->flag; 2657 int fd = uap->fd; 2658 char *path = uap->path; 2659 mode_t mode = uap->mode; 2660 2661 if (flag & ~AT_SYMLINK_NOFOLLOW) 2662 return (EINVAL); 2663 2664 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2665 } 2666 2667 /* 2668 * Change mode of a file given path name (don't follow links.) 2669 */ 2670 #ifndef _SYS_SYSPROTO_H_ 2671 struct lchmod_args { 2672 char *path; 2673 int mode; 2674 }; 2675 #endif 2676 int 2677 sys_lchmod(td, uap) 2678 struct thread *td; 2679 register struct lchmod_args /* { 2680 char *path; 2681 int mode; 2682 } */ *uap; 2683 { 2684 2685 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2686 uap->mode, AT_SYMLINK_NOFOLLOW)); 2687 } 2688 2689 int 2690 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2691 mode_t mode, int flag) 2692 { 2693 struct nameidata nd; 2694 cap_rights_t rights; 2695 int error, follow; 2696 2697 AUDIT_ARG_MODE(mode); 2698 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2699 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2700 cap_rights_init(&rights, CAP_FCHMOD), td); 2701 if ((error = namei(&nd)) != 0) 2702 return (error); 2703 NDFREE(&nd, NDF_ONLY_PNBUF); 2704 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2705 vrele(nd.ni_vp); 2706 return (error); 2707 } 2708 2709 /* 2710 * Change mode of a file given a file descriptor. 2711 */ 2712 #ifndef _SYS_SYSPROTO_H_ 2713 struct fchmod_args { 2714 int fd; 2715 int mode; 2716 }; 2717 #endif 2718 int 2719 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2720 { 2721 struct file *fp; 2722 cap_rights_t rights; 2723 int error; 2724 2725 AUDIT_ARG_FD(uap->fd); 2726 AUDIT_ARG_MODE(uap->mode); 2727 2728 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2729 if (error != 0) 2730 return (error); 2731 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2732 fdrop(fp, td); 2733 return (error); 2734 } 2735 2736 /* 2737 * Common implementation for chown(), lchown(), and fchown() 2738 */ 2739 int 2740 setfown(td, cred, vp, uid, gid) 2741 struct thread *td; 2742 struct ucred *cred; 2743 struct vnode *vp; 2744 uid_t uid; 2745 gid_t gid; 2746 { 2747 struct mount *mp; 2748 struct vattr vattr; 2749 int error; 2750 2751 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2752 return (error); 2753 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2754 VATTR_NULL(&vattr); 2755 vattr.va_uid = uid; 2756 vattr.va_gid = gid; 2757 #ifdef MAC 2758 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2759 vattr.va_gid); 2760 if (error == 0) 2761 #endif 2762 error = VOP_SETATTR(vp, &vattr, cred); 2763 VOP_UNLOCK(vp, 0); 2764 vn_finished_write(mp); 2765 return (error); 2766 } 2767 2768 /* 2769 * Set ownership given a path name. 2770 */ 2771 #ifndef _SYS_SYSPROTO_H_ 2772 struct chown_args { 2773 char *path; 2774 int uid; 2775 int gid; 2776 }; 2777 #endif 2778 int 2779 sys_chown(td, uap) 2780 struct thread *td; 2781 register struct chown_args /* { 2782 char *path; 2783 int uid; 2784 int gid; 2785 } */ *uap; 2786 { 2787 2788 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2789 uap->gid, 0)); 2790 } 2791 2792 #ifndef _SYS_SYSPROTO_H_ 2793 struct fchownat_args { 2794 int fd; 2795 const char * path; 2796 uid_t uid; 2797 gid_t gid; 2798 int flag; 2799 }; 2800 #endif 2801 int 2802 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2803 { 2804 int flag; 2805 2806 flag = uap->flag; 2807 if (flag & ~AT_SYMLINK_NOFOLLOW) 2808 return (EINVAL); 2809 2810 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2811 uap->gid, uap->flag)); 2812 } 2813 2814 int 2815 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2816 int uid, int gid, int flag) 2817 { 2818 struct nameidata nd; 2819 cap_rights_t rights; 2820 int error, follow; 2821 2822 AUDIT_ARG_OWNER(uid, gid); 2823 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2824 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2825 cap_rights_init(&rights, CAP_FCHOWN), td); 2826 2827 if ((error = namei(&nd)) != 0) 2828 return (error); 2829 NDFREE(&nd, NDF_ONLY_PNBUF); 2830 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2831 vrele(nd.ni_vp); 2832 return (error); 2833 } 2834 2835 /* 2836 * Set ownership given a path name, do not cross symlinks. 2837 */ 2838 #ifndef _SYS_SYSPROTO_H_ 2839 struct lchown_args { 2840 char *path; 2841 int uid; 2842 int gid; 2843 }; 2844 #endif 2845 int 2846 sys_lchown(td, uap) 2847 struct thread *td; 2848 register struct lchown_args /* { 2849 char *path; 2850 int uid; 2851 int gid; 2852 } */ *uap; 2853 { 2854 2855 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2856 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2857 } 2858 2859 /* 2860 * Set ownership given a file descriptor. 2861 */ 2862 #ifndef _SYS_SYSPROTO_H_ 2863 struct fchown_args { 2864 int fd; 2865 int uid; 2866 int gid; 2867 }; 2868 #endif 2869 int 2870 sys_fchown(td, uap) 2871 struct thread *td; 2872 register struct fchown_args /* { 2873 int fd; 2874 int uid; 2875 int gid; 2876 } */ *uap; 2877 { 2878 struct file *fp; 2879 cap_rights_t rights; 2880 int error; 2881 2882 AUDIT_ARG_FD(uap->fd); 2883 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2884 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2885 if (error != 0) 2886 return (error); 2887 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2888 fdrop(fp, td); 2889 return (error); 2890 } 2891 2892 /* 2893 * Common implementation code for utimes(), lutimes(), and futimes(). 2894 */ 2895 static int 2896 getutimes(usrtvp, tvpseg, tsp) 2897 const struct timeval *usrtvp; 2898 enum uio_seg tvpseg; 2899 struct timespec *tsp; 2900 { 2901 struct timeval tv[2]; 2902 const struct timeval *tvp; 2903 int error; 2904 2905 if (usrtvp == NULL) { 2906 vfs_timestamp(&tsp[0]); 2907 tsp[1] = tsp[0]; 2908 } else { 2909 if (tvpseg == UIO_SYSSPACE) { 2910 tvp = usrtvp; 2911 } else { 2912 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2913 return (error); 2914 tvp = tv; 2915 } 2916 2917 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2918 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2919 return (EINVAL); 2920 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2921 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2922 } 2923 return (0); 2924 } 2925 2926 /* 2927 * Common implementation code for futimens(), utimensat(). 2928 */ 2929 #define UTIMENS_NULL 0x1 2930 #define UTIMENS_EXIT 0x2 2931 static int 2932 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2933 struct timespec *tsp, int *retflags) 2934 { 2935 struct timespec tsnow; 2936 int error; 2937 2938 vfs_timestamp(&tsnow); 2939 *retflags = 0; 2940 if (usrtsp == NULL) { 2941 tsp[0] = tsnow; 2942 tsp[1] = tsnow; 2943 *retflags |= UTIMENS_NULL; 2944 return (0); 2945 } 2946 if (tspseg == UIO_SYSSPACE) { 2947 tsp[0] = usrtsp[0]; 2948 tsp[1] = usrtsp[1]; 2949 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2950 return (error); 2951 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2952 *retflags |= UTIMENS_EXIT; 2953 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2954 *retflags |= UTIMENS_NULL; 2955 if (tsp[0].tv_nsec == UTIME_OMIT) 2956 tsp[0].tv_sec = VNOVAL; 2957 else if (tsp[0].tv_nsec == UTIME_NOW) 2958 tsp[0] = tsnow; 2959 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2960 return (EINVAL); 2961 if (tsp[1].tv_nsec == UTIME_OMIT) 2962 tsp[1].tv_sec = VNOVAL; 2963 else if (tsp[1].tv_nsec == UTIME_NOW) 2964 tsp[1] = tsnow; 2965 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2966 return (EINVAL); 2967 2968 return (0); 2969 } 2970 2971 /* 2972 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2973 * and utimensat(). 2974 */ 2975 static int 2976 setutimes(td, vp, ts, numtimes, nullflag) 2977 struct thread *td; 2978 struct vnode *vp; 2979 const struct timespec *ts; 2980 int numtimes; 2981 int nullflag; 2982 { 2983 struct mount *mp; 2984 struct vattr vattr; 2985 int error, setbirthtime; 2986 2987 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2988 return (error); 2989 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2990 setbirthtime = 0; 2991 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2992 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2993 setbirthtime = 1; 2994 VATTR_NULL(&vattr); 2995 vattr.va_atime = ts[0]; 2996 vattr.va_mtime = ts[1]; 2997 if (setbirthtime) 2998 vattr.va_birthtime = ts[1]; 2999 if (numtimes > 2) 3000 vattr.va_birthtime = ts[2]; 3001 if (nullflag) 3002 vattr.va_vaflags |= VA_UTIMES_NULL; 3003 #ifdef MAC 3004 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3005 vattr.va_mtime); 3006 #endif 3007 if (error == 0) 3008 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3009 VOP_UNLOCK(vp, 0); 3010 vn_finished_write(mp); 3011 return (error); 3012 } 3013 3014 /* 3015 * Set the access and modification times of a file. 3016 */ 3017 #ifndef _SYS_SYSPROTO_H_ 3018 struct utimes_args { 3019 char *path; 3020 struct timeval *tptr; 3021 }; 3022 #endif 3023 int 3024 sys_utimes(td, uap) 3025 struct thread *td; 3026 register struct utimes_args /* { 3027 char *path; 3028 struct timeval *tptr; 3029 } */ *uap; 3030 { 3031 3032 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3033 uap->tptr, UIO_USERSPACE)); 3034 } 3035 3036 #ifndef _SYS_SYSPROTO_H_ 3037 struct futimesat_args { 3038 int fd; 3039 const char * path; 3040 const struct timeval * times; 3041 }; 3042 #endif 3043 int 3044 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3045 { 3046 3047 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3048 uap->times, UIO_USERSPACE)); 3049 } 3050 3051 int 3052 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3053 struct timeval *tptr, enum uio_seg tptrseg) 3054 { 3055 struct nameidata nd; 3056 struct timespec ts[2]; 3057 cap_rights_t rights; 3058 int error; 3059 3060 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3061 return (error); 3062 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3063 cap_rights_init(&rights, CAP_FUTIMES), td); 3064 3065 if ((error = namei(&nd)) != 0) 3066 return (error); 3067 NDFREE(&nd, NDF_ONLY_PNBUF); 3068 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3069 vrele(nd.ni_vp); 3070 return (error); 3071 } 3072 3073 /* 3074 * Set the access and modification times of a file. 3075 */ 3076 #ifndef _SYS_SYSPROTO_H_ 3077 struct lutimes_args { 3078 char *path; 3079 struct timeval *tptr; 3080 }; 3081 #endif 3082 int 3083 sys_lutimes(td, uap) 3084 struct thread *td; 3085 register struct lutimes_args /* { 3086 char *path; 3087 struct timeval *tptr; 3088 } */ *uap; 3089 { 3090 3091 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3092 UIO_USERSPACE)); 3093 } 3094 3095 int 3096 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3097 struct timeval *tptr, enum uio_seg tptrseg) 3098 { 3099 struct timespec ts[2]; 3100 struct nameidata nd; 3101 int error; 3102 3103 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3104 return (error); 3105 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3106 if ((error = namei(&nd)) != 0) 3107 return (error); 3108 NDFREE(&nd, NDF_ONLY_PNBUF); 3109 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3110 vrele(nd.ni_vp); 3111 return (error); 3112 } 3113 3114 /* 3115 * Set the access and modification times of a file. 3116 */ 3117 #ifndef _SYS_SYSPROTO_H_ 3118 struct futimes_args { 3119 int fd; 3120 struct timeval *tptr; 3121 }; 3122 #endif 3123 int 3124 sys_futimes(td, uap) 3125 struct thread *td; 3126 register struct futimes_args /* { 3127 int fd; 3128 struct timeval *tptr; 3129 } */ *uap; 3130 { 3131 3132 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3133 } 3134 3135 int 3136 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3137 enum uio_seg tptrseg) 3138 { 3139 struct timespec ts[2]; 3140 struct file *fp; 3141 cap_rights_t rights; 3142 int error; 3143 3144 AUDIT_ARG_FD(fd); 3145 error = getutimes(tptr, tptrseg, ts); 3146 if (error != 0) 3147 return (error); 3148 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3149 if (error != 0) 3150 return (error); 3151 #ifdef AUDIT 3152 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3153 AUDIT_ARG_VNODE1(fp->f_vnode); 3154 VOP_UNLOCK(fp->f_vnode, 0); 3155 #endif 3156 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3157 fdrop(fp, td); 3158 return (error); 3159 } 3160 3161 int 3162 sys_futimens(struct thread *td, struct futimens_args *uap) 3163 { 3164 3165 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3166 } 3167 3168 int 3169 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3170 enum uio_seg tptrseg) 3171 { 3172 struct timespec ts[2]; 3173 struct file *fp; 3174 cap_rights_t rights; 3175 int error, flags; 3176 3177 AUDIT_ARG_FD(fd); 3178 error = getutimens(tptr, tptrseg, ts, &flags); 3179 if (error != 0) 3180 return (error); 3181 if (flags & UTIMENS_EXIT) 3182 return (0); 3183 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3184 if (error != 0) 3185 return (error); 3186 #ifdef AUDIT 3187 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3188 AUDIT_ARG_VNODE1(fp->f_vnode); 3189 VOP_UNLOCK(fp->f_vnode, 0); 3190 #endif 3191 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3192 fdrop(fp, td); 3193 return (error); 3194 } 3195 3196 int 3197 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3198 { 3199 3200 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3201 uap->times, UIO_USERSPACE, uap->flag)); 3202 } 3203 3204 int 3205 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3206 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3207 { 3208 struct nameidata nd; 3209 struct timespec ts[2]; 3210 cap_rights_t rights; 3211 int error, flags; 3212 3213 if (flag & ~AT_SYMLINK_NOFOLLOW) 3214 return (EINVAL); 3215 3216 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3217 return (error); 3218 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3219 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3220 cap_rights_init(&rights, CAP_FUTIMES), td); 3221 if ((error = namei(&nd)) != 0) 3222 return (error); 3223 /* 3224 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3225 * POSIX states: 3226 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3227 * "Search permission is denied by a component of the path prefix." 3228 */ 3229 NDFREE(&nd, NDF_ONLY_PNBUF); 3230 if ((flags & UTIMENS_EXIT) == 0) 3231 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3232 vrele(nd.ni_vp); 3233 return (error); 3234 } 3235 3236 /* 3237 * Truncate a file given its path name. 3238 */ 3239 #ifndef _SYS_SYSPROTO_H_ 3240 struct truncate_args { 3241 char *path; 3242 int pad; 3243 off_t length; 3244 }; 3245 #endif 3246 int 3247 sys_truncate(td, uap) 3248 struct thread *td; 3249 register struct truncate_args /* { 3250 char *path; 3251 int pad; 3252 off_t length; 3253 } */ *uap; 3254 { 3255 3256 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3257 } 3258 3259 int 3260 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3261 { 3262 struct mount *mp; 3263 struct vnode *vp; 3264 void *rl_cookie; 3265 struct vattr vattr; 3266 struct nameidata nd; 3267 int error; 3268 3269 if (length < 0) 3270 return(EINVAL); 3271 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3272 if ((error = namei(&nd)) != 0) 3273 return (error); 3274 vp = nd.ni_vp; 3275 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3276 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3277 vn_rangelock_unlock(vp, rl_cookie); 3278 vrele(vp); 3279 return (error); 3280 } 3281 NDFREE(&nd, NDF_ONLY_PNBUF); 3282 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3283 if (vp->v_type == VDIR) 3284 error = EISDIR; 3285 #ifdef MAC 3286 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3287 } 3288 #endif 3289 else if ((error = vn_writechk(vp)) == 0 && 3290 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3291 VATTR_NULL(&vattr); 3292 vattr.va_size = length; 3293 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3294 } 3295 VOP_UNLOCK(vp, 0); 3296 vn_finished_write(mp); 3297 vn_rangelock_unlock(vp, rl_cookie); 3298 vrele(vp); 3299 return (error); 3300 } 3301 3302 #if defined(COMPAT_43) 3303 /* 3304 * Truncate a file given its path name. 3305 */ 3306 #ifndef _SYS_SYSPROTO_H_ 3307 struct otruncate_args { 3308 char *path; 3309 long length; 3310 }; 3311 #endif 3312 int 3313 otruncate(td, uap) 3314 struct thread *td; 3315 register struct otruncate_args /* { 3316 char *path; 3317 long length; 3318 } */ *uap; 3319 { 3320 struct truncate_args /* { 3321 char *path; 3322 int pad; 3323 off_t length; 3324 } */ nuap; 3325 3326 nuap.path = uap->path; 3327 nuap.length = uap->length; 3328 return (sys_truncate(td, &nuap)); 3329 } 3330 #endif /* COMPAT_43 */ 3331 3332 #if defined(COMPAT_FREEBSD6) 3333 /* Versions with the pad argument */ 3334 int 3335 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3336 { 3337 struct truncate_args ouap; 3338 3339 ouap.path = uap->path; 3340 ouap.length = uap->length; 3341 return (sys_truncate(td, &ouap)); 3342 } 3343 3344 int 3345 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3346 { 3347 struct ftruncate_args ouap; 3348 3349 ouap.fd = uap->fd; 3350 ouap.length = uap->length; 3351 return (sys_ftruncate(td, &ouap)); 3352 } 3353 #endif 3354 3355 /* 3356 * Sync an open file. 3357 */ 3358 #ifndef _SYS_SYSPROTO_H_ 3359 struct fsync_args { 3360 int fd; 3361 }; 3362 #endif 3363 int 3364 sys_fsync(td, uap) 3365 struct thread *td; 3366 struct fsync_args /* { 3367 int fd; 3368 } */ *uap; 3369 { 3370 struct vnode *vp; 3371 struct mount *mp; 3372 struct file *fp; 3373 cap_rights_t rights; 3374 int error, lock_flags; 3375 3376 AUDIT_ARG_FD(uap->fd); 3377 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3378 if (error != 0) 3379 return (error); 3380 vp = fp->f_vnode; 3381 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3382 if (error != 0) 3383 goto drop; 3384 if (MNT_SHARED_WRITES(mp) || 3385 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3386 lock_flags = LK_SHARED; 3387 } else { 3388 lock_flags = LK_EXCLUSIVE; 3389 } 3390 vn_lock(vp, lock_flags | LK_RETRY); 3391 AUDIT_ARG_VNODE1(vp); 3392 if (vp->v_object != NULL) { 3393 VM_OBJECT_WLOCK(vp->v_object); 3394 vm_object_page_clean(vp->v_object, 0, 0, 0); 3395 VM_OBJECT_WUNLOCK(vp->v_object); 3396 } 3397 error = VOP_FSYNC(vp, MNT_WAIT, td); 3398 3399 VOP_UNLOCK(vp, 0); 3400 vn_finished_write(mp); 3401 drop: 3402 fdrop(fp, td); 3403 return (error); 3404 } 3405 3406 /* 3407 * Rename files. Source and destination must either both be directories, or 3408 * both not be directories. If target is a directory, it must be empty. 3409 */ 3410 #ifndef _SYS_SYSPROTO_H_ 3411 struct rename_args { 3412 char *from; 3413 char *to; 3414 }; 3415 #endif 3416 int 3417 sys_rename(td, uap) 3418 struct thread *td; 3419 register struct rename_args /* { 3420 char *from; 3421 char *to; 3422 } */ *uap; 3423 { 3424 3425 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3426 uap->to, UIO_USERSPACE)); 3427 } 3428 3429 #ifndef _SYS_SYSPROTO_H_ 3430 struct renameat_args { 3431 int oldfd; 3432 char *old; 3433 int newfd; 3434 char *new; 3435 }; 3436 #endif 3437 int 3438 sys_renameat(struct thread *td, struct renameat_args *uap) 3439 { 3440 3441 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3442 UIO_USERSPACE)); 3443 } 3444 3445 int 3446 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3447 enum uio_seg pathseg) 3448 { 3449 struct mount *mp = NULL; 3450 struct vnode *tvp, *fvp, *tdvp; 3451 struct nameidata fromnd, tond; 3452 cap_rights_t rights; 3453 int error; 3454 3455 again: 3456 bwillwrite(); 3457 #ifdef MAC 3458 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3459 AUDITVNODE1, pathseg, old, oldfd, 3460 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3461 #else 3462 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3463 pathseg, old, oldfd, 3464 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3465 #endif 3466 3467 if ((error = namei(&fromnd)) != 0) 3468 return (error); 3469 #ifdef MAC 3470 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3471 fromnd.ni_vp, &fromnd.ni_cnd); 3472 VOP_UNLOCK(fromnd.ni_dvp, 0); 3473 if (fromnd.ni_dvp != fromnd.ni_vp) 3474 VOP_UNLOCK(fromnd.ni_vp, 0); 3475 #endif 3476 fvp = fromnd.ni_vp; 3477 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3478 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3479 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3480 if (fromnd.ni_vp->v_type == VDIR) 3481 tond.ni_cnd.cn_flags |= WILLBEDIR; 3482 if ((error = namei(&tond)) != 0) { 3483 /* Translate error code for rename("dir1", "dir2/."). */ 3484 if (error == EISDIR && fvp->v_type == VDIR) 3485 error = EINVAL; 3486 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3487 vrele(fromnd.ni_dvp); 3488 vrele(fvp); 3489 goto out1; 3490 } 3491 tdvp = tond.ni_dvp; 3492 tvp = tond.ni_vp; 3493 error = vn_start_write(fvp, &mp, V_NOWAIT); 3494 if (error != 0) { 3495 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3496 NDFREE(&tond, NDF_ONLY_PNBUF); 3497 if (tvp != NULL) 3498 vput(tvp); 3499 if (tdvp == tvp) 3500 vrele(tdvp); 3501 else 3502 vput(tdvp); 3503 vrele(fromnd.ni_dvp); 3504 vrele(fvp); 3505 vrele(tond.ni_startdir); 3506 if (fromnd.ni_startdir != NULL) 3507 vrele(fromnd.ni_startdir); 3508 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3509 if (error != 0) 3510 return (error); 3511 goto again; 3512 } 3513 if (tvp != NULL) { 3514 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3515 error = ENOTDIR; 3516 goto out; 3517 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3518 error = EISDIR; 3519 goto out; 3520 } 3521 #ifdef CAPABILITIES 3522 if (newfd != AT_FDCWD) { 3523 /* 3524 * If the target already exists we require CAP_UNLINKAT 3525 * from 'newfd'. 3526 */ 3527 error = cap_check(&tond.ni_filecaps.fc_rights, 3528 cap_rights_init(&rights, CAP_UNLINKAT)); 3529 if (error != 0) 3530 goto out; 3531 } 3532 #endif 3533 } 3534 if (fvp == tdvp) { 3535 error = EINVAL; 3536 goto out; 3537 } 3538 /* 3539 * If the source is the same as the destination (that is, if they 3540 * are links to the same vnode), then there is nothing to do. 3541 */ 3542 if (fvp == tvp) 3543 error = -1; 3544 #ifdef MAC 3545 else 3546 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3547 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3548 #endif 3549 out: 3550 if (error == 0) { 3551 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3552 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3553 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3554 NDFREE(&tond, NDF_ONLY_PNBUF); 3555 } else { 3556 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3557 NDFREE(&tond, NDF_ONLY_PNBUF); 3558 if (tvp != NULL) 3559 vput(tvp); 3560 if (tdvp == tvp) 3561 vrele(tdvp); 3562 else 3563 vput(tdvp); 3564 vrele(fromnd.ni_dvp); 3565 vrele(fvp); 3566 } 3567 vrele(tond.ni_startdir); 3568 vn_finished_write(mp); 3569 out1: 3570 if (fromnd.ni_startdir) 3571 vrele(fromnd.ni_startdir); 3572 if (error == -1) 3573 return (0); 3574 return (error); 3575 } 3576 3577 /* 3578 * Make a directory file. 3579 */ 3580 #ifndef _SYS_SYSPROTO_H_ 3581 struct mkdir_args { 3582 char *path; 3583 int mode; 3584 }; 3585 #endif 3586 int 3587 sys_mkdir(td, uap) 3588 struct thread *td; 3589 register struct mkdir_args /* { 3590 char *path; 3591 int mode; 3592 } */ *uap; 3593 { 3594 3595 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3596 uap->mode)); 3597 } 3598 3599 #ifndef _SYS_SYSPROTO_H_ 3600 struct mkdirat_args { 3601 int fd; 3602 char *path; 3603 mode_t mode; 3604 }; 3605 #endif 3606 int 3607 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3608 { 3609 3610 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3611 } 3612 3613 int 3614 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3615 int mode) 3616 { 3617 struct mount *mp; 3618 struct vnode *vp; 3619 struct vattr vattr; 3620 struct nameidata nd; 3621 cap_rights_t rights; 3622 int error; 3623 3624 AUDIT_ARG_MODE(mode); 3625 restart: 3626 bwillwrite(); 3627 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3628 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3629 td); 3630 nd.ni_cnd.cn_flags |= WILLBEDIR; 3631 if ((error = namei(&nd)) != 0) 3632 return (error); 3633 vp = nd.ni_vp; 3634 if (vp != NULL) { 3635 NDFREE(&nd, NDF_ONLY_PNBUF); 3636 /* 3637 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3638 * the strange behaviour of leaving the vnode unlocked 3639 * if the target is the same vnode as the parent. 3640 */ 3641 if (vp == nd.ni_dvp) 3642 vrele(nd.ni_dvp); 3643 else 3644 vput(nd.ni_dvp); 3645 vrele(vp); 3646 return (EEXIST); 3647 } 3648 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3649 NDFREE(&nd, NDF_ONLY_PNBUF); 3650 vput(nd.ni_dvp); 3651 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3652 return (error); 3653 goto restart; 3654 } 3655 VATTR_NULL(&vattr); 3656 vattr.va_type = VDIR; 3657 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3658 #ifdef MAC 3659 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3660 &vattr); 3661 if (error != 0) 3662 goto out; 3663 #endif 3664 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3665 #ifdef MAC 3666 out: 3667 #endif 3668 NDFREE(&nd, NDF_ONLY_PNBUF); 3669 vput(nd.ni_dvp); 3670 if (error == 0) 3671 vput(nd.ni_vp); 3672 vn_finished_write(mp); 3673 return (error); 3674 } 3675 3676 /* 3677 * Remove a directory file. 3678 */ 3679 #ifndef _SYS_SYSPROTO_H_ 3680 struct rmdir_args { 3681 char *path; 3682 }; 3683 #endif 3684 int 3685 sys_rmdir(td, uap) 3686 struct thread *td; 3687 struct rmdir_args /* { 3688 char *path; 3689 } */ *uap; 3690 { 3691 3692 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3693 } 3694 3695 int 3696 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3697 { 3698 struct mount *mp; 3699 struct vnode *vp; 3700 struct nameidata nd; 3701 cap_rights_t rights; 3702 int error; 3703 3704 restart: 3705 bwillwrite(); 3706 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3707 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3708 if ((error = namei(&nd)) != 0) 3709 return (error); 3710 vp = nd.ni_vp; 3711 if (vp->v_type != VDIR) { 3712 error = ENOTDIR; 3713 goto out; 3714 } 3715 /* 3716 * No rmdir "." please. 3717 */ 3718 if (nd.ni_dvp == vp) { 3719 error = EINVAL; 3720 goto out; 3721 } 3722 /* 3723 * The root of a mounted filesystem cannot be deleted. 3724 */ 3725 if (vp->v_vflag & VV_ROOT) { 3726 error = EBUSY; 3727 goto out; 3728 } 3729 #ifdef MAC 3730 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3731 &nd.ni_cnd); 3732 if (error != 0) 3733 goto out; 3734 #endif 3735 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3736 NDFREE(&nd, NDF_ONLY_PNBUF); 3737 vput(vp); 3738 if (nd.ni_dvp == vp) 3739 vrele(nd.ni_dvp); 3740 else 3741 vput(nd.ni_dvp); 3742 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3743 return (error); 3744 goto restart; 3745 } 3746 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3747 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3748 vn_finished_write(mp); 3749 out: 3750 NDFREE(&nd, NDF_ONLY_PNBUF); 3751 vput(vp); 3752 if (nd.ni_dvp == vp) 3753 vrele(nd.ni_dvp); 3754 else 3755 vput(nd.ni_dvp); 3756 return (error); 3757 } 3758 3759 #ifdef COMPAT_43 3760 /* 3761 * Read a block of directory entries in a filesystem independent format. 3762 */ 3763 #ifndef _SYS_SYSPROTO_H_ 3764 struct ogetdirentries_args { 3765 int fd; 3766 char *buf; 3767 u_int count; 3768 long *basep; 3769 }; 3770 #endif 3771 int 3772 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3773 { 3774 long loff; 3775 int error; 3776 3777 error = kern_ogetdirentries(td, uap, &loff); 3778 if (error == 0) 3779 error = copyout(&loff, uap->basep, sizeof(long)); 3780 return (error); 3781 } 3782 3783 int 3784 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3785 long *ploff) 3786 { 3787 struct vnode *vp; 3788 struct file *fp; 3789 struct uio auio, kuio; 3790 struct iovec aiov, kiov; 3791 struct dirent *dp, *edp; 3792 cap_rights_t rights; 3793 caddr_t dirbuf; 3794 int error, eofflag, readcnt; 3795 long loff; 3796 off_t foffset; 3797 3798 /* XXX arbitrary sanity limit on `count'. */ 3799 if (uap->count > 64 * 1024) 3800 return (EINVAL); 3801 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3802 if (error != 0) 3803 return (error); 3804 if ((fp->f_flag & FREAD) == 0) { 3805 fdrop(fp, td); 3806 return (EBADF); 3807 } 3808 vp = fp->f_vnode; 3809 foffset = foffset_lock(fp, 0); 3810 unionread: 3811 if (vp->v_type != VDIR) { 3812 foffset_unlock(fp, foffset, 0); 3813 fdrop(fp, td); 3814 return (EINVAL); 3815 } 3816 aiov.iov_base = uap->buf; 3817 aiov.iov_len = uap->count; 3818 auio.uio_iov = &aiov; 3819 auio.uio_iovcnt = 1; 3820 auio.uio_rw = UIO_READ; 3821 auio.uio_segflg = UIO_USERSPACE; 3822 auio.uio_td = td; 3823 auio.uio_resid = uap->count; 3824 vn_lock(vp, LK_SHARED | LK_RETRY); 3825 loff = auio.uio_offset = foffset; 3826 #ifdef MAC 3827 error = mac_vnode_check_readdir(td->td_ucred, vp); 3828 if (error != 0) { 3829 VOP_UNLOCK(vp, 0); 3830 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3831 fdrop(fp, td); 3832 return (error); 3833 } 3834 #endif 3835 # if (BYTE_ORDER != LITTLE_ENDIAN) 3836 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3837 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3838 NULL, NULL); 3839 foffset = auio.uio_offset; 3840 } else 3841 # endif 3842 { 3843 kuio = auio; 3844 kuio.uio_iov = &kiov; 3845 kuio.uio_segflg = UIO_SYSSPACE; 3846 kiov.iov_len = uap->count; 3847 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3848 kiov.iov_base = dirbuf; 3849 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3850 NULL, NULL); 3851 foffset = kuio.uio_offset; 3852 if (error == 0) { 3853 readcnt = uap->count - kuio.uio_resid; 3854 edp = (struct dirent *)&dirbuf[readcnt]; 3855 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3856 # if (BYTE_ORDER == LITTLE_ENDIAN) 3857 /* 3858 * The expected low byte of 3859 * dp->d_namlen is our dp->d_type. 3860 * The high MBZ byte of dp->d_namlen 3861 * is our dp->d_namlen. 3862 */ 3863 dp->d_type = dp->d_namlen; 3864 dp->d_namlen = 0; 3865 # else 3866 /* 3867 * The dp->d_type is the high byte 3868 * of the expected dp->d_namlen, 3869 * so must be zero'ed. 3870 */ 3871 dp->d_type = 0; 3872 # endif 3873 if (dp->d_reclen > 0) { 3874 dp = (struct dirent *) 3875 ((char *)dp + dp->d_reclen); 3876 } else { 3877 error = EIO; 3878 break; 3879 } 3880 } 3881 if (dp >= edp) 3882 error = uiomove(dirbuf, readcnt, &auio); 3883 } 3884 free(dirbuf, M_TEMP); 3885 } 3886 if (error != 0) { 3887 VOP_UNLOCK(vp, 0); 3888 foffset_unlock(fp, foffset, 0); 3889 fdrop(fp, td); 3890 return (error); 3891 } 3892 if (uap->count == auio.uio_resid && 3893 (vp->v_vflag & VV_ROOT) && 3894 (vp->v_mount->mnt_flag & MNT_UNION)) { 3895 struct vnode *tvp = vp; 3896 vp = vp->v_mount->mnt_vnodecovered; 3897 VREF(vp); 3898 fp->f_vnode = vp; 3899 fp->f_data = vp; 3900 foffset = 0; 3901 vput(tvp); 3902 goto unionread; 3903 } 3904 VOP_UNLOCK(vp, 0); 3905 foffset_unlock(fp, foffset, 0); 3906 fdrop(fp, td); 3907 td->td_retval[0] = uap->count - auio.uio_resid; 3908 if (error == 0) 3909 *ploff = loff; 3910 return (error); 3911 } 3912 #endif /* COMPAT_43 */ 3913 3914 /* 3915 * Read a block of directory entries in a filesystem independent format. 3916 */ 3917 #ifndef _SYS_SYSPROTO_H_ 3918 struct getdirentries_args { 3919 int fd; 3920 char *buf; 3921 u_int count; 3922 long *basep; 3923 }; 3924 #endif 3925 int 3926 sys_getdirentries(td, uap) 3927 struct thread *td; 3928 register struct getdirentries_args /* { 3929 int fd; 3930 char *buf; 3931 u_int count; 3932 long *basep; 3933 } */ *uap; 3934 { 3935 long base; 3936 int error; 3937 3938 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3939 NULL, UIO_USERSPACE); 3940 if (error != 0) 3941 return (error); 3942 if (uap->basep != NULL) 3943 error = copyout(&base, uap->basep, sizeof(long)); 3944 return (error); 3945 } 3946 3947 int 3948 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3949 long *basep, ssize_t *residp, enum uio_seg bufseg) 3950 { 3951 struct vnode *vp; 3952 struct file *fp; 3953 struct uio auio; 3954 struct iovec aiov; 3955 cap_rights_t rights; 3956 long loff; 3957 int error, eofflag; 3958 off_t foffset; 3959 3960 AUDIT_ARG_FD(fd); 3961 if (count > IOSIZE_MAX) 3962 return (EINVAL); 3963 auio.uio_resid = count; 3964 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3965 if (error != 0) 3966 return (error); 3967 if ((fp->f_flag & FREAD) == 0) { 3968 fdrop(fp, td); 3969 return (EBADF); 3970 } 3971 vp = fp->f_vnode; 3972 foffset = foffset_lock(fp, 0); 3973 unionread: 3974 if (vp->v_type != VDIR) { 3975 error = EINVAL; 3976 goto fail; 3977 } 3978 aiov.iov_base = buf; 3979 aiov.iov_len = count; 3980 auio.uio_iov = &aiov; 3981 auio.uio_iovcnt = 1; 3982 auio.uio_rw = UIO_READ; 3983 auio.uio_segflg = bufseg; 3984 auio.uio_td = td; 3985 vn_lock(vp, LK_SHARED | LK_RETRY); 3986 AUDIT_ARG_VNODE1(vp); 3987 loff = auio.uio_offset = foffset; 3988 #ifdef MAC 3989 error = mac_vnode_check_readdir(td->td_ucred, vp); 3990 if (error == 0) 3991 #endif 3992 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3993 NULL); 3994 foffset = auio.uio_offset; 3995 if (error != 0) { 3996 VOP_UNLOCK(vp, 0); 3997 goto fail; 3998 } 3999 if (count == auio.uio_resid && 4000 (vp->v_vflag & VV_ROOT) && 4001 (vp->v_mount->mnt_flag & MNT_UNION)) { 4002 struct vnode *tvp = vp; 4003 4004 vp = vp->v_mount->mnt_vnodecovered; 4005 VREF(vp); 4006 fp->f_vnode = vp; 4007 fp->f_data = vp; 4008 foffset = 0; 4009 vput(tvp); 4010 goto unionread; 4011 } 4012 VOP_UNLOCK(vp, 0); 4013 *basep = loff; 4014 if (residp != NULL) 4015 *residp = auio.uio_resid; 4016 td->td_retval[0] = count - auio.uio_resid; 4017 fail: 4018 foffset_unlock(fp, foffset, 0); 4019 fdrop(fp, td); 4020 return (error); 4021 } 4022 4023 #ifndef _SYS_SYSPROTO_H_ 4024 struct getdents_args { 4025 int fd; 4026 char *buf; 4027 size_t count; 4028 }; 4029 #endif 4030 int 4031 sys_getdents(td, uap) 4032 struct thread *td; 4033 register struct getdents_args /* { 4034 int fd; 4035 char *buf; 4036 u_int count; 4037 } */ *uap; 4038 { 4039 struct getdirentries_args ap; 4040 4041 ap.fd = uap->fd; 4042 ap.buf = uap->buf; 4043 ap.count = uap->count; 4044 ap.basep = NULL; 4045 return (sys_getdirentries(td, &ap)); 4046 } 4047 4048 /* 4049 * Set the mode mask for creation of filesystem nodes. 4050 */ 4051 #ifndef _SYS_SYSPROTO_H_ 4052 struct umask_args { 4053 int newmask; 4054 }; 4055 #endif 4056 int 4057 sys_umask(td, uap) 4058 struct thread *td; 4059 struct umask_args /* { 4060 int newmask; 4061 } */ *uap; 4062 { 4063 struct filedesc *fdp; 4064 4065 fdp = td->td_proc->p_fd; 4066 FILEDESC_XLOCK(fdp); 4067 td->td_retval[0] = fdp->fd_cmask; 4068 fdp->fd_cmask = uap->newmask & ALLPERMS; 4069 FILEDESC_XUNLOCK(fdp); 4070 return (0); 4071 } 4072 4073 /* 4074 * Void all references to file by ripping underlying filesystem away from 4075 * vnode. 4076 */ 4077 #ifndef _SYS_SYSPROTO_H_ 4078 struct revoke_args { 4079 char *path; 4080 }; 4081 #endif 4082 int 4083 sys_revoke(td, uap) 4084 struct thread *td; 4085 register struct revoke_args /* { 4086 char *path; 4087 } */ *uap; 4088 { 4089 struct vnode *vp; 4090 struct vattr vattr; 4091 struct nameidata nd; 4092 int error; 4093 4094 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4095 uap->path, td); 4096 if ((error = namei(&nd)) != 0) 4097 return (error); 4098 vp = nd.ni_vp; 4099 NDFREE(&nd, NDF_ONLY_PNBUF); 4100 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4101 error = EINVAL; 4102 goto out; 4103 } 4104 #ifdef MAC 4105 error = mac_vnode_check_revoke(td->td_ucred, vp); 4106 if (error != 0) 4107 goto out; 4108 #endif 4109 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4110 if (error != 0) 4111 goto out; 4112 if (td->td_ucred->cr_uid != vattr.va_uid) { 4113 error = priv_check(td, PRIV_VFS_ADMIN); 4114 if (error != 0) 4115 goto out; 4116 } 4117 if (vcount(vp) > 1) 4118 VOP_REVOKE(vp, REVOKEALL); 4119 out: 4120 vput(vp); 4121 return (error); 4122 } 4123 4124 /* 4125 * Convert a user file descriptor to a kernel file entry and check that, if it 4126 * is a capability, the correct rights are present. A reference on the file 4127 * entry is held upon returning. 4128 */ 4129 int 4130 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4131 { 4132 struct file *fp; 4133 int error; 4134 4135 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4136 if (error != 0) 4137 return (error); 4138 4139 /* 4140 * The file could be not of the vnode type, or it may be not 4141 * yet fully initialized, in which case the f_vnode pointer 4142 * may be set, but f_ops is still badfileops. E.g., 4143 * devfs_open() transiently create such situation to 4144 * facilitate csw d_fdopen(). 4145 * 4146 * Dupfdopen() handling in kern_openat() installs the 4147 * half-baked file into the process descriptor table, allowing 4148 * other thread to dereference it. Guard against the race by 4149 * checking f_ops. 4150 */ 4151 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4152 fdrop(fp, td); 4153 return (EINVAL); 4154 } 4155 *fpp = fp; 4156 return (0); 4157 } 4158 4159 4160 /* 4161 * Get an (NFS) file handle. 4162 */ 4163 #ifndef _SYS_SYSPROTO_H_ 4164 struct lgetfh_args { 4165 char *fname; 4166 fhandle_t *fhp; 4167 }; 4168 #endif 4169 int 4170 sys_lgetfh(td, uap) 4171 struct thread *td; 4172 register struct lgetfh_args *uap; 4173 { 4174 struct nameidata nd; 4175 fhandle_t fh; 4176 register struct vnode *vp; 4177 int error; 4178 4179 error = priv_check(td, PRIV_VFS_GETFH); 4180 if (error != 0) 4181 return (error); 4182 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4183 uap->fname, td); 4184 error = namei(&nd); 4185 if (error != 0) 4186 return (error); 4187 NDFREE(&nd, NDF_ONLY_PNBUF); 4188 vp = nd.ni_vp; 4189 bzero(&fh, sizeof(fh)); 4190 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4191 error = VOP_VPTOFH(vp, &fh.fh_fid); 4192 vput(vp); 4193 if (error == 0) 4194 error = copyout(&fh, uap->fhp, sizeof (fh)); 4195 return (error); 4196 } 4197 4198 #ifndef _SYS_SYSPROTO_H_ 4199 struct getfh_args { 4200 char *fname; 4201 fhandle_t *fhp; 4202 }; 4203 #endif 4204 int 4205 sys_getfh(td, uap) 4206 struct thread *td; 4207 register struct getfh_args *uap; 4208 { 4209 struct nameidata nd; 4210 fhandle_t fh; 4211 register struct vnode *vp; 4212 int error; 4213 4214 error = priv_check(td, PRIV_VFS_GETFH); 4215 if (error != 0) 4216 return (error); 4217 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4218 uap->fname, td); 4219 error = namei(&nd); 4220 if (error != 0) 4221 return (error); 4222 NDFREE(&nd, NDF_ONLY_PNBUF); 4223 vp = nd.ni_vp; 4224 bzero(&fh, sizeof(fh)); 4225 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4226 error = VOP_VPTOFH(vp, &fh.fh_fid); 4227 vput(vp); 4228 if (error == 0) 4229 error = copyout(&fh, uap->fhp, sizeof (fh)); 4230 return (error); 4231 } 4232 4233 /* 4234 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4235 * open descriptor. 4236 * 4237 * warning: do not remove the priv_check() call or this becomes one giant 4238 * security hole. 4239 */ 4240 #ifndef _SYS_SYSPROTO_H_ 4241 struct fhopen_args { 4242 const struct fhandle *u_fhp; 4243 int flags; 4244 }; 4245 #endif 4246 int 4247 sys_fhopen(td, uap) 4248 struct thread *td; 4249 struct fhopen_args /* { 4250 const struct fhandle *u_fhp; 4251 int flags; 4252 } */ *uap; 4253 { 4254 struct mount *mp; 4255 struct vnode *vp; 4256 struct fhandle fhp; 4257 struct file *fp; 4258 int fmode, error; 4259 int indx; 4260 4261 error = priv_check(td, PRIV_VFS_FHOPEN); 4262 if (error != 0) 4263 return (error); 4264 indx = -1; 4265 fmode = FFLAGS(uap->flags); 4266 /* why not allow a non-read/write open for our lockd? */ 4267 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4268 return (EINVAL); 4269 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4270 if (error != 0) 4271 return(error); 4272 /* find the mount point */ 4273 mp = vfs_busyfs(&fhp.fh_fsid); 4274 if (mp == NULL) 4275 return (ESTALE); 4276 /* now give me my vnode, it gets returned to me locked */ 4277 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4278 vfs_unbusy(mp); 4279 if (error != 0) 4280 return (error); 4281 4282 error = falloc_noinstall(td, &fp); 4283 if (error != 0) { 4284 vput(vp); 4285 return (error); 4286 } 4287 /* 4288 * An extra reference on `fp' has been held for us by 4289 * falloc_noinstall(). 4290 */ 4291 4292 #ifdef INVARIANTS 4293 td->td_dupfd = -1; 4294 #endif 4295 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4296 if (error != 0) { 4297 KASSERT(fp->f_ops == &badfileops, 4298 ("VOP_OPEN in fhopen() set f_ops")); 4299 KASSERT(td->td_dupfd < 0, 4300 ("fhopen() encountered fdopen()")); 4301 4302 vput(vp); 4303 goto bad; 4304 } 4305 #ifdef INVARIANTS 4306 td->td_dupfd = 0; 4307 #endif 4308 fp->f_vnode = vp; 4309 fp->f_seqcount = 1; 4310 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4311 &vnops); 4312 VOP_UNLOCK(vp, 0); 4313 if ((fmode & O_TRUNC) != 0) { 4314 error = fo_truncate(fp, 0, td->td_ucred, td); 4315 if (error != 0) 4316 goto bad; 4317 } 4318 4319 error = finstall(td, fp, &indx, fmode, NULL); 4320 bad: 4321 fdrop(fp, td); 4322 td->td_retval[0] = indx; 4323 return (error); 4324 } 4325 4326 /* 4327 * Stat an (NFS) file handle. 4328 */ 4329 #ifndef _SYS_SYSPROTO_H_ 4330 struct fhstat_args { 4331 struct fhandle *u_fhp; 4332 struct stat *sb; 4333 }; 4334 #endif 4335 int 4336 sys_fhstat(td, uap) 4337 struct thread *td; 4338 register struct fhstat_args /* { 4339 struct fhandle *u_fhp; 4340 struct stat *sb; 4341 } */ *uap; 4342 { 4343 struct stat sb; 4344 struct fhandle fh; 4345 int error; 4346 4347 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4348 if (error != 0) 4349 return (error); 4350 error = kern_fhstat(td, fh, &sb); 4351 if (error == 0) 4352 error = copyout(&sb, uap->sb, sizeof(sb)); 4353 return (error); 4354 } 4355 4356 int 4357 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4358 { 4359 struct mount *mp; 4360 struct vnode *vp; 4361 int error; 4362 4363 error = priv_check(td, PRIV_VFS_FHSTAT); 4364 if (error != 0) 4365 return (error); 4366 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4367 return (ESTALE); 4368 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4369 vfs_unbusy(mp); 4370 if (error != 0) 4371 return (error); 4372 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4373 vput(vp); 4374 return (error); 4375 } 4376 4377 /* 4378 * Implement fstatfs() for (NFS) file handles. 4379 */ 4380 #ifndef _SYS_SYSPROTO_H_ 4381 struct fhstatfs_args { 4382 struct fhandle *u_fhp; 4383 struct statfs *buf; 4384 }; 4385 #endif 4386 int 4387 sys_fhstatfs(td, uap) 4388 struct thread *td; 4389 struct fhstatfs_args /* { 4390 struct fhandle *u_fhp; 4391 struct statfs *buf; 4392 } */ *uap; 4393 { 4394 struct statfs sf; 4395 fhandle_t fh; 4396 int error; 4397 4398 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4399 if (error != 0) 4400 return (error); 4401 error = kern_fhstatfs(td, fh, &sf); 4402 if (error != 0) 4403 return (error); 4404 return (copyout(&sf, uap->buf, sizeof(sf))); 4405 } 4406 4407 int 4408 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4409 { 4410 struct statfs *sp; 4411 struct mount *mp; 4412 struct vnode *vp; 4413 int error; 4414 4415 error = priv_check(td, PRIV_VFS_FHSTATFS); 4416 if (error != 0) 4417 return (error); 4418 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4419 return (ESTALE); 4420 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4421 if (error != 0) { 4422 vfs_unbusy(mp); 4423 return (error); 4424 } 4425 vput(vp); 4426 error = prison_canseemount(td->td_ucred, mp); 4427 if (error != 0) 4428 goto out; 4429 #ifdef MAC 4430 error = mac_mount_check_stat(td->td_ucred, mp); 4431 if (error != 0) 4432 goto out; 4433 #endif 4434 /* 4435 * Set these in case the underlying filesystem fails to do so. 4436 */ 4437 sp = &mp->mnt_stat; 4438 sp->f_version = STATFS_VERSION; 4439 sp->f_namemax = NAME_MAX; 4440 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4441 error = VFS_STATFS(mp, sp); 4442 if (error == 0) 4443 *buf = *sp; 4444 out: 4445 vfs_unbusy(mp); 4446 return (error); 4447 } 4448 4449 int 4450 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4451 { 4452 struct file *fp; 4453 struct mount *mp; 4454 struct vnode *vp; 4455 cap_rights_t rights; 4456 off_t olen, ooffset; 4457 int error; 4458 4459 if (offset < 0 || len <= 0) 4460 return (EINVAL); 4461 /* Check for wrap. */ 4462 if (offset > OFF_MAX - len) 4463 return (EFBIG); 4464 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4465 if (error != 0) 4466 return (error); 4467 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4468 error = ESPIPE; 4469 goto out; 4470 } 4471 if ((fp->f_flag & FWRITE) == 0) { 4472 error = EBADF; 4473 goto out; 4474 } 4475 if (fp->f_type != DTYPE_VNODE) { 4476 error = ENODEV; 4477 goto out; 4478 } 4479 vp = fp->f_vnode; 4480 if (vp->v_type != VREG) { 4481 error = ENODEV; 4482 goto out; 4483 } 4484 4485 /* Allocating blocks may take a long time, so iterate. */ 4486 for (;;) { 4487 olen = len; 4488 ooffset = offset; 4489 4490 bwillwrite(); 4491 mp = NULL; 4492 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4493 if (error != 0) 4494 break; 4495 error = vn_lock(vp, LK_EXCLUSIVE); 4496 if (error != 0) { 4497 vn_finished_write(mp); 4498 break; 4499 } 4500 #ifdef MAC 4501 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4502 if (error == 0) 4503 #endif 4504 error = VOP_ALLOCATE(vp, &offset, &len); 4505 VOP_UNLOCK(vp, 0); 4506 vn_finished_write(mp); 4507 4508 if (olen + ooffset != offset + len) { 4509 panic("offset + len changed from %jx/%jx to %jx/%jx", 4510 ooffset, olen, offset, len); 4511 } 4512 if (error != 0 || len == 0) 4513 break; 4514 KASSERT(olen > len, ("Iteration did not make progress?")); 4515 maybe_yield(); 4516 } 4517 out: 4518 fdrop(fp, td); 4519 return (error); 4520 } 4521 4522 int 4523 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4524 { 4525 int error; 4526 4527 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4528 return (kern_posix_error(td, error)); 4529 } 4530 4531 /* 4532 * Unlike madvise(2), we do not make a best effort to remember every 4533 * possible caching hint. Instead, we remember the last setting with 4534 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4535 * region of any current setting. 4536 */ 4537 int 4538 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4539 int advice) 4540 { 4541 struct fadvise_info *fa, *new; 4542 struct file *fp; 4543 struct vnode *vp; 4544 cap_rights_t rights; 4545 off_t end; 4546 int error; 4547 4548 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4549 return (EINVAL); 4550 switch (advice) { 4551 case POSIX_FADV_SEQUENTIAL: 4552 case POSIX_FADV_RANDOM: 4553 case POSIX_FADV_NOREUSE: 4554 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4555 break; 4556 case POSIX_FADV_NORMAL: 4557 case POSIX_FADV_WILLNEED: 4558 case POSIX_FADV_DONTNEED: 4559 new = NULL; 4560 break; 4561 default: 4562 return (EINVAL); 4563 } 4564 /* XXX: CAP_POSIX_FADVISE? */ 4565 error = fget(td, fd, cap_rights_init(&rights), &fp); 4566 if (error != 0) 4567 goto out; 4568 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4569 error = ESPIPE; 4570 goto out; 4571 } 4572 if (fp->f_type != DTYPE_VNODE) { 4573 error = ENODEV; 4574 goto out; 4575 } 4576 vp = fp->f_vnode; 4577 if (vp->v_type != VREG) { 4578 error = ENODEV; 4579 goto out; 4580 } 4581 if (len == 0) 4582 end = OFF_MAX; 4583 else 4584 end = offset + len - 1; 4585 switch (advice) { 4586 case POSIX_FADV_SEQUENTIAL: 4587 case POSIX_FADV_RANDOM: 4588 case POSIX_FADV_NOREUSE: 4589 /* 4590 * Try to merge any existing non-standard region with 4591 * this new region if possible, otherwise create a new 4592 * non-standard region for this request. 4593 */ 4594 mtx_pool_lock(mtxpool_sleep, fp); 4595 fa = fp->f_advice; 4596 if (fa != NULL && fa->fa_advice == advice && 4597 ((fa->fa_start <= end && fa->fa_end >= offset) || 4598 (end != OFF_MAX && fa->fa_start == end + 1) || 4599 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4600 if (offset < fa->fa_start) 4601 fa->fa_start = offset; 4602 if (end > fa->fa_end) 4603 fa->fa_end = end; 4604 } else { 4605 new->fa_advice = advice; 4606 new->fa_start = offset; 4607 new->fa_end = end; 4608 fp->f_advice = new; 4609 new = fa; 4610 } 4611 mtx_pool_unlock(mtxpool_sleep, fp); 4612 break; 4613 case POSIX_FADV_NORMAL: 4614 /* 4615 * If a the "normal" region overlaps with an existing 4616 * non-standard region, trim or remove the 4617 * non-standard region. 4618 */ 4619 mtx_pool_lock(mtxpool_sleep, fp); 4620 fa = fp->f_advice; 4621 if (fa != NULL) { 4622 if (offset <= fa->fa_start && end >= fa->fa_end) { 4623 new = fa; 4624 fp->f_advice = NULL; 4625 } else if (offset <= fa->fa_start && 4626 end >= fa->fa_start) 4627 fa->fa_start = end + 1; 4628 else if (offset <= fa->fa_end && end >= fa->fa_end) 4629 fa->fa_end = offset - 1; 4630 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4631 /* 4632 * If the "normal" region is a middle 4633 * portion of the existing 4634 * non-standard region, just remove 4635 * the whole thing rather than picking 4636 * one side or the other to 4637 * preserve. 4638 */ 4639 new = fa; 4640 fp->f_advice = NULL; 4641 } 4642 } 4643 mtx_pool_unlock(mtxpool_sleep, fp); 4644 break; 4645 case POSIX_FADV_WILLNEED: 4646 case POSIX_FADV_DONTNEED: 4647 error = VOP_ADVISE(vp, offset, end, advice); 4648 break; 4649 } 4650 out: 4651 if (fp != NULL) 4652 fdrop(fp, td); 4653 free(new, M_FADVISE); 4654 return (error); 4655 } 4656 4657 int 4658 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4659 { 4660 int error; 4661 4662 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4663 uap->advice); 4664 return (kern_posix_error(td, error)); 4665 } 4666