1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 /* 248 * Get filesystem statistics. 249 */ 250 #ifndef _SYS_SYSPROTO_H_ 251 struct statfs_args { 252 char *path; 253 struct statfs *buf; 254 }; 255 #endif 256 int 257 sys_statfs(td, uap) 258 struct thread *td; 259 register struct statfs_args /* { 260 char *path; 261 struct statfs *buf; 262 } */ *uap; 263 { 264 struct statfs sf; 265 int error; 266 267 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 268 if (error == 0) 269 error = copyout(&sf, uap->buf, sizeof(sf)); 270 return (error); 271 } 272 273 int 274 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 275 struct statfs *buf) 276 { 277 struct mount *mp; 278 struct statfs *sp, sb; 279 struct nameidata nd; 280 int error; 281 282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 283 pathseg, path, td); 284 error = namei(&nd); 285 if (error != 0) 286 return (error); 287 mp = nd.ni_vp->v_mount; 288 vfs_ref(mp); 289 NDFREE(&nd, NDF_ONLY_PNBUF); 290 vput(nd.ni_vp); 291 error = vfs_busy(mp, 0); 292 vfs_rel(mp); 293 if (error != 0) 294 return (error); 295 #ifdef MAC 296 error = mac_mount_check_stat(td->td_ucred, mp); 297 if (error != 0) 298 goto out; 299 #endif 300 /* 301 * Set these in case the underlying filesystem fails to do so. 302 */ 303 sp = &mp->mnt_stat; 304 sp->f_version = STATFS_VERSION; 305 sp->f_namemax = NAME_MAX; 306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 307 error = VFS_STATFS(mp, sp); 308 if (error != 0) 309 goto out; 310 if (priv_check(td, PRIV_VFS_GENERATION)) { 311 bcopy(sp, &sb, sizeof(sb)); 312 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 313 prison_enforce_statfs(td->td_ucred, mp, &sb); 314 sp = &sb; 315 } 316 *buf = *sp; 317 out: 318 vfs_unbusy(mp); 319 return (error); 320 } 321 322 /* 323 * Get filesystem statistics. 324 */ 325 #ifndef _SYS_SYSPROTO_H_ 326 struct fstatfs_args { 327 int fd; 328 struct statfs *buf; 329 }; 330 #endif 331 int 332 sys_fstatfs(td, uap) 333 struct thread *td; 334 register struct fstatfs_args /* { 335 int fd; 336 struct statfs *buf; 337 } */ *uap; 338 { 339 struct statfs sf; 340 int error; 341 342 error = kern_fstatfs(td, uap->fd, &sf); 343 if (error == 0) 344 error = copyout(&sf, uap->buf, sizeof(sf)); 345 return (error); 346 } 347 348 int 349 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 350 { 351 struct file *fp; 352 struct mount *mp; 353 struct statfs *sp, sb; 354 struct vnode *vp; 355 cap_rights_t rights; 356 int error; 357 358 AUDIT_ARG_FD(fd); 359 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 360 if (error != 0) 361 return (error); 362 vp = fp->f_vnode; 363 vn_lock(vp, LK_SHARED | LK_RETRY); 364 #ifdef AUDIT 365 AUDIT_ARG_VNODE1(vp); 366 #endif 367 mp = vp->v_mount; 368 if (mp) 369 vfs_ref(mp); 370 VOP_UNLOCK(vp, 0); 371 fdrop(fp, td); 372 if (mp == NULL) { 373 error = EBADF; 374 goto out; 375 } 376 error = vfs_busy(mp, 0); 377 vfs_rel(mp); 378 if (error != 0) 379 return (error); 380 #ifdef MAC 381 error = mac_mount_check_stat(td->td_ucred, mp); 382 if (error != 0) 383 goto out; 384 #endif 385 /* 386 * Set these in case the underlying filesystem fails to do so. 387 */ 388 sp = &mp->mnt_stat; 389 sp->f_version = STATFS_VERSION; 390 sp->f_namemax = NAME_MAX; 391 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 392 error = VFS_STATFS(mp, sp); 393 if (error != 0) 394 goto out; 395 if (priv_check(td, PRIV_VFS_GENERATION)) { 396 bcopy(sp, &sb, sizeof(sb)); 397 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 398 prison_enforce_statfs(td->td_ucred, mp, &sb); 399 sp = &sb; 400 } 401 *buf = *sp; 402 out: 403 if (mp) 404 vfs_unbusy(mp); 405 return (error); 406 } 407 408 /* 409 * Get statistics on all filesystems. 410 */ 411 #ifndef _SYS_SYSPROTO_H_ 412 struct getfsstat_args { 413 struct statfs *buf; 414 long bufsize; 415 int flags; 416 }; 417 #endif 418 int 419 sys_getfsstat(td, uap) 420 struct thread *td; 421 register struct getfsstat_args /* { 422 struct statfs *buf; 423 long bufsize; 424 int flags; 425 } */ *uap; 426 { 427 size_t count; 428 int error; 429 430 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 431 return (EINVAL); 432 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 433 UIO_USERSPACE, uap->flags); 434 if (error == 0) 435 td->td_retval[0] = count; 436 return (error); 437 } 438 439 /* 440 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 441 * The caller is responsible for freeing memory which will be allocated 442 * in '*buf'. 443 */ 444 int 445 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 446 size_t *countp, enum uio_seg bufseg, int flags) 447 { 448 struct mount *mp, *nmp; 449 struct statfs *sfsp, *sp, sb, *tofree; 450 size_t count, maxcount; 451 int error; 452 453 restart: 454 maxcount = bufsize / sizeof(struct statfs); 455 if (bufsize == 0) { 456 sfsp = NULL; 457 tofree = NULL; 458 } else if (bufseg == UIO_USERSPACE) { 459 sfsp = *buf; 460 tofree = NULL; 461 } else /* if (bufseg == UIO_SYSSPACE) */ { 462 count = 0; 463 mtx_lock(&mountlist_mtx); 464 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 465 count++; 466 } 467 mtx_unlock(&mountlist_mtx); 468 if (maxcount > count) 469 maxcount = count; 470 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 471 M_TEMP, M_WAITOK); 472 } 473 count = 0; 474 mtx_lock(&mountlist_mtx); 475 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 476 if (prison_canseemount(td->td_ucred, mp) != 0) { 477 nmp = TAILQ_NEXT(mp, mnt_list); 478 continue; 479 } 480 #ifdef MAC 481 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #endif 486 if (flags == MNT_WAIT) { 487 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 488 /* 489 * If vfs_busy() failed, and MBF_NOWAIT 490 * wasn't passed, then the mp is gone. 491 * Furthermore, because of MBF_MNTLSTLOCK, 492 * the mountlist_mtx was dropped. We have 493 * no other choice than to start over. 494 */ 495 mtx_unlock(&mountlist_mtx); 496 free(tofree, M_TEMP); 497 goto restart; 498 } 499 } else { 500 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 501 nmp = TAILQ_NEXT(mp, mnt_list); 502 continue; 503 } 504 } 505 if (sfsp && count < maxcount) { 506 sp = &mp->mnt_stat; 507 /* 508 * Set these in case the underlying filesystem 509 * fails to do so. 510 */ 511 sp->f_version = STATFS_VERSION; 512 sp->f_namemax = NAME_MAX; 513 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 514 /* 515 * If MNT_NOWAIT or MNT_LAZY is specified, do not 516 * refresh the fsstat cache. 517 */ 518 if (flags != MNT_LAZY && flags != MNT_NOWAIT) { 519 error = VFS_STATFS(mp, sp); 520 if (error != 0) { 521 mtx_lock(&mountlist_mtx); 522 nmp = TAILQ_NEXT(mp, mnt_list); 523 vfs_unbusy(mp); 524 continue; 525 } 526 } 527 if (priv_check(td, PRIV_VFS_GENERATION)) { 528 bcopy(sp, &sb, sizeof(sb)); 529 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 530 prison_enforce_statfs(td->td_ucred, mp, &sb); 531 sp = &sb; 532 } 533 if (bufseg == UIO_SYSSPACE) 534 bcopy(sp, sfsp, sizeof(*sp)); 535 else /* if (bufseg == UIO_USERSPACE) */ { 536 error = copyout(sp, sfsp, sizeof(*sp)); 537 if (error != 0) { 538 vfs_unbusy(mp); 539 return (error); 540 } 541 } 542 sfsp++; 543 } 544 count++; 545 mtx_lock(&mountlist_mtx); 546 nmp = TAILQ_NEXT(mp, mnt_list); 547 vfs_unbusy(mp); 548 } 549 mtx_unlock(&mountlist_mtx); 550 if (sfsp && count > maxcount) 551 *countp = maxcount; 552 else 553 *countp = count; 554 return (0); 555 } 556 557 #ifdef COMPAT_FREEBSD4 558 /* 559 * Get old format filesystem statistics. 560 */ 561 static void cvtstatfs(struct statfs *, struct ostatfs *); 562 563 #ifndef _SYS_SYSPROTO_H_ 564 struct freebsd4_statfs_args { 565 char *path; 566 struct ostatfs *buf; 567 }; 568 #endif 569 int 570 freebsd4_statfs(td, uap) 571 struct thread *td; 572 struct freebsd4_statfs_args /* { 573 char *path; 574 struct ostatfs *buf; 575 } */ *uap; 576 { 577 struct ostatfs osb; 578 struct statfs sf; 579 int error; 580 581 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 582 if (error != 0) 583 return (error); 584 cvtstatfs(&sf, &osb); 585 return (copyout(&osb, uap->buf, sizeof(osb))); 586 } 587 588 /* 589 * Get filesystem statistics. 590 */ 591 #ifndef _SYS_SYSPROTO_H_ 592 struct freebsd4_fstatfs_args { 593 int fd; 594 struct ostatfs *buf; 595 }; 596 #endif 597 int 598 freebsd4_fstatfs(td, uap) 599 struct thread *td; 600 struct freebsd4_fstatfs_args /* { 601 int fd; 602 struct ostatfs *buf; 603 } */ *uap; 604 { 605 struct ostatfs osb; 606 struct statfs sf; 607 int error; 608 609 error = kern_fstatfs(td, uap->fd, &sf); 610 if (error != 0) 611 return (error); 612 cvtstatfs(&sf, &osb); 613 return (copyout(&osb, uap->buf, sizeof(osb))); 614 } 615 616 /* 617 * Get statistics on all filesystems. 618 */ 619 #ifndef _SYS_SYSPROTO_H_ 620 struct freebsd4_getfsstat_args { 621 struct ostatfs *buf; 622 long bufsize; 623 int flags; 624 }; 625 #endif 626 int 627 freebsd4_getfsstat(td, uap) 628 struct thread *td; 629 register struct freebsd4_getfsstat_args /* { 630 struct ostatfs *buf; 631 long bufsize; 632 int flags; 633 } */ *uap; 634 { 635 struct statfs *buf, *sp; 636 struct ostatfs osb; 637 size_t count, size; 638 int error; 639 640 if (uap->bufsize < 0) 641 return (EINVAL); 642 count = uap->bufsize / sizeof(struct ostatfs); 643 if (count > SIZE_MAX / sizeof(struct statfs)) 644 return (EINVAL); 645 size = count * sizeof(struct statfs); 646 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 647 uap->flags); 648 td->td_retval[0] = count; 649 if (size != 0) { 650 sp = buf; 651 while (count != 0 && error == 0) { 652 cvtstatfs(sp, &osb); 653 error = copyout(&osb, uap->buf, sizeof(osb)); 654 sp++; 655 uap->buf++; 656 count--; 657 } 658 free(buf, M_TEMP); 659 } 660 return (error); 661 } 662 663 /* 664 * Implement fstatfs() for (NFS) file handles. 665 */ 666 #ifndef _SYS_SYSPROTO_H_ 667 struct freebsd4_fhstatfs_args { 668 struct fhandle *u_fhp; 669 struct ostatfs *buf; 670 }; 671 #endif 672 int 673 freebsd4_fhstatfs(td, uap) 674 struct thread *td; 675 struct freebsd4_fhstatfs_args /* { 676 struct fhandle *u_fhp; 677 struct ostatfs *buf; 678 } */ *uap; 679 { 680 struct ostatfs osb; 681 struct statfs sf; 682 fhandle_t fh; 683 int error; 684 685 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 686 if (error != 0) 687 return (error); 688 error = kern_fhstatfs(td, fh, &sf); 689 if (error != 0) 690 return (error); 691 cvtstatfs(&sf, &osb); 692 return (copyout(&osb, uap->buf, sizeof(osb))); 693 } 694 695 /* 696 * Convert a new format statfs structure to an old format statfs structure. 697 */ 698 static void 699 cvtstatfs(nsp, osp) 700 struct statfs *nsp; 701 struct ostatfs *osp; 702 { 703 704 statfs_scale_blocks(nsp, LONG_MAX); 705 bzero(osp, sizeof(*osp)); 706 osp->f_bsize = nsp->f_bsize; 707 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 708 osp->f_blocks = nsp->f_blocks; 709 osp->f_bfree = nsp->f_bfree; 710 osp->f_bavail = nsp->f_bavail; 711 osp->f_files = MIN(nsp->f_files, LONG_MAX); 712 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 713 osp->f_owner = nsp->f_owner; 714 osp->f_type = nsp->f_type; 715 osp->f_flags = nsp->f_flags; 716 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 717 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 718 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 719 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 720 strlcpy(osp->f_fstypename, nsp->f_fstypename, 721 MIN(MFSNAMELEN, OMFSNAMELEN)); 722 strlcpy(osp->f_mntonname, nsp->f_mntonname, 723 MIN(MNAMELEN, OMNAMELEN)); 724 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 725 MIN(MNAMELEN, OMNAMELEN)); 726 osp->f_fsid = nsp->f_fsid; 727 } 728 #endif /* COMPAT_FREEBSD4 */ 729 730 /* 731 * Change current working directory to a given file descriptor. 732 */ 733 #ifndef _SYS_SYSPROTO_H_ 734 struct fchdir_args { 735 int fd; 736 }; 737 #endif 738 int 739 sys_fchdir(td, uap) 740 struct thread *td; 741 struct fchdir_args /* { 742 int fd; 743 } */ *uap; 744 { 745 struct vnode *vp, *tdp; 746 struct mount *mp; 747 struct file *fp; 748 cap_rights_t rights; 749 int error; 750 751 AUDIT_ARG_FD(uap->fd); 752 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 753 &fp); 754 if (error != 0) 755 return (error); 756 vp = fp->f_vnode; 757 VREF(vp); 758 fdrop(fp, td); 759 vn_lock(vp, LK_SHARED | LK_RETRY); 760 AUDIT_ARG_VNODE1(vp); 761 error = change_dir(vp, td); 762 while (!error && (mp = vp->v_mountedhere) != NULL) { 763 if (vfs_busy(mp, 0)) 764 continue; 765 error = VFS_ROOT(mp, LK_SHARED, &tdp); 766 vfs_unbusy(mp); 767 if (error != 0) 768 break; 769 vput(vp); 770 vp = tdp; 771 } 772 if (error != 0) { 773 vput(vp); 774 return (error); 775 } 776 VOP_UNLOCK(vp, 0); 777 pwd_chdir(td, vp); 778 return (0); 779 } 780 781 /* 782 * Change current working directory (``.''). 783 */ 784 #ifndef _SYS_SYSPROTO_H_ 785 struct chdir_args { 786 char *path; 787 }; 788 #endif 789 int 790 sys_chdir(td, uap) 791 struct thread *td; 792 struct chdir_args /* { 793 char *path; 794 } */ *uap; 795 { 796 797 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 798 } 799 800 int 801 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 802 { 803 struct nameidata nd; 804 int error; 805 806 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 807 pathseg, path, td); 808 if ((error = namei(&nd)) != 0) 809 return (error); 810 if ((error = change_dir(nd.ni_vp, td)) != 0) { 811 vput(nd.ni_vp); 812 NDFREE(&nd, NDF_ONLY_PNBUF); 813 return (error); 814 } 815 VOP_UNLOCK(nd.ni_vp, 0); 816 NDFREE(&nd, NDF_ONLY_PNBUF); 817 pwd_chdir(td, nd.ni_vp); 818 return (0); 819 } 820 821 /* 822 * Change notion of root (``/'') directory. 823 */ 824 #ifndef _SYS_SYSPROTO_H_ 825 struct chroot_args { 826 char *path; 827 }; 828 #endif 829 int 830 sys_chroot(td, uap) 831 struct thread *td; 832 struct chroot_args /* { 833 char *path; 834 } */ *uap; 835 { 836 struct nameidata nd; 837 int error; 838 839 error = priv_check(td, PRIV_VFS_CHROOT); 840 if (error != 0) 841 return (error); 842 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 843 UIO_USERSPACE, uap->path, td); 844 error = namei(&nd); 845 if (error != 0) 846 goto error; 847 error = change_dir(nd.ni_vp, td); 848 if (error != 0) 849 goto e_vunlock; 850 #ifdef MAC 851 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 852 if (error != 0) 853 goto e_vunlock; 854 #endif 855 VOP_UNLOCK(nd.ni_vp, 0); 856 error = pwd_chroot(td, nd.ni_vp); 857 vrele(nd.ni_vp); 858 NDFREE(&nd, NDF_ONLY_PNBUF); 859 return (error); 860 e_vunlock: 861 vput(nd.ni_vp); 862 error: 863 NDFREE(&nd, NDF_ONLY_PNBUF); 864 return (error); 865 } 866 867 /* 868 * Common routine for chroot and chdir. Callers must provide a locked vnode 869 * instance. 870 */ 871 int 872 change_dir(vp, td) 873 struct vnode *vp; 874 struct thread *td; 875 { 876 #ifdef MAC 877 int error; 878 #endif 879 880 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 881 if (vp->v_type != VDIR) 882 return (ENOTDIR); 883 #ifdef MAC 884 error = mac_vnode_check_chdir(td->td_ucred, vp); 885 if (error != 0) 886 return (error); 887 #endif 888 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 889 } 890 891 static __inline void 892 flags_to_rights(int flags, cap_rights_t *rightsp) 893 { 894 895 if (flags & O_EXEC) { 896 cap_rights_set(rightsp, CAP_FEXECVE); 897 } else { 898 switch ((flags & O_ACCMODE)) { 899 case O_RDONLY: 900 cap_rights_set(rightsp, CAP_READ); 901 break; 902 case O_RDWR: 903 cap_rights_set(rightsp, CAP_READ); 904 /* FALLTHROUGH */ 905 case O_WRONLY: 906 cap_rights_set(rightsp, CAP_WRITE); 907 if (!(flags & (O_APPEND | O_TRUNC))) 908 cap_rights_set(rightsp, CAP_SEEK); 909 break; 910 } 911 } 912 913 if (flags & O_CREAT) 914 cap_rights_set(rightsp, CAP_CREATE); 915 916 if (flags & O_TRUNC) 917 cap_rights_set(rightsp, CAP_FTRUNCATE); 918 919 if (flags & (O_SYNC | O_FSYNC)) 920 cap_rights_set(rightsp, CAP_FSYNC); 921 922 if (flags & (O_EXLOCK | O_SHLOCK)) 923 cap_rights_set(rightsp, CAP_FLOCK); 924 } 925 926 /* 927 * Check permissions, allocate an open file structure, and call the device 928 * open routine if any. 929 */ 930 #ifndef _SYS_SYSPROTO_H_ 931 struct open_args { 932 char *path; 933 int flags; 934 int mode; 935 }; 936 #endif 937 int 938 sys_open(td, uap) 939 struct thread *td; 940 register struct open_args /* { 941 char *path; 942 int flags; 943 int mode; 944 } */ *uap; 945 { 946 947 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 948 uap->flags, uap->mode)); 949 } 950 951 #ifndef _SYS_SYSPROTO_H_ 952 struct openat_args { 953 int fd; 954 char *path; 955 int flag; 956 int mode; 957 }; 958 #endif 959 int 960 sys_openat(struct thread *td, struct openat_args *uap) 961 { 962 963 AUDIT_ARG_FD(uap->fd); 964 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 965 uap->mode)); 966 } 967 968 int 969 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 970 int flags, int mode) 971 { 972 struct proc *p = td->td_proc; 973 struct filedesc *fdp = p->p_fd; 974 struct file *fp; 975 struct vnode *vp; 976 struct nameidata nd; 977 cap_rights_t rights; 978 int cmode, error, indx; 979 980 indx = -1; 981 982 AUDIT_ARG_FFLAGS(flags); 983 AUDIT_ARG_MODE(mode); 984 cap_rights_init(&rights, CAP_LOOKUP); 985 flags_to_rights(flags, &rights); 986 /* 987 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 988 * may be specified. 989 */ 990 if (flags & O_EXEC) { 991 if (flags & O_ACCMODE) 992 return (EINVAL); 993 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 994 return (EINVAL); 995 } else { 996 flags = FFLAGS(flags); 997 } 998 999 /* 1000 * Allocate a file structure. The descriptor to reference it 1001 * is allocated and set by finstall() below. 1002 */ 1003 error = falloc_noinstall(td, &fp); 1004 if (error != 0) 1005 return (error); 1006 /* 1007 * An extra reference on `fp' has been held for us by 1008 * falloc_noinstall(). 1009 */ 1010 /* Set the flags early so the finit in devfs can pick them up. */ 1011 fp->f_flag = flags & FMASK; 1012 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1013 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1014 &rights, td); 1015 td->td_dupfd = -1; /* XXX check for fdopen */ 1016 error = vn_open(&nd, &flags, cmode, fp); 1017 if (error != 0) { 1018 /* 1019 * If the vn_open replaced the method vector, something 1020 * wonderous happened deep below and we just pass it up 1021 * pretending we know what we do. 1022 */ 1023 if (error == ENXIO && fp->f_ops != &badfileops) 1024 goto success; 1025 1026 /* 1027 * Handle special fdopen() case. bleh. 1028 * 1029 * Don't do this for relative (capability) lookups; we don't 1030 * understand exactly what would happen, and we don't think 1031 * that it ever should. 1032 */ 1033 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1034 (error == ENODEV || error == ENXIO) && 1035 td->td_dupfd >= 0) { 1036 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1037 &indx); 1038 if (error == 0) 1039 goto success; 1040 } 1041 1042 goto bad; 1043 } 1044 td->td_dupfd = 0; 1045 NDFREE(&nd, NDF_ONLY_PNBUF); 1046 vp = nd.ni_vp; 1047 1048 /* 1049 * Store the vnode, for any f_type. Typically, the vnode use 1050 * count is decremented by direct call to vn_closefile() for 1051 * files that switched type in the cdevsw fdopen() method. 1052 */ 1053 fp->f_vnode = vp; 1054 /* 1055 * If the file wasn't claimed by devfs bind it to the normal 1056 * vnode operations here. 1057 */ 1058 if (fp->f_ops == &badfileops) { 1059 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1060 fp->f_seqcount = 1; 1061 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1062 DTYPE_VNODE, vp, &vnops); 1063 } 1064 1065 VOP_UNLOCK(vp, 0); 1066 if (flags & O_TRUNC) { 1067 error = fo_truncate(fp, 0, td->td_ucred, td); 1068 if (error != 0) 1069 goto bad; 1070 } 1071 success: 1072 /* 1073 * If we haven't already installed the FD (for dupfdopen), do so now. 1074 */ 1075 if (indx == -1) { 1076 struct filecaps *fcaps; 1077 1078 #ifdef CAPABILITIES 1079 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1080 fcaps = &nd.ni_filecaps; 1081 else 1082 #endif 1083 fcaps = NULL; 1084 error = finstall(td, fp, &indx, flags, fcaps); 1085 /* On success finstall() consumes fcaps. */ 1086 if (error != 0) { 1087 filecaps_free(&nd.ni_filecaps); 1088 goto bad; 1089 } 1090 } else { 1091 filecaps_free(&nd.ni_filecaps); 1092 } 1093 1094 /* 1095 * Release our private reference, leaving the one associated with 1096 * the descriptor table intact. 1097 */ 1098 fdrop(fp, td); 1099 td->td_retval[0] = indx; 1100 return (0); 1101 bad: 1102 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1103 fdrop(fp, td); 1104 return (error); 1105 } 1106 1107 #ifdef COMPAT_43 1108 /* 1109 * Create a file. 1110 */ 1111 #ifndef _SYS_SYSPROTO_H_ 1112 struct ocreat_args { 1113 char *path; 1114 int mode; 1115 }; 1116 #endif 1117 int 1118 ocreat(td, uap) 1119 struct thread *td; 1120 register struct ocreat_args /* { 1121 char *path; 1122 int mode; 1123 } */ *uap; 1124 { 1125 1126 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1127 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1128 } 1129 #endif /* COMPAT_43 */ 1130 1131 /* 1132 * Create a special file. 1133 */ 1134 #ifndef _SYS_SYSPROTO_H_ 1135 struct mknod_args { 1136 char *path; 1137 int mode; 1138 int dev; 1139 }; 1140 #endif 1141 int 1142 sys_mknod(td, uap) 1143 struct thread *td; 1144 register struct mknod_args /* { 1145 char *path; 1146 int mode; 1147 int dev; 1148 } */ *uap; 1149 { 1150 1151 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1152 uap->mode, uap->dev)); 1153 } 1154 1155 #ifndef _SYS_SYSPROTO_H_ 1156 struct mknodat_args { 1157 int fd; 1158 char *path; 1159 mode_t mode; 1160 dev_t dev; 1161 }; 1162 #endif 1163 int 1164 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1165 { 1166 1167 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1168 uap->dev)); 1169 } 1170 1171 int 1172 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1173 int mode, int dev) 1174 { 1175 struct vnode *vp; 1176 struct mount *mp; 1177 struct vattr vattr; 1178 struct nameidata nd; 1179 cap_rights_t rights; 1180 int error, whiteout = 0; 1181 1182 AUDIT_ARG_MODE(mode); 1183 AUDIT_ARG_DEV(dev); 1184 switch (mode & S_IFMT) { 1185 case S_IFCHR: 1186 case S_IFBLK: 1187 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1188 if (error == 0 && dev == VNOVAL) 1189 error = EINVAL; 1190 break; 1191 case S_IFMT: 1192 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1193 break; 1194 case S_IFWHT: 1195 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1196 break; 1197 case S_IFIFO: 1198 if (dev == 0) 1199 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1200 /* FALLTHROUGH */ 1201 default: 1202 error = EINVAL; 1203 break; 1204 } 1205 if (error != 0) 1206 return (error); 1207 restart: 1208 bwillwrite(); 1209 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1210 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1211 td); 1212 if ((error = namei(&nd)) != 0) 1213 return (error); 1214 vp = nd.ni_vp; 1215 if (vp != NULL) { 1216 NDFREE(&nd, NDF_ONLY_PNBUF); 1217 if (vp == nd.ni_dvp) 1218 vrele(nd.ni_dvp); 1219 else 1220 vput(nd.ni_dvp); 1221 vrele(vp); 1222 return (EEXIST); 1223 } else { 1224 VATTR_NULL(&vattr); 1225 vattr.va_mode = (mode & ALLPERMS) & 1226 ~td->td_proc->p_fd->fd_cmask; 1227 vattr.va_rdev = dev; 1228 whiteout = 0; 1229 1230 switch (mode & S_IFMT) { 1231 case S_IFMT: /* used by badsect to flag bad sectors */ 1232 vattr.va_type = VBAD; 1233 break; 1234 case S_IFCHR: 1235 vattr.va_type = VCHR; 1236 break; 1237 case S_IFBLK: 1238 vattr.va_type = VBLK; 1239 break; 1240 case S_IFWHT: 1241 whiteout = 1; 1242 break; 1243 default: 1244 panic("kern_mknod: invalid mode"); 1245 } 1246 } 1247 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1248 NDFREE(&nd, NDF_ONLY_PNBUF); 1249 vput(nd.ni_dvp); 1250 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1251 return (error); 1252 goto restart; 1253 } 1254 #ifdef MAC 1255 if (error == 0 && !whiteout) 1256 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1257 &nd.ni_cnd, &vattr); 1258 #endif 1259 if (error == 0) { 1260 if (whiteout) 1261 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1262 else { 1263 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1264 &nd.ni_cnd, &vattr); 1265 if (error == 0) 1266 vput(nd.ni_vp); 1267 } 1268 } 1269 NDFREE(&nd, NDF_ONLY_PNBUF); 1270 vput(nd.ni_dvp); 1271 vn_finished_write(mp); 1272 return (error); 1273 } 1274 1275 /* 1276 * Create a named pipe. 1277 */ 1278 #ifndef _SYS_SYSPROTO_H_ 1279 struct mkfifo_args { 1280 char *path; 1281 int mode; 1282 }; 1283 #endif 1284 int 1285 sys_mkfifo(td, uap) 1286 struct thread *td; 1287 register struct mkfifo_args /* { 1288 char *path; 1289 int mode; 1290 } */ *uap; 1291 { 1292 1293 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1294 uap->mode)); 1295 } 1296 1297 #ifndef _SYS_SYSPROTO_H_ 1298 struct mkfifoat_args { 1299 int fd; 1300 char *path; 1301 mode_t mode; 1302 }; 1303 #endif 1304 int 1305 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1306 { 1307 1308 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1309 uap->mode)); 1310 } 1311 1312 int 1313 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1314 int mode) 1315 { 1316 struct mount *mp; 1317 struct vattr vattr; 1318 struct nameidata nd; 1319 cap_rights_t rights; 1320 int error; 1321 1322 AUDIT_ARG_MODE(mode); 1323 restart: 1324 bwillwrite(); 1325 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1326 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1327 td); 1328 if ((error = namei(&nd)) != 0) 1329 return (error); 1330 if (nd.ni_vp != NULL) { 1331 NDFREE(&nd, NDF_ONLY_PNBUF); 1332 if (nd.ni_vp == nd.ni_dvp) 1333 vrele(nd.ni_dvp); 1334 else 1335 vput(nd.ni_dvp); 1336 vrele(nd.ni_vp); 1337 return (EEXIST); 1338 } 1339 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1340 NDFREE(&nd, NDF_ONLY_PNBUF); 1341 vput(nd.ni_dvp); 1342 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1343 return (error); 1344 goto restart; 1345 } 1346 VATTR_NULL(&vattr); 1347 vattr.va_type = VFIFO; 1348 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1349 #ifdef MAC 1350 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1351 &vattr); 1352 if (error != 0) 1353 goto out; 1354 #endif 1355 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1356 if (error == 0) 1357 vput(nd.ni_vp); 1358 #ifdef MAC 1359 out: 1360 #endif 1361 vput(nd.ni_dvp); 1362 vn_finished_write(mp); 1363 NDFREE(&nd, NDF_ONLY_PNBUF); 1364 return (error); 1365 } 1366 1367 /* 1368 * Make a hard file link. 1369 */ 1370 #ifndef _SYS_SYSPROTO_H_ 1371 struct link_args { 1372 char *path; 1373 char *link; 1374 }; 1375 #endif 1376 int 1377 sys_link(td, uap) 1378 struct thread *td; 1379 register struct link_args /* { 1380 char *path; 1381 char *link; 1382 } */ *uap; 1383 { 1384 1385 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1386 UIO_USERSPACE, FOLLOW)); 1387 } 1388 1389 #ifndef _SYS_SYSPROTO_H_ 1390 struct linkat_args { 1391 int fd1; 1392 char *path1; 1393 int fd2; 1394 char *path2; 1395 int flag; 1396 }; 1397 #endif 1398 int 1399 sys_linkat(struct thread *td, struct linkat_args *uap) 1400 { 1401 int flag; 1402 1403 flag = uap->flag; 1404 if (flag & ~AT_SYMLINK_FOLLOW) 1405 return (EINVAL); 1406 1407 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1408 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1409 } 1410 1411 int hardlink_check_uid = 0; 1412 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1413 &hardlink_check_uid, 0, 1414 "Unprivileged processes cannot create hard links to files owned by other " 1415 "users"); 1416 static int hardlink_check_gid = 0; 1417 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1418 &hardlink_check_gid, 0, 1419 "Unprivileged processes cannot create hard links to files owned by other " 1420 "groups"); 1421 1422 static int 1423 can_hardlink(struct vnode *vp, struct ucred *cred) 1424 { 1425 struct vattr va; 1426 int error; 1427 1428 if (!hardlink_check_uid && !hardlink_check_gid) 1429 return (0); 1430 1431 error = VOP_GETATTR(vp, &va, cred); 1432 if (error != 0) 1433 return (error); 1434 1435 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1436 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1437 if (error != 0) 1438 return (error); 1439 } 1440 1441 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1442 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1443 if (error != 0) 1444 return (error); 1445 } 1446 1447 return (0); 1448 } 1449 1450 int 1451 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1452 enum uio_seg segflg, int follow) 1453 { 1454 struct vnode *vp; 1455 struct mount *mp; 1456 struct nameidata nd; 1457 cap_rights_t rights; 1458 int error; 1459 1460 again: 1461 bwillwrite(); 1462 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1463 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1464 1465 if ((error = namei(&nd)) != 0) 1466 return (error); 1467 NDFREE(&nd, NDF_ONLY_PNBUF); 1468 vp = nd.ni_vp; 1469 if (vp->v_type == VDIR) { 1470 vrele(vp); 1471 return (EPERM); /* POSIX */ 1472 } 1473 NDINIT_ATRIGHTS(&nd, CREATE, 1474 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1475 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1476 if ((error = namei(&nd)) == 0) { 1477 if (nd.ni_vp != NULL) { 1478 NDFREE(&nd, NDF_ONLY_PNBUF); 1479 if (nd.ni_dvp == nd.ni_vp) 1480 vrele(nd.ni_dvp); 1481 else 1482 vput(nd.ni_dvp); 1483 vrele(nd.ni_vp); 1484 vrele(vp); 1485 return (EEXIST); 1486 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1487 /* 1488 * Cross-device link. No need to recheck 1489 * vp->v_type, since it cannot change, except 1490 * to VBAD. 1491 */ 1492 NDFREE(&nd, NDF_ONLY_PNBUF); 1493 vput(nd.ni_dvp); 1494 vrele(vp); 1495 return (EXDEV); 1496 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1497 error = can_hardlink(vp, td->td_ucred); 1498 #ifdef MAC 1499 if (error == 0) 1500 error = mac_vnode_check_link(td->td_ucred, 1501 nd.ni_dvp, vp, &nd.ni_cnd); 1502 #endif 1503 if (error != 0) { 1504 vput(vp); 1505 vput(nd.ni_dvp); 1506 NDFREE(&nd, NDF_ONLY_PNBUF); 1507 return (error); 1508 } 1509 error = vn_start_write(vp, &mp, V_NOWAIT); 1510 if (error != 0) { 1511 vput(vp); 1512 vput(nd.ni_dvp); 1513 NDFREE(&nd, NDF_ONLY_PNBUF); 1514 error = vn_start_write(NULL, &mp, 1515 V_XSLEEP | PCATCH); 1516 if (error != 0) 1517 return (error); 1518 goto again; 1519 } 1520 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1521 VOP_UNLOCK(vp, 0); 1522 vput(nd.ni_dvp); 1523 vn_finished_write(mp); 1524 NDFREE(&nd, NDF_ONLY_PNBUF); 1525 } else { 1526 vput(nd.ni_dvp); 1527 NDFREE(&nd, NDF_ONLY_PNBUF); 1528 vrele(vp); 1529 goto again; 1530 } 1531 } 1532 vrele(vp); 1533 return (error); 1534 } 1535 1536 /* 1537 * Make a symbolic link. 1538 */ 1539 #ifndef _SYS_SYSPROTO_H_ 1540 struct symlink_args { 1541 char *path; 1542 char *link; 1543 }; 1544 #endif 1545 int 1546 sys_symlink(td, uap) 1547 struct thread *td; 1548 register struct symlink_args /* { 1549 char *path; 1550 char *link; 1551 } */ *uap; 1552 { 1553 1554 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1555 UIO_USERSPACE)); 1556 } 1557 1558 #ifndef _SYS_SYSPROTO_H_ 1559 struct symlinkat_args { 1560 char *path; 1561 int fd; 1562 char *path2; 1563 }; 1564 #endif 1565 int 1566 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1567 { 1568 1569 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1570 UIO_USERSPACE)); 1571 } 1572 1573 int 1574 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1575 enum uio_seg segflg) 1576 { 1577 struct mount *mp; 1578 struct vattr vattr; 1579 char *syspath; 1580 struct nameidata nd; 1581 int error; 1582 cap_rights_t rights; 1583 1584 if (segflg == UIO_SYSSPACE) { 1585 syspath = path1; 1586 } else { 1587 syspath = uma_zalloc(namei_zone, M_WAITOK); 1588 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1589 goto out; 1590 } 1591 AUDIT_ARG_TEXT(syspath); 1592 restart: 1593 bwillwrite(); 1594 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1595 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1596 td); 1597 if ((error = namei(&nd)) != 0) 1598 goto out; 1599 if (nd.ni_vp) { 1600 NDFREE(&nd, NDF_ONLY_PNBUF); 1601 if (nd.ni_vp == nd.ni_dvp) 1602 vrele(nd.ni_dvp); 1603 else 1604 vput(nd.ni_dvp); 1605 vrele(nd.ni_vp); 1606 error = EEXIST; 1607 goto out; 1608 } 1609 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1610 NDFREE(&nd, NDF_ONLY_PNBUF); 1611 vput(nd.ni_dvp); 1612 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1613 goto out; 1614 goto restart; 1615 } 1616 VATTR_NULL(&vattr); 1617 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1618 #ifdef MAC 1619 vattr.va_type = VLNK; 1620 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1621 &vattr); 1622 if (error != 0) 1623 goto out2; 1624 #endif 1625 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1626 if (error == 0) 1627 vput(nd.ni_vp); 1628 #ifdef MAC 1629 out2: 1630 #endif 1631 NDFREE(&nd, NDF_ONLY_PNBUF); 1632 vput(nd.ni_dvp); 1633 vn_finished_write(mp); 1634 out: 1635 if (segflg != UIO_SYSSPACE) 1636 uma_zfree(namei_zone, syspath); 1637 return (error); 1638 } 1639 1640 /* 1641 * Delete a whiteout from the filesystem. 1642 */ 1643 int 1644 sys_undelete(td, uap) 1645 struct thread *td; 1646 register struct undelete_args /* { 1647 char *path; 1648 } */ *uap; 1649 { 1650 struct mount *mp; 1651 struct nameidata nd; 1652 int error; 1653 1654 restart: 1655 bwillwrite(); 1656 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1657 UIO_USERSPACE, uap->path, td); 1658 error = namei(&nd); 1659 if (error != 0) 1660 return (error); 1661 1662 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1663 NDFREE(&nd, NDF_ONLY_PNBUF); 1664 if (nd.ni_vp == nd.ni_dvp) 1665 vrele(nd.ni_dvp); 1666 else 1667 vput(nd.ni_dvp); 1668 if (nd.ni_vp) 1669 vrele(nd.ni_vp); 1670 return (EEXIST); 1671 } 1672 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1673 NDFREE(&nd, NDF_ONLY_PNBUF); 1674 vput(nd.ni_dvp); 1675 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1676 return (error); 1677 goto restart; 1678 } 1679 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1680 NDFREE(&nd, NDF_ONLY_PNBUF); 1681 vput(nd.ni_dvp); 1682 vn_finished_write(mp); 1683 return (error); 1684 } 1685 1686 /* 1687 * Delete a name from the filesystem. 1688 */ 1689 #ifndef _SYS_SYSPROTO_H_ 1690 struct unlink_args { 1691 char *path; 1692 }; 1693 #endif 1694 int 1695 sys_unlink(td, uap) 1696 struct thread *td; 1697 struct unlink_args /* { 1698 char *path; 1699 } */ *uap; 1700 { 1701 1702 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1703 } 1704 1705 #ifndef _SYS_SYSPROTO_H_ 1706 struct unlinkat_args { 1707 int fd; 1708 char *path; 1709 int flag; 1710 }; 1711 #endif 1712 int 1713 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1714 { 1715 int flag = uap->flag; 1716 int fd = uap->fd; 1717 char *path = uap->path; 1718 1719 if (flag & ~AT_REMOVEDIR) 1720 return (EINVAL); 1721 1722 if (flag & AT_REMOVEDIR) 1723 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1724 else 1725 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1726 } 1727 1728 int 1729 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1730 ino_t oldinum) 1731 { 1732 struct mount *mp; 1733 struct vnode *vp; 1734 struct nameidata nd; 1735 struct stat sb; 1736 cap_rights_t rights; 1737 int error; 1738 1739 restart: 1740 bwillwrite(); 1741 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1742 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1743 if ((error = namei(&nd)) != 0) 1744 return (error == EINVAL ? EPERM : error); 1745 vp = nd.ni_vp; 1746 if (vp->v_type == VDIR && oldinum == 0) { 1747 error = EPERM; /* POSIX */ 1748 } else if (oldinum != 0 && 1749 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1750 sb.st_ino != oldinum) { 1751 error = EIDRM; /* Identifier removed */ 1752 } else { 1753 /* 1754 * The root of a mounted filesystem cannot be deleted. 1755 * 1756 * XXX: can this only be a VDIR case? 1757 */ 1758 if (vp->v_vflag & VV_ROOT) 1759 error = EBUSY; 1760 } 1761 if (error == 0) { 1762 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1763 NDFREE(&nd, NDF_ONLY_PNBUF); 1764 vput(nd.ni_dvp); 1765 if (vp == nd.ni_dvp) 1766 vrele(vp); 1767 else 1768 vput(vp); 1769 if ((error = vn_start_write(NULL, &mp, 1770 V_XSLEEP | PCATCH)) != 0) 1771 return (error); 1772 goto restart; 1773 } 1774 #ifdef MAC 1775 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1776 &nd.ni_cnd); 1777 if (error != 0) 1778 goto out; 1779 #endif 1780 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1781 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1782 #ifdef MAC 1783 out: 1784 #endif 1785 vn_finished_write(mp); 1786 } 1787 NDFREE(&nd, NDF_ONLY_PNBUF); 1788 vput(nd.ni_dvp); 1789 if (vp == nd.ni_dvp) 1790 vrele(vp); 1791 else 1792 vput(vp); 1793 return (error); 1794 } 1795 1796 /* 1797 * Reposition read/write file offset. 1798 */ 1799 #ifndef _SYS_SYSPROTO_H_ 1800 struct lseek_args { 1801 int fd; 1802 int pad; 1803 off_t offset; 1804 int whence; 1805 }; 1806 #endif 1807 int 1808 sys_lseek(td, uap) 1809 struct thread *td; 1810 register struct lseek_args /* { 1811 int fd; 1812 int pad; 1813 off_t offset; 1814 int whence; 1815 } */ *uap; 1816 { 1817 struct file *fp; 1818 cap_rights_t rights; 1819 int error; 1820 1821 AUDIT_ARG_FD(uap->fd); 1822 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1823 if (error != 0) 1824 return (error); 1825 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1826 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1827 fdrop(fp, td); 1828 return (error); 1829 } 1830 1831 #if defined(COMPAT_43) 1832 /* 1833 * Reposition read/write file offset. 1834 */ 1835 #ifndef _SYS_SYSPROTO_H_ 1836 struct olseek_args { 1837 int fd; 1838 long offset; 1839 int whence; 1840 }; 1841 #endif 1842 int 1843 olseek(td, uap) 1844 struct thread *td; 1845 register struct olseek_args /* { 1846 int fd; 1847 long offset; 1848 int whence; 1849 } */ *uap; 1850 { 1851 struct lseek_args /* { 1852 int fd; 1853 int pad; 1854 off_t offset; 1855 int whence; 1856 } */ nuap; 1857 1858 nuap.fd = uap->fd; 1859 nuap.offset = uap->offset; 1860 nuap.whence = uap->whence; 1861 return (sys_lseek(td, &nuap)); 1862 } 1863 #endif /* COMPAT_43 */ 1864 1865 #if defined(COMPAT_FREEBSD6) 1866 /* Version with the 'pad' argument */ 1867 int 1868 freebsd6_lseek(td, uap) 1869 struct thread *td; 1870 register struct freebsd6_lseek_args *uap; 1871 { 1872 struct lseek_args ouap; 1873 1874 ouap.fd = uap->fd; 1875 ouap.offset = uap->offset; 1876 ouap.whence = uap->whence; 1877 return (sys_lseek(td, &ouap)); 1878 } 1879 #endif 1880 1881 /* 1882 * Check access permissions using passed credentials. 1883 */ 1884 static int 1885 vn_access(vp, user_flags, cred, td) 1886 struct vnode *vp; 1887 int user_flags; 1888 struct ucred *cred; 1889 struct thread *td; 1890 { 1891 accmode_t accmode; 1892 int error; 1893 1894 /* Flags == 0 means only check for existence. */ 1895 if (user_flags == 0) 1896 return (0); 1897 1898 accmode = 0; 1899 if (user_flags & R_OK) 1900 accmode |= VREAD; 1901 if (user_flags & W_OK) 1902 accmode |= VWRITE; 1903 if (user_flags & X_OK) 1904 accmode |= VEXEC; 1905 #ifdef MAC 1906 error = mac_vnode_check_access(cred, vp, accmode); 1907 if (error != 0) 1908 return (error); 1909 #endif 1910 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1911 error = VOP_ACCESS(vp, accmode, cred, td); 1912 return (error); 1913 } 1914 1915 /* 1916 * Check access permissions using "real" credentials. 1917 */ 1918 #ifndef _SYS_SYSPROTO_H_ 1919 struct access_args { 1920 char *path; 1921 int amode; 1922 }; 1923 #endif 1924 int 1925 sys_access(td, uap) 1926 struct thread *td; 1927 register struct access_args /* { 1928 char *path; 1929 int amode; 1930 } */ *uap; 1931 { 1932 1933 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1934 0, uap->amode)); 1935 } 1936 1937 #ifndef _SYS_SYSPROTO_H_ 1938 struct faccessat_args { 1939 int dirfd; 1940 char *path; 1941 int amode; 1942 int flag; 1943 } 1944 #endif 1945 int 1946 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1947 { 1948 1949 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1950 uap->amode)); 1951 } 1952 1953 int 1954 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1955 int flag, int amode) 1956 { 1957 struct ucred *cred, *usecred; 1958 struct vnode *vp; 1959 struct nameidata nd; 1960 cap_rights_t rights; 1961 int error; 1962 1963 if (flag & ~AT_EACCESS) 1964 return (EINVAL); 1965 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1966 return (EINVAL); 1967 1968 /* 1969 * Create and modify a temporary credential instead of one that 1970 * is potentially shared (if we need one). 1971 */ 1972 cred = td->td_ucred; 1973 if ((flag & AT_EACCESS) == 0 && 1974 ((cred->cr_uid != cred->cr_ruid || 1975 cred->cr_rgid != cred->cr_groups[0]))) { 1976 usecred = crdup(cred); 1977 usecred->cr_uid = cred->cr_ruid; 1978 usecred->cr_groups[0] = cred->cr_rgid; 1979 td->td_ucred = usecred; 1980 } else 1981 usecred = cred; 1982 AUDIT_ARG_VALUE(amode); 1983 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1984 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1985 td); 1986 if ((error = namei(&nd)) != 0) 1987 goto out; 1988 vp = nd.ni_vp; 1989 1990 error = vn_access(vp, amode, usecred, td); 1991 NDFREE(&nd, NDF_ONLY_PNBUF); 1992 vput(vp); 1993 out: 1994 if (usecred != cred) { 1995 td->td_ucred = cred; 1996 crfree(usecred); 1997 } 1998 return (error); 1999 } 2000 2001 /* 2002 * Check access permissions using "effective" credentials. 2003 */ 2004 #ifndef _SYS_SYSPROTO_H_ 2005 struct eaccess_args { 2006 char *path; 2007 int amode; 2008 }; 2009 #endif 2010 int 2011 sys_eaccess(td, uap) 2012 struct thread *td; 2013 register struct eaccess_args /* { 2014 char *path; 2015 int amode; 2016 } */ *uap; 2017 { 2018 2019 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2020 AT_EACCESS, uap->amode)); 2021 } 2022 2023 #if defined(COMPAT_43) 2024 /* 2025 * Get file status; this version follows links. 2026 */ 2027 #ifndef _SYS_SYSPROTO_H_ 2028 struct ostat_args { 2029 char *path; 2030 struct ostat *ub; 2031 }; 2032 #endif 2033 int 2034 ostat(td, uap) 2035 struct thread *td; 2036 register struct ostat_args /* { 2037 char *path; 2038 struct ostat *ub; 2039 } */ *uap; 2040 { 2041 struct stat sb; 2042 struct ostat osb; 2043 int error; 2044 2045 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2046 &sb, NULL); 2047 if (error != 0) 2048 return (error); 2049 cvtstat(&sb, &osb); 2050 return (copyout(&osb, uap->ub, sizeof (osb))); 2051 } 2052 2053 /* 2054 * Get file status; this version does not follow links. 2055 */ 2056 #ifndef _SYS_SYSPROTO_H_ 2057 struct olstat_args { 2058 char *path; 2059 struct ostat *ub; 2060 }; 2061 #endif 2062 int 2063 olstat(td, uap) 2064 struct thread *td; 2065 register struct olstat_args /* { 2066 char *path; 2067 struct ostat *ub; 2068 } */ *uap; 2069 { 2070 struct stat sb; 2071 struct ostat osb; 2072 int error; 2073 2074 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2075 UIO_USERSPACE, &sb, NULL); 2076 if (error != 0) 2077 return (error); 2078 cvtstat(&sb, &osb); 2079 return (copyout(&osb, uap->ub, sizeof (osb))); 2080 } 2081 2082 /* 2083 * Convert from an old to a new stat structure. 2084 */ 2085 void 2086 cvtstat(st, ost) 2087 struct stat *st; 2088 struct ostat *ost; 2089 { 2090 2091 bzero(ost, sizeof(*ost)); 2092 ost->st_dev = st->st_dev; 2093 ost->st_ino = st->st_ino; 2094 ost->st_mode = st->st_mode; 2095 ost->st_nlink = st->st_nlink; 2096 ost->st_uid = st->st_uid; 2097 ost->st_gid = st->st_gid; 2098 ost->st_rdev = st->st_rdev; 2099 if (st->st_size < (quad_t)1 << 32) 2100 ost->st_size = st->st_size; 2101 else 2102 ost->st_size = -2; 2103 ost->st_atim = st->st_atim; 2104 ost->st_mtim = st->st_mtim; 2105 ost->st_ctim = st->st_ctim; 2106 ost->st_blksize = st->st_blksize; 2107 ost->st_blocks = st->st_blocks; 2108 ost->st_flags = st->st_flags; 2109 ost->st_gen = st->st_gen; 2110 } 2111 #endif /* COMPAT_43 */ 2112 2113 /* 2114 * Get file status; this version follows links. 2115 */ 2116 #ifndef _SYS_SYSPROTO_H_ 2117 struct stat_args { 2118 char *path; 2119 struct stat *ub; 2120 }; 2121 #endif 2122 int 2123 sys_stat(td, uap) 2124 struct thread *td; 2125 register struct stat_args /* { 2126 char *path; 2127 struct stat *ub; 2128 } */ *uap; 2129 { 2130 struct stat sb; 2131 int error; 2132 2133 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2134 &sb, NULL); 2135 if (error == 0) 2136 error = copyout(&sb, uap->ub, sizeof (sb)); 2137 return (error); 2138 } 2139 2140 #ifndef _SYS_SYSPROTO_H_ 2141 struct fstatat_args { 2142 int fd; 2143 char *path; 2144 struct stat *buf; 2145 int flag; 2146 } 2147 #endif 2148 int 2149 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2150 { 2151 struct stat sb; 2152 int error; 2153 2154 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2155 UIO_USERSPACE, &sb, NULL); 2156 if (error == 0) 2157 error = copyout(&sb, uap->buf, sizeof (sb)); 2158 return (error); 2159 } 2160 2161 int 2162 kern_statat(struct thread *td, int flag, int fd, char *path, 2163 enum uio_seg pathseg, struct stat *sbp, 2164 void (*hook)(struct vnode *vp, struct stat *sbp)) 2165 { 2166 struct nameidata nd; 2167 struct stat sb; 2168 cap_rights_t rights; 2169 int error; 2170 2171 if (flag & ~AT_SYMLINK_NOFOLLOW) 2172 return (EINVAL); 2173 2174 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2175 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2176 cap_rights_init(&rights, CAP_FSTAT), td); 2177 2178 if ((error = namei(&nd)) != 0) 2179 return (error); 2180 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2181 if (error == 0) { 2182 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2183 if (S_ISREG(sb.st_mode)) 2184 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2185 if (__predict_false(hook != NULL)) 2186 hook(nd.ni_vp, &sb); 2187 } 2188 NDFREE(&nd, NDF_ONLY_PNBUF); 2189 vput(nd.ni_vp); 2190 if (error != 0) 2191 return (error); 2192 *sbp = sb; 2193 #ifdef KTRACE 2194 if (KTRPOINT(td, KTR_STRUCT)) 2195 ktrstat(&sb); 2196 #endif 2197 return (0); 2198 } 2199 2200 /* 2201 * Get file status; this version does not follow links. 2202 */ 2203 #ifndef _SYS_SYSPROTO_H_ 2204 struct lstat_args { 2205 char *path; 2206 struct stat *ub; 2207 }; 2208 #endif 2209 int 2210 sys_lstat(td, uap) 2211 struct thread *td; 2212 register struct lstat_args /* { 2213 char *path; 2214 struct stat *ub; 2215 } */ *uap; 2216 { 2217 struct stat sb; 2218 int error; 2219 2220 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2221 UIO_USERSPACE, &sb, NULL); 2222 if (error == 0) 2223 error = copyout(&sb, uap->ub, sizeof (sb)); 2224 return (error); 2225 } 2226 2227 /* 2228 * Implementation of the NetBSD [l]stat() functions. 2229 */ 2230 void 2231 cvtnstat(sb, nsb) 2232 struct stat *sb; 2233 struct nstat *nsb; 2234 { 2235 2236 bzero(nsb, sizeof *nsb); 2237 nsb->st_dev = sb->st_dev; 2238 nsb->st_ino = sb->st_ino; 2239 nsb->st_mode = sb->st_mode; 2240 nsb->st_nlink = sb->st_nlink; 2241 nsb->st_uid = sb->st_uid; 2242 nsb->st_gid = sb->st_gid; 2243 nsb->st_rdev = sb->st_rdev; 2244 nsb->st_atim = sb->st_atim; 2245 nsb->st_mtim = sb->st_mtim; 2246 nsb->st_ctim = sb->st_ctim; 2247 nsb->st_size = sb->st_size; 2248 nsb->st_blocks = sb->st_blocks; 2249 nsb->st_blksize = sb->st_blksize; 2250 nsb->st_flags = sb->st_flags; 2251 nsb->st_gen = sb->st_gen; 2252 nsb->st_birthtim = sb->st_birthtim; 2253 } 2254 2255 #ifndef _SYS_SYSPROTO_H_ 2256 struct nstat_args { 2257 char *path; 2258 struct nstat *ub; 2259 }; 2260 #endif 2261 int 2262 sys_nstat(td, uap) 2263 struct thread *td; 2264 register struct nstat_args /* { 2265 char *path; 2266 struct nstat *ub; 2267 } */ *uap; 2268 { 2269 struct stat sb; 2270 struct nstat nsb; 2271 int error; 2272 2273 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2274 &sb, NULL); 2275 if (error != 0) 2276 return (error); 2277 cvtnstat(&sb, &nsb); 2278 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2279 } 2280 2281 /* 2282 * NetBSD lstat. Get file status; this version does not follow links. 2283 */ 2284 #ifndef _SYS_SYSPROTO_H_ 2285 struct lstat_args { 2286 char *path; 2287 struct stat *ub; 2288 }; 2289 #endif 2290 int 2291 sys_nlstat(td, uap) 2292 struct thread *td; 2293 register struct nlstat_args /* { 2294 char *path; 2295 struct nstat *ub; 2296 } */ *uap; 2297 { 2298 struct stat sb; 2299 struct nstat nsb; 2300 int error; 2301 2302 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2303 UIO_USERSPACE, &sb, NULL); 2304 if (error != 0) 2305 return (error); 2306 cvtnstat(&sb, &nsb); 2307 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2308 } 2309 2310 /* 2311 * Get configurable pathname variables. 2312 */ 2313 #ifndef _SYS_SYSPROTO_H_ 2314 struct pathconf_args { 2315 char *path; 2316 int name; 2317 }; 2318 #endif 2319 int 2320 sys_pathconf(td, uap) 2321 struct thread *td; 2322 register struct pathconf_args /* { 2323 char *path; 2324 int name; 2325 } */ *uap; 2326 { 2327 2328 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2329 } 2330 2331 #ifndef _SYS_SYSPROTO_H_ 2332 struct lpathconf_args { 2333 char *path; 2334 int name; 2335 }; 2336 #endif 2337 int 2338 sys_lpathconf(td, uap) 2339 struct thread *td; 2340 register struct lpathconf_args /* { 2341 char *path; 2342 int name; 2343 } */ *uap; 2344 { 2345 2346 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2347 NOFOLLOW)); 2348 } 2349 2350 int 2351 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2352 u_long flags) 2353 { 2354 struct nameidata nd; 2355 int error; 2356 2357 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2358 pathseg, path, td); 2359 if ((error = namei(&nd)) != 0) 2360 return (error); 2361 NDFREE(&nd, NDF_ONLY_PNBUF); 2362 2363 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2364 vput(nd.ni_vp); 2365 return (error); 2366 } 2367 2368 /* 2369 * Return target name of a symbolic link. 2370 */ 2371 #ifndef _SYS_SYSPROTO_H_ 2372 struct readlink_args { 2373 char *path; 2374 char *buf; 2375 size_t count; 2376 }; 2377 #endif 2378 int 2379 sys_readlink(td, uap) 2380 struct thread *td; 2381 register struct readlink_args /* { 2382 char *path; 2383 char *buf; 2384 size_t count; 2385 } */ *uap; 2386 { 2387 2388 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2389 uap->buf, UIO_USERSPACE, uap->count)); 2390 } 2391 #ifndef _SYS_SYSPROTO_H_ 2392 struct readlinkat_args { 2393 int fd; 2394 char *path; 2395 char *buf; 2396 size_t bufsize; 2397 }; 2398 #endif 2399 int 2400 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2401 { 2402 2403 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2404 uap->buf, UIO_USERSPACE, uap->bufsize)); 2405 } 2406 2407 int 2408 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2409 char *buf, enum uio_seg bufseg, size_t count) 2410 { 2411 struct vnode *vp; 2412 struct iovec aiov; 2413 struct uio auio; 2414 struct nameidata nd; 2415 int error; 2416 2417 if (count > IOSIZE_MAX) 2418 return (EINVAL); 2419 2420 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2421 pathseg, path, fd, td); 2422 2423 if ((error = namei(&nd)) != 0) 2424 return (error); 2425 NDFREE(&nd, NDF_ONLY_PNBUF); 2426 vp = nd.ni_vp; 2427 #ifdef MAC 2428 error = mac_vnode_check_readlink(td->td_ucred, vp); 2429 if (error != 0) { 2430 vput(vp); 2431 return (error); 2432 } 2433 #endif 2434 if (vp->v_type != VLNK) 2435 error = EINVAL; 2436 else { 2437 aiov.iov_base = buf; 2438 aiov.iov_len = count; 2439 auio.uio_iov = &aiov; 2440 auio.uio_iovcnt = 1; 2441 auio.uio_offset = 0; 2442 auio.uio_rw = UIO_READ; 2443 auio.uio_segflg = bufseg; 2444 auio.uio_td = td; 2445 auio.uio_resid = count; 2446 error = VOP_READLINK(vp, &auio, td->td_ucred); 2447 td->td_retval[0] = count - auio.uio_resid; 2448 } 2449 vput(vp); 2450 return (error); 2451 } 2452 2453 /* 2454 * Common implementation code for chflags() and fchflags(). 2455 */ 2456 static int 2457 setfflags(td, vp, flags) 2458 struct thread *td; 2459 struct vnode *vp; 2460 u_long flags; 2461 { 2462 struct mount *mp; 2463 struct vattr vattr; 2464 int error; 2465 2466 /* We can't support the value matching VNOVAL. */ 2467 if (flags == VNOVAL) 2468 return (EOPNOTSUPP); 2469 2470 /* 2471 * Prevent non-root users from setting flags on devices. When 2472 * a device is reused, users can retain ownership of the device 2473 * if they are allowed to set flags and programs assume that 2474 * chown can't fail when done as root. 2475 */ 2476 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2477 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2478 if (error != 0) 2479 return (error); 2480 } 2481 2482 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2483 return (error); 2484 VATTR_NULL(&vattr); 2485 vattr.va_flags = flags; 2486 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2487 #ifdef MAC 2488 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2489 if (error == 0) 2490 #endif 2491 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2492 VOP_UNLOCK(vp, 0); 2493 vn_finished_write(mp); 2494 return (error); 2495 } 2496 2497 /* 2498 * Change flags of a file given a path name. 2499 */ 2500 #ifndef _SYS_SYSPROTO_H_ 2501 struct chflags_args { 2502 const char *path; 2503 u_long flags; 2504 }; 2505 #endif 2506 int 2507 sys_chflags(td, uap) 2508 struct thread *td; 2509 register struct chflags_args /* { 2510 const char *path; 2511 u_long flags; 2512 } */ *uap; 2513 { 2514 2515 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2516 uap->flags, 0)); 2517 } 2518 2519 #ifndef _SYS_SYSPROTO_H_ 2520 struct chflagsat_args { 2521 int fd; 2522 const char *path; 2523 u_long flags; 2524 int atflag; 2525 } 2526 #endif 2527 int 2528 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2529 { 2530 int fd = uap->fd; 2531 const char *path = uap->path; 2532 u_long flags = uap->flags; 2533 int atflag = uap->atflag; 2534 2535 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2536 return (EINVAL); 2537 2538 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2539 } 2540 2541 /* 2542 * Same as chflags() but doesn't follow symlinks. 2543 */ 2544 int 2545 sys_lchflags(td, uap) 2546 struct thread *td; 2547 register struct lchflags_args /* { 2548 const char *path; 2549 u_long flags; 2550 } */ *uap; 2551 { 2552 2553 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2554 uap->flags, AT_SYMLINK_NOFOLLOW)); 2555 } 2556 2557 static int 2558 kern_chflagsat(struct thread *td, int fd, const char *path, 2559 enum uio_seg pathseg, u_long flags, int atflag) 2560 { 2561 struct nameidata nd; 2562 cap_rights_t rights; 2563 int error, follow; 2564 2565 AUDIT_ARG_FFLAGS(flags); 2566 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2567 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2568 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2569 if ((error = namei(&nd)) != 0) 2570 return (error); 2571 NDFREE(&nd, NDF_ONLY_PNBUF); 2572 error = setfflags(td, nd.ni_vp, flags); 2573 vrele(nd.ni_vp); 2574 return (error); 2575 } 2576 2577 /* 2578 * Change flags of a file given a file descriptor. 2579 */ 2580 #ifndef _SYS_SYSPROTO_H_ 2581 struct fchflags_args { 2582 int fd; 2583 u_long flags; 2584 }; 2585 #endif 2586 int 2587 sys_fchflags(td, uap) 2588 struct thread *td; 2589 register struct fchflags_args /* { 2590 int fd; 2591 u_long flags; 2592 } */ *uap; 2593 { 2594 struct file *fp; 2595 cap_rights_t rights; 2596 int error; 2597 2598 AUDIT_ARG_FD(uap->fd); 2599 AUDIT_ARG_FFLAGS(uap->flags); 2600 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2601 &fp); 2602 if (error != 0) 2603 return (error); 2604 #ifdef AUDIT 2605 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2606 AUDIT_ARG_VNODE1(fp->f_vnode); 2607 VOP_UNLOCK(fp->f_vnode, 0); 2608 #endif 2609 error = setfflags(td, fp->f_vnode, uap->flags); 2610 fdrop(fp, td); 2611 return (error); 2612 } 2613 2614 /* 2615 * Common implementation code for chmod(), lchmod() and fchmod(). 2616 */ 2617 int 2618 setfmode(td, cred, vp, mode) 2619 struct thread *td; 2620 struct ucred *cred; 2621 struct vnode *vp; 2622 int mode; 2623 { 2624 struct mount *mp; 2625 struct vattr vattr; 2626 int error; 2627 2628 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2629 return (error); 2630 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2631 VATTR_NULL(&vattr); 2632 vattr.va_mode = mode & ALLPERMS; 2633 #ifdef MAC 2634 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2635 if (error == 0) 2636 #endif 2637 error = VOP_SETATTR(vp, &vattr, cred); 2638 VOP_UNLOCK(vp, 0); 2639 vn_finished_write(mp); 2640 return (error); 2641 } 2642 2643 /* 2644 * Change mode of a file given path name. 2645 */ 2646 #ifndef _SYS_SYSPROTO_H_ 2647 struct chmod_args { 2648 char *path; 2649 int mode; 2650 }; 2651 #endif 2652 int 2653 sys_chmod(td, uap) 2654 struct thread *td; 2655 register struct chmod_args /* { 2656 char *path; 2657 int mode; 2658 } */ *uap; 2659 { 2660 2661 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2662 uap->mode, 0)); 2663 } 2664 2665 #ifndef _SYS_SYSPROTO_H_ 2666 struct fchmodat_args { 2667 int dirfd; 2668 char *path; 2669 mode_t mode; 2670 int flag; 2671 } 2672 #endif 2673 int 2674 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2675 { 2676 int flag = uap->flag; 2677 int fd = uap->fd; 2678 char *path = uap->path; 2679 mode_t mode = uap->mode; 2680 2681 if (flag & ~AT_SYMLINK_NOFOLLOW) 2682 return (EINVAL); 2683 2684 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2685 } 2686 2687 /* 2688 * Change mode of a file given path name (don't follow links.) 2689 */ 2690 #ifndef _SYS_SYSPROTO_H_ 2691 struct lchmod_args { 2692 char *path; 2693 int mode; 2694 }; 2695 #endif 2696 int 2697 sys_lchmod(td, uap) 2698 struct thread *td; 2699 register struct lchmod_args /* { 2700 char *path; 2701 int mode; 2702 } */ *uap; 2703 { 2704 2705 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2706 uap->mode, AT_SYMLINK_NOFOLLOW)); 2707 } 2708 2709 int 2710 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2711 mode_t mode, int flag) 2712 { 2713 struct nameidata nd; 2714 cap_rights_t rights; 2715 int error, follow; 2716 2717 AUDIT_ARG_MODE(mode); 2718 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2719 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2720 cap_rights_init(&rights, CAP_FCHMOD), td); 2721 if ((error = namei(&nd)) != 0) 2722 return (error); 2723 NDFREE(&nd, NDF_ONLY_PNBUF); 2724 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2725 vrele(nd.ni_vp); 2726 return (error); 2727 } 2728 2729 /* 2730 * Change mode of a file given a file descriptor. 2731 */ 2732 #ifndef _SYS_SYSPROTO_H_ 2733 struct fchmod_args { 2734 int fd; 2735 int mode; 2736 }; 2737 #endif 2738 int 2739 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2740 { 2741 struct file *fp; 2742 cap_rights_t rights; 2743 int error; 2744 2745 AUDIT_ARG_FD(uap->fd); 2746 AUDIT_ARG_MODE(uap->mode); 2747 2748 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2749 if (error != 0) 2750 return (error); 2751 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2752 fdrop(fp, td); 2753 return (error); 2754 } 2755 2756 /* 2757 * Common implementation for chown(), lchown(), and fchown() 2758 */ 2759 int 2760 setfown(td, cred, vp, uid, gid) 2761 struct thread *td; 2762 struct ucred *cred; 2763 struct vnode *vp; 2764 uid_t uid; 2765 gid_t gid; 2766 { 2767 struct mount *mp; 2768 struct vattr vattr; 2769 int error; 2770 2771 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2772 return (error); 2773 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2774 VATTR_NULL(&vattr); 2775 vattr.va_uid = uid; 2776 vattr.va_gid = gid; 2777 #ifdef MAC 2778 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2779 vattr.va_gid); 2780 if (error == 0) 2781 #endif 2782 error = VOP_SETATTR(vp, &vattr, cred); 2783 VOP_UNLOCK(vp, 0); 2784 vn_finished_write(mp); 2785 return (error); 2786 } 2787 2788 /* 2789 * Set ownership given a path name. 2790 */ 2791 #ifndef _SYS_SYSPROTO_H_ 2792 struct chown_args { 2793 char *path; 2794 int uid; 2795 int gid; 2796 }; 2797 #endif 2798 int 2799 sys_chown(td, uap) 2800 struct thread *td; 2801 register struct chown_args /* { 2802 char *path; 2803 int uid; 2804 int gid; 2805 } */ *uap; 2806 { 2807 2808 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2809 uap->gid, 0)); 2810 } 2811 2812 #ifndef _SYS_SYSPROTO_H_ 2813 struct fchownat_args { 2814 int fd; 2815 const char * path; 2816 uid_t uid; 2817 gid_t gid; 2818 int flag; 2819 }; 2820 #endif 2821 int 2822 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2823 { 2824 int flag; 2825 2826 flag = uap->flag; 2827 if (flag & ~AT_SYMLINK_NOFOLLOW) 2828 return (EINVAL); 2829 2830 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2831 uap->gid, uap->flag)); 2832 } 2833 2834 int 2835 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2836 int uid, int gid, int flag) 2837 { 2838 struct nameidata nd; 2839 cap_rights_t rights; 2840 int error, follow; 2841 2842 AUDIT_ARG_OWNER(uid, gid); 2843 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2844 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2845 cap_rights_init(&rights, CAP_FCHOWN), td); 2846 2847 if ((error = namei(&nd)) != 0) 2848 return (error); 2849 NDFREE(&nd, NDF_ONLY_PNBUF); 2850 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2851 vrele(nd.ni_vp); 2852 return (error); 2853 } 2854 2855 /* 2856 * Set ownership given a path name, do not cross symlinks. 2857 */ 2858 #ifndef _SYS_SYSPROTO_H_ 2859 struct lchown_args { 2860 char *path; 2861 int uid; 2862 int gid; 2863 }; 2864 #endif 2865 int 2866 sys_lchown(td, uap) 2867 struct thread *td; 2868 register struct lchown_args /* { 2869 char *path; 2870 int uid; 2871 int gid; 2872 } */ *uap; 2873 { 2874 2875 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2876 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2877 } 2878 2879 /* 2880 * Set ownership given a file descriptor. 2881 */ 2882 #ifndef _SYS_SYSPROTO_H_ 2883 struct fchown_args { 2884 int fd; 2885 int uid; 2886 int gid; 2887 }; 2888 #endif 2889 int 2890 sys_fchown(td, uap) 2891 struct thread *td; 2892 register struct fchown_args /* { 2893 int fd; 2894 int uid; 2895 int gid; 2896 } */ *uap; 2897 { 2898 struct file *fp; 2899 cap_rights_t rights; 2900 int error; 2901 2902 AUDIT_ARG_FD(uap->fd); 2903 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2904 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2905 if (error != 0) 2906 return (error); 2907 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2908 fdrop(fp, td); 2909 return (error); 2910 } 2911 2912 /* 2913 * Common implementation code for utimes(), lutimes(), and futimes(). 2914 */ 2915 static int 2916 getutimes(usrtvp, tvpseg, tsp) 2917 const struct timeval *usrtvp; 2918 enum uio_seg tvpseg; 2919 struct timespec *tsp; 2920 { 2921 struct timeval tv[2]; 2922 const struct timeval *tvp; 2923 int error; 2924 2925 if (usrtvp == NULL) { 2926 vfs_timestamp(&tsp[0]); 2927 tsp[1] = tsp[0]; 2928 } else { 2929 if (tvpseg == UIO_SYSSPACE) { 2930 tvp = usrtvp; 2931 } else { 2932 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2933 return (error); 2934 tvp = tv; 2935 } 2936 2937 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2938 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2939 return (EINVAL); 2940 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2941 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2942 } 2943 return (0); 2944 } 2945 2946 /* 2947 * Common implementation code for futimens(), utimensat(). 2948 */ 2949 #define UTIMENS_NULL 0x1 2950 #define UTIMENS_EXIT 0x2 2951 static int 2952 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2953 struct timespec *tsp, int *retflags) 2954 { 2955 struct timespec tsnow; 2956 int error; 2957 2958 vfs_timestamp(&tsnow); 2959 *retflags = 0; 2960 if (usrtsp == NULL) { 2961 tsp[0] = tsnow; 2962 tsp[1] = tsnow; 2963 *retflags |= UTIMENS_NULL; 2964 return (0); 2965 } 2966 if (tspseg == UIO_SYSSPACE) { 2967 tsp[0] = usrtsp[0]; 2968 tsp[1] = usrtsp[1]; 2969 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2970 return (error); 2971 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2972 *retflags |= UTIMENS_EXIT; 2973 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2974 *retflags |= UTIMENS_NULL; 2975 if (tsp[0].tv_nsec == UTIME_OMIT) 2976 tsp[0].tv_sec = VNOVAL; 2977 else if (tsp[0].tv_nsec == UTIME_NOW) 2978 tsp[0] = tsnow; 2979 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2980 return (EINVAL); 2981 if (tsp[1].tv_nsec == UTIME_OMIT) 2982 tsp[1].tv_sec = VNOVAL; 2983 else if (tsp[1].tv_nsec == UTIME_NOW) 2984 tsp[1] = tsnow; 2985 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2986 return (EINVAL); 2987 2988 return (0); 2989 } 2990 2991 /* 2992 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2993 * and utimensat(). 2994 */ 2995 static int 2996 setutimes(td, vp, ts, numtimes, nullflag) 2997 struct thread *td; 2998 struct vnode *vp; 2999 const struct timespec *ts; 3000 int numtimes; 3001 int nullflag; 3002 { 3003 struct mount *mp; 3004 struct vattr vattr; 3005 int error, setbirthtime; 3006 3007 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3008 return (error); 3009 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3010 setbirthtime = 0; 3011 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3012 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3013 setbirthtime = 1; 3014 VATTR_NULL(&vattr); 3015 vattr.va_atime = ts[0]; 3016 vattr.va_mtime = ts[1]; 3017 if (setbirthtime) 3018 vattr.va_birthtime = ts[1]; 3019 if (numtimes > 2) 3020 vattr.va_birthtime = ts[2]; 3021 if (nullflag) 3022 vattr.va_vaflags |= VA_UTIMES_NULL; 3023 #ifdef MAC 3024 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3025 vattr.va_mtime); 3026 #endif 3027 if (error == 0) 3028 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3029 VOP_UNLOCK(vp, 0); 3030 vn_finished_write(mp); 3031 return (error); 3032 } 3033 3034 /* 3035 * Set the access and modification times of a file. 3036 */ 3037 #ifndef _SYS_SYSPROTO_H_ 3038 struct utimes_args { 3039 char *path; 3040 struct timeval *tptr; 3041 }; 3042 #endif 3043 int 3044 sys_utimes(td, uap) 3045 struct thread *td; 3046 register struct utimes_args /* { 3047 char *path; 3048 struct timeval *tptr; 3049 } */ *uap; 3050 { 3051 3052 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3053 uap->tptr, UIO_USERSPACE)); 3054 } 3055 3056 #ifndef _SYS_SYSPROTO_H_ 3057 struct futimesat_args { 3058 int fd; 3059 const char * path; 3060 const struct timeval * times; 3061 }; 3062 #endif 3063 int 3064 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3065 { 3066 3067 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3068 uap->times, UIO_USERSPACE)); 3069 } 3070 3071 int 3072 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3073 struct timeval *tptr, enum uio_seg tptrseg) 3074 { 3075 struct nameidata nd; 3076 struct timespec ts[2]; 3077 cap_rights_t rights; 3078 int error; 3079 3080 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3081 return (error); 3082 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3083 cap_rights_init(&rights, CAP_FUTIMES), td); 3084 3085 if ((error = namei(&nd)) != 0) 3086 return (error); 3087 NDFREE(&nd, NDF_ONLY_PNBUF); 3088 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3089 vrele(nd.ni_vp); 3090 return (error); 3091 } 3092 3093 /* 3094 * Set the access and modification times of a file. 3095 */ 3096 #ifndef _SYS_SYSPROTO_H_ 3097 struct lutimes_args { 3098 char *path; 3099 struct timeval *tptr; 3100 }; 3101 #endif 3102 int 3103 sys_lutimes(td, uap) 3104 struct thread *td; 3105 register struct lutimes_args /* { 3106 char *path; 3107 struct timeval *tptr; 3108 } */ *uap; 3109 { 3110 3111 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3112 UIO_USERSPACE)); 3113 } 3114 3115 int 3116 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3117 struct timeval *tptr, enum uio_seg tptrseg) 3118 { 3119 struct timespec ts[2]; 3120 struct nameidata nd; 3121 int error; 3122 3123 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3124 return (error); 3125 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3126 if ((error = namei(&nd)) != 0) 3127 return (error); 3128 NDFREE(&nd, NDF_ONLY_PNBUF); 3129 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3130 vrele(nd.ni_vp); 3131 return (error); 3132 } 3133 3134 /* 3135 * Set the access and modification times of a file. 3136 */ 3137 #ifndef _SYS_SYSPROTO_H_ 3138 struct futimes_args { 3139 int fd; 3140 struct timeval *tptr; 3141 }; 3142 #endif 3143 int 3144 sys_futimes(td, uap) 3145 struct thread *td; 3146 register struct futimes_args /* { 3147 int fd; 3148 struct timeval *tptr; 3149 } */ *uap; 3150 { 3151 3152 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3153 } 3154 3155 int 3156 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3157 enum uio_seg tptrseg) 3158 { 3159 struct timespec ts[2]; 3160 struct file *fp; 3161 cap_rights_t rights; 3162 int error; 3163 3164 AUDIT_ARG_FD(fd); 3165 error = getutimes(tptr, tptrseg, ts); 3166 if (error != 0) 3167 return (error); 3168 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3169 if (error != 0) 3170 return (error); 3171 #ifdef AUDIT 3172 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3173 AUDIT_ARG_VNODE1(fp->f_vnode); 3174 VOP_UNLOCK(fp->f_vnode, 0); 3175 #endif 3176 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3177 fdrop(fp, td); 3178 return (error); 3179 } 3180 3181 int 3182 sys_futimens(struct thread *td, struct futimens_args *uap) 3183 { 3184 3185 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3186 } 3187 3188 int 3189 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3190 enum uio_seg tptrseg) 3191 { 3192 struct timespec ts[2]; 3193 struct file *fp; 3194 cap_rights_t rights; 3195 int error, flags; 3196 3197 AUDIT_ARG_FD(fd); 3198 error = getutimens(tptr, tptrseg, ts, &flags); 3199 if (error != 0) 3200 return (error); 3201 if (flags & UTIMENS_EXIT) 3202 return (0); 3203 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3204 if (error != 0) 3205 return (error); 3206 #ifdef AUDIT 3207 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3208 AUDIT_ARG_VNODE1(fp->f_vnode); 3209 VOP_UNLOCK(fp->f_vnode, 0); 3210 #endif 3211 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3212 fdrop(fp, td); 3213 return (error); 3214 } 3215 3216 int 3217 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3218 { 3219 3220 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3221 uap->times, UIO_USERSPACE, uap->flag)); 3222 } 3223 3224 int 3225 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3226 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3227 { 3228 struct nameidata nd; 3229 struct timespec ts[2]; 3230 cap_rights_t rights; 3231 int error, flags; 3232 3233 if (flag & ~AT_SYMLINK_NOFOLLOW) 3234 return (EINVAL); 3235 3236 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3237 return (error); 3238 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3239 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3240 cap_rights_init(&rights, CAP_FUTIMES), td); 3241 if ((error = namei(&nd)) != 0) 3242 return (error); 3243 /* 3244 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3245 * POSIX states: 3246 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3247 * "Search permission is denied by a component of the path prefix." 3248 */ 3249 NDFREE(&nd, NDF_ONLY_PNBUF); 3250 if ((flags & UTIMENS_EXIT) == 0) 3251 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3252 vrele(nd.ni_vp); 3253 return (error); 3254 } 3255 3256 /* 3257 * Truncate a file given its path name. 3258 */ 3259 #ifndef _SYS_SYSPROTO_H_ 3260 struct truncate_args { 3261 char *path; 3262 int pad; 3263 off_t length; 3264 }; 3265 #endif 3266 int 3267 sys_truncate(td, uap) 3268 struct thread *td; 3269 register struct truncate_args /* { 3270 char *path; 3271 int pad; 3272 off_t length; 3273 } */ *uap; 3274 { 3275 3276 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3277 } 3278 3279 int 3280 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3281 { 3282 struct mount *mp; 3283 struct vnode *vp; 3284 void *rl_cookie; 3285 struct vattr vattr; 3286 struct nameidata nd; 3287 int error; 3288 3289 if (length < 0) 3290 return(EINVAL); 3291 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3292 if ((error = namei(&nd)) != 0) 3293 return (error); 3294 vp = nd.ni_vp; 3295 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3296 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3297 vn_rangelock_unlock(vp, rl_cookie); 3298 vrele(vp); 3299 return (error); 3300 } 3301 NDFREE(&nd, NDF_ONLY_PNBUF); 3302 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3303 if (vp->v_type == VDIR) 3304 error = EISDIR; 3305 #ifdef MAC 3306 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3307 } 3308 #endif 3309 else if ((error = vn_writechk(vp)) == 0 && 3310 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3311 VATTR_NULL(&vattr); 3312 vattr.va_size = length; 3313 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3314 } 3315 VOP_UNLOCK(vp, 0); 3316 vn_finished_write(mp); 3317 vn_rangelock_unlock(vp, rl_cookie); 3318 vrele(vp); 3319 return (error); 3320 } 3321 3322 #if defined(COMPAT_43) 3323 /* 3324 * Truncate a file given its path name. 3325 */ 3326 #ifndef _SYS_SYSPROTO_H_ 3327 struct otruncate_args { 3328 char *path; 3329 long length; 3330 }; 3331 #endif 3332 int 3333 otruncate(td, uap) 3334 struct thread *td; 3335 register struct otruncate_args /* { 3336 char *path; 3337 long length; 3338 } */ *uap; 3339 { 3340 struct truncate_args /* { 3341 char *path; 3342 int pad; 3343 off_t length; 3344 } */ nuap; 3345 3346 nuap.path = uap->path; 3347 nuap.length = uap->length; 3348 return (sys_truncate(td, &nuap)); 3349 } 3350 #endif /* COMPAT_43 */ 3351 3352 #if defined(COMPAT_FREEBSD6) 3353 /* Versions with the pad argument */ 3354 int 3355 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3356 { 3357 struct truncate_args ouap; 3358 3359 ouap.path = uap->path; 3360 ouap.length = uap->length; 3361 return (sys_truncate(td, &ouap)); 3362 } 3363 3364 int 3365 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3366 { 3367 struct ftruncate_args ouap; 3368 3369 ouap.fd = uap->fd; 3370 ouap.length = uap->length; 3371 return (sys_ftruncate(td, &ouap)); 3372 } 3373 #endif 3374 3375 int 3376 kern_fsync(struct thread *td, int fd, bool fullsync) 3377 { 3378 struct vnode *vp; 3379 struct mount *mp; 3380 struct file *fp; 3381 cap_rights_t rights; 3382 int error, lock_flags; 3383 3384 AUDIT_ARG_FD(fd); 3385 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3386 if (error != 0) 3387 return (error); 3388 vp = fp->f_vnode; 3389 #if 0 3390 if (!fullsync) 3391 /* XXXKIB: compete outstanding aio writes */; 3392 #endif 3393 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3394 if (error != 0) 3395 goto drop; 3396 if (MNT_SHARED_WRITES(mp) || 3397 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3398 lock_flags = LK_SHARED; 3399 } else { 3400 lock_flags = LK_EXCLUSIVE; 3401 } 3402 vn_lock(vp, lock_flags | LK_RETRY); 3403 AUDIT_ARG_VNODE1(vp); 3404 if (vp->v_object != NULL) { 3405 VM_OBJECT_WLOCK(vp->v_object); 3406 vm_object_page_clean(vp->v_object, 0, 0, 0); 3407 VM_OBJECT_WUNLOCK(vp->v_object); 3408 } 3409 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3410 VOP_UNLOCK(vp, 0); 3411 vn_finished_write(mp); 3412 drop: 3413 fdrop(fp, td); 3414 return (error); 3415 } 3416 3417 /* 3418 * Sync an open file. 3419 */ 3420 #ifndef _SYS_SYSPROTO_H_ 3421 struct fsync_args { 3422 int fd; 3423 }; 3424 #endif 3425 int 3426 sys_fsync(struct thread *td, struct fsync_args *uap) 3427 { 3428 3429 return (kern_fsync(td, uap->fd, true)); 3430 } 3431 3432 int 3433 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3434 { 3435 3436 return (kern_fsync(td, uap->fd, false)); 3437 } 3438 3439 /* 3440 * Rename files. Source and destination must either both be directories, or 3441 * both not be directories. If target is a directory, it must be empty. 3442 */ 3443 #ifndef _SYS_SYSPROTO_H_ 3444 struct rename_args { 3445 char *from; 3446 char *to; 3447 }; 3448 #endif 3449 int 3450 sys_rename(td, uap) 3451 struct thread *td; 3452 register struct rename_args /* { 3453 char *from; 3454 char *to; 3455 } */ *uap; 3456 { 3457 3458 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3459 uap->to, UIO_USERSPACE)); 3460 } 3461 3462 #ifndef _SYS_SYSPROTO_H_ 3463 struct renameat_args { 3464 int oldfd; 3465 char *old; 3466 int newfd; 3467 char *new; 3468 }; 3469 #endif 3470 int 3471 sys_renameat(struct thread *td, struct renameat_args *uap) 3472 { 3473 3474 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3475 UIO_USERSPACE)); 3476 } 3477 3478 int 3479 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3480 enum uio_seg pathseg) 3481 { 3482 struct mount *mp = NULL; 3483 struct vnode *tvp, *fvp, *tdvp; 3484 struct nameidata fromnd, tond; 3485 cap_rights_t rights; 3486 int error; 3487 3488 again: 3489 bwillwrite(); 3490 #ifdef MAC 3491 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3492 AUDITVNODE1, pathseg, old, oldfd, 3493 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3494 #else 3495 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3496 pathseg, old, oldfd, 3497 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3498 #endif 3499 3500 if ((error = namei(&fromnd)) != 0) 3501 return (error); 3502 #ifdef MAC 3503 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3504 fromnd.ni_vp, &fromnd.ni_cnd); 3505 VOP_UNLOCK(fromnd.ni_dvp, 0); 3506 if (fromnd.ni_dvp != fromnd.ni_vp) 3507 VOP_UNLOCK(fromnd.ni_vp, 0); 3508 #endif 3509 fvp = fromnd.ni_vp; 3510 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3511 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3512 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3513 if (fromnd.ni_vp->v_type == VDIR) 3514 tond.ni_cnd.cn_flags |= WILLBEDIR; 3515 if ((error = namei(&tond)) != 0) { 3516 /* Translate error code for rename("dir1", "dir2/."). */ 3517 if (error == EISDIR && fvp->v_type == VDIR) 3518 error = EINVAL; 3519 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3520 vrele(fromnd.ni_dvp); 3521 vrele(fvp); 3522 goto out1; 3523 } 3524 tdvp = tond.ni_dvp; 3525 tvp = tond.ni_vp; 3526 error = vn_start_write(fvp, &mp, V_NOWAIT); 3527 if (error != 0) { 3528 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3529 NDFREE(&tond, NDF_ONLY_PNBUF); 3530 if (tvp != NULL) 3531 vput(tvp); 3532 if (tdvp == tvp) 3533 vrele(tdvp); 3534 else 3535 vput(tdvp); 3536 vrele(fromnd.ni_dvp); 3537 vrele(fvp); 3538 vrele(tond.ni_startdir); 3539 if (fromnd.ni_startdir != NULL) 3540 vrele(fromnd.ni_startdir); 3541 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3542 if (error != 0) 3543 return (error); 3544 goto again; 3545 } 3546 if (tvp != NULL) { 3547 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3548 error = ENOTDIR; 3549 goto out; 3550 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3551 error = EISDIR; 3552 goto out; 3553 } 3554 #ifdef CAPABILITIES 3555 if (newfd != AT_FDCWD) { 3556 /* 3557 * If the target already exists we require CAP_UNLINKAT 3558 * from 'newfd'. 3559 */ 3560 error = cap_check(&tond.ni_filecaps.fc_rights, 3561 cap_rights_init(&rights, CAP_UNLINKAT)); 3562 if (error != 0) 3563 goto out; 3564 } 3565 #endif 3566 } 3567 if (fvp == tdvp) { 3568 error = EINVAL; 3569 goto out; 3570 } 3571 /* 3572 * If the source is the same as the destination (that is, if they 3573 * are links to the same vnode), then there is nothing to do. 3574 */ 3575 if (fvp == tvp) 3576 error = -1; 3577 #ifdef MAC 3578 else 3579 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3580 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3581 #endif 3582 out: 3583 if (error == 0) { 3584 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3585 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3586 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3587 NDFREE(&tond, NDF_ONLY_PNBUF); 3588 } else { 3589 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3590 NDFREE(&tond, NDF_ONLY_PNBUF); 3591 if (tvp != NULL) 3592 vput(tvp); 3593 if (tdvp == tvp) 3594 vrele(tdvp); 3595 else 3596 vput(tdvp); 3597 vrele(fromnd.ni_dvp); 3598 vrele(fvp); 3599 } 3600 vrele(tond.ni_startdir); 3601 vn_finished_write(mp); 3602 out1: 3603 if (fromnd.ni_startdir) 3604 vrele(fromnd.ni_startdir); 3605 if (error == -1) 3606 return (0); 3607 return (error); 3608 } 3609 3610 /* 3611 * Make a directory file. 3612 */ 3613 #ifndef _SYS_SYSPROTO_H_ 3614 struct mkdir_args { 3615 char *path; 3616 int mode; 3617 }; 3618 #endif 3619 int 3620 sys_mkdir(td, uap) 3621 struct thread *td; 3622 register struct mkdir_args /* { 3623 char *path; 3624 int mode; 3625 } */ *uap; 3626 { 3627 3628 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3629 uap->mode)); 3630 } 3631 3632 #ifndef _SYS_SYSPROTO_H_ 3633 struct mkdirat_args { 3634 int fd; 3635 char *path; 3636 mode_t mode; 3637 }; 3638 #endif 3639 int 3640 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3641 { 3642 3643 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3644 } 3645 3646 int 3647 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3648 int mode) 3649 { 3650 struct mount *mp; 3651 struct vnode *vp; 3652 struct vattr vattr; 3653 struct nameidata nd; 3654 cap_rights_t rights; 3655 int error; 3656 3657 AUDIT_ARG_MODE(mode); 3658 restart: 3659 bwillwrite(); 3660 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3661 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3662 td); 3663 nd.ni_cnd.cn_flags |= WILLBEDIR; 3664 if ((error = namei(&nd)) != 0) 3665 return (error); 3666 vp = nd.ni_vp; 3667 if (vp != NULL) { 3668 NDFREE(&nd, NDF_ONLY_PNBUF); 3669 /* 3670 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3671 * the strange behaviour of leaving the vnode unlocked 3672 * if the target is the same vnode as the parent. 3673 */ 3674 if (vp == nd.ni_dvp) 3675 vrele(nd.ni_dvp); 3676 else 3677 vput(nd.ni_dvp); 3678 vrele(vp); 3679 return (EEXIST); 3680 } 3681 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3682 NDFREE(&nd, NDF_ONLY_PNBUF); 3683 vput(nd.ni_dvp); 3684 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3685 return (error); 3686 goto restart; 3687 } 3688 VATTR_NULL(&vattr); 3689 vattr.va_type = VDIR; 3690 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3691 #ifdef MAC 3692 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3693 &vattr); 3694 if (error != 0) 3695 goto out; 3696 #endif 3697 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3698 #ifdef MAC 3699 out: 3700 #endif 3701 NDFREE(&nd, NDF_ONLY_PNBUF); 3702 vput(nd.ni_dvp); 3703 if (error == 0) 3704 vput(nd.ni_vp); 3705 vn_finished_write(mp); 3706 return (error); 3707 } 3708 3709 /* 3710 * Remove a directory file. 3711 */ 3712 #ifndef _SYS_SYSPROTO_H_ 3713 struct rmdir_args { 3714 char *path; 3715 }; 3716 #endif 3717 int 3718 sys_rmdir(td, uap) 3719 struct thread *td; 3720 struct rmdir_args /* { 3721 char *path; 3722 } */ *uap; 3723 { 3724 3725 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3726 } 3727 3728 int 3729 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3730 { 3731 struct mount *mp; 3732 struct vnode *vp; 3733 struct nameidata nd; 3734 cap_rights_t rights; 3735 int error; 3736 3737 restart: 3738 bwillwrite(); 3739 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3740 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3741 if ((error = namei(&nd)) != 0) 3742 return (error); 3743 vp = nd.ni_vp; 3744 if (vp->v_type != VDIR) { 3745 error = ENOTDIR; 3746 goto out; 3747 } 3748 /* 3749 * No rmdir "." please. 3750 */ 3751 if (nd.ni_dvp == vp) { 3752 error = EINVAL; 3753 goto out; 3754 } 3755 /* 3756 * The root of a mounted filesystem cannot be deleted. 3757 */ 3758 if (vp->v_vflag & VV_ROOT) { 3759 error = EBUSY; 3760 goto out; 3761 } 3762 #ifdef MAC 3763 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3764 &nd.ni_cnd); 3765 if (error != 0) 3766 goto out; 3767 #endif 3768 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3769 NDFREE(&nd, NDF_ONLY_PNBUF); 3770 vput(vp); 3771 if (nd.ni_dvp == vp) 3772 vrele(nd.ni_dvp); 3773 else 3774 vput(nd.ni_dvp); 3775 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3776 return (error); 3777 goto restart; 3778 } 3779 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3780 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3781 vn_finished_write(mp); 3782 out: 3783 NDFREE(&nd, NDF_ONLY_PNBUF); 3784 vput(vp); 3785 if (nd.ni_dvp == vp) 3786 vrele(nd.ni_dvp); 3787 else 3788 vput(nd.ni_dvp); 3789 return (error); 3790 } 3791 3792 #ifdef COMPAT_43 3793 /* 3794 * Read a block of directory entries in a filesystem independent format. 3795 */ 3796 #ifndef _SYS_SYSPROTO_H_ 3797 struct ogetdirentries_args { 3798 int fd; 3799 char *buf; 3800 u_int count; 3801 long *basep; 3802 }; 3803 #endif 3804 int 3805 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3806 { 3807 long loff; 3808 int error; 3809 3810 error = kern_ogetdirentries(td, uap, &loff); 3811 if (error == 0) 3812 error = copyout(&loff, uap->basep, sizeof(long)); 3813 return (error); 3814 } 3815 3816 int 3817 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3818 long *ploff) 3819 { 3820 struct vnode *vp; 3821 struct file *fp; 3822 struct uio auio, kuio; 3823 struct iovec aiov, kiov; 3824 struct dirent *dp, *edp; 3825 cap_rights_t rights; 3826 caddr_t dirbuf; 3827 int error, eofflag, readcnt; 3828 long loff; 3829 off_t foffset; 3830 3831 /* XXX arbitrary sanity limit on `count'. */ 3832 if (uap->count > 64 * 1024) 3833 return (EINVAL); 3834 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3835 if (error != 0) 3836 return (error); 3837 if ((fp->f_flag & FREAD) == 0) { 3838 fdrop(fp, td); 3839 return (EBADF); 3840 } 3841 vp = fp->f_vnode; 3842 foffset = foffset_lock(fp, 0); 3843 unionread: 3844 if (vp->v_type != VDIR) { 3845 foffset_unlock(fp, foffset, 0); 3846 fdrop(fp, td); 3847 return (EINVAL); 3848 } 3849 aiov.iov_base = uap->buf; 3850 aiov.iov_len = uap->count; 3851 auio.uio_iov = &aiov; 3852 auio.uio_iovcnt = 1; 3853 auio.uio_rw = UIO_READ; 3854 auio.uio_segflg = UIO_USERSPACE; 3855 auio.uio_td = td; 3856 auio.uio_resid = uap->count; 3857 vn_lock(vp, LK_SHARED | LK_RETRY); 3858 loff = auio.uio_offset = foffset; 3859 #ifdef MAC 3860 error = mac_vnode_check_readdir(td->td_ucred, vp); 3861 if (error != 0) { 3862 VOP_UNLOCK(vp, 0); 3863 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3864 fdrop(fp, td); 3865 return (error); 3866 } 3867 #endif 3868 # if (BYTE_ORDER != LITTLE_ENDIAN) 3869 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3870 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3871 NULL, NULL); 3872 foffset = auio.uio_offset; 3873 } else 3874 # endif 3875 { 3876 kuio = auio; 3877 kuio.uio_iov = &kiov; 3878 kuio.uio_segflg = UIO_SYSSPACE; 3879 kiov.iov_len = uap->count; 3880 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3881 kiov.iov_base = dirbuf; 3882 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3883 NULL, NULL); 3884 foffset = kuio.uio_offset; 3885 if (error == 0) { 3886 readcnt = uap->count - kuio.uio_resid; 3887 edp = (struct dirent *)&dirbuf[readcnt]; 3888 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3889 # if (BYTE_ORDER == LITTLE_ENDIAN) 3890 /* 3891 * The expected low byte of 3892 * dp->d_namlen is our dp->d_type. 3893 * The high MBZ byte of dp->d_namlen 3894 * is our dp->d_namlen. 3895 */ 3896 dp->d_type = dp->d_namlen; 3897 dp->d_namlen = 0; 3898 # else 3899 /* 3900 * The dp->d_type is the high byte 3901 * of the expected dp->d_namlen, 3902 * so must be zero'ed. 3903 */ 3904 dp->d_type = 0; 3905 # endif 3906 if (dp->d_reclen > 0) { 3907 dp = (struct dirent *) 3908 ((char *)dp + dp->d_reclen); 3909 } else { 3910 error = EIO; 3911 break; 3912 } 3913 } 3914 if (dp >= edp) 3915 error = uiomove(dirbuf, readcnt, &auio); 3916 } 3917 free(dirbuf, M_TEMP); 3918 } 3919 if (error != 0) { 3920 VOP_UNLOCK(vp, 0); 3921 foffset_unlock(fp, foffset, 0); 3922 fdrop(fp, td); 3923 return (error); 3924 } 3925 if (uap->count == auio.uio_resid && 3926 (vp->v_vflag & VV_ROOT) && 3927 (vp->v_mount->mnt_flag & MNT_UNION)) { 3928 struct vnode *tvp = vp; 3929 vp = vp->v_mount->mnt_vnodecovered; 3930 VREF(vp); 3931 fp->f_vnode = vp; 3932 fp->f_data = vp; 3933 foffset = 0; 3934 vput(tvp); 3935 goto unionread; 3936 } 3937 VOP_UNLOCK(vp, 0); 3938 foffset_unlock(fp, foffset, 0); 3939 fdrop(fp, td); 3940 td->td_retval[0] = uap->count - auio.uio_resid; 3941 if (error == 0) 3942 *ploff = loff; 3943 return (error); 3944 } 3945 #endif /* COMPAT_43 */ 3946 3947 /* 3948 * Read a block of directory entries in a filesystem independent format. 3949 */ 3950 #ifndef _SYS_SYSPROTO_H_ 3951 struct getdirentries_args { 3952 int fd; 3953 char *buf; 3954 u_int count; 3955 long *basep; 3956 }; 3957 #endif 3958 int 3959 sys_getdirentries(td, uap) 3960 struct thread *td; 3961 register struct getdirentries_args /* { 3962 int fd; 3963 char *buf; 3964 u_int count; 3965 long *basep; 3966 } */ *uap; 3967 { 3968 long base; 3969 int error; 3970 3971 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3972 NULL, UIO_USERSPACE); 3973 if (error != 0) 3974 return (error); 3975 if (uap->basep != NULL) 3976 error = copyout(&base, uap->basep, sizeof(long)); 3977 return (error); 3978 } 3979 3980 int 3981 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3982 long *basep, ssize_t *residp, enum uio_seg bufseg) 3983 { 3984 struct vnode *vp; 3985 struct file *fp; 3986 struct uio auio; 3987 struct iovec aiov; 3988 cap_rights_t rights; 3989 long loff; 3990 int error, eofflag; 3991 off_t foffset; 3992 3993 AUDIT_ARG_FD(fd); 3994 if (count > IOSIZE_MAX) 3995 return (EINVAL); 3996 auio.uio_resid = count; 3997 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3998 if (error != 0) 3999 return (error); 4000 if ((fp->f_flag & FREAD) == 0) { 4001 fdrop(fp, td); 4002 return (EBADF); 4003 } 4004 vp = fp->f_vnode; 4005 foffset = foffset_lock(fp, 0); 4006 unionread: 4007 if (vp->v_type != VDIR) { 4008 error = EINVAL; 4009 goto fail; 4010 } 4011 aiov.iov_base = buf; 4012 aiov.iov_len = count; 4013 auio.uio_iov = &aiov; 4014 auio.uio_iovcnt = 1; 4015 auio.uio_rw = UIO_READ; 4016 auio.uio_segflg = bufseg; 4017 auio.uio_td = td; 4018 vn_lock(vp, LK_SHARED | LK_RETRY); 4019 AUDIT_ARG_VNODE1(vp); 4020 loff = auio.uio_offset = foffset; 4021 #ifdef MAC 4022 error = mac_vnode_check_readdir(td->td_ucred, vp); 4023 if (error == 0) 4024 #endif 4025 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4026 NULL); 4027 foffset = auio.uio_offset; 4028 if (error != 0) { 4029 VOP_UNLOCK(vp, 0); 4030 goto fail; 4031 } 4032 if (count == auio.uio_resid && 4033 (vp->v_vflag & VV_ROOT) && 4034 (vp->v_mount->mnt_flag & MNT_UNION)) { 4035 struct vnode *tvp = vp; 4036 4037 vp = vp->v_mount->mnt_vnodecovered; 4038 VREF(vp); 4039 fp->f_vnode = vp; 4040 fp->f_data = vp; 4041 foffset = 0; 4042 vput(tvp); 4043 goto unionread; 4044 } 4045 VOP_UNLOCK(vp, 0); 4046 *basep = loff; 4047 if (residp != NULL) 4048 *residp = auio.uio_resid; 4049 td->td_retval[0] = count - auio.uio_resid; 4050 fail: 4051 foffset_unlock(fp, foffset, 0); 4052 fdrop(fp, td); 4053 return (error); 4054 } 4055 4056 #ifndef _SYS_SYSPROTO_H_ 4057 struct getdents_args { 4058 int fd; 4059 char *buf; 4060 size_t count; 4061 }; 4062 #endif 4063 int 4064 sys_getdents(td, uap) 4065 struct thread *td; 4066 register struct getdents_args /* { 4067 int fd; 4068 char *buf; 4069 u_int count; 4070 } */ *uap; 4071 { 4072 struct getdirentries_args ap; 4073 4074 ap.fd = uap->fd; 4075 ap.buf = uap->buf; 4076 ap.count = uap->count; 4077 ap.basep = NULL; 4078 return (sys_getdirentries(td, &ap)); 4079 } 4080 4081 /* 4082 * Set the mode mask for creation of filesystem nodes. 4083 */ 4084 #ifndef _SYS_SYSPROTO_H_ 4085 struct umask_args { 4086 int newmask; 4087 }; 4088 #endif 4089 int 4090 sys_umask(td, uap) 4091 struct thread *td; 4092 struct umask_args /* { 4093 int newmask; 4094 } */ *uap; 4095 { 4096 struct filedesc *fdp; 4097 4098 fdp = td->td_proc->p_fd; 4099 FILEDESC_XLOCK(fdp); 4100 td->td_retval[0] = fdp->fd_cmask; 4101 fdp->fd_cmask = uap->newmask & ALLPERMS; 4102 FILEDESC_XUNLOCK(fdp); 4103 return (0); 4104 } 4105 4106 /* 4107 * Void all references to file by ripping underlying filesystem away from 4108 * vnode. 4109 */ 4110 #ifndef _SYS_SYSPROTO_H_ 4111 struct revoke_args { 4112 char *path; 4113 }; 4114 #endif 4115 int 4116 sys_revoke(td, uap) 4117 struct thread *td; 4118 register struct revoke_args /* { 4119 char *path; 4120 } */ *uap; 4121 { 4122 struct vnode *vp; 4123 struct vattr vattr; 4124 struct nameidata nd; 4125 int error; 4126 4127 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4128 uap->path, td); 4129 if ((error = namei(&nd)) != 0) 4130 return (error); 4131 vp = nd.ni_vp; 4132 NDFREE(&nd, NDF_ONLY_PNBUF); 4133 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4134 error = EINVAL; 4135 goto out; 4136 } 4137 #ifdef MAC 4138 error = mac_vnode_check_revoke(td->td_ucred, vp); 4139 if (error != 0) 4140 goto out; 4141 #endif 4142 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4143 if (error != 0) 4144 goto out; 4145 if (td->td_ucred->cr_uid != vattr.va_uid) { 4146 error = priv_check(td, PRIV_VFS_ADMIN); 4147 if (error != 0) 4148 goto out; 4149 } 4150 if (vcount(vp) > 1) 4151 VOP_REVOKE(vp, REVOKEALL); 4152 out: 4153 vput(vp); 4154 return (error); 4155 } 4156 4157 /* 4158 * Convert a user file descriptor to a kernel file entry and check that, if it 4159 * is a capability, the correct rights are present. A reference on the file 4160 * entry is held upon returning. 4161 */ 4162 int 4163 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4164 { 4165 struct file *fp; 4166 int error; 4167 4168 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4169 if (error != 0) 4170 return (error); 4171 4172 /* 4173 * The file could be not of the vnode type, or it may be not 4174 * yet fully initialized, in which case the f_vnode pointer 4175 * may be set, but f_ops is still badfileops. E.g., 4176 * devfs_open() transiently create such situation to 4177 * facilitate csw d_fdopen(). 4178 * 4179 * Dupfdopen() handling in kern_openat() installs the 4180 * half-baked file into the process descriptor table, allowing 4181 * other thread to dereference it. Guard against the race by 4182 * checking f_ops. 4183 */ 4184 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4185 fdrop(fp, td); 4186 return (EINVAL); 4187 } 4188 *fpp = fp; 4189 return (0); 4190 } 4191 4192 4193 /* 4194 * Get an (NFS) file handle. 4195 */ 4196 #ifndef _SYS_SYSPROTO_H_ 4197 struct lgetfh_args { 4198 char *fname; 4199 fhandle_t *fhp; 4200 }; 4201 #endif 4202 int 4203 sys_lgetfh(td, uap) 4204 struct thread *td; 4205 register struct lgetfh_args *uap; 4206 { 4207 struct nameidata nd; 4208 fhandle_t fh; 4209 register struct vnode *vp; 4210 int error; 4211 4212 error = priv_check(td, PRIV_VFS_GETFH); 4213 if (error != 0) 4214 return (error); 4215 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4216 uap->fname, td); 4217 error = namei(&nd); 4218 if (error != 0) 4219 return (error); 4220 NDFREE(&nd, NDF_ONLY_PNBUF); 4221 vp = nd.ni_vp; 4222 bzero(&fh, sizeof(fh)); 4223 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4224 error = VOP_VPTOFH(vp, &fh.fh_fid); 4225 vput(vp); 4226 if (error == 0) 4227 error = copyout(&fh, uap->fhp, sizeof (fh)); 4228 return (error); 4229 } 4230 4231 #ifndef _SYS_SYSPROTO_H_ 4232 struct getfh_args { 4233 char *fname; 4234 fhandle_t *fhp; 4235 }; 4236 #endif 4237 int 4238 sys_getfh(td, uap) 4239 struct thread *td; 4240 register struct getfh_args *uap; 4241 { 4242 struct nameidata nd; 4243 fhandle_t fh; 4244 register struct vnode *vp; 4245 int error; 4246 4247 error = priv_check(td, PRIV_VFS_GETFH); 4248 if (error != 0) 4249 return (error); 4250 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4251 uap->fname, td); 4252 error = namei(&nd); 4253 if (error != 0) 4254 return (error); 4255 NDFREE(&nd, NDF_ONLY_PNBUF); 4256 vp = nd.ni_vp; 4257 bzero(&fh, sizeof(fh)); 4258 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4259 error = VOP_VPTOFH(vp, &fh.fh_fid); 4260 vput(vp); 4261 if (error == 0) 4262 error = copyout(&fh, uap->fhp, sizeof (fh)); 4263 return (error); 4264 } 4265 4266 /* 4267 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4268 * open descriptor. 4269 * 4270 * warning: do not remove the priv_check() call or this becomes one giant 4271 * security hole. 4272 */ 4273 #ifndef _SYS_SYSPROTO_H_ 4274 struct fhopen_args { 4275 const struct fhandle *u_fhp; 4276 int flags; 4277 }; 4278 #endif 4279 int 4280 sys_fhopen(td, uap) 4281 struct thread *td; 4282 struct fhopen_args /* { 4283 const struct fhandle *u_fhp; 4284 int flags; 4285 } */ *uap; 4286 { 4287 struct mount *mp; 4288 struct vnode *vp; 4289 struct fhandle fhp; 4290 struct file *fp; 4291 int fmode, error; 4292 int indx; 4293 4294 error = priv_check(td, PRIV_VFS_FHOPEN); 4295 if (error != 0) 4296 return (error); 4297 indx = -1; 4298 fmode = FFLAGS(uap->flags); 4299 /* why not allow a non-read/write open for our lockd? */ 4300 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4301 return (EINVAL); 4302 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4303 if (error != 0) 4304 return(error); 4305 /* find the mount point */ 4306 mp = vfs_busyfs(&fhp.fh_fsid); 4307 if (mp == NULL) 4308 return (ESTALE); 4309 /* now give me my vnode, it gets returned to me locked */ 4310 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4311 vfs_unbusy(mp); 4312 if (error != 0) 4313 return (error); 4314 4315 error = falloc_noinstall(td, &fp); 4316 if (error != 0) { 4317 vput(vp); 4318 return (error); 4319 } 4320 /* 4321 * An extra reference on `fp' has been held for us by 4322 * falloc_noinstall(). 4323 */ 4324 4325 #ifdef INVARIANTS 4326 td->td_dupfd = -1; 4327 #endif 4328 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4329 if (error != 0) { 4330 KASSERT(fp->f_ops == &badfileops, 4331 ("VOP_OPEN in fhopen() set f_ops")); 4332 KASSERT(td->td_dupfd < 0, 4333 ("fhopen() encountered fdopen()")); 4334 4335 vput(vp); 4336 goto bad; 4337 } 4338 #ifdef INVARIANTS 4339 td->td_dupfd = 0; 4340 #endif 4341 fp->f_vnode = vp; 4342 fp->f_seqcount = 1; 4343 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4344 &vnops); 4345 VOP_UNLOCK(vp, 0); 4346 if ((fmode & O_TRUNC) != 0) { 4347 error = fo_truncate(fp, 0, td->td_ucred, td); 4348 if (error != 0) 4349 goto bad; 4350 } 4351 4352 error = finstall(td, fp, &indx, fmode, NULL); 4353 bad: 4354 fdrop(fp, td); 4355 td->td_retval[0] = indx; 4356 return (error); 4357 } 4358 4359 /* 4360 * Stat an (NFS) file handle. 4361 */ 4362 #ifndef _SYS_SYSPROTO_H_ 4363 struct fhstat_args { 4364 struct fhandle *u_fhp; 4365 struct stat *sb; 4366 }; 4367 #endif 4368 int 4369 sys_fhstat(td, uap) 4370 struct thread *td; 4371 register struct fhstat_args /* { 4372 struct fhandle *u_fhp; 4373 struct stat *sb; 4374 } */ *uap; 4375 { 4376 struct stat sb; 4377 struct fhandle fh; 4378 int error; 4379 4380 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4381 if (error != 0) 4382 return (error); 4383 error = kern_fhstat(td, fh, &sb); 4384 if (error == 0) 4385 error = copyout(&sb, uap->sb, sizeof(sb)); 4386 return (error); 4387 } 4388 4389 int 4390 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4391 { 4392 struct mount *mp; 4393 struct vnode *vp; 4394 int error; 4395 4396 error = priv_check(td, PRIV_VFS_FHSTAT); 4397 if (error != 0) 4398 return (error); 4399 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4400 return (ESTALE); 4401 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4402 vfs_unbusy(mp); 4403 if (error != 0) 4404 return (error); 4405 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4406 vput(vp); 4407 return (error); 4408 } 4409 4410 /* 4411 * Implement fstatfs() for (NFS) file handles. 4412 */ 4413 #ifndef _SYS_SYSPROTO_H_ 4414 struct fhstatfs_args { 4415 struct fhandle *u_fhp; 4416 struct statfs *buf; 4417 }; 4418 #endif 4419 int 4420 sys_fhstatfs(td, uap) 4421 struct thread *td; 4422 struct fhstatfs_args /* { 4423 struct fhandle *u_fhp; 4424 struct statfs *buf; 4425 } */ *uap; 4426 { 4427 struct statfs sf; 4428 fhandle_t fh; 4429 int error; 4430 4431 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4432 if (error != 0) 4433 return (error); 4434 error = kern_fhstatfs(td, fh, &sf); 4435 if (error != 0) 4436 return (error); 4437 return (copyout(&sf, uap->buf, sizeof(sf))); 4438 } 4439 4440 int 4441 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4442 { 4443 struct statfs *sp; 4444 struct mount *mp; 4445 struct vnode *vp; 4446 int error; 4447 4448 error = priv_check(td, PRIV_VFS_FHSTATFS); 4449 if (error != 0) 4450 return (error); 4451 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4452 return (ESTALE); 4453 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4454 if (error != 0) { 4455 vfs_unbusy(mp); 4456 return (error); 4457 } 4458 vput(vp); 4459 error = prison_canseemount(td->td_ucred, mp); 4460 if (error != 0) 4461 goto out; 4462 #ifdef MAC 4463 error = mac_mount_check_stat(td->td_ucred, mp); 4464 if (error != 0) 4465 goto out; 4466 #endif 4467 /* 4468 * Set these in case the underlying filesystem fails to do so. 4469 */ 4470 sp = &mp->mnt_stat; 4471 sp->f_version = STATFS_VERSION; 4472 sp->f_namemax = NAME_MAX; 4473 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4474 error = VFS_STATFS(mp, sp); 4475 if (error == 0) 4476 *buf = *sp; 4477 out: 4478 vfs_unbusy(mp); 4479 return (error); 4480 } 4481 4482 int 4483 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4484 { 4485 struct file *fp; 4486 struct mount *mp; 4487 struct vnode *vp; 4488 cap_rights_t rights; 4489 off_t olen, ooffset; 4490 int error; 4491 4492 if (offset < 0 || len <= 0) 4493 return (EINVAL); 4494 /* Check for wrap. */ 4495 if (offset > OFF_MAX - len) 4496 return (EFBIG); 4497 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4498 if (error != 0) 4499 return (error); 4500 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4501 error = ESPIPE; 4502 goto out; 4503 } 4504 if ((fp->f_flag & FWRITE) == 0) { 4505 error = EBADF; 4506 goto out; 4507 } 4508 if (fp->f_type != DTYPE_VNODE) { 4509 error = ENODEV; 4510 goto out; 4511 } 4512 vp = fp->f_vnode; 4513 if (vp->v_type != VREG) { 4514 error = ENODEV; 4515 goto out; 4516 } 4517 4518 /* Allocating blocks may take a long time, so iterate. */ 4519 for (;;) { 4520 olen = len; 4521 ooffset = offset; 4522 4523 bwillwrite(); 4524 mp = NULL; 4525 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4526 if (error != 0) 4527 break; 4528 error = vn_lock(vp, LK_EXCLUSIVE); 4529 if (error != 0) { 4530 vn_finished_write(mp); 4531 break; 4532 } 4533 #ifdef MAC 4534 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4535 if (error == 0) 4536 #endif 4537 error = VOP_ALLOCATE(vp, &offset, &len); 4538 VOP_UNLOCK(vp, 0); 4539 vn_finished_write(mp); 4540 4541 if (olen + ooffset != offset + len) { 4542 panic("offset + len changed from %jx/%jx to %jx/%jx", 4543 ooffset, olen, offset, len); 4544 } 4545 if (error != 0 || len == 0) 4546 break; 4547 KASSERT(olen > len, ("Iteration did not make progress?")); 4548 maybe_yield(); 4549 } 4550 out: 4551 fdrop(fp, td); 4552 return (error); 4553 } 4554 4555 int 4556 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4557 { 4558 int error; 4559 4560 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4561 return (kern_posix_error(td, error)); 4562 } 4563 4564 /* 4565 * Unlike madvise(2), we do not make a best effort to remember every 4566 * possible caching hint. Instead, we remember the last setting with 4567 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4568 * region of any current setting. 4569 */ 4570 int 4571 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4572 int advice) 4573 { 4574 struct fadvise_info *fa, *new; 4575 struct file *fp; 4576 struct vnode *vp; 4577 cap_rights_t rights; 4578 off_t end; 4579 int error; 4580 4581 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4582 return (EINVAL); 4583 switch (advice) { 4584 case POSIX_FADV_SEQUENTIAL: 4585 case POSIX_FADV_RANDOM: 4586 case POSIX_FADV_NOREUSE: 4587 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4588 break; 4589 case POSIX_FADV_NORMAL: 4590 case POSIX_FADV_WILLNEED: 4591 case POSIX_FADV_DONTNEED: 4592 new = NULL; 4593 break; 4594 default: 4595 return (EINVAL); 4596 } 4597 /* XXX: CAP_POSIX_FADVISE? */ 4598 error = fget(td, fd, cap_rights_init(&rights), &fp); 4599 if (error != 0) 4600 goto out; 4601 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4602 error = ESPIPE; 4603 goto out; 4604 } 4605 if (fp->f_type != DTYPE_VNODE) { 4606 error = ENODEV; 4607 goto out; 4608 } 4609 vp = fp->f_vnode; 4610 if (vp->v_type != VREG) { 4611 error = ENODEV; 4612 goto out; 4613 } 4614 if (len == 0) 4615 end = OFF_MAX; 4616 else 4617 end = offset + len - 1; 4618 switch (advice) { 4619 case POSIX_FADV_SEQUENTIAL: 4620 case POSIX_FADV_RANDOM: 4621 case POSIX_FADV_NOREUSE: 4622 /* 4623 * Try to merge any existing non-standard region with 4624 * this new region if possible, otherwise create a new 4625 * non-standard region for this request. 4626 */ 4627 mtx_pool_lock(mtxpool_sleep, fp); 4628 fa = fp->f_advice; 4629 if (fa != NULL && fa->fa_advice == advice && 4630 ((fa->fa_start <= end && fa->fa_end >= offset) || 4631 (end != OFF_MAX && fa->fa_start == end + 1) || 4632 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4633 if (offset < fa->fa_start) 4634 fa->fa_start = offset; 4635 if (end > fa->fa_end) 4636 fa->fa_end = end; 4637 } else { 4638 new->fa_advice = advice; 4639 new->fa_start = offset; 4640 new->fa_end = end; 4641 fp->f_advice = new; 4642 new = fa; 4643 } 4644 mtx_pool_unlock(mtxpool_sleep, fp); 4645 break; 4646 case POSIX_FADV_NORMAL: 4647 /* 4648 * If a the "normal" region overlaps with an existing 4649 * non-standard region, trim or remove the 4650 * non-standard region. 4651 */ 4652 mtx_pool_lock(mtxpool_sleep, fp); 4653 fa = fp->f_advice; 4654 if (fa != NULL) { 4655 if (offset <= fa->fa_start && end >= fa->fa_end) { 4656 new = fa; 4657 fp->f_advice = NULL; 4658 } else if (offset <= fa->fa_start && 4659 end >= fa->fa_start) 4660 fa->fa_start = end + 1; 4661 else if (offset <= fa->fa_end && end >= fa->fa_end) 4662 fa->fa_end = offset - 1; 4663 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4664 /* 4665 * If the "normal" region is a middle 4666 * portion of the existing 4667 * non-standard region, just remove 4668 * the whole thing rather than picking 4669 * one side or the other to 4670 * preserve. 4671 */ 4672 new = fa; 4673 fp->f_advice = NULL; 4674 } 4675 } 4676 mtx_pool_unlock(mtxpool_sleep, fp); 4677 break; 4678 case POSIX_FADV_WILLNEED: 4679 case POSIX_FADV_DONTNEED: 4680 error = VOP_ADVISE(vp, offset, end, advice); 4681 break; 4682 } 4683 out: 4684 if (fp != NULL) 4685 fdrop(fp, td); 4686 free(new, M_FADVISE); 4687 return (error); 4688 } 4689 4690 int 4691 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4692 { 4693 int error; 4694 4695 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4696 uap->advice); 4697 return (kern_posix_error(td, error)); 4698 } 4699