1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 /* 248 * Get filesystem statistics. 249 */ 250 #ifndef _SYS_SYSPROTO_H_ 251 struct statfs_args { 252 char *path; 253 struct statfs *buf; 254 }; 255 #endif 256 int 257 sys_statfs(td, uap) 258 struct thread *td; 259 register struct statfs_args /* { 260 char *path; 261 struct statfs *buf; 262 } */ *uap; 263 { 264 struct statfs sf; 265 int error; 266 267 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 268 if (error == 0) 269 error = copyout(&sf, uap->buf, sizeof(sf)); 270 return (error); 271 } 272 273 int 274 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 275 struct statfs *buf) 276 { 277 struct mount *mp; 278 struct statfs *sp, sb; 279 struct nameidata nd; 280 int error; 281 282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 283 pathseg, path, td); 284 error = namei(&nd); 285 if (error != 0) 286 return (error); 287 mp = nd.ni_vp->v_mount; 288 vfs_ref(mp); 289 NDFREE(&nd, NDF_ONLY_PNBUF); 290 vput(nd.ni_vp); 291 error = vfs_busy(mp, 0); 292 vfs_rel(mp); 293 if (error != 0) 294 return (error); 295 #ifdef MAC 296 error = mac_mount_check_stat(td->td_ucred, mp); 297 if (error != 0) 298 goto out; 299 #endif 300 /* 301 * Set these in case the underlying filesystem fails to do so. 302 */ 303 sp = &mp->mnt_stat; 304 sp->f_version = STATFS_VERSION; 305 sp->f_namemax = NAME_MAX; 306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 307 error = VFS_STATFS(mp, sp); 308 if (error != 0) 309 goto out; 310 if (priv_check(td, PRIV_VFS_GENERATION)) { 311 bcopy(sp, &sb, sizeof(sb)); 312 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 313 prison_enforce_statfs(td->td_ucred, mp, &sb); 314 sp = &sb; 315 } 316 *buf = *sp; 317 out: 318 vfs_unbusy(mp); 319 return (error); 320 } 321 322 /* 323 * Get filesystem statistics. 324 */ 325 #ifndef _SYS_SYSPROTO_H_ 326 struct fstatfs_args { 327 int fd; 328 struct statfs *buf; 329 }; 330 #endif 331 int 332 sys_fstatfs(td, uap) 333 struct thread *td; 334 register struct fstatfs_args /* { 335 int fd; 336 struct statfs *buf; 337 } */ *uap; 338 { 339 struct statfs sf; 340 int error; 341 342 error = kern_fstatfs(td, uap->fd, &sf); 343 if (error == 0) 344 error = copyout(&sf, uap->buf, sizeof(sf)); 345 return (error); 346 } 347 348 int 349 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 350 { 351 struct file *fp; 352 struct mount *mp; 353 struct statfs *sp, sb; 354 struct vnode *vp; 355 cap_rights_t rights; 356 int error; 357 358 AUDIT_ARG_FD(fd); 359 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 360 if (error != 0) 361 return (error); 362 vp = fp->f_vnode; 363 vn_lock(vp, LK_SHARED | LK_RETRY); 364 #ifdef AUDIT 365 AUDIT_ARG_VNODE1(vp); 366 #endif 367 mp = vp->v_mount; 368 if (mp) 369 vfs_ref(mp); 370 VOP_UNLOCK(vp, 0); 371 fdrop(fp, td); 372 if (mp == NULL) { 373 error = EBADF; 374 goto out; 375 } 376 error = vfs_busy(mp, 0); 377 vfs_rel(mp); 378 if (error != 0) 379 return (error); 380 #ifdef MAC 381 error = mac_mount_check_stat(td->td_ucred, mp); 382 if (error != 0) 383 goto out; 384 #endif 385 /* 386 * Set these in case the underlying filesystem fails to do so. 387 */ 388 sp = &mp->mnt_stat; 389 sp->f_version = STATFS_VERSION; 390 sp->f_namemax = NAME_MAX; 391 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 392 error = VFS_STATFS(mp, sp); 393 if (error != 0) 394 goto out; 395 if (priv_check(td, PRIV_VFS_GENERATION)) { 396 bcopy(sp, &sb, sizeof(sb)); 397 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 398 prison_enforce_statfs(td->td_ucred, mp, &sb); 399 sp = &sb; 400 } 401 *buf = *sp; 402 out: 403 if (mp) 404 vfs_unbusy(mp); 405 return (error); 406 } 407 408 /* 409 * Get statistics on all filesystems. 410 */ 411 #ifndef _SYS_SYSPROTO_H_ 412 struct getfsstat_args { 413 struct statfs *buf; 414 long bufsize; 415 int mode; 416 }; 417 #endif 418 int 419 sys_getfsstat(td, uap) 420 struct thread *td; 421 register struct getfsstat_args /* { 422 struct statfs *buf; 423 long bufsize; 424 int mode; 425 } */ *uap; 426 { 427 size_t count; 428 int error; 429 430 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 431 return (EINVAL); 432 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 433 UIO_USERSPACE, uap->mode); 434 if (error == 0) 435 td->td_retval[0] = count; 436 return (error); 437 } 438 439 /* 440 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 441 * The caller is responsible for freeing memory which will be allocated 442 * in '*buf'. 443 */ 444 int 445 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 446 size_t *countp, enum uio_seg bufseg, int mode) 447 { 448 struct mount *mp, *nmp; 449 struct statfs *sfsp, *sp, sb, *tofree; 450 size_t count, maxcount; 451 int error; 452 453 switch (mode) { 454 case MNT_WAIT: 455 case MNT_NOWAIT: 456 break; 457 default: 458 return (EINVAL); 459 } 460 restart: 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) { 463 sfsp = NULL; 464 tofree = NULL; 465 } else if (bufseg == UIO_USERSPACE) { 466 sfsp = *buf; 467 tofree = NULL; 468 } else /* if (bufseg == UIO_SYSSPACE) */ { 469 count = 0; 470 mtx_lock(&mountlist_mtx); 471 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 472 count++; 473 } 474 mtx_unlock(&mountlist_mtx); 475 if (maxcount > count) 476 maxcount = count; 477 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 478 M_TEMP, M_WAITOK); 479 } 480 count = 0; 481 mtx_lock(&mountlist_mtx); 482 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 483 if (prison_canseemount(td->td_ucred, mp) != 0) { 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 continue; 486 } 487 #ifdef MAC 488 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 489 nmp = TAILQ_NEXT(mp, mnt_list); 490 continue; 491 } 492 #endif 493 if (mode == MNT_WAIT) { 494 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 495 /* 496 * If vfs_busy() failed, and MBF_NOWAIT 497 * wasn't passed, then the mp is gone. 498 * Furthermore, because of MBF_MNTLSTLOCK, 499 * the mountlist_mtx was dropped. We have 500 * no other choice than to start over. 501 */ 502 mtx_unlock(&mountlist_mtx); 503 free(tofree, M_TEMP); 504 goto restart; 505 } 506 } else { 507 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 508 nmp = TAILQ_NEXT(mp, mnt_list); 509 continue; 510 } 511 } 512 if (sfsp && count < maxcount) { 513 sp = &mp->mnt_stat; 514 /* 515 * Set these in case the underlying filesystem 516 * fails to do so. 517 */ 518 sp->f_version = STATFS_VERSION; 519 sp->f_namemax = NAME_MAX; 520 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 521 /* 522 * If MNT_NOWAIT is specified, do not refresh 523 * the fsstat cache. 524 */ 525 if (mode != MNT_NOWAIT) { 526 error = VFS_STATFS(mp, sp); 527 if (error != 0) { 528 mtx_lock(&mountlist_mtx); 529 nmp = TAILQ_NEXT(mp, mnt_list); 530 vfs_unbusy(mp); 531 continue; 532 } 533 } 534 if (priv_check(td, PRIV_VFS_GENERATION)) { 535 bcopy(sp, &sb, sizeof(sb)); 536 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 537 prison_enforce_statfs(td->td_ucred, mp, &sb); 538 sp = &sb; 539 } 540 if (bufseg == UIO_SYSSPACE) 541 bcopy(sp, sfsp, sizeof(*sp)); 542 else /* if (bufseg == UIO_USERSPACE) */ { 543 error = copyout(sp, sfsp, sizeof(*sp)); 544 if (error != 0) { 545 vfs_unbusy(mp); 546 return (error); 547 } 548 } 549 sfsp++; 550 } 551 count++; 552 mtx_lock(&mountlist_mtx); 553 nmp = TAILQ_NEXT(mp, mnt_list); 554 vfs_unbusy(mp); 555 } 556 mtx_unlock(&mountlist_mtx); 557 if (sfsp && count > maxcount) 558 *countp = maxcount; 559 else 560 *countp = count; 561 return (0); 562 } 563 564 #ifdef COMPAT_FREEBSD4 565 /* 566 * Get old format filesystem statistics. 567 */ 568 static void cvtstatfs(struct statfs *, struct ostatfs *); 569 570 #ifndef _SYS_SYSPROTO_H_ 571 struct freebsd4_statfs_args { 572 char *path; 573 struct ostatfs *buf; 574 }; 575 #endif 576 int 577 freebsd4_statfs(td, uap) 578 struct thread *td; 579 struct freebsd4_statfs_args /* { 580 char *path; 581 struct ostatfs *buf; 582 } */ *uap; 583 { 584 struct ostatfs osb; 585 struct statfs sf; 586 int error; 587 588 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 589 if (error != 0) 590 return (error); 591 cvtstatfs(&sf, &osb); 592 return (copyout(&osb, uap->buf, sizeof(osb))); 593 } 594 595 /* 596 * Get filesystem statistics. 597 */ 598 #ifndef _SYS_SYSPROTO_H_ 599 struct freebsd4_fstatfs_args { 600 int fd; 601 struct ostatfs *buf; 602 }; 603 #endif 604 int 605 freebsd4_fstatfs(td, uap) 606 struct thread *td; 607 struct freebsd4_fstatfs_args /* { 608 int fd; 609 struct ostatfs *buf; 610 } */ *uap; 611 { 612 struct ostatfs osb; 613 struct statfs sf; 614 int error; 615 616 error = kern_fstatfs(td, uap->fd, &sf); 617 if (error != 0) 618 return (error); 619 cvtstatfs(&sf, &osb); 620 return (copyout(&osb, uap->buf, sizeof(osb))); 621 } 622 623 /* 624 * Get statistics on all filesystems. 625 */ 626 #ifndef _SYS_SYSPROTO_H_ 627 struct freebsd4_getfsstat_args { 628 struct ostatfs *buf; 629 long bufsize; 630 int mode; 631 }; 632 #endif 633 int 634 freebsd4_getfsstat(td, uap) 635 struct thread *td; 636 register struct freebsd4_getfsstat_args /* { 637 struct ostatfs *buf; 638 long bufsize; 639 int mode; 640 } */ *uap; 641 { 642 struct statfs *buf, *sp; 643 struct ostatfs osb; 644 size_t count, size; 645 int error; 646 647 if (uap->bufsize < 0) 648 return (EINVAL); 649 count = uap->bufsize / sizeof(struct ostatfs); 650 if (count > SIZE_MAX / sizeof(struct statfs)) 651 return (EINVAL); 652 size = count * sizeof(struct statfs); 653 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 654 uap->mode); 655 td->td_retval[0] = count; 656 if (size != 0) { 657 sp = buf; 658 while (count != 0 && error == 0) { 659 cvtstatfs(sp, &osb); 660 error = copyout(&osb, uap->buf, sizeof(osb)); 661 sp++; 662 uap->buf++; 663 count--; 664 } 665 free(buf, M_TEMP); 666 } 667 return (error); 668 } 669 670 /* 671 * Implement fstatfs() for (NFS) file handles. 672 */ 673 #ifndef _SYS_SYSPROTO_H_ 674 struct freebsd4_fhstatfs_args { 675 struct fhandle *u_fhp; 676 struct ostatfs *buf; 677 }; 678 #endif 679 int 680 freebsd4_fhstatfs(td, uap) 681 struct thread *td; 682 struct freebsd4_fhstatfs_args /* { 683 struct fhandle *u_fhp; 684 struct ostatfs *buf; 685 } */ *uap; 686 { 687 struct ostatfs osb; 688 struct statfs sf; 689 fhandle_t fh; 690 int error; 691 692 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 693 if (error != 0) 694 return (error); 695 error = kern_fhstatfs(td, fh, &sf); 696 if (error != 0) 697 return (error); 698 cvtstatfs(&sf, &osb); 699 return (copyout(&osb, uap->buf, sizeof(osb))); 700 } 701 702 /* 703 * Convert a new format statfs structure to an old format statfs structure. 704 */ 705 static void 706 cvtstatfs(nsp, osp) 707 struct statfs *nsp; 708 struct ostatfs *osp; 709 { 710 711 statfs_scale_blocks(nsp, LONG_MAX); 712 bzero(osp, sizeof(*osp)); 713 osp->f_bsize = nsp->f_bsize; 714 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 715 osp->f_blocks = nsp->f_blocks; 716 osp->f_bfree = nsp->f_bfree; 717 osp->f_bavail = nsp->f_bavail; 718 osp->f_files = MIN(nsp->f_files, LONG_MAX); 719 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 720 osp->f_owner = nsp->f_owner; 721 osp->f_type = nsp->f_type; 722 osp->f_flags = nsp->f_flags; 723 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 724 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 725 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 726 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 727 strlcpy(osp->f_fstypename, nsp->f_fstypename, 728 MIN(MFSNAMELEN, OMFSNAMELEN)); 729 strlcpy(osp->f_mntonname, nsp->f_mntonname, 730 MIN(MNAMELEN, OMNAMELEN)); 731 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 732 MIN(MNAMELEN, OMNAMELEN)); 733 osp->f_fsid = nsp->f_fsid; 734 } 735 #endif /* COMPAT_FREEBSD4 */ 736 737 /* 738 * Change current working directory to a given file descriptor. 739 */ 740 #ifndef _SYS_SYSPROTO_H_ 741 struct fchdir_args { 742 int fd; 743 }; 744 #endif 745 int 746 sys_fchdir(td, uap) 747 struct thread *td; 748 struct fchdir_args /* { 749 int fd; 750 } */ *uap; 751 { 752 struct vnode *vp, *tdp; 753 struct mount *mp; 754 struct file *fp; 755 cap_rights_t rights; 756 int error; 757 758 AUDIT_ARG_FD(uap->fd); 759 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 760 &fp); 761 if (error != 0) 762 return (error); 763 vp = fp->f_vnode; 764 vrefact(vp); 765 fdrop(fp, td); 766 vn_lock(vp, LK_SHARED | LK_RETRY); 767 AUDIT_ARG_VNODE1(vp); 768 error = change_dir(vp, td); 769 while (!error && (mp = vp->v_mountedhere) != NULL) { 770 if (vfs_busy(mp, 0)) 771 continue; 772 error = VFS_ROOT(mp, LK_SHARED, &tdp); 773 vfs_unbusy(mp); 774 if (error != 0) 775 break; 776 vput(vp); 777 vp = tdp; 778 } 779 if (error != 0) { 780 vput(vp); 781 return (error); 782 } 783 VOP_UNLOCK(vp, 0); 784 pwd_chdir(td, vp); 785 return (0); 786 } 787 788 /* 789 * Change current working directory (``.''). 790 */ 791 #ifndef _SYS_SYSPROTO_H_ 792 struct chdir_args { 793 char *path; 794 }; 795 #endif 796 int 797 sys_chdir(td, uap) 798 struct thread *td; 799 struct chdir_args /* { 800 char *path; 801 } */ *uap; 802 { 803 804 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 805 } 806 807 int 808 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 809 { 810 struct nameidata nd; 811 int error; 812 813 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 814 pathseg, path, td); 815 if ((error = namei(&nd)) != 0) 816 return (error); 817 if ((error = change_dir(nd.ni_vp, td)) != 0) { 818 vput(nd.ni_vp); 819 NDFREE(&nd, NDF_ONLY_PNBUF); 820 return (error); 821 } 822 VOP_UNLOCK(nd.ni_vp, 0); 823 NDFREE(&nd, NDF_ONLY_PNBUF); 824 pwd_chdir(td, nd.ni_vp); 825 return (0); 826 } 827 828 /* 829 * Change notion of root (``/'') directory. 830 */ 831 #ifndef _SYS_SYSPROTO_H_ 832 struct chroot_args { 833 char *path; 834 }; 835 #endif 836 int 837 sys_chroot(td, uap) 838 struct thread *td; 839 struct chroot_args /* { 840 char *path; 841 } */ *uap; 842 { 843 struct nameidata nd; 844 int error; 845 846 error = priv_check(td, PRIV_VFS_CHROOT); 847 if (error != 0) 848 return (error); 849 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 850 UIO_USERSPACE, uap->path, td); 851 error = namei(&nd); 852 if (error != 0) 853 goto error; 854 error = change_dir(nd.ni_vp, td); 855 if (error != 0) 856 goto e_vunlock; 857 #ifdef MAC 858 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 859 if (error != 0) 860 goto e_vunlock; 861 #endif 862 VOP_UNLOCK(nd.ni_vp, 0); 863 error = pwd_chroot(td, nd.ni_vp); 864 vrele(nd.ni_vp); 865 NDFREE(&nd, NDF_ONLY_PNBUF); 866 return (error); 867 e_vunlock: 868 vput(nd.ni_vp); 869 error: 870 NDFREE(&nd, NDF_ONLY_PNBUF); 871 return (error); 872 } 873 874 /* 875 * Common routine for chroot and chdir. Callers must provide a locked vnode 876 * instance. 877 */ 878 int 879 change_dir(vp, td) 880 struct vnode *vp; 881 struct thread *td; 882 { 883 #ifdef MAC 884 int error; 885 #endif 886 887 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 888 if (vp->v_type != VDIR) 889 return (ENOTDIR); 890 #ifdef MAC 891 error = mac_vnode_check_chdir(td->td_ucred, vp); 892 if (error != 0) 893 return (error); 894 #endif 895 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 896 } 897 898 static __inline void 899 flags_to_rights(int flags, cap_rights_t *rightsp) 900 { 901 902 if (flags & O_EXEC) { 903 cap_rights_set(rightsp, CAP_FEXECVE); 904 } else { 905 switch ((flags & O_ACCMODE)) { 906 case O_RDONLY: 907 cap_rights_set(rightsp, CAP_READ); 908 break; 909 case O_RDWR: 910 cap_rights_set(rightsp, CAP_READ); 911 /* FALLTHROUGH */ 912 case O_WRONLY: 913 cap_rights_set(rightsp, CAP_WRITE); 914 if (!(flags & (O_APPEND | O_TRUNC))) 915 cap_rights_set(rightsp, CAP_SEEK); 916 break; 917 } 918 } 919 920 if (flags & O_CREAT) 921 cap_rights_set(rightsp, CAP_CREATE); 922 923 if (flags & O_TRUNC) 924 cap_rights_set(rightsp, CAP_FTRUNCATE); 925 926 if (flags & (O_SYNC | O_FSYNC)) 927 cap_rights_set(rightsp, CAP_FSYNC); 928 929 if (flags & (O_EXLOCK | O_SHLOCK)) 930 cap_rights_set(rightsp, CAP_FLOCK); 931 } 932 933 /* 934 * Check permissions, allocate an open file structure, and call the device 935 * open routine if any. 936 */ 937 #ifndef _SYS_SYSPROTO_H_ 938 struct open_args { 939 char *path; 940 int flags; 941 int mode; 942 }; 943 #endif 944 int 945 sys_open(td, uap) 946 struct thread *td; 947 register struct open_args /* { 948 char *path; 949 int flags; 950 int mode; 951 } */ *uap; 952 { 953 954 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 955 uap->flags, uap->mode)); 956 } 957 958 #ifndef _SYS_SYSPROTO_H_ 959 struct openat_args { 960 int fd; 961 char *path; 962 int flag; 963 int mode; 964 }; 965 #endif 966 int 967 sys_openat(struct thread *td, struct openat_args *uap) 968 { 969 970 AUDIT_ARG_FD(uap->fd); 971 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 972 uap->mode)); 973 } 974 975 int 976 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 977 int flags, int mode) 978 { 979 struct proc *p = td->td_proc; 980 struct filedesc *fdp = p->p_fd; 981 struct file *fp; 982 struct vnode *vp; 983 struct nameidata nd; 984 cap_rights_t rights; 985 int cmode, error, indx; 986 987 indx = -1; 988 989 AUDIT_ARG_FFLAGS(flags); 990 AUDIT_ARG_MODE(mode); 991 cap_rights_init(&rights, CAP_LOOKUP); 992 flags_to_rights(flags, &rights); 993 /* 994 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 995 * may be specified. 996 */ 997 if (flags & O_EXEC) { 998 if (flags & O_ACCMODE) 999 return (EINVAL); 1000 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1001 return (EINVAL); 1002 } else { 1003 flags = FFLAGS(flags); 1004 } 1005 1006 /* 1007 * Allocate a file structure. The descriptor to reference it 1008 * is allocated and set by finstall() below. 1009 */ 1010 error = falloc_noinstall(td, &fp); 1011 if (error != 0) 1012 return (error); 1013 /* 1014 * An extra reference on `fp' has been held for us by 1015 * falloc_noinstall(). 1016 */ 1017 /* Set the flags early so the finit in devfs can pick them up. */ 1018 fp->f_flag = flags & FMASK; 1019 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1020 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1021 &rights, td); 1022 td->td_dupfd = -1; /* XXX check for fdopen */ 1023 error = vn_open(&nd, &flags, cmode, fp); 1024 if (error != 0) { 1025 /* 1026 * If the vn_open replaced the method vector, something 1027 * wonderous happened deep below and we just pass it up 1028 * pretending we know what we do. 1029 */ 1030 if (error == ENXIO && fp->f_ops != &badfileops) 1031 goto success; 1032 1033 /* 1034 * Handle special fdopen() case. bleh. 1035 * 1036 * Don't do this for relative (capability) lookups; we don't 1037 * understand exactly what would happen, and we don't think 1038 * that it ever should. 1039 */ 1040 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1041 (error == ENODEV || error == ENXIO) && 1042 td->td_dupfd >= 0) { 1043 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1044 &indx); 1045 if (error == 0) 1046 goto success; 1047 } 1048 1049 goto bad; 1050 } 1051 td->td_dupfd = 0; 1052 NDFREE(&nd, NDF_ONLY_PNBUF); 1053 vp = nd.ni_vp; 1054 1055 /* 1056 * Store the vnode, for any f_type. Typically, the vnode use 1057 * count is decremented by direct call to vn_closefile() for 1058 * files that switched type in the cdevsw fdopen() method. 1059 */ 1060 fp->f_vnode = vp; 1061 /* 1062 * If the file wasn't claimed by devfs bind it to the normal 1063 * vnode operations here. 1064 */ 1065 if (fp->f_ops == &badfileops) { 1066 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1067 fp->f_seqcount = 1; 1068 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1069 DTYPE_VNODE, vp, &vnops); 1070 } 1071 1072 VOP_UNLOCK(vp, 0); 1073 if (flags & O_TRUNC) { 1074 error = fo_truncate(fp, 0, td->td_ucred, td); 1075 if (error != 0) 1076 goto bad; 1077 } 1078 success: 1079 /* 1080 * If we haven't already installed the FD (for dupfdopen), do so now. 1081 */ 1082 if (indx == -1) { 1083 struct filecaps *fcaps; 1084 1085 #ifdef CAPABILITIES 1086 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1087 fcaps = &nd.ni_filecaps; 1088 else 1089 #endif 1090 fcaps = NULL; 1091 error = finstall(td, fp, &indx, flags, fcaps); 1092 /* On success finstall() consumes fcaps. */ 1093 if (error != 0) { 1094 filecaps_free(&nd.ni_filecaps); 1095 goto bad; 1096 } 1097 } else { 1098 filecaps_free(&nd.ni_filecaps); 1099 } 1100 1101 /* 1102 * Release our private reference, leaving the one associated with 1103 * the descriptor table intact. 1104 */ 1105 fdrop(fp, td); 1106 td->td_retval[0] = indx; 1107 return (0); 1108 bad: 1109 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1110 fdrop(fp, td); 1111 return (error); 1112 } 1113 1114 #ifdef COMPAT_43 1115 /* 1116 * Create a file. 1117 */ 1118 #ifndef _SYS_SYSPROTO_H_ 1119 struct ocreat_args { 1120 char *path; 1121 int mode; 1122 }; 1123 #endif 1124 int 1125 ocreat(td, uap) 1126 struct thread *td; 1127 register struct ocreat_args /* { 1128 char *path; 1129 int mode; 1130 } */ *uap; 1131 { 1132 1133 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1134 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1135 } 1136 #endif /* COMPAT_43 */ 1137 1138 /* 1139 * Create a special file. 1140 */ 1141 #ifndef _SYS_SYSPROTO_H_ 1142 struct mknod_args { 1143 char *path; 1144 int mode; 1145 int dev; 1146 }; 1147 #endif 1148 int 1149 sys_mknod(td, uap) 1150 struct thread *td; 1151 register struct mknod_args /* { 1152 char *path; 1153 int mode; 1154 int dev; 1155 } */ *uap; 1156 { 1157 1158 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1159 uap->mode, uap->dev)); 1160 } 1161 1162 #ifndef _SYS_SYSPROTO_H_ 1163 struct mknodat_args { 1164 int fd; 1165 char *path; 1166 mode_t mode; 1167 dev_t dev; 1168 }; 1169 #endif 1170 int 1171 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1172 { 1173 1174 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1175 uap->dev)); 1176 } 1177 1178 int 1179 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1180 int mode, int dev) 1181 { 1182 struct vnode *vp; 1183 struct mount *mp; 1184 struct vattr vattr; 1185 struct nameidata nd; 1186 cap_rights_t rights; 1187 int error, whiteout = 0; 1188 1189 AUDIT_ARG_MODE(mode); 1190 AUDIT_ARG_DEV(dev); 1191 switch (mode & S_IFMT) { 1192 case S_IFCHR: 1193 case S_IFBLK: 1194 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1195 if (error == 0 && dev == VNOVAL) 1196 error = EINVAL; 1197 break; 1198 case S_IFMT: 1199 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1200 break; 1201 case S_IFWHT: 1202 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1203 break; 1204 case S_IFIFO: 1205 if (dev == 0) 1206 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1207 /* FALLTHROUGH */ 1208 default: 1209 error = EINVAL; 1210 break; 1211 } 1212 if (error != 0) 1213 return (error); 1214 restart: 1215 bwillwrite(); 1216 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1217 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1218 td); 1219 if ((error = namei(&nd)) != 0) 1220 return (error); 1221 vp = nd.ni_vp; 1222 if (vp != NULL) { 1223 NDFREE(&nd, NDF_ONLY_PNBUF); 1224 if (vp == nd.ni_dvp) 1225 vrele(nd.ni_dvp); 1226 else 1227 vput(nd.ni_dvp); 1228 vrele(vp); 1229 return (EEXIST); 1230 } else { 1231 VATTR_NULL(&vattr); 1232 vattr.va_mode = (mode & ALLPERMS) & 1233 ~td->td_proc->p_fd->fd_cmask; 1234 vattr.va_rdev = dev; 1235 whiteout = 0; 1236 1237 switch (mode & S_IFMT) { 1238 case S_IFMT: /* used by badsect to flag bad sectors */ 1239 vattr.va_type = VBAD; 1240 break; 1241 case S_IFCHR: 1242 vattr.va_type = VCHR; 1243 break; 1244 case S_IFBLK: 1245 vattr.va_type = VBLK; 1246 break; 1247 case S_IFWHT: 1248 whiteout = 1; 1249 break; 1250 default: 1251 panic("kern_mknod: invalid mode"); 1252 } 1253 } 1254 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1255 NDFREE(&nd, NDF_ONLY_PNBUF); 1256 vput(nd.ni_dvp); 1257 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1258 return (error); 1259 goto restart; 1260 } 1261 #ifdef MAC 1262 if (error == 0 && !whiteout) 1263 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1264 &nd.ni_cnd, &vattr); 1265 #endif 1266 if (error == 0) { 1267 if (whiteout) 1268 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1269 else { 1270 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1271 &nd.ni_cnd, &vattr); 1272 if (error == 0) 1273 vput(nd.ni_vp); 1274 } 1275 } 1276 NDFREE(&nd, NDF_ONLY_PNBUF); 1277 vput(nd.ni_dvp); 1278 vn_finished_write(mp); 1279 return (error); 1280 } 1281 1282 /* 1283 * Create a named pipe. 1284 */ 1285 #ifndef _SYS_SYSPROTO_H_ 1286 struct mkfifo_args { 1287 char *path; 1288 int mode; 1289 }; 1290 #endif 1291 int 1292 sys_mkfifo(td, uap) 1293 struct thread *td; 1294 register struct mkfifo_args /* { 1295 char *path; 1296 int mode; 1297 } */ *uap; 1298 { 1299 1300 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1301 uap->mode)); 1302 } 1303 1304 #ifndef _SYS_SYSPROTO_H_ 1305 struct mkfifoat_args { 1306 int fd; 1307 char *path; 1308 mode_t mode; 1309 }; 1310 #endif 1311 int 1312 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1313 { 1314 1315 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1316 uap->mode)); 1317 } 1318 1319 int 1320 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1321 int mode) 1322 { 1323 struct mount *mp; 1324 struct vattr vattr; 1325 struct nameidata nd; 1326 cap_rights_t rights; 1327 int error; 1328 1329 AUDIT_ARG_MODE(mode); 1330 restart: 1331 bwillwrite(); 1332 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1333 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1334 td); 1335 if ((error = namei(&nd)) != 0) 1336 return (error); 1337 if (nd.ni_vp != NULL) { 1338 NDFREE(&nd, NDF_ONLY_PNBUF); 1339 if (nd.ni_vp == nd.ni_dvp) 1340 vrele(nd.ni_dvp); 1341 else 1342 vput(nd.ni_dvp); 1343 vrele(nd.ni_vp); 1344 return (EEXIST); 1345 } 1346 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1347 NDFREE(&nd, NDF_ONLY_PNBUF); 1348 vput(nd.ni_dvp); 1349 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1350 return (error); 1351 goto restart; 1352 } 1353 VATTR_NULL(&vattr); 1354 vattr.va_type = VFIFO; 1355 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1356 #ifdef MAC 1357 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1358 &vattr); 1359 if (error != 0) 1360 goto out; 1361 #endif 1362 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1363 if (error == 0) 1364 vput(nd.ni_vp); 1365 #ifdef MAC 1366 out: 1367 #endif 1368 vput(nd.ni_dvp); 1369 vn_finished_write(mp); 1370 NDFREE(&nd, NDF_ONLY_PNBUF); 1371 return (error); 1372 } 1373 1374 /* 1375 * Make a hard file link. 1376 */ 1377 #ifndef _SYS_SYSPROTO_H_ 1378 struct link_args { 1379 char *path; 1380 char *link; 1381 }; 1382 #endif 1383 int 1384 sys_link(td, uap) 1385 struct thread *td; 1386 register struct link_args /* { 1387 char *path; 1388 char *link; 1389 } */ *uap; 1390 { 1391 1392 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1393 UIO_USERSPACE, FOLLOW)); 1394 } 1395 1396 #ifndef _SYS_SYSPROTO_H_ 1397 struct linkat_args { 1398 int fd1; 1399 char *path1; 1400 int fd2; 1401 char *path2; 1402 int flag; 1403 }; 1404 #endif 1405 int 1406 sys_linkat(struct thread *td, struct linkat_args *uap) 1407 { 1408 int flag; 1409 1410 flag = uap->flag; 1411 if (flag & ~AT_SYMLINK_FOLLOW) 1412 return (EINVAL); 1413 1414 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1415 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1416 } 1417 1418 int hardlink_check_uid = 0; 1419 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1420 &hardlink_check_uid, 0, 1421 "Unprivileged processes cannot create hard links to files owned by other " 1422 "users"); 1423 static int hardlink_check_gid = 0; 1424 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1425 &hardlink_check_gid, 0, 1426 "Unprivileged processes cannot create hard links to files owned by other " 1427 "groups"); 1428 1429 static int 1430 can_hardlink(struct vnode *vp, struct ucred *cred) 1431 { 1432 struct vattr va; 1433 int error; 1434 1435 if (!hardlink_check_uid && !hardlink_check_gid) 1436 return (0); 1437 1438 error = VOP_GETATTR(vp, &va, cred); 1439 if (error != 0) 1440 return (error); 1441 1442 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1443 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1444 if (error != 0) 1445 return (error); 1446 } 1447 1448 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1449 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1450 if (error != 0) 1451 return (error); 1452 } 1453 1454 return (0); 1455 } 1456 1457 int 1458 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1459 enum uio_seg segflg, int follow) 1460 { 1461 struct vnode *vp; 1462 struct mount *mp; 1463 struct nameidata nd; 1464 cap_rights_t rights; 1465 int error; 1466 1467 again: 1468 bwillwrite(); 1469 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1470 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1471 1472 if ((error = namei(&nd)) != 0) 1473 return (error); 1474 NDFREE(&nd, NDF_ONLY_PNBUF); 1475 vp = nd.ni_vp; 1476 if (vp->v_type == VDIR) { 1477 vrele(vp); 1478 return (EPERM); /* POSIX */ 1479 } 1480 NDINIT_ATRIGHTS(&nd, CREATE, 1481 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1482 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1483 if ((error = namei(&nd)) == 0) { 1484 if (nd.ni_vp != NULL) { 1485 NDFREE(&nd, NDF_ONLY_PNBUF); 1486 if (nd.ni_dvp == nd.ni_vp) 1487 vrele(nd.ni_dvp); 1488 else 1489 vput(nd.ni_dvp); 1490 vrele(nd.ni_vp); 1491 vrele(vp); 1492 return (EEXIST); 1493 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1494 /* 1495 * Cross-device link. No need to recheck 1496 * vp->v_type, since it cannot change, except 1497 * to VBAD. 1498 */ 1499 NDFREE(&nd, NDF_ONLY_PNBUF); 1500 vput(nd.ni_dvp); 1501 vrele(vp); 1502 return (EXDEV); 1503 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1504 error = can_hardlink(vp, td->td_ucred); 1505 #ifdef MAC 1506 if (error == 0) 1507 error = mac_vnode_check_link(td->td_ucred, 1508 nd.ni_dvp, vp, &nd.ni_cnd); 1509 #endif 1510 if (error != 0) { 1511 vput(vp); 1512 vput(nd.ni_dvp); 1513 NDFREE(&nd, NDF_ONLY_PNBUF); 1514 return (error); 1515 } 1516 error = vn_start_write(vp, &mp, V_NOWAIT); 1517 if (error != 0) { 1518 vput(vp); 1519 vput(nd.ni_dvp); 1520 NDFREE(&nd, NDF_ONLY_PNBUF); 1521 error = vn_start_write(NULL, &mp, 1522 V_XSLEEP | PCATCH); 1523 if (error != 0) 1524 return (error); 1525 goto again; 1526 } 1527 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1528 VOP_UNLOCK(vp, 0); 1529 vput(nd.ni_dvp); 1530 vn_finished_write(mp); 1531 NDFREE(&nd, NDF_ONLY_PNBUF); 1532 } else { 1533 vput(nd.ni_dvp); 1534 NDFREE(&nd, NDF_ONLY_PNBUF); 1535 vrele(vp); 1536 goto again; 1537 } 1538 } 1539 vrele(vp); 1540 return (error); 1541 } 1542 1543 /* 1544 * Make a symbolic link. 1545 */ 1546 #ifndef _SYS_SYSPROTO_H_ 1547 struct symlink_args { 1548 char *path; 1549 char *link; 1550 }; 1551 #endif 1552 int 1553 sys_symlink(td, uap) 1554 struct thread *td; 1555 register struct symlink_args /* { 1556 char *path; 1557 char *link; 1558 } */ *uap; 1559 { 1560 1561 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1562 UIO_USERSPACE)); 1563 } 1564 1565 #ifndef _SYS_SYSPROTO_H_ 1566 struct symlinkat_args { 1567 char *path; 1568 int fd; 1569 char *path2; 1570 }; 1571 #endif 1572 int 1573 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1574 { 1575 1576 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1577 UIO_USERSPACE)); 1578 } 1579 1580 int 1581 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1582 enum uio_seg segflg) 1583 { 1584 struct mount *mp; 1585 struct vattr vattr; 1586 char *syspath; 1587 struct nameidata nd; 1588 int error; 1589 cap_rights_t rights; 1590 1591 if (segflg == UIO_SYSSPACE) { 1592 syspath = path1; 1593 } else { 1594 syspath = uma_zalloc(namei_zone, M_WAITOK); 1595 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1596 goto out; 1597 } 1598 AUDIT_ARG_TEXT(syspath); 1599 restart: 1600 bwillwrite(); 1601 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1602 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1603 td); 1604 if ((error = namei(&nd)) != 0) 1605 goto out; 1606 if (nd.ni_vp) { 1607 NDFREE(&nd, NDF_ONLY_PNBUF); 1608 if (nd.ni_vp == nd.ni_dvp) 1609 vrele(nd.ni_dvp); 1610 else 1611 vput(nd.ni_dvp); 1612 vrele(nd.ni_vp); 1613 error = EEXIST; 1614 goto out; 1615 } 1616 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1617 NDFREE(&nd, NDF_ONLY_PNBUF); 1618 vput(nd.ni_dvp); 1619 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1620 goto out; 1621 goto restart; 1622 } 1623 VATTR_NULL(&vattr); 1624 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1625 #ifdef MAC 1626 vattr.va_type = VLNK; 1627 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1628 &vattr); 1629 if (error != 0) 1630 goto out2; 1631 #endif 1632 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1633 if (error == 0) 1634 vput(nd.ni_vp); 1635 #ifdef MAC 1636 out2: 1637 #endif 1638 NDFREE(&nd, NDF_ONLY_PNBUF); 1639 vput(nd.ni_dvp); 1640 vn_finished_write(mp); 1641 out: 1642 if (segflg != UIO_SYSSPACE) 1643 uma_zfree(namei_zone, syspath); 1644 return (error); 1645 } 1646 1647 /* 1648 * Delete a whiteout from the filesystem. 1649 */ 1650 int 1651 sys_undelete(td, uap) 1652 struct thread *td; 1653 register struct undelete_args /* { 1654 char *path; 1655 } */ *uap; 1656 { 1657 struct mount *mp; 1658 struct nameidata nd; 1659 int error; 1660 1661 restart: 1662 bwillwrite(); 1663 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1664 UIO_USERSPACE, uap->path, td); 1665 error = namei(&nd); 1666 if (error != 0) 1667 return (error); 1668 1669 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1670 NDFREE(&nd, NDF_ONLY_PNBUF); 1671 if (nd.ni_vp == nd.ni_dvp) 1672 vrele(nd.ni_dvp); 1673 else 1674 vput(nd.ni_dvp); 1675 if (nd.ni_vp) 1676 vrele(nd.ni_vp); 1677 return (EEXIST); 1678 } 1679 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1680 NDFREE(&nd, NDF_ONLY_PNBUF); 1681 vput(nd.ni_dvp); 1682 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1683 return (error); 1684 goto restart; 1685 } 1686 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1687 NDFREE(&nd, NDF_ONLY_PNBUF); 1688 vput(nd.ni_dvp); 1689 vn_finished_write(mp); 1690 return (error); 1691 } 1692 1693 /* 1694 * Delete a name from the filesystem. 1695 */ 1696 #ifndef _SYS_SYSPROTO_H_ 1697 struct unlink_args { 1698 char *path; 1699 }; 1700 #endif 1701 int 1702 sys_unlink(td, uap) 1703 struct thread *td; 1704 struct unlink_args /* { 1705 char *path; 1706 } */ *uap; 1707 { 1708 1709 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1710 } 1711 1712 #ifndef _SYS_SYSPROTO_H_ 1713 struct unlinkat_args { 1714 int fd; 1715 char *path; 1716 int flag; 1717 }; 1718 #endif 1719 int 1720 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1721 { 1722 int flag = uap->flag; 1723 int fd = uap->fd; 1724 char *path = uap->path; 1725 1726 if (flag & ~AT_REMOVEDIR) 1727 return (EINVAL); 1728 1729 if (flag & AT_REMOVEDIR) 1730 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1731 else 1732 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1733 } 1734 1735 int 1736 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1737 ino_t oldinum) 1738 { 1739 struct mount *mp; 1740 struct vnode *vp; 1741 struct nameidata nd; 1742 struct stat sb; 1743 cap_rights_t rights; 1744 int error; 1745 1746 restart: 1747 bwillwrite(); 1748 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1749 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1750 if ((error = namei(&nd)) != 0) 1751 return (error == EINVAL ? EPERM : error); 1752 vp = nd.ni_vp; 1753 if (vp->v_type == VDIR && oldinum == 0) { 1754 error = EPERM; /* POSIX */ 1755 } else if (oldinum != 0 && 1756 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1757 sb.st_ino != oldinum) { 1758 error = EIDRM; /* Identifier removed */ 1759 } else { 1760 /* 1761 * The root of a mounted filesystem cannot be deleted. 1762 * 1763 * XXX: can this only be a VDIR case? 1764 */ 1765 if (vp->v_vflag & VV_ROOT) 1766 error = EBUSY; 1767 } 1768 if (error == 0) { 1769 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1770 NDFREE(&nd, NDF_ONLY_PNBUF); 1771 vput(nd.ni_dvp); 1772 if (vp == nd.ni_dvp) 1773 vrele(vp); 1774 else 1775 vput(vp); 1776 if ((error = vn_start_write(NULL, &mp, 1777 V_XSLEEP | PCATCH)) != 0) 1778 return (error); 1779 goto restart; 1780 } 1781 #ifdef MAC 1782 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1783 &nd.ni_cnd); 1784 if (error != 0) 1785 goto out; 1786 #endif 1787 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1788 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1789 #ifdef MAC 1790 out: 1791 #endif 1792 vn_finished_write(mp); 1793 } 1794 NDFREE(&nd, NDF_ONLY_PNBUF); 1795 vput(nd.ni_dvp); 1796 if (vp == nd.ni_dvp) 1797 vrele(vp); 1798 else 1799 vput(vp); 1800 return (error); 1801 } 1802 1803 /* 1804 * Reposition read/write file offset. 1805 */ 1806 #ifndef _SYS_SYSPROTO_H_ 1807 struct lseek_args { 1808 int fd; 1809 int pad; 1810 off_t offset; 1811 int whence; 1812 }; 1813 #endif 1814 int 1815 sys_lseek(td, uap) 1816 struct thread *td; 1817 register struct lseek_args /* { 1818 int fd; 1819 int pad; 1820 off_t offset; 1821 int whence; 1822 } */ *uap; 1823 { 1824 struct file *fp; 1825 cap_rights_t rights; 1826 int error; 1827 1828 AUDIT_ARG_FD(uap->fd); 1829 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1830 if (error != 0) 1831 return (error); 1832 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1833 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1834 fdrop(fp, td); 1835 return (error); 1836 } 1837 1838 #if defined(COMPAT_43) 1839 /* 1840 * Reposition read/write file offset. 1841 */ 1842 #ifndef _SYS_SYSPROTO_H_ 1843 struct olseek_args { 1844 int fd; 1845 long offset; 1846 int whence; 1847 }; 1848 #endif 1849 int 1850 olseek(td, uap) 1851 struct thread *td; 1852 register struct olseek_args /* { 1853 int fd; 1854 long offset; 1855 int whence; 1856 } */ *uap; 1857 { 1858 struct lseek_args /* { 1859 int fd; 1860 int pad; 1861 off_t offset; 1862 int whence; 1863 } */ nuap; 1864 1865 nuap.fd = uap->fd; 1866 nuap.offset = uap->offset; 1867 nuap.whence = uap->whence; 1868 return (sys_lseek(td, &nuap)); 1869 } 1870 #endif /* COMPAT_43 */ 1871 1872 #if defined(COMPAT_FREEBSD6) 1873 /* Version with the 'pad' argument */ 1874 int 1875 freebsd6_lseek(td, uap) 1876 struct thread *td; 1877 register struct freebsd6_lseek_args *uap; 1878 { 1879 struct lseek_args ouap; 1880 1881 ouap.fd = uap->fd; 1882 ouap.offset = uap->offset; 1883 ouap.whence = uap->whence; 1884 return (sys_lseek(td, &ouap)); 1885 } 1886 #endif 1887 1888 /* 1889 * Check access permissions using passed credentials. 1890 */ 1891 static int 1892 vn_access(vp, user_flags, cred, td) 1893 struct vnode *vp; 1894 int user_flags; 1895 struct ucred *cred; 1896 struct thread *td; 1897 { 1898 accmode_t accmode; 1899 int error; 1900 1901 /* Flags == 0 means only check for existence. */ 1902 if (user_flags == 0) 1903 return (0); 1904 1905 accmode = 0; 1906 if (user_flags & R_OK) 1907 accmode |= VREAD; 1908 if (user_flags & W_OK) 1909 accmode |= VWRITE; 1910 if (user_flags & X_OK) 1911 accmode |= VEXEC; 1912 #ifdef MAC 1913 error = mac_vnode_check_access(cred, vp, accmode); 1914 if (error != 0) 1915 return (error); 1916 #endif 1917 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1918 error = VOP_ACCESS(vp, accmode, cred, td); 1919 return (error); 1920 } 1921 1922 /* 1923 * Check access permissions using "real" credentials. 1924 */ 1925 #ifndef _SYS_SYSPROTO_H_ 1926 struct access_args { 1927 char *path; 1928 int amode; 1929 }; 1930 #endif 1931 int 1932 sys_access(td, uap) 1933 struct thread *td; 1934 register struct access_args /* { 1935 char *path; 1936 int amode; 1937 } */ *uap; 1938 { 1939 1940 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1941 0, uap->amode)); 1942 } 1943 1944 #ifndef _SYS_SYSPROTO_H_ 1945 struct faccessat_args { 1946 int dirfd; 1947 char *path; 1948 int amode; 1949 int flag; 1950 } 1951 #endif 1952 int 1953 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1954 { 1955 1956 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1957 uap->amode)); 1958 } 1959 1960 int 1961 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1962 int flag, int amode) 1963 { 1964 struct ucred *cred, *usecred; 1965 struct vnode *vp; 1966 struct nameidata nd; 1967 cap_rights_t rights; 1968 int error; 1969 1970 if (flag & ~AT_EACCESS) 1971 return (EINVAL); 1972 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1973 return (EINVAL); 1974 1975 /* 1976 * Create and modify a temporary credential instead of one that 1977 * is potentially shared (if we need one). 1978 */ 1979 cred = td->td_ucred; 1980 if ((flag & AT_EACCESS) == 0 && 1981 ((cred->cr_uid != cred->cr_ruid || 1982 cred->cr_rgid != cred->cr_groups[0]))) { 1983 usecred = crdup(cred); 1984 usecred->cr_uid = cred->cr_ruid; 1985 usecred->cr_groups[0] = cred->cr_rgid; 1986 td->td_ucred = usecred; 1987 } else 1988 usecred = cred; 1989 AUDIT_ARG_VALUE(amode); 1990 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1991 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1992 td); 1993 if ((error = namei(&nd)) != 0) 1994 goto out; 1995 vp = nd.ni_vp; 1996 1997 error = vn_access(vp, amode, usecred, td); 1998 NDFREE(&nd, NDF_ONLY_PNBUF); 1999 vput(vp); 2000 out: 2001 if (usecred != cred) { 2002 td->td_ucred = cred; 2003 crfree(usecred); 2004 } 2005 return (error); 2006 } 2007 2008 /* 2009 * Check access permissions using "effective" credentials. 2010 */ 2011 #ifndef _SYS_SYSPROTO_H_ 2012 struct eaccess_args { 2013 char *path; 2014 int amode; 2015 }; 2016 #endif 2017 int 2018 sys_eaccess(td, uap) 2019 struct thread *td; 2020 register struct eaccess_args /* { 2021 char *path; 2022 int amode; 2023 } */ *uap; 2024 { 2025 2026 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2027 AT_EACCESS, uap->amode)); 2028 } 2029 2030 #if defined(COMPAT_43) 2031 /* 2032 * Get file status; this version follows links. 2033 */ 2034 #ifndef _SYS_SYSPROTO_H_ 2035 struct ostat_args { 2036 char *path; 2037 struct ostat *ub; 2038 }; 2039 #endif 2040 int 2041 ostat(td, uap) 2042 struct thread *td; 2043 register struct ostat_args /* { 2044 char *path; 2045 struct ostat *ub; 2046 } */ *uap; 2047 { 2048 struct stat sb; 2049 struct ostat osb; 2050 int error; 2051 2052 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2053 &sb, NULL); 2054 if (error != 0) 2055 return (error); 2056 cvtstat(&sb, &osb); 2057 return (copyout(&osb, uap->ub, sizeof (osb))); 2058 } 2059 2060 /* 2061 * Get file status; this version does not follow links. 2062 */ 2063 #ifndef _SYS_SYSPROTO_H_ 2064 struct olstat_args { 2065 char *path; 2066 struct ostat *ub; 2067 }; 2068 #endif 2069 int 2070 olstat(td, uap) 2071 struct thread *td; 2072 register struct olstat_args /* { 2073 char *path; 2074 struct ostat *ub; 2075 } */ *uap; 2076 { 2077 struct stat sb; 2078 struct ostat osb; 2079 int error; 2080 2081 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2082 UIO_USERSPACE, &sb, NULL); 2083 if (error != 0) 2084 return (error); 2085 cvtstat(&sb, &osb); 2086 return (copyout(&osb, uap->ub, sizeof (osb))); 2087 } 2088 2089 /* 2090 * Convert from an old to a new stat structure. 2091 */ 2092 void 2093 cvtstat(st, ost) 2094 struct stat *st; 2095 struct ostat *ost; 2096 { 2097 2098 bzero(ost, sizeof(*ost)); 2099 ost->st_dev = st->st_dev; 2100 ost->st_ino = st->st_ino; 2101 ost->st_mode = st->st_mode; 2102 ost->st_nlink = st->st_nlink; 2103 ost->st_uid = st->st_uid; 2104 ost->st_gid = st->st_gid; 2105 ost->st_rdev = st->st_rdev; 2106 if (st->st_size < (quad_t)1 << 32) 2107 ost->st_size = st->st_size; 2108 else 2109 ost->st_size = -2; 2110 ost->st_atim = st->st_atim; 2111 ost->st_mtim = st->st_mtim; 2112 ost->st_ctim = st->st_ctim; 2113 ost->st_blksize = st->st_blksize; 2114 ost->st_blocks = st->st_blocks; 2115 ost->st_flags = st->st_flags; 2116 ost->st_gen = st->st_gen; 2117 } 2118 #endif /* COMPAT_43 */ 2119 2120 /* 2121 * Get file status; this version follows links. 2122 */ 2123 #ifndef _SYS_SYSPROTO_H_ 2124 struct stat_args { 2125 char *path; 2126 struct stat *ub; 2127 }; 2128 #endif 2129 int 2130 sys_stat(td, uap) 2131 struct thread *td; 2132 register struct stat_args /* { 2133 char *path; 2134 struct stat *ub; 2135 } */ *uap; 2136 { 2137 struct stat sb; 2138 int error; 2139 2140 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2141 &sb, NULL); 2142 if (error == 0) 2143 error = copyout(&sb, uap->ub, sizeof (sb)); 2144 return (error); 2145 } 2146 2147 #ifndef _SYS_SYSPROTO_H_ 2148 struct fstatat_args { 2149 int fd; 2150 char *path; 2151 struct stat *buf; 2152 int flag; 2153 } 2154 #endif 2155 int 2156 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2157 { 2158 struct stat sb; 2159 int error; 2160 2161 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2162 UIO_USERSPACE, &sb, NULL); 2163 if (error == 0) 2164 error = copyout(&sb, uap->buf, sizeof (sb)); 2165 return (error); 2166 } 2167 2168 int 2169 kern_statat(struct thread *td, int flag, int fd, char *path, 2170 enum uio_seg pathseg, struct stat *sbp, 2171 void (*hook)(struct vnode *vp, struct stat *sbp)) 2172 { 2173 struct nameidata nd; 2174 struct stat sb; 2175 cap_rights_t rights; 2176 int error; 2177 2178 if (flag & ~AT_SYMLINK_NOFOLLOW) 2179 return (EINVAL); 2180 2181 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2182 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2183 cap_rights_init(&rights, CAP_FSTAT), td); 2184 2185 if ((error = namei(&nd)) != 0) 2186 return (error); 2187 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2188 if (error == 0) { 2189 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2190 if (S_ISREG(sb.st_mode)) 2191 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2192 if (__predict_false(hook != NULL)) 2193 hook(nd.ni_vp, &sb); 2194 } 2195 NDFREE(&nd, NDF_ONLY_PNBUF); 2196 vput(nd.ni_vp); 2197 if (error != 0) 2198 return (error); 2199 *sbp = sb; 2200 #ifdef KTRACE 2201 if (KTRPOINT(td, KTR_STRUCT)) 2202 ktrstat(&sb); 2203 #endif 2204 return (0); 2205 } 2206 2207 /* 2208 * Get file status; this version does not follow links. 2209 */ 2210 #ifndef _SYS_SYSPROTO_H_ 2211 struct lstat_args { 2212 char *path; 2213 struct stat *ub; 2214 }; 2215 #endif 2216 int 2217 sys_lstat(td, uap) 2218 struct thread *td; 2219 register struct lstat_args /* { 2220 char *path; 2221 struct stat *ub; 2222 } */ *uap; 2223 { 2224 struct stat sb; 2225 int error; 2226 2227 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2228 UIO_USERSPACE, &sb, NULL); 2229 if (error == 0) 2230 error = copyout(&sb, uap->ub, sizeof (sb)); 2231 return (error); 2232 } 2233 2234 /* 2235 * Implementation of the NetBSD [l]stat() functions. 2236 */ 2237 void 2238 cvtnstat(sb, nsb) 2239 struct stat *sb; 2240 struct nstat *nsb; 2241 { 2242 2243 bzero(nsb, sizeof *nsb); 2244 nsb->st_dev = sb->st_dev; 2245 nsb->st_ino = sb->st_ino; 2246 nsb->st_mode = sb->st_mode; 2247 nsb->st_nlink = sb->st_nlink; 2248 nsb->st_uid = sb->st_uid; 2249 nsb->st_gid = sb->st_gid; 2250 nsb->st_rdev = sb->st_rdev; 2251 nsb->st_atim = sb->st_atim; 2252 nsb->st_mtim = sb->st_mtim; 2253 nsb->st_ctim = sb->st_ctim; 2254 nsb->st_size = sb->st_size; 2255 nsb->st_blocks = sb->st_blocks; 2256 nsb->st_blksize = sb->st_blksize; 2257 nsb->st_flags = sb->st_flags; 2258 nsb->st_gen = sb->st_gen; 2259 nsb->st_birthtim = sb->st_birthtim; 2260 } 2261 2262 #ifndef _SYS_SYSPROTO_H_ 2263 struct nstat_args { 2264 char *path; 2265 struct nstat *ub; 2266 }; 2267 #endif 2268 int 2269 sys_nstat(td, uap) 2270 struct thread *td; 2271 register struct nstat_args /* { 2272 char *path; 2273 struct nstat *ub; 2274 } */ *uap; 2275 { 2276 struct stat sb; 2277 struct nstat nsb; 2278 int error; 2279 2280 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2281 &sb, NULL); 2282 if (error != 0) 2283 return (error); 2284 cvtnstat(&sb, &nsb); 2285 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2286 } 2287 2288 /* 2289 * NetBSD lstat. Get file status; this version does not follow links. 2290 */ 2291 #ifndef _SYS_SYSPROTO_H_ 2292 struct lstat_args { 2293 char *path; 2294 struct stat *ub; 2295 }; 2296 #endif 2297 int 2298 sys_nlstat(td, uap) 2299 struct thread *td; 2300 register struct nlstat_args /* { 2301 char *path; 2302 struct nstat *ub; 2303 } */ *uap; 2304 { 2305 struct stat sb; 2306 struct nstat nsb; 2307 int error; 2308 2309 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2310 UIO_USERSPACE, &sb, NULL); 2311 if (error != 0) 2312 return (error); 2313 cvtnstat(&sb, &nsb); 2314 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2315 } 2316 2317 /* 2318 * Get configurable pathname variables. 2319 */ 2320 #ifndef _SYS_SYSPROTO_H_ 2321 struct pathconf_args { 2322 char *path; 2323 int name; 2324 }; 2325 #endif 2326 int 2327 sys_pathconf(td, uap) 2328 struct thread *td; 2329 register struct pathconf_args /* { 2330 char *path; 2331 int name; 2332 } */ *uap; 2333 { 2334 2335 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2336 } 2337 2338 #ifndef _SYS_SYSPROTO_H_ 2339 struct lpathconf_args { 2340 char *path; 2341 int name; 2342 }; 2343 #endif 2344 int 2345 sys_lpathconf(td, uap) 2346 struct thread *td; 2347 register struct lpathconf_args /* { 2348 char *path; 2349 int name; 2350 } */ *uap; 2351 { 2352 2353 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2354 NOFOLLOW)); 2355 } 2356 2357 int 2358 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2359 u_long flags) 2360 { 2361 struct nameidata nd; 2362 int error; 2363 2364 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2365 pathseg, path, td); 2366 if ((error = namei(&nd)) != 0) 2367 return (error); 2368 NDFREE(&nd, NDF_ONLY_PNBUF); 2369 2370 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2371 vput(nd.ni_vp); 2372 return (error); 2373 } 2374 2375 /* 2376 * Return target name of a symbolic link. 2377 */ 2378 #ifndef _SYS_SYSPROTO_H_ 2379 struct readlink_args { 2380 char *path; 2381 char *buf; 2382 size_t count; 2383 }; 2384 #endif 2385 int 2386 sys_readlink(td, uap) 2387 struct thread *td; 2388 register struct readlink_args /* { 2389 char *path; 2390 char *buf; 2391 size_t count; 2392 } */ *uap; 2393 { 2394 2395 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2396 uap->buf, UIO_USERSPACE, uap->count)); 2397 } 2398 #ifndef _SYS_SYSPROTO_H_ 2399 struct readlinkat_args { 2400 int fd; 2401 char *path; 2402 char *buf; 2403 size_t bufsize; 2404 }; 2405 #endif 2406 int 2407 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2408 { 2409 2410 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2411 uap->buf, UIO_USERSPACE, uap->bufsize)); 2412 } 2413 2414 int 2415 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2416 char *buf, enum uio_seg bufseg, size_t count) 2417 { 2418 struct vnode *vp; 2419 struct iovec aiov; 2420 struct uio auio; 2421 struct nameidata nd; 2422 int error; 2423 2424 if (count > IOSIZE_MAX) 2425 return (EINVAL); 2426 2427 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2428 pathseg, path, fd, td); 2429 2430 if ((error = namei(&nd)) != 0) 2431 return (error); 2432 NDFREE(&nd, NDF_ONLY_PNBUF); 2433 vp = nd.ni_vp; 2434 #ifdef MAC 2435 error = mac_vnode_check_readlink(td->td_ucred, vp); 2436 if (error != 0) { 2437 vput(vp); 2438 return (error); 2439 } 2440 #endif 2441 if (vp->v_type != VLNK) 2442 error = EINVAL; 2443 else { 2444 aiov.iov_base = buf; 2445 aiov.iov_len = count; 2446 auio.uio_iov = &aiov; 2447 auio.uio_iovcnt = 1; 2448 auio.uio_offset = 0; 2449 auio.uio_rw = UIO_READ; 2450 auio.uio_segflg = bufseg; 2451 auio.uio_td = td; 2452 auio.uio_resid = count; 2453 error = VOP_READLINK(vp, &auio, td->td_ucred); 2454 td->td_retval[0] = count - auio.uio_resid; 2455 } 2456 vput(vp); 2457 return (error); 2458 } 2459 2460 /* 2461 * Common implementation code for chflags() and fchflags(). 2462 */ 2463 static int 2464 setfflags(td, vp, flags) 2465 struct thread *td; 2466 struct vnode *vp; 2467 u_long flags; 2468 { 2469 struct mount *mp; 2470 struct vattr vattr; 2471 int error; 2472 2473 /* We can't support the value matching VNOVAL. */ 2474 if (flags == VNOVAL) 2475 return (EOPNOTSUPP); 2476 2477 /* 2478 * Prevent non-root users from setting flags on devices. When 2479 * a device is reused, users can retain ownership of the device 2480 * if they are allowed to set flags and programs assume that 2481 * chown can't fail when done as root. 2482 */ 2483 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2484 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2485 if (error != 0) 2486 return (error); 2487 } 2488 2489 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2490 return (error); 2491 VATTR_NULL(&vattr); 2492 vattr.va_flags = flags; 2493 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2494 #ifdef MAC 2495 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2496 if (error == 0) 2497 #endif 2498 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2499 VOP_UNLOCK(vp, 0); 2500 vn_finished_write(mp); 2501 return (error); 2502 } 2503 2504 /* 2505 * Change flags of a file given a path name. 2506 */ 2507 #ifndef _SYS_SYSPROTO_H_ 2508 struct chflags_args { 2509 const char *path; 2510 u_long flags; 2511 }; 2512 #endif 2513 int 2514 sys_chflags(td, uap) 2515 struct thread *td; 2516 register struct chflags_args /* { 2517 const char *path; 2518 u_long flags; 2519 } */ *uap; 2520 { 2521 2522 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2523 uap->flags, 0)); 2524 } 2525 2526 #ifndef _SYS_SYSPROTO_H_ 2527 struct chflagsat_args { 2528 int fd; 2529 const char *path; 2530 u_long flags; 2531 int atflag; 2532 } 2533 #endif 2534 int 2535 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2536 { 2537 int fd = uap->fd; 2538 const char *path = uap->path; 2539 u_long flags = uap->flags; 2540 int atflag = uap->atflag; 2541 2542 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2543 return (EINVAL); 2544 2545 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2546 } 2547 2548 /* 2549 * Same as chflags() but doesn't follow symlinks. 2550 */ 2551 int 2552 sys_lchflags(td, uap) 2553 struct thread *td; 2554 register struct lchflags_args /* { 2555 const char *path; 2556 u_long flags; 2557 } */ *uap; 2558 { 2559 2560 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2561 uap->flags, AT_SYMLINK_NOFOLLOW)); 2562 } 2563 2564 static int 2565 kern_chflagsat(struct thread *td, int fd, const char *path, 2566 enum uio_seg pathseg, u_long flags, int atflag) 2567 { 2568 struct nameidata nd; 2569 cap_rights_t rights; 2570 int error, follow; 2571 2572 AUDIT_ARG_FFLAGS(flags); 2573 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2574 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2575 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2576 if ((error = namei(&nd)) != 0) 2577 return (error); 2578 NDFREE(&nd, NDF_ONLY_PNBUF); 2579 error = setfflags(td, nd.ni_vp, flags); 2580 vrele(nd.ni_vp); 2581 return (error); 2582 } 2583 2584 /* 2585 * Change flags of a file given a file descriptor. 2586 */ 2587 #ifndef _SYS_SYSPROTO_H_ 2588 struct fchflags_args { 2589 int fd; 2590 u_long flags; 2591 }; 2592 #endif 2593 int 2594 sys_fchflags(td, uap) 2595 struct thread *td; 2596 register struct fchflags_args /* { 2597 int fd; 2598 u_long flags; 2599 } */ *uap; 2600 { 2601 struct file *fp; 2602 cap_rights_t rights; 2603 int error; 2604 2605 AUDIT_ARG_FD(uap->fd); 2606 AUDIT_ARG_FFLAGS(uap->flags); 2607 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2608 &fp); 2609 if (error != 0) 2610 return (error); 2611 #ifdef AUDIT 2612 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2613 AUDIT_ARG_VNODE1(fp->f_vnode); 2614 VOP_UNLOCK(fp->f_vnode, 0); 2615 #endif 2616 error = setfflags(td, fp->f_vnode, uap->flags); 2617 fdrop(fp, td); 2618 return (error); 2619 } 2620 2621 /* 2622 * Common implementation code for chmod(), lchmod() and fchmod(). 2623 */ 2624 int 2625 setfmode(td, cred, vp, mode) 2626 struct thread *td; 2627 struct ucred *cred; 2628 struct vnode *vp; 2629 int mode; 2630 { 2631 struct mount *mp; 2632 struct vattr vattr; 2633 int error; 2634 2635 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2636 return (error); 2637 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2638 VATTR_NULL(&vattr); 2639 vattr.va_mode = mode & ALLPERMS; 2640 #ifdef MAC 2641 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2642 if (error == 0) 2643 #endif 2644 error = VOP_SETATTR(vp, &vattr, cred); 2645 VOP_UNLOCK(vp, 0); 2646 vn_finished_write(mp); 2647 return (error); 2648 } 2649 2650 /* 2651 * Change mode of a file given path name. 2652 */ 2653 #ifndef _SYS_SYSPROTO_H_ 2654 struct chmod_args { 2655 char *path; 2656 int mode; 2657 }; 2658 #endif 2659 int 2660 sys_chmod(td, uap) 2661 struct thread *td; 2662 register struct chmod_args /* { 2663 char *path; 2664 int mode; 2665 } */ *uap; 2666 { 2667 2668 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2669 uap->mode, 0)); 2670 } 2671 2672 #ifndef _SYS_SYSPROTO_H_ 2673 struct fchmodat_args { 2674 int dirfd; 2675 char *path; 2676 mode_t mode; 2677 int flag; 2678 } 2679 #endif 2680 int 2681 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2682 { 2683 int flag = uap->flag; 2684 int fd = uap->fd; 2685 char *path = uap->path; 2686 mode_t mode = uap->mode; 2687 2688 if (flag & ~AT_SYMLINK_NOFOLLOW) 2689 return (EINVAL); 2690 2691 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2692 } 2693 2694 /* 2695 * Change mode of a file given path name (don't follow links.) 2696 */ 2697 #ifndef _SYS_SYSPROTO_H_ 2698 struct lchmod_args { 2699 char *path; 2700 int mode; 2701 }; 2702 #endif 2703 int 2704 sys_lchmod(td, uap) 2705 struct thread *td; 2706 register struct lchmod_args /* { 2707 char *path; 2708 int mode; 2709 } */ *uap; 2710 { 2711 2712 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2713 uap->mode, AT_SYMLINK_NOFOLLOW)); 2714 } 2715 2716 int 2717 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2718 mode_t mode, int flag) 2719 { 2720 struct nameidata nd; 2721 cap_rights_t rights; 2722 int error, follow; 2723 2724 AUDIT_ARG_MODE(mode); 2725 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2726 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2727 cap_rights_init(&rights, CAP_FCHMOD), td); 2728 if ((error = namei(&nd)) != 0) 2729 return (error); 2730 NDFREE(&nd, NDF_ONLY_PNBUF); 2731 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2732 vrele(nd.ni_vp); 2733 return (error); 2734 } 2735 2736 /* 2737 * Change mode of a file given a file descriptor. 2738 */ 2739 #ifndef _SYS_SYSPROTO_H_ 2740 struct fchmod_args { 2741 int fd; 2742 int mode; 2743 }; 2744 #endif 2745 int 2746 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2747 { 2748 struct file *fp; 2749 cap_rights_t rights; 2750 int error; 2751 2752 AUDIT_ARG_FD(uap->fd); 2753 AUDIT_ARG_MODE(uap->mode); 2754 2755 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2756 if (error != 0) 2757 return (error); 2758 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2759 fdrop(fp, td); 2760 return (error); 2761 } 2762 2763 /* 2764 * Common implementation for chown(), lchown(), and fchown() 2765 */ 2766 int 2767 setfown(td, cred, vp, uid, gid) 2768 struct thread *td; 2769 struct ucred *cred; 2770 struct vnode *vp; 2771 uid_t uid; 2772 gid_t gid; 2773 { 2774 struct mount *mp; 2775 struct vattr vattr; 2776 int error; 2777 2778 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2779 return (error); 2780 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2781 VATTR_NULL(&vattr); 2782 vattr.va_uid = uid; 2783 vattr.va_gid = gid; 2784 #ifdef MAC 2785 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2786 vattr.va_gid); 2787 if (error == 0) 2788 #endif 2789 error = VOP_SETATTR(vp, &vattr, cred); 2790 VOP_UNLOCK(vp, 0); 2791 vn_finished_write(mp); 2792 return (error); 2793 } 2794 2795 /* 2796 * Set ownership given a path name. 2797 */ 2798 #ifndef _SYS_SYSPROTO_H_ 2799 struct chown_args { 2800 char *path; 2801 int uid; 2802 int gid; 2803 }; 2804 #endif 2805 int 2806 sys_chown(td, uap) 2807 struct thread *td; 2808 register struct chown_args /* { 2809 char *path; 2810 int uid; 2811 int gid; 2812 } */ *uap; 2813 { 2814 2815 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2816 uap->gid, 0)); 2817 } 2818 2819 #ifndef _SYS_SYSPROTO_H_ 2820 struct fchownat_args { 2821 int fd; 2822 const char * path; 2823 uid_t uid; 2824 gid_t gid; 2825 int flag; 2826 }; 2827 #endif 2828 int 2829 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2830 { 2831 int flag; 2832 2833 flag = uap->flag; 2834 if (flag & ~AT_SYMLINK_NOFOLLOW) 2835 return (EINVAL); 2836 2837 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2838 uap->gid, uap->flag)); 2839 } 2840 2841 int 2842 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2843 int uid, int gid, int flag) 2844 { 2845 struct nameidata nd; 2846 cap_rights_t rights; 2847 int error, follow; 2848 2849 AUDIT_ARG_OWNER(uid, gid); 2850 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2851 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2852 cap_rights_init(&rights, CAP_FCHOWN), td); 2853 2854 if ((error = namei(&nd)) != 0) 2855 return (error); 2856 NDFREE(&nd, NDF_ONLY_PNBUF); 2857 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2858 vrele(nd.ni_vp); 2859 return (error); 2860 } 2861 2862 /* 2863 * Set ownership given a path name, do not cross symlinks. 2864 */ 2865 #ifndef _SYS_SYSPROTO_H_ 2866 struct lchown_args { 2867 char *path; 2868 int uid; 2869 int gid; 2870 }; 2871 #endif 2872 int 2873 sys_lchown(td, uap) 2874 struct thread *td; 2875 register struct lchown_args /* { 2876 char *path; 2877 int uid; 2878 int gid; 2879 } */ *uap; 2880 { 2881 2882 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2883 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2884 } 2885 2886 /* 2887 * Set ownership given a file descriptor. 2888 */ 2889 #ifndef _SYS_SYSPROTO_H_ 2890 struct fchown_args { 2891 int fd; 2892 int uid; 2893 int gid; 2894 }; 2895 #endif 2896 int 2897 sys_fchown(td, uap) 2898 struct thread *td; 2899 register struct fchown_args /* { 2900 int fd; 2901 int uid; 2902 int gid; 2903 } */ *uap; 2904 { 2905 struct file *fp; 2906 cap_rights_t rights; 2907 int error; 2908 2909 AUDIT_ARG_FD(uap->fd); 2910 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2911 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2912 if (error != 0) 2913 return (error); 2914 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2915 fdrop(fp, td); 2916 return (error); 2917 } 2918 2919 /* 2920 * Common implementation code for utimes(), lutimes(), and futimes(). 2921 */ 2922 static int 2923 getutimes(usrtvp, tvpseg, tsp) 2924 const struct timeval *usrtvp; 2925 enum uio_seg tvpseg; 2926 struct timespec *tsp; 2927 { 2928 struct timeval tv[2]; 2929 const struct timeval *tvp; 2930 int error; 2931 2932 if (usrtvp == NULL) { 2933 vfs_timestamp(&tsp[0]); 2934 tsp[1] = tsp[0]; 2935 } else { 2936 if (tvpseg == UIO_SYSSPACE) { 2937 tvp = usrtvp; 2938 } else { 2939 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2940 return (error); 2941 tvp = tv; 2942 } 2943 2944 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2945 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2946 return (EINVAL); 2947 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2948 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2949 } 2950 return (0); 2951 } 2952 2953 /* 2954 * Common implementation code for futimens(), utimensat(). 2955 */ 2956 #define UTIMENS_NULL 0x1 2957 #define UTIMENS_EXIT 0x2 2958 static int 2959 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2960 struct timespec *tsp, int *retflags) 2961 { 2962 struct timespec tsnow; 2963 int error; 2964 2965 vfs_timestamp(&tsnow); 2966 *retflags = 0; 2967 if (usrtsp == NULL) { 2968 tsp[0] = tsnow; 2969 tsp[1] = tsnow; 2970 *retflags |= UTIMENS_NULL; 2971 return (0); 2972 } 2973 if (tspseg == UIO_SYSSPACE) { 2974 tsp[0] = usrtsp[0]; 2975 tsp[1] = usrtsp[1]; 2976 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2977 return (error); 2978 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2979 *retflags |= UTIMENS_EXIT; 2980 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2981 *retflags |= UTIMENS_NULL; 2982 if (tsp[0].tv_nsec == UTIME_OMIT) 2983 tsp[0].tv_sec = VNOVAL; 2984 else if (tsp[0].tv_nsec == UTIME_NOW) 2985 tsp[0] = tsnow; 2986 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2987 return (EINVAL); 2988 if (tsp[1].tv_nsec == UTIME_OMIT) 2989 tsp[1].tv_sec = VNOVAL; 2990 else if (tsp[1].tv_nsec == UTIME_NOW) 2991 tsp[1] = tsnow; 2992 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2993 return (EINVAL); 2994 2995 return (0); 2996 } 2997 2998 /* 2999 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3000 * and utimensat(). 3001 */ 3002 static int 3003 setutimes(td, vp, ts, numtimes, nullflag) 3004 struct thread *td; 3005 struct vnode *vp; 3006 const struct timespec *ts; 3007 int numtimes; 3008 int nullflag; 3009 { 3010 struct mount *mp; 3011 struct vattr vattr; 3012 int error, setbirthtime; 3013 3014 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3015 return (error); 3016 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3017 setbirthtime = 0; 3018 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3019 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3020 setbirthtime = 1; 3021 VATTR_NULL(&vattr); 3022 vattr.va_atime = ts[0]; 3023 vattr.va_mtime = ts[1]; 3024 if (setbirthtime) 3025 vattr.va_birthtime = ts[1]; 3026 if (numtimes > 2) 3027 vattr.va_birthtime = ts[2]; 3028 if (nullflag) 3029 vattr.va_vaflags |= VA_UTIMES_NULL; 3030 #ifdef MAC 3031 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3032 vattr.va_mtime); 3033 #endif 3034 if (error == 0) 3035 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3036 VOP_UNLOCK(vp, 0); 3037 vn_finished_write(mp); 3038 return (error); 3039 } 3040 3041 /* 3042 * Set the access and modification times of a file. 3043 */ 3044 #ifndef _SYS_SYSPROTO_H_ 3045 struct utimes_args { 3046 char *path; 3047 struct timeval *tptr; 3048 }; 3049 #endif 3050 int 3051 sys_utimes(td, uap) 3052 struct thread *td; 3053 register struct utimes_args /* { 3054 char *path; 3055 struct timeval *tptr; 3056 } */ *uap; 3057 { 3058 3059 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3060 uap->tptr, UIO_USERSPACE)); 3061 } 3062 3063 #ifndef _SYS_SYSPROTO_H_ 3064 struct futimesat_args { 3065 int fd; 3066 const char * path; 3067 const struct timeval * times; 3068 }; 3069 #endif 3070 int 3071 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3072 { 3073 3074 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3075 uap->times, UIO_USERSPACE)); 3076 } 3077 3078 int 3079 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3080 struct timeval *tptr, enum uio_seg tptrseg) 3081 { 3082 struct nameidata nd; 3083 struct timespec ts[2]; 3084 cap_rights_t rights; 3085 int error; 3086 3087 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3088 return (error); 3089 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3090 cap_rights_init(&rights, CAP_FUTIMES), td); 3091 3092 if ((error = namei(&nd)) != 0) 3093 return (error); 3094 NDFREE(&nd, NDF_ONLY_PNBUF); 3095 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3096 vrele(nd.ni_vp); 3097 return (error); 3098 } 3099 3100 /* 3101 * Set the access and modification times of a file. 3102 */ 3103 #ifndef _SYS_SYSPROTO_H_ 3104 struct lutimes_args { 3105 char *path; 3106 struct timeval *tptr; 3107 }; 3108 #endif 3109 int 3110 sys_lutimes(td, uap) 3111 struct thread *td; 3112 register struct lutimes_args /* { 3113 char *path; 3114 struct timeval *tptr; 3115 } */ *uap; 3116 { 3117 3118 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3119 UIO_USERSPACE)); 3120 } 3121 3122 int 3123 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3124 struct timeval *tptr, enum uio_seg tptrseg) 3125 { 3126 struct timespec ts[2]; 3127 struct nameidata nd; 3128 int error; 3129 3130 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3131 return (error); 3132 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3133 if ((error = namei(&nd)) != 0) 3134 return (error); 3135 NDFREE(&nd, NDF_ONLY_PNBUF); 3136 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3137 vrele(nd.ni_vp); 3138 return (error); 3139 } 3140 3141 /* 3142 * Set the access and modification times of a file. 3143 */ 3144 #ifndef _SYS_SYSPROTO_H_ 3145 struct futimes_args { 3146 int fd; 3147 struct timeval *tptr; 3148 }; 3149 #endif 3150 int 3151 sys_futimes(td, uap) 3152 struct thread *td; 3153 register struct futimes_args /* { 3154 int fd; 3155 struct timeval *tptr; 3156 } */ *uap; 3157 { 3158 3159 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3160 } 3161 3162 int 3163 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3164 enum uio_seg tptrseg) 3165 { 3166 struct timespec ts[2]; 3167 struct file *fp; 3168 cap_rights_t rights; 3169 int error; 3170 3171 AUDIT_ARG_FD(fd); 3172 error = getutimes(tptr, tptrseg, ts); 3173 if (error != 0) 3174 return (error); 3175 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3176 if (error != 0) 3177 return (error); 3178 #ifdef AUDIT 3179 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3180 AUDIT_ARG_VNODE1(fp->f_vnode); 3181 VOP_UNLOCK(fp->f_vnode, 0); 3182 #endif 3183 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3184 fdrop(fp, td); 3185 return (error); 3186 } 3187 3188 int 3189 sys_futimens(struct thread *td, struct futimens_args *uap) 3190 { 3191 3192 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3193 } 3194 3195 int 3196 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3197 enum uio_seg tptrseg) 3198 { 3199 struct timespec ts[2]; 3200 struct file *fp; 3201 cap_rights_t rights; 3202 int error, flags; 3203 3204 AUDIT_ARG_FD(fd); 3205 error = getutimens(tptr, tptrseg, ts, &flags); 3206 if (error != 0) 3207 return (error); 3208 if (flags & UTIMENS_EXIT) 3209 return (0); 3210 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3211 if (error != 0) 3212 return (error); 3213 #ifdef AUDIT 3214 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3215 AUDIT_ARG_VNODE1(fp->f_vnode); 3216 VOP_UNLOCK(fp->f_vnode, 0); 3217 #endif 3218 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3219 fdrop(fp, td); 3220 return (error); 3221 } 3222 3223 int 3224 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3225 { 3226 3227 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3228 uap->times, UIO_USERSPACE, uap->flag)); 3229 } 3230 3231 int 3232 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3233 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3234 { 3235 struct nameidata nd; 3236 struct timespec ts[2]; 3237 cap_rights_t rights; 3238 int error, flags; 3239 3240 if (flag & ~AT_SYMLINK_NOFOLLOW) 3241 return (EINVAL); 3242 3243 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3244 return (error); 3245 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3246 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3247 cap_rights_init(&rights, CAP_FUTIMES), td); 3248 if ((error = namei(&nd)) != 0) 3249 return (error); 3250 /* 3251 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3252 * POSIX states: 3253 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3254 * "Search permission is denied by a component of the path prefix." 3255 */ 3256 NDFREE(&nd, NDF_ONLY_PNBUF); 3257 if ((flags & UTIMENS_EXIT) == 0) 3258 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3259 vrele(nd.ni_vp); 3260 return (error); 3261 } 3262 3263 /* 3264 * Truncate a file given its path name. 3265 */ 3266 #ifndef _SYS_SYSPROTO_H_ 3267 struct truncate_args { 3268 char *path; 3269 int pad; 3270 off_t length; 3271 }; 3272 #endif 3273 int 3274 sys_truncate(td, uap) 3275 struct thread *td; 3276 register struct truncate_args /* { 3277 char *path; 3278 int pad; 3279 off_t length; 3280 } */ *uap; 3281 { 3282 3283 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3284 } 3285 3286 int 3287 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3288 { 3289 struct mount *mp; 3290 struct vnode *vp; 3291 void *rl_cookie; 3292 struct vattr vattr; 3293 struct nameidata nd; 3294 int error; 3295 3296 if (length < 0) 3297 return(EINVAL); 3298 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3299 if ((error = namei(&nd)) != 0) 3300 return (error); 3301 vp = nd.ni_vp; 3302 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3303 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3304 vn_rangelock_unlock(vp, rl_cookie); 3305 vrele(vp); 3306 return (error); 3307 } 3308 NDFREE(&nd, NDF_ONLY_PNBUF); 3309 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3310 if (vp->v_type == VDIR) 3311 error = EISDIR; 3312 #ifdef MAC 3313 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3314 } 3315 #endif 3316 else if ((error = vn_writechk(vp)) == 0 && 3317 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3318 VATTR_NULL(&vattr); 3319 vattr.va_size = length; 3320 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3321 } 3322 VOP_UNLOCK(vp, 0); 3323 vn_finished_write(mp); 3324 vn_rangelock_unlock(vp, rl_cookie); 3325 vrele(vp); 3326 return (error); 3327 } 3328 3329 #if defined(COMPAT_43) 3330 /* 3331 * Truncate a file given its path name. 3332 */ 3333 #ifndef _SYS_SYSPROTO_H_ 3334 struct otruncate_args { 3335 char *path; 3336 long length; 3337 }; 3338 #endif 3339 int 3340 otruncate(td, uap) 3341 struct thread *td; 3342 register struct otruncate_args /* { 3343 char *path; 3344 long length; 3345 } */ *uap; 3346 { 3347 struct truncate_args /* { 3348 char *path; 3349 int pad; 3350 off_t length; 3351 } */ nuap; 3352 3353 nuap.path = uap->path; 3354 nuap.length = uap->length; 3355 return (sys_truncate(td, &nuap)); 3356 } 3357 #endif /* COMPAT_43 */ 3358 3359 #if defined(COMPAT_FREEBSD6) 3360 /* Versions with the pad argument */ 3361 int 3362 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3363 { 3364 struct truncate_args ouap; 3365 3366 ouap.path = uap->path; 3367 ouap.length = uap->length; 3368 return (sys_truncate(td, &ouap)); 3369 } 3370 3371 int 3372 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3373 { 3374 struct ftruncate_args ouap; 3375 3376 ouap.fd = uap->fd; 3377 ouap.length = uap->length; 3378 return (sys_ftruncate(td, &ouap)); 3379 } 3380 #endif 3381 3382 int 3383 kern_fsync(struct thread *td, int fd, bool fullsync) 3384 { 3385 struct vnode *vp; 3386 struct mount *mp; 3387 struct file *fp; 3388 cap_rights_t rights; 3389 int error, lock_flags; 3390 3391 AUDIT_ARG_FD(fd); 3392 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3393 if (error != 0) 3394 return (error); 3395 vp = fp->f_vnode; 3396 #if 0 3397 if (!fullsync) 3398 /* XXXKIB: compete outstanding aio writes */; 3399 #endif 3400 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3401 if (error != 0) 3402 goto drop; 3403 if (MNT_SHARED_WRITES(mp) || 3404 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3405 lock_flags = LK_SHARED; 3406 } else { 3407 lock_flags = LK_EXCLUSIVE; 3408 } 3409 vn_lock(vp, lock_flags | LK_RETRY); 3410 AUDIT_ARG_VNODE1(vp); 3411 if (vp->v_object != NULL) { 3412 VM_OBJECT_WLOCK(vp->v_object); 3413 vm_object_page_clean(vp->v_object, 0, 0, 0); 3414 VM_OBJECT_WUNLOCK(vp->v_object); 3415 } 3416 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3417 VOP_UNLOCK(vp, 0); 3418 vn_finished_write(mp); 3419 drop: 3420 fdrop(fp, td); 3421 return (error); 3422 } 3423 3424 /* 3425 * Sync an open file. 3426 */ 3427 #ifndef _SYS_SYSPROTO_H_ 3428 struct fsync_args { 3429 int fd; 3430 }; 3431 #endif 3432 int 3433 sys_fsync(struct thread *td, struct fsync_args *uap) 3434 { 3435 3436 return (kern_fsync(td, uap->fd, true)); 3437 } 3438 3439 int 3440 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3441 { 3442 3443 return (kern_fsync(td, uap->fd, false)); 3444 } 3445 3446 /* 3447 * Rename files. Source and destination must either both be directories, or 3448 * both not be directories. If target is a directory, it must be empty. 3449 */ 3450 #ifndef _SYS_SYSPROTO_H_ 3451 struct rename_args { 3452 char *from; 3453 char *to; 3454 }; 3455 #endif 3456 int 3457 sys_rename(td, uap) 3458 struct thread *td; 3459 register struct rename_args /* { 3460 char *from; 3461 char *to; 3462 } */ *uap; 3463 { 3464 3465 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3466 uap->to, UIO_USERSPACE)); 3467 } 3468 3469 #ifndef _SYS_SYSPROTO_H_ 3470 struct renameat_args { 3471 int oldfd; 3472 char *old; 3473 int newfd; 3474 char *new; 3475 }; 3476 #endif 3477 int 3478 sys_renameat(struct thread *td, struct renameat_args *uap) 3479 { 3480 3481 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3482 UIO_USERSPACE)); 3483 } 3484 3485 int 3486 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3487 enum uio_seg pathseg) 3488 { 3489 struct mount *mp = NULL; 3490 struct vnode *tvp, *fvp, *tdvp; 3491 struct nameidata fromnd, tond; 3492 cap_rights_t rights; 3493 int error; 3494 3495 again: 3496 bwillwrite(); 3497 #ifdef MAC 3498 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3499 AUDITVNODE1, pathseg, old, oldfd, 3500 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3501 #else 3502 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3503 pathseg, old, oldfd, 3504 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3505 #endif 3506 3507 if ((error = namei(&fromnd)) != 0) 3508 return (error); 3509 #ifdef MAC 3510 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3511 fromnd.ni_vp, &fromnd.ni_cnd); 3512 VOP_UNLOCK(fromnd.ni_dvp, 0); 3513 if (fromnd.ni_dvp != fromnd.ni_vp) 3514 VOP_UNLOCK(fromnd.ni_vp, 0); 3515 #endif 3516 fvp = fromnd.ni_vp; 3517 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3518 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3519 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3520 if (fromnd.ni_vp->v_type == VDIR) 3521 tond.ni_cnd.cn_flags |= WILLBEDIR; 3522 if ((error = namei(&tond)) != 0) { 3523 /* Translate error code for rename("dir1", "dir2/."). */ 3524 if (error == EISDIR && fvp->v_type == VDIR) 3525 error = EINVAL; 3526 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3527 vrele(fromnd.ni_dvp); 3528 vrele(fvp); 3529 goto out1; 3530 } 3531 tdvp = tond.ni_dvp; 3532 tvp = tond.ni_vp; 3533 error = vn_start_write(fvp, &mp, V_NOWAIT); 3534 if (error != 0) { 3535 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3536 NDFREE(&tond, NDF_ONLY_PNBUF); 3537 if (tvp != NULL) 3538 vput(tvp); 3539 if (tdvp == tvp) 3540 vrele(tdvp); 3541 else 3542 vput(tdvp); 3543 vrele(fromnd.ni_dvp); 3544 vrele(fvp); 3545 vrele(tond.ni_startdir); 3546 if (fromnd.ni_startdir != NULL) 3547 vrele(fromnd.ni_startdir); 3548 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3549 if (error != 0) 3550 return (error); 3551 goto again; 3552 } 3553 if (tvp != NULL) { 3554 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3555 error = ENOTDIR; 3556 goto out; 3557 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3558 error = EISDIR; 3559 goto out; 3560 } 3561 #ifdef CAPABILITIES 3562 if (newfd != AT_FDCWD) { 3563 /* 3564 * If the target already exists we require CAP_UNLINKAT 3565 * from 'newfd'. 3566 */ 3567 error = cap_check(&tond.ni_filecaps.fc_rights, 3568 cap_rights_init(&rights, CAP_UNLINKAT)); 3569 if (error != 0) 3570 goto out; 3571 } 3572 #endif 3573 } 3574 if (fvp == tdvp) { 3575 error = EINVAL; 3576 goto out; 3577 } 3578 /* 3579 * If the source is the same as the destination (that is, if they 3580 * are links to the same vnode), then there is nothing to do. 3581 */ 3582 if (fvp == tvp) 3583 error = -1; 3584 #ifdef MAC 3585 else 3586 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3587 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3588 #endif 3589 out: 3590 if (error == 0) { 3591 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3592 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3593 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3594 NDFREE(&tond, NDF_ONLY_PNBUF); 3595 } else { 3596 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3597 NDFREE(&tond, NDF_ONLY_PNBUF); 3598 if (tvp != NULL) 3599 vput(tvp); 3600 if (tdvp == tvp) 3601 vrele(tdvp); 3602 else 3603 vput(tdvp); 3604 vrele(fromnd.ni_dvp); 3605 vrele(fvp); 3606 } 3607 vrele(tond.ni_startdir); 3608 vn_finished_write(mp); 3609 out1: 3610 if (fromnd.ni_startdir) 3611 vrele(fromnd.ni_startdir); 3612 if (error == -1) 3613 return (0); 3614 return (error); 3615 } 3616 3617 /* 3618 * Make a directory file. 3619 */ 3620 #ifndef _SYS_SYSPROTO_H_ 3621 struct mkdir_args { 3622 char *path; 3623 int mode; 3624 }; 3625 #endif 3626 int 3627 sys_mkdir(td, uap) 3628 struct thread *td; 3629 register struct mkdir_args /* { 3630 char *path; 3631 int mode; 3632 } */ *uap; 3633 { 3634 3635 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3636 uap->mode)); 3637 } 3638 3639 #ifndef _SYS_SYSPROTO_H_ 3640 struct mkdirat_args { 3641 int fd; 3642 char *path; 3643 mode_t mode; 3644 }; 3645 #endif 3646 int 3647 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3648 { 3649 3650 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3651 } 3652 3653 int 3654 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3655 int mode) 3656 { 3657 struct mount *mp; 3658 struct vnode *vp; 3659 struct vattr vattr; 3660 struct nameidata nd; 3661 cap_rights_t rights; 3662 int error; 3663 3664 AUDIT_ARG_MODE(mode); 3665 restart: 3666 bwillwrite(); 3667 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3668 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3669 td); 3670 nd.ni_cnd.cn_flags |= WILLBEDIR; 3671 if ((error = namei(&nd)) != 0) 3672 return (error); 3673 vp = nd.ni_vp; 3674 if (vp != NULL) { 3675 NDFREE(&nd, NDF_ONLY_PNBUF); 3676 /* 3677 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3678 * the strange behaviour of leaving the vnode unlocked 3679 * if the target is the same vnode as the parent. 3680 */ 3681 if (vp == nd.ni_dvp) 3682 vrele(nd.ni_dvp); 3683 else 3684 vput(nd.ni_dvp); 3685 vrele(vp); 3686 return (EEXIST); 3687 } 3688 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3689 NDFREE(&nd, NDF_ONLY_PNBUF); 3690 vput(nd.ni_dvp); 3691 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3692 return (error); 3693 goto restart; 3694 } 3695 VATTR_NULL(&vattr); 3696 vattr.va_type = VDIR; 3697 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3698 #ifdef MAC 3699 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3700 &vattr); 3701 if (error != 0) 3702 goto out; 3703 #endif 3704 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3705 #ifdef MAC 3706 out: 3707 #endif 3708 NDFREE(&nd, NDF_ONLY_PNBUF); 3709 vput(nd.ni_dvp); 3710 if (error == 0) 3711 vput(nd.ni_vp); 3712 vn_finished_write(mp); 3713 return (error); 3714 } 3715 3716 /* 3717 * Remove a directory file. 3718 */ 3719 #ifndef _SYS_SYSPROTO_H_ 3720 struct rmdir_args { 3721 char *path; 3722 }; 3723 #endif 3724 int 3725 sys_rmdir(td, uap) 3726 struct thread *td; 3727 struct rmdir_args /* { 3728 char *path; 3729 } */ *uap; 3730 { 3731 3732 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3733 } 3734 3735 int 3736 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3737 { 3738 struct mount *mp; 3739 struct vnode *vp; 3740 struct nameidata nd; 3741 cap_rights_t rights; 3742 int error; 3743 3744 restart: 3745 bwillwrite(); 3746 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3747 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3748 if ((error = namei(&nd)) != 0) 3749 return (error); 3750 vp = nd.ni_vp; 3751 if (vp->v_type != VDIR) { 3752 error = ENOTDIR; 3753 goto out; 3754 } 3755 /* 3756 * No rmdir "." please. 3757 */ 3758 if (nd.ni_dvp == vp) { 3759 error = EINVAL; 3760 goto out; 3761 } 3762 /* 3763 * The root of a mounted filesystem cannot be deleted. 3764 */ 3765 if (vp->v_vflag & VV_ROOT) { 3766 error = EBUSY; 3767 goto out; 3768 } 3769 #ifdef MAC 3770 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3771 &nd.ni_cnd); 3772 if (error != 0) 3773 goto out; 3774 #endif 3775 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3776 NDFREE(&nd, NDF_ONLY_PNBUF); 3777 vput(vp); 3778 if (nd.ni_dvp == vp) 3779 vrele(nd.ni_dvp); 3780 else 3781 vput(nd.ni_dvp); 3782 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3783 return (error); 3784 goto restart; 3785 } 3786 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3787 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3788 vn_finished_write(mp); 3789 out: 3790 NDFREE(&nd, NDF_ONLY_PNBUF); 3791 vput(vp); 3792 if (nd.ni_dvp == vp) 3793 vrele(nd.ni_dvp); 3794 else 3795 vput(nd.ni_dvp); 3796 return (error); 3797 } 3798 3799 #ifdef COMPAT_43 3800 /* 3801 * Read a block of directory entries in a filesystem independent format. 3802 */ 3803 #ifndef _SYS_SYSPROTO_H_ 3804 struct ogetdirentries_args { 3805 int fd; 3806 char *buf; 3807 u_int count; 3808 long *basep; 3809 }; 3810 #endif 3811 int 3812 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3813 { 3814 long loff; 3815 int error; 3816 3817 error = kern_ogetdirentries(td, uap, &loff); 3818 if (error == 0) 3819 error = copyout(&loff, uap->basep, sizeof(long)); 3820 return (error); 3821 } 3822 3823 int 3824 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3825 long *ploff) 3826 { 3827 struct vnode *vp; 3828 struct file *fp; 3829 struct uio auio, kuio; 3830 struct iovec aiov, kiov; 3831 struct dirent *dp, *edp; 3832 cap_rights_t rights; 3833 caddr_t dirbuf; 3834 int error, eofflag, readcnt; 3835 long loff; 3836 off_t foffset; 3837 3838 /* XXX arbitrary sanity limit on `count'. */ 3839 if (uap->count > 64 * 1024) 3840 return (EINVAL); 3841 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3842 if (error != 0) 3843 return (error); 3844 if ((fp->f_flag & FREAD) == 0) { 3845 fdrop(fp, td); 3846 return (EBADF); 3847 } 3848 vp = fp->f_vnode; 3849 foffset = foffset_lock(fp, 0); 3850 unionread: 3851 if (vp->v_type != VDIR) { 3852 foffset_unlock(fp, foffset, 0); 3853 fdrop(fp, td); 3854 return (EINVAL); 3855 } 3856 aiov.iov_base = uap->buf; 3857 aiov.iov_len = uap->count; 3858 auio.uio_iov = &aiov; 3859 auio.uio_iovcnt = 1; 3860 auio.uio_rw = UIO_READ; 3861 auio.uio_segflg = UIO_USERSPACE; 3862 auio.uio_td = td; 3863 auio.uio_resid = uap->count; 3864 vn_lock(vp, LK_SHARED | LK_RETRY); 3865 loff = auio.uio_offset = foffset; 3866 #ifdef MAC 3867 error = mac_vnode_check_readdir(td->td_ucred, vp); 3868 if (error != 0) { 3869 VOP_UNLOCK(vp, 0); 3870 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3871 fdrop(fp, td); 3872 return (error); 3873 } 3874 #endif 3875 # if (BYTE_ORDER != LITTLE_ENDIAN) 3876 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3877 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3878 NULL, NULL); 3879 foffset = auio.uio_offset; 3880 } else 3881 # endif 3882 { 3883 kuio = auio; 3884 kuio.uio_iov = &kiov; 3885 kuio.uio_segflg = UIO_SYSSPACE; 3886 kiov.iov_len = uap->count; 3887 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3888 kiov.iov_base = dirbuf; 3889 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3890 NULL, NULL); 3891 foffset = kuio.uio_offset; 3892 if (error == 0) { 3893 readcnt = uap->count - kuio.uio_resid; 3894 edp = (struct dirent *)&dirbuf[readcnt]; 3895 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3896 # if (BYTE_ORDER == LITTLE_ENDIAN) 3897 /* 3898 * The expected low byte of 3899 * dp->d_namlen is our dp->d_type. 3900 * The high MBZ byte of dp->d_namlen 3901 * is our dp->d_namlen. 3902 */ 3903 dp->d_type = dp->d_namlen; 3904 dp->d_namlen = 0; 3905 # else 3906 /* 3907 * The dp->d_type is the high byte 3908 * of the expected dp->d_namlen, 3909 * so must be zero'ed. 3910 */ 3911 dp->d_type = 0; 3912 # endif 3913 if (dp->d_reclen > 0) { 3914 dp = (struct dirent *) 3915 ((char *)dp + dp->d_reclen); 3916 } else { 3917 error = EIO; 3918 break; 3919 } 3920 } 3921 if (dp >= edp) 3922 error = uiomove(dirbuf, readcnt, &auio); 3923 } 3924 free(dirbuf, M_TEMP); 3925 } 3926 if (error != 0) { 3927 VOP_UNLOCK(vp, 0); 3928 foffset_unlock(fp, foffset, 0); 3929 fdrop(fp, td); 3930 return (error); 3931 } 3932 if (uap->count == auio.uio_resid && 3933 (vp->v_vflag & VV_ROOT) && 3934 (vp->v_mount->mnt_flag & MNT_UNION)) { 3935 struct vnode *tvp = vp; 3936 vp = vp->v_mount->mnt_vnodecovered; 3937 VREF(vp); 3938 fp->f_vnode = vp; 3939 fp->f_data = vp; 3940 foffset = 0; 3941 vput(tvp); 3942 goto unionread; 3943 } 3944 VOP_UNLOCK(vp, 0); 3945 foffset_unlock(fp, foffset, 0); 3946 fdrop(fp, td); 3947 td->td_retval[0] = uap->count - auio.uio_resid; 3948 if (error == 0) 3949 *ploff = loff; 3950 return (error); 3951 } 3952 #endif /* COMPAT_43 */ 3953 3954 /* 3955 * Read a block of directory entries in a filesystem independent format. 3956 */ 3957 #ifndef _SYS_SYSPROTO_H_ 3958 struct getdirentries_args { 3959 int fd; 3960 char *buf; 3961 u_int count; 3962 long *basep; 3963 }; 3964 #endif 3965 int 3966 sys_getdirentries(td, uap) 3967 struct thread *td; 3968 register struct getdirentries_args /* { 3969 int fd; 3970 char *buf; 3971 u_int count; 3972 long *basep; 3973 } */ *uap; 3974 { 3975 long base; 3976 int error; 3977 3978 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3979 NULL, UIO_USERSPACE); 3980 if (error != 0) 3981 return (error); 3982 if (uap->basep != NULL) 3983 error = copyout(&base, uap->basep, sizeof(long)); 3984 return (error); 3985 } 3986 3987 int 3988 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3989 long *basep, ssize_t *residp, enum uio_seg bufseg) 3990 { 3991 struct vnode *vp; 3992 struct file *fp; 3993 struct uio auio; 3994 struct iovec aiov; 3995 cap_rights_t rights; 3996 long loff; 3997 int error, eofflag; 3998 off_t foffset; 3999 4000 AUDIT_ARG_FD(fd); 4001 if (count > IOSIZE_MAX) 4002 return (EINVAL); 4003 auio.uio_resid = count; 4004 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 4005 if (error != 0) 4006 return (error); 4007 if ((fp->f_flag & FREAD) == 0) { 4008 fdrop(fp, td); 4009 return (EBADF); 4010 } 4011 vp = fp->f_vnode; 4012 foffset = foffset_lock(fp, 0); 4013 unionread: 4014 if (vp->v_type != VDIR) { 4015 error = EINVAL; 4016 goto fail; 4017 } 4018 aiov.iov_base = buf; 4019 aiov.iov_len = count; 4020 auio.uio_iov = &aiov; 4021 auio.uio_iovcnt = 1; 4022 auio.uio_rw = UIO_READ; 4023 auio.uio_segflg = bufseg; 4024 auio.uio_td = td; 4025 vn_lock(vp, LK_SHARED | LK_RETRY); 4026 AUDIT_ARG_VNODE1(vp); 4027 loff = auio.uio_offset = foffset; 4028 #ifdef MAC 4029 error = mac_vnode_check_readdir(td->td_ucred, vp); 4030 if (error == 0) 4031 #endif 4032 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4033 NULL); 4034 foffset = auio.uio_offset; 4035 if (error != 0) { 4036 VOP_UNLOCK(vp, 0); 4037 goto fail; 4038 } 4039 if (count == auio.uio_resid && 4040 (vp->v_vflag & VV_ROOT) && 4041 (vp->v_mount->mnt_flag & MNT_UNION)) { 4042 struct vnode *tvp = vp; 4043 4044 vp = vp->v_mount->mnt_vnodecovered; 4045 VREF(vp); 4046 fp->f_vnode = vp; 4047 fp->f_data = vp; 4048 foffset = 0; 4049 vput(tvp); 4050 goto unionread; 4051 } 4052 VOP_UNLOCK(vp, 0); 4053 *basep = loff; 4054 if (residp != NULL) 4055 *residp = auio.uio_resid; 4056 td->td_retval[0] = count - auio.uio_resid; 4057 fail: 4058 foffset_unlock(fp, foffset, 0); 4059 fdrop(fp, td); 4060 return (error); 4061 } 4062 4063 #ifndef _SYS_SYSPROTO_H_ 4064 struct getdents_args { 4065 int fd; 4066 char *buf; 4067 size_t count; 4068 }; 4069 #endif 4070 int 4071 sys_getdents(td, uap) 4072 struct thread *td; 4073 register struct getdents_args /* { 4074 int fd; 4075 char *buf; 4076 u_int count; 4077 } */ *uap; 4078 { 4079 struct getdirentries_args ap; 4080 4081 ap.fd = uap->fd; 4082 ap.buf = uap->buf; 4083 ap.count = uap->count; 4084 ap.basep = NULL; 4085 return (sys_getdirentries(td, &ap)); 4086 } 4087 4088 /* 4089 * Set the mode mask for creation of filesystem nodes. 4090 */ 4091 #ifndef _SYS_SYSPROTO_H_ 4092 struct umask_args { 4093 int newmask; 4094 }; 4095 #endif 4096 int 4097 sys_umask(td, uap) 4098 struct thread *td; 4099 struct umask_args /* { 4100 int newmask; 4101 } */ *uap; 4102 { 4103 struct filedesc *fdp; 4104 4105 fdp = td->td_proc->p_fd; 4106 FILEDESC_XLOCK(fdp); 4107 td->td_retval[0] = fdp->fd_cmask; 4108 fdp->fd_cmask = uap->newmask & ALLPERMS; 4109 FILEDESC_XUNLOCK(fdp); 4110 return (0); 4111 } 4112 4113 /* 4114 * Void all references to file by ripping underlying filesystem away from 4115 * vnode. 4116 */ 4117 #ifndef _SYS_SYSPROTO_H_ 4118 struct revoke_args { 4119 char *path; 4120 }; 4121 #endif 4122 int 4123 sys_revoke(td, uap) 4124 struct thread *td; 4125 register struct revoke_args /* { 4126 char *path; 4127 } */ *uap; 4128 { 4129 struct vnode *vp; 4130 struct vattr vattr; 4131 struct nameidata nd; 4132 int error; 4133 4134 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4135 uap->path, td); 4136 if ((error = namei(&nd)) != 0) 4137 return (error); 4138 vp = nd.ni_vp; 4139 NDFREE(&nd, NDF_ONLY_PNBUF); 4140 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4141 error = EINVAL; 4142 goto out; 4143 } 4144 #ifdef MAC 4145 error = mac_vnode_check_revoke(td->td_ucred, vp); 4146 if (error != 0) 4147 goto out; 4148 #endif 4149 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4150 if (error != 0) 4151 goto out; 4152 if (td->td_ucred->cr_uid != vattr.va_uid) { 4153 error = priv_check(td, PRIV_VFS_ADMIN); 4154 if (error != 0) 4155 goto out; 4156 } 4157 if (vcount(vp) > 1) 4158 VOP_REVOKE(vp, REVOKEALL); 4159 out: 4160 vput(vp); 4161 return (error); 4162 } 4163 4164 /* 4165 * Convert a user file descriptor to a kernel file entry and check that, if it 4166 * is a capability, the correct rights are present. A reference on the file 4167 * entry is held upon returning. 4168 */ 4169 int 4170 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4171 { 4172 struct file *fp; 4173 int error; 4174 4175 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4176 if (error != 0) 4177 return (error); 4178 4179 /* 4180 * The file could be not of the vnode type, or it may be not 4181 * yet fully initialized, in which case the f_vnode pointer 4182 * may be set, but f_ops is still badfileops. E.g., 4183 * devfs_open() transiently create such situation to 4184 * facilitate csw d_fdopen(). 4185 * 4186 * Dupfdopen() handling in kern_openat() installs the 4187 * half-baked file into the process descriptor table, allowing 4188 * other thread to dereference it. Guard against the race by 4189 * checking f_ops. 4190 */ 4191 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4192 fdrop(fp, td); 4193 return (EINVAL); 4194 } 4195 *fpp = fp; 4196 return (0); 4197 } 4198 4199 4200 /* 4201 * Get an (NFS) file handle. 4202 */ 4203 #ifndef _SYS_SYSPROTO_H_ 4204 struct lgetfh_args { 4205 char *fname; 4206 fhandle_t *fhp; 4207 }; 4208 #endif 4209 int 4210 sys_lgetfh(td, uap) 4211 struct thread *td; 4212 register struct lgetfh_args *uap; 4213 { 4214 struct nameidata nd; 4215 fhandle_t fh; 4216 register struct vnode *vp; 4217 int error; 4218 4219 error = priv_check(td, PRIV_VFS_GETFH); 4220 if (error != 0) 4221 return (error); 4222 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4223 uap->fname, td); 4224 error = namei(&nd); 4225 if (error != 0) 4226 return (error); 4227 NDFREE(&nd, NDF_ONLY_PNBUF); 4228 vp = nd.ni_vp; 4229 bzero(&fh, sizeof(fh)); 4230 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4231 error = VOP_VPTOFH(vp, &fh.fh_fid); 4232 vput(vp); 4233 if (error == 0) 4234 error = copyout(&fh, uap->fhp, sizeof (fh)); 4235 return (error); 4236 } 4237 4238 #ifndef _SYS_SYSPROTO_H_ 4239 struct getfh_args { 4240 char *fname; 4241 fhandle_t *fhp; 4242 }; 4243 #endif 4244 int 4245 sys_getfh(td, uap) 4246 struct thread *td; 4247 register struct getfh_args *uap; 4248 { 4249 struct nameidata nd; 4250 fhandle_t fh; 4251 register struct vnode *vp; 4252 int error; 4253 4254 error = priv_check(td, PRIV_VFS_GETFH); 4255 if (error != 0) 4256 return (error); 4257 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4258 uap->fname, td); 4259 error = namei(&nd); 4260 if (error != 0) 4261 return (error); 4262 NDFREE(&nd, NDF_ONLY_PNBUF); 4263 vp = nd.ni_vp; 4264 bzero(&fh, sizeof(fh)); 4265 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4266 error = VOP_VPTOFH(vp, &fh.fh_fid); 4267 vput(vp); 4268 if (error == 0) 4269 error = copyout(&fh, uap->fhp, sizeof (fh)); 4270 return (error); 4271 } 4272 4273 /* 4274 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4275 * open descriptor. 4276 * 4277 * warning: do not remove the priv_check() call or this becomes one giant 4278 * security hole. 4279 */ 4280 #ifndef _SYS_SYSPROTO_H_ 4281 struct fhopen_args { 4282 const struct fhandle *u_fhp; 4283 int flags; 4284 }; 4285 #endif 4286 int 4287 sys_fhopen(td, uap) 4288 struct thread *td; 4289 struct fhopen_args /* { 4290 const struct fhandle *u_fhp; 4291 int flags; 4292 } */ *uap; 4293 { 4294 struct mount *mp; 4295 struct vnode *vp; 4296 struct fhandle fhp; 4297 struct file *fp; 4298 int fmode, error; 4299 int indx; 4300 4301 error = priv_check(td, PRIV_VFS_FHOPEN); 4302 if (error != 0) 4303 return (error); 4304 indx = -1; 4305 fmode = FFLAGS(uap->flags); 4306 /* why not allow a non-read/write open for our lockd? */ 4307 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4308 return (EINVAL); 4309 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4310 if (error != 0) 4311 return(error); 4312 /* find the mount point */ 4313 mp = vfs_busyfs(&fhp.fh_fsid); 4314 if (mp == NULL) 4315 return (ESTALE); 4316 /* now give me my vnode, it gets returned to me locked */ 4317 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4318 vfs_unbusy(mp); 4319 if (error != 0) 4320 return (error); 4321 4322 error = falloc_noinstall(td, &fp); 4323 if (error != 0) { 4324 vput(vp); 4325 return (error); 4326 } 4327 /* 4328 * An extra reference on `fp' has been held for us by 4329 * falloc_noinstall(). 4330 */ 4331 4332 #ifdef INVARIANTS 4333 td->td_dupfd = -1; 4334 #endif 4335 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4336 if (error != 0) { 4337 KASSERT(fp->f_ops == &badfileops, 4338 ("VOP_OPEN in fhopen() set f_ops")); 4339 KASSERT(td->td_dupfd < 0, 4340 ("fhopen() encountered fdopen()")); 4341 4342 vput(vp); 4343 goto bad; 4344 } 4345 #ifdef INVARIANTS 4346 td->td_dupfd = 0; 4347 #endif 4348 fp->f_vnode = vp; 4349 fp->f_seqcount = 1; 4350 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4351 &vnops); 4352 VOP_UNLOCK(vp, 0); 4353 if ((fmode & O_TRUNC) != 0) { 4354 error = fo_truncate(fp, 0, td->td_ucred, td); 4355 if (error != 0) 4356 goto bad; 4357 } 4358 4359 error = finstall(td, fp, &indx, fmode, NULL); 4360 bad: 4361 fdrop(fp, td); 4362 td->td_retval[0] = indx; 4363 return (error); 4364 } 4365 4366 /* 4367 * Stat an (NFS) file handle. 4368 */ 4369 #ifndef _SYS_SYSPROTO_H_ 4370 struct fhstat_args { 4371 struct fhandle *u_fhp; 4372 struct stat *sb; 4373 }; 4374 #endif 4375 int 4376 sys_fhstat(td, uap) 4377 struct thread *td; 4378 register struct fhstat_args /* { 4379 struct fhandle *u_fhp; 4380 struct stat *sb; 4381 } */ *uap; 4382 { 4383 struct stat sb; 4384 struct fhandle fh; 4385 int error; 4386 4387 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4388 if (error != 0) 4389 return (error); 4390 error = kern_fhstat(td, fh, &sb); 4391 if (error == 0) 4392 error = copyout(&sb, uap->sb, sizeof(sb)); 4393 return (error); 4394 } 4395 4396 int 4397 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4398 { 4399 struct mount *mp; 4400 struct vnode *vp; 4401 int error; 4402 4403 error = priv_check(td, PRIV_VFS_FHSTAT); 4404 if (error != 0) 4405 return (error); 4406 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4407 return (ESTALE); 4408 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4409 vfs_unbusy(mp); 4410 if (error != 0) 4411 return (error); 4412 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4413 vput(vp); 4414 return (error); 4415 } 4416 4417 /* 4418 * Implement fstatfs() for (NFS) file handles. 4419 */ 4420 #ifndef _SYS_SYSPROTO_H_ 4421 struct fhstatfs_args { 4422 struct fhandle *u_fhp; 4423 struct statfs *buf; 4424 }; 4425 #endif 4426 int 4427 sys_fhstatfs(td, uap) 4428 struct thread *td; 4429 struct fhstatfs_args /* { 4430 struct fhandle *u_fhp; 4431 struct statfs *buf; 4432 } */ *uap; 4433 { 4434 struct statfs sf; 4435 fhandle_t fh; 4436 int error; 4437 4438 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4439 if (error != 0) 4440 return (error); 4441 error = kern_fhstatfs(td, fh, &sf); 4442 if (error != 0) 4443 return (error); 4444 return (copyout(&sf, uap->buf, sizeof(sf))); 4445 } 4446 4447 int 4448 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4449 { 4450 struct statfs *sp; 4451 struct mount *mp; 4452 struct vnode *vp; 4453 int error; 4454 4455 error = priv_check(td, PRIV_VFS_FHSTATFS); 4456 if (error != 0) 4457 return (error); 4458 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4459 return (ESTALE); 4460 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4461 if (error != 0) { 4462 vfs_unbusy(mp); 4463 return (error); 4464 } 4465 vput(vp); 4466 error = prison_canseemount(td->td_ucred, mp); 4467 if (error != 0) 4468 goto out; 4469 #ifdef MAC 4470 error = mac_mount_check_stat(td->td_ucred, mp); 4471 if (error != 0) 4472 goto out; 4473 #endif 4474 /* 4475 * Set these in case the underlying filesystem fails to do so. 4476 */ 4477 sp = &mp->mnt_stat; 4478 sp->f_version = STATFS_VERSION; 4479 sp->f_namemax = NAME_MAX; 4480 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4481 error = VFS_STATFS(mp, sp); 4482 if (error == 0) 4483 *buf = *sp; 4484 out: 4485 vfs_unbusy(mp); 4486 return (error); 4487 } 4488 4489 int 4490 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4491 { 4492 struct file *fp; 4493 struct mount *mp; 4494 struct vnode *vp; 4495 cap_rights_t rights; 4496 off_t olen, ooffset; 4497 int error; 4498 4499 if (offset < 0 || len <= 0) 4500 return (EINVAL); 4501 /* Check for wrap. */ 4502 if (offset > OFF_MAX - len) 4503 return (EFBIG); 4504 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4505 if (error != 0) 4506 return (error); 4507 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4508 error = ESPIPE; 4509 goto out; 4510 } 4511 if ((fp->f_flag & FWRITE) == 0) { 4512 error = EBADF; 4513 goto out; 4514 } 4515 if (fp->f_type != DTYPE_VNODE) { 4516 error = ENODEV; 4517 goto out; 4518 } 4519 vp = fp->f_vnode; 4520 if (vp->v_type != VREG) { 4521 error = ENODEV; 4522 goto out; 4523 } 4524 4525 /* Allocating blocks may take a long time, so iterate. */ 4526 for (;;) { 4527 olen = len; 4528 ooffset = offset; 4529 4530 bwillwrite(); 4531 mp = NULL; 4532 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4533 if (error != 0) 4534 break; 4535 error = vn_lock(vp, LK_EXCLUSIVE); 4536 if (error != 0) { 4537 vn_finished_write(mp); 4538 break; 4539 } 4540 #ifdef MAC 4541 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4542 if (error == 0) 4543 #endif 4544 error = VOP_ALLOCATE(vp, &offset, &len); 4545 VOP_UNLOCK(vp, 0); 4546 vn_finished_write(mp); 4547 4548 if (olen + ooffset != offset + len) { 4549 panic("offset + len changed from %jx/%jx to %jx/%jx", 4550 ooffset, olen, offset, len); 4551 } 4552 if (error != 0 || len == 0) 4553 break; 4554 KASSERT(olen > len, ("Iteration did not make progress?")); 4555 maybe_yield(); 4556 } 4557 out: 4558 fdrop(fp, td); 4559 return (error); 4560 } 4561 4562 int 4563 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4564 { 4565 int error; 4566 4567 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4568 return (kern_posix_error(td, error)); 4569 } 4570 4571 /* 4572 * Unlike madvise(2), we do not make a best effort to remember every 4573 * possible caching hint. Instead, we remember the last setting with 4574 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4575 * region of any current setting. 4576 */ 4577 int 4578 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4579 int advice) 4580 { 4581 struct fadvise_info *fa, *new; 4582 struct file *fp; 4583 struct vnode *vp; 4584 cap_rights_t rights; 4585 off_t end; 4586 int error; 4587 4588 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4589 return (EINVAL); 4590 switch (advice) { 4591 case POSIX_FADV_SEQUENTIAL: 4592 case POSIX_FADV_RANDOM: 4593 case POSIX_FADV_NOREUSE: 4594 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4595 break; 4596 case POSIX_FADV_NORMAL: 4597 case POSIX_FADV_WILLNEED: 4598 case POSIX_FADV_DONTNEED: 4599 new = NULL; 4600 break; 4601 default: 4602 return (EINVAL); 4603 } 4604 /* XXX: CAP_POSIX_FADVISE? */ 4605 error = fget(td, fd, cap_rights_init(&rights), &fp); 4606 if (error != 0) 4607 goto out; 4608 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4609 error = ESPIPE; 4610 goto out; 4611 } 4612 if (fp->f_type != DTYPE_VNODE) { 4613 error = ENODEV; 4614 goto out; 4615 } 4616 vp = fp->f_vnode; 4617 if (vp->v_type != VREG) { 4618 error = ENODEV; 4619 goto out; 4620 } 4621 if (len == 0) 4622 end = OFF_MAX; 4623 else 4624 end = offset + len - 1; 4625 switch (advice) { 4626 case POSIX_FADV_SEQUENTIAL: 4627 case POSIX_FADV_RANDOM: 4628 case POSIX_FADV_NOREUSE: 4629 /* 4630 * Try to merge any existing non-standard region with 4631 * this new region if possible, otherwise create a new 4632 * non-standard region for this request. 4633 */ 4634 mtx_pool_lock(mtxpool_sleep, fp); 4635 fa = fp->f_advice; 4636 if (fa != NULL && fa->fa_advice == advice && 4637 ((fa->fa_start <= end && fa->fa_end >= offset) || 4638 (end != OFF_MAX && fa->fa_start == end + 1) || 4639 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4640 if (offset < fa->fa_start) 4641 fa->fa_start = offset; 4642 if (end > fa->fa_end) 4643 fa->fa_end = end; 4644 } else { 4645 new->fa_advice = advice; 4646 new->fa_start = offset; 4647 new->fa_end = end; 4648 fp->f_advice = new; 4649 new = fa; 4650 } 4651 mtx_pool_unlock(mtxpool_sleep, fp); 4652 break; 4653 case POSIX_FADV_NORMAL: 4654 /* 4655 * If a the "normal" region overlaps with an existing 4656 * non-standard region, trim or remove the 4657 * non-standard region. 4658 */ 4659 mtx_pool_lock(mtxpool_sleep, fp); 4660 fa = fp->f_advice; 4661 if (fa != NULL) { 4662 if (offset <= fa->fa_start && end >= fa->fa_end) { 4663 new = fa; 4664 fp->f_advice = NULL; 4665 } else if (offset <= fa->fa_start && 4666 end >= fa->fa_start) 4667 fa->fa_start = end + 1; 4668 else if (offset <= fa->fa_end && end >= fa->fa_end) 4669 fa->fa_end = offset - 1; 4670 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4671 /* 4672 * If the "normal" region is a middle 4673 * portion of the existing 4674 * non-standard region, just remove 4675 * the whole thing rather than picking 4676 * one side or the other to 4677 * preserve. 4678 */ 4679 new = fa; 4680 fp->f_advice = NULL; 4681 } 4682 } 4683 mtx_pool_unlock(mtxpool_sleep, fp); 4684 break; 4685 case POSIX_FADV_WILLNEED: 4686 case POSIX_FADV_DONTNEED: 4687 error = VOP_ADVISE(vp, offset, end, advice); 4688 break; 4689 } 4690 out: 4691 if (fp != NULL) 4692 fdrop(fp, td); 4693 free(new, M_FADVISE); 4694 return (error); 4695 } 4696 4697 int 4698 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4699 { 4700 int error; 4701 4702 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4703 uap->advice); 4704 return (kern_posix_error(td, error)); 4705 } 4706