1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int kern_chflagsat(struct thread *td, int fd, const char *path, 99 enum uio_seg pathseg, u_long flags, int atflag); 100 static int setfflags(struct thread *td, struct vnode *, u_long); 101 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 102 static int getutimens(const struct timespec *, enum uio_seg, 103 struct timespec *, int *); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 /* 118 * Sync each mounted filesystem. 119 */ 120 #ifndef _SYS_SYSPROTO_H_ 121 struct sync_args { 122 int dummy; 123 }; 124 #endif 125 /* ARGSUSED */ 126 int 127 sys_sync(td, uap) 128 struct thread *td; 129 struct sync_args *uap; 130 { 131 struct mount *mp, *nmp; 132 int save; 133 134 mtx_lock(&mountlist_mtx); 135 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 136 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 137 nmp = TAILQ_NEXT(mp, mnt_list); 138 continue; 139 } 140 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 141 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 142 save = curthread_pflags_set(TDP_SYNCIO); 143 vfs_msync(mp, MNT_NOWAIT); 144 VFS_SYNC(mp, MNT_NOWAIT); 145 curthread_pflags_restore(save); 146 vn_finished_write(mp); 147 } 148 mtx_lock(&mountlist_mtx); 149 nmp = TAILQ_NEXT(mp, mnt_list); 150 vfs_unbusy(mp); 151 } 152 mtx_unlock(&mountlist_mtx); 153 return (0); 154 } 155 156 /* 157 * Change filesystem quotas. 158 */ 159 #ifndef _SYS_SYSPROTO_H_ 160 struct quotactl_args { 161 char *path; 162 int cmd; 163 int uid; 164 caddr_t arg; 165 }; 166 #endif 167 int 168 sys_quotactl(td, uap) 169 struct thread *td; 170 register struct quotactl_args /* { 171 char *path; 172 int cmd; 173 int uid; 174 caddr_t arg; 175 } */ *uap; 176 { 177 struct mount *mp; 178 struct nameidata nd; 179 int error; 180 181 AUDIT_ARG_CMD(uap->cmd); 182 AUDIT_ARG_UID(uap->uid); 183 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 184 return (EPERM); 185 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 186 uap->path, td); 187 if ((error = namei(&nd)) != 0) 188 return (error); 189 NDFREE(&nd, NDF_ONLY_PNBUF); 190 mp = nd.ni_vp->v_mount; 191 vfs_ref(mp); 192 vput(nd.ni_vp); 193 error = vfs_busy(mp, 0); 194 vfs_rel(mp); 195 if (error != 0) 196 return (error); 197 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 198 199 /* 200 * Since quota on operation typically needs to open quota 201 * file, the Q_QUOTAON handler needs to unbusy the mount point 202 * before calling into namei. Otherwise, unmount might be 203 * started between two vfs_busy() invocations (first is our, 204 * second is from mount point cross-walk code in lookup()), 205 * causing deadlock. 206 * 207 * Require that Q_QUOTAON handles the vfs_busy() reference on 208 * its own, always returning with ubusied mount point. 209 */ 210 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 211 vfs_unbusy(mp); 212 return (error); 213 } 214 215 /* 216 * Used by statfs conversion routines to scale the block size up if 217 * necessary so that all of the block counts are <= 'max_size'. Note 218 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 219 * value of 'n'. 220 */ 221 void 222 statfs_scale_blocks(struct statfs *sf, long max_size) 223 { 224 uint64_t count; 225 int shift; 226 227 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 228 229 /* 230 * Attempt to scale the block counts to give a more accurate 231 * overview to userland of the ratio of free space to used 232 * space. To do this, find the largest block count and compute 233 * a divisor that lets it fit into a signed integer <= max_size. 234 */ 235 if (sf->f_bavail < 0) 236 count = -sf->f_bavail; 237 else 238 count = sf->f_bavail; 239 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 240 if (count <= max_size) 241 return; 242 243 count >>= flsl(max_size); 244 shift = 0; 245 while (count > 0) { 246 shift++; 247 count >>=1; 248 } 249 250 sf->f_bsize <<= shift; 251 sf->f_blocks >>= shift; 252 sf->f_bfree >>= shift; 253 sf->f_bavail >>= shift; 254 } 255 256 /* 257 * Get filesystem statistics. 258 */ 259 #ifndef _SYS_SYSPROTO_H_ 260 struct statfs_args { 261 char *path; 262 struct statfs *buf; 263 }; 264 #endif 265 int 266 sys_statfs(td, uap) 267 struct thread *td; 268 register struct statfs_args /* { 269 char *path; 270 struct statfs *buf; 271 } */ *uap; 272 { 273 struct statfs sf; 274 int error; 275 276 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 277 if (error == 0) 278 error = copyout(&sf, uap->buf, sizeof(sf)); 279 return (error); 280 } 281 282 int 283 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 284 struct statfs *buf) 285 { 286 struct mount *mp; 287 struct statfs *sp, sb; 288 struct nameidata nd; 289 int error; 290 291 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 292 pathseg, path, td); 293 error = namei(&nd); 294 if (error != 0) 295 return (error); 296 mp = nd.ni_vp->v_mount; 297 vfs_ref(mp); 298 NDFREE(&nd, NDF_ONLY_PNBUF); 299 vput(nd.ni_vp); 300 error = vfs_busy(mp, 0); 301 vfs_rel(mp); 302 if (error != 0) 303 return (error); 304 #ifdef MAC 305 error = mac_mount_check_stat(td->td_ucred, mp); 306 if (error != 0) 307 goto out; 308 #endif 309 /* 310 * Set these in case the underlying filesystem fails to do so. 311 */ 312 sp = &mp->mnt_stat; 313 sp->f_version = STATFS_VERSION; 314 sp->f_namemax = NAME_MAX; 315 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 316 error = VFS_STATFS(mp, sp); 317 if (error != 0) 318 goto out; 319 if (priv_check(td, PRIV_VFS_GENERATION)) { 320 bcopy(sp, &sb, sizeof(sb)); 321 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 322 prison_enforce_statfs(td->td_ucred, mp, &sb); 323 sp = &sb; 324 } 325 *buf = *sp; 326 out: 327 vfs_unbusy(mp); 328 return (error); 329 } 330 331 /* 332 * Get filesystem statistics. 333 */ 334 #ifndef _SYS_SYSPROTO_H_ 335 struct fstatfs_args { 336 int fd; 337 struct statfs *buf; 338 }; 339 #endif 340 int 341 sys_fstatfs(td, uap) 342 struct thread *td; 343 register struct fstatfs_args /* { 344 int fd; 345 struct statfs *buf; 346 } */ *uap; 347 { 348 struct statfs sf; 349 int error; 350 351 error = kern_fstatfs(td, uap->fd, &sf); 352 if (error == 0) 353 error = copyout(&sf, uap->buf, sizeof(sf)); 354 return (error); 355 } 356 357 int 358 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 359 { 360 struct file *fp; 361 struct mount *mp; 362 struct statfs *sp, sb; 363 struct vnode *vp; 364 cap_rights_t rights; 365 int error; 366 367 AUDIT_ARG_FD(fd); 368 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 369 if (error != 0) 370 return (error); 371 vp = fp->f_vnode; 372 vn_lock(vp, LK_SHARED | LK_RETRY); 373 #ifdef AUDIT 374 AUDIT_ARG_VNODE1(vp); 375 #endif 376 mp = vp->v_mount; 377 if (mp) 378 vfs_ref(mp); 379 VOP_UNLOCK(vp, 0); 380 fdrop(fp, td); 381 if (mp == NULL) { 382 error = EBADF; 383 goto out; 384 } 385 error = vfs_busy(mp, 0); 386 vfs_rel(mp); 387 if (error != 0) 388 return (error); 389 #ifdef MAC 390 error = mac_mount_check_stat(td->td_ucred, mp); 391 if (error != 0) 392 goto out; 393 #endif 394 /* 395 * Set these in case the underlying filesystem fails to do so. 396 */ 397 sp = &mp->mnt_stat; 398 sp->f_version = STATFS_VERSION; 399 sp->f_namemax = NAME_MAX; 400 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 401 error = VFS_STATFS(mp, sp); 402 if (error != 0) 403 goto out; 404 if (priv_check(td, PRIV_VFS_GENERATION)) { 405 bcopy(sp, &sb, sizeof(sb)); 406 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 407 prison_enforce_statfs(td->td_ucred, mp, &sb); 408 sp = &sb; 409 } 410 *buf = *sp; 411 out: 412 if (mp) 413 vfs_unbusy(mp); 414 return (error); 415 } 416 417 /* 418 * Get statistics on all filesystems. 419 */ 420 #ifndef _SYS_SYSPROTO_H_ 421 struct getfsstat_args { 422 struct statfs *buf; 423 long bufsize; 424 int flags; 425 }; 426 #endif 427 int 428 sys_getfsstat(td, uap) 429 struct thread *td; 430 register struct getfsstat_args /* { 431 struct statfs *buf; 432 long bufsize; 433 int flags; 434 } */ *uap; 435 { 436 size_t count; 437 int error; 438 439 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 440 UIO_USERSPACE, uap->flags); 441 if (error == 0) 442 td->td_retval[0] = count; 443 return (error); 444 } 445 446 /* 447 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 448 * The caller is responsible for freeing memory which will be allocated 449 * in '*buf'. 450 */ 451 int 452 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 453 size_t *countp, enum uio_seg bufseg, int flags) 454 { 455 struct mount *mp, *nmp; 456 struct statfs *sfsp, *sp, sb; 457 size_t count, maxcount; 458 int error; 459 460 maxcount = bufsize / sizeof(struct statfs); 461 if (bufsize == 0) 462 sfsp = NULL; 463 else if (bufseg == UIO_USERSPACE) 464 sfsp = *buf; 465 else /* if (bufseg == UIO_SYSSPACE) */ { 466 count = 0; 467 mtx_lock(&mountlist_mtx); 468 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 469 count++; 470 } 471 mtx_unlock(&mountlist_mtx); 472 if (maxcount > count) 473 maxcount = count; 474 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 475 M_WAITOK); 476 } 477 count = 0; 478 mtx_lock(&mountlist_mtx); 479 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 480 if (prison_canseemount(td->td_ucred, mp) != 0) { 481 nmp = TAILQ_NEXT(mp, mnt_list); 482 continue; 483 } 484 #ifdef MAC 485 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 486 nmp = TAILQ_NEXT(mp, mnt_list); 487 continue; 488 } 489 #endif 490 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 491 nmp = TAILQ_NEXT(mp, mnt_list); 492 continue; 493 } 494 if (sfsp && count < maxcount) { 495 sp = &mp->mnt_stat; 496 /* 497 * Set these in case the underlying filesystem 498 * fails to do so. 499 */ 500 sp->f_version = STATFS_VERSION; 501 sp->f_namemax = NAME_MAX; 502 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 503 /* 504 * If MNT_NOWAIT or MNT_LAZY is specified, do not 505 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 506 * overrides MNT_WAIT. 507 */ 508 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 509 (flags & MNT_WAIT)) && 510 (error = VFS_STATFS(mp, sp))) { 511 mtx_lock(&mountlist_mtx); 512 nmp = TAILQ_NEXT(mp, mnt_list); 513 vfs_unbusy(mp); 514 continue; 515 } 516 if (priv_check(td, PRIV_VFS_GENERATION)) { 517 bcopy(sp, &sb, sizeof(sb)); 518 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 519 prison_enforce_statfs(td->td_ucred, mp, &sb); 520 sp = &sb; 521 } 522 if (bufseg == UIO_SYSSPACE) 523 bcopy(sp, sfsp, sizeof(*sp)); 524 else /* if (bufseg == UIO_USERSPACE) */ { 525 error = copyout(sp, sfsp, sizeof(*sp)); 526 if (error != 0) { 527 vfs_unbusy(mp); 528 return (error); 529 } 530 } 531 sfsp++; 532 } 533 count++; 534 mtx_lock(&mountlist_mtx); 535 nmp = TAILQ_NEXT(mp, mnt_list); 536 vfs_unbusy(mp); 537 } 538 mtx_unlock(&mountlist_mtx); 539 if (sfsp && count > maxcount) 540 *countp = maxcount; 541 else 542 *countp = count; 543 return (0); 544 } 545 546 #ifdef COMPAT_FREEBSD4 547 /* 548 * Get old format filesystem statistics. 549 */ 550 static void cvtstatfs(struct statfs *, struct ostatfs *); 551 552 #ifndef _SYS_SYSPROTO_H_ 553 struct freebsd4_statfs_args { 554 char *path; 555 struct ostatfs *buf; 556 }; 557 #endif 558 int 559 freebsd4_statfs(td, uap) 560 struct thread *td; 561 struct freebsd4_statfs_args /* { 562 char *path; 563 struct ostatfs *buf; 564 } */ *uap; 565 { 566 struct ostatfs osb; 567 struct statfs sf; 568 int error; 569 570 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 571 if (error != 0) 572 return (error); 573 cvtstatfs(&sf, &osb); 574 return (copyout(&osb, uap->buf, sizeof(osb))); 575 } 576 577 /* 578 * Get filesystem statistics. 579 */ 580 #ifndef _SYS_SYSPROTO_H_ 581 struct freebsd4_fstatfs_args { 582 int fd; 583 struct ostatfs *buf; 584 }; 585 #endif 586 int 587 freebsd4_fstatfs(td, uap) 588 struct thread *td; 589 struct freebsd4_fstatfs_args /* { 590 int fd; 591 struct ostatfs *buf; 592 } */ *uap; 593 { 594 struct ostatfs osb; 595 struct statfs sf; 596 int error; 597 598 error = kern_fstatfs(td, uap->fd, &sf); 599 if (error != 0) 600 return (error); 601 cvtstatfs(&sf, &osb); 602 return (copyout(&osb, uap->buf, sizeof(osb))); 603 } 604 605 /* 606 * Get statistics on all filesystems. 607 */ 608 #ifndef _SYS_SYSPROTO_H_ 609 struct freebsd4_getfsstat_args { 610 struct ostatfs *buf; 611 long bufsize; 612 int flags; 613 }; 614 #endif 615 int 616 freebsd4_getfsstat(td, uap) 617 struct thread *td; 618 register struct freebsd4_getfsstat_args /* { 619 struct ostatfs *buf; 620 long bufsize; 621 int flags; 622 } */ *uap; 623 { 624 struct statfs *buf, *sp; 625 struct ostatfs osb; 626 size_t count, size; 627 int error; 628 629 count = uap->bufsize / sizeof(struct ostatfs); 630 size = count * sizeof(struct statfs); 631 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 632 uap->flags); 633 if (size > 0) { 634 sp = buf; 635 while (count > 0 && error == 0) { 636 cvtstatfs(sp, &osb); 637 error = copyout(&osb, uap->buf, sizeof(osb)); 638 sp++; 639 uap->buf++; 640 count--; 641 } 642 free(buf, M_TEMP); 643 } 644 if (error == 0) 645 td->td_retval[0] = count; 646 return (error); 647 } 648 649 /* 650 * Implement fstatfs() for (NFS) file handles. 651 */ 652 #ifndef _SYS_SYSPROTO_H_ 653 struct freebsd4_fhstatfs_args { 654 struct fhandle *u_fhp; 655 struct ostatfs *buf; 656 }; 657 #endif 658 int 659 freebsd4_fhstatfs(td, uap) 660 struct thread *td; 661 struct freebsd4_fhstatfs_args /* { 662 struct fhandle *u_fhp; 663 struct ostatfs *buf; 664 } */ *uap; 665 { 666 struct ostatfs osb; 667 struct statfs sf; 668 fhandle_t fh; 669 int error; 670 671 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 672 if (error != 0) 673 return (error); 674 error = kern_fhstatfs(td, fh, &sf); 675 if (error != 0) 676 return (error); 677 cvtstatfs(&sf, &osb); 678 return (copyout(&osb, uap->buf, sizeof(osb))); 679 } 680 681 /* 682 * Convert a new format statfs structure to an old format statfs structure. 683 */ 684 static void 685 cvtstatfs(nsp, osp) 686 struct statfs *nsp; 687 struct ostatfs *osp; 688 { 689 690 statfs_scale_blocks(nsp, LONG_MAX); 691 bzero(osp, sizeof(*osp)); 692 osp->f_bsize = nsp->f_bsize; 693 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 694 osp->f_blocks = nsp->f_blocks; 695 osp->f_bfree = nsp->f_bfree; 696 osp->f_bavail = nsp->f_bavail; 697 osp->f_files = MIN(nsp->f_files, LONG_MAX); 698 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 699 osp->f_owner = nsp->f_owner; 700 osp->f_type = nsp->f_type; 701 osp->f_flags = nsp->f_flags; 702 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 703 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 704 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 705 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 706 strlcpy(osp->f_fstypename, nsp->f_fstypename, 707 MIN(MFSNAMELEN, OMFSNAMELEN)); 708 strlcpy(osp->f_mntonname, nsp->f_mntonname, 709 MIN(MNAMELEN, OMNAMELEN)); 710 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 711 MIN(MNAMELEN, OMNAMELEN)); 712 osp->f_fsid = nsp->f_fsid; 713 } 714 #endif /* COMPAT_FREEBSD4 */ 715 716 /* 717 * Change current working directory to a given file descriptor. 718 */ 719 #ifndef _SYS_SYSPROTO_H_ 720 struct fchdir_args { 721 int fd; 722 }; 723 #endif 724 int 725 sys_fchdir(td, uap) 726 struct thread *td; 727 struct fchdir_args /* { 728 int fd; 729 } */ *uap; 730 { 731 register struct filedesc *fdp = td->td_proc->p_fd; 732 struct vnode *vp, *tdp, *vpold; 733 struct mount *mp; 734 struct file *fp; 735 cap_rights_t rights; 736 int error; 737 738 AUDIT_ARG_FD(uap->fd); 739 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 740 &fp); 741 if (error != 0) 742 return (error); 743 vp = fp->f_vnode; 744 VREF(vp); 745 fdrop(fp, td); 746 vn_lock(vp, LK_SHARED | LK_RETRY); 747 AUDIT_ARG_VNODE1(vp); 748 error = change_dir(vp, td); 749 while (!error && (mp = vp->v_mountedhere) != NULL) { 750 if (vfs_busy(mp, 0)) 751 continue; 752 error = VFS_ROOT(mp, LK_SHARED, &tdp); 753 vfs_unbusy(mp); 754 if (error != 0) 755 break; 756 vput(vp); 757 vp = tdp; 758 } 759 if (error != 0) { 760 vput(vp); 761 return (error); 762 } 763 VOP_UNLOCK(vp, 0); 764 FILEDESC_XLOCK(fdp); 765 vpold = fdp->fd_cdir; 766 fdp->fd_cdir = vp; 767 FILEDESC_XUNLOCK(fdp); 768 vrele(vpold); 769 return (0); 770 } 771 772 /* 773 * Change current working directory (``.''). 774 */ 775 #ifndef _SYS_SYSPROTO_H_ 776 struct chdir_args { 777 char *path; 778 }; 779 #endif 780 int 781 sys_chdir(td, uap) 782 struct thread *td; 783 struct chdir_args /* { 784 char *path; 785 } */ *uap; 786 { 787 788 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 789 } 790 791 int 792 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 793 { 794 register struct filedesc *fdp = td->td_proc->p_fd; 795 struct nameidata nd; 796 struct vnode *vp; 797 int error; 798 799 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 800 pathseg, path, td); 801 if ((error = namei(&nd)) != 0) 802 return (error); 803 if ((error = change_dir(nd.ni_vp, td)) != 0) { 804 vput(nd.ni_vp); 805 NDFREE(&nd, NDF_ONLY_PNBUF); 806 return (error); 807 } 808 VOP_UNLOCK(nd.ni_vp, 0); 809 NDFREE(&nd, NDF_ONLY_PNBUF); 810 FILEDESC_XLOCK(fdp); 811 vp = fdp->fd_cdir; 812 fdp->fd_cdir = nd.ni_vp; 813 FILEDESC_XUNLOCK(fdp); 814 vrele(vp); 815 return (0); 816 } 817 818 /* 819 * Helper function for raised chroot(2) security function: Refuse if 820 * any filedescriptors are open directories. 821 */ 822 static int 823 chroot_refuse_vdir_fds(fdp) 824 struct filedesc *fdp; 825 { 826 struct vnode *vp; 827 struct file *fp; 828 int fd; 829 830 FILEDESC_LOCK_ASSERT(fdp); 831 832 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 833 fp = fget_locked(fdp, fd); 834 if (fp == NULL) 835 continue; 836 if (fp->f_type == DTYPE_VNODE) { 837 vp = fp->f_vnode; 838 if (vp->v_type == VDIR) 839 return (EPERM); 840 } 841 } 842 return (0); 843 } 844 845 /* 846 * This sysctl determines if we will allow a process to chroot(2) if it 847 * has a directory open: 848 * 0: disallowed for all processes. 849 * 1: allowed for processes that were not already chroot(2)'ed. 850 * 2: allowed for all processes. 851 */ 852 853 static int chroot_allow_open_directories = 1; 854 855 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 856 &chroot_allow_open_directories, 0, 857 "Allow a process to chroot(2) if it has a directory open"); 858 859 /* 860 * Change notion of root (``/'') directory. 861 */ 862 #ifndef _SYS_SYSPROTO_H_ 863 struct chroot_args { 864 char *path; 865 }; 866 #endif 867 int 868 sys_chroot(td, uap) 869 struct thread *td; 870 struct chroot_args /* { 871 char *path; 872 } */ *uap; 873 { 874 struct nameidata nd; 875 int error; 876 877 error = priv_check(td, PRIV_VFS_CHROOT); 878 if (error != 0) 879 return (error); 880 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 881 UIO_USERSPACE, uap->path, td); 882 error = namei(&nd); 883 if (error != 0) 884 goto error; 885 error = change_dir(nd.ni_vp, td); 886 if (error != 0) 887 goto e_vunlock; 888 #ifdef MAC 889 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 890 if (error != 0) 891 goto e_vunlock; 892 #endif 893 VOP_UNLOCK(nd.ni_vp, 0); 894 error = change_root(nd.ni_vp, td); 895 vrele(nd.ni_vp); 896 NDFREE(&nd, NDF_ONLY_PNBUF); 897 return (error); 898 e_vunlock: 899 vput(nd.ni_vp); 900 error: 901 NDFREE(&nd, NDF_ONLY_PNBUF); 902 return (error); 903 } 904 905 /* 906 * Common routine for chroot and chdir. Callers must provide a locked vnode 907 * instance. 908 */ 909 int 910 change_dir(vp, td) 911 struct vnode *vp; 912 struct thread *td; 913 { 914 #ifdef MAC 915 int error; 916 #endif 917 918 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 919 if (vp->v_type != VDIR) 920 return (ENOTDIR); 921 #ifdef MAC 922 error = mac_vnode_check_chdir(td->td_ucred, vp); 923 if (error != 0) 924 return (error); 925 #endif 926 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 927 } 928 929 /* 930 * Common routine for kern_chroot() and jail_attach(). The caller is 931 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 932 * authorize this operation. 933 */ 934 int 935 change_root(vp, td) 936 struct vnode *vp; 937 struct thread *td; 938 { 939 struct filedesc *fdp; 940 struct vnode *oldvp; 941 int error; 942 943 fdp = td->td_proc->p_fd; 944 FILEDESC_XLOCK(fdp); 945 if (chroot_allow_open_directories == 0 || 946 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 947 error = chroot_refuse_vdir_fds(fdp); 948 if (error != 0) { 949 FILEDESC_XUNLOCK(fdp); 950 return (error); 951 } 952 } 953 oldvp = fdp->fd_rdir; 954 fdp->fd_rdir = vp; 955 VREF(fdp->fd_rdir); 956 if (!fdp->fd_jdir) { 957 fdp->fd_jdir = vp; 958 VREF(fdp->fd_jdir); 959 } 960 FILEDESC_XUNLOCK(fdp); 961 vrele(oldvp); 962 return (0); 963 } 964 965 static __inline void 966 flags_to_rights(int flags, cap_rights_t *rightsp) 967 { 968 969 if (flags & O_EXEC) { 970 cap_rights_set(rightsp, CAP_FEXECVE); 971 } else { 972 switch ((flags & O_ACCMODE)) { 973 case O_RDONLY: 974 cap_rights_set(rightsp, CAP_READ); 975 break; 976 case O_RDWR: 977 cap_rights_set(rightsp, CAP_READ); 978 /* FALLTHROUGH */ 979 case O_WRONLY: 980 cap_rights_set(rightsp, CAP_WRITE); 981 if (!(flags & (O_APPEND | O_TRUNC))) 982 cap_rights_set(rightsp, CAP_SEEK); 983 break; 984 } 985 } 986 987 if (flags & O_CREAT) 988 cap_rights_set(rightsp, CAP_CREATE); 989 990 if (flags & O_TRUNC) 991 cap_rights_set(rightsp, CAP_FTRUNCATE); 992 993 if (flags & (O_SYNC | O_FSYNC)) 994 cap_rights_set(rightsp, CAP_FSYNC); 995 996 if (flags & (O_EXLOCK | O_SHLOCK)) 997 cap_rights_set(rightsp, CAP_FLOCK); 998 } 999 1000 /* 1001 * Check permissions, allocate an open file structure, and call the device 1002 * open routine if any. 1003 */ 1004 #ifndef _SYS_SYSPROTO_H_ 1005 struct open_args { 1006 char *path; 1007 int flags; 1008 int mode; 1009 }; 1010 #endif 1011 int 1012 sys_open(td, uap) 1013 struct thread *td; 1014 register struct open_args /* { 1015 char *path; 1016 int flags; 1017 int mode; 1018 } */ *uap; 1019 { 1020 1021 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1022 uap->flags, uap->mode)); 1023 } 1024 1025 #ifndef _SYS_SYSPROTO_H_ 1026 struct openat_args { 1027 int fd; 1028 char *path; 1029 int flag; 1030 int mode; 1031 }; 1032 #endif 1033 int 1034 sys_openat(struct thread *td, struct openat_args *uap) 1035 { 1036 1037 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1038 uap->mode)); 1039 } 1040 1041 int 1042 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1043 int flags, int mode) 1044 { 1045 struct proc *p = td->td_proc; 1046 struct filedesc *fdp = p->p_fd; 1047 struct file *fp; 1048 struct vnode *vp; 1049 struct nameidata nd; 1050 cap_rights_t rights; 1051 int cmode, error, indx; 1052 1053 indx = -1; 1054 1055 AUDIT_ARG_FFLAGS(flags); 1056 AUDIT_ARG_MODE(mode); 1057 /* XXX: audit dirfd */ 1058 cap_rights_init(&rights, CAP_LOOKUP); 1059 flags_to_rights(flags, &rights); 1060 /* 1061 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1062 * may be specified. 1063 */ 1064 if (flags & O_EXEC) { 1065 if (flags & O_ACCMODE) 1066 return (EINVAL); 1067 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1068 return (EINVAL); 1069 } else { 1070 flags = FFLAGS(flags); 1071 } 1072 1073 /* 1074 * Allocate the file descriptor, but don't install a descriptor yet. 1075 */ 1076 error = falloc_noinstall(td, &fp); 1077 if (error != 0) 1078 return (error); 1079 /* 1080 * An extra reference on `fp' has been held for us by 1081 * falloc_noinstall(). 1082 */ 1083 /* Set the flags early so the finit in devfs can pick them up. */ 1084 fp->f_flag = flags & FMASK; 1085 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1086 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1087 &rights, td); 1088 td->td_dupfd = -1; /* XXX check for fdopen */ 1089 error = vn_open(&nd, &flags, cmode, fp); 1090 if (error != 0) { 1091 /* 1092 * If the vn_open replaced the method vector, something 1093 * wonderous happened deep below and we just pass it up 1094 * pretending we know what we do. 1095 */ 1096 if (error == ENXIO && fp->f_ops != &badfileops) 1097 goto success; 1098 1099 /* 1100 * Handle special fdopen() case. bleh. 1101 * 1102 * Don't do this for relative (capability) lookups; we don't 1103 * understand exactly what would happen, and we don't think 1104 * that it ever should. 1105 */ 1106 if (nd.ni_strictrelative == 0 && 1107 (error == ENODEV || error == ENXIO) && 1108 td->td_dupfd >= 0) { 1109 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1110 &indx); 1111 if (error == 0) 1112 goto success; 1113 } 1114 1115 goto bad; 1116 } 1117 td->td_dupfd = 0; 1118 NDFREE(&nd, NDF_ONLY_PNBUF); 1119 vp = nd.ni_vp; 1120 1121 /* 1122 * Store the vnode, for any f_type. Typically, the vnode use 1123 * count is decremented by direct call to vn_closefile() for 1124 * files that switched type in the cdevsw fdopen() method. 1125 */ 1126 fp->f_vnode = vp; 1127 /* 1128 * If the file wasn't claimed by devfs bind it to the normal 1129 * vnode operations here. 1130 */ 1131 if (fp->f_ops == &badfileops) { 1132 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1133 fp->f_seqcount = 1; 1134 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1135 DTYPE_VNODE, vp, &vnops); 1136 } 1137 1138 VOP_UNLOCK(vp, 0); 1139 if (flags & O_TRUNC) { 1140 error = fo_truncate(fp, 0, td->td_ucred, td); 1141 if (error != 0) 1142 goto bad; 1143 } 1144 success: 1145 /* 1146 * If we haven't already installed the FD (for dupfdopen), do so now. 1147 */ 1148 if (indx == -1) { 1149 struct filecaps *fcaps; 1150 1151 #ifdef CAPABILITIES 1152 if (nd.ni_strictrelative == 1) 1153 fcaps = &nd.ni_filecaps; 1154 else 1155 #endif 1156 fcaps = NULL; 1157 error = finstall(td, fp, &indx, flags, fcaps); 1158 /* On success finstall() consumes fcaps. */ 1159 if (error != 0) { 1160 filecaps_free(&nd.ni_filecaps); 1161 goto bad; 1162 } 1163 } else { 1164 filecaps_free(&nd.ni_filecaps); 1165 } 1166 1167 /* 1168 * Release our private reference, leaving the one associated with 1169 * the descriptor table intact. 1170 */ 1171 fdrop(fp, td); 1172 td->td_retval[0] = indx; 1173 return (0); 1174 bad: 1175 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1176 fdrop(fp, td); 1177 return (error); 1178 } 1179 1180 #ifdef COMPAT_43 1181 /* 1182 * Create a file. 1183 */ 1184 #ifndef _SYS_SYSPROTO_H_ 1185 struct ocreat_args { 1186 char *path; 1187 int mode; 1188 }; 1189 #endif 1190 int 1191 ocreat(td, uap) 1192 struct thread *td; 1193 register struct ocreat_args /* { 1194 char *path; 1195 int mode; 1196 } */ *uap; 1197 { 1198 1199 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1200 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1201 } 1202 #endif /* COMPAT_43 */ 1203 1204 /* 1205 * Create a special file. 1206 */ 1207 #ifndef _SYS_SYSPROTO_H_ 1208 struct mknod_args { 1209 char *path; 1210 int mode; 1211 int dev; 1212 }; 1213 #endif 1214 int 1215 sys_mknod(td, uap) 1216 struct thread *td; 1217 register struct mknod_args /* { 1218 char *path; 1219 int mode; 1220 int dev; 1221 } */ *uap; 1222 { 1223 1224 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1225 uap->mode, uap->dev)); 1226 } 1227 1228 #ifndef _SYS_SYSPROTO_H_ 1229 struct mknodat_args { 1230 int fd; 1231 char *path; 1232 mode_t mode; 1233 dev_t dev; 1234 }; 1235 #endif 1236 int 1237 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1238 { 1239 1240 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1241 uap->dev)); 1242 } 1243 1244 int 1245 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1246 int mode, int dev) 1247 { 1248 struct vnode *vp; 1249 struct mount *mp; 1250 struct vattr vattr; 1251 struct nameidata nd; 1252 cap_rights_t rights; 1253 int error, whiteout = 0; 1254 1255 AUDIT_ARG_MODE(mode); 1256 AUDIT_ARG_DEV(dev); 1257 switch (mode & S_IFMT) { 1258 case S_IFCHR: 1259 case S_IFBLK: 1260 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1261 break; 1262 case S_IFMT: 1263 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1264 break; 1265 case S_IFWHT: 1266 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1267 break; 1268 case S_IFIFO: 1269 if (dev == 0) 1270 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1271 /* FALLTHROUGH */ 1272 default: 1273 error = EINVAL; 1274 break; 1275 } 1276 if (error != 0) 1277 return (error); 1278 restart: 1279 bwillwrite(); 1280 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1281 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1282 td); 1283 if ((error = namei(&nd)) != 0) 1284 return (error); 1285 vp = nd.ni_vp; 1286 if (vp != NULL) { 1287 NDFREE(&nd, NDF_ONLY_PNBUF); 1288 if (vp == nd.ni_dvp) 1289 vrele(nd.ni_dvp); 1290 else 1291 vput(nd.ni_dvp); 1292 vrele(vp); 1293 return (EEXIST); 1294 } else { 1295 VATTR_NULL(&vattr); 1296 vattr.va_mode = (mode & ALLPERMS) & 1297 ~td->td_proc->p_fd->fd_cmask; 1298 vattr.va_rdev = dev; 1299 whiteout = 0; 1300 1301 switch (mode & S_IFMT) { 1302 case S_IFMT: /* used by badsect to flag bad sectors */ 1303 vattr.va_type = VBAD; 1304 break; 1305 case S_IFCHR: 1306 vattr.va_type = VCHR; 1307 break; 1308 case S_IFBLK: 1309 vattr.va_type = VBLK; 1310 break; 1311 case S_IFWHT: 1312 whiteout = 1; 1313 break; 1314 default: 1315 panic("kern_mknod: invalid mode"); 1316 } 1317 } 1318 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1319 NDFREE(&nd, NDF_ONLY_PNBUF); 1320 vput(nd.ni_dvp); 1321 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1322 return (error); 1323 goto restart; 1324 } 1325 #ifdef MAC 1326 if (error == 0 && !whiteout) 1327 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1328 &nd.ni_cnd, &vattr); 1329 #endif 1330 if (error == 0) { 1331 if (whiteout) 1332 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1333 else { 1334 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1335 &nd.ni_cnd, &vattr); 1336 if (error == 0) 1337 vput(nd.ni_vp); 1338 } 1339 } 1340 NDFREE(&nd, NDF_ONLY_PNBUF); 1341 vput(nd.ni_dvp); 1342 vn_finished_write(mp); 1343 return (error); 1344 } 1345 1346 /* 1347 * Create a named pipe. 1348 */ 1349 #ifndef _SYS_SYSPROTO_H_ 1350 struct mkfifo_args { 1351 char *path; 1352 int mode; 1353 }; 1354 #endif 1355 int 1356 sys_mkfifo(td, uap) 1357 struct thread *td; 1358 register struct mkfifo_args /* { 1359 char *path; 1360 int mode; 1361 } */ *uap; 1362 { 1363 1364 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1365 uap->mode)); 1366 } 1367 1368 #ifndef _SYS_SYSPROTO_H_ 1369 struct mkfifoat_args { 1370 int fd; 1371 char *path; 1372 mode_t mode; 1373 }; 1374 #endif 1375 int 1376 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1377 { 1378 1379 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1380 uap->mode)); 1381 } 1382 1383 int 1384 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1385 int mode) 1386 { 1387 struct mount *mp; 1388 struct vattr vattr; 1389 struct nameidata nd; 1390 cap_rights_t rights; 1391 int error; 1392 1393 AUDIT_ARG_MODE(mode); 1394 restart: 1395 bwillwrite(); 1396 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1397 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1398 td); 1399 if ((error = namei(&nd)) != 0) 1400 return (error); 1401 if (nd.ni_vp != NULL) { 1402 NDFREE(&nd, NDF_ONLY_PNBUF); 1403 if (nd.ni_vp == nd.ni_dvp) 1404 vrele(nd.ni_dvp); 1405 else 1406 vput(nd.ni_dvp); 1407 vrele(nd.ni_vp); 1408 return (EEXIST); 1409 } 1410 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1411 NDFREE(&nd, NDF_ONLY_PNBUF); 1412 vput(nd.ni_dvp); 1413 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1414 return (error); 1415 goto restart; 1416 } 1417 VATTR_NULL(&vattr); 1418 vattr.va_type = VFIFO; 1419 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1420 #ifdef MAC 1421 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1422 &vattr); 1423 if (error != 0) 1424 goto out; 1425 #endif 1426 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1427 if (error == 0) 1428 vput(nd.ni_vp); 1429 #ifdef MAC 1430 out: 1431 #endif 1432 vput(nd.ni_dvp); 1433 vn_finished_write(mp); 1434 NDFREE(&nd, NDF_ONLY_PNBUF); 1435 return (error); 1436 } 1437 1438 /* 1439 * Make a hard file link. 1440 */ 1441 #ifndef _SYS_SYSPROTO_H_ 1442 struct link_args { 1443 char *path; 1444 char *link; 1445 }; 1446 #endif 1447 int 1448 sys_link(td, uap) 1449 struct thread *td; 1450 register struct link_args /* { 1451 char *path; 1452 char *link; 1453 } */ *uap; 1454 { 1455 1456 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1457 UIO_USERSPACE, FOLLOW)); 1458 } 1459 1460 #ifndef _SYS_SYSPROTO_H_ 1461 struct linkat_args { 1462 int fd1; 1463 char *path1; 1464 int fd2; 1465 char *path2; 1466 int flag; 1467 }; 1468 #endif 1469 int 1470 sys_linkat(struct thread *td, struct linkat_args *uap) 1471 { 1472 int flag; 1473 1474 flag = uap->flag; 1475 if (flag & ~AT_SYMLINK_FOLLOW) 1476 return (EINVAL); 1477 1478 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1479 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1480 } 1481 1482 int hardlink_check_uid = 0; 1483 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1484 &hardlink_check_uid, 0, 1485 "Unprivileged processes cannot create hard links to files owned by other " 1486 "users"); 1487 static int hardlink_check_gid = 0; 1488 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1489 &hardlink_check_gid, 0, 1490 "Unprivileged processes cannot create hard links to files owned by other " 1491 "groups"); 1492 1493 static int 1494 can_hardlink(struct vnode *vp, struct ucred *cred) 1495 { 1496 struct vattr va; 1497 int error; 1498 1499 if (!hardlink_check_uid && !hardlink_check_gid) 1500 return (0); 1501 1502 error = VOP_GETATTR(vp, &va, cred); 1503 if (error != 0) 1504 return (error); 1505 1506 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1507 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1508 if (error != 0) 1509 return (error); 1510 } 1511 1512 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1513 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1514 if (error != 0) 1515 return (error); 1516 } 1517 1518 return (0); 1519 } 1520 1521 int 1522 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1523 enum uio_seg segflg, int follow) 1524 { 1525 struct vnode *vp; 1526 struct mount *mp; 1527 struct nameidata nd; 1528 cap_rights_t rights; 1529 int error; 1530 1531 again: 1532 bwillwrite(); 1533 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1534 1535 if ((error = namei(&nd)) != 0) 1536 return (error); 1537 NDFREE(&nd, NDF_ONLY_PNBUF); 1538 vp = nd.ni_vp; 1539 if (vp->v_type == VDIR) { 1540 vrele(vp); 1541 return (EPERM); /* POSIX */ 1542 } 1543 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1544 NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), 1545 td); 1546 if ((error = namei(&nd)) == 0) { 1547 if (nd.ni_vp != NULL) { 1548 NDFREE(&nd, NDF_ONLY_PNBUF); 1549 if (nd.ni_dvp == nd.ni_vp) 1550 vrele(nd.ni_dvp); 1551 else 1552 vput(nd.ni_dvp); 1553 vrele(nd.ni_vp); 1554 vrele(vp); 1555 return (EEXIST); 1556 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1557 /* 1558 * Cross-device link. No need to recheck 1559 * vp->v_type, since it cannot change, except 1560 * to VBAD. 1561 */ 1562 NDFREE(&nd, NDF_ONLY_PNBUF); 1563 vput(nd.ni_dvp); 1564 vrele(vp); 1565 return (EXDEV); 1566 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1567 error = can_hardlink(vp, td->td_ucred); 1568 #ifdef MAC 1569 if (error == 0) 1570 error = mac_vnode_check_link(td->td_ucred, 1571 nd.ni_dvp, vp, &nd.ni_cnd); 1572 #endif 1573 if (error != 0) { 1574 vput(vp); 1575 vput(nd.ni_dvp); 1576 NDFREE(&nd, NDF_ONLY_PNBUF); 1577 return (error); 1578 } 1579 error = vn_start_write(vp, &mp, V_NOWAIT); 1580 if (error != 0) { 1581 vput(vp); 1582 vput(nd.ni_dvp); 1583 NDFREE(&nd, NDF_ONLY_PNBUF); 1584 error = vn_start_write(NULL, &mp, 1585 V_XSLEEP | PCATCH); 1586 if (error != 0) 1587 return (error); 1588 goto again; 1589 } 1590 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1591 VOP_UNLOCK(vp, 0); 1592 vput(nd.ni_dvp); 1593 vn_finished_write(mp); 1594 NDFREE(&nd, NDF_ONLY_PNBUF); 1595 } else { 1596 vput(nd.ni_dvp); 1597 NDFREE(&nd, NDF_ONLY_PNBUF); 1598 vrele(vp); 1599 goto again; 1600 } 1601 } 1602 vrele(vp); 1603 return (error); 1604 } 1605 1606 /* 1607 * Make a symbolic link. 1608 */ 1609 #ifndef _SYS_SYSPROTO_H_ 1610 struct symlink_args { 1611 char *path; 1612 char *link; 1613 }; 1614 #endif 1615 int 1616 sys_symlink(td, uap) 1617 struct thread *td; 1618 register struct symlink_args /* { 1619 char *path; 1620 char *link; 1621 } */ *uap; 1622 { 1623 1624 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1625 UIO_USERSPACE)); 1626 } 1627 1628 #ifndef _SYS_SYSPROTO_H_ 1629 struct symlinkat_args { 1630 char *path; 1631 int fd; 1632 char *path2; 1633 }; 1634 #endif 1635 int 1636 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1637 { 1638 1639 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1640 UIO_USERSPACE)); 1641 } 1642 1643 int 1644 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1645 enum uio_seg segflg) 1646 { 1647 struct mount *mp; 1648 struct vattr vattr; 1649 char *syspath; 1650 struct nameidata nd; 1651 int error; 1652 cap_rights_t rights; 1653 1654 if (segflg == UIO_SYSSPACE) { 1655 syspath = path1; 1656 } else { 1657 syspath = uma_zalloc(namei_zone, M_WAITOK); 1658 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1659 goto out; 1660 } 1661 AUDIT_ARG_TEXT(syspath); 1662 restart: 1663 bwillwrite(); 1664 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1665 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1666 td); 1667 if ((error = namei(&nd)) != 0) 1668 goto out; 1669 if (nd.ni_vp) { 1670 NDFREE(&nd, NDF_ONLY_PNBUF); 1671 if (nd.ni_vp == nd.ni_dvp) 1672 vrele(nd.ni_dvp); 1673 else 1674 vput(nd.ni_dvp); 1675 vrele(nd.ni_vp); 1676 error = EEXIST; 1677 goto out; 1678 } 1679 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1680 NDFREE(&nd, NDF_ONLY_PNBUF); 1681 vput(nd.ni_dvp); 1682 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1683 goto out; 1684 goto restart; 1685 } 1686 VATTR_NULL(&vattr); 1687 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1688 #ifdef MAC 1689 vattr.va_type = VLNK; 1690 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1691 &vattr); 1692 if (error != 0) 1693 goto out2; 1694 #endif 1695 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1696 if (error == 0) 1697 vput(nd.ni_vp); 1698 #ifdef MAC 1699 out2: 1700 #endif 1701 NDFREE(&nd, NDF_ONLY_PNBUF); 1702 vput(nd.ni_dvp); 1703 vn_finished_write(mp); 1704 out: 1705 if (segflg != UIO_SYSSPACE) 1706 uma_zfree(namei_zone, syspath); 1707 return (error); 1708 } 1709 1710 /* 1711 * Delete a whiteout from the filesystem. 1712 */ 1713 int 1714 sys_undelete(td, uap) 1715 struct thread *td; 1716 register struct undelete_args /* { 1717 char *path; 1718 } */ *uap; 1719 { 1720 struct mount *mp; 1721 struct nameidata nd; 1722 int error; 1723 1724 restart: 1725 bwillwrite(); 1726 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1727 UIO_USERSPACE, uap->path, td); 1728 error = namei(&nd); 1729 if (error != 0) 1730 return (error); 1731 1732 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1733 NDFREE(&nd, NDF_ONLY_PNBUF); 1734 if (nd.ni_vp == nd.ni_dvp) 1735 vrele(nd.ni_dvp); 1736 else 1737 vput(nd.ni_dvp); 1738 if (nd.ni_vp) 1739 vrele(nd.ni_vp); 1740 return (EEXIST); 1741 } 1742 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1743 NDFREE(&nd, NDF_ONLY_PNBUF); 1744 vput(nd.ni_dvp); 1745 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1746 return (error); 1747 goto restart; 1748 } 1749 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1750 NDFREE(&nd, NDF_ONLY_PNBUF); 1751 vput(nd.ni_dvp); 1752 vn_finished_write(mp); 1753 return (error); 1754 } 1755 1756 /* 1757 * Delete a name from the filesystem. 1758 */ 1759 #ifndef _SYS_SYSPROTO_H_ 1760 struct unlink_args { 1761 char *path; 1762 }; 1763 #endif 1764 int 1765 sys_unlink(td, uap) 1766 struct thread *td; 1767 struct unlink_args /* { 1768 char *path; 1769 } */ *uap; 1770 { 1771 1772 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1773 } 1774 1775 #ifndef _SYS_SYSPROTO_H_ 1776 struct unlinkat_args { 1777 int fd; 1778 char *path; 1779 int flag; 1780 }; 1781 #endif 1782 int 1783 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1784 { 1785 int flag = uap->flag; 1786 int fd = uap->fd; 1787 char *path = uap->path; 1788 1789 if (flag & ~AT_REMOVEDIR) 1790 return (EINVAL); 1791 1792 if (flag & AT_REMOVEDIR) 1793 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1794 else 1795 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1796 } 1797 1798 int 1799 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1800 ino_t oldinum) 1801 { 1802 struct mount *mp; 1803 struct vnode *vp; 1804 struct nameidata nd; 1805 struct stat sb; 1806 cap_rights_t rights; 1807 int error; 1808 1809 restart: 1810 bwillwrite(); 1811 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1812 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1813 if ((error = namei(&nd)) != 0) 1814 return (error == EINVAL ? EPERM : error); 1815 vp = nd.ni_vp; 1816 if (vp->v_type == VDIR && oldinum == 0) { 1817 error = EPERM; /* POSIX */ 1818 } else if (oldinum != 0 && 1819 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1820 sb.st_ino != oldinum) { 1821 error = EIDRM; /* Identifier removed */ 1822 } else { 1823 /* 1824 * The root of a mounted filesystem cannot be deleted. 1825 * 1826 * XXX: can this only be a VDIR case? 1827 */ 1828 if (vp->v_vflag & VV_ROOT) 1829 error = EBUSY; 1830 } 1831 if (error == 0) { 1832 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1833 NDFREE(&nd, NDF_ONLY_PNBUF); 1834 vput(nd.ni_dvp); 1835 if (vp == nd.ni_dvp) 1836 vrele(vp); 1837 else 1838 vput(vp); 1839 if ((error = vn_start_write(NULL, &mp, 1840 V_XSLEEP | PCATCH)) != 0) 1841 return (error); 1842 goto restart; 1843 } 1844 #ifdef MAC 1845 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1846 &nd.ni_cnd); 1847 if (error != 0) 1848 goto out; 1849 #endif 1850 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1851 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1852 #ifdef MAC 1853 out: 1854 #endif 1855 vn_finished_write(mp); 1856 } 1857 NDFREE(&nd, NDF_ONLY_PNBUF); 1858 vput(nd.ni_dvp); 1859 if (vp == nd.ni_dvp) 1860 vrele(vp); 1861 else 1862 vput(vp); 1863 return (error); 1864 } 1865 1866 /* 1867 * Reposition read/write file offset. 1868 */ 1869 #ifndef _SYS_SYSPROTO_H_ 1870 struct lseek_args { 1871 int fd; 1872 int pad; 1873 off_t offset; 1874 int whence; 1875 }; 1876 #endif 1877 int 1878 sys_lseek(td, uap) 1879 struct thread *td; 1880 register struct lseek_args /* { 1881 int fd; 1882 int pad; 1883 off_t offset; 1884 int whence; 1885 } */ *uap; 1886 { 1887 struct file *fp; 1888 cap_rights_t rights; 1889 int error; 1890 1891 AUDIT_ARG_FD(uap->fd); 1892 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1893 if (error != 0) 1894 return (error); 1895 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1896 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1897 fdrop(fp, td); 1898 return (error); 1899 } 1900 1901 #if defined(COMPAT_43) 1902 /* 1903 * Reposition read/write file offset. 1904 */ 1905 #ifndef _SYS_SYSPROTO_H_ 1906 struct olseek_args { 1907 int fd; 1908 long offset; 1909 int whence; 1910 }; 1911 #endif 1912 int 1913 olseek(td, uap) 1914 struct thread *td; 1915 register struct olseek_args /* { 1916 int fd; 1917 long offset; 1918 int whence; 1919 } */ *uap; 1920 { 1921 struct lseek_args /* { 1922 int fd; 1923 int pad; 1924 off_t offset; 1925 int whence; 1926 } */ nuap; 1927 1928 nuap.fd = uap->fd; 1929 nuap.offset = uap->offset; 1930 nuap.whence = uap->whence; 1931 return (sys_lseek(td, &nuap)); 1932 } 1933 #endif /* COMPAT_43 */ 1934 1935 #if defined(COMPAT_FREEBSD6) 1936 /* Version with the 'pad' argument */ 1937 int 1938 freebsd6_lseek(td, uap) 1939 struct thread *td; 1940 register struct freebsd6_lseek_args *uap; 1941 { 1942 struct lseek_args ouap; 1943 1944 ouap.fd = uap->fd; 1945 ouap.offset = uap->offset; 1946 ouap.whence = uap->whence; 1947 return (sys_lseek(td, &ouap)); 1948 } 1949 #endif 1950 1951 /* 1952 * Check access permissions using passed credentials. 1953 */ 1954 static int 1955 vn_access(vp, user_flags, cred, td) 1956 struct vnode *vp; 1957 int user_flags; 1958 struct ucred *cred; 1959 struct thread *td; 1960 { 1961 accmode_t accmode; 1962 int error; 1963 1964 /* Flags == 0 means only check for existence. */ 1965 if (user_flags == 0) 1966 return (0); 1967 1968 accmode = 0; 1969 if (user_flags & R_OK) 1970 accmode |= VREAD; 1971 if (user_flags & W_OK) 1972 accmode |= VWRITE; 1973 if (user_flags & X_OK) 1974 accmode |= VEXEC; 1975 #ifdef MAC 1976 error = mac_vnode_check_access(cred, vp, accmode); 1977 if (error != 0) 1978 return (error); 1979 #endif 1980 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1981 error = VOP_ACCESS(vp, accmode, cred, td); 1982 return (error); 1983 } 1984 1985 /* 1986 * Check access permissions using "real" credentials. 1987 */ 1988 #ifndef _SYS_SYSPROTO_H_ 1989 struct access_args { 1990 char *path; 1991 int amode; 1992 }; 1993 #endif 1994 int 1995 sys_access(td, uap) 1996 struct thread *td; 1997 register struct access_args /* { 1998 char *path; 1999 int amode; 2000 } */ *uap; 2001 { 2002 2003 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2004 0, uap->amode)); 2005 } 2006 2007 #ifndef _SYS_SYSPROTO_H_ 2008 struct faccessat_args { 2009 int dirfd; 2010 char *path; 2011 int amode; 2012 int flag; 2013 } 2014 #endif 2015 int 2016 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2017 { 2018 2019 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2020 uap->amode)); 2021 } 2022 2023 int 2024 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2025 int flag, int amode) 2026 { 2027 struct ucred *cred, *usecred; 2028 struct vnode *vp; 2029 struct nameidata nd; 2030 cap_rights_t rights; 2031 int error; 2032 2033 if (flag & ~AT_EACCESS) 2034 return (EINVAL); 2035 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2036 return (EINVAL); 2037 2038 /* 2039 * Create and modify a temporary credential instead of one that 2040 * is potentially shared (if we need one). 2041 */ 2042 cred = td->td_ucred; 2043 if ((flag & AT_EACCESS) == 0 && 2044 ((cred->cr_uid != cred->cr_ruid || 2045 cred->cr_rgid != cred->cr_groups[0]))) { 2046 usecred = crdup(cred); 2047 usecred->cr_uid = cred->cr_ruid; 2048 usecred->cr_groups[0] = cred->cr_rgid; 2049 td->td_ucred = usecred; 2050 } else 2051 usecred = cred; 2052 AUDIT_ARG_VALUE(amode); 2053 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2054 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2055 td); 2056 if ((error = namei(&nd)) != 0) 2057 goto out; 2058 vp = nd.ni_vp; 2059 2060 error = vn_access(vp, amode, usecred, td); 2061 NDFREE(&nd, NDF_ONLY_PNBUF); 2062 vput(vp); 2063 out: 2064 if (usecred != cred) { 2065 td->td_ucred = cred; 2066 crfree(usecred); 2067 } 2068 return (error); 2069 } 2070 2071 /* 2072 * Check access permissions using "effective" credentials. 2073 */ 2074 #ifndef _SYS_SYSPROTO_H_ 2075 struct eaccess_args { 2076 char *path; 2077 int amode; 2078 }; 2079 #endif 2080 int 2081 sys_eaccess(td, uap) 2082 struct thread *td; 2083 register struct eaccess_args /* { 2084 char *path; 2085 int amode; 2086 } */ *uap; 2087 { 2088 2089 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2090 AT_EACCESS, uap->amode)); 2091 } 2092 2093 #if defined(COMPAT_43) 2094 /* 2095 * Get file status; this version follows links. 2096 */ 2097 #ifndef _SYS_SYSPROTO_H_ 2098 struct ostat_args { 2099 char *path; 2100 struct ostat *ub; 2101 }; 2102 #endif 2103 int 2104 ostat(td, uap) 2105 struct thread *td; 2106 register struct ostat_args /* { 2107 char *path; 2108 struct ostat *ub; 2109 } */ *uap; 2110 { 2111 struct stat sb; 2112 struct ostat osb; 2113 int error; 2114 2115 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2116 &sb, NULL); 2117 if (error != 0) 2118 return (error); 2119 cvtstat(&sb, &osb); 2120 return (copyout(&osb, uap->ub, sizeof (osb))); 2121 } 2122 2123 /* 2124 * Get file status; this version does not follow links. 2125 */ 2126 #ifndef _SYS_SYSPROTO_H_ 2127 struct olstat_args { 2128 char *path; 2129 struct ostat *ub; 2130 }; 2131 #endif 2132 int 2133 olstat(td, uap) 2134 struct thread *td; 2135 register struct olstat_args /* { 2136 char *path; 2137 struct ostat *ub; 2138 } */ *uap; 2139 { 2140 struct stat sb; 2141 struct ostat osb; 2142 int error; 2143 2144 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2145 UIO_USERSPACE, &sb, NULL); 2146 if (error != 0) 2147 return (error); 2148 cvtstat(&sb, &osb); 2149 return (copyout(&osb, uap->ub, sizeof (osb))); 2150 } 2151 2152 /* 2153 * Convert from an old to a new stat structure. 2154 */ 2155 void 2156 cvtstat(st, ost) 2157 struct stat *st; 2158 struct ostat *ost; 2159 { 2160 2161 ost->st_dev = st->st_dev; 2162 ost->st_ino = st->st_ino; 2163 ost->st_mode = st->st_mode; 2164 ost->st_nlink = st->st_nlink; 2165 ost->st_uid = st->st_uid; 2166 ost->st_gid = st->st_gid; 2167 ost->st_rdev = st->st_rdev; 2168 if (st->st_size < (quad_t)1 << 32) 2169 ost->st_size = st->st_size; 2170 else 2171 ost->st_size = -2; 2172 ost->st_atim = st->st_atim; 2173 ost->st_mtim = st->st_mtim; 2174 ost->st_ctim = st->st_ctim; 2175 ost->st_blksize = st->st_blksize; 2176 ost->st_blocks = st->st_blocks; 2177 ost->st_flags = st->st_flags; 2178 ost->st_gen = st->st_gen; 2179 } 2180 #endif /* COMPAT_43 */ 2181 2182 /* 2183 * Get file status; this version follows links. 2184 */ 2185 #ifndef _SYS_SYSPROTO_H_ 2186 struct stat_args { 2187 char *path; 2188 struct stat *ub; 2189 }; 2190 #endif 2191 int 2192 sys_stat(td, uap) 2193 struct thread *td; 2194 register struct stat_args /* { 2195 char *path; 2196 struct stat *ub; 2197 } */ *uap; 2198 { 2199 struct stat sb; 2200 int error; 2201 2202 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2203 &sb, NULL); 2204 if (error == 0) 2205 error = copyout(&sb, uap->ub, sizeof (sb)); 2206 return (error); 2207 } 2208 2209 #ifndef _SYS_SYSPROTO_H_ 2210 struct fstatat_args { 2211 int fd; 2212 char *path; 2213 struct stat *buf; 2214 int flag; 2215 } 2216 #endif 2217 int 2218 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2219 { 2220 struct stat sb; 2221 int error; 2222 2223 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2224 UIO_USERSPACE, &sb, NULL); 2225 if (error == 0) 2226 error = copyout(&sb, uap->buf, sizeof (sb)); 2227 return (error); 2228 } 2229 2230 int 2231 kern_statat(struct thread *td, int flag, int fd, char *path, 2232 enum uio_seg pathseg, struct stat *sbp, 2233 void (*hook)(struct vnode *vp, struct stat *sbp)) 2234 { 2235 struct nameidata nd; 2236 struct stat sb; 2237 cap_rights_t rights; 2238 int error; 2239 2240 if (flag & ~AT_SYMLINK_NOFOLLOW) 2241 return (EINVAL); 2242 2243 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2244 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2245 cap_rights_init(&rights, CAP_FSTAT), td); 2246 2247 if ((error = namei(&nd)) != 0) 2248 return (error); 2249 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2250 if (error == 0) { 2251 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2252 if (S_ISREG(sb.st_mode)) 2253 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2254 if (__predict_false(hook != NULL)) 2255 hook(nd.ni_vp, &sb); 2256 } 2257 NDFREE(&nd, NDF_ONLY_PNBUF); 2258 vput(nd.ni_vp); 2259 if (error != 0) 2260 return (error); 2261 *sbp = sb; 2262 #ifdef KTRACE 2263 if (KTRPOINT(td, KTR_STRUCT)) 2264 ktrstat(&sb); 2265 #endif 2266 return (0); 2267 } 2268 2269 /* 2270 * Get file status; this version does not follow links. 2271 */ 2272 #ifndef _SYS_SYSPROTO_H_ 2273 struct lstat_args { 2274 char *path; 2275 struct stat *ub; 2276 }; 2277 #endif 2278 int 2279 sys_lstat(td, uap) 2280 struct thread *td; 2281 register struct lstat_args /* { 2282 char *path; 2283 struct stat *ub; 2284 } */ *uap; 2285 { 2286 struct stat sb; 2287 int error; 2288 2289 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2290 UIO_USERSPACE, &sb, NULL); 2291 if (error == 0) 2292 error = copyout(&sb, uap->ub, sizeof (sb)); 2293 return (error); 2294 } 2295 2296 /* 2297 * Implementation of the NetBSD [l]stat() functions. 2298 */ 2299 void 2300 cvtnstat(sb, nsb) 2301 struct stat *sb; 2302 struct nstat *nsb; 2303 { 2304 2305 bzero(nsb, sizeof *nsb); 2306 nsb->st_dev = sb->st_dev; 2307 nsb->st_ino = sb->st_ino; 2308 nsb->st_mode = sb->st_mode; 2309 nsb->st_nlink = sb->st_nlink; 2310 nsb->st_uid = sb->st_uid; 2311 nsb->st_gid = sb->st_gid; 2312 nsb->st_rdev = sb->st_rdev; 2313 nsb->st_atim = sb->st_atim; 2314 nsb->st_mtim = sb->st_mtim; 2315 nsb->st_ctim = sb->st_ctim; 2316 nsb->st_size = sb->st_size; 2317 nsb->st_blocks = sb->st_blocks; 2318 nsb->st_blksize = sb->st_blksize; 2319 nsb->st_flags = sb->st_flags; 2320 nsb->st_gen = sb->st_gen; 2321 nsb->st_birthtim = sb->st_birthtim; 2322 } 2323 2324 #ifndef _SYS_SYSPROTO_H_ 2325 struct nstat_args { 2326 char *path; 2327 struct nstat *ub; 2328 }; 2329 #endif 2330 int 2331 sys_nstat(td, uap) 2332 struct thread *td; 2333 register struct nstat_args /* { 2334 char *path; 2335 struct nstat *ub; 2336 } */ *uap; 2337 { 2338 struct stat sb; 2339 struct nstat nsb; 2340 int error; 2341 2342 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2343 &sb, NULL); 2344 if (error != 0) 2345 return (error); 2346 cvtnstat(&sb, &nsb); 2347 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2348 } 2349 2350 /* 2351 * NetBSD lstat. Get file status; this version does not follow links. 2352 */ 2353 #ifndef _SYS_SYSPROTO_H_ 2354 struct lstat_args { 2355 char *path; 2356 struct stat *ub; 2357 }; 2358 #endif 2359 int 2360 sys_nlstat(td, uap) 2361 struct thread *td; 2362 register struct nlstat_args /* { 2363 char *path; 2364 struct nstat *ub; 2365 } */ *uap; 2366 { 2367 struct stat sb; 2368 struct nstat nsb; 2369 int error; 2370 2371 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2372 UIO_USERSPACE, &sb, NULL); 2373 if (error != 0) 2374 return (error); 2375 cvtnstat(&sb, &nsb); 2376 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2377 } 2378 2379 /* 2380 * Get configurable pathname variables. 2381 */ 2382 #ifndef _SYS_SYSPROTO_H_ 2383 struct pathconf_args { 2384 char *path; 2385 int name; 2386 }; 2387 #endif 2388 int 2389 sys_pathconf(td, uap) 2390 struct thread *td; 2391 register struct pathconf_args /* { 2392 char *path; 2393 int name; 2394 } */ *uap; 2395 { 2396 2397 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2398 } 2399 2400 #ifndef _SYS_SYSPROTO_H_ 2401 struct lpathconf_args { 2402 char *path; 2403 int name; 2404 }; 2405 #endif 2406 int 2407 sys_lpathconf(td, uap) 2408 struct thread *td; 2409 register struct lpathconf_args /* { 2410 char *path; 2411 int name; 2412 } */ *uap; 2413 { 2414 2415 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2416 NOFOLLOW)); 2417 } 2418 2419 int 2420 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2421 u_long flags) 2422 { 2423 struct nameidata nd; 2424 int error; 2425 2426 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2427 pathseg, path, td); 2428 if ((error = namei(&nd)) != 0) 2429 return (error); 2430 NDFREE(&nd, NDF_ONLY_PNBUF); 2431 2432 /* If asynchronous I/O is available, it works for all files. */ 2433 if (name == _PC_ASYNC_IO) 2434 td->td_retval[0] = async_io_version; 2435 else 2436 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2437 vput(nd.ni_vp); 2438 return (error); 2439 } 2440 2441 /* 2442 * Return target name of a symbolic link. 2443 */ 2444 #ifndef _SYS_SYSPROTO_H_ 2445 struct readlink_args { 2446 char *path; 2447 char *buf; 2448 size_t count; 2449 }; 2450 #endif 2451 int 2452 sys_readlink(td, uap) 2453 struct thread *td; 2454 register struct readlink_args /* { 2455 char *path; 2456 char *buf; 2457 size_t count; 2458 } */ *uap; 2459 { 2460 2461 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2462 uap->buf, UIO_USERSPACE, uap->count)); 2463 } 2464 #ifndef _SYS_SYSPROTO_H_ 2465 struct readlinkat_args { 2466 int fd; 2467 char *path; 2468 char *buf; 2469 size_t bufsize; 2470 }; 2471 #endif 2472 int 2473 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2474 { 2475 2476 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2477 uap->buf, UIO_USERSPACE, uap->bufsize)); 2478 } 2479 2480 int 2481 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2482 char *buf, enum uio_seg bufseg, size_t count) 2483 { 2484 struct vnode *vp; 2485 struct iovec aiov; 2486 struct uio auio; 2487 struct nameidata nd; 2488 int error; 2489 2490 if (count > IOSIZE_MAX) 2491 return (EINVAL); 2492 2493 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2494 pathseg, path, fd, td); 2495 2496 if ((error = namei(&nd)) != 0) 2497 return (error); 2498 NDFREE(&nd, NDF_ONLY_PNBUF); 2499 vp = nd.ni_vp; 2500 #ifdef MAC 2501 error = mac_vnode_check_readlink(td->td_ucred, vp); 2502 if (error != 0) { 2503 vput(vp); 2504 return (error); 2505 } 2506 #endif 2507 if (vp->v_type != VLNK) 2508 error = EINVAL; 2509 else { 2510 aiov.iov_base = buf; 2511 aiov.iov_len = count; 2512 auio.uio_iov = &aiov; 2513 auio.uio_iovcnt = 1; 2514 auio.uio_offset = 0; 2515 auio.uio_rw = UIO_READ; 2516 auio.uio_segflg = bufseg; 2517 auio.uio_td = td; 2518 auio.uio_resid = count; 2519 error = VOP_READLINK(vp, &auio, td->td_ucred); 2520 td->td_retval[0] = count - auio.uio_resid; 2521 } 2522 vput(vp); 2523 return (error); 2524 } 2525 2526 /* 2527 * Common implementation code for chflags() and fchflags(). 2528 */ 2529 static int 2530 setfflags(td, vp, flags) 2531 struct thread *td; 2532 struct vnode *vp; 2533 u_long flags; 2534 { 2535 struct mount *mp; 2536 struct vattr vattr; 2537 int error; 2538 2539 /* We can't support the value matching VNOVAL. */ 2540 if (flags == VNOVAL) 2541 return (EOPNOTSUPP); 2542 2543 /* 2544 * Prevent non-root users from setting flags on devices. When 2545 * a device is reused, users can retain ownership of the device 2546 * if they are allowed to set flags and programs assume that 2547 * chown can't fail when done as root. 2548 */ 2549 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2550 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2551 if (error != 0) 2552 return (error); 2553 } 2554 2555 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2556 return (error); 2557 VATTR_NULL(&vattr); 2558 vattr.va_flags = flags; 2559 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2560 #ifdef MAC 2561 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2562 if (error == 0) 2563 #endif 2564 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2565 VOP_UNLOCK(vp, 0); 2566 vn_finished_write(mp); 2567 return (error); 2568 } 2569 2570 /* 2571 * Change flags of a file given a path name. 2572 */ 2573 #ifndef _SYS_SYSPROTO_H_ 2574 struct chflags_args { 2575 const char *path; 2576 u_long flags; 2577 }; 2578 #endif 2579 int 2580 sys_chflags(td, uap) 2581 struct thread *td; 2582 register struct chflags_args /* { 2583 const char *path; 2584 u_long flags; 2585 } */ *uap; 2586 { 2587 2588 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2589 uap->flags, 0)); 2590 } 2591 2592 #ifndef _SYS_SYSPROTO_H_ 2593 struct chflagsat_args { 2594 int fd; 2595 const char *path; 2596 u_long flags; 2597 int atflag; 2598 } 2599 #endif 2600 int 2601 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2602 { 2603 int fd = uap->fd; 2604 const char *path = uap->path; 2605 u_long flags = uap->flags; 2606 int atflag = uap->atflag; 2607 2608 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2609 return (EINVAL); 2610 2611 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2612 } 2613 2614 /* 2615 * Same as chflags() but doesn't follow symlinks. 2616 */ 2617 int 2618 sys_lchflags(td, uap) 2619 struct thread *td; 2620 register struct lchflags_args /* { 2621 const char *path; 2622 u_long flags; 2623 } */ *uap; 2624 { 2625 2626 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2627 uap->flags, AT_SYMLINK_NOFOLLOW)); 2628 } 2629 2630 static int 2631 kern_chflagsat(struct thread *td, int fd, const char *path, 2632 enum uio_seg pathseg, u_long flags, int atflag) 2633 { 2634 struct nameidata nd; 2635 cap_rights_t rights; 2636 int error, follow; 2637 2638 AUDIT_ARG_FFLAGS(flags); 2639 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2640 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2641 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2642 if ((error = namei(&nd)) != 0) 2643 return (error); 2644 NDFREE(&nd, NDF_ONLY_PNBUF); 2645 error = setfflags(td, nd.ni_vp, flags); 2646 vrele(nd.ni_vp); 2647 return (error); 2648 } 2649 2650 /* 2651 * Change flags of a file given a file descriptor. 2652 */ 2653 #ifndef _SYS_SYSPROTO_H_ 2654 struct fchflags_args { 2655 int fd; 2656 u_long flags; 2657 }; 2658 #endif 2659 int 2660 sys_fchflags(td, uap) 2661 struct thread *td; 2662 register struct fchflags_args /* { 2663 int fd; 2664 u_long flags; 2665 } */ *uap; 2666 { 2667 struct file *fp; 2668 cap_rights_t rights; 2669 int error; 2670 2671 AUDIT_ARG_FD(uap->fd); 2672 AUDIT_ARG_FFLAGS(uap->flags); 2673 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2674 &fp); 2675 if (error != 0) 2676 return (error); 2677 #ifdef AUDIT 2678 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2679 AUDIT_ARG_VNODE1(fp->f_vnode); 2680 VOP_UNLOCK(fp->f_vnode, 0); 2681 #endif 2682 error = setfflags(td, fp->f_vnode, uap->flags); 2683 fdrop(fp, td); 2684 return (error); 2685 } 2686 2687 /* 2688 * Common implementation code for chmod(), lchmod() and fchmod(). 2689 */ 2690 int 2691 setfmode(td, cred, vp, mode) 2692 struct thread *td; 2693 struct ucred *cred; 2694 struct vnode *vp; 2695 int mode; 2696 { 2697 struct mount *mp; 2698 struct vattr vattr; 2699 int error; 2700 2701 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2702 return (error); 2703 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2704 VATTR_NULL(&vattr); 2705 vattr.va_mode = mode & ALLPERMS; 2706 #ifdef MAC 2707 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2708 if (error == 0) 2709 #endif 2710 error = VOP_SETATTR(vp, &vattr, cred); 2711 VOP_UNLOCK(vp, 0); 2712 vn_finished_write(mp); 2713 return (error); 2714 } 2715 2716 /* 2717 * Change mode of a file given path name. 2718 */ 2719 #ifndef _SYS_SYSPROTO_H_ 2720 struct chmod_args { 2721 char *path; 2722 int mode; 2723 }; 2724 #endif 2725 int 2726 sys_chmod(td, uap) 2727 struct thread *td; 2728 register struct chmod_args /* { 2729 char *path; 2730 int mode; 2731 } */ *uap; 2732 { 2733 2734 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2735 uap->mode, 0)); 2736 } 2737 2738 #ifndef _SYS_SYSPROTO_H_ 2739 struct fchmodat_args { 2740 int dirfd; 2741 char *path; 2742 mode_t mode; 2743 int flag; 2744 } 2745 #endif 2746 int 2747 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2748 { 2749 int flag = uap->flag; 2750 int fd = uap->fd; 2751 char *path = uap->path; 2752 mode_t mode = uap->mode; 2753 2754 if (flag & ~AT_SYMLINK_NOFOLLOW) 2755 return (EINVAL); 2756 2757 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2758 } 2759 2760 /* 2761 * Change mode of a file given path name (don't follow links.) 2762 */ 2763 #ifndef _SYS_SYSPROTO_H_ 2764 struct lchmod_args { 2765 char *path; 2766 int mode; 2767 }; 2768 #endif 2769 int 2770 sys_lchmod(td, uap) 2771 struct thread *td; 2772 register struct lchmod_args /* { 2773 char *path; 2774 int mode; 2775 } */ *uap; 2776 { 2777 2778 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2779 uap->mode, AT_SYMLINK_NOFOLLOW)); 2780 } 2781 2782 int 2783 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2784 mode_t mode, int flag) 2785 { 2786 struct nameidata nd; 2787 cap_rights_t rights; 2788 int error, follow; 2789 2790 AUDIT_ARG_MODE(mode); 2791 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2792 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2793 cap_rights_init(&rights, CAP_FCHMOD), td); 2794 if ((error = namei(&nd)) != 0) 2795 return (error); 2796 NDFREE(&nd, NDF_ONLY_PNBUF); 2797 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2798 vrele(nd.ni_vp); 2799 return (error); 2800 } 2801 2802 /* 2803 * Change mode of a file given a file descriptor. 2804 */ 2805 #ifndef _SYS_SYSPROTO_H_ 2806 struct fchmod_args { 2807 int fd; 2808 int mode; 2809 }; 2810 #endif 2811 int 2812 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2813 { 2814 struct file *fp; 2815 cap_rights_t rights; 2816 int error; 2817 2818 AUDIT_ARG_FD(uap->fd); 2819 AUDIT_ARG_MODE(uap->mode); 2820 2821 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2822 if (error != 0) 2823 return (error); 2824 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2825 fdrop(fp, td); 2826 return (error); 2827 } 2828 2829 /* 2830 * Common implementation for chown(), lchown(), and fchown() 2831 */ 2832 int 2833 setfown(td, cred, vp, uid, gid) 2834 struct thread *td; 2835 struct ucred *cred; 2836 struct vnode *vp; 2837 uid_t uid; 2838 gid_t gid; 2839 { 2840 struct mount *mp; 2841 struct vattr vattr; 2842 int error; 2843 2844 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2845 return (error); 2846 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2847 VATTR_NULL(&vattr); 2848 vattr.va_uid = uid; 2849 vattr.va_gid = gid; 2850 #ifdef MAC 2851 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2852 vattr.va_gid); 2853 if (error == 0) 2854 #endif 2855 error = VOP_SETATTR(vp, &vattr, cred); 2856 VOP_UNLOCK(vp, 0); 2857 vn_finished_write(mp); 2858 return (error); 2859 } 2860 2861 /* 2862 * Set ownership given a path name. 2863 */ 2864 #ifndef _SYS_SYSPROTO_H_ 2865 struct chown_args { 2866 char *path; 2867 int uid; 2868 int gid; 2869 }; 2870 #endif 2871 int 2872 sys_chown(td, uap) 2873 struct thread *td; 2874 register struct chown_args /* { 2875 char *path; 2876 int uid; 2877 int gid; 2878 } */ *uap; 2879 { 2880 2881 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2882 uap->gid, 0)); 2883 } 2884 2885 #ifndef _SYS_SYSPROTO_H_ 2886 struct fchownat_args { 2887 int fd; 2888 const char * path; 2889 uid_t uid; 2890 gid_t gid; 2891 int flag; 2892 }; 2893 #endif 2894 int 2895 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2896 { 2897 int flag; 2898 2899 flag = uap->flag; 2900 if (flag & ~AT_SYMLINK_NOFOLLOW) 2901 return (EINVAL); 2902 2903 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2904 uap->gid, uap->flag)); 2905 } 2906 2907 int 2908 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2909 int uid, int gid, int flag) 2910 { 2911 struct nameidata nd; 2912 cap_rights_t rights; 2913 int error, follow; 2914 2915 AUDIT_ARG_OWNER(uid, gid); 2916 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2917 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2918 cap_rights_init(&rights, CAP_FCHOWN), td); 2919 2920 if ((error = namei(&nd)) != 0) 2921 return (error); 2922 NDFREE(&nd, NDF_ONLY_PNBUF); 2923 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2924 vrele(nd.ni_vp); 2925 return (error); 2926 } 2927 2928 /* 2929 * Set ownership given a path name, do not cross symlinks. 2930 */ 2931 #ifndef _SYS_SYSPROTO_H_ 2932 struct lchown_args { 2933 char *path; 2934 int uid; 2935 int gid; 2936 }; 2937 #endif 2938 int 2939 sys_lchown(td, uap) 2940 struct thread *td; 2941 register struct lchown_args /* { 2942 char *path; 2943 int uid; 2944 int gid; 2945 } */ *uap; 2946 { 2947 2948 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2949 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2950 } 2951 2952 /* 2953 * Set ownership given a file descriptor. 2954 */ 2955 #ifndef _SYS_SYSPROTO_H_ 2956 struct fchown_args { 2957 int fd; 2958 int uid; 2959 int gid; 2960 }; 2961 #endif 2962 int 2963 sys_fchown(td, uap) 2964 struct thread *td; 2965 register struct fchown_args /* { 2966 int fd; 2967 int uid; 2968 int gid; 2969 } */ *uap; 2970 { 2971 struct file *fp; 2972 cap_rights_t rights; 2973 int error; 2974 2975 AUDIT_ARG_FD(uap->fd); 2976 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2977 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2978 if (error != 0) 2979 return (error); 2980 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2981 fdrop(fp, td); 2982 return (error); 2983 } 2984 2985 /* 2986 * Common implementation code for utimes(), lutimes(), and futimes(). 2987 */ 2988 static int 2989 getutimes(usrtvp, tvpseg, tsp) 2990 const struct timeval *usrtvp; 2991 enum uio_seg tvpseg; 2992 struct timespec *tsp; 2993 { 2994 struct timeval tv[2]; 2995 const struct timeval *tvp; 2996 int error; 2997 2998 if (usrtvp == NULL) { 2999 vfs_timestamp(&tsp[0]); 3000 tsp[1] = tsp[0]; 3001 } else { 3002 if (tvpseg == UIO_SYSSPACE) { 3003 tvp = usrtvp; 3004 } else { 3005 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3006 return (error); 3007 tvp = tv; 3008 } 3009 3010 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3011 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3012 return (EINVAL); 3013 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3014 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3015 } 3016 return (0); 3017 } 3018 3019 /* 3020 * Common implementation code for futimens(), utimensat(). 3021 */ 3022 #define UTIMENS_NULL 0x1 3023 #define UTIMENS_EXIT 0x2 3024 static int 3025 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3026 struct timespec *tsp, int *retflags) 3027 { 3028 struct timespec tsnow; 3029 int error; 3030 3031 vfs_timestamp(&tsnow); 3032 *retflags = 0; 3033 if (usrtsp == NULL) { 3034 tsp[0] = tsnow; 3035 tsp[1] = tsnow; 3036 *retflags |= UTIMENS_NULL; 3037 return (0); 3038 } 3039 if (tspseg == UIO_SYSSPACE) { 3040 tsp[0] = usrtsp[0]; 3041 tsp[1] = usrtsp[1]; 3042 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3043 return (error); 3044 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3045 *retflags |= UTIMENS_EXIT; 3046 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3047 *retflags |= UTIMENS_NULL; 3048 if (tsp[0].tv_nsec == UTIME_OMIT) 3049 tsp[0].tv_sec = VNOVAL; 3050 else if (tsp[0].tv_nsec == UTIME_NOW) 3051 tsp[0] = tsnow; 3052 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3053 return (EINVAL); 3054 if (tsp[1].tv_nsec == UTIME_OMIT) 3055 tsp[1].tv_sec = VNOVAL; 3056 else if (tsp[1].tv_nsec == UTIME_NOW) 3057 tsp[1] = tsnow; 3058 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3059 return (EINVAL); 3060 3061 return (0); 3062 } 3063 3064 /* 3065 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3066 * and utimensat(). 3067 */ 3068 static int 3069 setutimes(td, vp, ts, numtimes, nullflag) 3070 struct thread *td; 3071 struct vnode *vp; 3072 const struct timespec *ts; 3073 int numtimes; 3074 int nullflag; 3075 { 3076 struct mount *mp; 3077 struct vattr vattr; 3078 int error, setbirthtime; 3079 3080 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3081 return (error); 3082 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3083 setbirthtime = 0; 3084 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3085 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3086 setbirthtime = 1; 3087 VATTR_NULL(&vattr); 3088 vattr.va_atime = ts[0]; 3089 vattr.va_mtime = ts[1]; 3090 if (setbirthtime) 3091 vattr.va_birthtime = ts[1]; 3092 if (numtimes > 2) 3093 vattr.va_birthtime = ts[2]; 3094 if (nullflag) 3095 vattr.va_vaflags |= VA_UTIMES_NULL; 3096 #ifdef MAC 3097 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3098 vattr.va_mtime); 3099 #endif 3100 if (error == 0) 3101 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3102 VOP_UNLOCK(vp, 0); 3103 vn_finished_write(mp); 3104 return (error); 3105 } 3106 3107 /* 3108 * Set the access and modification times of a file. 3109 */ 3110 #ifndef _SYS_SYSPROTO_H_ 3111 struct utimes_args { 3112 char *path; 3113 struct timeval *tptr; 3114 }; 3115 #endif 3116 int 3117 sys_utimes(td, uap) 3118 struct thread *td; 3119 register struct utimes_args /* { 3120 char *path; 3121 struct timeval *tptr; 3122 } */ *uap; 3123 { 3124 3125 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3126 uap->tptr, UIO_USERSPACE)); 3127 } 3128 3129 #ifndef _SYS_SYSPROTO_H_ 3130 struct futimesat_args { 3131 int fd; 3132 const char * path; 3133 const struct timeval * times; 3134 }; 3135 #endif 3136 int 3137 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3138 { 3139 3140 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3141 uap->times, UIO_USERSPACE)); 3142 } 3143 3144 int 3145 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3146 struct timeval *tptr, enum uio_seg tptrseg) 3147 { 3148 struct nameidata nd; 3149 struct timespec ts[2]; 3150 cap_rights_t rights; 3151 int error; 3152 3153 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3154 return (error); 3155 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3156 cap_rights_init(&rights, CAP_FUTIMES), td); 3157 3158 if ((error = namei(&nd)) != 0) 3159 return (error); 3160 NDFREE(&nd, NDF_ONLY_PNBUF); 3161 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3162 vrele(nd.ni_vp); 3163 return (error); 3164 } 3165 3166 /* 3167 * Set the access and modification times of a file. 3168 */ 3169 #ifndef _SYS_SYSPROTO_H_ 3170 struct lutimes_args { 3171 char *path; 3172 struct timeval *tptr; 3173 }; 3174 #endif 3175 int 3176 sys_lutimes(td, uap) 3177 struct thread *td; 3178 register struct lutimes_args /* { 3179 char *path; 3180 struct timeval *tptr; 3181 } */ *uap; 3182 { 3183 3184 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3185 UIO_USERSPACE)); 3186 } 3187 3188 int 3189 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3190 struct timeval *tptr, enum uio_seg tptrseg) 3191 { 3192 struct timespec ts[2]; 3193 struct nameidata nd; 3194 int error; 3195 3196 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3197 return (error); 3198 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3199 if ((error = namei(&nd)) != 0) 3200 return (error); 3201 NDFREE(&nd, NDF_ONLY_PNBUF); 3202 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3203 vrele(nd.ni_vp); 3204 return (error); 3205 } 3206 3207 /* 3208 * Set the access and modification times of a file. 3209 */ 3210 #ifndef _SYS_SYSPROTO_H_ 3211 struct futimes_args { 3212 int fd; 3213 struct timeval *tptr; 3214 }; 3215 #endif 3216 int 3217 sys_futimes(td, uap) 3218 struct thread *td; 3219 register struct futimes_args /* { 3220 int fd; 3221 struct timeval *tptr; 3222 } */ *uap; 3223 { 3224 3225 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3226 } 3227 3228 int 3229 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3230 enum uio_seg tptrseg) 3231 { 3232 struct timespec ts[2]; 3233 struct file *fp; 3234 cap_rights_t rights; 3235 int error; 3236 3237 AUDIT_ARG_FD(fd); 3238 error = getutimes(tptr, tptrseg, ts); 3239 if (error != 0) 3240 return (error); 3241 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3242 if (error != 0) 3243 return (error); 3244 #ifdef AUDIT 3245 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3246 AUDIT_ARG_VNODE1(fp->f_vnode); 3247 VOP_UNLOCK(fp->f_vnode, 0); 3248 #endif 3249 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3250 fdrop(fp, td); 3251 return (error); 3252 } 3253 3254 int 3255 sys_futimens(struct thread *td, struct futimens_args *uap) 3256 { 3257 3258 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3259 } 3260 3261 int 3262 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3263 enum uio_seg tptrseg) 3264 { 3265 struct timespec ts[2]; 3266 struct file *fp; 3267 cap_rights_t rights; 3268 int error, flags; 3269 3270 AUDIT_ARG_FD(fd); 3271 error = getutimens(tptr, tptrseg, ts, &flags); 3272 if (error != 0) 3273 return (error); 3274 if (flags & UTIMENS_EXIT) 3275 return (0); 3276 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3277 if (error != 0) 3278 return (error); 3279 #ifdef AUDIT 3280 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3281 AUDIT_ARG_VNODE1(fp->f_vnode); 3282 VOP_UNLOCK(fp->f_vnode, 0); 3283 #endif 3284 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3285 fdrop(fp, td); 3286 return (error); 3287 } 3288 3289 int 3290 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3291 { 3292 3293 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3294 uap->times, UIO_USERSPACE, uap->flag)); 3295 } 3296 3297 int 3298 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3299 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3300 { 3301 struct nameidata nd; 3302 struct timespec ts[2]; 3303 cap_rights_t rights; 3304 int error, flags; 3305 3306 if (flag & ~AT_SYMLINK_NOFOLLOW) 3307 return (EINVAL); 3308 3309 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3310 return (error); 3311 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3312 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3313 cap_rights_init(&rights, CAP_FUTIMES), td); 3314 if ((error = namei(&nd)) != 0) 3315 return (error); 3316 /* 3317 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3318 * POSIX states: 3319 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3320 * "Search permission is denied by a component of the path prefix." 3321 */ 3322 NDFREE(&nd, NDF_ONLY_PNBUF); 3323 if ((flags & UTIMENS_EXIT) == 0) 3324 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3325 vrele(nd.ni_vp); 3326 return (error); 3327 } 3328 3329 /* 3330 * Truncate a file given its path name. 3331 */ 3332 #ifndef _SYS_SYSPROTO_H_ 3333 struct truncate_args { 3334 char *path; 3335 int pad; 3336 off_t length; 3337 }; 3338 #endif 3339 int 3340 sys_truncate(td, uap) 3341 struct thread *td; 3342 register struct truncate_args /* { 3343 char *path; 3344 int pad; 3345 off_t length; 3346 } */ *uap; 3347 { 3348 3349 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3350 } 3351 3352 int 3353 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3354 { 3355 struct mount *mp; 3356 struct vnode *vp; 3357 void *rl_cookie; 3358 struct vattr vattr; 3359 struct nameidata nd; 3360 int error; 3361 3362 if (length < 0) 3363 return(EINVAL); 3364 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3365 if ((error = namei(&nd)) != 0) 3366 return (error); 3367 vp = nd.ni_vp; 3368 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3369 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3370 vn_rangelock_unlock(vp, rl_cookie); 3371 vrele(vp); 3372 return (error); 3373 } 3374 NDFREE(&nd, NDF_ONLY_PNBUF); 3375 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3376 if (vp->v_type == VDIR) 3377 error = EISDIR; 3378 #ifdef MAC 3379 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3380 } 3381 #endif 3382 else if ((error = vn_writechk(vp)) == 0 && 3383 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3384 VATTR_NULL(&vattr); 3385 vattr.va_size = length; 3386 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3387 } 3388 VOP_UNLOCK(vp, 0); 3389 vn_finished_write(mp); 3390 vn_rangelock_unlock(vp, rl_cookie); 3391 vrele(vp); 3392 return (error); 3393 } 3394 3395 #if defined(COMPAT_43) 3396 /* 3397 * Truncate a file given its path name. 3398 */ 3399 #ifndef _SYS_SYSPROTO_H_ 3400 struct otruncate_args { 3401 char *path; 3402 long length; 3403 }; 3404 #endif 3405 int 3406 otruncate(td, uap) 3407 struct thread *td; 3408 register struct otruncate_args /* { 3409 char *path; 3410 long length; 3411 } */ *uap; 3412 { 3413 struct truncate_args /* { 3414 char *path; 3415 int pad; 3416 off_t length; 3417 } */ nuap; 3418 3419 nuap.path = uap->path; 3420 nuap.length = uap->length; 3421 return (sys_truncate(td, &nuap)); 3422 } 3423 #endif /* COMPAT_43 */ 3424 3425 #if defined(COMPAT_FREEBSD6) 3426 /* Versions with the pad argument */ 3427 int 3428 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3429 { 3430 struct truncate_args ouap; 3431 3432 ouap.path = uap->path; 3433 ouap.length = uap->length; 3434 return (sys_truncate(td, &ouap)); 3435 } 3436 3437 int 3438 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3439 { 3440 struct ftruncate_args ouap; 3441 3442 ouap.fd = uap->fd; 3443 ouap.length = uap->length; 3444 return (sys_ftruncate(td, &ouap)); 3445 } 3446 #endif 3447 3448 /* 3449 * Sync an open file. 3450 */ 3451 #ifndef _SYS_SYSPROTO_H_ 3452 struct fsync_args { 3453 int fd; 3454 }; 3455 #endif 3456 int 3457 sys_fsync(td, uap) 3458 struct thread *td; 3459 struct fsync_args /* { 3460 int fd; 3461 } */ *uap; 3462 { 3463 struct vnode *vp; 3464 struct mount *mp; 3465 struct file *fp; 3466 cap_rights_t rights; 3467 int error, lock_flags; 3468 3469 AUDIT_ARG_FD(uap->fd); 3470 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3471 if (error != 0) 3472 return (error); 3473 vp = fp->f_vnode; 3474 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3475 if (error != 0) 3476 goto drop; 3477 if (MNT_SHARED_WRITES(mp) || 3478 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3479 lock_flags = LK_SHARED; 3480 } else { 3481 lock_flags = LK_EXCLUSIVE; 3482 } 3483 vn_lock(vp, lock_flags | LK_RETRY); 3484 AUDIT_ARG_VNODE1(vp); 3485 if (vp->v_object != NULL) { 3486 VM_OBJECT_WLOCK(vp->v_object); 3487 vm_object_page_clean(vp->v_object, 0, 0, 0); 3488 VM_OBJECT_WUNLOCK(vp->v_object); 3489 } 3490 error = VOP_FSYNC(vp, MNT_WAIT, td); 3491 3492 VOP_UNLOCK(vp, 0); 3493 vn_finished_write(mp); 3494 drop: 3495 fdrop(fp, td); 3496 return (error); 3497 } 3498 3499 /* 3500 * Rename files. Source and destination must either both be directories, or 3501 * both not be directories. If target is a directory, it must be empty. 3502 */ 3503 #ifndef _SYS_SYSPROTO_H_ 3504 struct rename_args { 3505 char *from; 3506 char *to; 3507 }; 3508 #endif 3509 int 3510 sys_rename(td, uap) 3511 struct thread *td; 3512 register struct rename_args /* { 3513 char *from; 3514 char *to; 3515 } */ *uap; 3516 { 3517 3518 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3519 uap->to, UIO_USERSPACE)); 3520 } 3521 3522 #ifndef _SYS_SYSPROTO_H_ 3523 struct renameat_args { 3524 int oldfd; 3525 char *old; 3526 int newfd; 3527 char *new; 3528 }; 3529 #endif 3530 int 3531 sys_renameat(struct thread *td, struct renameat_args *uap) 3532 { 3533 3534 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3535 UIO_USERSPACE)); 3536 } 3537 3538 int 3539 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3540 enum uio_seg pathseg) 3541 { 3542 struct mount *mp = NULL; 3543 struct vnode *tvp, *fvp, *tdvp; 3544 struct nameidata fromnd, tond; 3545 cap_rights_t rights; 3546 int error; 3547 3548 again: 3549 bwillwrite(); 3550 #ifdef MAC 3551 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3552 AUDITVNODE1, pathseg, old, oldfd, 3553 cap_rights_init(&rights, CAP_RENAMEAT), td); 3554 #else 3555 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3556 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3557 #endif 3558 3559 if ((error = namei(&fromnd)) != 0) 3560 return (error); 3561 #ifdef MAC 3562 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3563 fromnd.ni_vp, &fromnd.ni_cnd); 3564 VOP_UNLOCK(fromnd.ni_dvp, 0); 3565 if (fromnd.ni_dvp != fromnd.ni_vp) 3566 VOP_UNLOCK(fromnd.ni_vp, 0); 3567 #endif 3568 fvp = fromnd.ni_vp; 3569 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3570 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3571 cap_rights_init(&rights, CAP_LINKAT), td); 3572 if (fromnd.ni_vp->v_type == VDIR) 3573 tond.ni_cnd.cn_flags |= WILLBEDIR; 3574 if ((error = namei(&tond)) != 0) { 3575 /* Translate error code for rename("dir1", "dir2/."). */ 3576 if (error == EISDIR && fvp->v_type == VDIR) 3577 error = EINVAL; 3578 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3579 vrele(fromnd.ni_dvp); 3580 vrele(fvp); 3581 goto out1; 3582 } 3583 tdvp = tond.ni_dvp; 3584 tvp = tond.ni_vp; 3585 error = vn_start_write(fvp, &mp, V_NOWAIT); 3586 if (error != 0) { 3587 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3588 NDFREE(&tond, NDF_ONLY_PNBUF); 3589 if (tvp != NULL) 3590 vput(tvp); 3591 if (tdvp == tvp) 3592 vrele(tdvp); 3593 else 3594 vput(tdvp); 3595 vrele(fromnd.ni_dvp); 3596 vrele(fvp); 3597 vrele(tond.ni_startdir); 3598 if (fromnd.ni_startdir != NULL) 3599 vrele(fromnd.ni_startdir); 3600 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3601 if (error != 0) 3602 return (error); 3603 goto again; 3604 } 3605 if (tvp != NULL) { 3606 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3607 error = ENOTDIR; 3608 goto out; 3609 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3610 error = EISDIR; 3611 goto out; 3612 } 3613 #ifdef CAPABILITIES 3614 if (newfd != AT_FDCWD) { 3615 /* 3616 * If the target already exists we require CAP_UNLINKAT 3617 * from 'newfd'. 3618 */ 3619 error = cap_check(&tond.ni_filecaps.fc_rights, 3620 cap_rights_init(&rights, CAP_UNLINKAT)); 3621 if (error != 0) 3622 goto out; 3623 } 3624 #endif 3625 } 3626 if (fvp == tdvp) { 3627 error = EINVAL; 3628 goto out; 3629 } 3630 /* 3631 * If the source is the same as the destination (that is, if they 3632 * are links to the same vnode), then there is nothing to do. 3633 */ 3634 if (fvp == tvp) 3635 error = -1; 3636 #ifdef MAC 3637 else 3638 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3639 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3640 #endif 3641 out: 3642 if (error == 0) { 3643 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3644 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3645 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3646 NDFREE(&tond, NDF_ONLY_PNBUF); 3647 } else { 3648 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3649 NDFREE(&tond, NDF_ONLY_PNBUF); 3650 if (tvp != NULL) 3651 vput(tvp); 3652 if (tdvp == tvp) 3653 vrele(tdvp); 3654 else 3655 vput(tdvp); 3656 vrele(fromnd.ni_dvp); 3657 vrele(fvp); 3658 } 3659 vrele(tond.ni_startdir); 3660 vn_finished_write(mp); 3661 out1: 3662 if (fromnd.ni_startdir) 3663 vrele(fromnd.ni_startdir); 3664 if (error == -1) 3665 return (0); 3666 return (error); 3667 } 3668 3669 /* 3670 * Make a directory file. 3671 */ 3672 #ifndef _SYS_SYSPROTO_H_ 3673 struct mkdir_args { 3674 char *path; 3675 int mode; 3676 }; 3677 #endif 3678 int 3679 sys_mkdir(td, uap) 3680 struct thread *td; 3681 register struct mkdir_args /* { 3682 char *path; 3683 int mode; 3684 } */ *uap; 3685 { 3686 3687 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3688 uap->mode)); 3689 } 3690 3691 #ifndef _SYS_SYSPROTO_H_ 3692 struct mkdirat_args { 3693 int fd; 3694 char *path; 3695 mode_t mode; 3696 }; 3697 #endif 3698 int 3699 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3700 { 3701 3702 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3703 } 3704 3705 int 3706 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3707 int mode) 3708 { 3709 struct mount *mp; 3710 struct vnode *vp; 3711 struct vattr vattr; 3712 struct nameidata nd; 3713 cap_rights_t rights; 3714 int error; 3715 3716 AUDIT_ARG_MODE(mode); 3717 restart: 3718 bwillwrite(); 3719 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3720 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3721 td); 3722 nd.ni_cnd.cn_flags |= WILLBEDIR; 3723 if ((error = namei(&nd)) != 0) 3724 return (error); 3725 vp = nd.ni_vp; 3726 if (vp != NULL) { 3727 NDFREE(&nd, NDF_ONLY_PNBUF); 3728 /* 3729 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3730 * the strange behaviour of leaving the vnode unlocked 3731 * if the target is the same vnode as the parent. 3732 */ 3733 if (vp == nd.ni_dvp) 3734 vrele(nd.ni_dvp); 3735 else 3736 vput(nd.ni_dvp); 3737 vrele(vp); 3738 return (EEXIST); 3739 } 3740 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3741 NDFREE(&nd, NDF_ONLY_PNBUF); 3742 vput(nd.ni_dvp); 3743 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3744 return (error); 3745 goto restart; 3746 } 3747 VATTR_NULL(&vattr); 3748 vattr.va_type = VDIR; 3749 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3750 #ifdef MAC 3751 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3752 &vattr); 3753 if (error != 0) 3754 goto out; 3755 #endif 3756 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3757 #ifdef MAC 3758 out: 3759 #endif 3760 NDFREE(&nd, NDF_ONLY_PNBUF); 3761 vput(nd.ni_dvp); 3762 if (error == 0) 3763 vput(nd.ni_vp); 3764 vn_finished_write(mp); 3765 return (error); 3766 } 3767 3768 /* 3769 * Remove a directory file. 3770 */ 3771 #ifndef _SYS_SYSPROTO_H_ 3772 struct rmdir_args { 3773 char *path; 3774 }; 3775 #endif 3776 int 3777 sys_rmdir(td, uap) 3778 struct thread *td; 3779 struct rmdir_args /* { 3780 char *path; 3781 } */ *uap; 3782 { 3783 3784 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3785 } 3786 3787 int 3788 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3789 { 3790 struct mount *mp; 3791 struct vnode *vp; 3792 struct nameidata nd; 3793 cap_rights_t rights; 3794 int error; 3795 3796 restart: 3797 bwillwrite(); 3798 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3799 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3800 if ((error = namei(&nd)) != 0) 3801 return (error); 3802 vp = nd.ni_vp; 3803 if (vp->v_type != VDIR) { 3804 error = ENOTDIR; 3805 goto out; 3806 } 3807 /* 3808 * No rmdir "." please. 3809 */ 3810 if (nd.ni_dvp == vp) { 3811 error = EINVAL; 3812 goto out; 3813 } 3814 /* 3815 * The root of a mounted filesystem cannot be deleted. 3816 */ 3817 if (vp->v_vflag & VV_ROOT) { 3818 error = EBUSY; 3819 goto out; 3820 } 3821 #ifdef MAC 3822 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3823 &nd.ni_cnd); 3824 if (error != 0) 3825 goto out; 3826 #endif 3827 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3828 NDFREE(&nd, NDF_ONLY_PNBUF); 3829 vput(vp); 3830 if (nd.ni_dvp == vp) 3831 vrele(nd.ni_dvp); 3832 else 3833 vput(nd.ni_dvp); 3834 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3835 return (error); 3836 goto restart; 3837 } 3838 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3839 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3840 vn_finished_write(mp); 3841 out: 3842 NDFREE(&nd, NDF_ONLY_PNBUF); 3843 vput(vp); 3844 if (nd.ni_dvp == vp) 3845 vrele(nd.ni_dvp); 3846 else 3847 vput(nd.ni_dvp); 3848 return (error); 3849 } 3850 3851 #ifdef COMPAT_43 3852 /* 3853 * Read a block of directory entries in a filesystem independent format. 3854 */ 3855 #ifndef _SYS_SYSPROTO_H_ 3856 struct ogetdirentries_args { 3857 int fd; 3858 char *buf; 3859 u_int count; 3860 long *basep; 3861 }; 3862 #endif 3863 int 3864 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3865 { 3866 long loff; 3867 int error; 3868 3869 error = kern_ogetdirentries(td, uap, &loff); 3870 if (error == 0) 3871 error = copyout(&loff, uap->basep, sizeof(long)); 3872 return (error); 3873 } 3874 3875 int 3876 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3877 long *ploff) 3878 { 3879 struct vnode *vp; 3880 struct file *fp; 3881 struct uio auio, kuio; 3882 struct iovec aiov, kiov; 3883 struct dirent *dp, *edp; 3884 cap_rights_t rights; 3885 caddr_t dirbuf; 3886 int error, eofflag, readcnt; 3887 long loff; 3888 off_t foffset; 3889 3890 /* XXX arbitrary sanity limit on `count'. */ 3891 if (uap->count > 64 * 1024) 3892 return (EINVAL); 3893 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3894 if (error != 0) 3895 return (error); 3896 if ((fp->f_flag & FREAD) == 0) { 3897 fdrop(fp, td); 3898 return (EBADF); 3899 } 3900 vp = fp->f_vnode; 3901 foffset = foffset_lock(fp, 0); 3902 unionread: 3903 if (vp->v_type != VDIR) { 3904 foffset_unlock(fp, foffset, 0); 3905 fdrop(fp, td); 3906 return (EINVAL); 3907 } 3908 aiov.iov_base = uap->buf; 3909 aiov.iov_len = uap->count; 3910 auio.uio_iov = &aiov; 3911 auio.uio_iovcnt = 1; 3912 auio.uio_rw = UIO_READ; 3913 auio.uio_segflg = UIO_USERSPACE; 3914 auio.uio_td = td; 3915 auio.uio_resid = uap->count; 3916 vn_lock(vp, LK_SHARED | LK_RETRY); 3917 loff = auio.uio_offset = foffset; 3918 #ifdef MAC 3919 error = mac_vnode_check_readdir(td->td_ucred, vp); 3920 if (error != 0) { 3921 VOP_UNLOCK(vp, 0); 3922 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3923 fdrop(fp, td); 3924 return (error); 3925 } 3926 #endif 3927 # if (BYTE_ORDER != LITTLE_ENDIAN) 3928 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3929 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3930 NULL, NULL); 3931 foffset = auio.uio_offset; 3932 } else 3933 # endif 3934 { 3935 kuio = auio; 3936 kuio.uio_iov = &kiov; 3937 kuio.uio_segflg = UIO_SYSSPACE; 3938 kiov.iov_len = uap->count; 3939 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3940 kiov.iov_base = dirbuf; 3941 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3942 NULL, NULL); 3943 foffset = kuio.uio_offset; 3944 if (error == 0) { 3945 readcnt = uap->count - kuio.uio_resid; 3946 edp = (struct dirent *)&dirbuf[readcnt]; 3947 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3948 # if (BYTE_ORDER == LITTLE_ENDIAN) 3949 /* 3950 * The expected low byte of 3951 * dp->d_namlen is our dp->d_type. 3952 * The high MBZ byte of dp->d_namlen 3953 * is our dp->d_namlen. 3954 */ 3955 dp->d_type = dp->d_namlen; 3956 dp->d_namlen = 0; 3957 # else 3958 /* 3959 * The dp->d_type is the high byte 3960 * of the expected dp->d_namlen, 3961 * so must be zero'ed. 3962 */ 3963 dp->d_type = 0; 3964 # endif 3965 if (dp->d_reclen > 0) { 3966 dp = (struct dirent *) 3967 ((char *)dp + dp->d_reclen); 3968 } else { 3969 error = EIO; 3970 break; 3971 } 3972 } 3973 if (dp >= edp) 3974 error = uiomove(dirbuf, readcnt, &auio); 3975 } 3976 free(dirbuf, M_TEMP); 3977 } 3978 if (error != 0) { 3979 VOP_UNLOCK(vp, 0); 3980 foffset_unlock(fp, foffset, 0); 3981 fdrop(fp, td); 3982 return (error); 3983 } 3984 if (uap->count == auio.uio_resid && 3985 (vp->v_vflag & VV_ROOT) && 3986 (vp->v_mount->mnt_flag & MNT_UNION)) { 3987 struct vnode *tvp = vp; 3988 vp = vp->v_mount->mnt_vnodecovered; 3989 VREF(vp); 3990 fp->f_vnode = vp; 3991 fp->f_data = vp; 3992 foffset = 0; 3993 vput(tvp); 3994 goto unionread; 3995 } 3996 VOP_UNLOCK(vp, 0); 3997 foffset_unlock(fp, foffset, 0); 3998 fdrop(fp, td); 3999 td->td_retval[0] = uap->count - auio.uio_resid; 4000 if (error == 0) 4001 *ploff = loff; 4002 return (error); 4003 } 4004 #endif /* COMPAT_43 */ 4005 4006 /* 4007 * Read a block of directory entries in a filesystem independent format. 4008 */ 4009 #ifndef _SYS_SYSPROTO_H_ 4010 struct getdirentries_args { 4011 int fd; 4012 char *buf; 4013 u_int count; 4014 long *basep; 4015 }; 4016 #endif 4017 int 4018 sys_getdirentries(td, uap) 4019 struct thread *td; 4020 register struct getdirentries_args /* { 4021 int fd; 4022 char *buf; 4023 u_int count; 4024 long *basep; 4025 } */ *uap; 4026 { 4027 long base; 4028 int error; 4029 4030 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4031 NULL, UIO_USERSPACE); 4032 if (error != 0) 4033 return (error); 4034 if (uap->basep != NULL) 4035 error = copyout(&base, uap->basep, sizeof(long)); 4036 return (error); 4037 } 4038 4039 int 4040 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4041 long *basep, ssize_t *residp, enum uio_seg bufseg) 4042 { 4043 struct vnode *vp; 4044 struct file *fp; 4045 struct uio auio; 4046 struct iovec aiov; 4047 cap_rights_t rights; 4048 long loff; 4049 int error, eofflag; 4050 off_t foffset; 4051 4052 AUDIT_ARG_FD(fd); 4053 if (count > IOSIZE_MAX) 4054 return (EINVAL); 4055 auio.uio_resid = count; 4056 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 4057 if (error != 0) 4058 return (error); 4059 if ((fp->f_flag & FREAD) == 0) { 4060 fdrop(fp, td); 4061 return (EBADF); 4062 } 4063 vp = fp->f_vnode; 4064 foffset = foffset_lock(fp, 0); 4065 unionread: 4066 if (vp->v_type != VDIR) { 4067 error = EINVAL; 4068 goto fail; 4069 } 4070 aiov.iov_base = buf; 4071 aiov.iov_len = count; 4072 auio.uio_iov = &aiov; 4073 auio.uio_iovcnt = 1; 4074 auio.uio_rw = UIO_READ; 4075 auio.uio_segflg = bufseg; 4076 auio.uio_td = td; 4077 vn_lock(vp, LK_SHARED | LK_RETRY); 4078 AUDIT_ARG_VNODE1(vp); 4079 loff = auio.uio_offset = foffset; 4080 #ifdef MAC 4081 error = mac_vnode_check_readdir(td->td_ucred, vp); 4082 if (error == 0) 4083 #endif 4084 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4085 NULL); 4086 foffset = auio.uio_offset; 4087 if (error != 0) { 4088 VOP_UNLOCK(vp, 0); 4089 goto fail; 4090 } 4091 if (count == auio.uio_resid && 4092 (vp->v_vflag & VV_ROOT) && 4093 (vp->v_mount->mnt_flag & MNT_UNION)) { 4094 struct vnode *tvp = vp; 4095 4096 vp = vp->v_mount->mnt_vnodecovered; 4097 VREF(vp); 4098 fp->f_vnode = vp; 4099 fp->f_data = vp; 4100 foffset = 0; 4101 vput(tvp); 4102 goto unionread; 4103 } 4104 VOP_UNLOCK(vp, 0); 4105 *basep = loff; 4106 if (residp != NULL) 4107 *residp = auio.uio_resid; 4108 td->td_retval[0] = count - auio.uio_resid; 4109 fail: 4110 foffset_unlock(fp, foffset, 0); 4111 fdrop(fp, td); 4112 return (error); 4113 } 4114 4115 #ifndef _SYS_SYSPROTO_H_ 4116 struct getdents_args { 4117 int fd; 4118 char *buf; 4119 size_t count; 4120 }; 4121 #endif 4122 int 4123 sys_getdents(td, uap) 4124 struct thread *td; 4125 register struct getdents_args /* { 4126 int fd; 4127 char *buf; 4128 u_int count; 4129 } */ *uap; 4130 { 4131 struct getdirentries_args ap; 4132 4133 ap.fd = uap->fd; 4134 ap.buf = uap->buf; 4135 ap.count = uap->count; 4136 ap.basep = NULL; 4137 return (sys_getdirentries(td, &ap)); 4138 } 4139 4140 /* 4141 * Set the mode mask for creation of filesystem nodes. 4142 */ 4143 #ifndef _SYS_SYSPROTO_H_ 4144 struct umask_args { 4145 int newmask; 4146 }; 4147 #endif 4148 int 4149 sys_umask(td, uap) 4150 struct thread *td; 4151 struct umask_args /* { 4152 int newmask; 4153 } */ *uap; 4154 { 4155 struct filedesc *fdp; 4156 4157 fdp = td->td_proc->p_fd; 4158 FILEDESC_XLOCK(fdp); 4159 td->td_retval[0] = fdp->fd_cmask; 4160 fdp->fd_cmask = uap->newmask & ALLPERMS; 4161 FILEDESC_XUNLOCK(fdp); 4162 return (0); 4163 } 4164 4165 /* 4166 * Void all references to file by ripping underlying filesystem away from 4167 * vnode. 4168 */ 4169 #ifndef _SYS_SYSPROTO_H_ 4170 struct revoke_args { 4171 char *path; 4172 }; 4173 #endif 4174 int 4175 sys_revoke(td, uap) 4176 struct thread *td; 4177 register struct revoke_args /* { 4178 char *path; 4179 } */ *uap; 4180 { 4181 struct vnode *vp; 4182 struct vattr vattr; 4183 struct nameidata nd; 4184 int error; 4185 4186 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4187 uap->path, td); 4188 if ((error = namei(&nd)) != 0) 4189 return (error); 4190 vp = nd.ni_vp; 4191 NDFREE(&nd, NDF_ONLY_PNBUF); 4192 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4193 error = EINVAL; 4194 goto out; 4195 } 4196 #ifdef MAC 4197 error = mac_vnode_check_revoke(td->td_ucred, vp); 4198 if (error != 0) 4199 goto out; 4200 #endif 4201 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4202 if (error != 0) 4203 goto out; 4204 if (td->td_ucred->cr_uid != vattr.va_uid) { 4205 error = priv_check(td, PRIV_VFS_ADMIN); 4206 if (error != 0) 4207 goto out; 4208 } 4209 if (vcount(vp) > 1) 4210 VOP_REVOKE(vp, REVOKEALL); 4211 out: 4212 vput(vp); 4213 return (error); 4214 } 4215 4216 /* 4217 * Convert a user file descriptor to a kernel file entry and check that, if it 4218 * is a capability, the correct rights are present. A reference on the file 4219 * entry is held upon returning. 4220 */ 4221 int 4222 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4223 { 4224 struct file *fp; 4225 int error; 4226 4227 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4228 if (error != 0) 4229 return (error); 4230 4231 /* 4232 * The file could be not of the vnode type, or it may be not 4233 * yet fully initialized, in which case the f_vnode pointer 4234 * may be set, but f_ops is still badfileops. E.g., 4235 * devfs_open() transiently create such situation to 4236 * facilitate csw d_fdopen(). 4237 * 4238 * Dupfdopen() handling in kern_openat() installs the 4239 * half-baked file into the process descriptor table, allowing 4240 * other thread to dereference it. Guard against the race by 4241 * checking f_ops. 4242 */ 4243 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4244 fdrop(fp, td); 4245 return (EINVAL); 4246 } 4247 *fpp = fp; 4248 return (0); 4249 } 4250 4251 4252 /* 4253 * Get an (NFS) file handle. 4254 */ 4255 #ifndef _SYS_SYSPROTO_H_ 4256 struct lgetfh_args { 4257 char *fname; 4258 fhandle_t *fhp; 4259 }; 4260 #endif 4261 int 4262 sys_lgetfh(td, uap) 4263 struct thread *td; 4264 register struct lgetfh_args *uap; 4265 { 4266 struct nameidata nd; 4267 fhandle_t fh; 4268 register struct vnode *vp; 4269 int error; 4270 4271 error = priv_check(td, PRIV_VFS_GETFH); 4272 if (error != 0) 4273 return (error); 4274 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4275 uap->fname, td); 4276 error = namei(&nd); 4277 if (error != 0) 4278 return (error); 4279 NDFREE(&nd, NDF_ONLY_PNBUF); 4280 vp = nd.ni_vp; 4281 bzero(&fh, sizeof(fh)); 4282 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4283 error = VOP_VPTOFH(vp, &fh.fh_fid); 4284 vput(vp); 4285 if (error == 0) 4286 error = copyout(&fh, uap->fhp, sizeof (fh)); 4287 return (error); 4288 } 4289 4290 #ifndef _SYS_SYSPROTO_H_ 4291 struct getfh_args { 4292 char *fname; 4293 fhandle_t *fhp; 4294 }; 4295 #endif 4296 int 4297 sys_getfh(td, uap) 4298 struct thread *td; 4299 register struct getfh_args *uap; 4300 { 4301 struct nameidata nd; 4302 fhandle_t fh; 4303 register struct vnode *vp; 4304 int error; 4305 4306 error = priv_check(td, PRIV_VFS_GETFH); 4307 if (error != 0) 4308 return (error); 4309 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4310 uap->fname, td); 4311 error = namei(&nd); 4312 if (error != 0) 4313 return (error); 4314 NDFREE(&nd, NDF_ONLY_PNBUF); 4315 vp = nd.ni_vp; 4316 bzero(&fh, sizeof(fh)); 4317 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4318 error = VOP_VPTOFH(vp, &fh.fh_fid); 4319 vput(vp); 4320 if (error == 0) 4321 error = copyout(&fh, uap->fhp, sizeof (fh)); 4322 return (error); 4323 } 4324 4325 /* 4326 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4327 * open descriptor. 4328 * 4329 * warning: do not remove the priv_check() call or this becomes one giant 4330 * security hole. 4331 */ 4332 #ifndef _SYS_SYSPROTO_H_ 4333 struct fhopen_args { 4334 const struct fhandle *u_fhp; 4335 int flags; 4336 }; 4337 #endif 4338 int 4339 sys_fhopen(td, uap) 4340 struct thread *td; 4341 struct fhopen_args /* { 4342 const struct fhandle *u_fhp; 4343 int flags; 4344 } */ *uap; 4345 { 4346 struct mount *mp; 4347 struct vnode *vp; 4348 struct fhandle fhp; 4349 struct file *fp; 4350 int fmode, error; 4351 int indx; 4352 4353 error = priv_check(td, PRIV_VFS_FHOPEN); 4354 if (error != 0) 4355 return (error); 4356 indx = -1; 4357 fmode = FFLAGS(uap->flags); 4358 /* why not allow a non-read/write open for our lockd? */ 4359 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4360 return (EINVAL); 4361 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4362 if (error != 0) 4363 return(error); 4364 /* find the mount point */ 4365 mp = vfs_busyfs(&fhp.fh_fsid); 4366 if (mp == NULL) 4367 return (ESTALE); 4368 /* now give me my vnode, it gets returned to me locked */ 4369 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4370 vfs_unbusy(mp); 4371 if (error != 0) 4372 return (error); 4373 4374 error = falloc_noinstall(td, &fp); 4375 if (error != 0) { 4376 vput(vp); 4377 return (error); 4378 } 4379 /* 4380 * An extra reference on `fp' has been held for us by 4381 * falloc_noinstall(). 4382 */ 4383 4384 #ifdef INVARIANTS 4385 td->td_dupfd = -1; 4386 #endif 4387 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4388 if (error != 0) { 4389 KASSERT(fp->f_ops == &badfileops, 4390 ("VOP_OPEN in fhopen() set f_ops")); 4391 KASSERT(td->td_dupfd < 0, 4392 ("fhopen() encountered fdopen()")); 4393 4394 vput(vp); 4395 goto bad; 4396 } 4397 #ifdef INVARIANTS 4398 td->td_dupfd = 0; 4399 #endif 4400 fp->f_vnode = vp; 4401 fp->f_seqcount = 1; 4402 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4403 &vnops); 4404 VOP_UNLOCK(vp, 0); 4405 if ((fmode & O_TRUNC) != 0) { 4406 error = fo_truncate(fp, 0, td->td_ucred, td); 4407 if (error != 0) 4408 goto bad; 4409 } 4410 4411 error = finstall(td, fp, &indx, fmode, NULL); 4412 bad: 4413 fdrop(fp, td); 4414 td->td_retval[0] = indx; 4415 return (error); 4416 } 4417 4418 /* 4419 * Stat an (NFS) file handle. 4420 */ 4421 #ifndef _SYS_SYSPROTO_H_ 4422 struct fhstat_args { 4423 struct fhandle *u_fhp; 4424 struct stat *sb; 4425 }; 4426 #endif 4427 int 4428 sys_fhstat(td, uap) 4429 struct thread *td; 4430 register struct fhstat_args /* { 4431 struct fhandle *u_fhp; 4432 struct stat *sb; 4433 } */ *uap; 4434 { 4435 struct stat sb; 4436 struct fhandle fh; 4437 int error; 4438 4439 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4440 if (error != 0) 4441 return (error); 4442 error = kern_fhstat(td, fh, &sb); 4443 if (error == 0) 4444 error = copyout(&sb, uap->sb, sizeof(sb)); 4445 return (error); 4446 } 4447 4448 int 4449 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4450 { 4451 struct mount *mp; 4452 struct vnode *vp; 4453 int error; 4454 4455 error = priv_check(td, PRIV_VFS_FHSTAT); 4456 if (error != 0) 4457 return (error); 4458 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4459 return (ESTALE); 4460 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4461 vfs_unbusy(mp); 4462 if (error != 0) 4463 return (error); 4464 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4465 vput(vp); 4466 return (error); 4467 } 4468 4469 /* 4470 * Implement fstatfs() for (NFS) file handles. 4471 */ 4472 #ifndef _SYS_SYSPROTO_H_ 4473 struct fhstatfs_args { 4474 struct fhandle *u_fhp; 4475 struct statfs *buf; 4476 }; 4477 #endif 4478 int 4479 sys_fhstatfs(td, uap) 4480 struct thread *td; 4481 struct fhstatfs_args /* { 4482 struct fhandle *u_fhp; 4483 struct statfs *buf; 4484 } */ *uap; 4485 { 4486 struct statfs sf; 4487 fhandle_t fh; 4488 int error; 4489 4490 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4491 if (error != 0) 4492 return (error); 4493 error = kern_fhstatfs(td, fh, &sf); 4494 if (error != 0) 4495 return (error); 4496 return (copyout(&sf, uap->buf, sizeof(sf))); 4497 } 4498 4499 int 4500 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4501 { 4502 struct statfs *sp; 4503 struct mount *mp; 4504 struct vnode *vp; 4505 int error; 4506 4507 error = priv_check(td, PRIV_VFS_FHSTATFS); 4508 if (error != 0) 4509 return (error); 4510 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4511 return (ESTALE); 4512 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4513 if (error != 0) { 4514 vfs_unbusy(mp); 4515 return (error); 4516 } 4517 vput(vp); 4518 error = prison_canseemount(td->td_ucred, mp); 4519 if (error != 0) 4520 goto out; 4521 #ifdef MAC 4522 error = mac_mount_check_stat(td->td_ucred, mp); 4523 if (error != 0) 4524 goto out; 4525 #endif 4526 /* 4527 * Set these in case the underlying filesystem fails to do so. 4528 */ 4529 sp = &mp->mnt_stat; 4530 sp->f_version = STATFS_VERSION; 4531 sp->f_namemax = NAME_MAX; 4532 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4533 error = VFS_STATFS(mp, sp); 4534 if (error == 0) 4535 *buf = *sp; 4536 out: 4537 vfs_unbusy(mp); 4538 return (error); 4539 } 4540 4541 int 4542 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4543 { 4544 struct file *fp; 4545 struct mount *mp; 4546 struct vnode *vp; 4547 cap_rights_t rights; 4548 off_t olen, ooffset; 4549 int error; 4550 4551 if (offset < 0 || len <= 0) 4552 return (EINVAL); 4553 /* Check for wrap. */ 4554 if (offset > OFF_MAX - len) 4555 return (EFBIG); 4556 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4557 if (error != 0) 4558 return (error); 4559 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4560 error = ESPIPE; 4561 goto out; 4562 } 4563 if ((fp->f_flag & FWRITE) == 0) { 4564 error = EBADF; 4565 goto out; 4566 } 4567 if (fp->f_type != DTYPE_VNODE) { 4568 error = ENODEV; 4569 goto out; 4570 } 4571 vp = fp->f_vnode; 4572 if (vp->v_type != VREG) { 4573 error = ENODEV; 4574 goto out; 4575 } 4576 4577 /* Allocating blocks may take a long time, so iterate. */ 4578 for (;;) { 4579 olen = len; 4580 ooffset = offset; 4581 4582 bwillwrite(); 4583 mp = NULL; 4584 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4585 if (error != 0) 4586 break; 4587 error = vn_lock(vp, LK_EXCLUSIVE); 4588 if (error != 0) { 4589 vn_finished_write(mp); 4590 break; 4591 } 4592 #ifdef MAC 4593 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4594 if (error == 0) 4595 #endif 4596 error = VOP_ALLOCATE(vp, &offset, &len); 4597 VOP_UNLOCK(vp, 0); 4598 vn_finished_write(mp); 4599 4600 if (olen + ooffset != offset + len) { 4601 panic("offset + len changed from %jx/%jx to %jx/%jx", 4602 ooffset, olen, offset, len); 4603 } 4604 if (error != 0 || len == 0) 4605 break; 4606 KASSERT(olen > len, ("Iteration did not make progress?")); 4607 maybe_yield(); 4608 } 4609 out: 4610 fdrop(fp, td); 4611 return (error); 4612 } 4613 4614 int 4615 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4616 { 4617 4618 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4619 uap->len); 4620 return (0); 4621 } 4622 4623 /* 4624 * Unlike madvise(2), we do not make a best effort to remember every 4625 * possible caching hint. Instead, we remember the last setting with 4626 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4627 * region of any current setting. 4628 */ 4629 int 4630 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4631 int advice) 4632 { 4633 struct fadvise_info *fa, *new; 4634 struct file *fp; 4635 struct vnode *vp; 4636 cap_rights_t rights; 4637 off_t end; 4638 int error; 4639 4640 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4641 return (EINVAL); 4642 switch (advice) { 4643 case POSIX_FADV_SEQUENTIAL: 4644 case POSIX_FADV_RANDOM: 4645 case POSIX_FADV_NOREUSE: 4646 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4647 break; 4648 case POSIX_FADV_NORMAL: 4649 case POSIX_FADV_WILLNEED: 4650 case POSIX_FADV_DONTNEED: 4651 new = NULL; 4652 break; 4653 default: 4654 return (EINVAL); 4655 } 4656 /* XXX: CAP_POSIX_FADVISE? */ 4657 error = fget(td, fd, cap_rights_init(&rights), &fp); 4658 if (error != 0) 4659 goto out; 4660 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4661 error = ESPIPE; 4662 goto out; 4663 } 4664 if (fp->f_type != DTYPE_VNODE) { 4665 error = ENODEV; 4666 goto out; 4667 } 4668 vp = fp->f_vnode; 4669 if (vp->v_type != VREG) { 4670 error = ENODEV; 4671 goto out; 4672 } 4673 if (len == 0) 4674 end = OFF_MAX; 4675 else 4676 end = offset + len - 1; 4677 switch (advice) { 4678 case POSIX_FADV_SEQUENTIAL: 4679 case POSIX_FADV_RANDOM: 4680 case POSIX_FADV_NOREUSE: 4681 /* 4682 * Try to merge any existing non-standard region with 4683 * this new region if possible, otherwise create a new 4684 * non-standard region for this request. 4685 */ 4686 mtx_pool_lock(mtxpool_sleep, fp); 4687 fa = fp->f_advice; 4688 if (fa != NULL && fa->fa_advice == advice && 4689 ((fa->fa_start <= end && fa->fa_end >= offset) || 4690 (end != OFF_MAX && fa->fa_start == end + 1) || 4691 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4692 if (offset < fa->fa_start) 4693 fa->fa_start = offset; 4694 if (end > fa->fa_end) 4695 fa->fa_end = end; 4696 } else { 4697 new->fa_advice = advice; 4698 new->fa_start = offset; 4699 new->fa_end = end; 4700 new->fa_prevstart = 0; 4701 new->fa_prevend = 0; 4702 fp->f_advice = new; 4703 new = fa; 4704 } 4705 mtx_pool_unlock(mtxpool_sleep, fp); 4706 break; 4707 case POSIX_FADV_NORMAL: 4708 /* 4709 * If a the "normal" region overlaps with an existing 4710 * non-standard region, trim or remove the 4711 * non-standard region. 4712 */ 4713 mtx_pool_lock(mtxpool_sleep, fp); 4714 fa = fp->f_advice; 4715 if (fa != NULL) { 4716 if (offset <= fa->fa_start && end >= fa->fa_end) { 4717 new = fa; 4718 fp->f_advice = NULL; 4719 } else if (offset <= fa->fa_start && 4720 end >= fa->fa_start) 4721 fa->fa_start = end + 1; 4722 else if (offset <= fa->fa_end && end >= fa->fa_end) 4723 fa->fa_end = offset - 1; 4724 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4725 /* 4726 * If the "normal" region is a middle 4727 * portion of the existing 4728 * non-standard region, just remove 4729 * the whole thing rather than picking 4730 * one side or the other to 4731 * preserve. 4732 */ 4733 new = fa; 4734 fp->f_advice = NULL; 4735 } 4736 } 4737 mtx_pool_unlock(mtxpool_sleep, fp); 4738 break; 4739 case POSIX_FADV_WILLNEED: 4740 case POSIX_FADV_DONTNEED: 4741 error = VOP_ADVISE(vp, offset, end, advice); 4742 break; 4743 } 4744 out: 4745 if (fp != NULL) 4746 fdrop(fp, td); 4747 free(new, M_FADVISE); 4748 return (error); 4749 } 4750 4751 int 4752 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4753 { 4754 4755 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4756 uap->len, uap->advice); 4757 return (0); 4758 } 4759