1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int kern_chflagsat(struct thread *td, int fd, const char *path, 99 enum uio_seg pathseg, u_long flags, int atflag); 100 static int setfflags(struct thread *td, struct vnode *, u_long); 101 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 102 static int getutimens(const struct timespec *, enum uio_seg, 103 struct timespec *, int *); 104 static int 
setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 /* 118 * Sync each mounted filesystem. 119 */ 120 #ifndef _SYS_SYSPROTO_H_ 121 struct sync_args { 122 int dummy; 123 }; 124 #endif 125 /* ARGSUSED */ 126 int 127 sys_sync(td, uap) 128 struct thread *td; 129 struct sync_args *uap; 130 { 131 struct mount *mp, *nmp; 132 int save; 133 134 mtx_lock(&mountlist_mtx); 135 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 136 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 137 nmp = TAILQ_NEXT(mp, mnt_list); 138 continue; 139 } 140 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 141 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 142 save = curthread_pflags_set(TDP_SYNCIO); 143 vfs_msync(mp, MNT_NOWAIT); 144 VFS_SYNC(mp, MNT_NOWAIT); 145 curthread_pflags_restore(save); 146 vn_finished_write(mp); 147 } 148 mtx_lock(&mountlist_mtx); 149 nmp = TAILQ_NEXT(mp, mnt_list); 150 vfs_unbusy(mp); 151 } 152 mtx_unlock(&mountlist_mtx); 153 return (0); 154 } 155 156 /* 157 * Change filesystem quotas. 
158 */ 159 #ifndef _SYS_SYSPROTO_H_ 160 struct quotactl_args { 161 char *path; 162 int cmd; 163 int uid; 164 caddr_t arg; 165 }; 166 #endif 167 int 168 sys_quotactl(td, uap) 169 struct thread *td; 170 register struct quotactl_args /* { 171 char *path; 172 int cmd; 173 int uid; 174 caddr_t arg; 175 } */ *uap; 176 { 177 struct mount *mp; 178 struct nameidata nd; 179 int error; 180 181 AUDIT_ARG_CMD(uap->cmd); 182 AUDIT_ARG_UID(uap->uid); 183 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 184 return (EPERM); 185 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 186 uap->path, td); 187 if ((error = namei(&nd)) != 0) 188 return (error); 189 NDFREE(&nd, NDF_ONLY_PNBUF); 190 mp = nd.ni_vp->v_mount; 191 vfs_ref(mp); 192 vput(nd.ni_vp); 193 error = vfs_busy(mp, 0); 194 vfs_rel(mp); 195 if (error != 0) 196 return (error); 197 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 198 199 /* 200 * Since quota on operation typically needs to open quota 201 * file, the Q_QUOTAON handler needs to unbusy the mount point 202 * before calling into namei. Otherwise, unmount might be 203 * started between two vfs_busy() invocations (first is our, 204 * second is from mount point cross-walk code in lookup()), 205 * causing deadlock. 206 * 207 * Require that Q_QUOTAON handles the vfs_busy() reference on 208 * its own, always returning with ubusied mount point. 209 */ 210 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 211 vfs_unbusy(mp); 212 return (error); 213 } 214 215 /* 216 * Used by statfs conversion routines to scale the block size up if 217 * necessary so that all of the block counts are <= 'max_size'. Note 218 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 219 * value of 'n'. 
220 */ 221 void 222 statfs_scale_blocks(struct statfs *sf, long max_size) 223 { 224 uint64_t count; 225 int shift; 226 227 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 228 229 /* 230 * Attempt to scale the block counts to give a more accurate 231 * overview to userland of the ratio of free space to used 232 * space. To do this, find the largest block count and compute 233 * a divisor that lets it fit into a signed integer <= max_size. 234 */ 235 if (sf->f_bavail < 0) 236 count = -sf->f_bavail; 237 else 238 count = sf->f_bavail; 239 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 240 if (count <= max_size) 241 return; 242 243 count >>= flsl(max_size); 244 shift = 0; 245 while (count > 0) { 246 shift++; 247 count >>=1; 248 } 249 250 sf->f_bsize <<= shift; 251 sf->f_blocks >>= shift; 252 sf->f_bfree >>= shift; 253 sf->f_bavail >>= shift; 254 } 255 256 /* 257 * Get filesystem statistics. 258 */ 259 #ifndef _SYS_SYSPROTO_H_ 260 struct statfs_args { 261 char *path; 262 struct statfs *buf; 263 }; 264 #endif 265 int 266 sys_statfs(td, uap) 267 struct thread *td; 268 register struct statfs_args /* { 269 char *path; 270 struct statfs *buf; 271 } */ *uap; 272 { 273 struct statfs sf; 274 int error; 275 276 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 277 if (error == 0) 278 error = copyout(&sf, uap->buf, sizeof(sf)); 279 return (error); 280 } 281 282 int 283 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 284 struct statfs *buf) 285 { 286 struct mount *mp; 287 struct statfs *sp, sb; 288 struct nameidata nd; 289 int error; 290 291 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 292 pathseg, path, td); 293 error = namei(&nd); 294 if (error != 0) 295 return (error); 296 mp = nd.ni_vp->v_mount; 297 vfs_ref(mp); 298 NDFREE(&nd, NDF_ONLY_PNBUF); 299 vput(nd.ni_vp); 300 error = vfs_busy(mp, 0); 301 vfs_rel(mp); 302 if (error != 0) 303 return (error); 304 #ifdef MAC 305 error = mac_mount_check_stat(td->td_ucred, 
mp); 306 if (error != 0) 307 goto out; 308 #endif 309 /* 310 * Set these in case the underlying filesystem fails to do so. 311 */ 312 sp = &mp->mnt_stat; 313 sp->f_version = STATFS_VERSION; 314 sp->f_namemax = NAME_MAX; 315 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 316 error = VFS_STATFS(mp, sp); 317 if (error != 0) 318 goto out; 319 if (priv_check(td, PRIV_VFS_GENERATION)) { 320 bcopy(sp, &sb, sizeof(sb)); 321 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 322 prison_enforce_statfs(td->td_ucred, mp, &sb); 323 sp = &sb; 324 } 325 *buf = *sp; 326 out: 327 vfs_unbusy(mp); 328 return (error); 329 } 330 331 /* 332 * Get filesystem statistics. 333 */ 334 #ifndef _SYS_SYSPROTO_H_ 335 struct fstatfs_args { 336 int fd; 337 struct statfs *buf; 338 }; 339 #endif 340 int 341 sys_fstatfs(td, uap) 342 struct thread *td; 343 register struct fstatfs_args /* { 344 int fd; 345 struct statfs *buf; 346 } */ *uap; 347 { 348 struct statfs sf; 349 int error; 350 351 error = kern_fstatfs(td, uap->fd, &sf); 352 if (error == 0) 353 error = copyout(&sf, uap->buf, sizeof(sf)); 354 return (error); 355 } 356 357 int 358 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 359 { 360 struct file *fp; 361 struct mount *mp; 362 struct statfs *sp, sb; 363 struct vnode *vp; 364 cap_rights_t rights; 365 int error; 366 367 AUDIT_ARG_FD(fd); 368 error = getvnode(td->td_proc->p_fd, fd, 369 cap_rights_init(&rights, CAP_FSTATFS), &fp); 370 if (error != 0) 371 return (error); 372 vp = fp->f_vnode; 373 vn_lock(vp, LK_SHARED | LK_RETRY); 374 #ifdef AUDIT 375 AUDIT_ARG_VNODE1(vp); 376 #endif 377 mp = vp->v_mount; 378 if (mp) 379 vfs_ref(mp); 380 VOP_UNLOCK(vp, 0); 381 fdrop(fp, td); 382 if (mp == NULL) { 383 error = EBADF; 384 goto out; 385 } 386 error = vfs_busy(mp, 0); 387 vfs_rel(mp); 388 if (error != 0) 389 return (error); 390 #ifdef MAC 391 error = mac_mount_check_stat(td->td_ucred, mp); 392 if (error != 0) 393 goto out; 394 #endif 395 /* 396 * Set these in case the underlying filesystem fails to do 
so. 397 */ 398 sp = &mp->mnt_stat; 399 sp->f_version = STATFS_VERSION; 400 sp->f_namemax = NAME_MAX; 401 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 402 error = VFS_STATFS(mp, sp); 403 if (error != 0) 404 goto out; 405 if (priv_check(td, PRIV_VFS_GENERATION)) { 406 bcopy(sp, &sb, sizeof(sb)); 407 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 408 prison_enforce_statfs(td->td_ucred, mp, &sb); 409 sp = &sb; 410 } 411 *buf = *sp; 412 out: 413 if (mp) 414 vfs_unbusy(mp); 415 return (error); 416 } 417 418 /* 419 * Get statistics on all filesystems. 420 */ 421 #ifndef _SYS_SYSPROTO_H_ 422 struct getfsstat_args { 423 struct statfs *buf; 424 long bufsize; 425 int flags; 426 }; 427 #endif 428 int 429 sys_getfsstat(td, uap) 430 struct thread *td; 431 register struct getfsstat_args /* { 432 struct statfs *buf; 433 long bufsize; 434 int flags; 435 } */ *uap; 436 { 437 size_t count; 438 int error; 439 440 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 441 UIO_USERSPACE, uap->flags); 442 if (error == 0) 443 td->td_retval[0] = count; 444 return (error); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 
451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 size_t *countp, enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 
508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 *countp = maxcount; 542 else 543 *countp = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 
580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 count = uap->bufsize / sizeof(struct ostatfs); 631 size = count * sizeof(struct statfs); 632 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 633 uap->flags); 634 if (size > 0) { 635 sp = buf; 636 while (count > 0 && error == 0) { 637 cvtstatfs(sp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 sp++; 640 uap->buf++; 641 count--; 642 } 643 free(buf, M_TEMP); 644 } 645 if (error == 0) 646 td->td_retval[0] = count; 647 return (error); 648 } 649 650 /* 651 * Implement fstatfs() for (NFS) file handles. 
652 */ 653 #ifndef _SYS_SYSPROTO_H_ 654 struct freebsd4_fhstatfs_args { 655 struct fhandle *u_fhp; 656 struct ostatfs *buf; 657 }; 658 #endif 659 int 660 freebsd4_fhstatfs(td, uap) 661 struct thread *td; 662 struct freebsd4_fhstatfs_args /* { 663 struct fhandle *u_fhp; 664 struct ostatfs *buf; 665 } */ *uap; 666 { 667 struct ostatfs osb; 668 struct statfs sf; 669 fhandle_t fh; 670 int error; 671 672 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 673 if (error != 0) 674 return (error); 675 error = kern_fhstatfs(td, fh, &sf); 676 if (error != 0) 677 return (error); 678 cvtstatfs(&sf, &osb); 679 return (copyout(&osb, uap->buf, sizeof(osb))); 680 } 681 682 /* 683 * Convert a new format statfs structure to an old format statfs structure. 684 */ 685 static void 686 cvtstatfs(nsp, osp) 687 struct statfs *nsp; 688 struct ostatfs *osp; 689 { 690 691 statfs_scale_blocks(nsp, LONG_MAX); 692 bzero(osp, sizeof(*osp)); 693 osp->f_bsize = nsp->f_bsize; 694 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 695 osp->f_blocks = nsp->f_blocks; 696 osp->f_bfree = nsp->f_bfree; 697 osp->f_bavail = nsp->f_bavail; 698 osp->f_files = MIN(nsp->f_files, LONG_MAX); 699 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 700 osp->f_owner = nsp->f_owner; 701 osp->f_type = nsp->f_type; 702 osp->f_flags = nsp->f_flags; 703 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 704 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 705 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 706 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 707 strlcpy(osp->f_fstypename, nsp->f_fstypename, 708 MIN(MFSNAMELEN, OMFSNAMELEN)); 709 strlcpy(osp->f_mntonname, nsp->f_mntonname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 712 MIN(MNAMELEN, OMNAMELEN)); 713 osp->f_fsid = nsp->f_fsid; 714 } 715 #endif /* COMPAT_FREEBSD4 */ 716 717 /* 718 * Change current working directory to a given file descriptor. 
719 */ 720 #ifndef _SYS_SYSPROTO_H_ 721 struct fchdir_args { 722 int fd; 723 }; 724 #endif 725 int 726 sys_fchdir(td, uap) 727 struct thread *td; 728 struct fchdir_args /* { 729 int fd; 730 } */ *uap; 731 { 732 register struct filedesc *fdp = td->td_proc->p_fd; 733 struct vnode *vp, *tdp, *vpold; 734 struct mount *mp; 735 struct file *fp; 736 cap_rights_t rights; 737 int error; 738 739 AUDIT_ARG_FD(uap->fd); 740 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 741 &fp); 742 if (error != 0) 743 return (error); 744 vp = fp->f_vnode; 745 VREF(vp); 746 fdrop(fp, td); 747 vn_lock(vp, LK_SHARED | LK_RETRY); 748 AUDIT_ARG_VNODE1(vp); 749 error = change_dir(vp, td); 750 while (!error && (mp = vp->v_mountedhere) != NULL) { 751 if (vfs_busy(mp, 0)) 752 continue; 753 error = VFS_ROOT(mp, LK_SHARED, &tdp); 754 vfs_unbusy(mp); 755 if (error != 0) 756 break; 757 vput(vp); 758 vp = tdp; 759 } 760 if (error != 0) { 761 vput(vp); 762 return (error); 763 } 764 VOP_UNLOCK(vp, 0); 765 FILEDESC_XLOCK(fdp); 766 vpold = fdp->fd_cdir; 767 fdp->fd_cdir = vp; 768 FILEDESC_XUNLOCK(fdp); 769 vrele(vpold); 770 return (0); 771 } 772 773 /* 774 * Change current working directory (``.''). 
775 */ 776 #ifndef _SYS_SYSPROTO_H_ 777 struct chdir_args { 778 char *path; 779 }; 780 #endif 781 int 782 sys_chdir(td, uap) 783 struct thread *td; 784 struct chdir_args /* { 785 char *path; 786 } */ *uap; 787 { 788 789 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 790 } 791 792 int 793 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 794 { 795 register struct filedesc *fdp = td->td_proc->p_fd; 796 struct nameidata nd; 797 struct vnode *vp; 798 int error; 799 800 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 801 pathseg, path, td); 802 if ((error = namei(&nd)) != 0) 803 return (error); 804 if ((error = change_dir(nd.ni_vp, td)) != 0) { 805 vput(nd.ni_vp); 806 NDFREE(&nd, NDF_ONLY_PNBUF); 807 return (error); 808 } 809 VOP_UNLOCK(nd.ni_vp, 0); 810 NDFREE(&nd, NDF_ONLY_PNBUF); 811 FILEDESC_XLOCK(fdp); 812 vp = fdp->fd_cdir; 813 fdp->fd_cdir = nd.ni_vp; 814 FILEDESC_XUNLOCK(fdp); 815 vrele(vp); 816 return (0); 817 } 818 819 /* 820 * Helper function for raised chroot(2) security function: Refuse if 821 * any filedescriptors are open directories. 822 */ 823 static int 824 chroot_refuse_vdir_fds(fdp) 825 struct filedesc *fdp; 826 { 827 struct vnode *vp; 828 struct file *fp; 829 int fd; 830 831 FILEDESC_LOCK_ASSERT(fdp); 832 833 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 834 fp = fget_locked(fdp, fd); 835 if (fp == NULL) 836 continue; 837 if (fp->f_type == DTYPE_VNODE) { 838 vp = fp->f_vnode; 839 if (vp->v_type == VDIR) 840 return (EPERM); 841 } 842 } 843 return (0); 844 } 845 846 /* 847 * This sysctl determines if we will allow a process to chroot(2) if it 848 * has a directory open: 849 * 0: disallowed for all processes. 850 * 1: allowed for processes that were not already chroot(2)'ed. 851 * 2: allowed for all processes. 
852 */ 853 854 static int chroot_allow_open_directories = 1; 855 856 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 857 &chroot_allow_open_directories, 0, 858 "Allow a process to chroot(2) if it has a directory open"); 859 860 /* 861 * Change notion of root (``/'') directory. 862 */ 863 #ifndef _SYS_SYSPROTO_H_ 864 struct chroot_args { 865 char *path; 866 }; 867 #endif 868 int 869 sys_chroot(td, uap) 870 struct thread *td; 871 struct chroot_args /* { 872 char *path; 873 } */ *uap; 874 { 875 struct nameidata nd; 876 int error; 877 878 error = priv_check(td, PRIV_VFS_CHROOT); 879 if (error != 0) 880 return (error); 881 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 882 UIO_USERSPACE, uap->path, td); 883 error = namei(&nd); 884 if (error != 0) 885 goto error; 886 error = change_dir(nd.ni_vp, td); 887 if (error != 0) 888 goto e_vunlock; 889 #ifdef MAC 890 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 891 if (error != 0) 892 goto e_vunlock; 893 #endif 894 VOP_UNLOCK(nd.ni_vp, 0); 895 error = change_root(nd.ni_vp, td); 896 vrele(nd.ni_vp); 897 NDFREE(&nd, NDF_ONLY_PNBUF); 898 return (error); 899 e_vunlock: 900 vput(nd.ni_vp); 901 error: 902 NDFREE(&nd, NDF_ONLY_PNBUF); 903 return (error); 904 } 905 906 /* 907 * Common routine for chroot and chdir. Callers must provide a locked vnode 908 * instance. 909 */ 910 int 911 change_dir(vp, td) 912 struct vnode *vp; 913 struct thread *td; 914 { 915 #ifdef MAC 916 int error; 917 #endif 918 919 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 920 if (vp->v_type != VDIR) 921 return (ENOTDIR); 922 #ifdef MAC 923 error = mac_vnode_check_chdir(td->td_ucred, vp); 924 if (error != 0) 925 return (error); 926 #endif 927 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 928 } 929 930 /* 931 * Common routine for kern_chroot() and jail_attach(). The caller is 932 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 933 * authorize this operation. 
934 */ 935 int 936 change_root(vp, td) 937 struct vnode *vp; 938 struct thread *td; 939 { 940 struct filedesc *fdp; 941 struct vnode *oldvp; 942 int error; 943 944 fdp = td->td_proc->p_fd; 945 FILEDESC_XLOCK(fdp); 946 if (chroot_allow_open_directories == 0 || 947 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 948 error = chroot_refuse_vdir_fds(fdp); 949 if (error != 0) { 950 FILEDESC_XUNLOCK(fdp); 951 return (error); 952 } 953 } 954 oldvp = fdp->fd_rdir; 955 fdp->fd_rdir = vp; 956 VREF(fdp->fd_rdir); 957 if (!fdp->fd_jdir) { 958 fdp->fd_jdir = vp; 959 VREF(fdp->fd_jdir); 960 } 961 FILEDESC_XUNLOCK(fdp); 962 vrele(oldvp); 963 return (0); 964 } 965 966 static __inline void 967 flags_to_rights(int flags, cap_rights_t *rightsp) 968 { 969 970 if (flags & O_EXEC) { 971 cap_rights_set(rightsp, CAP_FEXECVE); 972 } else { 973 switch ((flags & O_ACCMODE)) { 974 case O_RDONLY: 975 cap_rights_set(rightsp, CAP_READ); 976 break; 977 case O_RDWR: 978 cap_rights_set(rightsp, CAP_READ); 979 /* FALLTHROUGH */ 980 case O_WRONLY: 981 cap_rights_set(rightsp, CAP_WRITE); 982 if (!(flags & (O_APPEND | O_TRUNC))) 983 cap_rights_set(rightsp, CAP_SEEK); 984 break; 985 } 986 } 987 988 if (flags & O_CREAT) 989 cap_rights_set(rightsp, CAP_CREATE); 990 991 if (flags & O_TRUNC) 992 cap_rights_set(rightsp, CAP_FTRUNCATE); 993 994 if (flags & (O_SYNC | O_FSYNC)) 995 cap_rights_set(rightsp, CAP_FSYNC); 996 997 if (flags & (O_EXLOCK | O_SHLOCK)) 998 cap_rights_set(rightsp, CAP_FLOCK); 999 } 1000 1001 /* 1002 * Check permissions, allocate an open file structure, and call the device 1003 * open routine if any. 
1004 */ 1005 #ifndef _SYS_SYSPROTO_H_ 1006 struct open_args { 1007 char *path; 1008 int flags; 1009 int mode; 1010 }; 1011 #endif 1012 int 1013 sys_open(td, uap) 1014 struct thread *td; 1015 register struct open_args /* { 1016 char *path; 1017 int flags; 1018 int mode; 1019 } */ *uap; 1020 { 1021 1022 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1023 uap->flags, uap->mode)); 1024 } 1025 1026 #ifndef _SYS_SYSPROTO_H_ 1027 struct openat_args { 1028 int fd; 1029 char *path; 1030 int flag; 1031 int mode; 1032 }; 1033 #endif 1034 int 1035 sys_openat(struct thread *td, struct openat_args *uap) 1036 { 1037 1038 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1039 uap->mode)); 1040 } 1041 1042 int 1043 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1044 int flags, int mode) 1045 { 1046 struct proc *p = td->td_proc; 1047 struct filedesc *fdp = p->p_fd; 1048 struct file *fp; 1049 struct vnode *vp; 1050 struct nameidata nd; 1051 cap_rights_t rights; 1052 int cmode, error, indx; 1053 1054 indx = -1; 1055 1056 AUDIT_ARG_FFLAGS(flags); 1057 AUDIT_ARG_MODE(mode); 1058 /* XXX: audit dirfd */ 1059 cap_rights_init(&rights, CAP_LOOKUP); 1060 flags_to_rights(flags, &rights); 1061 /* 1062 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1063 * may be specified. 1064 */ 1065 if (flags & O_EXEC) { 1066 if (flags & O_ACCMODE) 1067 return (EINVAL); 1068 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1069 return (EINVAL); 1070 } else { 1071 flags = FFLAGS(flags); 1072 } 1073 1074 /* 1075 * Allocate the file descriptor, but don't install a descriptor yet. 1076 */ 1077 error = falloc_noinstall(td, &fp); 1078 if (error != 0) 1079 return (error); 1080 /* 1081 * An extra reference on `fp' has been held for us by 1082 * falloc_noinstall(). 1083 */ 1084 /* Set the flags early so the finit in devfs can pick them up. 
*/ 1085 fp->f_flag = flags & FMASK; 1086 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1087 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1088 &rights, td); 1089 td->td_dupfd = -1; /* XXX check for fdopen */ 1090 error = vn_open(&nd, &flags, cmode, fp); 1091 if (error != 0) { 1092 /* 1093 * If the vn_open replaced the method vector, something 1094 * wonderous happened deep below and we just pass it up 1095 * pretending we know what we do. 1096 */ 1097 if (error == ENXIO && fp->f_ops != &badfileops) 1098 goto success; 1099 1100 /* 1101 * Handle special fdopen() case. bleh. 1102 * 1103 * Don't do this for relative (capability) lookups; we don't 1104 * understand exactly what would happen, and we don't think 1105 * that it ever should. 1106 */ 1107 if (nd.ni_strictrelative == 0 && 1108 (error == ENODEV || error == ENXIO) && 1109 td->td_dupfd >= 0) { 1110 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1111 &indx); 1112 if (error == 0) 1113 goto success; 1114 } 1115 1116 goto bad; 1117 } 1118 td->td_dupfd = 0; 1119 NDFREE(&nd, NDF_ONLY_PNBUF); 1120 vp = nd.ni_vp; 1121 1122 /* 1123 * Store the vnode, for any f_type. Typically, the vnode use 1124 * count is decremented by direct call to vn_closefile() for 1125 * files that switched type in the cdevsw fdopen() method. 1126 */ 1127 fp->f_vnode = vp; 1128 /* 1129 * If the file wasn't claimed by devfs bind it to the normal 1130 * vnode operations here. 1131 */ 1132 if (fp->f_ops == &badfileops) { 1133 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1134 fp->f_seqcount = 1; 1135 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1136 DTYPE_VNODE, vp, &vnops); 1137 } 1138 1139 VOP_UNLOCK(vp, 0); 1140 if (flags & O_TRUNC) { 1141 error = fo_truncate(fp, 0, td->td_ucred, td); 1142 if (error != 0) 1143 goto bad; 1144 } 1145 success: 1146 /* 1147 * If we haven't already installed the FD (for dupfdopen), do so now. 
1148 */ 1149 if (indx == -1) { 1150 struct filecaps *fcaps; 1151 1152 #ifdef CAPABILITIES 1153 if (nd.ni_strictrelative == 1) 1154 fcaps = &nd.ni_filecaps; 1155 else 1156 #endif 1157 fcaps = NULL; 1158 error = finstall(td, fp, &indx, flags, fcaps); 1159 /* On success finstall() consumes fcaps. */ 1160 if (error != 0) { 1161 filecaps_free(&nd.ni_filecaps); 1162 goto bad; 1163 } 1164 } else { 1165 filecaps_free(&nd.ni_filecaps); 1166 } 1167 1168 /* 1169 * Release our private reference, leaving the one associated with 1170 * the descriptor table intact. 1171 */ 1172 fdrop(fp, td); 1173 td->td_retval[0] = indx; 1174 return (0); 1175 bad: 1176 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1177 fdrop(fp, td); 1178 return (error); 1179 } 1180 1181 #ifdef COMPAT_43 1182 /* 1183 * Create a file. 1184 */ 1185 #ifndef _SYS_SYSPROTO_H_ 1186 struct ocreat_args { 1187 char *path; 1188 int mode; 1189 }; 1190 #endif 1191 int 1192 ocreat(td, uap) 1193 struct thread *td; 1194 register struct ocreat_args /* { 1195 char *path; 1196 int mode; 1197 } */ *uap; 1198 { 1199 1200 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1201 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1202 } 1203 #endif /* COMPAT_43 */ 1204 1205 /* 1206 * Create a special file. 
1207 */ 1208 #ifndef _SYS_SYSPROTO_H_ 1209 struct mknod_args { 1210 char *path; 1211 int mode; 1212 int dev; 1213 }; 1214 #endif 1215 int 1216 sys_mknod(td, uap) 1217 struct thread *td; 1218 register struct mknod_args /* { 1219 char *path; 1220 int mode; 1221 int dev; 1222 } */ *uap; 1223 { 1224 1225 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1226 uap->mode, uap->dev)); 1227 } 1228 1229 #ifndef _SYS_SYSPROTO_H_ 1230 struct mknodat_args { 1231 int fd; 1232 char *path; 1233 mode_t mode; 1234 dev_t dev; 1235 }; 1236 #endif 1237 int 1238 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1239 { 1240 1241 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1242 uap->dev)); 1243 } 1244 1245 int 1246 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1247 int mode, int dev) 1248 { 1249 struct vnode *vp; 1250 struct mount *mp; 1251 struct vattr vattr; 1252 struct nameidata nd; 1253 cap_rights_t rights; 1254 int error, whiteout = 0; 1255 1256 AUDIT_ARG_MODE(mode); 1257 AUDIT_ARG_DEV(dev); 1258 switch (mode & S_IFMT) { 1259 case S_IFCHR: 1260 case S_IFBLK: 1261 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1262 break; 1263 case S_IFMT: 1264 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1265 break; 1266 case S_IFWHT: 1267 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1268 break; 1269 case S_IFIFO: 1270 if (dev == 0) 1271 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1272 /* FALLTHROUGH */ 1273 default: 1274 error = EINVAL; 1275 break; 1276 } 1277 if (error != 0) 1278 return (error); 1279 restart: 1280 bwillwrite(); 1281 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1282 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1283 td); 1284 if ((error = namei(&nd)) != 0) 1285 return (error); 1286 vp = nd.ni_vp; 1287 if (vp != NULL) { 1288 NDFREE(&nd, NDF_ONLY_PNBUF); 1289 if (vp == nd.ni_dvp) 1290 vrele(nd.ni_dvp); 1291 else 1292 vput(nd.ni_dvp); 1293 vrele(vp); 1294 return 
(EEXIST); 1295 } else { 1296 VATTR_NULL(&vattr); 1297 vattr.va_mode = (mode & ALLPERMS) & 1298 ~td->td_proc->p_fd->fd_cmask; 1299 vattr.va_rdev = dev; 1300 whiteout = 0; 1301 1302 switch (mode & S_IFMT) { 1303 case S_IFMT: /* used by badsect to flag bad sectors */ 1304 vattr.va_type = VBAD; 1305 break; 1306 case S_IFCHR: 1307 vattr.va_type = VCHR; 1308 break; 1309 case S_IFBLK: 1310 vattr.va_type = VBLK; 1311 break; 1312 case S_IFWHT: 1313 whiteout = 1; 1314 break; 1315 default: 1316 panic("kern_mknod: invalid mode"); 1317 } 1318 } 1319 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1320 NDFREE(&nd, NDF_ONLY_PNBUF); 1321 vput(nd.ni_dvp); 1322 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1323 return (error); 1324 goto restart; 1325 } 1326 #ifdef MAC 1327 if (error == 0 && !whiteout) 1328 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1329 &nd.ni_cnd, &vattr); 1330 #endif 1331 if (error == 0) { 1332 if (whiteout) 1333 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1334 else { 1335 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1336 &nd.ni_cnd, &vattr); 1337 if (error == 0) 1338 vput(nd.ni_vp); 1339 } 1340 } 1341 NDFREE(&nd, NDF_ONLY_PNBUF); 1342 vput(nd.ni_dvp); 1343 vn_finished_write(mp); 1344 return (error); 1345 } 1346 1347 /* 1348 * Create a named pipe. 
1349 */ 1350 #ifndef _SYS_SYSPROTO_H_ 1351 struct mkfifo_args { 1352 char *path; 1353 int mode; 1354 }; 1355 #endif 1356 int 1357 sys_mkfifo(td, uap) 1358 struct thread *td; 1359 register struct mkfifo_args /* { 1360 char *path; 1361 int mode; 1362 } */ *uap; 1363 { 1364 1365 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1366 uap->mode)); 1367 } 1368 1369 #ifndef _SYS_SYSPROTO_H_ 1370 struct mkfifoat_args { 1371 int fd; 1372 char *path; 1373 mode_t mode; 1374 }; 1375 #endif 1376 int 1377 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1378 { 1379 1380 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1381 uap->mode)); 1382 } 1383 1384 int 1385 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1386 int mode) 1387 { 1388 struct mount *mp; 1389 struct vattr vattr; 1390 struct nameidata nd; 1391 cap_rights_t rights; 1392 int error; 1393 1394 AUDIT_ARG_MODE(mode); 1395 restart: 1396 bwillwrite(); 1397 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1398 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1399 td); 1400 if ((error = namei(&nd)) != 0) 1401 return (error); 1402 if (nd.ni_vp != NULL) { 1403 NDFREE(&nd, NDF_ONLY_PNBUF); 1404 if (nd.ni_vp == nd.ni_dvp) 1405 vrele(nd.ni_dvp); 1406 else 1407 vput(nd.ni_dvp); 1408 vrele(nd.ni_vp); 1409 return (EEXIST); 1410 } 1411 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1412 NDFREE(&nd, NDF_ONLY_PNBUF); 1413 vput(nd.ni_dvp); 1414 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1415 return (error); 1416 goto restart; 1417 } 1418 VATTR_NULL(&vattr); 1419 vattr.va_type = VFIFO; 1420 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1421 #ifdef MAC 1422 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1423 &vattr); 1424 if (error != 0) 1425 goto out; 1426 #endif 1427 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1428 if (error == 0) 1429 vput(nd.ni_vp); 1430 
#ifdef MAC 1431 out: 1432 #endif 1433 vput(nd.ni_dvp); 1434 vn_finished_write(mp); 1435 NDFREE(&nd, NDF_ONLY_PNBUF); 1436 return (error); 1437 } 1438 1439 /* 1440 * Make a hard file link. 1441 */ 1442 #ifndef _SYS_SYSPROTO_H_ 1443 struct link_args { 1444 char *path; 1445 char *link; 1446 }; 1447 #endif 1448 int 1449 sys_link(td, uap) 1450 struct thread *td; 1451 register struct link_args /* { 1452 char *path; 1453 char *link; 1454 } */ *uap; 1455 { 1456 1457 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1458 UIO_USERSPACE, FOLLOW)); 1459 } 1460 1461 #ifndef _SYS_SYSPROTO_H_ 1462 struct linkat_args { 1463 int fd1; 1464 char *path1; 1465 int fd2; 1466 char *path2; 1467 int flag; 1468 }; 1469 #endif 1470 int 1471 sys_linkat(struct thread *td, struct linkat_args *uap) 1472 { 1473 int flag; 1474 1475 flag = uap->flag; 1476 if (flag & ~AT_SYMLINK_FOLLOW) 1477 return (EINVAL); 1478 1479 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1480 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1481 } 1482 1483 int hardlink_check_uid = 0; 1484 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1485 &hardlink_check_uid, 0, 1486 "Unprivileged processes cannot create hard links to files owned by other " 1487 "users"); 1488 static int hardlink_check_gid = 0; 1489 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1490 &hardlink_check_gid, 0, 1491 "Unprivileged processes cannot create hard links to files owned by other " 1492 "groups"); 1493 1494 static int 1495 can_hardlink(struct vnode *vp, struct ucred *cred) 1496 { 1497 struct vattr va; 1498 int error; 1499 1500 if (!hardlink_check_uid && !hardlink_check_gid) 1501 return (0); 1502 1503 error = VOP_GETATTR(vp, &va, cred); 1504 if (error != 0) 1505 return (error); 1506 1507 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1508 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1509 if (error != 0) 1510 return (error); 1511 } 1512 1513 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1514 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1515 if (error != 0) 1516 return (error); 1517 } 1518 1519 return (0); 1520 } 1521 1522 int 1523 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1524 enum uio_seg segflg, int follow) 1525 { 1526 struct vnode *vp; 1527 struct mount *mp; 1528 struct nameidata nd; 1529 cap_rights_t rights; 1530 int error; 1531 1532 again: 1533 bwillwrite(); 1534 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1535 1536 if ((error = namei(&nd)) != 0) 1537 return (error); 1538 NDFREE(&nd, NDF_ONLY_PNBUF); 1539 vp = nd.ni_vp; 1540 if (vp->v_type == VDIR) { 1541 vrele(vp); 1542 return (EPERM); /* POSIX */ 1543 } 1544 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1545 NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), 1546 td); 1547 if ((error = namei(&nd)) == 0) { 1548 if (nd.ni_vp != NULL) { 1549 NDFREE(&nd, NDF_ONLY_PNBUF); 1550 if (nd.ni_dvp == 
nd.ni_vp) 1551 vrele(nd.ni_dvp); 1552 else 1553 vput(nd.ni_dvp); 1554 vrele(nd.ni_vp); 1555 vrele(vp); 1556 return (EEXIST); 1557 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1558 /* 1559 * Cross-device link. No need to recheck 1560 * vp->v_type, since it cannot change, except 1561 * to VBAD. 1562 */ 1563 NDFREE(&nd, NDF_ONLY_PNBUF); 1564 vput(nd.ni_dvp); 1565 vrele(vp); 1566 return (EXDEV); 1567 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1568 error = can_hardlink(vp, td->td_ucred); 1569 #ifdef MAC 1570 if (error == 0) 1571 error = mac_vnode_check_link(td->td_ucred, 1572 nd.ni_dvp, vp, &nd.ni_cnd); 1573 #endif 1574 if (error != 0) { 1575 vput(vp); 1576 vput(nd.ni_dvp); 1577 NDFREE(&nd, NDF_ONLY_PNBUF); 1578 return (error); 1579 } 1580 error = vn_start_write(vp, &mp, V_NOWAIT); 1581 if (error != 0) { 1582 vput(vp); 1583 vput(nd.ni_dvp); 1584 NDFREE(&nd, NDF_ONLY_PNBUF); 1585 error = vn_start_write(NULL, &mp, 1586 V_XSLEEP | PCATCH); 1587 if (error != 0) 1588 return (error); 1589 goto again; 1590 } 1591 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1592 VOP_UNLOCK(vp, 0); 1593 vput(nd.ni_dvp); 1594 vn_finished_write(mp); 1595 NDFREE(&nd, NDF_ONLY_PNBUF); 1596 } else { 1597 vput(nd.ni_dvp); 1598 NDFREE(&nd, NDF_ONLY_PNBUF); 1599 vrele(vp); 1600 goto again; 1601 } 1602 } 1603 vrele(vp); 1604 return (error); 1605 } 1606 1607 /* 1608 * Make a symbolic link. 
1609 */ 1610 #ifndef _SYS_SYSPROTO_H_ 1611 struct symlink_args { 1612 char *path; 1613 char *link; 1614 }; 1615 #endif 1616 int 1617 sys_symlink(td, uap) 1618 struct thread *td; 1619 register struct symlink_args /* { 1620 char *path; 1621 char *link; 1622 } */ *uap; 1623 { 1624 1625 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1626 UIO_USERSPACE)); 1627 } 1628 1629 #ifndef _SYS_SYSPROTO_H_ 1630 struct symlinkat_args { 1631 char *path; 1632 int fd; 1633 char *path2; 1634 }; 1635 #endif 1636 int 1637 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1638 { 1639 1640 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1641 UIO_USERSPACE)); 1642 } 1643 1644 int 1645 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1646 enum uio_seg segflg) 1647 { 1648 struct mount *mp; 1649 struct vattr vattr; 1650 char *syspath; 1651 struct nameidata nd; 1652 int error; 1653 cap_rights_t rights; 1654 1655 if (segflg == UIO_SYSSPACE) { 1656 syspath = path1; 1657 } else { 1658 syspath = uma_zalloc(namei_zone, M_WAITOK); 1659 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1660 goto out; 1661 } 1662 AUDIT_ARG_TEXT(syspath); 1663 restart: 1664 bwillwrite(); 1665 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1666 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1667 td); 1668 if ((error = namei(&nd)) != 0) 1669 goto out; 1670 if (nd.ni_vp) { 1671 NDFREE(&nd, NDF_ONLY_PNBUF); 1672 if (nd.ni_vp == nd.ni_dvp) 1673 vrele(nd.ni_dvp); 1674 else 1675 vput(nd.ni_dvp); 1676 vrele(nd.ni_vp); 1677 error = EEXIST; 1678 goto out; 1679 } 1680 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1681 NDFREE(&nd, NDF_ONLY_PNBUF); 1682 vput(nd.ni_dvp); 1683 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1684 goto out; 1685 goto restart; 1686 } 1687 VATTR_NULL(&vattr); 1688 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1689 #ifdef MAC 1690 vattr.va_type = VLNK; 1691 
error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1692 &vattr); 1693 if (error != 0) 1694 goto out2; 1695 #endif 1696 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1697 if (error == 0) 1698 vput(nd.ni_vp); 1699 #ifdef MAC 1700 out2: 1701 #endif 1702 NDFREE(&nd, NDF_ONLY_PNBUF); 1703 vput(nd.ni_dvp); 1704 vn_finished_write(mp); 1705 out: 1706 if (segflg != UIO_SYSSPACE) 1707 uma_zfree(namei_zone, syspath); 1708 return (error); 1709 } 1710 1711 /* 1712 * Delete a whiteout from the filesystem. 1713 */ 1714 int 1715 sys_undelete(td, uap) 1716 struct thread *td; 1717 register struct undelete_args /* { 1718 char *path; 1719 } */ *uap; 1720 { 1721 struct mount *mp; 1722 struct nameidata nd; 1723 int error; 1724 1725 restart: 1726 bwillwrite(); 1727 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1728 UIO_USERSPACE, uap->path, td); 1729 error = namei(&nd); 1730 if (error != 0) 1731 return (error); 1732 1733 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1734 NDFREE(&nd, NDF_ONLY_PNBUF); 1735 if (nd.ni_vp == nd.ni_dvp) 1736 vrele(nd.ni_dvp); 1737 else 1738 vput(nd.ni_dvp); 1739 if (nd.ni_vp) 1740 vrele(nd.ni_vp); 1741 return (EEXIST); 1742 } 1743 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1744 NDFREE(&nd, NDF_ONLY_PNBUF); 1745 vput(nd.ni_dvp); 1746 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1747 return (error); 1748 goto restart; 1749 } 1750 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1751 NDFREE(&nd, NDF_ONLY_PNBUF); 1752 vput(nd.ni_dvp); 1753 vn_finished_write(mp); 1754 return (error); 1755 } 1756 1757 /* 1758 * Delete a name from the filesystem. 
1759 */ 1760 #ifndef _SYS_SYSPROTO_H_ 1761 struct unlink_args { 1762 char *path; 1763 }; 1764 #endif 1765 int 1766 sys_unlink(td, uap) 1767 struct thread *td; 1768 struct unlink_args /* { 1769 char *path; 1770 } */ *uap; 1771 { 1772 1773 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1774 } 1775 1776 #ifndef _SYS_SYSPROTO_H_ 1777 struct unlinkat_args { 1778 int fd; 1779 char *path; 1780 int flag; 1781 }; 1782 #endif 1783 int 1784 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1785 { 1786 int flag = uap->flag; 1787 int fd = uap->fd; 1788 char *path = uap->path; 1789 1790 if (flag & ~AT_REMOVEDIR) 1791 return (EINVAL); 1792 1793 if (flag & AT_REMOVEDIR) 1794 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1795 else 1796 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1797 } 1798 1799 int 1800 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1801 ino_t oldinum) 1802 { 1803 struct mount *mp; 1804 struct vnode *vp; 1805 struct nameidata nd; 1806 struct stat sb; 1807 cap_rights_t rights; 1808 int error; 1809 1810 restart: 1811 bwillwrite(); 1812 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1813 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1814 if ((error = namei(&nd)) != 0) 1815 return (error == EINVAL ? EPERM : error); 1816 vp = nd.ni_vp; 1817 if (vp->v_type == VDIR && oldinum == 0) { 1818 error = EPERM; /* POSIX */ 1819 } else if (oldinum != 0 && 1820 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1821 sb.st_ino != oldinum) { 1822 error = EIDRM; /* Identifier removed */ 1823 } else { 1824 /* 1825 * The root of a mounted filesystem cannot be deleted. 1826 * 1827 * XXX: can this only be a VDIR case? 
1828 */ 1829 if (vp->v_vflag & VV_ROOT) 1830 error = EBUSY; 1831 } 1832 if (error == 0) { 1833 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1834 NDFREE(&nd, NDF_ONLY_PNBUF); 1835 vput(nd.ni_dvp); 1836 if (vp == nd.ni_dvp) 1837 vrele(vp); 1838 else 1839 vput(vp); 1840 if ((error = vn_start_write(NULL, &mp, 1841 V_XSLEEP | PCATCH)) != 0) 1842 return (error); 1843 goto restart; 1844 } 1845 #ifdef MAC 1846 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1847 &nd.ni_cnd); 1848 if (error != 0) 1849 goto out; 1850 #endif 1851 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1852 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1853 #ifdef MAC 1854 out: 1855 #endif 1856 vn_finished_write(mp); 1857 } 1858 NDFREE(&nd, NDF_ONLY_PNBUF); 1859 vput(nd.ni_dvp); 1860 if (vp == nd.ni_dvp) 1861 vrele(vp); 1862 else 1863 vput(vp); 1864 return (error); 1865 } 1866 1867 /* 1868 * Reposition read/write file offset. 1869 */ 1870 #ifndef _SYS_SYSPROTO_H_ 1871 struct lseek_args { 1872 int fd; 1873 int pad; 1874 off_t offset; 1875 int whence; 1876 }; 1877 #endif 1878 int 1879 sys_lseek(td, uap) 1880 struct thread *td; 1881 register struct lseek_args /* { 1882 int fd; 1883 int pad; 1884 off_t offset; 1885 int whence; 1886 } */ *uap; 1887 { 1888 struct file *fp; 1889 cap_rights_t rights; 1890 int error; 1891 1892 AUDIT_ARG_FD(uap->fd); 1893 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1894 if (error != 0) 1895 return (error); 1896 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1897 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1898 fdrop(fp, td); 1899 return (error); 1900 } 1901 1902 #if defined(COMPAT_43) 1903 /* 1904 * Reposition read/write file offset. 
1905 */ 1906 #ifndef _SYS_SYSPROTO_H_ 1907 struct olseek_args { 1908 int fd; 1909 long offset; 1910 int whence; 1911 }; 1912 #endif 1913 int 1914 olseek(td, uap) 1915 struct thread *td; 1916 register struct olseek_args /* { 1917 int fd; 1918 long offset; 1919 int whence; 1920 } */ *uap; 1921 { 1922 struct lseek_args /* { 1923 int fd; 1924 int pad; 1925 off_t offset; 1926 int whence; 1927 } */ nuap; 1928 1929 nuap.fd = uap->fd; 1930 nuap.offset = uap->offset; 1931 nuap.whence = uap->whence; 1932 return (sys_lseek(td, &nuap)); 1933 } 1934 #endif /* COMPAT_43 */ 1935 1936 #if defined(COMPAT_FREEBSD6) 1937 /* Version with the 'pad' argument */ 1938 int 1939 freebsd6_lseek(td, uap) 1940 struct thread *td; 1941 register struct freebsd6_lseek_args *uap; 1942 { 1943 struct lseek_args ouap; 1944 1945 ouap.fd = uap->fd; 1946 ouap.offset = uap->offset; 1947 ouap.whence = uap->whence; 1948 return (sys_lseek(td, &ouap)); 1949 } 1950 #endif 1951 1952 /* 1953 * Check access permissions using passed credentials. 1954 */ 1955 static int 1956 vn_access(vp, user_flags, cred, td) 1957 struct vnode *vp; 1958 int user_flags; 1959 struct ucred *cred; 1960 struct thread *td; 1961 { 1962 accmode_t accmode; 1963 int error; 1964 1965 /* Flags == 0 means only check for existence. */ 1966 if (user_flags == 0) 1967 return (0); 1968 1969 accmode = 0; 1970 if (user_flags & R_OK) 1971 accmode |= VREAD; 1972 if (user_flags & W_OK) 1973 accmode |= VWRITE; 1974 if (user_flags & X_OK) 1975 accmode |= VEXEC; 1976 #ifdef MAC 1977 error = mac_vnode_check_access(cred, vp, accmode); 1978 if (error != 0) 1979 return (error); 1980 #endif 1981 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1982 error = VOP_ACCESS(vp, accmode, cred, td); 1983 return (error); 1984 } 1985 1986 /* 1987 * Check access permissions using "real" credentials. 
1988 */ 1989 #ifndef _SYS_SYSPROTO_H_ 1990 struct access_args { 1991 char *path; 1992 int amode; 1993 }; 1994 #endif 1995 int 1996 sys_access(td, uap) 1997 struct thread *td; 1998 register struct access_args /* { 1999 char *path; 2000 int amode; 2001 } */ *uap; 2002 { 2003 2004 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2005 0, uap->amode)); 2006 } 2007 2008 #ifndef _SYS_SYSPROTO_H_ 2009 struct faccessat_args { 2010 int dirfd; 2011 char *path; 2012 int amode; 2013 int flag; 2014 } 2015 #endif 2016 int 2017 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2018 { 2019 2020 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2021 uap->amode)); 2022 } 2023 2024 int 2025 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2026 int flag, int amode) 2027 { 2028 struct ucred *cred, *usecred; 2029 struct vnode *vp; 2030 struct nameidata nd; 2031 cap_rights_t rights; 2032 int error; 2033 2034 if (flag & ~AT_EACCESS) 2035 return (EINVAL); 2036 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2037 return (EINVAL); 2038 2039 /* 2040 * Create and modify a temporary credential instead of one that 2041 * is potentially shared (if we need one). 
2042 */ 2043 cred = td->td_ucred; 2044 if ((flag & AT_EACCESS) == 0 && 2045 ((cred->cr_uid != cred->cr_ruid || 2046 cred->cr_rgid != cred->cr_groups[0]))) { 2047 usecred = crdup(cred); 2048 usecred->cr_uid = cred->cr_ruid; 2049 usecred->cr_groups[0] = cred->cr_rgid; 2050 td->td_ucred = usecred; 2051 } else 2052 usecred = cred; 2053 AUDIT_ARG_VALUE(amode); 2054 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2055 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2056 td); 2057 if ((error = namei(&nd)) != 0) 2058 goto out; 2059 vp = nd.ni_vp; 2060 2061 error = vn_access(vp, amode, usecred, td); 2062 NDFREE(&nd, NDF_ONLY_PNBUF); 2063 vput(vp); 2064 out: 2065 if (usecred != cred) { 2066 td->td_ucred = cred; 2067 crfree(usecred); 2068 } 2069 return (error); 2070 } 2071 2072 /* 2073 * Check access permissions using "effective" credentials. 2074 */ 2075 #ifndef _SYS_SYSPROTO_H_ 2076 struct eaccess_args { 2077 char *path; 2078 int amode; 2079 }; 2080 #endif 2081 int 2082 sys_eaccess(td, uap) 2083 struct thread *td; 2084 register struct eaccess_args /* { 2085 char *path; 2086 int amode; 2087 } */ *uap; 2088 { 2089 2090 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2091 AT_EACCESS, uap->amode)); 2092 } 2093 2094 #if defined(COMPAT_43) 2095 /* 2096 * Get file status; this version follows links. 2097 */ 2098 #ifndef _SYS_SYSPROTO_H_ 2099 struct ostat_args { 2100 char *path; 2101 struct ostat *ub; 2102 }; 2103 #endif 2104 int 2105 ostat(td, uap) 2106 struct thread *td; 2107 register struct ostat_args /* { 2108 char *path; 2109 struct ostat *ub; 2110 } */ *uap; 2111 { 2112 struct stat sb; 2113 struct ostat osb; 2114 int error; 2115 2116 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2117 &sb, NULL); 2118 if (error != 0) 2119 return (error); 2120 cvtstat(&sb, &osb); 2121 return (copyout(&osb, uap->ub, sizeof (osb))); 2122 } 2123 2124 /* 2125 * Get file status; this version does not follow links. 
2126 */ 2127 #ifndef _SYS_SYSPROTO_H_ 2128 struct olstat_args { 2129 char *path; 2130 struct ostat *ub; 2131 }; 2132 #endif 2133 int 2134 olstat(td, uap) 2135 struct thread *td; 2136 register struct olstat_args /* { 2137 char *path; 2138 struct ostat *ub; 2139 } */ *uap; 2140 { 2141 struct stat sb; 2142 struct ostat osb; 2143 int error; 2144 2145 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2146 UIO_USERSPACE, &sb, NULL); 2147 if (error != 0) 2148 return (error); 2149 cvtstat(&sb, &osb); 2150 return (copyout(&osb, uap->ub, sizeof (osb))); 2151 } 2152 2153 /* 2154 * Convert from an old to a new stat structure. 2155 */ 2156 void 2157 cvtstat(st, ost) 2158 struct stat *st; 2159 struct ostat *ost; 2160 { 2161 2162 ost->st_dev = st->st_dev; 2163 ost->st_ino = st->st_ino; 2164 ost->st_mode = st->st_mode; 2165 ost->st_nlink = st->st_nlink; 2166 ost->st_uid = st->st_uid; 2167 ost->st_gid = st->st_gid; 2168 ost->st_rdev = st->st_rdev; 2169 if (st->st_size < (quad_t)1 << 32) 2170 ost->st_size = st->st_size; 2171 else 2172 ost->st_size = -2; 2173 ost->st_atim = st->st_atim; 2174 ost->st_mtim = st->st_mtim; 2175 ost->st_ctim = st->st_ctim; 2176 ost->st_blksize = st->st_blksize; 2177 ost->st_blocks = st->st_blocks; 2178 ost->st_flags = st->st_flags; 2179 ost->st_gen = st->st_gen; 2180 } 2181 #endif /* COMPAT_43 */ 2182 2183 /* 2184 * Get file status; this version follows links. 
2185 */ 2186 #ifndef _SYS_SYSPROTO_H_ 2187 struct stat_args { 2188 char *path; 2189 struct stat *ub; 2190 }; 2191 #endif 2192 int 2193 sys_stat(td, uap) 2194 struct thread *td; 2195 register struct stat_args /* { 2196 char *path; 2197 struct stat *ub; 2198 } */ *uap; 2199 { 2200 struct stat sb; 2201 int error; 2202 2203 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2204 &sb, NULL); 2205 if (error == 0) 2206 error = copyout(&sb, uap->ub, sizeof (sb)); 2207 return (error); 2208 } 2209 2210 #ifndef _SYS_SYSPROTO_H_ 2211 struct fstatat_args { 2212 int fd; 2213 char *path; 2214 struct stat *buf; 2215 int flag; 2216 } 2217 #endif 2218 int 2219 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2220 { 2221 struct stat sb; 2222 int error; 2223 2224 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2225 UIO_USERSPACE, &sb, NULL); 2226 if (error == 0) 2227 error = copyout(&sb, uap->buf, sizeof (sb)); 2228 return (error); 2229 } 2230 2231 int 2232 kern_statat(struct thread *td, int flag, int fd, char *path, 2233 enum uio_seg pathseg, struct stat *sbp, 2234 void (*hook)(struct vnode *vp, struct stat *sbp)) 2235 { 2236 struct nameidata nd; 2237 struct stat sb; 2238 cap_rights_t rights; 2239 int error; 2240 2241 if (flag & ~AT_SYMLINK_NOFOLLOW) 2242 return (EINVAL); 2243 2244 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2245 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2246 cap_rights_init(&rights, CAP_FSTAT), td); 2247 2248 if ((error = namei(&nd)) != 0) 2249 return (error); 2250 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2251 if (error == 0) { 2252 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2253 if (S_ISREG(sb.st_mode)) 2254 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2255 if (__predict_false(hook != NULL)) 2256 hook(nd.ni_vp, &sb); 2257 } 2258 NDFREE(&nd, NDF_ONLY_PNBUF); 2259 vput(nd.ni_vp); 2260 if (error != 0) 2261 return (error); 2262 *sbp = sb; 2263 #ifdef KTRACE 2264 if (KTRPOINT(td, KTR_STRUCT)) 2265 ktrstat(&sb); 2266 #endif 2267 return (0); 2268 } 2269 2270 /* 2271 * Get file status; this version does not follow links. 2272 */ 2273 #ifndef _SYS_SYSPROTO_H_ 2274 struct lstat_args { 2275 char *path; 2276 struct stat *ub; 2277 }; 2278 #endif 2279 int 2280 sys_lstat(td, uap) 2281 struct thread *td; 2282 register struct lstat_args /* { 2283 char *path; 2284 struct stat *ub; 2285 } */ *uap; 2286 { 2287 struct stat sb; 2288 int error; 2289 2290 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2291 UIO_USERSPACE, &sb, NULL); 2292 if (error == 0) 2293 error = copyout(&sb, uap->ub, sizeof (sb)); 2294 return (error); 2295 } 2296 2297 /* 2298 * Implementation of the NetBSD [l]stat() functions. 
2299 */ 2300 void 2301 cvtnstat(sb, nsb) 2302 struct stat *sb; 2303 struct nstat *nsb; 2304 { 2305 2306 bzero(nsb, sizeof *nsb); 2307 nsb->st_dev = sb->st_dev; 2308 nsb->st_ino = sb->st_ino; 2309 nsb->st_mode = sb->st_mode; 2310 nsb->st_nlink = sb->st_nlink; 2311 nsb->st_uid = sb->st_uid; 2312 nsb->st_gid = sb->st_gid; 2313 nsb->st_rdev = sb->st_rdev; 2314 nsb->st_atim = sb->st_atim; 2315 nsb->st_mtim = sb->st_mtim; 2316 nsb->st_ctim = sb->st_ctim; 2317 nsb->st_size = sb->st_size; 2318 nsb->st_blocks = sb->st_blocks; 2319 nsb->st_blksize = sb->st_blksize; 2320 nsb->st_flags = sb->st_flags; 2321 nsb->st_gen = sb->st_gen; 2322 nsb->st_birthtim = sb->st_birthtim; 2323 } 2324 2325 #ifndef _SYS_SYSPROTO_H_ 2326 struct nstat_args { 2327 char *path; 2328 struct nstat *ub; 2329 }; 2330 #endif 2331 int 2332 sys_nstat(td, uap) 2333 struct thread *td; 2334 register struct nstat_args /* { 2335 char *path; 2336 struct nstat *ub; 2337 } */ *uap; 2338 { 2339 struct stat sb; 2340 struct nstat nsb; 2341 int error; 2342 2343 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2344 &sb, NULL); 2345 if (error != 0) 2346 return (error); 2347 cvtnstat(&sb, &nsb); 2348 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2349 } 2350 2351 /* 2352 * NetBSD lstat. Get file status; this version does not follow links. 2353 */ 2354 #ifndef _SYS_SYSPROTO_H_ 2355 struct lstat_args { 2356 char *path; 2357 struct stat *ub; 2358 }; 2359 #endif 2360 int 2361 sys_nlstat(td, uap) 2362 struct thread *td; 2363 register struct nlstat_args /* { 2364 char *path; 2365 struct nstat *ub; 2366 } */ *uap; 2367 { 2368 struct stat sb; 2369 struct nstat nsb; 2370 int error; 2371 2372 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2373 UIO_USERSPACE, &sb, NULL); 2374 if (error != 0) 2375 return (error); 2376 cvtnstat(&sb, &nsb); 2377 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2378 } 2379 2380 /* 2381 * Get configurable pathname variables. 
2382 */ 2383 #ifndef _SYS_SYSPROTO_H_ 2384 struct pathconf_args { 2385 char *path; 2386 int name; 2387 }; 2388 #endif 2389 int 2390 sys_pathconf(td, uap) 2391 struct thread *td; 2392 register struct pathconf_args /* { 2393 char *path; 2394 int name; 2395 } */ *uap; 2396 { 2397 2398 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2399 } 2400 2401 #ifndef _SYS_SYSPROTO_H_ 2402 struct lpathconf_args { 2403 char *path; 2404 int name; 2405 }; 2406 #endif 2407 int 2408 sys_lpathconf(td, uap) 2409 struct thread *td; 2410 register struct lpathconf_args /* { 2411 char *path; 2412 int name; 2413 } */ *uap; 2414 { 2415 2416 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2417 NOFOLLOW)); 2418 } 2419 2420 int 2421 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2422 u_long flags) 2423 { 2424 struct nameidata nd; 2425 int error; 2426 2427 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2428 pathseg, path, td); 2429 if ((error = namei(&nd)) != 0) 2430 return (error); 2431 NDFREE(&nd, NDF_ONLY_PNBUF); 2432 2433 /* If asynchronous I/O is available, it works for all files. */ 2434 if (name == _PC_ASYNC_IO) 2435 td->td_retval[0] = async_io_version; 2436 else 2437 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2438 vput(nd.ni_vp); 2439 return (error); 2440 } 2441 2442 /* 2443 * Return target name of a symbolic link. 
2444 */ 2445 #ifndef _SYS_SYSPROTO_H_ 2446 struct readlink_args { 2447 char *path; 2448 char *buf; 2449 size_t count; 2450 }; 2451 #endif 2452 int 2453 sys_readlink(td, uap) 2454 struct thread *td; 2455 register struct readlink_args /* { 2456 char *path; 2457 char *buf; 2458 size_t count; 2459 } */ *uap; 2460 { 2461 2462 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2463 uap->buf, UIO_USERSPACE, uap->count)); 2464 } 2465 #ifndef _SYS_SYSPROTO_H_ 2466 struct readlinkat_args { 2467 int fd; 2468 char *path; 2469 char *buf; 2470 size_t bufsize; 2471 }; 2472 #endif 2473 int 2474 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2475 { 2476 2477 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2478 uap->buf, UIO_USERSPACE, uap->bufsize)); 2479 } 2480 2481 int 2482 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2483 char *buf, enum uio_seg bufseg, size_t count) 2484 { 2485 struct vnode *vp; 2486 struct iovec aiov; 2487 struct uio auio; 2488 struct nameidata nd; 2489 int error; 2490 2491 if (count > IOSIZE_MAX) 2492 return (EINVAL); 2493 2494 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2495 pathseg, path, fd, td); 2496 2497 if ((error = namei(&nd)) != 0) 2498 return (error); 2499 NDFREE(&nd, NDF_ONLY_PNBUF); 2500 vp = nd.ni_vp; 2501 #ifdef MAC 2502 error = mac_vnode_check_readlink(td->td_ucred, vp); 2503 if (error != 0) { 2504 vput(vp); 2505 return (error); 2506 } 2507 #endif 2508 if (vp->v_type != VLNK) 2509 error = EINVAL; 2510 else { 2511 aiov.iov_base = buf; 2512 aiov.iov_len = count; 2513 auio.uio_iov = &aiov; 2514 auio.uio_iovcnt = 1; 2515 auio.uio_offset = 0; 2516 auio.uio_rw = UIO_READ; 2517 auio.uio_segflg = bufseg; 2518 auio.uio_td = td; 2519 auio.uio_resid = count; 2520 error = VOP_READLINK(vp, &auio, td->td_ucred); 2521 td->td_retval[0] = count - auio.uio_resid; 2522 } 2523 vput(vp); 2524 return (error); 2525 } 2526 2527 /* 2528 * Common implementation code for 
chflags() and fchflags(). 2529 */ 2530 static int 2531 setfflags(td, vp, flags) 2532 struct thread *td; 2533 struct vnode *vp; 2534 u_long flags; 2535 { 2536 struct mount *mp; 2537 struct vattr vattr; 2538 int error; 2539 2540 /* We can't support the value matching VNOVAL. */ 2541 if (flags == VNOVAL) 2542 return (EOPNOTSUPP); 2543 2544 /* 2545 * Prevent non-root users from setting flags on devices. When 2546 * a device is reused, users can retain ownership of the device 2547 * if they are allowed to set flags and programs assume that 2548 * chown can't fail when done as root. 2549 */ 2550 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2551 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2552 if (error != 0) 2553 return (error); 2554 } 2555 2556 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2557 return (error); 2558 VATTR_NULL(&vattr); 2559 vattr.va_flags = flags; 2560 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2561 #ifdef MAC 2562 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2563 if (error == 0) 2564 #endif 2565 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2566 VOP_UNLOCK(vp, 0); 2567 vn_finished_write(mp); 2568 return (error); 2569 } 2570 2571 /* 2572 * Change flags of a file given a path name. 
2573 */ 2574 #ifndef _SYS_SYSPROTO_H_ 2575 struct chflags_args { 2576 const char *path; 2577 u_long flags; 2578 }; 2579 #endif 2580 int 2581 sys_chflags(td, uap) 2582 struct thread *td; 2583 register struct chflags_args /* { 2584 const char *path; 2585 u_long flags; 2586 } */ *uap; 2587 { 2588 2589 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2590 uap->flags, 0)); 2591 } 2592 2593 #ifndef _SYS_SYSPROTO_H_ 2594 struct chflagsat_args { 2595 int fd; 2596 const char *path; 2597 u_long flags; 2598 int atflag; 2599 } 2600 #endif 2601 int 2602 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2603 { 2604 int fd = uap->fd; 2605 const char *path = uap->path; 2606 u_long flags = uap->flags; 2607 int atflag = uap->atflag; 2608 2609 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2610 return (EINVAL); 2611 2612 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2613 } 2614 2615 /* 2616 * Same as chflags() but doesn't follow symlinks. 2617 */ 2618 int 2619 sys_lchflags(td, uap) 2620 struct thread *td; 2621 register struct lchflags_args /* { 2622 const char *path; 2623 u_long flags; 2624 } */ *uap; 2625 { 2626 2627 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2628 uap->flags, AT_SYMLINK_NOFOLLOW)); 2629 } 2630 2631 static int 2632 kern_chflagsat(struct thread *td, int fd, const char *path, 2633 enum uio_seg pathseg, u_long flags, int atflag) 2634 { 2635 struct nameidata nd; 2636 cap_rights_t rights; 2637 int error, follow; 2638 2639 AUDIT_ARG_FFLAGS(flags); 2640 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2641 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2642 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2643 if ((error = namei(&nd)) != 0) 2644 return (error); 2645 NDFREE(&nd, NDF_ONLY_PNBUF); 2646 error = setfflags(td, nd.ni_vp, flags); 2647 vrele(nd.ni_vp); 2648 return (error); 2649 } 2650 2651 /* 2652 * Change flags of a file given a file descriptor. 
2653 */ 2654 #ifndef _SYS_SYSPROTO_H_ 2655 struct fchflags_args { 2656 int fd; 2657 u_long flags; 2658 }; 2659 #endif 2660 int 2661 sys_fchflags(td, uap) 2662 struct thread *td; 2663 register struct fchflags_args /* { 2664 int fd; 2665 u_long flags; 2666 } */ *uap; 2667 { 2668 struct file *fp; 2669 cap_rights_t rights; 2670 int error; 2671 2672 AUDIT_ARG_FD(uap->fd); 2673 AUDIT_ARG_FFLAGS(uap->flags); 2674 error = getvnode(td->td_proc->p_fd, uap->fd, 2675 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2676 if (error != 0) 2677 return (error); 2678 #ifdef AUDIT 2679 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2680 AUDIT_ARG_VNODE1(fp->f_vnode); 2681 VOP_UNLOCK(fp->f_vnode, 0); 2682 #endif 2683 error = setfflags(td, fp->f_vnode, uap->flags); 2684 fdrop(fp, td); 2685 return (error); 2686 } 2687 2688 /* 2689 * Common implementation code for chmod(), lchmod() and fchmod(). 2690 */ 2691 int 2692 setfmode(td, cred, vp, mode) 2693 struct thread *td; 2694 struct ucred *cred; 2695 struct vnode *vp; 2696 int mode; 2697 { 2698 struct mount *mp; 2699 struct vattr vattr; 2700 int error; 2701 2702 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2703 return (error); 2704 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2705 VATTR_NULL(&vattr); 2706 vattr.va_mode = mode & ALLPERMS; 2707 #ifdef MAC 2708 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2709 if (error == 0) 2710 #endif 2711 error = VOP_SETATTR(vp, &vattr, cred); 2712 VOP_UNLOCK(vp, 0); 2713 vn_finished_write(mp); 2714 return (error); 2715 } 2716 2717 /* 2718 * Change mode of a file given path name. 
2719 */ 2720 #ifndef _SYS_SYSPROTO_H_ 2721 struct chmod_args { 2722 char *path; 2723 int mode; 2724 }; 2725 #endif 2726 int 2727 sys_chmod(td, uap) 2728 struct thread *td; 2729 register struct chmod_args /* { 2730 char *path; 2731 int mode; 2732 } */ *uap; 2733 { 2734 2735 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2736 uap->mode, 0)); 2737 } 2738 2739 #ifndef _SYS_SYSPROTO_H_ 2740 struct fchmodat_args { 2741 int dirfd; 2742 char *path; 2743 mode_t mode; 2744 int flag; 2745 } 2746 #endif 2747 int 2748 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2749 { 2750 int flag = uap->flag; 2751 int fd = uap->fd; 2752 char *path = uap->path; 2753 mode_t mode = uap->mode; 2754 2755 if (flag & ~AT_SYMLINK_NOFOLLOW) 2756 return (EINVAL); 2757 2758 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2759 } 2760 2761 /* 2762 * Change mode of a file given path name (don't follow links.) 2763 */ 2764 #ifndef _SYS_SYSPROTO_H_ 2765 struct lchmod_args { 2766 char *path; 2767 int mode; 2768 }; 2769 #endif 2770 int 2771 sys_lchmod(td, uap) 2772 struct thread *td; 2773 register struct lchmod_args /* { 2774 char *path; 2775 int mode; 2776 } */ *uap; 2777 { 2778 2779 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2780 uap->mode, AT_SYMLINK_NOFOLLOW)); 2781 } 2782 2783 int 2784 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2785 mode_t mode, int flag) 2786 { 2787 struct nameidata nd; 2788 cap_rights_t rights; 2789 int error, follow; 2790 2791 AUDIT_ARG_MODE(mode); 2792 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2793 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2794 cap_rights_init(&rights, CAP_FCHMOD), td); 2795 if ((error = namei(&nd)) != 0) 2796 return (error); 2797 NDFREE(&nd, NDF_ONLY_PNBUF); 2798 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2799 vrele(nd.ni_vp); 2800 return (error); 2801 } 2802 2803 /* 2804 * Change mode of a file given a file descriptor. 2805 */ 2806 #ifndef _SYS_SYSPROTO_H_ 2807 struct fchmod_args { 2808 int fd; 2809 int mode; 2810 }; 2811 #endif 2812 int 2813 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2814 { 2815 struct file *fp; 2816 cap_rights_t rights; 2817 int error; 2818 2819 AUDIT_ARG_FD(uap->fd); 2820 AUDIT_ARG_MODE(uap->mode); 2821 2822 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2823 if (error != 0) 2824 return (error); 2825 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2826 fdrop(fp, td); 2827 return (error); 2828 } 2829 2830 /* 2831 * Common implementation for chown(), lchown(), and fchown() 2832 */ 2833 int 2834 setfown(td, cred, vp, uid, gid) 2835 struct thread *td; 2836 struct ucred *cred; 2837 struct vnode *vp; 2838 uid_t uid; 2839 gid_t gid; 2840 { 2841 struct mount *mp; 2842 struct vattr vattr; 2843 int error; 2844 2845 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2846 return (error); 2847 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2848 VATTR_NULL(&vattr); 2849 vattr.va_uid = uid; 2850 vattr.va_gid = gid; 2851 #ifdef MAC 2852 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2853 vattr.va_gid); 2854 if (error == 0) 2855 #endif 2856 error = VOP_SETATTR(vp, &vattr, cred); 2857 VOP_UNLOCK(vp, 0); 2858 vn_finished_write(mp); 2859 return (error); 2860 } 2861 2862 /* 2863 * Set ownership given a path name. 
2864 */ 2865 #ifndef _SYS_SYSPROTO_H_ 2866 struct chown_args { 2867 char *path; 2868 int uid; 2869 int gid; 2870 }; 2871 #endif 2872 int 2873 sys_chown(td, uap) 2874 struct thread *td; 2875 register struct chown_args /* { 2876 char *path; 2877 int uid; 2878 int gid; 2879 } */ *uap; 2880 { 2881 2882 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2883 uap->gid, 0)); 2884 } 2885 2886 #ifndef _SYS_SYSPROTO_H_ 2887 struct fchownat_args { 2888 int fd; 2889 const char * path; 2890 uid_t uid; 2891 gid_t gid; 2892 int flag; 2893 }; 2894 #endif 2895 int 2896 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2897 { 2898 int flag; 2899 2900 flag = uap->flag; 2901 if (flag & ~AT_SYMLINK_NOFOLLOW) 2902 return (EINVAL); 2903 2904 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2905 uap->gid, uap->flag)); 2906 } 2907 2908 int 2909 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2910 int uid, int gid, int flag) 2911 { 2912 struct nameidata nd; 2913 cap_rights_t rights; 2914 int error, follow; 2915 2916 AUDIT_ARG_OWNER(uid, gid); 2917 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2918 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2919 cap_rights_init(&rights, CAP_FCHOWN), td); 2920 2921 if ((error = namei(&nd)) != 0) 2922 return (error); 2923 NDFREE(&nd, NDF_ONLY_PNBUF); 2924 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2925 vrele(nd.ni_vp); 2926 return (error); 2927 } 2928 2929 /* 2930 * Set ownership given a path name, do not cross symlinks. 
2931 */ 2932 #ifndef _SYS_SYSPROTO_H_ 2933 struct lchown_args { 2934 char *path; 2935 int uid; 2936 int gid; 2937 }; 2938 #endif 2939 int 2940 sys_lchown(td, uap) 2941 struct thread *td; 2942 register struct lchown_args /* { 2943 char *path; 2944 int uid; 2945 int gid; 2946 } */ *uap; 2947 { 2948 2949 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2950 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2951 } 2952 2953 /* 2954 * Set ownership given a file descriptor. 2955 */ 2956 #ifndef _SYS_SYSPROTO_H_ 2957 struct fchown_args { 2958 int fd; 2959 int uid; 2960 int gid; 2961 }; 2962 #endif 2963 int 2964 sys_fchown(td, uap) 2965 struct thread *td; 2966 register struct fchown_args /* { 2967 int fd; 2968 int uid; 2969 int gid; 2970 } */ *uap; 2971 { 2972 struct file *fp; 2973 cap_rights_t rights; 2974 int error; 2975 2976 AUDIT_ARG_FD(uap->fd); 2977 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2978 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2979 if (error != 0) 2980 return (error); 2981 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2982 fdrop(fp, td); 2983 return (error); 2984 } 2985 2986 /* 2987 * Common implementation code for utimes(), lutimes(), and futimes(). 
2988 */ 2989 static int 2990 getutimes(usrtvp, tvpseg, tsp) 2991 const struct timeval *usrtvp; 2992 enum uio_seg tvpseg; 2993 struct timespec *tsp; 2994 { 2995 struct timeval tv[2]; 2996 const struct timeval *tvp; 2997 int error; 2998 2999 if (usrtvp == NULL) { 3000 vfs_timestamp(&tsp[0]); 3001 tsp[1] = tsp[0]; 3002 } else { 3003 if (tvpseg == UIO_SYSSPACE) { 3004 tvp = usrtvp; 3005 } else { 3006 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3007 return (error); 3008 tvp = tv; 3009 } 3010 3011 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3012 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3013 return (EINVAL); 3014 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3015 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3016 } 3017 return (0); 3018 } 3019 3020 /* 3021 * Common implementation code for futimens(), utimensat(). 3022 */ 3023 #define UTIMENS_NULL 0x1 3024 #define UTIMENS_EXIT 0x2 3025 static int 3026 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3027 struct timespec *tsp, int *retflags) 3028 { 3029 struct timespec tsnow; 3030 int error; 3031 3032 vfs_timestamp(&tsnow); 3033 *retflags = 0; 3034 if (usrtsp == NULL) { 3035 tsp[0] = tsnow; 3036 tsp[1] = tsnow; 3037 *retflags |= UTIMENS_NULL; 3038 return (0); 3039 } 3040 if (tspseg == UIO_SYSSPACE) { 3041 tsp[0] = usrtsp[0]; 3042 tsp[1] = usrtsp[1]; 3043 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3044 return (error); 3045 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3046 *retflags |= UTIMENS_EXIT; 3047 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3048 *retflags |= UTIMENS_NULL; 3049 if (tsp[0].tv_nsec == UTIME_OMIT) 3050 tsp[0].tv_sec = VNOVAL; 3051 else if (tsp[0].tv_nsec == UTIME_NOW) 3052 tsp[0] = tsnow; 3053 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3054 return (EINVAL); 3055 if (tsp[1].tv_nsec == UTIME_OMIT) 3056 tsp[1].tv_sec = VNOVAL; 3057 else if (tsp[1].tv_nsec == UTIME_NOW) 3058 tsp[1] = tsnow; 3059 else if 
(tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3060 return (EINVAL); 3061 3062 return (0); 3063 } 3064 3065 /* 3066 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3067 * and utimensat(). 3068 */ 3069 static int 3070 setutimes(td, vp, ts, numtimes, nullflag) 3071 struct thread *td; 3072 struct vnode *vp; 3073 const struct timespec *ts; 3074 int numtimes; 3075 int nullflag; 3076 { 3077 struct mount *mp; 3078 struct vattr vattr; 3079 int error, setbirthtime; 3080 3081 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3082 return (error); 3083 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3084 setbirthtime = 0; 3085 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3086 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3087 setbirthtime = 1; 3088 VATTR_NULL(&vattr); 3089 vattr.va_atime = ts[0]; 3090 vattr.va_mtime = ts[1]; 3091 if (setbirthtime) 3092 vattr.va_birthtime = ts[1]; 3093 if (numtimes > 2) 3094 vattr.va_birthtime = ts[2]; 3095 if (nullflag) 3096 vattr.va_vaflags |= VA_UTIMES_NULL; 3097 #ifdef MAC 3098 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3099 vattr.va_mtime); 3100 #endif 3101 if (error == 0) 3102 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3103 VOP_UNLOCK(vp, 0); 3104 vn_finished_write(mp); 3105 return (error); 3106 } 3107 3108 /* 3109 * Set the access and modification times of a file. 
3110 */ 3111 #ifndef _SYS_SYSPROTO_H_ 3112 struct utimes_args { 3113 char *path; 3114 struct timeval *tptr; 3115 }; 3116 #endif 3117 int 3118 sys_utimes(td, uap) 3119 struct thread *td; 3120 register struct utimes_args /* { 3121 char *path; 3122 struct timeval *tptr; 3123 } */ *uap; 3124 { 3125 3126 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3127 uap->tptr, UIO_USERSPACE)); 3128 } 3129 3130 #ifndef _SYS_SYSPROTO_H_ 3131 struct futimesat_args { 3132 int fd; 3133 const char * path; 3134 const struct timeval * times; 3135 }; 3136 #endif 3137 int 3138 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3139 { 3140 3141 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3142 uap->times, UIO_USERSPACE)); 3143 } 3144 3145 int 3146 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3147 struct timeval *tptr, enum uio_seg tptrseg) 3148 { 3149 struct nameidata nd; 3150 struct timespec ts[2]; 3151 cap_rights_t rights; 3152 int error; 3153 3154 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3155 return (error); 3156 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3157 cap_rights_init(&rights, CAP_FUTIMES), td); 3158 3159 if ((error = namei(&nd)) != 0) 3160 return (error); 3161 NDFREE(&nd, NDF_ONLY_PNBUF); 3162 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3163 vrele(nd.ni_vp); 3164 return (error); 3165 } 3166 3167 /* 3168 * Set the access and modification times of a file. 
3169 */ 3170 #ifndef _SYS_SYSPROTO_H_ 3171 struct lutimes_args { 3172 char *path; 3173 struct timeval *tptr; 3174 }; 3175 #endif 3176 int 3177 sys_lutimes(td, uap) 3178 struct thread *td; 3179 register struct lutimes_args /* { 3180 char *path; 3181 struct timeval *tptr; 3182 } */ *uap; 3183 { 3184 3185 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3186 UIO_USERSPACE)); 3187 } 3188 3189 int 3190 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3191 struct timeval *tptr, enum uio_seg tptrseg) 3192 { 3193 struct timespec ts[2]; 3194 struct nameidata nd; 3195 int error; 3196 3197 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3198 return (error); 3199 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3200 if ((error = namei(&nd)) != 0) 3201 return (error); 3202 NDFREE(&nd, NDF_ONLY_PNBUF); 3203 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3204 vrele(nd.ni_vp); 3205 return (error); 3206 } 3207 3208 /* 3209 * Set the access and modification times of a file. 
3210 */ 3211 #ifndef _SYS_SYSPROTO_H_ 3212 struct futimes_args { 3213 int fd; 3214 struct timeval *tptr; 3215 }; 3216 #endif 3217 int 3218 sys_futimes(td, uap) 3219 struct thread *td; 3220 register struct futimes_args /* { 3221 int fd; 3222 struct timeval *tptr; 3223 } */ *uap; 3224 { 3225 3226 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3227 } 3228 3229 int 3230 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3231 enum uio_seg tptrseg) 3232 { 3233 struct timespec ts[2]; 3234 struct file *fp; 3235 cap_rights_t rights; 3236 int error; 3237 3238 AUDIT_ARG_FD(fd); 3239 error = getutimes(tptr, tptrseg, ts); 3240 if (error != 0) 3241 return (error); 3242 error = getvnode(td->td_proc->p_fd, fd, 3243 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3244 if (error != 0) 3245 return (error); 3246 #ifdef AUDIT 3247 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3248 AUDIT_ARG_VNODE1(fp->f_vnode); 3249 VOP_UNLOCK(fp->f_vnode, 0); 3250 #endif 3251 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3252 fdrop(fp, td); 3253 return (error); 3254 } 3255 3256 int 3257 sys_futimens(struct thread *td, struct futimens_args *uap) 3258 { 3259 3260 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3261 } 3262 3263 int 3264 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3265 enum uio_seg tptrseg) 3266 { 3267 struct timespec ts[2]; 3268 struct file *fp; 3269 cap_rights_t rights; 3270 int error, flags; 3271 3272 AUDIT_ARG_FD(fd); 3273 error = getutimens(tptr, tptrseg, ts, &flags); 3274 if (error != 0) 3275 return (error); 3276 if (flags & UTIMENS_EXIT) 3277 return (0); 3278 error = getvnode(td->td_proc->p_fd, fd, 3279 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3280 if (error != 0) 3281 return (error); 3282 #ifdef AUDIT 3283 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3284 AUDIT_ARG_VNODE1(fp->f_vnode); 3285 VOP_UNLOCK(fp->f_vnode, 0); 3286 #endif 3287 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3288 
fdrop(fp, td); 3289 return (error); 3290 } 3291 3292 int 3293 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3294 { 3295 3296 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3297 uap->times, UIO_USERSPACE, uap->flag)); 3298 } 3299 3300 int 3301 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3302 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3303 { 3304 struct nameidata nd; 3305 struct timespec ts[2]; 3306 cap_rights_t rights; 3307 int error, flags; 3308 3309 if (flag & ~AT_SYMLINK_NOFOLLOW) 3310 return (EINVAL); 3311 3312 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3313 return (error); 3314 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3315 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3316 cap_rights_init(&rights, CAP_FUTIMES), td); 3317 if ((error = namei(&nd)) != 0) 3318 return (error); 3319 /* 3320 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3321 * POSIX states: 3322 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3323 * "Search permission is denied by a component of the path prefix." 3324 */ 3325 NDFREE(&nd, NDF_ONLY_PNBUF); 3326 if ((flags & UTIMENS_EXIT) == 0) 3327 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3328 vrele(nd.ni_vp); 3329 return (error); 3330 } 3331 3332 /* 3333 * Truncate a file given its path name. 
3334 */ 3335 #ifndef _SYS_SYSPROTO_H_ 3336 struct truncate_args { 3337 char *path; 3338 int pad; 3339 off_t length; 3340 }; 3341 #endif 3342 int 3343 sys_truncate(td, uap) 3344 struct thread *td; 3345 register struct truncate_args /* { 3346 char *path; 3347 int pad; 3348 off_t length; 3349 } */ *uap; 3350 { 3351 3352 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3353 } 3354 3355 int 3356 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3357 { 3358 struct mount *mp; 3359 struct vnode *vp; 3360 void *rl_cookie; 3361 struct vattr vattr; 3362 struct nameidata nd; 3363 int error; 3364 3365 if (length < 0) 3366 return(EINVAL); 3367 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3368 if ((error = namei(&nd)) != 0) 3369 return (error); 3370 vp = nd.ni_vp; 3371 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3372 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3373 vn_rangelock_unlock(vp, rl_cookie); 3374 vrele(vp); 3375 return (error); 3376 } 3377 NDFREE(&nd, NDF_ONLY_PNBUF); 3378 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3379 if (vp->v_type == VDIR) 3380 error = EISDIR; 3381 #ifdef MAC 3382 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3383 } 3384 #endif 3385 else if ((error = vn_writechk(vp)) == 0 && 3386 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3387 VATTR_NULL(&vattr); 3388 vattr.va_size = length; 3389 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3390 } 3391 VOP_UNLOCK(vp, 0); 3392 vn_finished_write(mp); 3393 vn_rangelock_unlock(vp, rl_cookie); 3394 vrele(vp); 3395 return (error); 3396 } 3397 3398 #if defined(COMPAT_43) 3399 /* 3400 * Truncate a file given its path name. 
3401 */ 3402 #ifndef _SYS_SYSPROTO_H_ 3403 struct otruncate_args { 3404 char *path; 3405 long length; 3406 }; 3407 #endif 3408 int 3409 otruncate(td, uap) 3410 struct thread *td; 3411 register struct otruncate_args /* { 3412 char *path; 3413 long length; 3414 } */ *uap; 3415 { 3416 struct truncate_args /* { 3417 char *path; 3418 int pad; 3419 off_t length; 3420 } */ nuap; 3421 3422 nuap.path = uap->path; 3423 nuap.length = uap->length; 3424 return (sys_truncate(td, &nuap)); 3425 } 3426 #endif /* COMPAT_43 */ 3427 3428 #if defined(COMPAT_FREEBSD6) 3429 /* Versions with the pad argument */ 3430 int 3431 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3432 { 3433 struct truncate_args ouap; 3434 3435 ouap.path = uap->path; 3436 ouap.length = uap->length; 3437 return (sys_truncate(td, &ouap)); 3438 } 3439 3440 int 3441 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3442 { 3443 struct ftruncate_args ouap; 3444 3445 ouap.fd = uap->fd; 3446 ouap.length = uap->length; 3447 return (sys_ftruncate(td, &ouap)); 3448 } 3449 #endif 3450 3451 /* 3452 * Sync an open file. 
3453 */ 3454 #ifndef _SYS_SYSPROTO_H_ 3455 struct fsync_args { 3456 int fd; 3457 }; 3458 #endif 3459 int 3460 sys_fsync(td, uap) 3461 struct thread *td; 3462 struct fsync_args /* { 3463 int fd; 3464 } */ *uap; 3465 { 3466 struct vnode *vp; 3467 struct mount *mp; 3468 struct file *fp; 3469 cap_rights_t rights; 3470 int error, lock_flags; 3471 3472 AUDIT_ARG_FD(uap->fd); 3473 error = getvnode(td->td_proc->p_fd, uap->fd, 3474 cap_rights_init(&rights, CAP_FSYNC), &fp); 3475 if (error != 0) 3476 return (error); 3477 vp = fp->f_vnode; 3478 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3479 if (error != 0) 3480 goto drop; 3481 if (MNT_SHARED_WRITES(mp) || 3482 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3483 lock_flags = LK_SHARED; 3484 } else { 3485 lock_flags = LK_EXCLUSIVE; 3486 } 3487 vn_lock(vp, lock_flags | LK_RETRY); 3488 AUDIT_ARG_VNODE1(vp); 3489 if (vp->v_object != NULL) { 3490 VM_OBJECT_WLOCK(vp->v_object); 3491 vm_object_page_clean(vp->v_object, 0, 0, 0); 3492 VM_OBJECT_WUNLOCK(vp->v_object); 3493 } 3494 error = VOP_FSYNC(vp, MNT_WAIT, td); 3495 3496 VOP_UNLOCK(vp, 0); 3497 vn_finished_write(mp); 3498 drop: 3499 fdrop(fp, td); 3500 return (error); 3501 } 3502 3503 /* 3504 * Rename files. Source and destination must either both be directories, or 3505 * both not be directories. If target is a directory, it must be empty. 
3506 */ 3507 #ifndef _SYS_SYSPROTO_H_ 3508 struct rename_args { 3509 char *from; 3510 char *to; 3511 }; 3512 #endif 3513 int 3514 sys_rename(td, uap) 3515 struct thread *td; 3516 register struct rename_args /* { 3517 char *from; 3518 char *to; 3519 } */ *uap; 3520 { 3521 3522 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3523 uap->to, UIO_USERSPACE)); 3524 } 3525 3526 #ifndef _SYS_SYSPROTO_H_ 3527 struct renameat_args { 3528 int oldfd; 3529 char *old; 3530 int newfd; 3531 char *new; 3532 }; 3533 #endif 3534 int 3535 sys_renameat(struct thread *td, struct renameat_args *uap) 3536 { 3537 3538 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3539 UIO_USERSPACE)); 3540 } 3541 3542 int 3543 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3544 enum uio_seg pathseg) 3545 { 3546 struct mount *mp = NULL; 3547 struct vnode *tvp, *fvp, *tdvp; 3548 struct nameidata fromnd, tond; 3549 cap_rights_t rights; 3550 int error; 3551 3552 again: 3553 bwillwrite(); 3554 #ifdef MAC 3555 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3556 AUDITVNODE1, pathseg, old, oldfd, 3557 cap_rights_init(&rights, CAP_RENAMEAT), td); 3558 #else 3559 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3560 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3561 #endif 3562 3563 if ((error = namei(&fromnd)) != 0) 3564 return (error); 3565 #ifdef MAC 3566 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3567 fromnd.ni_vp, &fromnd.ni_cnd); 3568 VOP_UNLOCK(fromnd.ni_dvp, 0); 3569 if (fromnd.ni_dvp != fromnd.ni_vp) 3570 VOP_UNLOCK(fromnd.ni_vp, 0); 3571 #endif 3572 fvp = fromnd.ni_vp; 3573 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3574 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3575 cap_rights_init(&rights, CAP_LINKAT), td); 3576 if (fromnd.ni_vp->v_type == VDIR) 3577 tond.ni_cnd.cn_flags |= WILLBEDIR; 3578 if ((error = namei(&tond)) != 0) { 3579 /* 
Translate error code for rename("dir1", "dir2/."). */ 3580 if (error == EISDIR && fvp->v_type == VDIR) 3581 error = EINVAL; 3582 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3583 vrele(fromnd.ni_dvp); 3584 vrele(fvp); 3585 goto out1; 3586 } 3587 tdvp = tond.ni_dvp; 3588 tvp = tond.ni_vp; 3589 error = vn_start_write(fvp, &mp, V_NOWAIT); 3590 if (error != 0) { 3591 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3592 NDFREE(&tond, NDF_ONLY_PNBUF); 3593 if (tvp != NULL) 3594 vput(tvp); 3595 if (tdvp == tvp) 3596 vrele(tdvp); 3597 else 3598 vput(tdvp); 3599 vrele(fromnd.ni_dvp); 3600 vrele(fvp); 3601 vrele(tond.ni_startdir); 3602 if (fromnd.ni_startdir != NULL) 3603 vrele(fromnd.ni_startdir); 3604 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3605 if (error != 0) 3606 return (error); 3607 goto again; 3608 } 3609 if (tvp != NULL) { 3610 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3611 error = ENOTDIR; 3612 goto out; 3613 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3614 error = EISDIR; 3615 goto out; 3616 } 3617 #ifdef CAPABILITIES 3618 if (newfd != AT_FDCWD) { 3619 /* 3620 * If the target already exists we require CAP_UNLINKAT 3621 * from 'newfd'. 3622 */ 3623 error = cap_check(&tond.ni_filecaps.fc_rights, 3624 cap_rights_init(&rights, CAP_UNLINKAT)); 3625 if (error != 0) 3626 goto out; 3627 } 3628 #endif 3629 } 3630 if (fvp == tdvp) { 3631 error = EINVAL; 3632 goto out; 3633 } 3634 /* 3635 * If the source is the same as the destination (that is, if they 3636 * are links to the same vnode), then there is nothing to do. 
3637 */ 3638 if (fvp == tvp) 3639 error = -1; 3640 #ifdef MAC 3641 else 3642 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3643 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3644 #endif 3645 out: 3646 if (error == 0) { 3647 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3648 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3649 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3650 NDFREE(&tond, NDF_ONLY_PNBUF); 3651 } else { 3652 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3653 NDFREE(&tond, NDF_ONLY_PNBUF); 3654 if (tvp != NULL) 3655 vput(tvp); 3656 if (tdvp == tvp) 3657 vrele(tdvp); 3658 else 3659 vput(tdvp); 3660 vrele(fromnd.ni_dvp); 3661 vrele(fvp); 3662 } 3663 vrele(tond.ni_startdir); 3664 vn_finished_write(mp); 3665 out1: 3666 if (fromnd.ni_startdir) 3667 vrele(fromnd.ni_startdir); 3668 if (error == -1) 3669 return (0); 3670 return (error); 3671 } 3672 3673 /* 3674 * Make a directory file. 3675 */ 3676 #ifndef _SYS_SYSPROTO_H_ 3677 struct mkdir_args { 3678 char *path; 3679 int mode; 3680 }; 3681 #endif 3682 int 3683 sys_mkdir(td, uap) 3684 struct thread *td; 3685 register struct mkdir_args /* { 3686 char *path; 3687 int mode; 3688 } */ *uap; 3689 { 3690 3691 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3692 uap->mode)); 3693 } 3694 3695 #ifndef _SYS_SYSPROTO_H_ 3696 struct mkdirat_args { 3697 int fd; 3698 char *path; 3699 mode_t mode; 3700 }; 3701 #endif 3702 int 3703 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3704 { 3705 3706 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3707 } 3708 3709 int 3710 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3711 int mode) 3712 { 3713 struct mount *mp; 3714 struct vnode *vp; 3715 struct vattr vattr; 3716 struct nameidata nd; 3717 cap_rights_t rights; 3718 int error; 3719 3720 AUDIT_ARG_MODE(mode); 3721 restart: 3722 bwillwrite(); 3723 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3724 NOCACHE, segflg, path, fd, 
cap_rights_init(&rights, CAP_MKDIRAT), 3725 td); 3726 nd.ni_cnd.cn_flags |= WILLBEDIR; 3727 if ((error = namei(&nd)) != 0) 3728 return (error); 3729 vp = nd.ni_vp; 3730 if (vp != NULL) { 3731 NDFREE(&nd, NDF_ONLY_PNBUF); 3732 /* 3733 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3734 * the strange behaviour of leaving the vnode unlocked 3735 * if the target is the same vnode as the parent. 3736 */ 3737 if (vp == nd.ni_dvp) 3738 vrele(nd.ni_dvp); 3739 else 3740 vput(nd.ni_dvp); 3741 vrele(vp); 3742 return (EEXIST); 3743 } 3744 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3745 NDFREE(&nd, NDF_ONLY_PNBUF); 3746 vput(nd.ni_dvp); 3747 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3748 return (error); 3749 goto restart; 3750 } 3751 VATTR_NULL(&vattr); 3752 vattr.va_type = VDIR; 3753 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3754 #ifdef MAC 3755 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3756 &vattr); 3757 if (error != 0) 3758 goto out; 3759 #endif 3760 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3761 #ifdef MAC 3762 out: 3763 #endif 3764 NDFREE(&nd, NDF_ONLY_PNBUF); 3765 vput(nd.ni_dvp); 3766 if (error == 0) 3767 vput(nd.ni_vp); 3768 vn_finished_write(mp); 3769 return (error); 3770 } 3771 3772 /* 3773 * Remove a directory file. 
3774 */ 3775 #ifndef _SYS_SYSPROTO_H_ 3776 struct rmdir_args { 3777 char *path; 3778 }; 3779 #endif 3780 int 3781 sys_rmdir(td, uap) 3782 struct thread *td; 3783 struct rmdir_args /* { 3784 char *path; 3785 } */ *uap; 3786 { 3787 3788 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3789 } 3790 3791 int 3792 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3793 { 3794 struct mount *mp; 3795 struct vnode *vp; 3796 struct nameidata nd; 3797 cap_rights_t rights; 3798 int error; 3799 3800 restart: 3801 bwillwrite(); 3802 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3803 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3804 if ((error = namei(&nd)) != 0) 3805 return (error); 3806 vp = nd.ni_vp; 3807 if (vp->v_type != VDIR) { 3808 error = ENOTDIR; 3809 goto out; 3810 } 3811 /* 3812 * No rmdir "." please. 3813 */ 3814 if (nd.ni_dvp == vp) { 3815 error = EINVAL; 3816 goto out; 3817 } 3818 /* 3819 * The root of a mounted filesystem cannot be deleted. 3820 */ 3821 if (vp->v_vflag & VV_ROOT) { 3822 error = EBUSY; 3823 goto out; 3824 } 3825 #ifdef MAC 3826 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3827 &nd.ni_cnd); 3828 if (error != 0) 3829 goto out; 3830 #endif 3831 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3832 NDFREE(&nd, NDF_ONLY_PNBUF); 3833 vput(vp); 3834 if (nd.ni_dvp == vp) 3835 vrele(nd.ni_dvp); 3836 else 3837 vput(nd.ni_dvp); 3838 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3839 return (error); 3840 goto restart; 3841 } 3842 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3843 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3844 vn_finished_write(mp); 3845 out: 3846 NDFREE(&nd, NDF_ONLY_PNBUF); 3847 vput(vp); 3848 if (nd.ni_dvp == vp) 3849 vrele(nd.ni_dvp); 3850 else 3851 vput(nd.ni_dvp); 3852 return (error); 3853 } 3854 3855 #ifdef COMPAT_43 3856 /* 3857 * Read a block of directory entries in a filesystem independent format. 
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
/*
 * System call entry point: run kern_ogetdirentries() and, on success,
 * copy the returned directory offset out to uap->basep.
 */
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Read up to uap->count bytes of directory entries from the directory
 * open on uap->fd into the user buffer uap->buf, rewriting the
 * d_type/d_namlen bytes of each entry (see the comments in the loop).
 *
 * Returns EINVAL if count exceeds 64KB or the vnode is not a directory,
 * and EBADF if the file was not opened for reading.  On success the
 * number of bytes transferred is stored in td->td_retval[0] and the file
 * offset at which the read started is stored in *ploff.
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	cap_rights_t rights;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;
	off_t foffset;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd,
	    cap_rights_init(&rights, CAP_READ), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* Take the file offset lock; it is released on every exit path. */
	foffset = foffset_lock(fp, 0);
unionread:
	if (vp->v_type != VDIR) {
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the user buffer that receives the entries. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* Remember the starting offset; it is what *ploff reports. */
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, FOF_NOUPDATE);
		fdrop(fp, td);
		return (error);
	}
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			/* Read straight into the user buffer, no rewriting. */
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			foffset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer so that each entry
		 * can be rewritten before it is copied out to the user.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		foffset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/* A zero record length would never advance. */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was transferred and this is the root of a union mount:
	 * switch the file over to the covered vnode and retry from offset 0.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent
format. 4013 */ 4014 #ifndef _SYS_SYSPROTO_H_ 4015 struct getdirentries_args { 4016 int fd; 4017 char *buf; 4018 u_int count; 4019 long *basep; 4020 }; 4021 #endif 4022 int 4023 sys_getdirentries(td, uap) 4024 struct thread *td; 4025 register struct getdirentries_args /* { 4026 int fd; 4027 char *buf; 4028 u_int count; 4029 long *basep; 4030 } */ *uap; 4031 { 4032 long base; 4033 int error; 4034 4035 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4036 NULL, UIO_USERSPACE); 4037 if (error != 0) 4038 return (error); 4039 if (uap->basep != NULL) 4040 error = copyout(&base, uap->basep, sizeof(long)); 4041 return (error); 4042 } 4043 4044 int 4045 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4046 long *basep, ssize_t *residp, enum uio_seg bufseg) 4047 { 4048 struct vnode *vp; 4049 struct file *fp; 4050 struct uio auio; 4051 struct iovec aiov; 4052 cap_rights_t rights; 4053 long loff; 4054 int error, eofflag; 4055 off_t foffset; 4056 4057 AUDIT_ARG_FD(fd); 4058 if (count > IOSIZE_MAX) 4059 return (EINVAL); 4060 auio.uio_resid = count; 4061 error = getvnode(td->td_proc->p_fd, fd, 4062 cap_rights_init(&rights, CAP_READ), &fp); 4063 if (error != 0) 4064 return (error); 4065 if ((fp->f_flag & FREAD) == 0) { 4066 fdrop(fp, td); 4067 return (EBADF); 4068 } 4069 vp = fp->f_vnode; 4070 foffset = foffset_lock(fp, 0); 4071 unionread: 4072 if (vp->v_type != VDIR) { 4073 error = EINVAL; 4074 goto fail; 4075 } 4076 aiov.iov_base = buf; 4077 aiov.iov_len = count; 4078 auio.uio_iov = &aiov; 4079 auio.uio_iovcnt = 1; 4080 auio.uio_rw = UIO_READ; 4081 auio.uio_segflg = bufseg; 4082 auio.uio_td = td; 4083 vn_lock(vp, LK_SHARED | LK_RETRY); 4084 AUDIT_ARG_VNODE1(vp); 4085 loff = auio.uio_offset = foffset; 4086 #ifdef MAC 4087 error = mac_vnode_check_readdir(td->td_ucred, vp); 4088 if (error == 0) 4089 #endif 4090 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4091 NULL); 4092 foffset = auio.uio_offset; 4093 if (error != 0) { 4094 
VOP_UNLOCK(vp, 0); 4095 goto fail; 4096 } 4097 if (count == auio.uio_resid && 4098 (vp->v_vflag & VV_ROOT) && 4099 (vp->v_mount->mnt_flag & MNT_UNION)) { 4100 struct vnode *tvp = vp; 4101 4102 vp = vp->v_mount->mnt_vnodecovered; 4103 VREF(vp); 4104 fp->f_vnode = vp; 4105 fp->f_data = vp; 4106 foffset = 0; 4107 vput(tvp); 4108 goto unionread; 4109 } 4110 VOP_UNLOCK(vp, 0); 4111 *basep = loff; 4112 if (residp != NULL) 4113 *residp = auio.uio_resid; 4114 td->td_retval[0] = count - auio.uio_resid; 4115 fail: 4116 foffset_unlock(fp, foffset, 0); 4117 fdrop(fp, td); 4118 return (error); 4119 } 4120 4121 #ifndef _SYS_SYSPROTO_H_ 4122 struct getdents_args { 4123 int fd; 4124 char *buf; 4125 size_t count; 4126 }; 4127 #endif 4128 int 4129 sys_getdents(td, uap) 4130 struct thread *td; 4131 register struct getdents_args /* { 4132 int fd; 4133 char *buf; 4134 u_int count; 4135 } */ *uap; 4136 { 4137 struct getdirentries_args ap; 4138 4139 ap.fd = uap->fd; 4140 ap.buf = uap->buf; 4141 ap.count = uap->count; 4142 ap.basep = NULL; 4143 return (sys_getdirentries(td, &ap)); 4144 } 4145 4146 /* 4147 * Set the mode mask for creation of filesystem nodes. 4148 */ 4149 #ifndef _SYS_SYSPROTO_H_ 4150 struct umask_args { 4151 int newmask; 4152 }; 4153 #endif 4154 int 4155 sys_umask(td, uap) 4156 struct thread *td; 4157 struct umask_args /* { 4158 int newmask; 4159 } */ *uap; 4160 { 4161 register struct filedesc *fdp; 4162 4163 FILEDESC_XLOCK(td->td_proc->p_fd); 4164 fdp = td->td_proc->p_fd; 4165 td->td_retval[0] = fdp->fd_cmask; 4166 fdp->fd_cmask = uap->newmask & ALLPERMS; 4167 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4168 return (0); 4169 } 4170 4171 /* 4172 * Void all references to file by ripping underlying filesystem away from 4173 * vnode. 
4174 */ 4175 #ifndef _SYS_SYSPROTO_H_ 4176 struct revoke_args { 4177 char *path; 4178 }; 4179 #endif 4180 int 4181 sys_revoke(td, uap) 4182 struct thread *td; 4183 register struct revoke_args /* { 4184 char *path; 4185 } */ *uap; 4186 { 4187 struct vnode *vp; 4188 struct vattr vattr; 4189 struct nameidata nd; 4190 int error; 4191 4192 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4193 uap->path, td); 4194 if ((error = namei(&nd)) != 0) 4195 return (error); 4196 vp = nd.ni_vp; 4197 NDFREE(&nd, NDF_ONLY_PNBUF); 4198 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4199 error = EINVAL; 4200 goto out; 4201 } 4202 #ifdef MAC 4203 error = mac_vnode_check_revoke(td->td_ucred, vp); 4204 if (error != 0) 4205 goto out; 4206 #endif 4207 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4208 if (error != 0) 4209 goto out; 4210 if (td->td_ucred->cr_uid != vattr.va_uid) { 4211 error = priv_check(td, PRIV_VFS_ADMIN); 4212 if (error != 0) 4213 goto out; 4214 } 4215 if (vcount(vp) > 1) 4216 VOP_REVOKE(vp, REVOKEALL); 4217 out: 4218 vput(vp); 4219 return (error); 4220 } 4221 4222 /* 4223 * Convert a user file descriptor to a kernel file entry and check that, if it 4224 * is a capability, the correct rights are present. A reference on the file 4225 * entry is held upon returning. 4226 */ 4227 int 4228 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4229 { 4230 struct file *fp; 4231 int error; 4232 4233 error = fget_unlocked(fdp, fd, rightsp, &fp, NULL); 4234 if (error != 0) 4235 return (error); 4236 4237 /* 4238 * The file could be not of the vnode type, or it may be not 4239 * yet fully initialized, in which case the f_vnode pointer 4240 * may be set, but f_ops is still badfileops. E.g., 4241 * devfs_open() transiently create such situation to 4242 * facilitate csw d_fdopen(). 
4243 * 4244 * Dupfdopen() handling in kern_openat() installs the 4245 * half-baked file into the process descriptor table, allowing 4246 * other thread to dereference it. Guard against the race by 4247 * checking f_ops. 4248 */ 4249 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4250 fdrop(fp, curthread); 4251 return (EINVAL); 4252 } 4253 *fpp = fp; 4254 return (0); 4255 } 4256 4257 4258 /* 4259 * Get an (NFS) file handle. 4260 */ 4261 #ifndef _SYS_SYSPROTO_H_ 4262 struct lgetfh_args { 4263 char *fname; 4264 fhandle_t *fhp; 4265 }; 4266 #endif 4267 int 4268 sys_lgetfh(td, uap) 4269 struct thread *td; 4270 register struct lgetfh_args *uap; 4271 { 4272 struct nameidata nd; 4273 fhandle_t fh; 4274 register struct vnode *vp; 4275 int error; 4276 4277 error = priv_check(td, PRIV_VFS_GETFH); 4278 if (error != 0) 4279 return (error); 4280 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4281 uap->fname, td); 4282 error = namei(&nd); 4283 if (error != 0) 4284 return (error); 4285 NDFREE(&nd, NDF_ONLY_PNBUF); 4286 vp = nd.ni_vp; 4287 bzero(&fh, sizeof(fh)); 4288 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4289 error = VOP_VPTOFH(vp, &fh.fh_fid); 4290 vput(vp); 4291 if (error == 0) 4292 error = copyout(&fh, uap->fhp, sizeof (fh)); 4293 return (error); 4294 } 4295 4296 #ifndef _SYS_SYSPROTO_H_ 4297 struct getfh_args { 4298 char *fname; 4299 fhandle_t *fhp; 4300 }; 4301 #endif 4302 int 4303 sys_getfh(td, uap) 4304 struct thread *td; 4305 register struct getfh_args *uap; 4306 { 4307 struct nameidata nd; 4308 fhandle_t fh; 4309 register struct vnode *vp; 4310 int error; 4311 4312 error = priv_check(td, PRIV_VFS_GETFH); 4313 if (error != 0) 4314 return (error); 4315 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4316 uap->fname, td); 4317 error = namei(&nd); 4318 if (error != 0) 4319 return (error); 4320 NDFREE(&nd, NDF_ONLY_PNBUF); 4321 vp = nd.ni_vp; 4322 bzero(&fh, sizeof(fh)); 4323 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 
4324 error = VOP_VPTOFH(vp, &fh.fh_fid); 4325 vput(vp); 4326 if (error == 0) 4327 error = copyout(&fh, uap->fhp, sizeof (fh)); 4328 return (error); 4329 } 4330 4331 /* 4332 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4333 * open descriptor. 4334 * 4335 * warning: do not remove the priv_check() call or this becomes one giant 4336 * security hole. 4337 */ 4338 #ifndef _SYS_SYSPROTO_H_ 4339 struct fhopen_args { 4340 const struct fhandle *u_fhp; 4341 int flags; 4342 }; 4343 #endif 4344 int 4345 sys_fhopen(td, uap) 4346 struct thread *td; 4347 struct fhopen_args /* { 4348 const struct fhandle *u_fhp; 4349 int flags; 4350 } */ *uap; 4351 { 4352 struct mount *mp; 4353 struct vnode *vp; 4354 struct fhandle fhp; 4355 struct file *fp; 4356 int fmode, error; 4357 int indx; 4358 4359 error = priv_check(td, PRIV_VFS_FHOPEN); 4360 if (error != 0) 4361 return (error); 4362 indx = -1; 4363 fmode = FFLAGS(uap->flags); 4364 /* why not allow a non-read/write open for our lockd? */ 4365 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4366 return (EINVAL); 4367 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4368 if (error != 0) 4369 return(error); 4370 /* find the mount point */ 4371 mp = vfs_busyfs(&fhp.fh_fsid); 4372 if (mp == NULL) 4373 return (ESTALE); 4374 /* now give me my vnode, it gets returned to me locked */ 4375 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4376 vfs_unbusy(mp); 4377 if (error != 0) 4378 return (error); 4379 4380 error = falloc_noinstall(td, &fp); 4381 if (error != 0) { 4382 vput(vp); 4383 return (error); 4384 } 4385 /* 4386 * An extra reference on `fp' has been held for us by 4387 * falloc_noinstall(). 
4388 */ 4389 4390 #ifdef INVARIANTS 4391 td->td_dupfd = -1; 4392 #endif 4393 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4394 if (error != 0) { 4395 KASSERT(fp->f_ops == &badfileops, 4396 ("VOP_OPEN in fhopen() set f_ops")); 4397 KASSERT(td->td_dupfd < 0, 4398 ("fhopen() encountered fdopen()")); 4399 4400 vput(vp); 4401 goto bad; 4402 } 4403 #ifdef INVARIANTS 4404 td->td_dupfd = 0; 4405 #endif 4406 fp->f_vnode = vp; 4407 fp->f_seqcount = 1; 4408 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4409 &vnops); 4410 VOP_UNLOCK(vp, 0); 4411 if ((fmode & O_TRUNC) != 0) { 4412 error = fo_truncate(fp, 0, td->td_ucred, td); 4413 if (error != 0) 4414 goto bad; 4415 } 4416 4417 error = finstall(td, fp, &indx, fmode, NULL); 4418 bad: 4419 fdrop(fp, td); 4420 td->td_retval[0] = indx; 4421 return (error); 4422 } 4423 4424 /* 4425 * Stat an (NFS) file handle. 4426 */ 4427 #ifndef _SYS_SYSPROTO_H_ 4428 struct fhstat_args { 4429 struct fhandle *u_fhp; 4430 struct stat *sb; 4431 }; 4432 #endif 4433 int 4434 sys_fhstat(td, uap) 4435 struct thread *td; 4436 register struct fhstat_args /* { 4437 struct fhandle *u_fhp; 4438 struct stat *sb; 4439 } */ *uap; 4440 { 4441 struct stat sb; 4442 struct fhandle fh; 4443 int error; 4444 4445 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4446 if (error != 0) 4447 return (error); 4448 error = kern_fhstat(td, fh, &sb); 4449 if (error == 0) 4450 error = copyout(&sb, uap->sb, sizeof(sb)); 4451 return (error); 4452 } 4453 4454 int 4455 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4456 { 4457 struct mount *mp; 4458 struct vnode *vp; 4459 int error; 4460 4461 error = priv_check(td, PRIV_VFS_FHSTAT); 4462 if (error != 0) 4463 return (error); 4464 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4465 return (ESTALE); 4466 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4467 vfs_unbusy(mp); 4468 if (error != 0) 4469 return (error); 4470 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4471 vput(vp); 
4472 return (error); 4473 } 4474 4475 /* 4476 * Implement fstatfs() for (NFS) file handles. 4477 */ 4478 #ifndef _SYS_SYSPROTO_H_ 4479 struct fhstatfs_args { 4480 struct fhandle *u_fhp; 4481 struct statfs *buf; 4482 }; 4483 #endif 4484 int 4485 sys_fhstatfs(td, uap) 4486 struct thread *td; 4487 struct fhstatfs_args /* { 4488 struct fhandle *u_fhp; 4489 struct statfs *buf; 4490 } */ *uap; 4491 { 4492 struct statfs sf; 4493 fhandle_t fh; 4494 int error; 4495 4496 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4497 if (error != 0) 4498 return (error); 4499 error = kern_fhstatfs(td, fh, &sf); 4500 if (error != 0) 4501 return (error); 4502 return (copyout(&sf, uap->buf, sizeof(sf))); 4503 } 4504 4505 int 4506 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4507 { 4508 struct statfs *sp; 4509 struct mount *mp; 4510 struct vnode *vp; 4511 int error; 4512 4513 error = priv_check(td, PRIV_VFS_FHSTATFS); 4514 if (error != 0) 4515 return (error); 4516 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4517 return (ESTALE); 4518 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4519 if (error != 0) { 4520 vfs_unbusy(mp); 4521 return (error); 4522 } 4523 vput(vp); 4524 error = prison_canseemount(td->td_ucred, mp); 4525 if (error != 0) 4526 goto out; 4527 #ifdef MAC 4528 error = mac_mount_check_stat(td->td_ucred, mp); 4529 if (error != 0) 4530 goto out; 4531 #endif 4532 /* 4533 * Set these in case the underlying filesystem fails to do so. 
4534 */ 4535 sp = &mp->mnt_stat; 4536 sp->f_version = STATFS_VERSION; 4537 sp->f_namemax = NAME_MAX; 4538 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4539 error = VFS_STATFS(mp, sp); 4540 if (error == 0) 4541 *buf = *sp; 4542 out: 4543 vfs_unbusy(mp); 4544 return (error); 4545 } 4546 4547 int 4548 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4549 { 4550 struct file *fp; 4551 struct mount *mp; 4552 struct vnode *vp; 4553 cap_rights_t rights; 4554 off_t olen, ooffset; 4555 int error; 4556 4557 if (offset < 0 || len <= 0) 4558 return (EINVAL); 4559 /* Check for wrap. */ 4560 if (offset > OFF_MAX - len) 4561 return (EFBIG); 4562 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4563 if (error != 0) 4564 return (error); 4565 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4566 error = ESPIPE; 4567 goto out; 4568 } 4569 if ((fp->f_flag & FWRITE) == 0) { 4570 error = EBADF; 4571 goto out; 4572 } 4573 if (fp->f_type != DTYPE_VNODE) { 4574 error = ENODEV; 4575 goto out; 4576 } 4577 vp = fp->f_vnode; 4578 if (vp->v_type != VREG) { 4579 error = ENODEV; 4580 goto out; 4581 } 4582 4583 /* Allocating blocks may take a long time, so iterate. 
*/ 4584 for (;;) { 4585 olen = len; 4586 ooffset = offset; 4587 4588 bwillwrite(); 4589 mp = NULL; 4590 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4591 if (error != 0) 4592 break; 4593 error = vn_lock(vp, LK_EXCLUSIVE); 4594 if (error != 0) { 4595 vn_finished_write(mp); 4596 break; 4597 } 4598 #ifdef MAC 4599 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4600 if (error == 0) 4601 #endif 4602 error = VOP_ALLOCATE(vp, &offset, &len); 4603 VOP_UNLOCK(vp, 0); 4604 vn_finished_write(mp); 4605 4606 if (olen + ooffset != offset + len) { 4607 panic("offset + len changed from %jx/%jx to %jx/%jx", 4608 ooffset, olen, offset, len); 4609 } 4610 if (error != 0 || len == 0) 4611 break; 4612 KASSERT(olen > len, ("Iteration did not make progress?")); 4613 maybe_yield(); 4614 } 4615 out: 4616 fdrop(fp, td); 4617 return (error); 4618 } 4619 4620 int 4621 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4622 { 4623 4624 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4625 uap->len); 4626 return (0); 4627 } 4628 4629 /* 4630 * Unlike madvise(2), we do not make a best effort to remember every 4631 * possible caching hint. Instead, we remember the last setting with 4632 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4633 * region of any current setting. 
4634 */ 4635 int 4636 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4637 int advice) 4638 { 4639 struct fadvise_info *fa, *new; 4640 struct file *fp; 4641 struct vnode *vp; 4642 cap_rights_t rights; 4643 off_t end; 4644 int error; 4645 4646 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4647 return (EINVAL); 4648 switch (advice) { 4649 case POSIX_FADV_SEQUENTIAL: 4650 case POSIX_FADV_RANDOM: 4651 case POSIX_FADV_NOREUSE: 4652 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4653 break; 4654 case POSIX_FADV_NORMAL: 4655 case POSIX_FADV_WILLNEED: 4656 case POSIX_FADV_DONTNEED: 4657 new = NULL; 4658 break; 4659 default: 4660 return (EINVAL); 4661 } 4662 /* XXX: CAP_POSIX_FADVISE? */ 4663 error = fget(td, fd, cap_rights_init(&rights), &fp); 4664 if (error != 0) 4665 goto out; 4666 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4667 error = ESPIPE; 4668 goto out; 4669 } 4670 if (fp->f_type != DTYPE_VNODE) { 4671 error = ENODEV; 4672 goto out; 4673 } 4674 vp = fp->f_vnode; 4675 if (vp->v_type != VREG) { 4676 error = ENODEV; 4677 goto out; 4678 } 4679 if (len == 0) 4680 end = OFF_MAX; 4681 else 4682 end = offset + len - 1; 4683 switch (advice) { 4684 case POSIX_FADV_SEQUENTIAL: 4685 case POSIX_FADV_RANDOM: 4686 case POSIX_FADV_NOREUSE: 4687 /* 4688 * Try to merge any existing non-standard region with 4689 * this new region if possible, otherwise create a new 4690 * non-standard region for this request. 
4691 */ 4692 mtx_pool_lock(mtxpool_sleep, fp); 4693 fa = fp->f_advice; 4694 if (fa != NULL && fa->fa_advice == advice && 4695 ((fa->fa_start <= end && fa->fa_end >= offset) || 4696 (end != OFF_MAX && fa->fa_start == end + 1) || 4697 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4698 if (offset < fa->fa_start) 4699 fa->fa_start = offset; 4700 if (end > fa->fa_end) 4701 fa->fa_end = end; 4702 } else { 4703 new->fa_advice = advice; 4704 new->fa_start = offset; 4705 new->fa_end = end; 4706 new->fa_prevstart = 0; 4707 new->fa_prevend = 0; 4708 fp->f_advice = new; 4709 new = fa; 4710 } 4711 mtx_pool_unlock(mtxpool_sleep, fp); 4712 break; 4713 case POSIX_FADV_NORMAL: 4714 /* 4715 * If a the "normal" region overlaps with an existing 4716 * non-standard region, trim or remove the 4717 * non-standard region. 4718 */ 4719 mtx_pool_lock(mtxpool_sleep, fp); 4720 fa = fp->f_advice; 4721 if (fa != NULL) { 4722 if (offset <= fa->fa_start && end >= fa->fa_end) { 4723 new = fa; 4724 fp->f_advice = NULL; 4725 } else if (offset <= fa->fa_start && 4726 end >= fa->fa_start) 4727 fa->fa_start = end + 1; 4728 else if (offset <= fa->fa_end && end >= fa->fa_end) 4729 fa->fa_end = offset - 1; 4730 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4731 /* 4732 * If the "normal" region is a middle 4733 * portion of the existing 4734 * non-standard region, just remove 4735 * the whole thing rather than picking 4736 * one side or the other to 4737 * preserve. 
4738 */ 4739 new = fa; 4740 fp->f_advice = NULL; 4741 } 4742 } 4743 mtx_pool_unlock(mtxpool_sleep, fp); 4744 break; 4745 case POSIX_FADV_WILLNEED: 4746 case POSIX_FADV_DONTNEED: 4747 error = VOP_ADVISE(vp, offset, end, advice); 4748 break; 4749 } 4750 out: 4751 if (fp != NULL) 4752 fdrop(fp, td); 4753 free(new, M_FADVISE); 4754 return (error); 4755 } 4756 4757 int 4758 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4759 { 4760 4761 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4762 uap->len, uap->advice); 4763 return (0); 4764 } 4765