1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * The module initialization routine for POSIX asynchronous I/O will 110 * set this to the version of AIO that it implements. (Zero means 111 * that it is not implemented.) This value is used here by pathconf() 112 * and in kern_descrip.c by fpathconf(). 113 */ 114 int async_io_version; 115 116 /* 117 * Sync each mounted filesystem. 118 */ 119 #ifndef _SYS_SYSPROTO_H_ 120 struct sync_args { 121 int dummy; 122 }; 123 #endif 124 /* ARGSUSED */ 125 int 126 sys_sync(td, uap) 127 struct thread *td; 128 struct sync_args *uap; 129 { 130 struct mount *mp, *nmp; 131 int save; 132 133 mtx_lock(&mountlist_mtx); 134 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 135 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 136 nmp = TAILQ_NEXT(mp, mnt_list); 137 continue; 138 } 139 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 140 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 141 save = curthread_pflags_set(TDP_SYNCIO); 142 vfs_msync(mp, MNT_NOWAIT); 143 VFS_SYNC(mp, MNT_NOWAIT); 144 curthread_pflags_restore(save); 145 vn_finished_write(mp); 146 } 147 mtx_lock(&mountlist_mtx); 148 nmp = TAILQ_NEXT(mp, mnt_list); 149 vfs_unbusy(mp); 150 } 151 mtx_unlock(&mountlist_mtx); 152 return (0); 153 } 154 155 /* 156 * Change filesystem quotas. 157 */ 158 #ifndef _SYS_SYSPROTO_H_ 159 struct quotactl_args { 160 char *path; 161 int cmd; 162 int uid; 163 caddr_t arg; 164 }; 165 #endif 166 int 167 sys_quotactl(td, uap) 168 struct thread *td; 169 register struct quotactl_args /* { 170 char *path; 171 int cmd; 172 int uid; 173 caddr_t arg; 174 } */ *uap; 175 { 176 struct mount *mp; 177 struct nameidata nd; 178 int error; 179 180 AUDIT_ARG_CMD(uap->cmd); 181 AUDIT_ARG_UID(uap->uid); 182 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 183 return (EPERM); 184 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 185 uap->path, td); 186 if ((error = namei(&nd)) != 0) 187 return (error); 188 NDFREE(&nd, NDF_ONLY_PNBUF); 189 mp = nd.ni_vp->v_mount; 190 vfs_ref(mp); 191 vput(nd.ni_vp); 192 error = vfs_busy(mp, 0); 193 vfs_rel(mp); 194 if (error != 0) 195 return (error); 196 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 197 198 /* 199 * Since quota on operation typically needs to open quota 200 * file, the Q_QUOTAON handler needs to unbusy the mount point 201 * before calling into namei. Otherwise, unmount might be 202 * started between two vfs_busy() invocations (first is our, 203 * second is from mount point cross-walk code in lookup()), 204 * causing deadlock. 205 * 206 * Require that Q_QUOTAON handles the vfs_busy() reference on 207 * its own, always returning with ubusied mount point. 208 */ 209 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 210 vfs_unbusy(mp); 211 return (error); 212 } 213 214 /* 215 * Used by statfs conversion routines to scale the block size up if 216 * necessary so that all of the block counts are <= 'max_size'. Note 217 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 218 * value of 'n'. 219 */ 220 void 221 statfs_scale_blocks(struct statfs *sf, long max_size) 222 { 223 uint64_t count; 224 int shift; 225 226 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 227 228 /* 229 * Attempt to scale the block counts to give a more accurate 230 * overview to userland of the ratio of free space to used 231 * space. To do this, find the largest block count and compute 232 * a divisor that lets it fit into a signed integer <= max_size. 233 */ 234 if (sf->f_bavail < 0) 235 count = -sf->f_bavail; 236 else 237 count = sf->f_bavail; 238 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 239 if (count <= max_size) 240 return; 241 242 count >>= flsl(max_size); 243 shift = 0; 244 while (count > 0) { 245 shift++; 246 count >>=1; 247 } 248 249 sf->f_bsize <<= shift; 250 sf->f_blocks >>= shift; 251 sf->f_bfree >>= shift; 252 sf->f_bavail >>= shift; 253 } 254 255 /* 256 * Get filesystem statistics. 257 */ 258 #ifndef _SYS_SYSPROTO_H_ 259 struct statfs_args { 260 char *path; 261 struct statfs *buf; 262 }; 263 #endif 264 int 265 sys_statfs(td, uap) 266 struct thread *td; 267 register struct statfs_args /* { 268 char *path; 269 struct statfs *buf; 270 } */ *uap; 271 { 272 struct statfs sf; 273 int error; 274 275 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 276 if (error == 0) 277 error = copyout(&sf, uap->buf, sizeof(sf)); 278 return (error); 279 } 280 281 int 282 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 283 struct statfs *buf) 284 { 285 struct mount *mp; 286 struct statfs *sp, sb; 287 struct nameidata nd; 288 int error; 289 290 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 291 pathseg, path, td); 292 error = namei(&nd); 293 if (error != 0) 294 return (error); 295 mp = nd.ni_vp->v_mount; 296 vfs_ref(mp); 297 NDFREE(&nd, NDF_ONLY_PNBUF); 298 vput(nd.ni_vp); 299 error = vfs_busy(mp, 0); 300 vfs_rel(mp); 301 if (error != 0) 302 return (error); 303 #ifdef MAC 304 error = mac_mount_check_stat(td->td_ucred, mp); 305 if (error != 0) 306 goto out; 307 #endif 308 /* 309 * Set these in case the underlying filesystem fails to do so. 310 */ 311 sp = &mp->mnt_stat; 312 sp->f_version = STATFS_VERSION; 313 sp->f_namemax = NAME_MAX; 314 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 315 error = VFS_STATFS(mp, sp); 316 if (error != 0) 317 goto out; 318 if (priv_check(td, PRIV_VFS_GENERATION)) { 319 bcopy(sp, &sb, sizeof(sb)); 320 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 321 prison_enforce_statfs(td->td_ucred, mp, &sb); 322 sp = &sb; 323 } 324 *buf = *sp; 325 out: 326 vfs_unbusy(mp); 327 return (error); 328 } 329 330 /* 331 * Get filesystem statistics. 332 */ 333 #ifndef _SYS_SYSPROTO_H_ 334 struct fstatfs_args { 335 int fd; 336 struct statfs *buf; 337 }; 338 #endif 339 int 340 sys_fstatfs(td, uap) 341 struct thread *td; 342 register struct fstatfs_args /* { 343 int fd; 344 struct statfs *buf; 345 } */ *uap; 346 { 347 struct statfs sf; 348 int error; 349 350 error = kern_fstatfs(td, uap->fd, &sf); 351 if (error == 0) 352 error = copyout(&sf, uap->buf, sizeof(sf)); 353 return (error); 354 } 355 356 int 357 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 358 { 359 struct file *fp; 360 struct mount *mp; 361 struct statfs *sp, sb; 362 struct vnode *vp; 363 cap_rights_t rights; 364 int error; 365 366 AUDIT_ARG_FD(fd); 367 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 368 if (error != 0) 369 return (error); 370 vp = fp->f_vnode; 371 vn_lock(vp, LK_SHARED | LK_RETRY); 372 #ifdef AUDIT 373 AUDIT_ARG_VNODE1(vp); 374 #endif 375 mp = vp->v_mount; 376 if (mp) 377 vfs_ref(mp); 378 VOP_UNLOCK(vp, 0); 379 fdrop(fp, td); 380 if (mp == NULL) { 381 error = EBADF; 382 goto out; 383 } 384 error = vfs_busy(mp, 0); 385 vfs_rel(mp); 386 if (error != 0) 387 return (error); 388 #ifdef MAC 389 error = mac_mount_check_stat(td->td_ucred, mp); 390 if (error != 0) 391 goto out; 392 #endif 393 /* 394 * Set these in case the underlying filesystem fails to do so. 395 */ 396 sp = &mp->mnt_stat; 397 sp->f_version = STATFS_VERSION; 398 sp->f_namemax = NAME_MAX; 399 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 400 error = VFS_STATFS(mp, sp); 401 if (error != 0) 402 goto out; 403 if (priv_check(td, PRIV_VFS_GENERATION)) { 404 bcopy(sp, &sb, sizeof(sb)); 405 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 406 prison_enforce_statfs(td->td_ucred, mp, &sb); 407 sp = &sb; 408 } 409 *buf = *sp; 410 out: 411 if (mp) 412 vfs_unbusy(mp); 413 return (error); 414 } 415 416 /* 417 * Get statistics on all filesystems. 418 */ 419 #ifndef _SYS_SYSPROTO_H_ 420 struct getfsstat_args { 421 struct statfs *buf; 422 long bufsize; 423 int flags; 424 }; 425 #endif 426 int 427 sys_getfsstat(td, uap) 428 struct thread *td; 429 register struct getfsstat_args /* { 430 struct statfs *buf; 431 long bufsize; 432 int flags; 433 } */ *uap; 434 { 435 size_t count; 436 int error; 437 438 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 439 UIO_USERSPACE, uap->flags); 440 if (error == 0) 441 td->td_retval[0] = count; 442 return (error); 443 } 444 445 /* 446 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 447 * The caller is responsible for freeing memory which will be allocated 448 * in '*buf'. 449 */ 450 int 451 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 452 size_t *countp, enum uio_seg bufseg, int flags) 453 { 454 struct mount *mp, *nmp; 455 struct statfs *sfsp, *sp, sb; 456 size_t count, maxcount; 457 int error; 458 459 maxcount = bufsize / sizeof(struct statfs); 460 if (bufsize == 0) 461 sfsp = NULL; 462 else if (bufseg == UIO_USERSPACE) 463 sfsp = *buf; 464 else /* if (bufseg == UIO_SYSSPACE) */ { 465 count = 0; 466 mtx_lock(&mountlist_mtx); 467 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 468 count++; 469 } 470 mtx_unlock(&mountlist_mtx); 471 if (maxcount > count) 472 maxcount = count; 473 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 474 M_WAITOK); 475 } 476 count = 0; 477 mtx_lock(&mountlist_mtx); 478 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 479 if (prison_canseemount(td->td_ucred, mp) != 0) { 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 continue; 482 } 483 #ifdef MAC 484 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488 #endif 489 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 490 nmp = TAILQ_NEXT(mp, mnt_list); 491 continue; 492 } 493 if (sfsp && count < maxcount) { 494 sp = &mp->mnt_stat; 495 /* 496 * Set these in case the underlying filesystem 497 * fails to do so. 498 */ 499 sp->f_version = STATFS_VERSION; 500 sp->f_namemax = NAME_MAX; 501 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 502 /* 503 * If MNT_NOWAIT or MNT_LAZY is specified, do not 504 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 505 * overrides MNT_WAIT. 506 */ 507 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 508 (flags & MNT_WAIT)) && 509 (error = VFS_STATFS(mp, sp))) { 510 mtx_lock(&mountlist_mtx); 511 nmp = TAILQ_NEXT(mp, mnt_list); 512 vfs_unbusy(mp); 513 continue; 514 } 515 if (priv_check(td, PRIV_VFS_GENERATION)) { 516 bcopy(sp, &sb, sizeof(sb)); 517 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 518 prison_enforce_statfs(td->td_ucred, mp, &sb); 519 sp = &sb; 520 } 521 if (bufseg == UIO_SYSSPACE) 522 bcopy(sp, sfsp, sizeof(*sp)); 523 else /* if (bufseg == UIO_USERSPACE) */ { 524 error = copyout(sp, sfsp, sizeof(*sp)); 525 if (error != 0) { 526 vfs_unbusy(mp); 527 return (error); 528 } 529 } 530 sfsp++; 531 } 532 count++; 533 mtx_lock(&mountlist_mtx); 534 nmp = TAILQ_NEXT(mp, mnt_list); 535 vfs_unbusy(mp); 536 } 537 mtx_unlock(&mountlist_mtx); 538 if (sfsp && count > maxcount) 539 *countp = maxcount; 540 else 541 *countp = count; 542 return (0); 543 } 544 545 #ifdef COMPAT_FREEBSD4 546 /* 547 * Get old format filesystem statistics. 548 */ 549 static void cvtstatfs(struct statfs *, struct ostatfs *); 550 551 #ifndef _SYS_SYSPROTO_H_ 552 struct freebsd4_statfs_args { 553 char *path; 554 struct ostatfs *buf; 555 }; 556 #endif 557 int 558 freebsd4_statfs(td, uap) 559 struct thread *td; 560 struct freebsd4_statfs_args /* { 561 char *path; 562 struct ostatfs *buf; 563 } */ *uap; 564 { 565 struct ostatfs osb; 566 struct statfs sf; 567 int error; 568 569 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 570 if (error != 0) 571 return (error); 572 cvtstatfs(&sf, &osb); 573 return (copyout(&osb, uap->buf, sizeof(osb))); 574 } 575 576 /* 577 * Get filesystem statistics. 578 */ 579 #ifndef _SYS_SYSPROTO_H_ 580 struct freebsd4_fstatfs_args { 581 int fd; 582 struct ostatfs *buf; 583 }; 584 #endif 585 int 586 freebsd4_fstatfs(td, uap) 587 struct thread *td; 588 struct freebsd4_fstatfs_args /* { 589 int fd; 590 struct ostatfs *buf; 591 } */ *uap; 592 { 593 struct ostatfs osb; 594 struct statfs sf; 595 int error; 596 597 error = kern_fstatfs(td, uap->fd, &sf); 598 if (error != 0) 599 return (error); 600 cvtstatfs(&sf, &osb); 601 return (copyout(&osb, uap->buf, sizeof(osb))); 602 } 603 604 /* 605 * Get statistics on all filesystems. 606 */ 607 #ifndef _SYS_SYSPROTO_H_ 608 struct freebsd4_getfsstat_args { 609 struct ostatfs *buf; 610 long bufsize; 611 int flags; 612 }; 613 #endif 614 int 615 freebsd4_getfsstat(td, uap) 616 struct thread *td; 617 register struct freebsd4_getfsstat_args /* { 618 struct ostatfs *buf; 619 long bufsize; 620 int flags; 621 } */ *uap; 622 { 623 struct statfs *buf, *sp; 624 struct ostatfs osb; 625 size_t count, size; 626 int error; 627 628 count = uap->bufsize / sizeof(struct ostatfs); 629 size = count * sizeof(struct statfs); 630 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 631 uap->flags); 632 if (size > 0) { 633 sp = buf; 634 while (count > 0 && error == 0) { 635 cvtstatfs(sp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 sp++; 638 uap->buf++; 639 count--; 640 } 641 free(buf, M_TEMP); 642 } 643 if (error == 0) 644 td->td_retval[0] = count; 645 return (error); 646 } 647 648 /* 649 * Implement fstatfs() for (NFS) file handles. 650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct freebsd4_fhstatfs_args { 653 struct fhandle *u_fhp; 654 struct ostatfs *buf; 655 }; 656 #endif 657 int 658 freebsd4_fhstatfs(td, uap) 659 struct thread *td; 660 struct freebsd4_fhstatfs_args /* { 661 struct fhandle *u_fhp; 662 struct ostatfs *buf; 663 } */ *uap; 664 { 665 struct ostatfs osb; 666 struct statfs sf; 667 fhandle_t fh; 668 int error; 669 670 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 671 if (error != 0) 672 return (error); 673 error = kern_fhstatfs(td, fh, &sf); 674 if (error != 0) 675 return (error); 676 cvtstatfs(&sf, &osb); 677 return (copyout(&osb, uap->buf, sizeof(osb))); 678 } 679 680 /* 681 * Convert a new format statfs structure to an old format statfs structure. 682 */ 683 static void 684 cvtstatfs(nsp, osp) 685 struct statfs *nsp; 686 struct ostatfs *osp; 687 { 688 689 statfs_scale_blocks(nsp, LONG_MAX); 690 bzero(osp, sizeof(*osp)); 691 osp->f_bsize = nsp->f_bsize; 692 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 693 osp->f_blocks = nsp->f_blocks; 694 osp->f_bfree = nsp->f_bfree; 695 osp->f_bavail = nsp->f_bavail; 696 osp->f_files = MIN(nsp->f_files, LONG_MAX); 697 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 698 osp->f_owner = nsp->f_owner; 699 osp->f_type = nsp->f_type; 700 osp->f_flags = nsp->f_flags; 701 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 702 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 703 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 704 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 705 strlcpy(osp->f_fstypename, nsp->f_fstypename, 706 MIN(MFSNAMELEN, OMFSNAMELEN)); 707 strlcpy(osp->f_mntonname, nsp->f_mntonname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 osp->f_fsid = nsp->f_fsid; 712 } 713 #endif /* COMPAT_FREEBSD4 */ 714 715 /* 716 * Change current working directory to a given file descriptor. 717 */ 718 #ifndef _SYS_SYSPROTO_H_ 719 struct fchdir_args { 720 int fd; 721 }; 722 #endif 723 int 724 sys_fchdir(td, uap) 725 struct thread *td; 726 struct fchdir_args /* { 727 int fd; 728 } */ *uap; 729 { 730 struct vnode *vp, *tdp; 731 struct mount *mp; 732 struct file *fp; 733 cap_rights_t rights; 734 int error; 735 736 AUDIT_ARG_FD(uap->fd); 737 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 738 &fp); 739 if (error != 0) 740 return (error); 741 vp = fp->f_vnode; 742 VREF(vp); 743 fdrop(fp, td); 744 vn_lock(vp, LK_SHARED | LK_RETRY); 745 AUDIT_ARG_VNODE1(vp); 746 error = change_dir(vp, td); 747 while (!error && (mp = vp->v_mountedhere) != NULL) { 748 if (vfs_busy(mp, 0)) 749 continue; 750 error = VFS_ROOT(mp, LK_SHARED, &tdp); 751 vfs_unbusy(mp); 752 if (error != 0) 753 break; 754 vput(vp); 755 vp = tdp; 756 } 757 if (error != 0) { 758 vput(vp); 759 return (error); 760 } 761 VOP_UNLOCK(vp, 0); 762 pwd_chdir(td, vp); 763 return (0); 764 } 765 766 /* 767 * Change current working directory (``.''). 768 */ 769 #ifndef _SYS_SYSPROTO_H_ 770 struct chdir_args { 771 char *path; 772 }; 773 #endif 774 int 775 sys_chdir(td, uap) 776 struct thread *td; 777 struct chdir_args /* { 778 char *path; 779 } */ *uap; 780 { 781 782 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 783 } 784 785 int 786 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 787 { 788 struct nameidata nd; 789 int error; 790 791 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 792 pathseg, path, td); 793 if ((error = namei(&nd)) != 0) 794 return (error); 795 if ((error = change_dir(nd.ni_vp, td)) != 0) { 796 vput(nd.ni_vp); 797 NDFREE(&nd, NDF_ONLY_PNBUF); 798 return (error); 799 } 800 VOP_UNLOCK(nd.ni_vp, 0); 801 NDFREE(&nd, NDF_ONLY_PNBUF); 802 pwd_chdir(td, nd.ni_vp); 803 return (0); 804 } 805 806 /* 807 * Change notion of root (``/'') directory. 808 */ 809 #ifndef _SYS_SYSPROTO_H_ 810 struct chroot_args { 811 char *path; 812 }; 813 #endif 814 int 815 sys_chroot(td, uap) 816 struct thread *td; 817 struct chroot_args /* { 818 char *path; 819 } */ *uap; 820 { 821 struct nameidata nd; 822 int error; 823 824 error = priv_check(td, PRIV_VFS_CHROOT); 825 if (error != 0) 826 return (error); 827 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 828 UIO_USERSPACE, uap->path, td); 829 error = namei(&nd); 830 if (error != 0) 831 goto error; 832 error = change_dir(nd.ni_vp, td); 833 if (error != 0) 834 goto e_vunlock; 835 #ifdef MAC 836 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 837 if (error != 0) 838 goto e_vunlock; 839 #endif 840 VOP_UNLOCK(nd.ni_vp, 0); 841 error = pwd_chroot(td, nd.ni_vp); 842 vrele(nd.ni_vp); 843 NDFREE(&nd, NDF_ONLY_PNBUF); 844 return (error); 845 e_vunlock: 846 vput(nd.ni_vp); 847 error: 848 NDFREE(&nd, NDF_ONLY_PNBUF); 849 return (error); 850 } 851 852 /* 853 * Common routine for chroot and chdir. Callers must provide a locked vnode 854 * instance. 855 */ 856 int 857 change_dir(vp, td) 858 struct vnode *vp; 859 struct thread *td; 860 { 861 #ifdef MAC 862 int error; 863 #endif 864 865 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 866 if (vp->v_type != VDIR) 867 return (ENOTDIR); 868 #ifdef MAC 869 error = mac_vnode_check_chdir(td->td_ucred, vp); 870 if (error != 0) 871 return (error); 872 #endif 873 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 874 } 875 876 static __inline void 877 flags_to_rights(int flags, cap_rights_t *rightsp) 878 { 879 880 if (flags & O_EXEC) { 881 cap_rights_set(rightsp, CAP_FEXECVE); 882 } else { 883 switch ((flags & O_ACCMODE)) { 884 case O_RDONLY: 885 cap_rights_set(rightsp, CAP_READ); 886 break; 887 case O_RDWR: 888 cap_rights_set(rightsp, CAP_READ); 889 /* FALLTHROUGH */ 890 case O_WRONLY: 891 cap_rights_set(rightsp, CAP_WRITE); 892 if (!(flags & (O_APPEND | O_TRUNC))) 893 cap_rights_set(rightsp, CAP_SEEK); 894 break; 895 } 896 } 897 898 if (flags & O_CREAT) 899 cap_rights_set(rightsp, CAP_CREATE); 900 901 if (flags & O_TRUNC) 902 cap_rights_set(rightsp, CAP_FTRUNCATE); 903 904 if (flags & (O_SYNC | O_FSYNC)) 905 cap_rights_set(rightsp, CAP_FSYNC); 906 907 if (flags & (O_EXLOCK | O_SHLOCK)) 908 cap_rights_set(rightsp, CAP_FLOCK); 909 } 910 911 /* 912 * Check permissions, allocate an open file structure, and call the device 913 * open routine if any. 914 */ 915 #ifndef _SYS_SYSPROTO_H_ 916 struct open_args { 917 char *path; 918 int flags; 919 int mode; 920 }; 921 #endif 922 int 923 sys_open(td, uap) 924 struct thread *td; 925 register struct open_args /* { 926 char *path; 927 int flags; 928 int mode; 929 } */ *uap; 930 { 931 932 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 933 uap->flags, uap->mode)); 934 } 935 936 #ifndef _SYS_SYSPROTO_H_ 937 struct openat_args { 938 int fd; 939 char *path; 940 int flag; 941 int mode; 942 }; 943 #endif 944 int 945 sys_openat(struct thread *td, struct openat_args *uap) 946 { 947 948 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 949 uap->mode)); 950 } 951 952 int 953 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 954 int flags, int mode) 955 { 956 struct proc *p = td->td_proc; 957 struct filedesc *fdp = p->p_fd; 958 struct file *fp; 959 struct vnode *vp; 960 struct nameidata nd; 961 cap_rights_t rights; 962 int cmode, error, indx; 963 964 indx = -1; 965 966 AUDIT_ARG_FFLAGS(flags); 967 AUDIT_ARG_MODE(mode); 968 /* XXX: audit dirfd */ 969 cap_rights_init(&rights, CAP_LOOKUP); 970 flags_to_rights(flags, &rights); 971 /* 972 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 973 * may be specified. 974 */ 975 if (flags & O_EXEC) { 976 if (flags & O_ACCMODE) 977 return (EINVAL); 978 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 979 return (EINVAL); 980 } else { 981 flags = FFLAGS(flags); 982 } 983 984 /* 985 * Allocate the file descriptor, but don't install a descriptor yet. 986 */ 987 error = falloc_noinstall(td, &fp); 988 if (error != 0) 989 return (error); 990 /* 991 * An extra reference on `fp' has been held for us by 992 * falloc_noinstall(). 993 */ 994 /* Set the flags early so the finit in devfs can pick them up. */ 995 fp->f_flag = flags & FMASK; 996 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 997 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 998 &rights, td); 999 td->td_dupfd = -1; /* XXX check for fdopen */ 1000 error = vn_open(&nd, &flags, cmode, fp); 1001 if (error != 0) { 1002 /* 1003 * If the vn_open replaced the method vector, something 1004 * wonderous happened deep below and we just pass it up 1005 * pretending we know what we do. 1006 */ 1007 if (error == ENXIO && fp->f_ops != &badfileops) 1008 goto success; 1009 1010 /* 1011 * Handle special fdopen() case. bleh. 1012 * 1013 * Don't do this for relative (capability) lookups; we don't 1014 * understand exactly what would happen, and we don't think 1015 * that it ever should. 1016 */ 1017 if (nd.ni_strictrelative == 0 && 1018 (error == ENODEV || error == ENXIO) && 1019 td->td_dupfd >= 0) { 1020 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1021 &indx); 1022 if (error == 0) 1023 goto success; 1024 } 1025 1026 goto bad; 1027 } 1028 td->td_dupfd = 0; 1029 NDFREE(&nd, NDF_ONLY_PNBUF); 1030 vp = nd.ni_vp; 1031 1032 /* 1033 * Store the vnode, for any f_type. Typically, the vnode use 1034 * count is decremented by direct call to vn_closefile() for 1035 * files that switched type in the cdevsw fdopen() method. 1036 */ 1037 fp->f_vnode = vp; 1038 /* 1039 * If the file wasn't claimed by devfs bind it to the normal 1040 * vnode operations here. 1041 */ 1042 if (fp->f_ops == &badfileops) { 1043 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1044 fp->f_seqcount = 1; 1045 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1046 DTYPE_VNODE, vp, &vnops); 1047 } 1048 1049 VOP_UNLOCK(vp, 0); 1050 if (flags & O_TRUNC) { 1051 error = fo_truncate(fp, 0, td->td_ucred, td); 1052 if (error != 0) 1053 goto bad; 1054 } 1055 success: 1056 /* 1057 * If we haven't already installed the FD (for dupfdopen), do so now. 1058 */ 1059 if (indx == -1) { 1060 struct filecaps *fcaps; 1061 1062 #ifdef CAPABILITIES 1063 if (nd.ni_strictrelative == 1) 1064 fcaps = &nd.ni_filecaps; 1065 else 1066 #endif 1067 fcaps = NULL; 1068 error = finstall(td, fp, &indx, flags, fcaps); 1069 /* On success finstall() consumes fcaps. */ 1070 if (error != 0) { 1071 filecaps_free(&nd.ni_filecaps); 1072 goto bad; 1073 } 1074 } else { 1075 filecaps_free(&nd.ni_filecaps); 1076 } 1077 1078 /* 1079 * Release our private reference, leaving the one associated with 1080 * the descriptor table intact. 1081 */ 1082 fdrop(fp, td); 1083 td->td_retval[0] = indx; 1084 return (0); 1085 bad: 1086 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1087 fdrop(fp, td); 1088 return (error); 1089 } 1090 1091 #ifdef COMPAT_43 1092 /* 1093 * Create a file. 1094 */ 1095 #ifndef _SYS_SYSPROTO_H_ 1096 struct ocreat_args { 1097 char *path; 1098 int mode; 1099 }; 1100 #endif 1101 int 1102 ocreat(td, uap) 1103 struct thread *td; 1104 register struct ocreat_args /* { 1105 char *path; 1106 int mode; 1107 } */ *uap; 1108 { 1109 1110 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1111 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1112 } 1113 #endif /* COMPAT_43 */ 1114 1115 /* 1116 * Create a special file. 1117 */ 1118 #ifndef _SYS_SYSPROTO_H_ 1119 struct mknod_args { 1120 char *path; 1121 int mode; 1122 int dev; 1123 }; 1124 #endif 1125 int 1126 sys_mknod(td, uap) 1127 struct thread *td; 1128 register struct mknod_args /* { 1129 char *path; 1130 int mode; 1131 int dev; 1132 } */ *uap; 1133 { 1134 1135 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1136 uap->mode, uap->dev)); 1137 } 1138 1139 #ifndef _SYS_SYSPROTO_H_ 1140 struct mknodat_args { 1141 int fd; 1142 char *path; 1143 mode_t mode; 1144 dev_t dev; 1145 }; 1146 #endif 1147 int 1148 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1149 { 1150 1151 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1152 uap->dev)); 1153 } 1154 1155 int 1156 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1157 int mode, int dev) 1158 { 1159 struct vnode *vp; 1160 struct mount *mp; 1161 struct vattr vattr; 1162 struct nameidata nd; 1163 cap_rights_t rights; 1164 int error, whiteout = 0; 1165 1166 AUDIT_ARG_MODE(mode); 1167 AUDIT_ARG_DEV(dev); 1168 switch (mode & S_IFMT) { 1169 case S_IFCHR: 1170 case S_IFBLK: 1171 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1172 break; 1173 case S_IFMT: 1174 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1175 break; 1176 case S_IFWHT: 1177 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1178 break; 1179 case S_IFIFO: 1180 if (dev == 0) 1181 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1182 /* FALLTHROUGH */ 1183 default: 1184 error = EINVAL; 1185 break; 1186 } 1187 if (error != 0) 1188 return (error); 1189 restart: 1190 bwillwrite(); 1191 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1192 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1193 td); 1194 if ((error = namei(&nd)) != 0) 1195 return (error); 1196 vp = nd.ni_vp; 1197 if (vp != NULL) { 1198 NDFREE(&nd, NDF_ONLY_PNBUF); 1199 if (vp == nd.ni_dvp) 1200 vrele(nd.ni_dvp); 1201 else 1202 vput(nd.ni_dvp); 1203 vrele(vp); 1204 return (EEXIST); 1205 } else { 1206 VATTR_NULL(&vattr); 1207 vattr.va_mode = (mode & ALLPERMS) & 1208 ~td->td_proc->p_fd->fd_cmask; 1209 vattr.va_rdev = dev; 1210 whiteout = 0; 1211 1212 switch (mode & S_IFMT) { 1213 case S_IFMT: /* used by badsect to flag bad sectors */ 1214 vattr.va_type = VBAD; 1215 break; 1216 case S_IFCHR: 1217 vattr.va_type = VCHR; 1218 break; 1219 case S_IFBLK: 1220 vattr.va_type = VBLK; 1221 break; 1222 case S_IFWHT: 1223 whiteout = 1; 1224 break; 1225 default: 1226 panic("kern_mknod: invalid mode"); 1227 } 1228 } 1229 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1230 NDFREE(&nd, NDF_ONLY_PNBUF); 1231 vput(nd.ni_dvp); 1232 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1233 return (error); 1234 goto restart; 1235 } 1236 #ifdef MAC 1237 if (error == 0 && !whiteout) 1238 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1239 &nd.ni_cnd, &vattr); 1240 #endif 1241 if (error == 0) { 1242 if (whiteout) 1243 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1244 else { 1245 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1246 &nd.ni_cnd, &vattr); 1247 if (error == 0) 1248 vput(nd.ni_vp); 1249 } 1250 } 1251 NDFREE(&nd, NDF_ONLY_PNBUF); 1252 vput(nd.ni_dvp); 1253 vn_finished_write(mp); 1254 return (error); 1255 } 1256 1257 /* 1258 * Create a named pipe. 1259 */ 1260 #ifndef _SYS_SYSPROTO_H_ 1261 struct mkfifo_args { 1262 char *path; 1263 int mode; 1264 }; 1265 #endif 1266 int 1267 sys_mkfifo(td, uap) 1268 struct thread *td; 1269 register struct mkfifo_args /* { 1270 char *path; 1271 int mode; 1272 } */ *uap; 1273 { 1274 1275 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1276 uap->mode)); 1277 } 1278 1279 #ifndef _SYS_SYSPROTO_H_ 1280 struct mkfifoat_args { 1281 int fd; 1282 char *path; 1283 mode_t mode; 1284 }; 1285 #endif 1286 int 1287 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1288 { 1289 1290 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1291 uap->mode)); 1292 } 1293 1294 int 1295 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1296 int mode) 1297 { 1298 struct mount *mp; 1299 struct vattr vattr; 1300 struct nameidata nd; 1301 cap_rights_t rights; 1302 int error; 1303 1304 AUDIT_ARG_MODE(mode); 1305 restart: 1306 bwillwrite(); 1307 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1308 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1309 td); 1310 if ((error = namei(&nd)) != 0) 1311 return (error); 1312 if (nd.ni_vp != NULL) { 1313 NDFREE(&nd, NDF_ONLY_PNBUF); 1314 if (nd.ni_vp == nd.ni_dvp) 1315 vrele(nd.ni_dvp); 1316 else 1317 vput(nd.ni_dvp); 1318 vrele(nd.ni_vp); 1319 return (EEXIST); 1320 } 1321 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1322 NDFREE(&nd, NDF_ONLY_PNBUF); 1323 vput(nd.ni_dvp); 1324 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1325 return (error); 1326 goto restart; 1327 } 1328 VATTR_NULL(&vattr); 1329 vattr.va_type = VFIFO; 1330 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1331 #ifdef MAC 1332 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1333 &vattr); 1334 if (error != 0) 1335 goto out; 1336 #endif 1337 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1338 if (error == 0) 1339 vput(nd.ni_vp); 1340 #ifdef MAC 1341 out: 1342 #endif 1343 vput(nd.ni_dvp); 1344 vn_finished_write(mp); 1345 NDFREE(&nd, NDF_ONLY_PNBUF); 1346 return (error); 1347 } 1348 1349 /* 1350 * Make a hard file link. 1351 */ 1352 #ifndef _SYS_SYSPROTO_H_ 1353 struct link_args { 1354 char *path; 1355 char *link; 1356 }; 1357 #endif 1358 int 1359 sys_link(td, uap) 1360 struct thread *td; 1361 register struct link_args /* { 1362 char *path; 1363 char *link; 1364 } */ *uap; 1365 { 1366 1367 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1368 UIO_USERSPACE, FOLLOW)); 1369 } 1370 1371 #ifndef _SYS_SYSPROTO_H_ 1372 struct linkat_args { 1373 int fd1; 1374 char *path1; 1375 int fd2; 1376 char *path2; 1377 int flag; 1378 }; 1379 #endif 1380 int 1381 sys_linkat(struct thread *td, struct linkat_args *uap) 1382 { 1383 int flag; 1384 1385 flag = uap->flag; 1386 if (flag & ~AT_SYMLINK_FOLLOW) 1387 return (EINVAL); 1388 1389 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1390 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1391 } 1392 1393 int hardlink_check_uid = 0; 1394 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1395 &hardlink_check_uid, 0, 1396 "Unprivileged processes cannot create hard links to files owned by other " 1397 "users"); 1398 static int hardlink_check_gid = 0; 1399 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1400 &hardlink_check_gid, 0, 1401 "Unprivileged processes cannot create hard links to files owned by other " 1402 "groups"); 1403 1404 static int 1405 can_hardlink(struct vnode *vp, struct ucred *cred) 1406 { 1407 struct vattr va; 1408 int error; 1409 1410 if (!hardlink_check_uid && !hardlink_check_gid) 1411 return (0); 1412 1413 error = VOP_GETATTR(vp, &va, cred); 1414 if (error != 0) 1415 return (error); 1416 1417 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1418 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1419 if (error != 0) 1420 return (error); 1421 } 1422 1423 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1424 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1425 if (error != 0) 1426 return (error); 1427 } 1428 1429 return (0); 1430 } 1431 1432 int 1433 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1434 enum uio_seg segflg, int follow) 1435 { 1436 struct vnode *vp; 1437 struct mount *mp; 1438 struct nameidata nd; 1439 cap_rights_t rights; 1440 int error; 1441 1442 again: 1443 bwillwrite(); 1444 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1445 1446 if ((error = namei(&nd)) != 0) 1447 return (error); 1448 NDFREE(&nd, NDF_ONLY_PNBUF); 1449 vp = nd.ni_vp; 1450 if (vp->v_type == VDIR) { 1451 vrele(vp); 1452 return (EPERM); /* POSIX */ 1453 } 1454 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2 | 1455 NOCACHE, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), 1456 td); 1457 if ((error = namei(&nd)) == 0) { 1458 if (nd.ni_vp != NULL) { 1459 NDFREE(&nd, NDF_ONLY_PNBUF); 1460 if (nd.ni_dvp == nd.ni_vp) 1461 vrele(nd.ni_dvp); 1462 else 1463 vput(nd.ni_dvp); 1464 vrele(nd.ni_vp); 1465 vrele(vp); 1466 return (EEXIST); 1467 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1468 /* 1469 * Cross-device link. No need to recheck 1470 * vp->v_type, since it cannot change, except 1471 * to VBAD. 1472 */ 1473 NDFREE(&nd, NDF_ONLY_PNBUF); 1474 vput(nd.ni_dvp); 1475 vrele(vp); 1476 return (EXDEV); 1477 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1478 error = can_hardlink(vp, td->td_ucred); 1479 #ifdef MAC 1480 if (error == 0) 1481 error = mac_vnode_check_link(td->td_ucred, 1482 nd.ni_dvp, vp, &nd.ni_cnd); 1483 #endif 1484 if (error != 0) { 1485 vput(vp); 1486 vput(nd.ni_dvp); 1487 NDFREE(&nd, NDF_ONLY_PNBUF); 1488 return (error); 1489 } 1490 error = vn_start_write(vp, &mp, V_NOWAIT); 1491 if (error != 0) { 1492 vput(vp); 1493 vput(nd.ni_dvp); 1494 NDFREE(&nd, NDF_ONLY_PNBUF); 1495 error = vn_start_write(NULL, &mp, 1496 V_XSLEEP | PCATCH); 1497 if (error != 0) 1498 return (error); 1499 goto again; 1500 } 1501 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1502 VOP_UNLOCK(vp, 0); 1503 vput(nd.ni_dvp); 1504 vn_finished_write(mp); 1505 NDFREE(&nd, NDF_ONLY_PNBUF); 1506 } else { 1507 vput(nd.ni_dvp); 1508 NDFREE(&nd, NDF_ONLY_PNBUF); 1509 vrele(vp); 1510 goto again; 1511 } 1512 } 1513 vrele(vp); 1514 return (error); 1515 } 1516 1517 /* 1518 * Make a symbolic link. 1519 */ 1520 #ifndef _SYS_SYSPROTO_H_ 1521 struct symlink_args { 1522 char *path; 1523 char *link; 1524 }; 1525 #endif 1526 int 1527 sys_symlink(td, uap) 1528 struct thread *td; 1529 register struct symlink_args /* { 1530 char *path; 1531 char *link; 1532 } */ *uap; 1533 { 1534 1535 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1536 UIO_USERSPACE)); 1537 } 1538 1539 #ifndef _SYS_SYSPROTO_H_ 1540 struct symlinkat_args { 1541 char *path; 1542 int fd; 1543 char *path2; 1544 }; 1545 #endif 1546 int 1547 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1548 { 1549 1550 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1551 UIO_USERSPACE)); 1552 } 1553 1554 int 1555 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1556 enum uio_seg segflg) 1557 { 1558 struct mount *mp; 1559 struct vattr vattr; 1560 char *syspath; 1561 struct nameidata nd; 1562 int error; 1563 cap_rights_t rights; 1564 1565 if (segflg == UIO_SYSSPACE) { 1566 syspath = path1; 1567 } else { 1568 syspath = uma_zalloc(namei_zone, M_WAITOK); 1569 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1570 goto out; 1571 } 1572 AUDIT_ARG_TEXT(syspath); 1573 restart: 1574 bwillwrite(); 1575 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1576 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1577 td); 1578 if ((error = namei(&nd)) != 0) 1579 goto out; 1580 if (nd.ni_vp) { 1581 NDFREE(&nd, NDF_ONLY_PNBUF); 1582 if (nd.ni_vp == nd.ni_dvp) 1583 vrele(nd.ni_dvp); 1584 else 1585 vput(nd.ni_dvp); 1586 vrele(nd.ni_vp); 1587 error = EEXIST; 1588 goto out; 1589 } 1590 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1591 NDFREE(&nd, NDF_ONLY_PNBUF); 1592 vput(nd.ni_dvp); 1593 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1594 goto out; 1595 goto restart; 1596 } 1597 VATTR_NULL(&vattr); 1598 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1599 #ifdef MAC 1600 vattr.va_type = VLNK; 1601 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1602 &vattr); 1603 if (error != 0) 1604 goto out2; 1605 #endif 1606 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1607 if (error == 0) 1608 vput(nd.ni_vp); 1609 #ifdef MAC 1610 out2: 1611 #endif 1612 NDFREE(&nd, NDF_ONLY_PNBUF); 1613 vput(nd.ni_dvp); 1614 vn_finished_write(mp); 1615 out: 1616 if (segflg != UIO_SYSSPACE) 1617 uma_zfree(namei_zone, syspath); 1618 return (error); 1619 } 1620 1621 /* 1622 * Delete a whiteout from the filesystem. 1623 */ 1624 int 1625 sys_undelete(td, uap) 1626 struct thread *td; 1627 register struct undelete_args /* { 1628 char *path; 1629 } */ *uap; 1630 { 1631 struct mount *mp; 1632 struct nameidata nd; 1633 int error; 1634 1635 restart: 1636 bwillwrite(); 1637 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1638 UIO_USERSPACE, uap->path, td); 1639 error = namei(&nd); 1640 if (error != 0) 1641 return (error); 1642 1643 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1644 NDFREE(&nd, NDF_ONLY_PNBUF); 1645 if (nd.ni_vp == nd.ni_dvp) 1646 vrele(nd.ni_dvp); 1647 else 1648 vput(nd.ni_dvp); 1649 if (nd.ni_vp) 1650 vrele(nd.ni_vp); 1651 return (EEXIST); 1652 } 1653 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1654 NDFREE(&nd, NDF_ONLY_PNBUF); 1655 vput(nd.ni_dvp); 1656 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1657 return (error); 1658 goto restart; 1659 } 1660 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1661 NDFREE(&nd, NDF_ONLY_PNBUF); 1662 vput(nd.ni_dvp); 1663 vn_finished_write(mp); 1664 return (error); 1665 } 1666 1667 /* 1668 * Delete a name from the filesystem. 1669 */ 1670 #ifndef _SYS_SYSPROTO_H_ 1671 struct unlink_args { 1672 char *path; 1673 }; 1674 #endif 1675 int 1676 sys_unlink(td, uap) 1677 struct thread *td; 1678 struct unlink_args /* { 1679 char *path; 1680 } */ *uap; 1681 { 1682 1683 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1684 } 1685 1686 #ifndef _SYS_SYSPROTO_H_ 1687 struct unlinkat_args { 1688 int fd; 1689 char *path; 1690 int flag; 1691 }; 1692 #endif 1693 int 1694 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1695 { 1696 int flag = uap->flag; 1697 int fd = uap->fd; 1698 char *path = uap->path; 1699 1700 if (flag & ~AT_REMOVEDIR) 1701 return (EINVAL); 1702 1703 if (flag & AT_REMOVEDIR) 1704 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1705 else 1706 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1707 } 1708 1709 int 1710 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1711 ino_t oldinum) 1712 { 1713 struct mount *mp; 1714 struct vnode *vp; 1715 struct nameidata nd; 1716 struct stat sb; 1717 cap_rights_t rights; 1718 int error; 1719 1720 restart: 1721 bwillwrite(); 1722 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1723 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1724 if ((error = namei(&nd)) != 0) 1725 return (error == EINVAL ? EPERM : error); 1726 vp = nd.ni_vp; 1727 if (vp->v_type == VDIR && oldinum == 0) { 1728 error = EPERM; /* POSIX */ 1729 } else if (oldinum != 0 && 1730 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1731 sb.st_ino != oldinum) { 1732 error = EIDRM; /* Identifier removed */ 1733 } else { 1734 /* 1735 * The root of a mounted filesystem cannot be deleted. 1736 * 1737 * XXX: can this only be a VDIR case? 1738 */ 1739 if (vp->v_vflag & VV_ROOT) 1740 error = EBUSY; 1741 } 1742 if (error == 0) { 1743 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1744 NDFREE(&nd, NDF_ONLY_PNBUF); 1745 vput(nd.ni_dvp); 1746 if (vp == nd.ni_dvp) 1747 vrele(vp); 1748 else 1749 vput(vp); 1750 if ((error = vn_start_write(NULL, &mp, 1751 V_XSLEEP | PCATCH)) != 0) 1752 return (error); 1753 goto restart; 1754 } 1755 #ifdef MAC 1756 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1757 &nd.ni_cnd); 1758 if (error != 0) 1759 goto out; 1760 #endif 1761 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1762 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1763 #ifdef MAC 1764 out: 1765 #endif 1766 vn_finished_write(mp); 1767 } 1768 NDFREE(&nd, NDF_ONLY_PNBUF); 1769 vput(nd.ni_dvp); 1770 if (vp == nd.ni_dvp) 1771 vrele(vp); 1772 else 1773 vput(vp); 1774 return (error); 1775 } 1776 1777 /* 1778 * Reposition read/write file offset. 1779 */ 1780 #ifndef _SYS_SYSPROTO_H_ 1781 struct lseek_args { 1782 int fd; 1783 int pad; 1784 off_t offset; 1785 int whence; 1786 }; 1787 #endif 1788 int 1789 sys_lseek(td, uap) 1790 struct thread *td; 1791 register struct lseek_args /* { 1792 int fd; 1793 int pad; 1794 off_t offset; 1795 int whence; 1796 } */ *uap; 1797 { 1798 struct file *fp; 1799 cap_rights_t rights; 1800 int error; 1801 1802 AUDIT_ARG_FD(uap->fd); 1803 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1804 if (error != 0) 1805 return (error); 1806 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1807 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1808 fdrop(fp, td); 1809 return (error); 1810 } 1811 1812 #if defined(COMPAT_43) 1813 /* 1814 * Reposition read/write file offset. 1815 */ 1816 #ifndef _SYS_SYSPROTO_H_ 1817 struct olseek_args { 1818 int fd; 1819 long offset; 1820 int whence; 1821 }; 1822 #endif 1823 int 1824 olseek(td, uap) 1825 struct thread *td; 1826 register struct olseek_args /* { 1827 int fd; 1828 long offset; 1829 int whence; 1830 } */ *uap; 1831 { 1832 struct lseek_args /* { 1833 int fd; 1834 int pad; 1835 off_t offset; 1836 int whence; 1837 } */ nuap; 1838 1839 nuap.fd = uap->fd; 1840 nuap.offset = uap->offset; 1841 nuap.whence = uap->whence; 1842 return (sys_lseek(td, &nuap)); 1843 } 1844 #endif /* COMPAT_43 */ 1845 1846 #if defined(COMPAT_FREEBSD6) 1847 /* Version with the 'pad' argument */ 1848 int 1849 freebsd6_lseek(td, uap) 1850 struct thread *td; 1851 register struct freebsd6_lseek_args *uap; 1852 { 1853 struct lseek_args ouap; 1854 1855 ouap.fd = uap->fd; 1856 ouap.offset = uap->offset; 1857 ouap.whence = uap->whence; 1858 return (sys_lseek(td, &ouap)); 1859 } 1860 #endif 1861 1862 /* 1863 * Check access permissions using passed credentials. 1864 */ 1865 static int 1866 vn_access(vp, user_flags, cred, td) 1867 struct vnode *vp; 1868 int user_flags; 1869 struct ucred *cred; 1870 struct thread *td; 1871 { 1872 accmode_t accmode; 1873 int error; 1874 1875 /* Flags == 0 means only check for existence. */ 1876 if (user_flags == 0) 1877 return (0); 1878 1879 accmode = 0; 1880 if (user_flags & R_OK) 1881 accmode |= VREAD; 1882 if (user_flags & W_OK) 1883 accmode |= VWRITE; 1884 if (user_flags & X_OK) 1885 accmode |= VEXEC; 1886 #ifdef MAC 1887 error = mac_vnode_check_access(cred, vp, accmode); 1888 if (error != 0) 1889 return (error); 1890 #endif 1891 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1892 error = VOP_ACCESS(vp, accmode, cred, td); 1893 return (error); 1894 } 1895 1896 /* 1897 * Check access permissions using "real" credentials. 1898 */ 1899 #ifndef _SYS_SYSPROTO_H_ 1900 struct access_args { 1901 char *path; 1902 int amode; 1903 }; 1904 #endif 1905 int 1906 sys_access(td, uap) 1907 struct thread *td; 1908 register struct access_args /* { 1909 char *path; 1910 int amode; 1911 } */ *uap; 1912 { 1913 1914 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1915 0, uap->amode)); 1916 } 1917 1918 #ifndef _SYS_SYSPROTO_H_ 1919 struct faccessat_args { 1920 int dirfd; 1921 char *path; 1922 int amode; 1923 int flag; 1924 } 1925 #endif 1926 int 1927 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1928 { 1929 1930 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1931 uap->amode)); 1932 } 1933 1934 int 1935 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1936 int flag, int amode) 1937 { 1938 struct ucred *cred, *usecred; 1939 struct vnode *vp; 1940 struct nameidata nd; 1941 cap_rights_t rights; 1942 int error; 1943 1944 if (flag & ~AT_EACCESS) 1945 return (EINVAL); 1946 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1947 return (EINVAL); 1948 1949 /* 1950 * Create and modify a temporary credential instead of one that 1951 * is potentially shared (if we need one). 1952 */ 1953 cred = td->td_ucred; 1954 if ((flag & AT_EACCESS) == 0 && 1955 ((cred->cr_uid != cred->cr_ruid || 1956 cred->cr_rgid != cred->cr_groups[0]))) { 1957 usecred = crdup(cred); 1958 usecred->cr_uid = cred->cr_ruid; 1959 usecred->cr_groups[0] = cred->cr_rgid; 1960 td->td_ucred = usecred; 1961 } else 1962 usecred = cred; 1963 AUDIT_ARG_VALUE(amode); 1964 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1965 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1966 td); 1967 if ((error = namei(&nd)) != 0) 1968 goto out; 1969 vp = nd.ni_vp; 1970 1971 error = vn_access(vp, amode, usecred, td); 1972 NDFREE(&nd, NDF_ONLY_PNBUF); 1973 vput(vp); 1974 out: 1975 if (usecred != cred) { 1976 td->td_ucred = cred; 1977 crfree(usecred); 1978 } 1979 return (error); 1980 } 1981 1982 /* 1983 * Check access permissions using "effective" credentials. 1984 */ 1985 #ifndef _SYS_SYSPROTO_H_ 1986 struct eaccess_args { 1987 char *path; 1988 int amode; 1989 }; 1990 #endif 1991 int 1992 sys_eaccess(td, uap) 1993 struct thread *td; 1994 register struct eaccess_args /* { 1995 char *path; 1996 int amode; 1997 } */ *uap; 1998 { 1999 2000 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2001 AT_EACCESS, uap->amode)); 2002 } 2003 2004 #if defined(COMPAT_43) 2005 /* 2006 * Get file status; this version follows links. 2007 */ 2008 #ifndef _SYS_SYSPROTO_H_ 2009 struct ostat_args { 2010 char *path; 2011 struct ostat *ub; 2012 }; 2013 #endif 2014 int 2015 ostat(td, uap) 2016 struct thread *td; 2017 register struct ostat_args /* { 2018 char *path; 2019 struct ostat *ub; 2020 } */ *uap; 2021 { 2022 struct stat sb; 2023 struct ostat osb; 2024 int error; 2025 2026 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2027 &sb, NULL); 2028 if (error != 0) 2029 return (error); 2030 cvtstat(&sb, &osb); 2031 return (copyout(&osb, uap->ub, sizeof (osb))); 2032 } 2033 2034 /* 2035 * Get file status; this version does not follow links. 2036 */ 2037 #ifndef _SYS_SYSPROTO_H_ 2038 struct olstat_args { 2039 char *path; 2040 struct ostat *ub; 2041 }; 2042 #endif 2043 int 2044 olstat(td, uap) 2045 struct thread *td; 2046 register struct olstat_args /* { 2047 char *path; 2048 struct ostat *ub; 2049 } */ *uap; 2050 { 2051 struct stat sb; 2052 struct ostat osb; 2053 int error; 2054 2055 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2056 UIO_USERSPACE, &sb, NULL); 2057 if (error != 0) 2058 return (error); 2059 cvtstat(&sb, &osb); 2060 return (copyout(&osb, uap->ub, sizeof (osb))); 2061 } 2062 2063 /* 2064 * Convert from an old to a new stat structure. 2065 */ 2066 void 2067 cvtstat(st, ost) 2068 struct stat *st; 2069 struct ostat *ost; 2070 { 2071 2072 ost->st_dev = st->st_dev; 2073 ost->st_ino = st->st_ino; 2074 ost->st_mode = st->st_mode; 2075 ost->st_nlink = st->st_nlink; 2076 ost->st_uid = st->st_uid; 2077 ost->st_gid = st->st_gid; 2078 ost->st_rdev = st->st_rdev; 2079 if (st->st_size < (quad_t)1 << 32) 2080 ost->st_size = st->st_size; 2081 else 2082 ost->st_size = -2; 2083 ost->st_atim = st->st_atim; 2084 ost->st_mtim = st->st_mtim; 2085 ost->st_ctim = st->st_ctim; 2086 ost->st_blksize = st->st_blksize; 2087 ost->st_blocks = st->st_blocks; 2088 ost->st_flags = st->st_flags; 2089 ost->st_gen = st->st_gen; 2090 } 2091 #endif /* COMPAT_43 */ 2092 2093 /* 2094 * Get file status; this version follows links. 2095 */ 2096 #ifndef _SYS_SYSPROTO_H_ 2097 struct stat_args { 2098 char *path; 2099 struct stat *ub; 2100 }; 2101 #endif 2102 int 2103 sys_stat(td, uap) 2104 struct thread *td; 2105 register struct stat_args /* { 2106 char *path; 2107 struct stat *ub; 2108 } */ *uap; 2109 { 2110 struct stat sb; 2111 int error; 2112 2113 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2114 &sb, NULL); 2115 if (error == 0) 2116 error = copyout(&sb, uap->ub, sizeof (sb)); 2117 return (error); 2118 } 2119 2120 #ifndef _SYS_SYSPROTO_H_ 2121 struct fstatat_args { 2122 int fd; 2123 char *path; 2124 struct stat *buf; 2125 int flag; 2126 } 2127 #endif 2128 int 2129 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2130 { 2131 struct stat sb; 2132 int error; 2133 2134 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2135 UIO_USERSPACE, &sb, NULL); 2136 if (error == 0) 2137 error = copyout(&sb, uap->buf, sizeof (sb)); 2138 return (error); 2139 } 2140 2141 int 2142 kern_statat(struct thread *td, int flag, int fd, char *path, 2143 enum uio_seg pathseg, struct stat *sbp, 2144 void (*hook)(struct vnode *vp, struct stat *sbp)) 2145 { 2146 struct nameidata nd; 2147 struct stat sb; 2148 cap_rights_t rights; 2149 int error; 2150 2151 if (flag & ~AT_SYMLINK_NOFOLLOW) 2152 return (EINVAL); 2153 2154 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2155 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2156 cap_rights_init(&rights, CAP_FSTAT), td); 2157 2158 if ((error = namei(&nd)) != 0) 2159 return (error); 2160 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2161 if (error == 0) { 2162 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2163 if (S_ISREG(sb.st_mode)) 2164 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2165 if (__predict_false(hook != NULL)) 2166 hook(nd.ni_vp, &sb); 2167 } 2168 NDFREE(&nd, NDF_ONLY_PNBUF); 2169 vput(nd.ni_vp); 2170 if (error != 0) 2171 return (error); 2172 *sbp = sb; 2173 #ifdef KTRACE 2174 if (KTRPOINT(td, KTR_STRUCT)) 2175 ktrstat(&sb); 2176 #endif 2177 return (0); 2178 } 2179 2180 /* 2181 * Get file status; this version does not follow links. 2182 */ 2183 #ifndef _SYS_SYSPROTO_H_ 2184 struct lstat_args { 2185 char *path; 2186 struct stat *ub; 2187 }; 2188 #endif 2189 int 2190 sys_lstat(td, uap) 2191 struct thread *td; 2192 register struct lstat_args /* { 2193 char *path; 2194 struct stat *ub; 2195 } */ *uap; 2196 { 2197 struct stat sb; 2198 int error; 2199 2200 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2201 UIO_USERSPACE, &sb, NULL); 2202 if (error == 0) 2203 error = copyout(&sb, uap->ub, sizeof (sb)); 2204 return (error); 2205 } 2206 2207 /* 2208 * Implementation of the NetBSD [l]stat() functions. 2209 */ 2210 void 2211 cvtnstat(sb, nsb) 2212 struct stat *sb; 2213 struct nstat *nsb; 2214 { 2215 2216 bzero(nsb, sizeof *nsb); 2217 nsb->st_dev = sb->st_dev; 2218 nsb->st_ino = sb->st_ino; 2219 nsb->st_mode = sb->st_mode; 2220 nsb->st_nlink = sb->st_nlink; 2221 nsb->st_uid = sb->st_uid; 2222 nsb->st_gid = sb->st_gid; 2223 nsb->st_rdev = sb->st_rdev; 2224 nsb->st_atim = sb->st_atim; 2225 nsb->st_mtim = sb->st_mtim; 2226 nsb->st_ctim = sb->st_ctim; 2227 nsb->st_size = sb->st_size; 2228 nsb->st_blocks = sb->st_blocks; 2229 nsb->st_blksize = sb->st_blksize; 2230 nsb->st_flags = sb->st_flags; 2231 nsb->st_gen = sb->st_gen; 2232 nsb->st_birthtim = sb->st_birthtim; 2233 } 2234 2235 #ifndef _SYS_SYSPROTO_H_ 2236 struct nstat_args { 2237 char *path; 2238 struct nstat *ub; 2239 }; 2240 #endif 2241 int 2242 sys_nstat(td, uap) 2243 struct thread *td; 2244 register struct nstat_args /* { 2245 char *path; 2246 struct nstat *ub; 2247 } */ *uap; 2248 { 2249 struct stat sb; 2250 struct nstat nsb; 2251 int error; 2252 2253 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2254 &sb, NULL); 2255 if (error != 0) 2256 return (error); 2257 cvtnstat(&sb, &nsb); 2258 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2259 } 2260 2261 /* 2262 * NetBSD lstat. Get file status; this version does not follow links. 2263 */ 2264 #ifndef _SYS_SYSPROTO_H_ 2265 struct lstat_args { 2266 char *path; 2267 struct stat *ub; 2268 }; 2269 #endif 2270 int 2271 sys_nlstat(td, uap) 2272 struct thread *td; 2273 register struct nlstat_args /* { 2274 char *path; 2275 struct nstat *ub; 2276 } */ *uap; 2277 { 2278 struct stat sb; 2279 struct nstat nsb; 2280 int error; 2281 2282 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2283 UIO_USERSPACE, &sb, NULL); 2284 if (error != 0) 2285 return (error); 2286 cvtnstat(&sb, &nsb); 2287 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2288 } 2289 2290 /* 2291 * Get configurable pathname variables. 2292 */ 2293 #ifndef _SYS_SYSPROTO_H_ 2294 struct pathconf_args { 2295 char *path; 2296 int name; 2297 }; 2298 #endif 2299 int 2300 sys_pathconf(td, uap) 2301 struct thread *td; 2302 register struct pathconf_args /* { 2303 char *path; 2304 int name; 2305 } */ *uap; 2306 { 2307 2308 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2309 } 2310 2311 #ifndef _SYS_SYSPROTO_H_ 2312 struct lpathconf_args { 2313 char *path; 2314 int name; 2315 }; 2316 #endif 2317 int 2318 sys_lpathconf(td, uap) 2319 struct thread *td; 2320 register struct lpathconf_args /* { 2321 char *path; 2322 int name; 2323 } */ *uap; 2324 { 2325 2326 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2327 NOFOLLOW)); 2328 } 2329 2330 int 2331 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2332 u_long flags) 2333 { 2334 struct nameidata nd; 2335 int error; 2336 2337 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2338 pathseg, path, td); 2339 if ((error = namei(&nd)) != 0) 2340 return (error); 2341 NDFREE(&nd, NDF_ONLY_PNBUF); 2342 2343 /* If asynchronous I/O is available, it works for all files. */ 2344 if (name == _PC_ASYNC_IO) 2345 td->td_retval[0] = async_io_version; 2346 else 2347 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2348 vput(nd.ni_vp); 2349 return (error); 2350 } 2351 2352 /* 2353 * Return target name of a symbolic link. 2354 */ 2355 #ifndef _SYS_SYSPROTO_H_ 2356 struct readlink_args { 2357 char *path; 2358 char *buf; 2359 size_t count; 2360 }; 2361 #endif 2362 int 2363 sys_readlink(td, uap) 2364 struct thread *td; 2365 register struct readlink_args /* { 2366 char *path; 2367 char *buf; 2368 size_t count; 2369 } */ *uap; 2370 { 2371 2372 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2373 uap->buf, UIO_USERSPACE, uap->count)); 2374 } 2375 #ifndef _SYS_SYSPROTO_H_ 2376 struct readlinkat_args { 2377 int fd; 2378 char *path; 2379 char *buf; 2380 size_t bufsize; 2381 }; 2382 #endif 2383 int 2384 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2385 { 2386 2387 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2388 uap->buf, UIO_USERSPACE, uap->bufsize)); 2389 } 2390 2391 int 2392 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2393 char *buf, enum uio_seg bufseg, size_t count) 2394 { 2395 struct vnode *vp; 2396 struct iovec aiov; 2397 struct uio auio; 2398 struct nameidata nd; 2399 int error; 2400 2401 if (count > IOSIZE_MAX) 2402 return (EINVAL); 2403 2404 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2405 pathseg, path, fd, td); 2406 2407 if ((error = namei(&nd)) != 0) 2408 return (error); 2409 NDFREE(&nd, NDF_ONLY_PNBUF); 2410 vp = nd.ni_vp; 2411 #ifdef MAC 2412 error = mac_vnode_check_readlink(td->td_ucred, vp); 2413 if (error != 0) { 2414 vput(vp); 2415 return (error); 2416 } 2417 #endif 2418 if (vp->v_type != VLNK) 2419 error = EINVAL; 2420 else { 2421 aiov.iov_base = buf; 2422 aiov.iov_len = count; 2423 auio.uio_iov = &aiov; 2424 auio.uio_iovcnt = 1; 2425 auio.uio_offset = 0; 2426 auio.uio_rw = UIO_READ; 2427 auio.uio_segflg = bufseg; 2428 auio.uio_td = td; 2429 auio.uio_resid = count; 2430 error = VOP_READLINK(vp, &auio, td->td_ucred); 2431 td->td_retval[0] = count - auio.uio_resid; 2432 } 2433 vput(vp); 2434 return (error); 2435 } 2436 2437 /* 2438 * Common implementation code for chflags() and fchflags(). 2439 */ 2440 static int 2441 setfflags(td, vp, flags) 2442 struct thread *td; 2443 struct vnode *vp; 2444 u_long flags; 2445 { 2446 struct mount *mp; 2447 struct vattr vattr; 2448 int error; 2449 2450 /* We can't support the value matching VNOVAL. */ 2451 if (flags == VNOVAL) 2452 return (EOPNOTSUPP); 2453 2454 /* 2455 * Prevent non-root users from setting flags on devices. When 2456 * a device is reused, users can retain ownership of the device 2457 * if they are allowed to set flags and programs assume that 2458 * chown can't fail when done as root. 2459 */ 2460 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2461 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2462 if (error != 0) 2463 return (error); 2464 } 2465 2466 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2467 return (error); 2468 VATTR_NULL(&vattr); 2469 vattr.va_flags = flags; 2470 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2471 #ifdef MAC 2472 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2473 if (error == 0) 2474 #endif 2475 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2476 VOP_UNLOCK(vp, 0); 2477 vn_finished_write(mp); 2478 return (error); 2479 } 2480 2481 /* 2482 * Change flags of a file given a path name. 2483 */ 2484 #ifndef _SYS_SYSPROTO_H_ 2485 struct chflags_args { 2486 const char *path; 2487 u_long flags; 2488 }; 2489 #endif 2490 int 2491 sys_chflags(td, uap) 2492 struct thread *td; 2493 register struct chflags_args /* { 2494 const char *path; 2495 u_long flags; 2496 } */ *uap; 2497 { 2498 2499 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2500 uap->flags, 0)); 2501 } 2502 2503 #ifndef _SYS_SYSPROTO_H_ 2504 struct chflagsat_args { 2505 int fd; 2506 const char *path; 2507 u_long flags; 2508 int atflag; 2509 } 2510 #endif 2511 int 2512 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2513 { 2514 int fd = uap->fd; 2515 const char *path = uap->path; 2516 u_long flags = uap->flags; 2517 int atflag = uap->atflag; 2518 2519 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2520 return (EINVAL); 2521 2522 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2523 } 2524 2525 /* 2526 * Same as chflags() but doesn't follow symlinks. 2527 */ 2528 int 2529 sys_lchflags(td, uap) 2530 struct thread *td; 2531 register struct lchflags_args /* { 2532 const char *path; 2533 u_long flags; 2534 } */ *uap; 2535 { 2536 2537 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2538 uap->flags, AT_SYMLINK_NOFOLLOW)); 2539 } 2540 2541 static int 2542 kern_chflagsat(struct thread *td, int fd, const char *path, 2543 enum uio_seg pathseg, u_long flags, int atflag) 2544 { 2545 struct nameidata nd; 2546 cap_rights_t rights; 2547 int error, follow; 2548 2549 AUDIT_ARG_FFLAGS(flags); 2550 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2551 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2552 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2553 if ((error = namei(&nd)) != 0) 2554 return (error); 2555 NDFREE(&nd, NDF_ONLY_PNBUF); 2556 error = setfflags(td, nd.ni_vp, flags); 2557 vrele(nd.ni_vp); 2558 return (error); 2559 } 2560 2561 /* 2562 * Change flags of a file given a file descriptor. 2563 */ 2564 #ifndef _SYS_SYSPROTO_H_ 2565 struct fchflags_args { 2566 int fd; 2567 u_long flags; 2568 }; 2569 #endif 2570 int 2571 sys_fchflags(td, uap) 2572 struct thread *td; 2573 register struct fchflags_args /* { 2574 int fd; 2575 u_long flags; 2576 } */ *uap; 2577 { 2578 struct file *fp; 2579 cap_rights_t rights; 2580 int error; 2581 2582 AUDIT_ARG_FD(uap->fd); 2583 AUDIT_ARG_FFLAGS(uap->flags); 2584 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2585 &fp); 2586 if (error != 0) 2587 return (error); 2588 #ifdef AUDIT 2589 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2590 AUDIT_ARG_VNODE1(fp->f_vnode); 2591 VOP_UNLOCK(fp->f_vnode, 0); 2592 #endif 2593 error = setfflags(td, fp->f_vnode, uap->flags); 2594 fdrop(fp, td); 2595 return (error); 2596 } 2597 2598 /* 2599 * Common implementation code for chmod(), lchmod() and fchmod(). 2600 */ 2601 int 2602 setfmode(td, cred, vp, mode) 2603 struct thread *td; 2604 struct ucred *cred; 2605 struct vnode *vp; 2606 int mode; 2607 { 2608 struct mount *mp; 2609 struct vattr vattr; 2610 int error; 2611 2612 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2613 return (error); 2614 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2615 VATTR_NULL(&vattr); 2616 vattr.va_mode = mode & ALLPERMS; 2617 #ifdef MAC 2618 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2619 if (error == 0) 2620 #endif 2621 error = VOP_SETATTR(vp, &vattr, cred); 2622 VOP_UNLOCK(vp, 0); 2623 vn_finished_write(mp); 2624 return (error); 2625 } 2626 2627 /* 2628 * Change mode of a file given path name. 2629 */ 2630 #ifndef _SYS_SYSPROTO_H_ 2631 struct chmod_args { 2632 char *path; 2633 int mode; 2634 }; 2635 #endif 2636 int 2637 sys_chmod(td, uap) 2638 struct thread *td; 2639 register struct chmod_args /* { 2640 char *path; 2641 int mode; 2642 } */ *uap; 2643 { 2644 2645 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2646 uap->mode, 0)); 2647 } 2648 2649 #ifndef _SYS_SYSPROTO_H_ 2650 struct fchmodat_args { 2651 int dirfd; 2652 char *path; 2653 mode_t mode; 2654 int flag; 2655 } 2656 #endif 2657 int 2658 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2659 { 2660 int flag = uap->flag; 2661 int fd = uap->fd; 2662 char *path = uap->path; 2663 mode_t mode = uap->mode; 2664 2665 if (flag & ~AT_SYMLINK_NOFOLLOW) 2666 return (EINVAL); 2667 2668 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2669 } 2670 2671 /* 2672 * Change mode of a file given path name (don't follow links.) 2673 */ 2674 #ifndef _SYS_SYSPROTO_H_ 2675 struct lchmod_args { 2676 char *path; 2677 int mode; 2678 }; 2679 #endif 2680 int 2681 sys_lchmod(td, uap) 2682 struct thread *td; 2683 register struct lchmod_args /* { 2684 char *path; 2685 int mode; 2686 } */ *uap; 2687 { 2688 2689 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2690 uap->mode, AT_SYMLINK_NOFOLLOW)); 2691 } 2692 2693 int 2694 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2695 mode_t mode, int flag) 2696 { 2697 struct nameidata nd; 2698 cap_rights_t rights; 2699 int error, follow; 2700 2701 AUDIT_ARG_MODE(mode); 2702 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2703 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2704 cap_rights_init(&rights, CAP_FCHMOD), td); 2705 if ((error = namei(&nd)) != 0) 2706 return (error); 2707 NDFREE(&nd, NDF_ONLY_PNBUF); 2708 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2709 vrele(nd.ni_vp); 2710 return (error); 2711 } 2712 2713 /* 2714 * Change mode of a file given a file descriptor. 2715 */ 2716 #ifndef _SYS_SYSPROTO_H_ 2717 struct fchmod_args { 2718 int fd; 2719 int mode; 2720 }; 2721 #endif 2722 int 2723 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2724 { 2725 struct file *fp; 2726 cap_rights_t rights; 2727 int error; 2728 2729 AUDIT_ARG_FD(uap->fd); 2730 AUDIT_ARG_MODE(uap->mode); 2731 2732 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2733 if (error != 0) 2734 return (error); 2735 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2736 fdrop(fp, td); 2737 return (error); 2738 } 2739 2740 /* 2741 * Common implementation for chown(), lchown(), and fchown() 2742 */ 2743 int 2744 setfown(td, cred, vp, uid, gid) 2745 struct thread *td; 2746 struct ucred *cred; 2747 struct vnode *vp; 2748 uid_t uid; 2749 gid_t gid; 2750 { 2751 struct mount *mp; 2752 struct vattr vattr; 2753 int error; 2754 2755 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2756 return (error); 2757 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2758 VATTR_NULL(&vattr); 2759 vattr.va_uid = uid; 2760 vattr.va_gid = gid; 2761 #ifdef MAC 2762 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2763 vattr.va_gid); 2764 if (error == 0) 2765 #endif 2766 error = VOP_SETATTR(vp, &vattr, cred); 2767 VOP_UNLOCK(vp, 0); 2768 vn_finished_write(mp); 2769 return (error); 2770 } 2771 2772 /* 2773 * Set ownership given a path name. 2774 */ 2775 #ifndef _SYS_SYSPROTO_H_ 2776 struct chown_args { 2777 char *path; 2778 int uid; 2779 int gid; 2780 }; 2781 #endif 2782 int 2783 sys_chown(td, uap) 2784 struct thread *td; 2785 register struct chown_args /* { 2786 char *path; 2787 int uid; 2788 int gid; 2789 } */ *uap; 2790 { 2791 2792 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2793 uap->gid, 0)); 2794 } 2795 2796 #ifndef _SYS_SYSPROTO_H_ 2797 struct fchownat_args { 2798 int fd; 2799 const char * path; 2800 uid_t uid; 2801 gid_t gid; 2802 int flag; 2803 }; 2804 #endif 2805 int 2806 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2807 { 2808 int flag; 2809 2810 flag = uap->flag; 2811 if (flag & ~AT_SYMLINK_NOFOLLOW) 2812 return (EINVAL); 2813 2814 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2815 uap->gid, uap->flag)); 2816 } 2817 2818 int 2819 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2820 int uid, int gid, int flag) 2821 { 2822 struct nameidata nd; 2823 cap_rights_t rights; 2824 int error, follow; 2825 2826 AUDIT_ARG_OWNER(uid, gid); 2827 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2828 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2829 cap_rights_init(&rights, CAP_FCHOWN), td); 2830 2831 if ((error = namei(&nd)) != 0) 2832 return (error); 2833 NDFREE(&nd, NDF_ONLY_PNBUF); 2834 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2835 vrele(nd.ni_vp); 2836 return (error); 2837 } 2838 2839 /* 2840 * Set ownership given a path name, do not cross symlinks. 2841 */ 2842 #ifndef _SYS_SYSPROTO_H_ 2843 struct lchown_args { 2844 char *path; 2845 int uid; 2846 int gid; 2847 }; 2848 #endif 2849 int 2850 sys_lchown(td, uap) 2851 struct thread *td; 2852 register struct lchown_args /* { 2853 char *path; 2854 int uid; 2855 int gid; 2856 } */ *uap; 2857 { 2858 2859 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2860 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2861 } 2862 2863 /* 2864 * Set ownership given a file descriptor. 2865 */ 2866 #ifndef _SYS_SYSPROTO_H_ 2867 struct fchown_args { 2868 int fd; 2869 int uid; 2870 int gid; 2871 }; 2872 #endif 2873 int 2874 sys_fchown(td, uap) 2875 struct thread *td; 2876 register struct fchown_args /* { 2877 int fd; 2878 int uid; 2879 int gid; 2880 } */ *uap; 2881 { 2882 struct file *fp; 2883 cap_rights_t rights; 2884 int error; 2885 2886 AUDIT_ARG_FD(uap->fd); 2887 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2888 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2889 if (error != 0) 2890 return (error); 2891 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2892 fdrop(fp, td); 2893 return (error); 2894 } 2895 2896 /* 2897 * Common implementation code for utimes(), lutimes(), and futimes(). 2898 */ 2899 static int 2900 getutimes(usrtvp, tvpseg, tsp) 2901 const struct timeval *usrtvp; 2902 enum uio_seg tvpseg; 2903 struct timespec *tsp; 2904 { 2905 struct timeval tv[2]; 2906 const struct timeval *tvp; 2907 int error; 2908 2909 if (usrtvp == NULL) { 2910 vfs_timestamp(&tsp[0]); 2911 tsp[1] = tsp[0]; 2912 } else { 2913 if (tvpseg == UIO_SYSSPACE) { 2914 tvp = usrtvp; 2915 } else { 2916 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2917 return (error); 2918 tvp = tv; 2919 } 2920 2921 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2922 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2923 return (EINVAL); 2924 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2925 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2926 } 2927 return (0); 2928 } 2929 2930 /* 2931 * Common implementation code for futimens(), utimensat(). 2932 */ 2933 #define UTIMENS_NULL 0x1 2934 #define UTIMENS_EXIT 0x2 2935 static int 2936 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2937 struct timespec *tsp, int *retflags) 2938 { 2939 struct timespec tsnow; 2940 int error; 2941 2942 vfs_timestamp(&tsnow); 2943 *retflags = 0; 2944 if (usrtsp == NULL) { 2945 tsp[0] = tsnow; 2946 tsp[1] = tsnow; 2947 *retflags |= UTIMENS_NULL; 2948 return (0); 2949 } 2950 if (tspseg == UIO_SYSSPACE) { 2951 tsp[0] = usrtsp[0]; 2952 tsp[1] = usrtsp[1]; 2953 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2954 return (error); 2955 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2956 *retflags |= UTIMENS_EXIT; 2957 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2958 *retflags |= UTIMENS_NULL; 2959 if (tsp[0].tv_nsec == UTIME_OMIT) 2960 tsp[0].tv_sec = VNOVAL; 2961 else if (tsp[0].tv_nsec == UTIME_NOW) 2962 tsp[0] = tsnow; 2963 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2964 return (EINVAL); 2965 if (tsp[1].tv_nsec == UTIME_OMIT) 2966 tsp[1].tv_sec = VNOVAL; 2967 else if (tsp[1].tv_nsec == UTIME_NOW) 2968 tsp[1] = tsnow; 2969 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2970 return (EINVAL); 2971 2972 return (0); 2973 } 2974 2975 /* 2976 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2977 * and utimensat(). 2978 */ 2979 static int 2980 setutimes(td, vp, ts, numtimes, nullflag) 2981 struct thread *td; 2982 struct vnode *vp; 2983 const struct timespec *ts; 2984 int numtimes; 2985 int nullflag; 2986 { 2987 struct mount *mp; 2988 struct vattr vattr; 2989 int error, setbirthtime; 2990 2991 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2992 return (error); 2993 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2994 setbirthtime = 0; 2995 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2996 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2997 setbirthtime = 1; 2998 VATTR_NULL(&vattr); 2999 vattr.va_atime = ts[0]; 3000 vattr.va_mtime = ts[1]; 3001 if (setbirthtime) 3002 vattr.va_birthtime = ts[1]; 3003 if (numtimes > 2) 3004 vattr.va_birthtime = ts[2]; 3005 if (nullflag) 3006 vattr.va_vaflags |= VA_UTIMES_NULL; 3007 #ifdef MAC 3008 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3009 vattr.va_mtime); 3010 #endif 3011 if (error == 0) 3012 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3013 VOP_UNLOCK(vp, 0); 3014 vn_finished_write(mp); 3015 return (error); 3016 } 3017 3018 /* 3019 * Set the access and modification times of a file. 3020 */ 3021 #ifndef _SYS_SYSPROTO_H_ 3022 struct utimes_args { 3023 char *path; 3024 struct timeval *tptr; 3025 }; 3026 #endif 3027 int 3028 sys_utimes(td, uap) 3029 struct thread *td; 3030 register struct utimes_args /* { 3031 char *path; 3032 struct timeval *tptr; 3033 } */ *uap; 3034 { 3035 3036 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3037 uap->tptr, UIO_USERSPACE)); 3038 } 3039 3040 #ifndef _SYS_SYSPROTO_H_ 3041 struct futimesat_args { 3042 int fd; 3043 const char * path; 3044 const struct timeval * times; 3045 }; 3046 #endif 3047 int 3048 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3049 { 3050 3051 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3052 uap->times, UIO_USERSPACE)); 3053 } 3054 3055 int 3056 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3057 struct timeval *tptr, enum uio_seg tptrseg) 3058 { 3059 struct nameidata nd; 3060 struct timespec ts[2]; 3061 cap_rights_t rights; 3062 int error; 3063 3064 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3065 return (error); 3066 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3067 cap_rights_init(&rights, CAP_FUTIMES), td); 3068 3069 if ((error = namei(&nd)) != 0) 3070 return (error); 3071 NDFREE(&nd, NDF_ONLY_PNBUF); 3072 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3073 vrele(nd.ni_vp); 3074 return (error); 3075 } 3076 3077 /* 3078 * Set the access and modification times of a file. 3079 */ 3080 #ifndef _SYS_SYSPROTO_H_ 3081 struct lutimes_args { 3082 char *path; 3083 struct timeval *tptr; 3084 }; 3085 #endif 3086 int 3087 sys_lutimes(td, uap) 3088 struct thread *td; 3089 register struct lutimes_args /* { 3090 char *path; 3091 struct timeval *tptr; 3092 } */ *uap; 3093 { 3094 3095 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3096 UIO_USERSPACE)); 3097 } 3098 3099 int 3100 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3101 struct timeval *tptr, enum uio_seg tptrseg) 3102 { 3103 struct timespec ts[2]; 3104 struct nameidata nd; 3105 int error; 3106 3107 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3108 return (error); 3109 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3110 if ((error = namei(&nd)) != 0) 3111 return (error); 3112 NDFREE(&nd, NDF_ONLY_PNBUF); 3113 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3114 vrele(nd.ni_vp); 3115 return (error); 3116 } 3117 3118 /* 3119 * Set the access and modification times of a file. 3120 */ 3121 #ifndef _SYS_SYSPROTO_H_ 3122 struct futimes_args { 3123 int fd; 3124 struct timeval *tptr; 3125 }; 3126 #endif 3127 int 3128 sys_futimes(td, uap) 3129 struct thread *td; 3130 register struct futimes_args /* { 3131 int fd; 3132 struct timeval *tptr; 3133 } */ *uap; 3134 { 3135 3136 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3137 } 3138 3139 int 3140 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3141 enum uio_seg tptrseg) 3142 { 3143 struct timespec ts[2]; 3144 struct file *fp; 3145 cap_rights_t rights; 3146 int error; 3147 3148 AUDIT_ARG_FD(fd); 3149 error = getutimes(tptr, tptrseg, ts); 3150 if (error != 0) 3151 return (error); 3152 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3153 if (error != 0) 3154 return (error); 3155 #ifdef AUDIT 3156 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3157 AUDIT_ARG_VNODE1(fp->f_vnode); 3158 VOP_UNLOCK(fp->f_vnode, 0); 3159 #endif 3160 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3161 fdrop(fp, td); 3162 return (error); 3163 } 3164 3165 int 3166 sys_futimens(struct thread *td, struct futimens_args *uap) 3167 { 3168 3169 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3170 } 3171 3172 int 3173 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3174 enum uio_seg tptrseg) 3175 { 3176 struct timespec ts[2]; 3177 struct file *fp; 3178 cap_rights_t rights; 3179 int error, flags; 3180 3181 AUDIT_ARG_FD(fd); 3182 error = getutimens(tptr, tptrseg, ts, &flags); 3183 if (error != 0) 3184 return (error); 3185 if (flags & UTIMENS_EXIT) 3186 return (0); 3187 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3188 if (error != 0) 3189 return (error); 3190 #ifdef AUDIT 3191 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3192 AUDIT_ARG_VNODE1(fp->f_vnode); 3193 VOP_UNLOCK(fp->f_vnode, 0); 3194 #endif 3195 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3196 fdrop(fp, td); 3197 return (error); 3198 } 3199 3200 int 3201 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3202 { 3203 3204 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3205 uap->times, UIO_USERSPACE, uap->flag)); 3206 } 3207 3208 int 3209 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3210 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3211 { 3212 struct nameidata nd; 3213 struct timespec ts[2]; 3214 cap_rights_t rights; 3215 int error, flags; 3216 3217 if (flag & ~AT_SYMLINK_NOFOLLOW) 3218 return (EINVAL); 3219 3220 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3221 return (error); 3222 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3223 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3224 cap_rights_init(&rights, CAP_FUTIMES), td); 3225 if ((error = namei(&nd)) != 0) 3226 return (error); 3227 /* 3228 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3229 * POSIX states: 3230 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3231 * "Search permission is denied by a component of the path prefix." 3232 */ 3233 NDFREE(&nd, NDF_ONLY_PNBUF); 3234 if ((flags & UTIMENS_EXIT) == 0) 3235 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3236 vrele(nd.ni_vp); 3237 return (error); 3238 } 3239 3240 /* 3241 * Truncate a file given its path name. 3242 */ 3243 #ifndef _SYS_SYSPROTO_H_ 3244 struct truncate_args { 3245 char *path; 3246 int pad; 3247 off_t length; 3248 }; 3249 #endif 3250 int 3251 sys_truncate(td, uap) 3252 struct thread *td; 3253 register struct truncate_args /* { 3254 char *path; 3255 int pad; 3256 off_t length; 3257 } */ *uap; 3258 { 3259 3260 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3261 } 3262 3263 int 3264 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3265 { 3266 struct mount *mp; 3267 struct vnode *vp; 3268 void *rl_cookie; 3269 struct vattr vattr; 3270 struct nameidata nd; 3271 int error; 3272 3273 if (length < 0) 3274 return(EINVAL); 3275 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3276 if ((error = namei(&nd)) != 0) 3277 return (error); 3278 vp = nd.ni_vp; 3279 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3280 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3281 vn_rangelock_unlock(vp, rl_cookie); 3282 vrele(vp); 3283 return (error); 3284 } 3285 NDFREE(&nd, NDF_ONLY_PNBUF); 3286 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3287 if (vp->v_type == VDIR) 3288 error = EISDIR; 3289 #ifdef MAC 3290 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3291 } 3292 #endif 3293 else if ((error = vn_writechk(vp)) == 0 && 3294 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3295 VATTR_NULL(&vattr); 3296 vattr.va_size = length; 3297 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3298 } 3299 VOP_UNLOCK(vp, 0); 3300 vn_finished_write(mp); 3301 vn_rangelock_unlock(vp, rl_cookie); 3302 vrele(vp); 3303 return (error); 3304 } 3305 3306 #if defined(COMPAT_43) 3307 /* 3308 * Truncate a file given its path name. 3309 */ 3310 #ifndef _SYS_SYSPROTO_H_ 3311 struct otruncate_args { 3312 char *path; 3313 long length; 3314 }; 3315 #endif 3316 int 3317 otruncate(td, uap) 3318 struct thread *td; 3319 register struct otruncate_args /* { 3320 char *path; 3321 long length; 3322 } */ *uap; 3323 { 3324 struct truncate_args /* { 3325 char *path; 3326 int pad; 3327 off_t length; 3328 } */ nuap; 3329 3330 nuap.path = uap->path; 3331 nuap.length = uap->length; 3332 return (sys_truncate(td, &nuap)); 3333 } 3334 #endif /* COMPAT_43 */ 3335 3336 #if defined(COMPAT_FREEBSD6) 3337 /* Versions with the pad argument */ 3338 int 3339 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3340 { 3341 struct truncate_args ouap; 3342 3343 ouap.path = uap->path; 3344 ouap.length = uap->length; 3345 return (sys_truncate(td, &ouap)); 3346 } 3347 3348 int 3349 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3350 { 3351 struct ftruncate_args ouap; 3352 3353 ouap.fd = uap->fd; 3354 ouap.length = uap->length; 3355 return (sys_ftruncate(td, &ouap)); 3356 } 3357 #endif 3358 3359 /* 3360 * Sync an open file. 3361 */ 3362 #ifndef _SYS_SYSPROTO_H_ 3363 struct fsync_args { 3364 int fd; 3365 }; 3366 #endif 3367 int 3368 sys_fsync(td, uap) 3369 struct thread *td; 3370 struct fsync_args /* { 3371 int fd; 3372 } */ *uap; 3373 { 3374 struct vnode *vp; 3375 struct mount *mp; 3376 struct file *fp; 3377 cap_rights_t rights; 3378 int error, lock_flags; 3379 3380 AUDIT_ARG_FD(uap->fd); 3381 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3382 if (error != 0) 3383 return (error); 3384 vp = fp->f_vnode; 3385 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3386 if (error != 0) 3387 goto drop; 3388 if (MNT_SHARED_WRITES(mp) || 3389 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3390 lock_flags = LK_SHARED; 3391 } else { 3392 lock_flags = LK_EXCLUSIVE; 3393 } 3394 vn_lock(vp, lock_flags | LK_RETRY); 3395 AUDIT_ARG_VNODE1(vp); 3396 if (vp->v_object != NULL) { 3397 VM_OBJECT_WLOCK(vp->v_object); 3398 vm_object_page_clean(vp->v_object, 0, 0, 0); 3399 VM_OBJECT_WUNLOCK(vp->v_object); 3400 } 3401 error = VOP_FSYNC(vp, MNT_WAIT, td); 3402 3403 VOP_UNLOCK(vp, 0); 3404 vn_finished_write(mp); 3405 drop: 3406 fdrop(fp, td); 3407 return (error); 3408 } 3409 3410 /* 3411 * Rename files. Source and destination must either both be directories, or 3412 * both not be directories. If target is a directory, it must be empty. 3413 */ 3414 #ifndef _SYS_SYSPROTO_H_ 3415 struct rename_args { 3416 char *from; 3417 char *to; 3418 }; 3419 #endif 3420 int 3421 sys_rename(td, uap) 3422 struct thread *td; 3423 register struct rename_args /* { 3424 char *from; 3425 char *to; 3426 } */ *uap; 3427 { 3428 3429 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3430 uap->to, UIO_USERSPACE)); 3431 } 3432 3433 #ifndef _SYS_SYSPROTO_H_ 3434 struct renameat_args { 3435 int oldfd; 3436 char *old; 3437 int newfd; 3438 char *new; 3439 }; 3440 #endif 3441 int 3442 sys_renameat(struct thread *td, struct renameat_args *uap) 3443 { 3444 3445 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3446 UIO_USERSPACE)); 3447 } 3448 3449 int 3450 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3451 enum uio_seg pathseg) 3452 { 3453 struct mount *mp = NULL; 3454 struct vnode *tvp, *fvp, *tdvp; 3455 struct nameidata fromnd, tond; 3456 cap_rights_t rights; 3457 int error; 3458 3459 again: 3460 bwillwrite(); 3461 #ifdef MAC 3462 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3463 AUDITVNODE1, pathseg, old, oldfd, 3464 cap_rights_init(&rights, CAP_RENAMEAT), td); 3465 #else 3466 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3467 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3468 #endif 3469 3470 if ((error = namei(&fromnd)) != 0) 3471 return (error); 3472 #ifdef MAC 3473 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3474 fromnd.ni_vp, &fromnd.ni_cnd); 3475 VOP_UNLOCK(fromnd.ni_dvp, 0); 3476 if (fromnd.ni_dvp != fromnd.ni_vp) 3477 VOP_UNLOCK(fromnd.ni_vp, 0); 3478 #endif 3479 fvp = fromnd.ni_vp; 3480 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3481 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3482 cap_rights_init(&rights, CAP_LINKAT), td); 3483 if (fromnd.ni_vp->v_type == VDIR) 3484 tond.ni_cnd.cn_flags |= WILLBEDIR; 3485 if ((error = namei(&tond)) != 0) { 3486 /* Translate error code for rename("dir1", "dir2/."). */ 3487 if (error == EISDIR && fvp->v_type == VDIR) 3488 error = EINVAL; 3489 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3490 vrele(fromnd.ni_dvp); 3491 vrele(fvp); 3492 goto out1; 3493 } 3494 tdvp = tond.ni_dvp; 3495 tvp = tond.ni_vp; 3496 error = vn_start_write(fvp, &mp, V_NOWAIT); 3497 if (error != 0) { 3498 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3499 NDFREE(&tond, NDF_ONLY_PNBUF); 3500 if (tvp != NULL) 3501 vput(tvp); 3502 if (tdvp == tvp) 3503 vrele(tdvp); 3504 else 3505 vput(tdvp); 3506 vrele(fromnd.ni_dvp); 3507 vrele(fvp); 3508 vrele(tond.ni_startdir); 3509 if (fromnd.ni_startdir != NULL) 3510 vrele(fromnd.ni_startdir); 3511 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3512 if (error != 0) 3513 return (error); 3514 goto again; 3515 } 3516 if (tvp != NULL) { 3517 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3518 error = ENOTDIR; 3519 goto out; 3520 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3521 error = EISDIR; 3522 goto out; 3523 } 3524 #ifdef CAPABILITIES 3525 if (newfd != AT_FDCWD) { 3526 /* 3527 * If the target already exists we require CAP_UNLINKAT 3528 * from 'newfd'. 3529 */ 3530 error = cap_check(&tond.ni_filecaps.fc_rights, 3531 cap_rights_init(&rights, CAP_UNLINKAT)); 3532 if (error != 0) 3533 goto out; 3534 } 3535 #endif 3536 } 3537 if (fvp == tdvp) { 3538 error = EINVAL; 3539 goto out; 3540 } 3541 /* 3542 * If the source is the same as the destination (that is, if they 3543 * are links to the same vnode), then there is nothing to do. 3544 */ 3545 if (fvp == tvp) 3546 error = -1; 3547 #ifdef MAC 3548 else 3549 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3550 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3551 #endif 3552 out: 3553 if (error == 0) { 3554 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3555 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3556 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3557 NDFREE(&tond, NDF_ONLY_PNBUF); 3558 } else { 3559 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3560 NDFREE(&tond, NDF_ONLY_PNBUF); 3561 if (tvp != NULL) 3562 vput(tvp); 3563 if (tdvp == tvp) 3564 vrele(tdvp); 3565 else 3566 vput(tdvp); 3567 vrele(fromnd.ni_dvp); 3568 vrele(fvp); 3569 } 3570 vrele(tond.ni_startdir); 3571 vn_finished_write(mp); 3572 out1: 3573 if (fromnd.ni_startdir) 3574 vrele(fromnd.ni_startdir); 3575 if (error == -1) 3576 return (0); 3577 return (error); 3578 } 3579 3580 /* 3581 * Make a directory file. 3582 */ 3583 #ifndef _SYS_SYSPROTO_H_ 3584 struct mkdir_args { 3585 char *path; 3586 int mode; 3587 }; 3588 #endif 3589 int 3590 sys_mkdir(td, uap) 3591 struct thread *td; 3592 register struct mkdir_args /* { 3593 char *path; 3594 int mode; 3595 } */ *uap; 3596 { 3597 3598 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3599 uap->mode)); 3600 } 3601 3602 #ifndef _SYS_SYSPROTO_H_ 3603 struct mkdirat_args { 3604 int fd; 3605 char *path; 3606 mode_t mode; 3607 }; 3608 #endif 3609 int 3610 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3611 { 3612 3613 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3614 } 3615 3616 int 3617 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3618 int mode) 3619 { 3620 struct mount *mp; 3621 struct vnode *vp; 3622 struct vattr vattr; 3623 struct nameidata nd; 3624 cap_rights_t rights; 3625 int error; 3626 3627 AUDIT_ARG_MODE(mode); 3628 restart: 3629 bwillwrite(); 3630 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3631 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3632 td); 3633 nd.ni_cnd.cn_flags |= WILLBEDIR; 3634 if ((error = namei(&nd)) != 0) 3635 return (error); 3636 vp = nd.ni_vp; 3637 if (vp != NULL) { 3638 NDFREE(&nd, NDF_ONLY_PNBUF); 3639 /* 3640 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3641 * the strange behaviour of leaving the vnode unlocked 3642 * if the target is the same vnode as the parent. 3643 */ 3644 if (vp == nd.ni_dvp) 3645 vrele(nd.ni_dvp); 3646 else 3647 vput(nd.ni_dvp); 3648 vrele(vp); 3649 return (EEXIST); 3650 } 3651 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3652 NDFREE(&nd, NDF_ONLY_PNBUF); 3653 vput(nd.ni_dvp); 3654 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3655 return (error); 3656 goto restart; 3657 } 3658 VATTR_NULL(&vattr); 3659 vattr.va_type = VDIR; 3660 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3661 #ifdef MAC 3662 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3663 &vattr); 3664 if (error != 0) 3665 goto out; 3666 #endif 3667 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3668 #ifdef MAC 3669 out: 3670 #endif 3671 NDFREE(&nd, NDF_ONLY_PNBUF); 3672 vput(nd.ni_dvp); 3673 if (error == 0) 3674 vput(nd.ni_vp); 3675 vn_finished_write(mp); 3676 return (error); 3677 } 3678 3679 /* 3680 * Remove a directory file. 3681 */ 3682 #ifndef _SYS_SYSPROTO_H_ 3683 struct rmdir_args { 3684 char *path; 3685 }; 3686 #endif 3687 int 3688 sys_rmdir(td, uap) 3689 struct thread *td; 3690 struct rmdir_args /* { 3691 char *path; 3692 } */ *uap; 3693 { 3694 3695 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3696 } 3697 3698 int 3699 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3700 { 3701 struct mount *mp; 3702 struct vnode *vp; 3703 struct nameidata nd; 3704 cap_rights_t rights; 3705 int error; 3706 3707 restart: 3708 bwillwrite(); 3709 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3710 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3711 if ((error = namei(&nd)) != 0) 3712 return (error); 3713 vp = nd.ni_vp; 3714 if (vp->v_type != VDIR) { 3715 error = ENOTDIR; 3716 goto out; 3717 } 3718 /* 3719 * No rmdir "." please. 3720 */ 3721 if (nd.ni_dvp == vp) { 3722 error = EINVAL; 3723 goto out; 3724 } 3725 /* 3726 * The root of a mounted filesystem cannot be deleted. 3727 */ 3728 if (vp->v_vflag & VV_ROOT) { 3729 error = EBUSY; 3730 goto out; 3731 } 3732 #ifdef MAC 3733 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3734 &nd.ni_cnd); 3735 if (error != 0) 3736 goto out; 3737 #endif 3738 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3739 NDFREE(&nd, NDF_ONLY_PNBUF); 3740 vput(vp); 3741 if (nd.ni_dvp == vp) 3742 vrele(nd.ni_dvp); 3743 else 3744 vput(nd.ni_dvp); 3745 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3746 return (error); 3747 goto restart; 3748 } 3749 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3750 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3751 vn_finished_write(mp); 3752 out: 3753 NDFREE(&nd, NDF_ONLY_PNBUF); 3754 vput(vp); 3755 if (nd.ni_dvp == vp) 3756 vrele(nd.ni_dvp); 3757 else 3758 vput(nd.ni_dvp); 3759 return (error); 3760 } 3761 3762 #ifdef COMPAT_43 3763 /* 3764 * Read a block of directory entries in a filesystem independent format. 3765 */ 3766 #ifndef _SYS_SYSPROTO_H_ 3767 struct ogetdirentries_args { 3768 int fd; 3769 char *buf; 3770 u_int count; 3771 long *basep; 3772 }; 3773 #endif 3774 int 3775 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3776 { 3777 long loff; 3778 int error; 3779 3780 error = kern_ogetdirentries(td, uap, &loff); 3781 if (error == 0) 3782 error = copyout(&loff, uap->basep, sizeof(long)); 3783 return (error); 3784 } 3785 3786 int 3787 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3788 long *ploff) 3789 { 3790 struct vnode *vp; 3791 struct file *fp; 3792 struct uio auio, kuio; 3793 struct iovec aiov, kiov; 3794 struct dirent *dp, *edp; 3795 cap_rights_t rights; 3796 caddr_t dirbuf; 3797 int error, eofflag, readcnt; 3798 long loff; 3799 off_t foffset; 3800 3801 /* XXX arbitrary sanity limit on `count'. */ 3802 if (uap->count > 64 * 1024) 3803 return (EINVAL); 3804 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3805 if (error != 0) 3806 return (error); 3807 if ((fp->f_flag & FREAD) == 0) { 3808 fdrop(fp, td); 3809 return (EBADF); 3810 } 3811 vp = fp->f_vnode; 3812 foffset = foffset_lock(fp, 0); 3813 unionread: 3814 if (vp->v_type != VDIR) { 3815 foffset_unlock(fp, foffset, 0); 3816 fdrop(fp, td); 3817 return (EINVAL); 3818 } 3819 aiov.iov_base = uap->buf; 3820 aiov.iov_len = uap->count; 3821 auio.uio_iov = &aiov; 3822 auio.uio_iovcnt = 1; 3823 auio.uio_rw = UIO_READ; 3824 auio.uio_segflg = UIO_USERSPACE; 3825 auio.uio_td = td; 3826 auio.uio_resid = uap->count; 3827 vn_lock(vp, LK_SHARED | LK_RETRY); 3828 loff = auio.uio_offset = foffset; 3829 #ifdef MAC 3830 error = mac_vnode_check_readdir(td->td_ucred, vp); 3831 if (error != 0) { 3832 VOP_UNLOCK(vp, 0); 3833 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3834 fdrop(fp, td); 3835 return (error); 3836 } 3837 #endif 3838 # if (BYTE_ORDER != LITTLE_ENDIAN) 3839 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3840 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3841 NULL, NULL); 3842 foffset = auio.uio_offset; 3843 } else 3844 # endif 3845 { 3846 kuio = auio; 3847 kuio.uio_iov = &kiov; 3848 kuio.uio_segflg = UIO_SYSSPACE; 3849 kiov.iov_len = uap->count; 3850 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3851 kiov.iov_base = dirbuf; 3852 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3853 NULL, NULL); 3854 foffset = kuio.uio_offset; 3855 if (error == 0) { 3856 readcnt = uap->count - kuio.uio_resid; 3857 edp = (struct dirent *)&dirbuf[readcnt]; 3858 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3859 # if (BYTE_ORDER == LITTLE_ENDIAN) 3860 /* 3861 * The expected low byte of 3862 * dp->d_namlen is our dp->d_type. 3863 * The high MBZ byte of dp->d_namlen 3864 * is our dp->d_namlen. 3865 */ 3866 dp->d_type = dp->d_namlen; 3867 dp->d_namlen = 0; 3868 # else 3869 /* 3870 * The dp->d_type is the high byte 3871 * of the expected dp->d_namlen, 3872 * so must be zero'ed. 3873 */ 3874 dp->d_type = 0; 3875 # endif 3876 if (dp->d_reclen > 0) { 3877 dp = (struct dirent *) 3878 ((char *)dp + dp->d_reclen); 3879 } else { 3880 error = EIO; 3881 break; 3882 } 3883 } 3884 if (dp >= edp) 3885 error = uiomove(dirbuf, readcnt, &auio); 3886 } 3887 free(dirbuf, M_TEMP); 3888 } 3889 if (error != 0) { 3890 VOP_UNLOCK(vp, 0); 3891 foffset_unlock(fp, foffset, 0); 3892 fdrop(fp, td); 3893 return (error); 3894 } 3895 if (uap->count == auio.uio_resid && 3896 (vp->v_vflag & VV_ROOT) && 3897 (vp->v_mount->mnt_flag & MNT_UNION)) { 3898 struct vnode *tvp = vp; 3899 vp = vp->v_mount->mnt_vnodecovered; 3900 VREF(vp); 3901 fp->f_vnode = vp; 3902 fp->f_data = vp; 3903 foffset = 0; 3904 vput(tvp); 3905 goto unionread; 3906 } 3907 VOP_UNLOCK(vp, 0); 3908 foffset_unlock(fp, foffset, 0); 3909 fdrop(fp, td); 3910 td->td_retval[0] = uap->count - auio.uio_resid; 3911 if (error == 0) 3912 *ploff = loff; 3913 return (error); 3914 } 3915 #endif /* COMPAT_43 */ 3916 3917 /* 3918 * Read a block of directory entries in a filesystem independent format. 3919 */ 3920 #ifndef _SYS_SYSPROTO_H_ 3921 struct getdirentries_args { 3922 int fd; 3923 char *buf; 3924 u_int count; 3925 long *basep; 3926 }; 3927 #endif 3928 int 3929 sys_getdirentries(td, uap) 3930 struct thread *td; 3931 register struct getdirentries_args /* { 3932 int fd; 3933 char *buf; 3934 u_int count; 3935 long *basep; 3936 } */ *uap; 3937 { 3938 long base; 3939 int error; 3940 3941 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3942 NULL, UIO_USERSPACE); 3943 if (error != 0) 3944 return (error); 3945 if (uap->basep != NULL) 3946 error = copyout(&base, uap->basep, sizeof(long)); 3947 return (error); 3948 } 3949 3950 int 3951 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3952 long *basep, ssize_t *residp, enum uio_seg bufseg) 3953 { 3954 struct vnode *vp; 3955 struct file *fp; 3956 struct uio auio; 3957 struct iovec aiov; 3958 cap_rights_t rights; 3959 long loff; 3960 int error, eofflag; 3961 off_t foffset; 3962 3963 AUDIT_ARG_FD(fd); 3964 if (count > IOSIZE_MAX) 3965 return (EINVAL); 3966 auio.uio_resid = count; 3967 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3968 if (error != 0) 3969 return (error); 3970 if ((fp->f_flag & FREAD) == 0) { 3971 fdrop(fp, td); 3972 return (EBADF); 3973 } 3974 vp = fp->f_vnode; 3975 foffset = foffset_lock(fp, 0); 3976 unionread: 3977 if (vp->v_type != VDIR) { 3978 error = EINVAL; 3979 goto fail; 3980 } 3981 aiov.iov_base = buf; 3982 aiov.iov_len = count; 3983 auio.uio_iov = &aiov; 3984 auio.uio_iovcnt = 1; 3985 auio.uio_rw = UIO_READ; 3986 auio.uio_segflg = bufseg; 3987 auio.uio_td = td; 3988 vn_lock(vp, LK_SHARED | LK_RETRY); 3989 AUDIT_ARG_VNODE1(vp); 3990 loff = auio.uio_offset = foffset; 3991 #ifdef MAC 3992 error = mac_vnode_check_readdir(td->td_ucred, vp); 3993 if (error == 0) 3994 #endif 3995 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3996 NULL); 3997 foffset = auio.uio_offset; 3998 if (error != 0) { 3999 VOP_UNLOCK(vp, 0); 4000 goto fail; 4001 } 4002 if (count == auio.uio_resid && 4003 (vp->v_vflag & VV_ROOT) && 4004 (vp->v_mount->mnt_flag & MNT_UNION)) { 4005 struct vnode *tvp = vp; 4006 4007 vp = vp->v_mount->mnt_vnodecovered; 4008 VREF(vp); 4009 fp->f_vnode = vp; 4010 fp->f_data = vp; 4011 foffset = 0; 4012 vput(tvp); 4013 goto unionread; 4014 } 4015 VOP_UNLOCK(vp, 0); 4016 *basep = loff; 4017 if (residp != NULL) 4018 *residp = auio.uio_resid; 4019 td->td_retval[0] = count - auio.uio_resid; 4020 fail: 4021 foffset_unlock(fp, foffset, 0); 4022 fdrop(fp, td); 4023 return (error); 4024 } 4025 4026 #ifndef _SYS_SYSPROTO_H_ 4027 struct getdents_args { 4028 int fd; 4029 char *buf; 4030 size_t count; 4031 }; 4032 #endif 4033 int 4034 sys_getdents(td, uap) 4035 struct thread *td; 4036 register struct getdents_args /* { 4037 int fd; 4038 char *buf; 4039 u_int count; 4040 } */ *uap; 4041 { 4042 struct getdirentries_args ap; 4043 4044 ap.fd = uap->fd; 4045 ap.buf = uap->buf; 4046 ap.count = uap->count; 4047 ap.basep = NULL; 4048 return (sys_getdirentries(td, &ap)); 4049 } 4050 4051 /* 4052 * Set the mode mask for creation of filesystem nodes. 4053 */ 4054 #ifndef _SYS_SYSPROTO_H_ 4055 struct umask_args { 4056 int newmask; 4057 }; 4058 #endif 4059 int 4060 sys_umask(td, uap) 4061 struct thread *td; 4062 struct umask_args /* { 4063 int newmask; 4064 } */ *uap; 4065 { 4066 struct filedesc *fdp; 4067 4068 fdp = td->td_proc->p_fd; 4069 FILEDESC_XLOCK(fdp); 4070 td->td_retval[0] = fdp->fd_cmask; 4071 fdp->fd_cmask = uap->newmask & ALLPERMS; 4072 FILEDESC_XUNLOCK(fdp); 4073 return (0); 4074 } 4075 4076 /* 4077 * Void all references to file by ripping underlying filesystem away from 4078 * vnode. 4079 */ 4080 #ifndef _SYS_SYSPROTO_H_ 4081 struct revoke_args { 4082 char *path; 4083 }; 4084 #endif 4085 int 4086 sys_revoke(td, uap) 4087 struct thread *td; 4088 register struct revoke_args /* { 4089 char *path; 4090 } */ *uap; 4091 { 4092 struct vnode *vp; 4093 struct vattr vattr; 4094 struct nameidata nd; 4095 int error; 4096 4097 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4098 uap->path, td); 4099 if ((error = namei(&nd)) != 0) 4100 return (error); 4101 vp = nd.ni_vp; 4102 NDFREE(&nd, NDF_ONLY_PNBUF); 4103 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4104 error = EINVAL; 4105 goto out; 4106 } 4107 #ifdef MAC 4108 error = mac_vnode_check_revoke(td->td_ucred, vp); 4109 if (error != 0) 4110 goto out; 4111 #endif 4112 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4113 if (error != 0) 4114 goto out; 4115 if (td->td_ucred->cr_uid != vattr.va_uid) { 4116 error = priv_check(td, PRIV_VFS_ADMIN); 4117 if (error != 0) 4118 goto out; 4119 } 4120 if (vcount(vp) > 1) 4121 VOP_REVOKE(vp, REVOKEALL); 4122 out: 4123 vput(vp); 4124 return (error); 4125 } 4126 4127 /* 4128 * Convert a user file descriptor to a kernel file entry and check that, if it 4129 * is a capability, the correct rights are present. A reference on the file 4130 * entry is held upon returning. 4131 */ 4132 int 4133 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4134 { 4135 struct file *fp; 4136 int error; 4137 4138 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4139 if (error != 0) 4140 return (error); 4141 4142 /* 4143 * The file could be not of the vnode type, or it may be not 4144 * yet fully initialized, in which case the f_vnode pointer 4145 * may be set, but f_ops is still badfileops. E.g., 4146 * devfs_open() transiently create such situation to 4147 * facilitate csw d_fdopen(). 4148 * 4149 * Dupfdopen() handling in kern_openat() installs the 4150 * half-baked file into the process descriptor table, allowing 4151 * other thread to dereference it. Guard against the race by 4152 * checking f_ops. 4153 */ 4154 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4155 fdrop(fp, td); 4156 return (EINVAL); 4157 } 4158 *fpp = fp; 4159 return (0); 4160 } 4161 4162 4163 /* 4164 * Get an (NFS) file handle. 4165 */ 4166 #ifndef _SYS_SYSPROTO_H_ 4167 struct lgetfh_args { 4168 char *fname; 4169 fhandle_t *fhp; 4170 }; 4171 #endif 4172 int 4173 sys_lgetfh(td, uap) 4174 struct thread *td; 4175 register struct lgetfh_args *uap; 4176 { 4177 struct nameidata nd; 4178 fhandle_t fh; 4179 register struct vnode *vp; 4180 int error; 4181 4182 error = priv_check(td, PRIV_VFS_GETFH); 4183 if (error != 0) 4184 return (error); 4185 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4186 uap->fname, td); 4187 error = namei(&nd); 4188 if (error != 0) 4189 return (error); 4190 NDFREE(&nd, NDF_ONLY_PNBUF); 4191 vp = nd.ni_vp; 4192 bzero(&fh, sizeof(fh)); 4193 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4194 error = VOP_VPTOFH(vp, &fh.fh_fid); 4195 vput(vp); 4196 if (error == 0) 4197 error = copyout(&fh, uap->fhp, sizeof (fh)); 4198 return (error); 4199 } 4200 4201 #ifndef _SYS_SYSPROTO_H_ 4202 struct getfh_args { 4203 char *fname; 4204 fhandle_t *fhp; 4205 }; 4206 #endif 4207 int 4208 sys_getfh(td, uap) 4209 struct thread *td; 4210 register struct getfh_args *uap; 4211 { 4212 struct nameidata nd; 4213 fhandle_t fh; 4214 register struct vnode *vp; 4215 int error; 4216 4217 error = priv_check(td, PRIV_VFS_GETFH); 4218 if (error != 0) 4219 return (error); 4220 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4221 uap->fname, td); 4222 error = namei(&nd); 4223 if (error != 0) 4224 return (error); 4225 NDFREE(&nd, NDF_ONLY_PNBUF); 4226 vp = nd.ni_vp; 4227 bzero(&fh, sizeof(fh)); 4228 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4229 error = VOP_VPTOFH(vp, &fh.fh_fid); 4230 vput(vp); 4231 if (error == 0) 4232 error = copyout(&fh, uap->fhp, sizeof (fh)); 4233 return (error); 4234 } 4235 4236 /* 4237 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4238 * open descriptor. 4239 * 4240 * warning: do not remove the priv_check() call or this becomes one giant 4241 * security hole. 4242 */ 4243 #ifndef _SYS_SYSPROTO_H_ 4244 struct fhopen_args { 4245 const struct fhandle *u_fhp; 4246 int flags; 4247 }; 4248 #endif 4249 int 4250 sys_fhopen(td, uap) 4251 struct thread *td; 4252 struct fhopen_args /* { 4253 const struct fhandle *u_fhp; 4254 int flags; 4255 } */ *uap; 4256 { 4257 struct mount *mp; 4258 struct vnode *vp; 4259 struct fhandle fhp; 4260 struct file *fp; 4261 int fmode, error; 4262 int indx; 4263 4264 error = priv_check(td, PRIV_VFS_FHOPEN); 4265 if (error != 0) 4266 return (error); 4267 indx = -1; 4268 fmode = FFLAGS(uap->flags); 4269 /* why not allow a non-read/write open for our lockd? */ 4270 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4271 return (EINVAL); 4272 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4273 if (error != 0) 4274 return(error); 4275 /* find the mount point */ 4276 mp = vfs_busyfs(&fhp.fh_fsid); 4277 if (mp == NULL) 4278 return (ESTALE); 4279 /* now give me my vnode, it gets returned to me locked */ 4280 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4281 vfs_unbusy(mp); 4282 if (error != 0) 4283 return (error); 4284 4285 error = falloc_noinstall(td, &fp); 4286 if (error != 0) { 4287 vput(vp); 4288 return (error); 4289 } 4290 /* 4291 * An extra reference on `fp' has been held for us by 4292 * falloc_noinstall(). 4293 */ 4294 4295 #ifdef INVARIANTS 4296 td->td_dupfd = -1; 4297 #endif 4298 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4299 if (error != 0) { 4300 KASSERT(fp->f_ops == &badfileops, 4301 ("VOP_OPEN in fhopen() set f_ops")); 4302 KASSERT(td->td_dupfd < 0, 4303 ("fhopen() encountered fdopen()")); 4304 4305 vput(vp); 4306 goto bad; 4307 } 4308 #ifdef INVARIANTS 4309 td->td_dupfd = 0; 4310 #endif 4311 fp->f_vnode = vp; 4312 fp->f_seqcount = 1; 4313 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4314 &vnops); 4315 VOP_UNLOCK(vp, 0); 4316 if ((fmode & O_TRUNC) != 0) { 4317 error = fo_truncate(fp, 0, td->td_ucred, td); 4318 if (error != 0) 4319 goto bad; 4320 } 4321 4322 error = finstall(td, fp, &indx, fmode, NULL); 4323 bad: 4324 fdrop(fp, td); 4325 td->td_retval[0] = indx; 4326 return (error); 4327 } 4328 4329 /* 4330 * Stat an (NFS) file handle. 4331 */ 4332 #ifndef _SYS_SYSPROTO_H_ 4333 struct fhstat_args { 4334 struct fhandle *u_fhp; 4335 struct stat *sb; 4336 }; 4337 #endif 4338 int 4339 sys_fhstat(td, uap) 4340 struct thread *td; 4341 register struct fhstat_args /* { 4342 struct fhandle *u_fhp; 4343 struct stat *sb; 4344 } */ *uap; 4345 { 4346 struct stat sb; 4347 struct fhandle fh; 4348 int error; 4349 4350 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4351 if (error != 0) 4352 return (error); 4353 error = kern_fhstat(td, fh, &sb); 4354 if (error == 0) 4355 error = copyout(&sb, uap->sb, sizeof(sb)); 4356 return (error); 4357 } 4358 4359 int 4360 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4361 { 4362 struct mount *mp; 4363 struct vnode *vp; 4364 int error; 4365 4366 error = priv_check(td, PRIV_VFS_FHSTAT); 4367 if (error != 0) 4368 return (error); 4369 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4370 return (ESTALE); 4371 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4372 vfs_unbusy(mp); 4373 if (error != 0) 4374 return (error); 4375 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4376 vput(vp); 4377 return (error); 4378 } 4379 4380 /* 4381 * Implement fstatfs() for (NFS) file handles. 4382 */ 4383 #ifndef _SYS_SYSPROTO_H_ 4384 struct fhstatfs_args { 4385 struct fhandle *u_fhp; 4386 struct statfs *buf; 4387 }; 4388 #endif 4389 int 4390 sys_fhstatfs(td, uap) 4391 struct thread *td; 4392 struct fhstatfs_args /* { 4393 struct fhandle *u_fhp; 4394 struct statfs *buf; 4395 } */ *uap; 4396 { 4397 struct statfs sf; 4398 fhandle_t fh; 4399 int error; 4400 4401 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4402 if (error != 0) 4403 return (error); 4404 error = kern_fhstatfs(td, fh, &sf); 4405 if (error != 0) 4406 return (error); 4407 return (copyout(&sf, uap->buf, sizeof(sf))); 4408 } 4409 4410 int 4411 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4412 { 4413 struct statfs *sp; 4414 struct mount *mp; 4415 struct vnode *vp; 4416 int error; 4417 4418 error = priv_check(td, PRIV_VFS_FHSTATFS); 4419 if (error != 0) 4420 return (error); 4421 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4422 return (ESTALE); 4423 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4424 if (error != 0) { 4425 vfs_unbusy(mp); 4426 return (error); 4427 } 4428 vput(vp); 4429 error = prison_canseemount(td->td_ucred, mp); 4430 if (error != 0) 4431 goto out; 4432 #ifdef MAC 4433 error = mac_mount_check_stat(td->td_ucred, mp); 4434 if (error != 0) 4435 goto out; 4436 #endif 4437 /* 4438 * Set these in case the underlying filesystem fails to do so. 4439 */ 4440 sp = &mp->mnt_stat; 4441 sp->f_version = STATFS_VERSION; 4442 sp->f_namemax = NAME_MAX; 4443 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4444 error = VFS_STATFS(mp, sp); 4445 if (error == 0) 4446 *buf = *sp; 4447 out: 4448 vfs_unbusy(mp); 4449 return (error); 4450 } 4451 4452 int 4453 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4454 { 4455 struct file *fp; 4456 struct mount *mp; 4457 struct vnode *vp; 4458 cap_rights_t rights; 4459 off_t olen, ooffset; 4460 int error; 4461 4462 if (offset < 0 || len <= 0) 4463 return (EINVAL); 4464 /* Check for wrap. */ 4465 if (offset > OFF_MAX - len) 4466 return (EFBIG); 4467 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4468 if (error != 0) 4469 return (error); 4470 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4471 error = ESPIPE; 4472 goto out; 4473 } 4474 if ((fp->f_flag & FWRITE) == 0) { 4475 error = EBADF; 4476 goto out; 4477 } 4478 if (fp->f_type != DTYPE_VNODE) { 4479 error = ENODEV; 4480 goto out; 4481 } 4482 vp = fp->f_vnode; 4483 if (vp->v_type != VREG) { 4484 error = ENODEV; 4485 goto out; 4486 } 4487 4488 /* Allocating blocks may take a long time, so iterate. */ 4489 for (;;) { 4490 olen = len; 4491 ooffset = offset; 4492 4493 bwillwrite(); 4494 mp = NULL; 4495 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4496 if (error != 0) 4497 break; 4498 error = vn_lock(vp, LK_EXCLUSIVE); 4499 if (error != 0) { 4500 vn_finished_write(mp); 4501 break; 4502 } 4503 #ifdef MAC 4504 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4505 if (error == 0) 4506 #endif 4507 error = VOP_ALLOCATE(vp, &offset, &len); 4508 VOP_UNLOCK(vp, 0); 4509 vn_finished_write(mp); 4510 4511 if (olen + ooffset != offset + len) { 4512 panic("offset + len changed from %jx/%jx to %jx/%jx", 4513 ooffset, olen, offset, len); 4514 } 4515 if (error != 0 || len == 0) 4516 break; 4517 KASSERT(olen > len, ("Iteration did not make progress?")); 4518 maybe_yield(); 4519 } 4520 out: 4521 fdrop(fp, td); 4522 return (error); 4523 } 4524 4525 int 4526 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4527 { 4528 4529 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4530 uap->len); 4531 return (0); 4532 } 4533 4534 /* 4535 * Unlike madvise(2), we do not make a best effort to remember every 4536 * possible caching hint. Instead, we remember the last setting with 4537 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4538 * region of any current setting. 4539 */ 4540 int 4541 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4542 int advice) 4543 { 4544 struct fadvise_info *fa, *new; 4545 struct file *fp; 4546 struct vnode *vp; 4547 cap_rights_t rights; 4548 off_t end; 4549 int error; 4550 4551 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4552 return (EINVAL); 4553 switch (advice) { 4554 case POSIX_FADV_SEQUENTIAL: 4555 case POSIX_FADV_RANDOM: 4556 case POSIX_FADV_NOREUSE: 4557 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4558 break; 4559 case POSIX_FADV_NORMAL: 4560 case POSIX_FADV_WILLNEED: 4561 case POSIX_FADV_DONTNEED: 4562 new = NULL; 4563 break; 4564 default: 4565 return (EINVAL); 4566 } 4567 /* XXX: CAP_POSIX_FADVISE? */ 4568 error = fget(td, fd, cap_rights_init(&rights), &fp); 4569 if (error != 0) 4570 goto out; 4571 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4572 error = ESPIPE; 4573 goto out; 4574 } 4575 if (fp->f_type != DTYPE_VNODE) { 4576 error = ENODEV; 4577 goto out; 4578 } 4579 vp = fp->f_vnode; 4580 if (vp->v_type != VREG) { 4581 error = ENODEV; 4582 goto out; 4583 } 4584 if (len == 0) 4585 end = OFF_MAX; 4586 else 4587 end = offset + len - 1; 4588 switch (advice) { 4589 case POSIX_FADV_SEQUENTIAL: 4590 case POSIX_FADV_RANDOM: 4591 case POSIX_FADV_NOREUSE: 4592 /* 4593 * Try to merge any existing non-standard region with 4594 * this new region if possible, otherwise create a new 4595 * non-standard region for this request. 4596 */ 4597 mtx_pool_lock(mtxpool_sleep, fp); 4598 fa = fp->f_advice; 4599 if (fa != NULL && fa->fa_advice == advice && 4600 ((fa->fa_start <= end && fa->fa_end >= offset) || 4601 (end != OFF_MAX && fa->fa_start == end + 1) || 4602 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4603 if (offset < fa->fa_start) 4604 fa->fa_start = offset; 4605 if (end > fa->fa_end) 4606 fa->fa_end = end; 4607 } else { 4608 new->fa_advice = advice; 4609 new->fa_start = offset; 4610 new->fa_end = end; 4611 new->fa_prevstart = 0; 4612 new->fa_prevend = 0; 4613 fp->f_advice = new; 4614 new = fa; 4615 } 4616 mtx_pool_unlock(mtxpool_sleep, fp); 4617 break; 4618 case POSIX_FADV_NORMAL: 4619 /* 4620 * If a the "normal" region overlaps with an existing 4621 * non-standard region, trim or remove the 4622 * non-standard region. 4623 */ 4624 mtx_pool_lock(mtxpool_sleep, fp); 4625 fa = fp->f_advice; 4626 if (fa != NULL) { 4627 if (offset <= fa->fa_start && end >= fa->fa_end) { 4628 new = fa; 4629 fp->f_advice = NULL; 4630 } else if (offset <= fa->fa_start && 4631 end >= fa->fa_start) 4632 fa->fa_start = end + 1; 4633 else if (offset <= fa->fa_end && end >= fa->fa_end) 4634 fa->fa_end = offset - 1; 4635 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4636 /* 4637 * If the "normal" region is a middle 4638 * portion of the existing 4639 * non-standard region, just remove 4640 * the whole thing rather than picking 4641 * one side or the other to 4642 * preserve. 4643 */ 4644 new = fa; 4645 fp->f_advice = NULL; 4646 } 4647 } 4648 mtx_pool_unlock(mtxpool_sleep, fp); 4649 break; 4650 case POSIX_FADV_WILLNEED: 4651 case POSIX_FADV_DONTNEED: 4652 error = VOP_ADVISE(vp, offset, end, advice); 4653 break; 4654 } 4655 out: 4656 if (fp != NULL) 4657 fdrop(fp, td); 4658 free(new, M_FADVISE); 4659 return (error); 4660 } 4661 4662 int 4663 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4664 { 4665 4666 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4667 uap->len, uap->advice); 4668 return (0); 4669 } 4670