1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const 
struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * The module initialization routine for POSIX asynchronous I/O will 110 * set this to the version of AIO that it implements. (Zero means 111 * that it is not implemented.) This value is used here by pathconf() 112 * and in kern_descrip.c by fpathconf(). 113 */ 114 int async_io_version; 115 116 /* 117 * Sync each mounted filesystem. 118 */ 119 #ifndef _SYS_SYSPROTO_H_ 120 struct sync_args { 121 int dummy; 122 }; 123 #endif 124 /* ARGSUSED */ 125 int 126 sys_sync(td, uap) 127 struct thread *td; 128 struct sync_args *uap; 129 { 130 struct mount *mp, *nmp; 131 int save; 132 133 mtx_lock(&mountlist_mtx); 134 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 135 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 136 nmp = TAILQ_NEXT(mp, mnt_list); 137 continue; 138 } 139 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 140 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 141 save = curthread_pflags_set(TDP_SYNCIO); 142 vfs_msync(mp, MNT_NOWAIT); 143 VFS_SYNC(mp, MNT_NOWAIT); 144 curthread_pflags_restore(save); 145 vn_finished_write(mp); 146 } 147 mtx_lock(&mountlist_mtx); 148 nmp = TAILQ_NEXT(mp, mnt_list); 149 vfs_unbusy(mp); 150 } 151 mtx_unlock(&mountlist_mtx); 152 return (0); 153 } 154 155 /* 156 * Change filesystem quotas. 
157 */ 158 #ifndef _SYS_SYSPROTO_H_ 159 struct quotactl_args { 160 char *path; 161 int cmd; 162 int uid; 163 caddr_t arg; 164 }; 165 #endif 166 int 167 sys_quotactl(td, uap) 168 struct thread *td; 169 register struct quotactl_args /* { 170 char *path; 171 int cmd; 172 int uid; 173 caddr_t arg; 174 } */ *uap; 175 { 176 struct mount *mp; 177 struct nameidata nd; 178 int error; 179 180 AUDIT_ARG_CMD(uap->cmd); 181 AUDIT_ARG_UID(uap->uid); 182 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 183 return (EPERM); 184 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 185 uap->path, td); 186 if ((error = namei(&nd)) != 0) 187 return (error); 188 NDFREE(&nd, NDF_ONLY_PNBUF); 189 mp = nd.ni_vp->v_mount; 190 vfs_ref(mp); 191 vput(nd.ni_vp); 192 error = vfs_busy(mp, 0); 193 vfs_rel(mp); 194 if (error != 0) 195 return (error); 196 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 197 198 /* 199 * Since quota on operation typically needs to open quota 200 * file, the Q_QUOTAON handler needs to unbusy the mount point 201 * before calling into namei. Otherwise, unmount might be 202 * started between two vfs_busy() invocations (first is our, 203 * second is from mount point cross-walk code in lookup()), 204 * causing deadlock. 205 * 206 * Require that Q_QUOTAON handles the vfs_busy() reference on 207 * its own, always returning with ubusied mount point. 208 */ 209 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 210 vfs_unbusy(mp); 211 return (error); 212 } 213 214 /* 215 * Used by statfs conversion routines to scale the block size up if 216 * necessary so that all of the block counts are <= 'max_size'. Note 217 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 218 * value of 'n'. 
219 */ 220 void 221 statfs_scale_blocks(struct statfs *sf, long max_size) 222 { 223 uint64_t count; 224 int shift; 225 226 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 227 228 /* 229 * Attempt to scale the block counts to give a more accurate 230 * overview to userland of the ratio of free space to used 231 * space. To do this, find the largest block count and compute 232 * a divisor that lets it fit into a signed integer <= max_size. 233 */ 234 if (sf->f_bavail < 0) 235 count = -sf->f_bavail; 236 else 237 count = sf->f_bavail; 238 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 239 if (count <= max_size) 240 return; 241 242 count >>= flsl(max_size); 243 shift = 0; 244 while (count > 0) { 245 shift++; 246 count >>=1; 247 } 248 249 sf->f_bsize <<= shift; 250 sf->f_blocks >>= shift; 251 sf->f_bfree >>= shift; 252 sf->f_bavail >>= shift; 253 } 254 255 /* 256 * Get filesystem statistics. 257 */ 258 #ifndef _SYS_SYSPROTO_H_ 259 struct statfs_args { 260 char *path; 261 struct statfs *buf; 262 }; 263 #endif 264 int 265 sys_statfs(td, uap) 266 struct thread *td; 267 register struct statfs_args /* { 268 char *path; 269 struct statfs *buf; 270 } */ *uap; 271 { 272 struct statfs sf; 273 int error; 274 275 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 276 if (error == 0) 277 error = copyout(&sf, uap->buf, sizeof(sf)); 278 return (error); 279 } 280 281 int 282 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 283 struct statfs *buf) 284 { 285 struct mount *mp; 286 struct statfs *sp, sb; 287 struct nameidata nd; 288 int error; 289 290 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 291 pathseg, path, td); 292 error = namei(&nd); 293 if (error != 0) 294 return (error); 295 mp = nd.ni_vp->v_mount; 296 vfs_ref(mp); 297 NDFREE(&nd, NDF_ONLY_PNBUF); 298 vput(nd.ni_vp); 299 error = vfs_busy(mp, 0); 300 vfs_rel(mp); 301 if (error != 0) 302 return (error); 303 #ifdef MAC 304 error = mac_mount_check_stat(td->td_ucred, 
mp); 305 if (error != 0) 306 goto out; 307 #endif 308 /* 309 * Set these in case the underlying filesystem fails to do so. 310 */ 311 sp = &mp->mnt_stat; 312 sp->f_version = STATFS_VERSION; 313 sp->f_namemax = NAME_MAX; 314 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 315 error = VFS_STATFS(mp, sp); 316 if (error != 0) 317 goto out; 318 if (priv_check(td, PRIV_VFS_GENERATION)) { 319 bcopy(sp, &sb, sizeof(sb)); 320 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 321 prison_enforce_statfs(td->td_ucred, mp, &sb); 322 sp = &sb; 323 } 324 *buf = *sp; 325 out: 326 vfs_unbusy(mp); 327 return (error); 328 } 329 330 /* 331 * Get filesystem statistics. 332 */ 333 #ifndef _SYS_SYSPROTO_H_ 334 struct fstatfs_args { 335 int fd; 336 struct statfs *buf; 337 }; 338 #endif 339 int 340 sys_fstatfs(td, uap) 341 struct thread *td; 342 register struct fstatfs_args /* { 343 int fd; 344 struct statfs *buf; 345 } */ *uap; 346 { 347 struct statfs sf; 348 int error; 349 350 error = kern_fstatfs(td, uap->fd, &sf); 351 if (error == 0) 352 error = copyout(&sf, uap->buf, sizeof(sf)); 353 return (error); 354 } 355 356 int 357 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 358 { 359 struct file *fp; 360 struct mount *mp; 361 struct statfs *sp, sb; 362 struct vnode *vp; 363 cap_rights_t rights; 364 int error; 365 366 AUDIT_ARG_FD(fd); 367 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 368 if (error != 0) 369 return (error); 370 vp = fp->f_vnode; 371 vn_lock(vp, LK_SHARED | LK_RETRY); 372 #ifdef AUDIT 373 AUDIT_ARG_VNODE1(vp); 374 #endif 375 mp = vp->v_mount; 376 if (mp) 377 vfs_ref(mp); 378 VOP_UNLOCK(vp, 0); 379 fdrop(fp, td); 380 if (mp == NULL) { 381 error = EBADF; 382 goto out; 383 } 384 error = vfs_busy(mp, 0); 385 vfs_rel(mp); 386 if (error != 0) 387 return (error); 388 #ifdef MAC 389 error = mac_mount_check_stat(td->td_ucred, mp); 390 if (error != 0) 391 goto out; 392 #endif 393 /* 394 * Set these in case the underlying filesystem fails to do so. 
395 */ 396 sp = &mp->mnt_stat; 397 sp->f_version = STATFS_VERSION; 398 sp->f_namemax = NAME_MAX; 399 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 400 error = VFS_STATFS(mp, sp); 401 if (error != 0) 402 goto out; 403 if (priv_check(td, PRIV_VFS_GENERATION)) { 404 bcopy(sp, &sb, sizeof(sb)); 405 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 406 prison_enforce_statfs(td->td_ucred, mp, &sb); 407 sp = &sb; 408 } 409 *buf = *sp; 410 out: 411 if (mp) 412 vfs_unbusy(mp); 413 return (error); 414 } 415 416 /* 417 * Get statistics on all filesystems. 418 */ 419 #ifndef _SYS_SYSPROTO_H_ 420 struct getfsstat_args { 421 struct statfs *buf; 422 long bufsize; 423 int flags; 424 }; 425 #endif 426 int 427 sys_getfsstat(td, uap) 428 struct thread *td; 429 register struct getfsstat_args /* { 430 struct statfs *buf; 431 long bufsize; 432 int flags; 433 } */ *uap; 434 { 435 size_t count; 436 int error; 437 438 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 439 return (EINVAL); 440 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 441 UIO_USERSPACE, uap->flags); 442 if (error == 0) 443 td->td_retval[0] = count; 444 return (error); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 
451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 size_t *countp, enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 
508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 *countp = maxcount; 542 else 543 *countp = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 
580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 if (uap->bufsize < 0) 631 return (EINVAL); 632 count = uap->bufsize / sizeof(struct ostatfs); 633 if (count > SIZE_MAX / sizeof(struct statfs)) 634 return (EINVAL); 635 size = count * sizeof(struct statfs); 636 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 637 uap->flags); 638 td->td_retval[0] = count; 639 if (size != 0) { 640 sp = buf; 641 while (count != 0 && error == 0) { 642 cvtstatfs(sp, &osb); 643 error = copyout(&osb, uap->buf, sizeof(osb)); 644 sp++; 645 uap->buf++; 646 count--; 647 } 648 free(buf, M_TEMP); 649 } 650 return (error); 651 } 652 653 /* 654 * Implement fstatfs() for (NFS) file handles. 
655 */ 656 #ifndef _SYS_SYSPROTO_H_ 657 struct freebsd4_fhstatfs_args { 658 struct fhandle *u_fhp; 659 struct ostatfs *buf; 660 }; 661 #endif 662 int 663 freebsd4_fhstatfs(td, uap) 664 struct thread *td; 665 struct freebsd4_fhstatfs_args /* { 666 struct fhandle *u_fhp; 667 struct ostatfs *buf; 668 } */ *uap; 669 { 670 struct ostatfs osb; 671 struct statfs sf; 672 fhandle_t fh; 673 int error; 674 675 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 676 if (error != 0) 677 return (error); 678 error = kern_fhstatfs(td, fh, &sf); 679 if (error != 0) 680 return (error); 681 cvtstatfs(&sf, &osb); 682 return (copyout(&osb, uap->buf, sizeof(osb))); 683 } 684 685 /* 686 * Convert a new format statfs structure to an old format statfs structure. 687 */ 688 static void 689 cvtstatfs(nsp, osp) 690 struct statfs *nsp; 691 struct ostatfs *osp; 692 { 693 694 statfs_scale_blocks(nsp, LONG_MAX); 695 bzero(osp, sizeof(*osp)); 696 osp->f_bsize = nsp->f_bsize; 697 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 698 osp->f_blocks = nsp->f_blocks; 699 osp->f_bfree = nsp->f_bfree; 700 osp->f_bavail = nsp->f_bavail; 701 osp->f_files = MIN(nsp->f_files, LONG_MAX); 702 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 703 osp->f_owner = nsp->f_owner; 704 osp->f_type = nsp->f_type; 705 osp->f_flags = nsp->f_flags; 706 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 707 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 708 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 709 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 710 strlcpy(osp->f_fstypename, nsp->f_fstypename, 711 MIN(MFSNAMELEN, OMFSNAMELEN)); 712 strlcpy(osp->f_mntonname, nsp->f_mntonname, 713 MIN(MNAMELEN, OMNAMELEN)); 714 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 715 MIN(MNAMELEN, OMNAMELEN)); 716 osp->f_fsid = nsp->f_fsid; 717 } 718 #endif /* COMPAT_FREEBSD4 */ 719 720 /* 721 * Change current working directory to a given file descriptor. 
722 */ 723 #ifndef _SYS_SYSPROTO_H_ 724 struct fchdir_args { 725 int fd; 726 }; 727 #endif 728 int 729 sys_fchdir(td, uap) 730 struct thread *td; 731 struct fchdir_args /* { 732 int fd; 733 } */ *uap; 734 { 735 struct vnode *vp, *tdp; 736 struct mount *mp; 737 struct file *fp; 738 cap_rights_t rights; 739 int error; 740 741 AUDIT_ARG_FD(uap->fd); 742 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 743 &fp); 744 if (error != 0) 745 return (error); 746 vp = fp->f_vnode; 747 VREF(vp); 748 fdrop(fp, td); 749 vn_lock(vp, LK_SHARED | LK_RETRY); 750 AUDIT_ARG_VNODE1(vp); 751 error = change_dir(vp, td); 752 while (!error && (mp = vp->v_mountedhere) != NULL) { 753 if (vfs_busy(mp, 0)) 754 continue; 755 error = VFS_ROOT(mp, LK_SHARED, &tdp); 756 vfs_unbusy(mp); 757 if (error != 0) 758 break; 759 vput(vp); 760 vp = tdp; 761 } 762 if (error != 0) { 763 vput(vp); 764 return (error); 765 } 766 VOP_UNLOCK(vp, 0); 767 pwd_chdir(td, vp); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 struct nameidata nd; 794 int error; 795 796 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 797 pathseg, path, td); 798 if ((error = namei(&nd)) != 0) 799 return (error); 800 if ((error = change_dir(nd.ni_vp, td)) != 0) { 801 vput(nd.ni_vp); 802 NDFREE(&nd, NDF_ONLY_PNBUF); 803 return (error); 804 } 805 VOP_UNLOCK(nd.ni_vp, 0); 806 NDFREE(&nd, NDF_ONLY_PNBUF); 807 pwd_chdir(td, nd.ni_vp); 808 return (0); 809 } 810 811 /* 812 * Change notion of root (``/'') directory. 
813 */ 814 #ifndef _SYS_SYSPROTO_H_ 815 struct chroot_args { 816 char *path; 817 }; 818 #endif 819 int 820 sys_chroot(td, uap) 821 struct thread *td; 822 struct chroot_args /* { 823 char *path; 824 } */ *uap; 825 { 826 struct nameidata nd; 827 int error; 828 829 error = priv_check(td, PRIV_VFS_CHROOT); 830 if (error != 0) 831 return (error); 832 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 833 UIO_USERSPACE, uap->path, td); 834 error = namei(&nd); 835 if (error != 0) 836 goto error; 837 error = change_dir(nd.ni_vp, td); 838 if (error != 0) 839 goto e_vunlock; 840 #ifdef MAC 841 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 842 if (error != 0) 843 goto e_vunlock; 844 #endif 845 VOP_UNLOCK(nd.ni_vp, 0); 846 error = pwd_chroot(td, nd.ni_vp); 847 vrele(nd.ni_vp); 848 NDFREE(&nd, NDF_ONLY_PNBUF); 849 return (error); 850 e_vunlock: 851 vput(nd.ni_vp); 852 error: 853 NDFREE(&nd, NDF_ONLY_PNBUF); 854 return (error); 855 } 856 857 /* 858 * Common routine for chroot and chdir. Callers must provide a locked vnode 859 * instance. 
860 */ 861 int 862 change_dir(vp, td) 863 struct vnode *vp; 864 struct thread *td; 865 { 866 #ifdef MAC 867 int error; 868 #endif 869 870 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 871 if (vp->v_type != VDIR) 872 return (ENOTDIR); 873 #ifdef MAC 874 error = mac_vnode_check_chdir(td->td_ucred, vp); 875 if (error != 0) 876 return (error); 877 #endif 878 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 879 } 880 881 static __inline void 882 flags_to_rights(int flags, cap_rights_t *rightsp) 883 { 884 885 if (flags & O_EXEC) { 886 cap_rights_set(rightsp, CAP_FEXECVE); 887 } else { 888 switch ((flags & O_ACCMODE)) { 889 case O_RDONLY: 890 cap_rights_set(rightsp, CAP_READ); 891 break; 892 case O_RDWR: 893 cap_rights_set(rightsp, CAP_READ); 894 /* FALLTHROUGH */ 895 case O_WRONLY: 896 cap_rights_set(rightsp, CAP_WRITE); 897 if (!(flags & (O_APPEND | O_TRUNC))) 898 cap_rights_set(rightsp, CAP_SEEK); 899 break; 900 } 901 } 902 903 if (flags & O_CREAT) 904 cap_rights_set(rightsp, CAP_CREATE); 905 906 if (flags & O_TRUNC) 907 cap_rights_set(rightsp, CAP_FTRUNCATE); 908 909 if (flags & (O_SYNC | O_FSYNC)) 910 cap_rights_set(rightsp, CAP_FSYNC); 911 912 if (flags & (O_EXLOCK | O_SHLOCK)) 913 cap_rights_set(rightsp, CAP_FLOCK); 914 } 915 916 /* 917 * Check permissions, allocate an open file structure, and call the device 918 * open routine if any. 
919 */ 920 #ifndef _SYS_SYSPROTO_H_ 921 struct open_args { 922 char *path; 923 int flags; 924 int mode; 925 }; 926 #endif 927 int 928 sys_open(td, uap) 929 struct thread *td; 930 register struct open_args /* { 931 char *path; 932 int flags; 933 int mode; 934 } */ *uap; 935 { 936 937 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 938 uap->flags, uap->mode)); 939 } 940 941 #ifndef _SYS_SYSPROTO_H_ 942 struct openat_args { 943 int fd; 944 char *path; 945 int flag; 946 int mode; 947 }; 948 #endif 949 int 950 sys_openat(struct thread *td, struct openat_args *uap) 951 { 952 953 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 954 uap->mode)); 955 } 956 957 int 958 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 959 int flags, int mode) 960 { 961 struct proc *p = td->td_proc; 962 struct filedesc *fdp = p->p_fd; 963 struct file *fp; 964 struct vnode *vp; 965 struct nameidata nd; 966 cap_rights_t rights; 967 int cmode, error, indx; 968 969 indx = -1; 970 971 AUDIT_ARG_FFLAGS(flags); 972 AUDIT_ARG_MODE(mode); 973 /* XXX: audit dirfd */ 974 cap_rights_init(&rights, CAP_LOOKUP); 975 flags_to_rights(flags, &rights); 976 /* 977 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 978 * may be specified. 979 */ 980 if (flags & O_EXEC) { 981 if (flags & O_ACCMODE) 982 return (EINVAL); 983 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 984 return (EINVAL); 985 } else { 986 flags = FFLAGS(flags); 987 } 988 989 /* 990 * Allocate the file descriptor, but don't install a descriptor yet. 991 */ 992 error = falloc_noinstall(td, &fp); 993 if (error != 0) 994 return (error); 995 /* 996 * An extra reference on `fp' has been held for us by 997 * falloc_noinstall(). 998 */ 999 /* Set the flags early so the finit in devfs can pick them up. 
*/ 1000 fp->f_flag = flags & FMASK; 1001 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1002 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1003 &rights, td); 1004 td->td_dupfd = -1; /* XXX check for fdopen */ 1005 error = vn_open(&nd, &flags, cmode, fp); 1006 if (error != 0) { 1007 /* 1008 * If the vn_open replaced the method vector, something 1009 * wonderous happened deep below and we just pass it up 1010 * pretending we know what we do. 1011 */ 1012 if (error == ENXIO && fp->f_ops != &badfileops) 1013 goto success; 1014 1015 /* 1016 * Handle special fdopen() case. bleh. 1017 * 1018 * Don't do this for relative (capability) lookups; we don't 1019 * understand exactly what would happen, and we don't think 1020 * that it ever should. 1021 */ 1022 if (nd.ni_strictrelative == 0 && 1023 (error == ENODEV || error == ENXIO) && 1024 td->td_dupfd >= 0) { 1025 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1026 &indx); 1027 if (error == 0) 1028 goto success; 1029 } 1030 1031 goto bad; 1032 } 1033 td->td_dupfd = 0; 1034 NDFREE(&nd, NDF_ONLY_PNBUF); 1035 vp = nd.ni_vp; 1036 1037 /* 1038 * Store the vnode, for any f_type. Typically, the vnode use 1039 * count is decremented by direct call to vn_closefile() for 1040 * files that switched type in the cdevsw fdopen() method. 1041 */ 1042 fp->f_vnode = vp; 1043 /* 1044 * If the file wasn't claimed by devfs bind it to the normal 1045 * vnode operations here. 1046 */ 1047 if (fp->f_ops == &badfileops) { 1048 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1049 fp->f_seqcount = 1; 1050 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1051 DTYPE_VNODE, vp, &vnops); 1052 } 1053 1054 VOP_UNLOCK(vp, 0); 1055 if (flags & O_TRUNC) { 1056 error = fo_truncate(fp, 0, td->td_ucred, td); 1057 if (error != 0) 1058 goto bad; 1059 } 1060 success: 1061 /* 1062 * If we haven't already installed the FD (for dupfdopen), do so now. 
1063 */ 1064 if (indx == -1) { 1065 struct filecaps *fcaps; 1066 1067 #ifdef CAPABILITIES 1068 if (nd.ni_strictrelative == 1) 1069 fcaps = &nd.ni_filecaps; 1070 else 1071 #endif 1072 fcaps = NULL; 1073 error = finstall(td, fp, &indx, flags, fcaps); 1074 /* On success finstall() consumes fcaps. */ 1075 if (error != 0) { 1076 filecaps_free(&nd.ni_filecaps); 1077 goto bad; 1078 } 1079 } else { 1080 filecaps_free(&nd.ni_filecaps); 1081 } 1082 1083 /* 1084 * Release our private reference, leaving the one associated with 1085 * the descriptor table intact. 1086 */ 1087 fdrop(fp, td); 1088 td->td_retval[0] = indx; 1089 return (0); 1090 bad: 1091 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1092 fdrop(fp, td); 1093 return (error); 1094 } 1095 1096 #ifdef COMPAT_43 1097 /* 1098 * Create a file. 1099 */ 1100 #ifndef _SYS_SYSPROTO_H_ 1101 struct ocreat_args { 1102 char *path; 1103 int mode; 1104 }; 1105 #endif 1106 int 1107 ocreat(td, uap) 1108 struct thread *td; 1109 register struct ocreat_args /* { 1110 char *path; 1111 int mode; 1112 } */ *uap; 1113 { 1114 1115 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1116 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1117 } 1118 #endif /* COMPAT_43 */ 1119 1120 /* 1121 * Create a special file. 
1122 */ 1123 #ifndef _SYS_SYSPROTO_H_ 1124 struct mknod_args { 1125 char *path; 1126 int mode; 1127 int dev; 1128 }; 1129 #endif 1130 int 1131 sys_mknod(td, uap) 1132 struct thread *td; 1133 register struct mknod_args /* { 1134 char *path; 1135 int mode; 1136 int dev; 1137 } */ *uap; 1138 { 1139 1140 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1141 uap->mode, uap->dev)); 1142 } 1143 1144 #ifndef _SYS_SYSPROTO_H_ 1145 struct mknodat_args { 1146 int fd; 1147 char *path; 1148 mode_t mode; 1149 dev_t dev; 1150 }; 1151 #endif 1152 int 1153 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1154 { 1155 1156 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1157 uap->dev)); 1158 } 1159 1160 int 1161 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1162 int mode, int dev) 1163 { 1164 struct vnode *vp; 1165 struct mount *mp; 1166 struct vattr vattr; 1167 struct nameidata nd; 1168 cap_rights_t rights; 1169 int error, whiteout = 0; 1170 1171 AUDIT_ARG_MODE(mode); 1172 AUDIT_ARG_DEV(dev); 1173 switch (mode & S_IFMT) { 1174 case S_IFCHR: 1175 case S_IFBLK: 1176 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1177 break; 1178 case S_IFMT: 1179 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1180 break; 1181 case S_IFWHT: 1182 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1183 break; 1184 case S_IFIFO: 1185 if (dev == 0) 1186 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1187 /* FALLTHROUGH */ 1188 default: 1189 error = EINVAL; 1190 break; 1191 } 1192 if (error != 0) 1193 return (error); 1194 restart: 1195 bwillwrite(); 1196 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1197 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1198 td); 1199 if ((error = namei(&nd)) != 0) 1200 return (error); 1201 vp = nd.ni_vp; 1202 if (vp != NULL) { 1203 NDFREE(&nd, NDF_ONLY_PNBUF); 1204 if (vp == nd.ni_dvp) 1205 vrele(nd.ni_dvp); 1206 else 1207 vput(nd.ni_dvp); 1208 vrele(vp); 1209 return 
(EEXIST); 1210 } else { 1211 VATTR_NULL(&vattr); 1212 vattr.va_mode = (mode & ALLPERMS) & 1213 ~td->td_proc->p_fd->fd_cmask; 1214 vattr.va_rdev = dev; 1215 whiteout = 0; 1216 1217 switch (mode & S_IFMT) { 1218 case S_IFMT: /* used by badsect to flag bad sectors */ 1219 vattr.va_type = VBAD; 1220 break; 1221 case S_IFCHR: 1222 vattr.va_type = VCHR; 1223 break; 1224 case S_IFBLK: 1225 vattr.va_type = VBLK; 1226 break; 1227 case S_IFWHT: 1228 whiteout = 1; 1229 break; 1230 default: 1231 panic("kern_mknod: invalid mode"); 1232 } 1233 } 1234 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1235 NDFREE(&nd, NDF_ONLY_PNBUF); 1236 vput(nd.ni_dvp); 1237 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1238 return (error); 1239 goto restart; 1240 } 1241 #ifdef MAC 1242 if (error == 0 && !whiteout) 1243 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1244 &nd.ni_cnd, &vattr); 1245 #endif 1246 if (error == 0) { 1247 if (whiteout) 1248 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1249 else { 1250 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1251 &nd.ni_cnd, &vattr); 1252 if (error == 0) 1253 vput(nd.ni_vp); 1254 } 1255 } 1256 NDFREE(&nd, NDF_ONLY_PNBUF); 1257 vput(nd.ni_dvp); 1258 vn_finished_write(mp); 1259 return (error); 1260 } 1261 1262 /* 1263 * Create a named pipe. 
1264 */ 1265 #ifndef _SYS_SYSPROTO_H_ 1266 struct mkfifo_args { 1267 char *path; 1268 int mode; 1269 }; 1270 #endif 1271 int 1272 sys_mkfifo(td, uap) 1273 struct thread *td; 1274 register struct mkfifo_args /* { 1275 char *path; 1276 int mode; 1277 } */ *uap; 1278 { 1279 1280 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1281 uap->mode)); 1282 } 1283 1284 #ifndef _SYS_SYSPROTO_H_ 1285 struct mkfifoat_args { 1286 int fd; 1287 char *path; 1288 mode_t mode; 1289 }; 1290 #endif 1291 int 1292 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1293 { 1294 1295 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1296 uap->mode)); 1297 } 1298 1299 int 1300 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1301 int mode) 1302 { 1303 struct mount *mp; 1304 struct vattr vattr; 1305 struct nameidata nd; 1306 cap_rights_t rights; 1307 int error; 1308 1309 AUDIT_ARG_MODE(mode); 1310 restart: 1311 bwillwrite(); 1312 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1313 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1314 td); 1315 if ((error = namei(&nd)) != 0) 1316 return (error); 1317 if (nd.ni_vp != NULL) { 1318 NDFREE(&nd, NDF_ONLY_PNBUF); 1319 if (nd.ni_vp == nd.ni_dvp) 1320 vrele(nd.ni_dvp); 1321 else 1322 vput(nd.ni_dvp); 1323 vrele(nd.ni_vp); 1324 return (EEXIST); 1325 } 1326 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1327 NDFREE(&nd, NDF_ONLY_PNBUF); 1328 vput(nd.ni_dvp); 1329 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1330 return (error); 1331 goto restart; 1332 } 1333 VATTR_NULL(&vattr); 1334 vattr.va_type = VFIFO; 1335 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1336 #ifdef MAC 1337 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1338 &vattr); 1339 if (error != 0) 1340 goto out; 1341 #endif 1342 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1343 if (error == 0) 1344 vput(nd.ni_vp); 1345 
#ifdef MAC 1346 out: 1347 #endif 1348 vput(nd.ni_dvp); 1349 vn_finished_write(mp); 1350 NDFREE(&nd, NDF_ONLY_PNBUF); 1351 return (error); 1352 } 1353 1354 /* 1355 * Make a hard file link. 1356 */ 1357 #ifndef _SYS_SYSPROTO_H_ 1358 struct link_args { 1359 char *path; 1360 char *link; 1361 }; 1362 #endif 1363 int 1364 sys_link(td, uap) 1365 struct thread *td; 1366 register struct link_args /* { 1367 char *path; 1368 char *link; 1369 } */ *uap; 1370 { 1371 1372 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1373 UIO_USERSPACE, FOLLOW)); 1374 } 1375 1376 #ifndef _SYS_SYSPROTO_H_ 1377 struct linkat_args { 1378 int fd1; 1379 char *path1; 1380 int fd2; 1381 char *path2; 1382 int flag; 1383 }; 1384 #endif 1385 int 1386 sys_linkat(struct thread *td, struct linkat_args *uap) 1387 { 1388 int flag; 1389 1390 flag = uap->flag; 1391 if (flag & ~AT_SYMLINK_FOLLOW) 1392 return (EINVAL); 1393 1394 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1395 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1396 } 1397 1398 int hardlink_check_uid = 0; 1399 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1400 &hardlink_check_uid, 0, 1401 "Unprivileged processes cannot create hard links to files owned by other " 1402 "users"); 1403 static int hardlink_check_gid = 0; 1404 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1405 &hardlink_check_gid, 0, 1406 "Unprivileged processes cannot create hard links to files owned by other " 1407 "groups"); 1408 1409 static int 1410 can_hardlink(struct vnode *vp, struct ucred *cred) 1411 { 1412 struct vattr va; 1413 int error; 1414 1415 if (!hardlink_check_uid && !hardlink_check_gid) 1416 return (0); 1417 1418 error = VOP_GETATTR(vp, &va, cred); 1419 if (error != 0) 1420 return (error); 1421 1422 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1423 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1424 if (error != 0) 1425 return (error); 1426 } 1427 1428 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1429 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1430 if (error != 0) 1431 return (error); 1432 } 1433 1434 return (0); 1435 } 1436 1437 int 1438 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1439 enum uio_seg segflg, int follow) 1440 { 1441 struct vnode *vp; 1442 struct mount *mp; 1443 struct nameidata nd; 1444 cap_rights_t rights; 1445 int error; 1446 1447 again: 1448 bwillwrite(); 1449 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1450 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1451 1452 if ((error = namei(&nd)) != 0) 1453 return (error); 1454 NDFREE(&nd, NDF_ONLY_PNBUF); 1455 vp = nd.ni_vp; 1456 if (vp->v_type == VDIR) { 1457 vrele(vp); 1458 return (EPERM); /* POSIX */ 1459 } 1460 NDINIT_ATRIGHTS(&nd, CREATE, 1461 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1462 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1463 if ((error = namei(&nd)) == 0) { 1464 if (nd.ni_vp != 
NULL) { 1465 NDFREE(&nd, NDF_ONLY_PNBUF); 1466 if (nd.ni_dvp == nd.ni_vp) 1467 vrele(nd.ni_dvp); 1468 else 1469 vput(nd.ni_dvp); 1470 vrele(nd.ni_vp); 1471 vrele(vp); 1472 return (EEXIST); 1473 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1474 /* 1475 * Cross-device link. No need to recheck 1476 * vp->v_type, since it cannot change, except 1477 * to VBAD. 1478 */ 1479 NDFREE(&nd, NDF_ONLY_PNBUF); 1480 vput(nd.ni_dvp); 1481 vrele(vp); 1482 return (EXDEV); 1483 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1484 error = can_hardlink(vp, td->td_ucred); 1485 #ifdef MAC 1486 if (error == 0) 1487 error = mac_vnode_check_link(td->td_ucred, 1488 nd.ni_dvp, vp, &nd.ni_cnd); 1489 #endif 1490 if (error != 0) { 1491 vput(vp); 1492 vput(nd.ni_dvp); 1493 NDFREE(&nd, NDF_ONLY_PNBUF); 1494 return (error); 1495 } 1496 error = vn_start_write(vp, &mp, V_NOWAIT); 1497 if (error != 0) { 1498 vput(vp); 1499 vput(nd.ni_dvp); 1500 NDFREE(&nd, NDF_ONLY_PNBUF); 1501 error = vn_start_write(NULL, &mp, 1502 V_XSLEEP | PCATCH); 1503 if (error != 0) 1504 return (error); 1505 goto again; 1506 } 1507 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1508 VOP_UNLOCK(vp, 0); 1509 vput(nd.ni_dvp); 1510 vn_finished_write(mp); 1511 NDFREE(&nd, NDF_ONLY_PNBUF); 1512 } else { 1513 vput(nd.ni_dvp); 1514 NDFREE(&nd, NDF_ONLY_PNBUF); 1515 vrele(vp); 1516 goto again; 1517 } 1518 } 1519 vrele(vp); 1520 return (error); 1521 } 1522 1523 /* 1524 * Make a symbolic link. 
1525 */ 1526 #ifndef _SYS_SYSPROTO_H_ 1527 struct symlink_args { 1528 char *path; 1529 char *link; 1530 }; 1531 #endif 1532 int 1533 sys_symlink(td, uap) 1534 struct thread *td; 1535 register struct symlink_args /* { 1536 char *path; 1537 char *link; 1538 } */ *uap; 1539 { 1540 1541 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1542 UIO_USERSPACE)); 1543 } 1544 1545 #ifndef _SYS_SYSPROTO_H_ 1546 struct symlinkat_args { 1547 char *path; 1548 int fd; 1549 char *path2; 1550 }; 1551 #endif 1552 int 1553 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1554 { 1555 1556 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1557 UIO_USERSPACE)); 1558 } 1559 1560 int 1561 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1562 enum uio_seg segflg) 1563 { 1564 struct mount *mp; 1565 struct vattr vattr; 1566 char *syspath; 1567 struct nameidata nd; 1568 int error; 1569 cap_rights_t rights; 1570 1571 if (segflg == UIO_SYSSPACE) { 1572 syspath = path1; 1573 } else { 1574 syspath = uma_zalloc(namei_zone, M_WAITOK); 1575 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1576 goto out; 1577 } 1578 AUDIT_ARG_TEXT(syspath); 1579 restart: 1580 bwillwrite(); 1581 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1582 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1583 td); 1584 if ((error = namei(&nd)) != 0) 1585 goto out; 1586 if (nd.ni_vp) { 1587 NDFREE(&nd, NDF_ONLY_PNBUF); 1588 if (nd.ni_vp == nd.ni_dvp) 1589 vrele(nd.ni_dvp); 1590 else 1591 vput(nd.ni_dvp); 1592 vrele(nd.ni_vp); 1593 error = EEXIST; 1594 goto out; 1595 } 1596 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1597 NDFREE(&nd, NDF_ONLY_PNBUF); 1598 vput(nd.ni_dvp); 1599 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1600 goto out; 1601 goto restart; 1602 } 1603 VATTR_NULL(&vattr); 1604 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1605 #ifdef MAC 1606 vattr.va_type = VLNK; 1607 
error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1608 &vattr); 1609 if (error != 0) 1610 goto out2; 1611 #endif 1612 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1613 if (error == 0) 1614 vput(nd.ni_vp); 1615 #ifdef MAC 1616 out2: 1617 #endif 1618 NDFREE(&nd, NDF_ONLY_PNBUF); 1619 vput(nd.ni_dvp); 1620 vn_finished_write(mp); 1621 out: 1622 if (segflg != UIO_SYSSPACE) 1623 uma_zfree(namei_zone, syspath); 1624 return (error); 1625 } 1626 1627 /* 1628 * Delete a whiteout from the filesystem. 1629 */ 1630 int 1631 sys_undelete(td, uap) 1632 struct thread *td; 1633 register struct undelete_args /* { 1634 char *path; 1635 } */ *uap; 1636 { 1637 struct mount *mp; 1638 struct nameidata nd; 1639 int error; 1640 1641 restart: 1642 bwillwrite(); 1643 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1644 UIO_USERSPACE, uap->path, td); 1645 error = namei(&nd); 1646 if (error != 0) 1647 return (error); 1648 1649 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1650 NDFREE(&nd, NDF_ONLY_PNBUF); 1651 if (nd.ni_vp == nd.ni_dvp) 1652 vrele(nd.ni_dvp); 1653 else 1654 vput(nd.ni_dvp); 1655 if (nd.ni_vp) 1656 vrele(nd.ni_vp); 1657 return (EEXIST); 1658 } 1659 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1660 NDFREE(&nd, NDF_ONLY_PNBUF); 1661 vput(nd.ni_dvp); 1662 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1663 return (error); 1664 goto restart; 1665 } 1666 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1667 NDFREE(&nd, NDF_ONLY_PNBUF); 1668 vput(nd.ni_dvp); 1669 vn_finished_write(mp); 1670 return (error); 1671 } 1672 1673 /* 1674 * Delete a name from the filesystem. 
1675 */ 1676 #ifndef _SYS_SYSPROTO_H_ 1677 struct unlink_args { 1678 char *path; 1679 }; 1680 #endif 1681 int 1682 sys_unlink(td, uap) 1683 struct thread *td; 1684 struct unlink_args /* { 1685 char *path; 1686 } */ *uap; 1687 { 1688 1689 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1690 } 1691 1692 #ifndef _SYS_SYSPROTO_H_ 1693 struct unlinkat_args { 1694 int fd; 1695 char *path; 1696 int flag; 1697 }; 1698 #endif 1699 int 1700 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1701 { 1702 int flag = uap->flag; 1703 int fd = uap->fd; 1704 char *path = uap->path; 1705 1706 if (flag & ~AT_REMOVEDIR) 1707 return (EINVAL); 1708 1709 if (flag & AT_REMOVEDIR) 1710 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1711 else 1712 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1713 } 1714 1715 int 1716 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1717 ino_t oldinum) 1718 { 1719 struct mount *mp; 1720 struct vnode *vp; 1721 struct nameidata nd; 1722 struct stat sb; 1723 cap_rights_t rights; 1724 int error; 1725 1726 restart: 1727 bwillwrite(); 1728 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1729 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1730 if ((error = namei(&nd)) != 0) 1731 return (error == EINVAL ? EPERM : error); 1732 vp = nd.ni_vp; 1733 if (vp->v_type == VDIR && oldinum == 0) { 1734 error = EPERM; /* POSIX */ 1735 } else if (oldinum != 0 && 1736 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1737 sb.st_ino != oldinum) { 1738 error = EIDRM; /* Identifier removed */ 1739 } else { 1740 /* 1741 * The root of a mounted filesystem cannot be deleted. 1742 * 1743 * XXX: can this only be a VDIR case? 
1744 */ 1745 if (vp->v_vflag & VV_ROOT) 1746 error = EBUSY; 1747 } 1748 if (error == 0) { 1749 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1750 NDFREE(&nd, NDF_ONLY_PNBUF); 1751 vput(nd.ni_dvp); 1752 if (vp == nd.ni_dvp) 1753 vrele(vp); 1754 else 1755 vput(vp); 1756 if ((error = vn_start_write(NULL, &mp, 1757 V_XSLEEP | PCATCH)) != 0) 1758 return (error); 1759 goto restart; 1760 } 1761 #ifdef MAC 1762 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1763 &nd.ni_cnd); 1764 if (error != 0) 1765 goto out; 1766 #endif 1767 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1768 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1769 #ifdef MAC 1770 out: 1771 #endif 1772 vn_finished_write(mp); 1773 } 1774 NDFREE(&nd, NDF_ONLY_PNBUF); 1775 vput(nd.ni_dvp); 1776 if (vp == nd.ni_dvp) 1777 vrele(vp); 1778 else 1779 vput(vp); 1780 return (error); 1781 } 1782 1783 /* 1784 * Reposition read/write file offset. 1785 */ 1786 #ifndef _SYS_SYSPROTO_H_ 1787 struct lseek_args { 1788 int fd; 1789 int pad; 1790 off_t offset; 1791 int whence; 1792 }; 1793 #endif 1794 int 1795 sys_lseek(td, uap) 1796 struct thread *td; 1797 register struct lseek_args /* { 1798 int fd; 1799 int pad; 1800 off_t offset; 1801 int whence; 1802 } */ *uap; 1803 { 1804 struct file *fp; 1805 cap_rights_t rights; 1806 int error; 1807 1808 AUDIT_ARG_FD(uap->fd); 1809 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1810 if (error != 0) 1811 return (error); 1812 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1813 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1814 fdrop(fp, td); 1815 return (error); 1816 } 1817 1818 #if defined(COMPAT_43) 1819 /* 1820 * Reposition read/write file offset. 
1821 */ 1822 #ifndef _SYS_SYSPROTO_H_ 1823 struct olseek_args { 1824 int fd; 1825 long offset; 1826 int whence; 1827 }; 1828 #endif 1829 int 1830 olseek(td, uap) 1831 struct thread *td; 1832 register struct olseek_args /* { 1833 int fd; 1834 long offset; 1835 int whence; 1836 } */ *uap; 1837 { 1838 struct lseek_args /* { 1839 int fd; 1840 int pad; 1841 off_t offset; 1842 int whence; 1843 } */ nuap; 1844 1845 nuap.fd = uap->fd; 1846 nuap.offset = uap->offset; 1847 nuap.whence = uap->whence; 1848 return (sys_lseek(td, &nuap)); 1849 } 1850 #endif /* COMPAT_43 */ 1851 1852 #if defined(COMPAT_FREEBSD6) 1853 /* Version with the 'pad' argument */ 1854 int 1855 freebsd6_lseek(td, uap) 1856 struct thread *td; 1857 register struct freebsd6_lseek_args *uap; 1858 { 1859 struct lseek_args ouap; 1860 1861 ouap.fd = uap->fd; 1862 ouap.offset = uap->offset; 1863 ouap.whence = uap->whence; 1864 return (sys_lseek(td, &ouap)); 1865 } 1866 #endif 1867 1868 /* 1869 * Check access permissions using passed credentials. 1870 */ 1871 static int 1872 vn_access(vp, user_flags, cred, td) 1873 struct vnode *vp; 1874 int user_flags; 1875 struct ucred *cred; 1876 struct thread *td; 1877 { 1878 accmode_t accmode; 1879 int error; 1880 1881 /* Flags == 0 means only check for existence. */ 1882 if (user_flags == 0) 1883 return (0); 1884 1885 accmode = 0; 1886 if (user_flags & R_OK) 1887 accmode |= VREAD; 1888 if (user_flags & W_OK) 1889 accmode |= VWRITE; 1890 if (user_flags & X_OK) 1891 accmode |= VEXEC; 1892 #ifdef MAC 1893 error = mac_vnode_check_access(cred, vp, accmode); 1894 if (error != 0) 1895 return (error); 1896 #endif 1897 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1898 error = VOP_ACCESS(vp, accmode, cred, td); 1899 return (error); 1900 } 1901 1902 /* 1903 * Check access permissions using "real" credentials. 
1904 */ 1905 #ifndef _SYS_SYSPROTO_H_ 1906 struct access_args { 1907 char *path; 1908 int amode; 1909 }; 1910 #endif 1911 int 1912 sys_access(td, uap) 1913 struct thread *td; 1914 register struct access_args /* { 1915 char *path; 1916 int amode; 1917 } */ *uap; 1918 { 1919 1920 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1921 0, uap->amode)); 1922 } 1923 1924 #ifndef _SYS_SYSPROTO_H_ 1925 struct faccessat_args { 1926 int dirfd; 1927 char *path; 1928 int amode; 1929 int flag; 1930 } 1931 #endif 1932 int 1933 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1934 { 1935 1936 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1937 uap->amode)); 1938 } 1939 1940 int 1941 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1942 int flag, int amode) 1943 { 1944 struct ucred *cred, *usecred; 1945 struct vnode *vp; 1946 struct nameidata nd; 1947 cap_rights_t rights; 1948 int error; 1949 1950 if (flag & ~AT_EACCESS) 1951 return (EINVAL); 1952 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1953 return (EINVAL); 1954 1955 /* 1956 * Create and modify a temporary credential instead of one that 1957 * is potentially shared (if we need one). 
1958 */ 1959 cred = td->td_ucred; 1960 if ((flag & AT_EACCESS) == 0 && 1961 ((cred->cr_uid != cred->cr_ruid || 1962 cred->cr_rgid != cred->cr_groups[0]))) { 1963 usecred = crdup(cred); 1964 usecred->cr_uid = cred->cr_ruid; 1965 usecred->cr_groups[0] = cred->cr_rgid; 1966 td->td_ucred = usecred; 1967 } else 1968 usecred = cred; 1969 AUDIT_ARG_VALUE(amode); 1970 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1971 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1972 td); 1973 if ((error = namei(&nd)) != 0) 1974 goto out; 1975 vp = nd.ni_vp; 1976 1977 error = vn_access(vp, amode, usecred, td); 1978 NDFREE(&nd, NDF_ONLY_PNBUF); 1979 vput(vp); 1980 out: 1981 if (usecred != cred) { 1982 td->td_ucred = cred; 1983 crfree(usecred); 1984 } 1985 return (error); 1986 } 1987 1988 /* 1989 * Check access permissions using "effective" credentials. 1990 */ 1991 #ifndef _SYS_SYSPROTO_H_ 1992 struct eaccess_args { 1993 char *path; 1994 int amode; 1995 }; 1996 #endif 1997 int 1998 sys_eaccess(td, uap) 1999 struct thread *td; 2000 register struct eaccess_args /* { 2001 char *path; 2002 int amode; 2003 } */ *uap; 2004 { 2005 2006 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2007 AT_EACCESS, uap->amode)); 2008 } 2009 2010 #if defined(COMPAT_43) 2011 /* 2012 * Get file status; this version follows links. 2013 */ 2014 #ifndef _SYS_SYSPROTO_H_ 2015 struct ostat_args { 2016 char *path; 2017 struct ostat *ub; 2018 }; 2019 #endif 2020 int 2021 ostat(td, uap) 2022 struct thread *td; 2023 register struct ostat_args /* { 2024 char *path; 2025 struct ostat *ub; 2026 } */ *uap; 2027 { 2028 struct stat sb; 2029 struct ostat osb; 2030 int error; 2031 2032 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2033 &sb, NULL); 2034 if (error != 0) 2035 return (error); 2036 cvtstat(&sb, &osb); 2037 return (copyout(&osb, uap->ub, sizeof (osb))); 2038 } 2039 2040 /* 2041 * Get file status; this version does not follow links. 
2042 */ 2043 #ifndef _SYS_SYSPROTO_H_ 2044 struct olstat_args { 2045 char *path; 2046 struct ostat *ub; 2047 }; 2048 #endif 2049 int 2050 olstat(td, uap) 2051 struct thread *td; 2052 register struct olstat_args /* { 2053 char *path; 2054 struct ostat *ub; 2055 } */ *uap; 2056 { 2057 struct stat sb; 2058 struct ostat osb; 2059 int error; 2060 2061 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2062 UIO_USERSPACE, &sb, NULL); 2063 if (error != 0) 2064 return (error); 2065 cvtstat(&sb, &osb); 2066 return (copyout(&osb, uap->ub, sizeof (osb))); 2067 } 2068 2069 /* 2070 * Convert from an old to a new stat structure. 2071 */ 2072 void 2073 cvtstat(st, ost) 2074 struct stat *st; 2075 struct ostat *ost; 2076 { 2077 2078 ost->st_dev = st->st_dev; 2079 ost->st_ino = st->st_ino; 2080 ost->st_mode = st->st_mode; 2081 ost->st_nlink = st->st_nlink; 2082 ost->st_uid = st->st_uid; 2083 ost->st_gid = st->st_gid; 2084 ost->st_rdev = st->st_rdev; 2085 if (st->st_size < (quad_t)1 << 32) 2086 ost->st_size = st->st_size; 2087 else 2088 ost->st_size = -2; 2089 ost->st_atim = st->st_atim; 2090 ost->st_mtim = st->st_mtim; 2091 ost->st_ctim = st->st_ctim; 2092 ost->st_blksize = st->st_blksize; 2093 ost->st_blocks = st->st_blocks; 2094 ost->st_flags = st->st_flags; 2095 ost->st_gen = st->st_gen; 2096 } 2097 #endif /* COMPAT_43 */ 2098 2099 /* 2100 * Get file status; this version follows links. 
2101 */ 2102 #ifndef _SYS_SYSPROTO_H_ 2103 struct stat_args { 2104 char *path; 2105 struct stat *ub; 2106 }; 2107 #endif 2108 int 2109 sys_stat(td, uap) 2110 struct thread *td; 2111 register struct stat_args /* { 2112 char *path; 2113 struct stat *ub; 2114 } */ *uap; 2115 { 2116 struct stat sb; 2117 int error; 2118 2119 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2120 &sb, NULL); 2121 if (error == 0) 2122 error = copyout(&sb, uap->ub, sizeof (sb)); 2123 return (error); 2124 } 2125 2126 #ifndef _SYS_SYSPROTO_H_ 2127 struct fstatat_args { 2128 int fd; 2129 char *path; 2130 struct stat *buf; 2131 int flag; 2132 } 2133 #endif 2134 int 2135 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2136 { 2137 struct stat sb; 2138 int error; 2139 2140 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2141 UIO_USERSPACE, &sb, NULL); 2142 if (error == 0) 2143 error = copyout(&sb, uap->buf, sizeof (sb)); 2144 return (error); 2145 } 2146 2147 int 2148 kern_statat(struct thread *td, int flag, int fd, char *path, 2149 enum uio_seg pathseg, struct stat *sbp, 2150 void (*hook)(struct vnode *vp, struct stat *sbp)) 2151 { 2152 struct nameidata nd; 2153 struct stat sb; 2154 cap_rights_t rights; 2155 int error; 2156 2157 if (flag & ~AT_SYMLINK_NOFOLLOW) 2158 return (EINVAL); 2159 2160 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2161 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2162 cap_rights_init(&rights, CAP_FSTAT), td); 2163 2164 if ((error = namei(&nd)) != 0) 2165 return (error); 2166 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2167 if (error == 0) { 2168 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2169 if (S_ISREG(sb.st_mode)) 2170 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2171 if (__predict_false(hook != NULL)) 2172 hook(nd.ni_vp, &sb); 2173 } 2174 NDFREE(&nd, NDF_ONLY_PNBUF); 2175 vput(nd.ni_vp); 2176 if (error != 0) 2177 return (error); 2178 *sbp = sb; 2179 #ifdef KTRACE 2180 if (KTRPOINT(td, KTR_STRUCT)) 2181 ktrstat(&sb); 2182 #endif 2183 return (0); 2184 } 2185 2186 /* 2187 * Get file status; this version does not follow links. 2188 */ 2189 #ifndef _SYS_SYSPROTO_H_ 2190 struct lstat_args { 2191 char *path; 2192 struct stat *ub; 2193 }; 2194 #endif 2195 int 2196 sys_lstat(td, uap) 2197 struct thread *td; 2198 register struct lstat_args /* { 2199 char *path; 2200 struct stat *ub; 2201 } */ *uap; 2202 { 2203 struct stat sb; 2204 int error; 2205 2206 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2207 UIO_USERSPACE, &sb, NULL); 2208 if (error == 0) 2209 error = copyout(&sb, uap->ub, sizeof (sb)); 2210 return (error); 2211 } 2212 2213 /* 2214 * Implementation of the NetBSD [l]stat() functions. 
2215 */ 2216 void 2217 cvtnstat(sb, nsb) 2218 struct stat *sb; 2219 struct nstat *nsb; 2220 { 2221 2222 bzero(nsb, sizeof *nsb); 2223 nsb->st_dev = sb->st_dev; 2224 nsb->st_ino = sb->st_ino; 2225 nsb->st_mode = sb->st_mode; 2226 nsb->st_nlink = sb->st_nlink; 2227 nsb->st_uid = sb->st_uid; 2228 nsb->st_gid = sb->st_gid; 2229 nsb->st_rdev = sb->st_rdev; 2230 nsb->st_atim = sb->st_atim; 2231 nsb->st_mtim = sb->st_mtim; 2232 nsb->st_ctim = sb->st_ctim; 2233 nsb->st_size = sb->st_size; 2234 nsb->st_blocks = sb->st_blocks; 2235 nsb->st_blksize = sb->st_blksize; 2236 nsb->st_flags = sb->st_flags; 2237 nsb->st_gen = sb->st_gen; 2238 nsb->st_birthtim = sb->st_birthtim; 2239 } 2240 2241 #ifndef _SYS_SYSPROTO_H_ 2242 struct nstat_args { 2243 char *path; 2244 struct nstat *ub; 2245 }; 2246 #endif 2247 int 2248 sys_nstat(td, uap) 2249 struct thread *td; 2250 register struct nstat_args /* { 2251 char *path; 2252 struct nstat *ub; 2253 } */ *uap; 2254 { 2255 struct stat sb; 2256 struct nstat nsb; 2257 int error; 2258 2259 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2260 &sb, NULL); 2261 if (error != 0) 2262 return (error); 2263 cvtnstat(&sb, &nsb); 2264 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2265 } 2266 2267 /* 2268 * NetBSD lstat. Get file status; this version does not follow links. 2269 */ 2270 #ifndef _SYS_SYSPROTO_H_ 2271 struct lstat_args { 2272 char *path; 2273 struct stat *ub; 2274 }; 2275 #endif 2276 int 2277 sys_nlstat(td, uap) 2278 struct thread *td; 2279 register struct nlstat_args /* { 2280 char *path; 2281 struct nstat *ub; 2282 } */ *uap; 2283 { 2284 struct stat sb; 2285 struct nstat nsb; 2286 int error; 2287 2288 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2289 UIO_USERSPACE, &sb, NULL); 2290 if (error != 0) 2291 return (error); 2292 cvtnstat(&sb, &nsb); 2293 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2294 } 2295 2296 /* 2297 * Get configurable pathname variables. 
2298 */ 2299 #ifndef _SYS_SYSPROTO_H_ 2300 struct pathconf_args { 2301 char *path; 2302 int name; 2303 }; 2304 #endif 2305 int 2306 sys_pathconf(td, uap) 2307 struct thread *td; 2308 register struct pathconf_args /* { 2309 char *path; 2310 int name; 2311 } */ *uap; 2312 { 2313 2314 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2315 } 2316 2317 #ifndef _SYS_SYSPROTO_H_ 2318 struct lpathconf_args { 2319 char *path; 2320 int name; 2321 }; 2322 #endif 2323 int 2324 sys_lpathconf(td, uap) 2325 struct thread *td; 2326 register struct lpathconf_args /* { 2327 char *path; 2328 int name; 2329 } */ *uap; 2330 { 2331 2332 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2333 NOFOLLOW)); 2334 } 2335 2336 int 2337 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2338 u_long flags) 2339 { 2340 struct nameidata nd; 2341 int error; 2342 2343 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2344 pathseg, path, td); 2345 if ((error = namei(&nd)) != 0) 2346 return (error); 2347 NDFREE(&nd, NDF_ONLY_PNBUF); 2348 2349 /* If asynchronous I/O is available, it works for all files. */ 2350 if (name == _PC_ASYNC_IO) 2351 td->td_retval[0] = async_io_version; 2352 else 2353 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2354 vput(nd.ni_vp); 2355 return (error); 2356 } 2357 2358 /* 2359 * Return target name of a symbolic link. 
2360 */ 2361 #ifndef _SYS_SYSPROTO_H_ 2362 struct readlink_args { 2363 char *path; 2364 char *buf; 2365 size_t count; 2366 }; 2367 #endif 2368 int 2369 sys_readlink(td, uap) 2370 struct thread *td; 2371 register struct readlink_args /* { 2372 char *path; 2373 char *buf; 2374 size_t count; 2375 } */ *uap; 2376 { 2377 2378 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2379 uap->buf, UIO_USERSPACE, uap->count)); 2380 } 2381 #ifndef _SYS_SYSPROTO_H_ 2382 struct readlinkat_args { 2383 int fd; 2384 char *path; 2385 char *buf; 2386 size_t bufsize; 2387 }; 2388 #endif 2389 int 2390 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2391 { 2392 2393 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2394 uap->buf, UIO_USERSPACE, uap->bufsize)); 2395 } 2396 2397 int 2398 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2399 char *buf, enum uio_seg bufseg, size_t count) 2400 { 2401 struct vnode *vp; 2402 struct iovec aiov; 2403 struct uio auio; 2404 struct nameidata nd; 2405 int error; 2406 2407 if (count > IOSIZE_MAX) 2408 return (EINVAL); 2409 2410 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2411 pathseg, path, fd, td); 2412 2413 if ((error = namei(&nd)) != 0) 2414 return (error); 2415 NDFREE(&nd, NDF_ONLY_PNBUF); 2416 vp = nd.ni_vp; 2417 #ifdef MAC 2418 error = mac_vnode_check_readlink(td->td_ucred, vp); 2419 if (error != 0) { 2420 vput(vp); 2421 return (error); 2422 } 2423 #endif 2424 if (vp->v_type != VLNK) 2425 error = EINVAL; 2426 else { 2427 aiov.iov_base = buf; 2428 aiov.iov_len = count; 2429 auio.uio_iov = &aiov; 2430 auio.uio_iovcnt = 1; 2431 auio.uio_offset = 0; 2432 auio.uio_rw = UIO_READ; 2433 auio.uio_segflg = bufseg; 2434 auio.uio_td = td; 2435 auio.uio_resid = count; 2436 error = VOP_READLINK(vp, &auio, td->td_ucred); 2437 td->td_retval[0] = count - auio.uio_resid; 2438 } 2439 vput(vp); 2440 return (error); 2441 } 2442 2443 /* 2444 * Common implementation code for 
chflags() and fchflags(). 2445 */ 2446 static int 2447 setfflags(td, vp, flags) 2448 struct thread *td; 2449 struct vnode *vp; 2450 u_long flags; 2451 { 2452 struct mount *mp; 2453 struct vattr vattr; 2454 int error; 2455 2456 /* We can't support the value matching VNOVAL. */ 2457 if (flags == VNOVAL) 2458 return (EOPNOTSUPP); 2459 2460 /* 2461 * Prevent non-root users from setting flags on devices. When 2462 * a device is reused, users can retain ownership of the device 2463 * if they are allowed to set flags and programs assume that 2464 * chown can't fail when done as root. 2465 */ 2466 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2467 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2468 if (error != 0) 2469 return (error); 2470 } 2471 2472 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2473 return (error); 2474 VATTR_NULL(&vattr); 2475 vattr.va_flags = flags; 2476 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2477 #ifdef MAC 2478 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2479 if (error == 0) 2480 #endif 2481 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2482 VOP_UNLOCK(vp, 0); 2483 vn_finished_write(mp); 2484 return (error); 2485 } 2486 2487 /* 2488 * Change flags of a file given a path name. 
2489 */ 2490 #ifndef _SYS_SYSPROTO_H_ 2491 struct chflags_args { 2492 const char *path; 2493 u_long flags; 2494 }; 2495 #endif 2496 int 2497 sys_chflags(td, uap) 2498 struct thread *td; 2499 register struct chflags_args /* { 2500 const char *path; 2501 u_long flags; 2502 } */ *uap; 2503 { 2504 2505 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2506 uap->flags, 0)); 2507 } 2508 2509 #ifndef _SYS_SYSPROTO_H_ 2510 struct chflagsat_args { 2511 int fd; 2512 const char *path; 2513 u_long flags; 2514 int atflag; 2515 } 2516 #endif 2517 int 2518 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2519 { 2520 int fd = uap->fd; 2521 const char *path = uap->path; 2522 u_long flags = uap->flags; 2523 int atflag = uap->atflag; 2524 2525 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2526 return (EINVAL); 2527 2528 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2529 } 2530 2531 /* 2532 * Same as chflags() but doesn't follow symlinks. 2533 */ 2534 int 2535 sys_lchflags(td, uap) 2536 struct thread *td; 2537 register struct lchflags_args /* { 2538 const char *path; 2539 u_long flags; 2540 } */ *uap; 2541 { 2542 2543 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2544 uap->flags, AT_SYMLINK_NOFOLLOW)); 2545 } 2546 2547 static int 2548 kern_chflagsat(struct thread *td, int fd, const char *path, 2549 enum uio_seg pathseg, u_long flags, int atflag) 2550 { 2551 struct nameidata nd; 2552 cap_rights_t rights; 2553 int error, follow; 2554 2555 AUDIT_ARG_FFLAGS(flags); 2556 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2557 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2558 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2559 if ((error = namei(&nd)) != 0) 2560 return (error); 2561 NDFREE(&nd, NDF_ONLY_PNBUF); 2562 error = setfflags(td, nd.ni_vp, flags); 2563 vrele(nd.ni_vp); 2564 return (error); 2565 } 2566 2567 /* 2568 * Change flags of a file given a file descriptor. 
2569 */ 2570 #ifndef _SYS_SYSPROTO_H_ 2571 struct fchflags_args { 2572 int fd; 2573 u_long flags; 2574 }; 2575 #endif 2576 int 2577 sys_fchflags(td, uap) 2578 struct thread *td; 2579 register struct fchflags_args /* { 2580 int fd; 2581 u_long flags; 2582 } */ *uap; 2583 { 2584 struct file *fp; 2585 cap_rights_t rights; 2586 int error; 2587 2588 AUDIT_ARG_FD(uap->fd); 2589 AUDIT_ARG_FFLAGS(uap->flags); 2590 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2591 &fp); 2592 if (error != 0) 2593 return (error); 2594 #ifdef AUDIT 2595 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2596 AUDIT_ARG_VNODE1(fp->f_vnode); 2597 VOP_UNLOCK(fp->f_vnode, 0); 2598 #endif 2599 error = setfflags(td, fp->f_vnode, uap->flags); 2600 fdrop(fp, td); 2601 return (error); 2602 } 2603 2604 /* 2605 * Common implementation code for chmod(), lchmod() and fchmod(). 2606 */ 2607 int 2608 setfmode(td, cred, vp, mode) 2609 struct thread *td; 2610 struct ucred *cred; 2611 struct vnode *vp; 2612 int mode; 2613 { 2614 struct mount *mp; 2615 struct vattr vattr; 2616 int error; 2617 2618 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2619 return (error); 2620 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2621 VATTR_NULL(&vattr); 2622 vattr.va_mode = mode & ALLPERMS; 2623 #ifdef MAC 2624 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2625 if (error == 0) 2626 #endif 2627 error = VOP_SETATTR(vp, &vattr, cred); 2628 VOP_UNLOCK(vp, 0); 2629 vn_finished_write(mp); 2630 return (error); 2631 } 2632 2633 /* 2634 * Change mode of a file given path name. 
2635 */ 2636 #ifndef _SYS_SYSPROTO_H_ 2637 struct chmod_args { 2638 char *path; 2639 int mode; 2640 }; 2641 #endif 2642 int 2643 sys_chmod(td, uap) 2644 struct thread *td; 2645 register struct chmod_args /* { 2646 char *path; 2647 int mode; 2648 } */ *uap; 2649 { 2650 2651 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2652 uap->mode, 0)); 2653 } 2654 2655 #ifndef _SYS_SYSPROTO_H_ 2656 struct fchmodat_args { 2657 int dirfd; 2658 char *path; 2659 mode_t mode; 2660 int flag; 2661 } 2662 #endif 2663 int 2664 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2665 { 2666 int flag = uap->flag; 2667 int fd = uap->fd; 2668 char *path = uap->path; 2669 mode_t mode = uap->mode; 2670 2671 if (flag & ~AT_SYMLINK_NOFOLLOW) 2672 return (EINVAL); 2673 2674 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2675 } 2676 2677 /* 2678 * Change mode of a file given path name (don't follow links.) 2679 */ 2680 #ifndef _SYS_SYSPROTO_H_ 2681 struct lchmod_args { 2682 char *path; 2683 int mode; 2684 }; 2685 #endif 2686 int 2687 sys_lchmod(td, uap) 2688 struct thread *td; 2689 register struct lchmod_args /* { 2690 char *path; 2691 int mode; 2692 } */ *uap; 2693 { 2694 2695 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2696 uap->mode, AT_SYMLINK_NOFOLLOW)); 2697 } 2698 2699 int 2700 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2701 mode_t mode, int flag) 2702 { 2703 struct nameidata nd; 2704 cap_rights_t rights; 2705 int error, follow; 2706 2707 AUDIT_ARG_MODE(mode); 2708 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2709 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2710 cap_rights_init(&rights, CAP_FCHMOD), td); 2711 if ((error = namei(&nd)) != 0) 2712 return (error); 2713 NDFREE(&nd, NDF_ONLY_PNBUF); 2714 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2715 vrele(nd.ni_vp); 2716 return (error); 2717 } 2718 2719 /* 2720 * Change mode of a file given a file descriptor. 2721 */ 2722 #ifndef _SYS_SYSPROTO_H_ 2723 struct fchmod_args { 2724 int fd; 2725 int mode; 2726 }; 2727 #endif 2728 int 2729 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2730 { 2731 struct file *fp; 2732 cap_rights_t rights; 2733 int error; 2734 2735 AUDIT_ARG_FD(uap->fd); 2736 AUDIT_ARG_MODE(uap->mode); 2737 2738 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2739 if (error != 0) 2740 return (error); 2741 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2742 fdrop(fp, td); 2743 return (error); 2744 } 2745 2746 /* 2747 * Common implementation for chown(), lchown(), and fchown() 2748 */ 2749 int 2750 setfown(td, cred, vp, uid, gid) 2751 struct thread *td; 2752 struct ucred *cred; 2753 struct vnode *vp; 2754 uid_t uid; 2755 gid_t gid; 2756 { 2757 struct mount *mp; 2758 struct vattr vattr; 2759 int error; 2760 2761 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2762 return (error); 2763 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2764 VATTR_NULL(&vattr); 2765 vattr.va_uid = uid; 2766 vattr.va_gid = gid; 2767 #ifdef MAC 2768 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2769 vattr.va_gid); 2770 if (error == 0) 2771 #endif 2772 error = VOP_SETATTR(vp, &vattr, cred); 2773 VOP_UNLOCK(vp, 0); 2774 vn_finished_write(mp); 2775 return (error); 2776 } 2777 2778 /* 2779 * Set ownership given a path name. 
2780 */ 2781 #ifndef _SYS_SYSPROTO_H_ 2782 struct chown_args { 2783 char *path; 2784 int uid; 2785 int gid; 2786 }; 2787 #endif 2788 int 2789 sys_chown(td, uap) 2790 struct thread *td; 2791 register struct chown_args /* { 2792 char *path; 2793 int uid; 2794 int gid; 2795 } */ *uap; 2796 { 2797 2798 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2799 uap->gid, 0)); 2800 } 2801 2802 #ifndef _SYS_SYSPROTO_H_ 2803 struct fchownat_args { 2804 int fd; 2805 const char * path; 2806 uid_t uid; 2807 gid_t gid; 2808 int flag; 2809 }; 2810 #endif 2811 int 2812 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2813 { 2814 int flag; 2815 2816 flag = uap->flag; 2817 if (flag & ~AT_SYMLINK_NOFOLLOW) 2818 return (EINVAL); 2819 2820 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2821 uap->gid, uap->flag)); 2822 } 2823 2824 int 2825 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2826 int uid, int gid, int flag) 2827 { 2828 struct nameidata nd; 2829 cap_rights_t rights; 2830 int error, follow; 2831 2832 AUDIT_ARG_OWNER(uid, gid); 2833 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2834 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2835 cap_rights_init(&rights, CAP_FCHOWN), td); 2836 2837 if ((error = namei(&nd)) != 0) 2838 return (error); 2839 NDFREE(&nd, NDF_ONLY_PNBUF); 2840 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2841 vrele(nd.ni_vp); 2842 return (error); 2843 } 2844 2845 /* 2846 * Set ownership given a path name, do not cross symlinks. 
2847 */ 2848 #ifndef _SYS_SYSPROTO_H_ 2849 struct lchown_args { 2850 char *path; 2851 int uid; 2852 int gid; 2853 }; 2854 #endif 2855 int 2856 sys_lchown(td, uap) 2857 struct thread *td; 2858 register struct lchown_args /* { 2859 char *path; 2860 int uid; 2861 int gid; 2862 } */ *uap; 2863 { 2864 2865 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2866 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2867 } 2868 2869 /* 2870 * Set ownership given a file descriptor. 2871 */ 2872 #ifndef _SYS_SYSPROTO_H_ 2873 struct fchown_args { 2874 int fd; 2875 int uid; 2876 int gid; 2877 }; 2878 #endif 2879 int 2880 sys_fchown(td, uap) 2881 struct thread *td; 2882 register struct fchown_args /* { 2883 int fd; 2884 int uid; 2885 int gid; 2886 } */ *uap; 2887 { 2888 struct file *fp; 2889 cap_rights_t rights; 2890 int error; 2891 2892 AUDIT_ARG_FD(uap->fd); 2893 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2894 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2895 if (error != 0) 2896 return (error); 2897 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2898 fdrop(fp, td); 2899 return (error); 2900 } 2901 2902 /* 2903 * Common implementation code for utimes(), lutimes(), and futimes(). 
2904 */ 2905 static int 2906 getutimes(usrtvp, tvpseg, tsp) 2907 const struct timeval *usrtvp; 2908 enum uio_seg tvpseg; 2909 struct timespec *tsp; 2910 { 2911 struct timeval tv[2]; 2912 const struct timeval *tvp; 2913 int error; 2914 2915 if (usrtvp == NULL) { 2916 vfs_timestamp(&tsp[0]); 2917 tsp[1] = tsp[0]; 2918 } else { 2919 if (tvpseg == UIO_SYSSPACE) { 2920 tvp = usrtvp; 2921 } else { 2922 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2923 return (error); 2924 tvp = tv; 2925 } 2926 2927 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2928 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2929 return (EINVAL); 2930 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2931 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2932 } 2933 return (0); 2934 } 2935 2936 /* 2937 * Common implementation code for futimens(), utimensat(). 2938 */ 2939 #define UTIMENS_NULL 0x1 2940 #define UTIMENS_EXIT 0x2 2941 static int 2942 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2943 struct timespec *tsp, int *retflags) 2944 { 2945 struct timespec tsnow; 2946 int error; 2947 2948 vfs_timestamp(&tsnow); 2949 *retflags = 0; 2950 if (usrtsp == NULL) { 2951 tsp[0] = tsnow; 2952 tsp[1] = tsnow; 2953 *retflags |= UTIMENS_NULL; 2954 return (0); 2955 } 2956 if (tspseg == UIO_SYSSPACE) { 2957 tsp[0] = usrtsp[0]; 2958 tsp[1] = usrtsp[1]; 2959 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2960 return (error); 2961 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2962 *retflags |= UTIMENS_EXIT; 2963 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2964 *retflags |= UTIMENS_NULL; 2965 if (tsp[0].tv_nsec == UTIME_OMIT) 2966 tsp[0].tv_sec = VNOVAL; 2967 else if (tsp[0].tv_nsec == UTIME_NOW) 2968 tsp[0] = tsnow; 2969 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2970 return (EINVAL); 2971 if (tsp[1].tv_nsec == UTIME_OMIT) 2972 tsp[1].tv_sec = VNOVAL; 2973 else if (tsp[1].tv_nsec == UTIME_NOW) 2974 tsp[1] = tsnow; 2975 else if 
(tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2976 return (EINVAL); 2977 2978 return (0); 2979 } 2980 2981 /* 2982 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2983 * and utimensat(). 2984 */ 2985 static int 2986 setutimes(td, vp, ts, numtimes, nullflag) 2987 struct thread *td; 2988 struct vnode *vp; 2989 const struct timespec *ts; 2990 int numtimes; 2991 int nullflag; 2992 { 2993 struct mount *mp; 2994 struct vattr vattr; 2995 int error, setbirthtime; 2996 2997 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2998 return (error); 2999 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3000 setbirthtime = 0; 3001 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3002 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3003 setbirthtime = 1; 3004 VATTR_NULL(&vattr); 3005 vattr.va_atime = ts[0]; 3006 vattr.va_mtime = ts[1]; 3007 if (setbirthtime) 3008 vattr.va_birthtime = ts[1]; 3009 if (numtimes > 2) 3010 vattr.va_birthtime = ts[2]; 3011 if (nullflag) 3012 vattr.va_vaflags |= VA_UTIMES_NULL; 3013 #ifdef MAC 3014 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3015 vattr.va_mtime); 3016 #endif 3017 if (error == 0) 3018 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3019 VOP_UNLOCK(vp, 0); 3020 vn_finished_write(mp); 3021 return (error); 3022 } 3023 3024 /* 3025 * Set the access and modification times of a file. 
3026 */ 3027 #ifndef _SYS_SYSPROTO_H_ 3028 struct utimes_args { 3029 char *path; 3030 struct timeval *tptr; 3031 }; 3032 #endif 3033 int 3034 sys_utimes(td, uap) 3035 struct thread *td; 3036 register struct utimes_args /* { 3037 char *path; 3038 struct timeval *tptr; 3039 } */ *uap; 3040 { 3041 3042 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3043 uap->tptr, UIO_USERSPACE)); 3044 } 3045 3046 #ifndef _SYS_SYSPROTO_H_ 3047 struct futimesat_args { 3048 int fd; 3049 const char * path; 3050 const struct timeval * times; 3051 }; 3052 #endif 3053 int 3054 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3055 { 3056 3057 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3058 uap->times, UIO_USERSPACE)); 3059 } 3060 3061 int 3062 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3063 struct timeval *tptr, enum uio_seg tptrseg) 3064 { 3065 struct nameidata nd; 3066 struct timespec ts[2]; 3067 cap_rights_t rights; 3068 int error; 3069 3070 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3071 return (error); 3072 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3073 cap_rights_init(&rights, CAP_FUTIMES), td); 3074 3075 if ((error = namei(&nd)) != 0) 3076 return (error); 3077 NDFREE(&nd, NDF_ONLY_PNBUF); 3078 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3079 vrele(nd.ni_vp); 3080 return (error); 3081 } 3082 3083 /* 3084 * Set the access and modification times of a file. 
3085 */ 3086 #ifndef _SYS_SYSPROTO_H_ 3087 struct lutimes_args { 3088 char *path; 3089 struct timeval *tptr; 3090 }; 3091 #endif 3092 int 3093 sys_lutimes(td, uap) 3094 struct thread *td; 3095 register struct lutimes_args /* { 3096 char *path; 3097 struct timeval *tptr; 3098 } */ *uap; 3099 { 3100 3101 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3102 UIO_USERSPACE)); 3103 } 3104 3105 int 3106 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3107 struct timeval *tptr, enum uio_seg tptrseg) 3108 { 3109 struct timespec ts[2]; 3110 struct nameidata nd; 3111 int error; 3112 3113 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3114 return (error); 3115 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3116 if ((error = namei(&nd)) != 0) 3117 return (error); 3118 NDFREE(&nd, NDF_ONLY_PNBUF); 3119 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3120 vrele(nd.ni_vp); 3121 return (error); 3122 } 3123 3124 /* 3125 * Set the access and modification times of a file. 
3126 */ 3127 #ifndef _SYS_SYSPROTO_H_ 3128 struct futimes_args { 3129 int fd; 3130 struct timeval *tptr; 3131 }; 3132 #endif 3133 int 3134 sys_futimes(td, uap) 3135 struct thread *td; 3136 register struct futimes_args /* { 3137 int fd; 3138 struct timeval *tptr; 3139 } */ *uap; 3140 { 3141 3142 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3143 } 3144 3145 int 3146 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3147 enum uio_seg tptrseg) 3148 { 3149 struct timespec ts[2]; 3150 struct file *fp; 3151 cap_rights_t rights; 3152 int error; 3153 3154 AUDIT_ARG_FD(fd); 3155 error = getutimes(tptr, tptrseg, ts); 3156 if (error != 0) 3157 return (error); 3158 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3159 if (error != 0) 3160 return (error); 3161 #ifdef AUDIT 3162 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3163 AUDIT_ARG_VNODE1(fp->f_vnode); 3164 VOP_UNLOCK(fp->f_vnode, 0); 3165 #endif 3166 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3167 fdrop(fp, td); 3168 return (error); 3169 } 3170 3171 int 3172 sys_futimens(struct thread *td, struct futimens_args *uap) 3173 { 3174 3175 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3176 } 3177 3178 int 3179 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3180 enum uio_seg tptrseg) 3181 { 3182 struct timespec ts[2]; 3183 struct file *fp; 3184 cap_rights_t rights; 3185 int error, flags; 3186 3187 AUDIT_ARG_FD(fd); 3188 error = getutimens(tptr, tptrseg, ts, &flags); 3189 if (error != 0) 3190 return (error); 3191 if (flags & UTIMENS_EXIT) 3192 return (0); 3193 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3194 if (error != 0) 3195 return (error); 3196 #ifdef AUDIT 3197 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3198 AUDIT_ARG_VNODE1(fp->f_vnode); 3199 VOP_UNLOCK(fp->f_vnode, 0); 3200 #endif 3201 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3202 fdrop(fp, td); 3203 return (error); 3204 } 3205 
3206 int 3207 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3208 { 3209 3210 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3211 uap->times, UIO_USERSPACE, uap->flag)); 3212 } 3213 3214 int 3215 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3216 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3217 { 3218 struct nameidata nd; 3219 struct timespec ts[2]; 3220 cap_rights_t rights; 3221 int error, flags; 3222 3223 if (flag & ~AT_SYMLINK_NOFOLLOW) 3224 return (EINVAL); 3225 3226 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3227 return (error); 3228 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3229 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3230 cap_rights_init(&rights, CAP_FUTIMES), td); 3231 if ((error = namei(&nd)) != 0) 3232 return (error); 3233 /* 3234 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3235 * POSIX states: 3236 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3237 * "Search permission is denied by a component of the path prefix." 3238 */ 3239 NDFREE(&nd, NDF_ONLY_PNBUF); 3240 if ((flags & UTIMENS_EXIT) == 0) 3241 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3242 vrele(nd.ni_vp); 3243 return (error); 3244 } 3245 3246 /* 3247 * Truncate a file given its path name. 
3248 */ 3249 #ifndef _SYS_SYSPROTO_H_ 3250 struct truncate_args { 3251 char *path; 3252 int pad; 3253 off_t length; 3254 }; 3255 #endif 3256 int 3257 sys_truncate(td, uap) 3258 struct thread *td; 3259 register struct truncate_args /* { 3260 char *path; 3261 int pad; 3262 off_t length; 3263 } */ *uap; 3264 { 3265 3266 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3267 } 3268 3269 int 3270 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3271 { 3272 struct mount *mp; 3273 struct vnode *vp; 3274 void *rl_cookie; 3275 struct vattr vattr; 3276 struct nameidata nd; 3277 int error; 3278 3279 if (length < 0) 3280 return(EINVAL); 3281 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3282 if ((error = namei(&nd)) != 0) 3283 return (error); 3284 vp = nd.ni_vp; 3285 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3286 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3287 vn_rangelock_unlock(vp, rl_cookie); 3288 vrele(vp); 3289 return (error); 3290 } 3291 NDFREE(&nd, NDF_ONLY_PNBUF); 3292 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3293 if (vp->v_type == VDIR) 3294 error = EISDIR; 3295 #ifdef MAC 3296 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3297 } 3298 #endif 3299 else if ((error = vn_writechk(vp)) == 0 && 3300 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3301 VATTR_NULL(&vattr); 3302 vattr.va_size = length; 3303 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3304 } 3305 VOP_UNLOCK(vp, 0); 3306 vn_finished_write(mp); 3307 vn_rangelock_unlock(vp, rl_cookie); 3308 vrele(vp); 3309 return (error); 3310 } 3311 3312 #if defined(COMPAT_43) 3313 /* 3314 * Truncate a file given its path name. 
3315 */ 3316 #ifndef _SYS_SYSPROTO_H_ 3317 struct otruncate_args { 3318 char *path; 3319 long length; 3320 }; 3321 #endif 3322 int 3323 otruncate(td, uap) 3324 struct thread *td; 3325 register struct otruncate_args /* { 3326 char *path; 3327 long length; 3328 } */ *uap; 3329 { 3330 struct truncate_args /* { 3331 char *path; 3332 int pad; 3333 off_t length; 3334 } */ nuap; 3335 3336 nuap.path = uap->path; 3337 nuap.length = uap->length; 3338 return (sys_truncate(td, &nuap)); 3339 } 3340 #endif /* COMPAT_43 */ 3341 3342 #if defined(COMPAT_FREEBSD6) 3343 /* Versions with the pad argument */ 3344 int 3345 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3346 { 3347 struct truncate_args ouap; 3348 3349 ouap.path = uap->path; 3350 ouap.length = uap->length; 3351 return (sys_truncate(td, &ouap)); 3352 } 3353 3354 int 3355 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3356 { 3357 struct ftruncate_args ouap; 3358 3359 ouap.fd = uap->fd; 3360 ouap.length = uap->length; 3361 return (sys_ftruncate(td, &ouap)); 3362 } 3363 #endif 3364 3365 /* 3366 * Sync an open file. 
3367 */ 3368 #ifndef _SYS_SYSPROTO_H_ 3369 struct fsync_args { 3370 int fd; 3371 }; 3372 #endif 3373 int 3374 sys_fsync(td, uap) 3375 struct thread *td; 3376 struct fsync_args /* { 3377 int fd; 3378 } */ *uap; 3379 { 3380 struct vnode *vp; 3381 struct mount *mp; 3382 struct file *fp; 3383 cap_rights_t rights; 3384 int error, lock_flags; 3385 3386 AUDIT_ARG_FD(uap->fd); 3387 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3388 if (error != 0) 3389 return (error); 3390 vp = fp->f_vnode; 3391 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3392 if (error != 0) 3393 goto drop; 3394 if (MNT_SHARED_WRITES(mp) || 3395 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3396 lock_flags = LK_SHARED; 3397 } else { 3398 lock_flags = LK_EXCLUSIVE; 3399 } 3400 vn_lock(vp, lock_flags | LK_RETRY); 3401 AUDIT_ARG_VNODE1(vp); 3402 if (vp->v_object != NULL) { 3403 VM_OBJECT_WLOCK(vp->v_object); 3404 vm_object_page_clean(vp->v_object, 0, 0, 0); 3405 VM_OBJECT_WUNLOCK(vp->v_object); 3406 } 3407 error = VOP_FSYNC(vp, MNT_WAIT, td); 3408 3409 VOP_UNLOCK(vp, 0); 3410 vn_finished_write(mp); 3411 drop: 3412 fdrop(fp, td); 3413 return (error); 3414 } 3415 3416 /* 3417 * Rename files. Source and destination must either both be directories, or 3418 * both not be directories. If target is a directory, it must be empty. 
3419 */ 3420 #ifndef _SYS_SYSPROTO_H_ 3421 struct rename_args { 3422 char *from; 3423 char *to; 3424 }; 3425 #endif 3426 int 3427 sys_rename(td, uap) 3428 struct thread *td; 3429 register struct rename_args /* { 3430 char *from; 3431 char *to; 3432 } */ *uap; 3433 { 3434 3435 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3436 uap->to, UIO_USERSPACE)); 3437 } 3438 3439 #ifndef _SYS_SYSPROTO_H_ 3440 struct renameat_args { 3441 int oldfd; 3442 char *old; 3443 int newfd; 3444 char *new; 3445 }; 3446 #endif 3447 int 3448 sys_renameat(struct thread *td, struct renameat_args *uap) 3449 { 3450 3451 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3452 UIO_USERSPACE)); 3453 } 3454 3455 int 3456 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3457 enum uio_seg pathseg) 3458 { 3459 struct mount *mp = NULL; 3460 struct vnode *tvp, *fvp, *tdvp; 3461 struct nameidata fromnd, tond; 3462 cap_rights_t rights; 3463 int error; 3464 3465 again: 3466 bwillwrite(); 3467 #ifdef MAC 3468 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3469 AUDITVNODE1, pathseg, old, oldfd, 3470 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3471 #else 3472 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3473 pathseg, old, oldfd, 3474 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3475 #endif 3476 3477 if ((error = namei(&fromnd)) != 0) 3478 return (error); 3479 #ifdef MAC 3480 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3481 fromnd.ni_vp, &fromnd.ni_cnd); 3482 VOP_UNLOCK(fromnd.ni_dvp, 0); 3483 if (fromnd.ni_dvp != fromnd.ni_vp) 3484 VOP_UNLOCK(fromnd.ni_vp, 0); 3485 #endif 3486 fvp = fromnd.ni_vp; 3487 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3488 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3489 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3490 if (fromnd.ni_vp->v_type == VDIR) 3491 tond.ni_cnd.cn_flags |= WILLBEDIR; 3492 if ((error = 
namei(&tond)) != 0) { 3493 /* Translate error code for rename("dir1", "dir2/."). */ 3494 if (error == EISDIR && fvp->v_type == VDIR) 3495 error = EINVAL; 3496 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3497 vrele(fromnd.ni_dvp); 3498 vrele(fvp); 3499 goto out1; 3500 } 3501 tdvp = tond.ni_dvp; 3502 tvp = tond.ni_vp; 3503 error = vn_start_write(fvp, &mp, V_NOWAIT); 3504 if (error != 0) { 3505 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3506 NDFREE(&tond, NDF_ONLY_PNBUF); 3507 if (tvp != NULL) 3508 vput(tvp); 3509 if (tdvp == tvp) 3510 vrele(tdvp); 3511 else 3512 vput(tdvp); 3513 vrele(fromnd.ni_dvp); 3514 vrele(fvp); 3515 vrele(tond.ni_startdir); 3516 if (fromnd.ni_startdir != NULL) 3517 vrele(fromnd.ni_startdir); 3518 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3519 if (error != 0) 3520 return (error); 3521 goto again; 3522 } 3523 if (tvp != NULL) { 3524 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3525 error = ENOTDIR; 3526 goto out; 3527 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3528 error = EISDIR; 3529 goto out; 3530 } 3531 #ifdef CAPABILITIES 3532 if (newfd != AT_FDCWD) { 3533 /* 3534 * If the target already exists we require CAP_UNLINKAT 3535 * from 'newfd'. 3536 */ 3537 error = cap_check(&tond.ni_filecaps.fc_rights, 3538 cap_rights_init(&rights, CAP_UNLINKAT)); 3539 if (error != 0) 3540 goto out; 3541 } 3542 #endif 3543 } 3544 if (fvp == tdvp) { 3545 error = EINVAL; 3546 goto out; 3547 } 3548 /* 3549 * If the source is the same as the destination (that is, if they 3550 * are links to the same vnode), then there is nothing to do. 
3551 */ 3552 if (fvp == tvp) 3553 error = -1; 3554 #ifdef MAC 3555 else 3556 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3557 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3558 #endif 3559 out: 3560 if (error == 0) { 3561 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3562 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3563 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3564 NDFREE(&tond, NDF_ONLY_PNBUF); 3565 } else { 3566 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3567 NDFREE(&tond, NDF_ONLY_PNBUF); 3568 if (tvp != NULL) 3569 vput(tvp); 3570 if (tdvp == tvp) 3571 vrele(tdvp); 3572 else 3573 vput(tdvp); 3574 vrele(fromnd.ni_dvp); 3575 vrele(fvp); 3576 } 3577 vrele(tond.ni_startdir); 3578 vn_finished_write(mp); 3579 out1: 3580 if (fromnd.ni_startdir) 3581 vrele(fromnd.ni_startdir); 3582 if (error == -1) 3583 return (0); 3584 return (error); 3585 } 3586 3587 /* 3588 * Make a directory file. 3589 */ 3590 #ifndef _SYS_SYSPROTO_H_ 3591 struct mkdir_args { 3592 char *path; 3593 int mode; 3594 }; 3595 #endif 3596 int 3597 sys_mkdir(td, uap) 3598 struct thread *td; 3599 register struct mkdir_args /* { 3600 char *path; 3601 int mode; 3602 } */ *uap; 3603 { 3604 3605 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3606 uap->mode)); 3607 } 3608 3609 #ifndef _SYS_SYSPROTO_H_ 3610 struct mkdirat_args { 3611 int fd; 3612 char *path; 3613 mode_t mode; 3614 }; 3615 #endif 3616 int 3617 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3618 { 3619 3620 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3621 } 3622 3623 int 3624 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3625 int mode) 3626 { 3627 struct mount *mp; 3628 struct vnode *vp; 3629 struct vattr vattr; 3630 struct nameidata nd; 3631 cap_rights_t rights; 3632 int error; 3633 3634 AUDIT_ARG_MODE(mode); 3635 restart: 3636 bwillwrite(); 3637 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3638 NOCACHE, segflg, path, fd, 
cap_rights_init(&rights, CAP_MKDIRAT), 3639 td); 3640 nd.ni_cnd.cn_flags |= WILLBEDIR; 3641 if ((error = namei(&nd)) != 0) 3642 return (error); 3643 vp = nd.ni_vp; 3644 if (vp != NULL) { 3645 NDFREE(&nd, NDF_ONLY_PNBUF); 3646 /* 3647 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3648 * the strange behaviour of leaving the vnode unlocked 3649 * if the target is the same vnode as the parent. 3650 */ 3651 if (vp == nd.ni_dvp) 3652 vrele(nd.ni_dvp); 3653 else 3654 vput(nd.ni_dvp); 3655 vrele(vp); 3656 return (EEXIST); 3657 } 3658 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3659 NDFREE(&nd, NDF_ONLY_PNBUF); 3660 vput(nd.ni_dvp); 3661 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3662 return (error); 3663 goto restart; 3664 } 3665 VATTR_NULL(&vattr); 3666 vattr.va_type = VDIR; 3667 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3668 #ifdef MAC 3669 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3670 &vattr); 3671 if (error != 0) 3672 goto out; 3673 #endif 3674 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3675 #ifdef MAC 3676 out: 3677 #endif 3678 NDFREE(&nd, NDF_ONLY_PNBUF); 3679 vput(nd.ni_dvp); 3680 if (error == 0) 3681 vput(nd.ni_vp); 3682 vn_finished_write(mp); 3683 return (error); 3684 } 3685 3686 /* 3687 * Remove a directory file. 
3688 */ 3689 #ifndef _SYS_SYSPROTO_H_ 3690 struct rmdir_args { 3691 char *path; 3692 }; 3693 #endif 3694 int 3695 sys_rmdir(td, uap) 3696 struct thread *td; 3697 struct rmdir_args /* { 3698 char *path; 3699 } */ *uap; 3700 { 3701 3702 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3703 } 3704 3705 int 3706 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3707 { 3708 struct mount *mp; 3709 struct vnode *vp; 3710 struct nameidata nd; 3711 cap_rights_t rights; 3712 int error; 3713 3714 restart: 3715 bwillwrite(); 3716 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3717 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3718 if ((error = namei(&nd)) != 0) 3719 return (error); 3720 vp = nd.ni_vp; 3721 if (vp->v_type != VDIR) { 3722 error = ENOTDIR; 3723 goto out; 3724 } 3725 /* 3726 * No rmdir "." please. 3727 */ 3728 if (nd.ni_dvp == vp) { 3729 error = EINVAL; 3730 goto out; 3731 } 3732 /* 3733 * The root of a mounted filesystem cannot be deleted. 3734 */ 3735 if (vp->v_vflag & VV_ROOT) { 3736 error = EBUSY; 3737 goto out; 3738 } 3739 #ifdef MAC 3740 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3741 &nd.ni_cnd); 3742 if (error != 0) 3743 goto out; 3744 #endif 3745 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3746 NDFREE(&nd, NDF_ONLY_PNBUF); 3747 vput(vp); 3748 if (nd.ni_dvp == vp) 3749 vrele(nd.ni_dvp); 3750 else 3751 vput(nd.ni_dvp); 3752 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3753 return (error); 3754 goto restart; 3755 } 3756 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3757 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3758 vn_finished_write(mp); 3759 out: 3760 NDFREE(&nd, NDF_ONLY_PNBUF); 3761 vput(vp); 3762 if (nd.ni_dvp == vp) 3763 vrele(nd.ni_dvp); 3764 else 3765 vput(nd.ni_dvp); 3766 return (error); 3767 } 3768 3769 #ifdef COMPAT_43 3770 /* 3771 * Read a block of directory entries in a filesystem independent format. 
*/
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
/*
 * COMPAT_43 system call entry point.  Delegates to kern_ogetdirentries()
 * and, on success, copies the directory offset at which the read started
 * out to uap->basep.
 */
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Worker for ogetdirentries().
 *
 * Reads up to uap->count bytes of directory entries from the directory
 * open on uap->fd into the user buffer uap->buf.  On success the number
 * of bytes produced is stored in td->td_retval[0] and the file offset at
 * which the read began is stored in *ploff.
 *
 * Errors produced directly by this function:
 *   EINVAL - uap->count exceeds 64KB, or the vnode is not a directory;
 *   EBADF  - the descriptor is not open for reading;
 *   EIO    - an entry with d_reclen == 0 was found while converting.
 * Other errors come from getvnode(), the MAC check, VOP_READDIR() or
 * uiomove().
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	cap_rights_t rights;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;
	off_t foffset;

	/* XXX arbitrary sanity limit on `count'. */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	/* The file offset stays locked until one of the unlock calls below. */
	foffset = foffset_lock(fp, 0);
unionread:
	if (vp->v_type != VDIR) {
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer as a single-segment user-space uio. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* Remember where this read starts; that is what *ploff reports. */
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, FOF_NOUPDATE);
		fdrop(fp, td);
		return (error);
	}
#endif
	/*
	 * On non-little-endian machines, a mount with mnt_maxsymlinklen <= 0
	 * is read straight into the user buffer with no conversion.  In all
	 * other cases the entries are read into a kernel buffer, rewritten
	 * in place, and then copied out.
	 */
# if (BYTE_ORDER != LITTLE_ENDIAN)
	if (vp->v_mount->mnt_maxsymlinklen <= 0) {
		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
		    NULL, NULL);
		foffset = auio.uio_offset;
	} else
# endif
	{
		/* Clone the uio but point it at a temporary kernel buffer. */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		foffset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			/* Walk the entries by d_reclen, fixing each one up. */
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
# if (BYTE_ORDER == LITTLE_ENDIAN)
				/*
				 * The expected low byte of
				 * dp->d_namlen is our dp->d_type.
				 * The high MBZ byte of dp->d_namlen
				 * is our dp->d_namlen.
				 */
				dp->d_type = dp->d_namlen;
				dp->d_namlen = 0;
# else
				/*
				 * The dp->d_type is the high byte
				 * of the expected dp->d_namlen,
				 * so must be zero'ed.
				 */
				dp->d_type = 0;
# endif
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					/* A zero-length record would loop forever. */
					error = EIO;
					break;
				}
			}
			/* Copy out only if the walk reached the end. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was read from the root of a union mount: switch the open
	 * file over to the covered vnode, reset the offset, and read again.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent format.
3926 */ 3927 #ifndef _SYS_SYSPROTO_H_ 3928 struct getdirentries_args { 3929 int fd; 3930 char *buf; 3931 u_int count; 3932 long *basep; 3933 }; 3934 #endif 3935 int 3936 sys_getdirentries(td, uap) 3937 struct thread *td; 3938 register struct getdirentries_args /* { 3939 int fd; 3940 char *buf; 3941 u_int count; 3942 long *basep; 3943 } */ *uap; 3944 { 3945 long base; 3946 int error; 3947 3948 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3949 NULL, UIO_USERSPACE); 3950 if (error != 0) 3951 return (error); 3952 if (uap->basep != NULL) 3953 error = copyout(&base, uap->basep, sizeof(long)); 3954 return (error); 3955 } 3956 3957 int 3958 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3959 long *basep, ssize_t *residp, enum uio_seg bufseg) 3960 { 3961 struct vnode *vp; 3962 struct file *fp; 3963 struct uio auio; 3964 struct iovec aiov; 3965 cap_rights_t rights; 3966 long loff; 3967 int error, eofflag; 3968 off_t foffset; 3969 3970 AUDIT_ARG_FD(fd); 3971 if (count > IOSIZE_MAX) 3972 return (EINVAL); 3973 auio.uio_resid = count; 3974 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3975 if (error != 0) 3976 return (error); 3977 if ((fp->f_flag & FREAD) == 0) { 3978 fdrop(fp, td); 3979 return (EBADF); 3980 } 3981 vp = fp->f_vnode; 3982 foffset = foffset_lock(fp, 0); 3983 unionread: 3984 if (vp->v_type != VDIR) { 3985 error = EINVAL; 3986 goto fail; 3987 } 3988 aiov.iov_base = buf; 3989 aiov.iov_len = count; 3990 auio.uio_iov = &aiov; 3991 auio.uio_iovcnt = 1; 3992 auio.uio_rw = UIO_READ; 3993 auio.uio_segflg = bufseg; 3994 auio.uio_td = td; 3995 vn_lock(vp, LK_SHARED | LK_RETRY); 3996 AUDIT_ARG_VNODE1(vp); 3997 loff = auio.uio_offset = foffset; 3998 #ifdef MAC 3999 error = mac_vnode_check_readdir(td->td_ucred, vp); 4000 if (error == 0) 4001 #endif 4002 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4003 NULL); 4004 foffset = auio.uio_offset; 4005 if (error != 0) { 4006 VOP_UNLOCK(vp, 0); 4007 goto 
fail; 4008 } 4009 if (count == auio.uio_resid && 4010 (vp->v_vflag & VV_ROOT) && 4011 (vp->v_mount->mnt_flag & MNT_UNION)) { 4012 struct vnode *tvp = vp; 4013 4014 vp = vp->v_mount->mnt_vnodecovered; 4015 VREF(vp); 4016 fp->f_vnode = vp; 4017 fp->f_data = vp; 4018 foffset = 0; 4019 vput(tvp); 4020 goto unionread; 4021 } 4022 VOP_UNLOCK(vp, 0); 4023 *basep = loff; 4024 if (residp != NULL) 4025 *residp = auio.uio_resid; 4026 td->td_retval[0] = count - auio.uio_resid; 4027 fail: 4028 foffset_unlock(fp, foffset, 0); 4029 fdrop(fp, td); 4030 return (error); 4031 } 4032 4033 #ifndef _SYS_SYSPROTO_H_ 4034 struct getdents_args { 4035 int fd; 4036 char *buf; 4037 size_t count; 4038 }; 4039 #endif 4040 int 4041 sys_getdents(td, uap) 4042 struct thread *td; 4043 register struct getdents_args /* { 4044 int fd; 4045 char *buf; 4046 u_int count; 4047 } */ *uap; 4048 { 4049 struct getdirentries_args ap; 4050 4051 ap.fd = uap->fd; 4052 ap.buf = uap->buf; 4053 ap.count = uap->count; 4054 ap.basep = NULL; 4055 return (sys_getdirentries(td, &ap)); 4056 } 4057 4058 /* 4059 * Set the mode mask for creation of filesystem nodes. 4060 */ 4061 #ifndef _SYS_SYSPROTO_H_ 4062 struct umask_args { 4063 int newmask; 4064 }; 4065 #endif 4066 int 4067 sys_umask(td, uap) 4068 struct thread *td; 4069 struct umask_args /* { 4070 int newmask; 4071 } */ *uap; 4072 { 4073 struct filedesc *fdp; 4074 4075 fdp = td->td_proc->p_fd; 4076 FILEDESC_XLOCK(fdp); 4077 td->td_retval[0] = fdp->fd_cmask; 4078 fdp->fd_cmask = uap->newmask & ALLPERMS; 4079 FILEDESC_XUNLOCK(fdp); 4080 return (0); 4081 } 4082 4083 /* 4084 * Void all references to file by ripping underlying filesystem away from 4085 * vnode. 
4086 */ 4087 #ifndef _SYS_SYSPROTO_H_ 4088 struct revoke_args { 4089 char *path; 4090 }; 4091 #endif 4092 int 4093 sys_revoke(td, uap) 4094 struct thread *td; 4095 register struct revoke_args /* { 4096 char *path; 4097 } */ *uap; 4098 { 4099 struct vnode *vp; 4100 struct vattr vattr; 4101 struct nameidata nd; 4102 int error; 4103 4104 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4105 uap->path, td); 4106 if ((error = namei(&nd)) != 0) 4107 return (error); 4108 vp = nd.ni_vp; 4109 NDFREE(&nd, NDF_ONLY_PNBUF); 4110 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4111 error = EINVAL; 4112 goto out; 4113 } 4114 #ifdef MAC 4115 error = mac_vnode_check_revoke(td->td_ucred, vp); 4116 if (error != 0) 4117 goto out; 4118 #endif 4119 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4120 if (error != 0) 4121 goto out; 4122 if (td->td_ucred->cr_uid != vattr.va_uid) { 4123 error = priv_check(td, PRIV_VFS_ADMIN); 4124 if (error != 0) 4125 goto out; 4126 } 4127 if (vcount(vp) > 1) 4128 VOP_REVOKE(vp, REVOKEALL); 4129 out: 4130 vput(vp); 4131 return (error); 4132 } 4133 4134 /* 4135 * Convert a user file descriptor to a kernel file entry and check that, if it 4136 * is a capability, the correct rights are present. A reference on the file 4137 * entry is held upon returning. 4138 */ 4139 int 4140 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4141 { 4142 struct file *fp; 4143 int error; 4144 4145 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4146 if (error != 0) 4147 return (error); 4148 4149 /* 4150 * The file could be not of the vnode type, or it may be not 4151 * yet fully initialized, in which case the f_vnode pointer 4152 * may be set, but f_ops is still badfileops. E.g., 4153 * devfs_open() transiently create such situation to 4154 * facilitate csw d_fdopen(). 
4155 * 4156 * Dupfdopen() handling in kern_openat() installs the 4157 * half-baked file into the process descriptor table, allowing 4158 * other thread to dereference it. Guard against the race by 4159 * checking f_ops. 4160 */ 4161 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4162 fdrop(fp, td); 4163 return (EINVAL); 4164 } 4165 *fpp = fp; 4166 return (0); 4167 } 4168 4169 4170 /* 4171 * Get an (NFS) file handle. 4172 */ 4173 #ifndef _SYS_SYSPROTO_H_ 4174 struct lgetfh_args { 4175 char *fname; 4176 fhandle_t *fhp; 4177 }; 4178 #endif 4179 int 4180 sys_lgetfh(td, uap) 4181 struct thread *td; 4182 register struct lgetfh_args *uap; 4183 { 4184 struct nameidata nd; 4185 fhandle_t fh; 4186 register struct vnode *vp; 4187 int error; 4188 4189 error = priv_check(td, PRIV_VFS_GETFH); 4190 if (error != 0) 4191 return (error); 4192 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4193 uap->fname, td); 4194 error = namei(&nd); 4195 if (error != 0) 4196 return (error); 4197 NDFREE(&nd, NDF_ONLY_PNBUF); 4198 vp = nd.ni_vp; 4199 bzero(&fh, sizeof(fh)); 4200 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4201 error = VOP_VPTOFH(vp, &fh.fh_fid); 4202 vput(vp); 4203 if (error == 0) 4204 error = copyout(&fh, uap->fhp, sizeof (fh)); 4205 return (error); 4206 } 4207 4208 #ifndef _SYS_SYSPROTO_H_ 4209 struct getfh_args { 4210 char *fname; 4211 fhandle_t *fhp; 4212 }; 4213 #endif 4214 int 4215 sys_getfh(td, uap) 4216 struct thread *td; 4217 register struct getfh_args *uap; 4218 { 4219 struct nameidata nd; 4220 fhandle_t fh; 4221 register struct vnode *vp; 4222 int error; 4223 4224 error = priv_check(td, PRIV_VFS_GETFH); 4225 if (error != 0) 4226 return (error); 4227 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4228 uap->fname, td); 4229 error = namei(&nd); 4230 if (error != 0) 4231 return (error); 4232 NDFREE(&nd, NDF_ONLY_PNBUF); 4233 vp = nd.ni_vp; 4234 bzero(&fh, sizeof(fh)); 4235 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4236 error 
= VOP_VPTOFH(vp, &fh.fh_fid); 4237 vput(vp); 4238 if (error == 0) 4239 error = copyout(&fh, uap->fhp, sizeof (fh)); 4240 return (error); 4241 } 4242 4243 /* 4244 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4245 * open descriptor. 4246 * 4247 * warning: do not remove the priv_check() call or this becomes one giant 4248 * security hole. 4249 */ 4250 #ifndef _SYS_SYSPROTO_H_ 4251 struct fhopen_args { 4252 const struct fhandle *u_fhp; 4253 int flags; 4254 }; 4255 #endif 4256 int 4257 sys_fhopen(td, uap) 4258 struct thread *td; 4259 struct fhopen_args /* { 4260 const struct fhandle *u_fhp; 4261 int flags; 4262 } */ *uap; 4263 { 4264 struct mount *mp; 4265 struct vnode *vp; 4266 struct fhandle fhp; 4267 struct file *fp; 4268 int fmode, error; 4269 int indx; 4270 4271 error = priv_check(td, PRIV_VFS_FHOPEN); 4272 if (error != 0) 4273 return (error); 4274 indx = -1; 4275 fmode = FFLAGS(uap->flags); 4276 /* why not allow a non-read/write open for our lockd? */ 4277 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4278 return (EINVAL); 4279 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4280 if (error != 0) 4281 return(error); 4282 /* find the mount point */ 4283 mp = vfs_busyfs(&fhp.fh_fsid); 4284 if (mp == NULL) 4285 return (ESTALE); 4286 /* now give me my vnode, it gets returned to me locked */ 4287 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4288 vfs_unbusy(mp); 4289 if (error != 0) 4290 return (error); 4291 4292 error = falloc_noinstall(td, &fp); 4293 if (error != 0) { 4294 vput(vp); 4295 return (error); 4296 } 4297 /* 4298 * An extra reference on `fp' has been held for us by 4299 * falloc_noinstall(). 
4300 */ 4301 4302 #ifdef INVARIANTS 4303 td->td_dupfd = -1; 4304 #endif 4305 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4306 if (error != 0) { 4307 KASSERT(fp->f_ops == &badfileops, 4308 ("VOP_OPEN in fhopen() set f_ops")); 4309 KASSERT(td->td_dupfd < 0, 4310 ("fhopen() encountered fdopen()")); 4311 4312 vput(vp); 4313 goto bad; 4314 } 4315 #ifdef INVARIANTS 4316 td->td_dupfd = 0; 4317 #endif 4318 fp->f_vnode = vp; 4319 fp->f_seqcount = 1; 4320 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4321 &vnops); 4322 VOP_UNLOCK(vp, 0); 4323 if ((fmode & O_TRUNC) != 0) { 4324 error = fo_truncate(fp, 0, td->td_ucred, td); 4325 if (error != 0) 4326 goto bad; 4327 } 4328 4329 error = finstall(td, fp, &indx, fmode, NULL); 4330 bad: 4331 fdrop(fp, td); 4332 td->td_retval[0] = indx; 4333 return (error); 4334 } 4335 4336 /* 4337 * Stat an (NFS) file handle. 4338 */ 4339 #ifndef _SYS_SYSPROTO_H_ 4340 struct fhstat_args { 4341 struct fhandle *u_fhp; 4342 struct stat *sb; 4343 }; 4344 #endif 4345 int 4346 sys_fhstat(td, uap) 4347 struct thread *td; 4348 register struct fhstat_args /* { 4349 struct fhandle *u_fhp; 4350 struct stat *sb; 4351 } */ *uap; 4352 { 4353 struct stat sb; 4354 struct fhandle fh; 4355 int error; 4356 4357 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4358 if (error != 0) 4359 return (error); 4360 error = kern_fhstat(td, fh, &sb); 4361 if (error == 0) 4362 error = copyout(&sb, uap->sb, sizeof(sb)); 4363 return (error); 4364 } 4365 4366 int 4367 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4368 { 4369 struct mount *mp; 4370 struct vnode *vp; 4371 int error; 4372 4373 error = priv_check(td, PRIV_VFS_FHSTAT); 4374 if (error != 0) 4375 return (error); 4376 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4377 return (ESTALE); 4378 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4379 vfs_unbusy(mp); 4380 if (error != 0) 4381 return (error); 4382 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4383 vput(vp); 
4384 return (error); 4385 } 4386 4387 /* 4388 * Implement fstatfs() for (NFS) file handles. 4389 */ 4390 #ifndef _SYS_SYSPROTO_H_ 4391 struct fhstatfs_args { 4392 struct fhandle *u_fhp; 4393 struct statfs *buf; 4394 }; 4395 #endif 4396 int 4397 sys_fhstatfs(td, uap) 4398 struct thread *td; 4399 struct fhstatfs_args /* { 4400 struct fhandle *u_fhp; 4401 struct statfs *buf; 4402 } */ *uap; 4403 { 4404 struct statfs sf; 4405 fhandle_t fh; 4406 int error; 4407 4408 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4409 if (error != 0) 4410 return (error); 4411 error = kern_fhstatfs(td, fh, &sf); 4412 if (error != 0) 4413 return (error); 4414 return (copyout(&sf, uap->buf, sizeof(sf))); 4415 } 4416 4417 int 4418 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4419 { 4420 struct statfs *sp; 4421 struct mount *mp; 4422 struct vnode *vp; 4423 int error; 4424 4425 error = priv_check(td, PRIV_VFS_FHSTATFS); 4426 if (error != 0) 4427 return (error); 4428 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4429 return (ESTALE); 4430 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4431 if (error != 0) { 4432 vfs_unbusy(mp); 4433 return (error); 4434 } 4435 vput(vp); 4436 error = prison_canseemount(td->td_ucred, mp); 4437 if (error != 0) 4438 goto out; 4439 #ifdef MAC 4440 error = mac_mount_check_stat(td->td_ucred, mp); 4441 if (error != 0) 4442 goto out; 4443 #endif 4444 /* 4445 * Set these in case the underlying filesystem fails to do so. 
4446 */ 4447 sp = &mp->mnt_stat; 4448 sp->f_version = STATFS_VERSION; 4449 sp->f_namemax = NAME_MAX; 4450 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4451 error = VFS_STATFS(mp, sp); 4452 if (error == 0) 4453 *buf = *sp; 4454 out: 4455 vfs_unbusy(mp); 4456 return (error); 4457 } 4458 4459 int 4460 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4461 { 4462 struct file *fp; 4463 struct mount *mp; 4464 struct vnode *vp; 4465 cap_rights_t rights; 4466 off_t olen, ooffset; 4467 int error; 4468 4469 if (offset < 0 || len <= 0) 4470 return (EINVAL); 4471 /* Check for wrap. */ 4472 if (offset > OFF_MAX - len) 4473 return (EFBIG); 4474 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4475 if (error != 0) 4476 return (error); 4477 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4478 error = ESPIPE; 4479 goto out; 4480 } 4481 if ((fp->f_flag & FWRITE) == 0) { 4482 error = EBADF; 4483 goto out; 4484 } 4485 if (fp->f_type != DTYPE_VNODE) { 4486 error = ENODEV; 4487 goto out; 4488 } 4489 vp = fp->f_vnode; 4490 if (vp->v_type != VREG) { 4491 error = ENODEV; 4492 goto out; 4493 } 4494 4495 /* Allocating blocks may take a long time, so iterate. 
*/ 4496 for (;;) { 4497 olen = len; 4498 ooffset = offset; 4499 4500 bwillwrite(); 4501 mp = NULL; 4502 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4503 if (error != 0) 4504 break; 4505 error = vn_lock(vp, LK_EXCLUSIVE); 4506 if (error != 0) { 4507 vn_finished_write(mp); 4508 break; 4509 } 4510 #ifdef MAC 4511 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4512 if (error == 0) 4513 #endif 4514 error = VOP_ALLOCATE(vp, &offset, &len); 4515 VOP_UNLOCK(vp, 0); 4516 vn_finished_write(mp); 4517 4518 if (olen + ooffset != offset + len) { 4519 panic("offset + len changed from %jx/%jx to %jx/%jx", 4520 ooffset, olen, offset, len); 4521 } 4522 if (error != 0 || len == 0) 4523 break; 4524 KASSERT(olen > len, ("Iteration did not make progress?")); 4525 maybe_yield(); 4526 } 4527 out: 4528 fdrop(fp, td); 4529 return (error); 4530 } 4531 4532 int 4533 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4534 { 4535 4536 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4537 uap->len); 4538 return (0); 4539 } 4540 4541 /* 4542 * Unlike madvise(2), we do not make a best effort to remember every 4543 * possible caching hint. Instead, we remember the last setting with 4544 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4545 * region of any current setting. 
4546 */ 4547 int 4548 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4549 int advice) 4550 { 4551 struct fadvise_info *fa, *new; 4552 struct file *fp; 4553 struct vnode *vp; 4554 cap_rights_t rights; 4555 off_t end; 4556 int error; 4557 4558 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4559 return (EINVAL); 4560 switch (advice) { 4561 case POSIX_FADV_SEQUENTIAL: 4562 case POSIX_FADV_RANDOM: 4563 case POSIX_FADV_NOREUSE: 4564 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4565 break; 4566 case POSIX_FADV_NORMAL: 4567 case POSIX_FADV_WILLNEED: 4568 case POSIX_FADV_DONTNEED: 4569 new = NULL; 4570 break; 4571 default: 4572 return (EINVAL); 4573 } 4574 /* XXX: CAP_POSIX_FADVISE? */ 4575 error = fget(td, fd, cap_rights_init(&rights), &fp); 4576 if (error != 0) 4577 goto out; 4578 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4579 error = ESPIPE; 4580 goto out; 4581 } 4582 if (fp->f_type != DTYPE_VNODE) { 4583 error = ENODEV; 4584 goto out; 4585 } 4586 vp = fp->f_vnode; 4587 if (vp->v_type != VREG) { 4588 error = ENODEV; 4589 goto out; 4590 } 4591 if (len == 0) 4592 end = OFF_MAX; 4593 else 4594 end = offset + len - 1; 4595 switch (advice) { 4596 case POSIX_FADV_SEQUENTIAL: 4597 case POSIX_FADV_RANDOM: 4598 case POSIX_FADV_NOREUSE: 4599 /* 4600 * Try to merge any existing non-standard region with 4601 * this new region if possible, otherwise create a new 4602 * non-standard region for this request. 
4603 */ 4604 mtx_pool_lock(mtxpool_sleep, fp); 4605 fa = fp->f_advice; 4606 if (fa != NULL && fa->fa_advice == advice && 4607 ((fa->fa_start <= end && fa->fa_end >= offset) || 4608 (end != OFF_MAX && fa->fa_start == end + 1) || 4609 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4610 if (offset < fa->fa_start) 4611 fa->fa_start = offset; 4612 if (end > fa->fa_end) 4613 fa->fa_end = end; 4614 } else { 4615 new->fa_advice = advice; 4616 new->fa_start = offset; 4617 new->fa_end = end; 4618 fp->f_advice = new; 4619 new = fa; 4620 } 4621 mtx_pool_unlock(mtxpool_sleep, fp); 4622 break; 4623 case POSIX_FADV_NORMAL: 4624 /* 4625 * If a the "normal" region overlaps with an existing 4626 * non-standard region, trim or remove the 4627 * non-standard region. 4628 */ 4629 mtx_pool_lock(mtxpool_sleep, fp); 4630 fa = fp->f_advice; 4631 if (fa != NULL) { 4632 if (offset <= fa->fa_start && end >= fa->fa_end) { 4633 new = fa; 4634 fp->f_advice = NULL; 4635 } else if (offset <= fa->fa_start && 4636 end >= fa->fa_start) 4637 fa->fa_start = end + 1; 4638 else if (offset <= fa->fa_end && end >= fa->fa_end) 4639 fa->fa_end = offset - 1; 4640 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4641 /* 4642 * If the "normal" region is a middle 4643 * portion of the existing 4644 * non-standard region, just remove 4645 * the whole thing rather than picking 4646 * one side or the other to 4647 * preserve. 4648 */ 4649 new = fa; 4650 fp->f_advice = NULL; 4651 } 4652 } 4653 mtx_pool_unlock(mtxpool_sleep, fp); 4654 break; 4655 case POSIX_FADV_WILLNEED: 4656 case POSIX_FADV_DONTNEED: 4657 error = VOP_ADVISE(vp, offset, end, advice); 4658 break; 4659 } 4660 out: 4661 if (fp != NULL) 4662 fdrop(fp, td); 4663 free(new, M_FADVISE); 4664 return (error); 4665 } 4666 4667 int 4668 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4669 { 4670 4671 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4672 uap->len, uap->advice); 4673 return (0); 4674 } 4675