1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * The module initialization routine for POSIX asynchronous I/O will 110 * set this to the version of AIO that it implements. (Zero means 111 * that it is not implemented.) This value is used here by pathconf() 112 * and in kern_descrip.c by fpathconf(). 113 */ 114 int async_io_version; 115 116 /* 117 * Sync each mounted filesystem. 118 */ 119 #ifndef _SYS_SYSPROTO_H_ 120 struct sync_args { 121 int dummy; 122 }; 123 #endif 124 /* ARGSUSED */ 125 int 126 sys_sync(td, uap) 127 struct thread *td; 128 struct sync_args *uap; 129 { 130 struct mount *mp, *nmp; 131 int save; 132 133 mtx_lock(&mountlist_mtx); 134 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 135 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 136 nmp = TAILQ_NEXT(mp, mnt_list); 137 continue; 138 } 139 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 140 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 141 save = curthread_pflags_set(TDP_SYNCIO); 142 vfs_msync(mp, MNT_NOWAIT); 143 VFS_SYNC(mp, MNT_NOWAIT); 144 curthread_pflags_restore(save); 145 vn_finished_write(mp); 146 } 147 mtx_lock(&mountlist_mtx); 148 nmp = TAILQ_NEXT(mp, mnt_list); 149 vfs_unbusy(mp); 150 } 151 mtx_unlock(&mountlist_mtx); 152 return (0); 153 } 154 155 /* 156 * Change filesystem quotas. 157 */ 158 #ifndef _SYS_SYSPROTO_H_ 159 struct quotactl_args { 160 char *path; 161 int cmd; 162 int uid; 163 caddr_t arg; 164 }; 165 #endif 166 int 167 sys_quotactl(td, uap) 168 struct thread *td; 169 register struct quotactl_args /* { 170 char *path; 171 int cmd; 172 int uid; 173 caddr_t arg; 174 } */ *uap; 175 { 176 struct mount *mp; 177 struct nameidata nd; 178 int error; 179 180 AUDIT_ARG_CMD(uap->cmd); 181 AUDIT_ARG_UID(uap->uid); 182 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 183 return (EPERM); 184 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 185 uap->path, td); 186 if ((error = namei(&nd)) != 0) 187 return (error); 188 NDFREE(&nd, NDF_ONLY_PNBUF); 189 mp = nd.ni_vp->v_mount; 190 vfs_ref(mp); 191 vput(nd.ni_vp); 192 error = vfs_busy(mp, 0); 193 vfs_rel(mp); 194 if (error != 0) 195 return (error); 196 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 197 198 /* 199 * Since quota on operation typically needs to open quota 200 * file, the Q_QUOTAON handler needs to unbusy the mount point 201 * before calling into namei. Otherwise, unmount might be 202 * started between two vfs_busy() invocations (first is our, 203 * second is from mount point cross-walk code in lookup()), 204 * causing deadlock. 205 * 206 * Require that Q_QUOTAON handles the vfs_busy() reference on 207 * its own, always returning with ubusied mount point. 208 */ 209 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 210 vfs_unbusy(mp); 211 return (error); 212 } 213 214 /* 215 * Used by statfs conversion routines to scale the block size up if 216 * necessary so that all of the block counts are <= 'max_size'. Note 217 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 218 * value of 'n'. 219 */ 220 void 221 statfs_scale_blocks(struct statfs *sf, long max_size) 222 { 223 uint64_t count; 224 int shift; 225 226 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 227 228 /* 229 * Attempt to scale the block counts to give a more accurate 230 * overview to userland of the ratio of free space to used 231 * space. To do this, find the largest block count and compute 232 * a divisor that lets it fit into a signed integer <= max_size. 233 */ 234 if (sf->f_bavail < 0) 235 count = -sf->f_bavail; 236 else 237 count = sf->f_bavail; 238 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 239 if (count <= max_size) 240 return; 241 242 count >>= flsl(max_size); 243 shift = 0; 244 while (count > 0) { 245 shift++; 246 count >>=1; 247 } 248 249 sf->f_bsize <<= shift; 250 sf->f_blocks >>= shift; 251 sf->f_bfree >>= shift; 252 sf->f_bavail >>= shift; 253 } 254 255 /* 256 * Get filesystem statistics. 257 */ 258 #ifndef _SYS_SYSPROTO_H_ 259 struct statfs_args { 260 char *path; 261 struct statfs *buf; 262 }; 263 #endif 264 int 265 sys_statfs(td, uap) 266 struct thread *td; 267 register struct statfs_args /* { 268 char *path; 269 struct statfs *buf; 270 } */ *uap; 271 { 272 struct statfs sf; 273 int error; 274 275 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 276 if (error == 0) 277 error = copyout(&sf, uap->buf, sizeof(sf)); 278 return (error); 279 } 280 281 int 282 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 283 struct statfs *buf) 284 { 285 struct mount *mp; 286 struct statfs *sp, sb; 287 struct nameidata nd; 288 int error; 289 290 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 291 pathseg, path, td); 292 error = namei(&nd); 293 if (error != 0) 294 return (error); 295 mp = nd.ni_vp->v_mount; 296 vfs_ref(mp); 297 NDFREE(&nd, NDF_ONLY_PNBUF); 298 vput(nd.ni_vp); 299 error = vfs_busy(mp, 0); 300 vfs_rel(mp); 301 if (error != 0) 302 return (error); 303 #ifdef MAC 304 error = mac_mount_check_stat(td->td_ucred, mp); 305 if (error != 0) 306 goto out; 307 #endif 308 /* 309 * Set these in case the underlying filesystem fails to do so. 310 */ 311 sp = &mp->mnt_stat; 312 sp->f_version = STATFS_VERSION; 313 sp->f_namemax = NAME_MAX; 314 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 315 error = VFS_STATFS(mp, sp); 316 if (error != 0) 317 goto out; 318 if (priv_check(td, PRIV_VFS_GENERATION)) { 319 bcopy(sp, &sb, sizeof(sb)); 320 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 321 prison_enforce_statfs(td->td_ucred, mp, &sb); 322 sp = &sb; 323 } 324 *buf = *sp; 325 out: 326 vfs_unbusy(mp); 327 return (error); 328 } 329 330 /* 331 * Get filesystem statistics. 332 */ 333 #ifndef _SYS_SYSPROTO_H_ 334 struct fstatfs_args { 335 int fd; 336 struct statfs *buf; 337 }; 338 #endif 339 int 340 sys_fstatfs(td, uap) 341 struct thread *td; 342 register struct fstatfs_args /* { 343 int fd; 344 struct statfs *buf; 345 } */ *uap; 346 { 347 struct statfs sf; 348 int error; 349 350 error = kern_fstatfs(td, uap->fd, &sf); 351 if (error == 0) 352 error = copyout(&sf, uap->buf, sizeof(sf)); 353 return (error); 354 } 355 356 int 357 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 358 { 359 struct file *fp; 360 struct mount *mp; 361 struct statfs *sp, sb; 362 struct vnode *vp; 363 cap_rights_t rights; 364 int error; 365 366 AUDIT_ARG_FD(fd); 367 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 368 if (error != 0) 369 return (error); 370 vp = fp->f_vnode; 371 vn_lock(vp, LK_SHARED | LK_RETRY); 372 #ifdef AUDIT 373 AUDIT_ARG_VNODE1(vp); 374 #endif 375 mp = vp->v_mount; 376 if (mp) 377 vfs_ref(mp); 378 VOP_UNLOCK(vp, 0); 379 fdrop(fp, td); 380 if (mp == NULL) { 381 error = EBADF; 382 goto out; 383 } 384 error = vfs_busy(mp, 0); 385 vfs_rel(mp); 386 if (error != 0) 387 return (error); 388 #ifdef MAC 389 error = mac_mount_check_stat(td->td_ucred, mp); 390 if (error != 0) 391 goto out; 392 #endif 393 /* 394 * Set these in case the underlying filesystem fails to do so. 395 */ 396 sp = &mp->mnt_stat; 397 sp->f_version = STATFS_VERSION; 398 sp->f_namemax = NAME_MAX; 399 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 400 error = VFS_STATFS(mp, sp); 401 if (error != 0) 402 goto out; 403 if (priv_check(td, PRIV_VFS_GENERATION)) { 404 bcopy(sp, &sb, sizeof(sb)); 405 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 406 prison_enforce_statfs(td->td_ucred, mp, &sb); 407 sp = &sb; 408 } 409 *buf = *sp; 410 out: 411 if (mp) 412 vfs_unbusy(mp); 413 return (error); 414 } 415 416 /* 417 * Get statistics on all filesystems. 418 */ 419 #ifndef _SYS_SYSPROTO_H_ 420 struct getfsstat_args { 421 struct statfs *buf; 422 long bufsize; 423 int flags; 424 }; 425 #endif 426 int 427 sys_getfsstat(td, uap) 428 struct thread *td; 429 register struct getfsstat_args /* { 430 struct statfs *buf; 431 long bufsize; 432 int flags; 433 } */ *uap; 434 { 435 size_t count; 436 int error; 437 438 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 439 return (EINVAL); 440 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 441 UIO_USERSPACE, uap->flags); 442 if (error == 0) 443 td->td_retval[0] = count; 444 return (error); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 size_t *countp, enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 *countp = maxcount; 542 else 543 *countp = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 if (uap->bufsize < 0) 631 return (EINVAL); 632 count = uap->bufsize / sizeof(struct ostatfs); 633 if (count > SIZE_MAX / sizeof(struct statfs)) 634 return (EINVAL); 635 size = count * sizeof(struct statfs); 636 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 637 uap->flags); 638 td->td_retval[0] = count; 639 if (size != 0) { 640 sp = buf; 641 while (count != 0 && error == 0) { 642 cvtstatfs(sp, &osb); 643 error = copyout(&osb, uap->buf, sizeof(osb)); 644 sp++; 645 uap->buf++; 646 count--; 647 } 648 free(buf, M_TEMP); 649 } 650 return (error); 651 } 652 653 /* 654 * Implement fstatfs() for (NFS) file handles. 655 */ 656 #ifndef _SYS_SYSPROTO_H_ 657 struct freebsd4_fhstatfs_args { 658 struct fhandle *u_fhp; 659 struct ostatfs *buf; 660 }; 661 #endif 662 int 663 freebsd4_fhstatfs(td, uap) 664 struct thread *td; 665 struct freebsd4_fhstatfs_args /* { 666 struct fhandle *u_fhp; 667 struct ostatfs *buf; 668 } */ *uap; 669 { 670 struct ostatfs osb; 671 struct statfs sf; 672 fhandle_t fh; 673 int error; 674 675 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 676 if (error != 0) 677 return (error); 678 error = kern_fhstatfs(td, fh, &sf); 679 if (error != 0) 680 return (error); 681 cvtstatfs(&sf, &osb); 682 return (copyout(&osb, uap->buf, sizeof(osb))); 683 } 684 685 /* 686 * Convert a new format statfs structure to an old format statfs structure. 687 */ 688 static void 689 cvtstatfs(nsp, osp) 690 struct statfs *nsp; 691 struct ostatfs *osp; 692 { 693 694 statfs_scale_blocks(nsp, LONG_MAX); 695 bzero(osp, sizeof(*osp)); 696 osp->f_bsize = nsp->f_bsize; 697 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 698 osp->f_blocks = nsp->f_blocks; 699 osp->f_bfree = nsp->f_bfree; 700 osp->f_bavail = nsp->f_bavail; 701 osp->f_files = MIN(nsp->f_files, LONG_MAX); 702 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 703 osp->f_owner = nsp->f_owner; 704 osp->f_type = nsp->f_type; 705 osp->f_flags = nsp->f_flags; 706 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 707 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 708 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 709 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 710 strlcpy(osp->f_fstypename, nsp->f_fstypename, 711 MIN(MFSNAMELEN, OMFSNAMELEN)); 712 strlcpy(osp->f_mntonname, nsp->f_mntonname, 713 MIN(MNAMELEN, OMNAMELEN)); 714 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 715 MIN(MNAMELEN, OMNAMELEN)); 716 osp->f_fsid = nsp->f_fsid; 717 } 718 #endif /* COMPAT_FREEBSD4 */ 719 720 /* 721 * Change current working directory to a given file descriptor. 722 */ 723 #ifndef _SYS_SYSPROTO_H_ 724 struct fchdir_args { 725 int fd; 726 }; 727 #endif 728 int 729 sys_fchdir(td, uap) 730 struct thread *td; 731 struct fchdir_args /* { 732 int fd; 733 } */ *uap; 734 { 735 struct vnode *vp, *tdp; 736 struct mount *mp; 737 struct file *fp; 738 cap_rights_t rights; 739 int error; 740 741 AUDIT_ARG_FD(uap->fd); 742 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 743 &fp); 744 if (error != 0) 745 return (error); 746 vp = fp->f_vnode; 747 VREF(vp); 748 fdrop(fp, td); 749 vn_lock(vp, LK_SHARED | LK_RETRY); 750 AUDIT_ARG_VNODE1(vp); 751 error = change_dir(vp, td); 752 while (!error && (mp = vp->v_mountedhere) != NULL) { 753 if (vfs_busy(mp, 0)) 754 continue; 755 error = VFS_ROOT(mp, LK_SHARED, &tdp); 756 vfs_unbusy(mp); 757 if (error != 0) 758 break; 759 vput(vp); 760 vp = tdp; 761 } 762 if (error != 0) { 763 vput(vp); 764 return (error); 765 } 766 VOP_UNLOCK(vp, 0); 767 pwd_chdir(td, vp); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 struct nameidata nd; 794 int error; 795 796 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 797 pathseg, path, td); 798 if ((error = namei(&nd)) != 0) 799 return (error); 800 if ((error = change_dir(nd.ni_vp, td)) != 0) { 801 vput(nd.ni_vp); 802 NDFREE(&nd, NDF_ONLY_PNBUF); 803 return (error); 804 } 805 VOP_UNLOCK(nd.ni_vp, 0); 806 NDFREE(&nd, NDF_ONLY_PNBUF); 807 pwd_chdir(td, nd.ni_vp); 808 return (0); 809 } 810 811 /* 812 * Change notion of root (``/'') directory. 813 */ 814 #ifndef _SYS_SYSPROTO_H_ 815 struct chroot_args { 816 char *path; 817 }; 818 #endif 819 int 820 sys_chroot(td, uap) 821 struct thread *td; 822 struct chroot_args /* { 823 char *path; 824 } */ *uap; 825 { 826 struct nameidata nd; 827 int error; 828 829 error = priv_check(td, PRIV_VFS_CHROOT); 830 if (error != 0) 831 return (error); 832 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 833 UIO_USERSPACE, uap->path, td); 834 error = namei(&nd); 835 if (error != 0) 836 goto error; 837 error = change_dir(nd.ni_vp, td); 838 if (error != 0) 839 goto e_vunlock; 840 #ifdef MAC 841 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 842 if (error != 0) 843 goto e_vunlock; 844 #endif 845 VOP_UNLOCK(nd.ni_vp, 0); 846 error = pwd_chroot(td, nd.ni_vp); 847 vrele(nd.ni_vp); 848 NDFREE(&nd, NDF_ONLY_PNBUF); 849 return (error); 850 e_vunlock: 851 vput(nd.ni_vp); 852 error: 853 NDFREE(&nd, NDF_ONLY_PNBUF); 854 return (error); 855 } 856 857 /* 858 * Common routine for chroot and chdir. Callers must provide a locked vnode 859 * instance. 860 */ 861 int 862 change_dir(vp, td) 863 struct vnode *vp; 864 struct thread *td; 865 { 866 #ifdef MAC 867 int error; 868 #endif 869 870 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 871 if (vp->v_type != VDIR) 872 return (ENOTDIR); 873 #ifdef MAC 874 error = mac_vnode_check_chdir(td->td_ucred, vp); 875 if (error != 0) 876 return (error); 877 #endif 878 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 879 } 880 881 static __inline void 882 flags_to_rights(int flags, cap_rights_t *rightsp) 883 { 884 885 if (flags & O_EXEC) { 886 cap_rights_set(rightsp, CAP_FEXECVE); 887 } else { 888 switch ((flags & O_ACCMODE)) { 889 case O_RDONLY: 890 cap_rights_set(rightsp, CAP_READ); 891 break; 892 case O_RDWR: 893 cap_rights_set(rightsp, CAP_READ); 894 /* FALLTHROUGH */ 895 case O_WRONLY: 896 cap_rights_set(rightsp, CAP_WRITE); 897 if (!(flags & (O_APPEND | O_TRUNC))) 898 cap_rights_set(rightsp, CAP_SEEK); 899 break; 900 } 901 } 902 903 if (flags & O_CREAT) 904 cap_rights_set(rightsp, CAP_CREATE); 905 906 if (flags & O_TRUNC) 907 cap_rights_set(rightsp, CAP_FTRUNCATE); 908 909 if (flags & (O_SYNC | O_FSYNC)) 910 cap_rights_set(rightsp, CAP_FSYNC); 911 912 if (flags & (O_EXLOCK | O_SHLOCK)) 913 cap_rights_set(rightsp, CAP_FLOCK); 914 } 915 916 /* 917 * Check permissions, allocate an open file structure, and call the device 918 * open routine if any. 919 */ 920 #ifndef _SYS_SYSPROTO_H_ 921 struct open_args { 922 char *path; 923 int flags; 924 int mode; 925 }; 926 #endif 927 int 928 sys_open(td, uap) 929 struct thread *td; 930 register struct open_args /* { 931 char *path; 932 int flags; 933 int mode; 934 } */ *uap; 935 { 936 937 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 938 uap->flags, uap->mode)); 939 } 940 941 #ifndef _SYS_SYSPROTO_H_ 942 struct openat_args { 943 int fd; 944 char *path; 945 int flag; 946 int mode; 947 }; 948 #endif 949 int 950 sys_openat(struct thread *td, struct openat_args *uap) 951 { 952 953 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 954 uap->mode)); 955 } 956 957 int 958 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 959 int flags, int mode) 960 { 961 struct proc *p = td->td_proc; 962 struct filedesc *fdp = p->p_fd; 963 struct file *fp; 964 struct vnode *vp; 965 struct nameidata nd; 966 cap_rights_t rights; 967 int cmode, error, indx; 968 969 indx = -1; 970 971 AUDIT_ARG_FFLAGS(flags); 972 AUDIT_ARG_MODE(mode); 973 /* XXX: audit dirfd */ 974 cap_rights_init(&rights, CAP_LOOKUP); 975 flags_to_rights(flags, &rights); 976 /* 977 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 978 * may be specified. 979 */ 980 if (flags & O_EXEC) { 981 if (flags & O_ACCMODE) 982 return (EINVAL); 983 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 984 return (EINVAL); 985 } else { 986 flags = FFLAGS(flags); 987 } 988 989 /* 990 * Allocate a file structure. The descriptor to reference it 991 * is allocated and set by finstall() below. 992 */ 993 error = falloc_noinstall(td, &fp); 994 if (error != 0) 995 return (error); 996 /* 997 * An extra reference on `fp' has been held for us by 998 * falloc_noinstall(). 999 */ 1000 /* Set the flags early so the finit in devfs can pick them up. */ 1001 fp->f_flag = flags & FMASK; 1002 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1003 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1004 &rights, td); 1005 td->td_dupfd = -1; /* XXX check for fdopen */ 1006 error = vn_open(&nd, &flags, cmode, fp); 1007 if (error != 0) { 1008 /* 1009 * If the vn_open replaced the method vector, something 1010 * wonderous happened deep below and we just pass it up 1011 * pretending we know what we do. 1012 */ 1013 if (error == ENXIO && fp->f_ops != &badfileops) 1014 goto success; 1015 1016 /* 1017 * Handle special fdopen() case. bleh. 1018 * 1019 * Don't do this for relative (capability) lookups; we don't 1020 * understand exactly what would happen, and we don't think 1021 * that it ever should. 1022 */ 1023 if (nd.ni_strictrelative == 0 && 1024 (error == ENODEV || error == ENXIO) && 1025 td->td_dupfd >= 0) { 1026 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1027 &indx); 1028 if (error == 0) 1029 goto success; 1030 } 1031 1032 goto bad; 1033 } 1034 td->td_dupfd = 0; 1035 NDFREE(&nd, NDF_ONLY_PNBUF); 1036 vp = nd.ni_vp; 1037 1038 /* 1039 * Store the vnode, for any f_type. Typically, the vnode use 1040 * count is decremented by direct call to vn_closefile() for 1041 * files that switched type in the cdevsw fdopen() method. 1042 */ 1043 fp->f_vnode = vp; 1044 /* 1045 * If the file wasn't claimed by devfs bind it to the normal 1046 * vnode operations here. 1047 */ 1048 if (fp->f_ops == &badfileops) { 1049 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1050 fp->f_seqcount = 1; 1051 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1052 DTYPE_VNODE, vp, &vnops); 1053 } 1054 1055 VOP_UNLOCK(vp, 0); 1056 if (flags & O_TRUNC) { 1057 error = fo_truncate(fp, 0, td->td_ucred, td); 1058 if (error != 0) 1059 goto bad; 1060 } 1061 success: 1062 /* 1063 * If we haven't already installed the FD (for dupfdopen), do so now. 1064 */ 1065 if (indx == -1) { 1066 struct filecaps *fcaps; 1067 1068 #ifdef CAPABILITIES 1069 if (nd.ni_strictrelative == 1) 1070 fcaps = &nd.ni_filecaps; 1071 else 1072 #endif 1073 fcaps = NULL; 1074 error = finstall(td, fp, &indx, flags, fcaps); 1075 /* On success finstall() consumes fcaps. */ 1076 if (error != 0) { 1077 filecaps_free(&nd.ni_filecaps); 1078 goto bad; 1079 } 1080 } else { 1081 filecaps_free(&nd.ni_filecaps); 1082 } 1083 1084 /* 1085 * Release our private reference, leaving the one associated with 1086 * the descriptor table intact. 1087 */ 1088 fdrop(fp, td); 1089 td->td_retval[0] = indx; 1090 return (0); 1091 bad: 1092 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1093 fdrop(fp, td); 1094 return (error); 1095 } 1096 1097 #ifdef COMPAT_43 1098 /* 1099 * Create a file. 1100 */ 1101 #ifndef _SYS_SYSPROTO_H_ 1102 struct ocreat_args { 1103 char *path; 1104 int mode; 1105 }; 1106 #endif 1107 int 1108 ocreat(td, uap) 1109 struct thread *td; 1110 register struct ocreat_args /* { 1111 char *path; 1112 int mode; 1113 } */ *uap; 1114 { 1115 1116 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1117 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1118 } 1119 #endif /* COMPAT_43 */ 1120 1121 /* 1122 * Create a special file. 1123 */ 1124 #ifndef _SYS_SYSPROTO_H_ 1125 struct mknod_args { 1126 char *path; 1127 int mode; 1128 int dev; 1129 }; 1130 #endif 1131 int 1132 sys_mknod(td, uap) 1133 struct thread *td; 1134 register struct mknod_args /* { 1135 char *path; 1136 int mode; 1137 int dev; 1138 } */ *uap; 1139 { 1140 1141 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1142 uap->mode, uap->dev)); 1143 } 1144 1145 #ifndef _SYS_SYSPROTO_H_ 1146 struct mknodat_args { 1147 int fd; 1148 char *path; 1149 mode_t mode; 1150 dev_t dev; 1151 }; 1152 #endif 1153 int 1154 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1155 { 1156 1157 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1158 uap->dev)); 1159 } 1160 1161 int 1162 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1163 int mode, int dev) 1164 { 1165 struct vnode *vp; 1166 struct mount *mp; 1167 struct vattr vattr; 1168 struct nameidata nd; 1169 cap_rights_t rights; 1170 int error, whiteout = 0; 1171 1172 AUDIT_ARG_MODE(mode); 1173 AUDIT_ARG_DEV(dev); 1174 switch (mode & S_IFMT) { 1175 case S_IFCHR: 1176 case S_IFBLK: 1177 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1178 break; 1179 case S_IFMT: 1180 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1181 break; 1182 case S_IFWHT: 1183 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1184 break; 1185 case S_IFIFO: 1186 if (dev == 0) 1187 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1188 /* FALLTHROUGH */ 1189 default: 1190 error = EINVAL; 1191 break; 1192 } 1193 if (error != 0) 1194 return (error); 1195 restart: 1196 bwillwrite(); 1197 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1198 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1199 td); 1200 if ((error = namei(&nd)) != 0) 1201 return (error); 1202 vp = nd.ni_vp; 1203 if (vp != NULL) { 1204 NDFREE(&nd, NDF_ONLY_PNBUF); 1205 if (vp == nd.ni_dvp) 1206 vrele(nd.ni_dvp); 1207 else 1208 vput(nd.ni_dvp); 1209 vrele(vp); 1210 return (EEXIST); 1211 } else { 1212 VATTR_NULL(&vattr); 1213 vattr.va_mode = (mode & ALLPERMS) & 1214 ~td->td_proc->p_fd->fd_cmask; 1215 vattr.va_rdev = dev; 1216 whiteout = 0; 1217 1218 switch (mode & S_IFMT) { 1219 case S_IFMT: /* used by badsect to flag bad sectors */ 1220 vattr.va_type = VBAD; 1221 break; 1222 case S_IFCHR: 1223 vattr.va_type = VCHR; 1224 break; 1225 case S_IFBLK: 1226 vattr.va_type = VBLK; 1227 break; 1228 case S_IFWHT: 1229 whiteout = 1; 1230 break; 1231 default: 1232 panic("kern_mknod: invalid mode"); 1233 } 1234 } 1235 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1236 NDFREE(&nd, NDF_ONLY_PNBUF); 1237 vput(nd.ni_dvp); 1238 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1239 return (error); 1240 goto restart; 1241 } 1242 #ifdef MAC 1243 if (error == 0 && !whiteout) 1244 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1245 &nd.ni_cnd, &vattr); 1246 #endif 1247 if (error == 0) { 1248 if (whiteout) 1249 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1250 else { 1251 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1252 &nd.ni_cnd, &vattr); 1253 if (error == 0) 1254 vput(nd.ni_vp); 1255 } 1256 } 1257 NDFREE(&nd, NDF_ONLY_PNBUF); 1258 vput(nd.ni_dvp); 1259 vn_finished_write(mp); 1260 return (error); 1261 } 1262 1263 /* 1264 * Create a named pipe. 1265 */ 1266 #ifndef _SYS_SYSPROTO_H_ 1267 struct mkfifo_args { 1268 char *path; 1269 int mode; 1270 }; 1271 #endif 1272 int 1273 sys_mkfifo(td, uap) 1274 struct thread *td; 1275 register struct mkfifo_args /* { 1276 char *path; 1277 int mode; 1278 } */ *uap; 1279 { 1280 1281 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1282 uap->mode)); 1283 } 1284 1285 #ifndef _SYS_SYSPROTO_H_ 1286 struct mkfifoat_args { 1287 int fd; 1288 char *path; 1289 mode_t mode; 1290 }; 1291 #endif 1292 int 1293 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1294 { 1295 1296 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1297 uap->mode)); 1298 } 1299 1300 int 1301 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1302 int mode) 1303 { 1304 struct mount *mp; 1305 struct vattr vattr; 1306 struct nameidata nd; 1307 cap_rights_t rights; 1308 int error; 1309 1310 AUDIT_ARG_MODE(mode); 1311 restart: 1312 bwillwrite(); 1313 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1314 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1315 td); 1316 if ((error = namei(&nd)) != 0) 1317 return (error); 1318 if (nd.ni_vp != NULL) { 1319 NDFREE(&nd, NDF_ONLY_PNBUF); 1320 if (nd.ni_vp == nd.ni_dvp) 1321 vrele(nd.ni_dvp); 1322 else 1323 vput(nd.ni_dvp); 1324 vrele(nd.ni_vp); 1325 return (EEXIST); 1326 } 1327 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1328 NDFREE(&nd, NDF_ONLY_PNBUF); 1329 vput(nd.ni_dvp); 1330 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1331 return (error); 1332 goto restart; 1333 } 1334 VATTR_NULL(&vattr); 1335 vattr.va_type = VFIFO; 1336 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1337 #ifdef MAC 1338 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1339 &vattr); 1340 if (error != 0) 1341 goto out; 1342 #endif 1343 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1344 if (error == 0) 1345 vput(nd.ni_vp); 1346 #ifdef MAC 1347 out: 1348 #endif 1349 vput(nd.ni_dvp); 1350 vn_finished_write(mp); 1351 NDFREE(&nd, NDF_ONLY_PNBUF); 1352 return (error); 1353 } 1354 1355 /* 1356 * Make a hard file link. 1357 */ 1358 #ifndef _SYS_SYSPROTO_H_ 1359 struct link_args { 1360 char *path; 1361 char *link; 1362 }; 1363 #endif 1364 int 1365 sys_link(td, uap) 1366 struct thread *td; 1367 register struct link_args /* { 1368 char *path; 1369 char *link; 1370 } */ *uap; 1371 { 1372 1373 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1374 UIO_USERSPACE, FOLLOW)); 1375 } 1376 1377 #ifndef _SYS_SYSPROTO_H_ 1378 struct linkat_args { 1379 int fd1; 1380 char *path1; 1381 int fd2; 1382 char *path2; 1383 int flag; 1384 }; 1385 #endif 1386 int 1387 sys_linkat(struct thread *td, struct linkat_args *uap) 1388 { 1389 int flag; 1390 1391 flag = uap->flag; 1392 if (flag & ~AT_SYMLINK_FOLLOW) 1393 return (EINVAL); 1394 1395 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1396 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1397 } 1398 1399 int hardlink_check_uid = 0; 1400 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1401 &hardlink_check_uid, 0, 1402 "Unprivileged processes cannot create hard links to files owned by other " 1403 "users"); 1404 static int hardlink_check_gid = 0; 1405 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1406 &hardlink_check_gid, 0, 1407 "Unprivileged processes cannot create hard links to files owned by other " 1408 "groups"); 1409 1410 static int 1411 can_hardlink(struct vnode *vp, struct ucred *cred) 1412 { 1413 struct vattr va; 1414 int error; 1415 1416 if (!hardlink_check_uid && !hardlink_check_gid) 1417 return (0); 1418 1419 error = VOP_GETATTR(vp, &va, cred); 1420 if (error != 0) 1421 return (error); 1422 1423 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1424 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1425 if (error != 0) 1426 return (error); 1427 } 1428 1429 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1430 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1431 if (error != 0) 1432 return (error); 1433 } 1434 1435 return (0); 1436 } 1437 1438 int 1439 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1440 enum uio_seg segflg, int follow) 1441 { 1442 struct vnode *vp; 1443 struct mount *mp; 1444 struct nameidata nd; 1445 cap_rights_t rights; 1446 int error; 1447 1448 again: 1449 bwillwrite(); 1450 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1451 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1452 1453 if ((error = namei(&nd)) != 0) 1454 return (error); 1455 NDFREE(&nd, NDF_ONLY_PNBUF); 1456 vp = nd.ni_vp; 1457 if (vp->v_type == VDIR) { 1458 vrele(vp); 1459 return (EPERM); /* POSIX */ 1460 } 1461 NDINIT_ATRIGHTS(&nd, CREATE, 1462 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1463 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1464 if ((error = namei(&nd)) == 0) { 1465 if (nd.ni_vp != NULL) { 1466 NDFREE(&nd, NDF_ONLY_PNBUF); 1467 if (nd.ni_dvp == nd.ni_vp) 1468 vrele(nd.ni_dvp); 1469 else 1470 vput(nd.ni_dvp); 1471 vrele(nd.ni_vp); 1472 vrele(vp); 1473 return (EEXIST); 1474 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1475 /* 1476 * Cross-device link. No need to recheck 1477 * vp->v_type, since it cannot change, except 1478 * to VBAD. 1479 */ 1480 NDFREE(&nd, NDF_ONLY_PNBUF); 1481 vput(nd.ni_dvp); 1482 vrele(vp); 1483 return (EXDEV); 1484 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1485 error = can_hardlink(vp, td->td_ucred); 1486 #ifdef MAC 1487 if (error == 0) 1488 error = mac_vnode_check_link(td->td_ucred, 1489 nd.ni_dvp, vp, &nd.ni_cnd); 1490 #endif 1491 if (error != 0) { 1492 vput(vp); 1493 vput(nd.ni_dvp); 1494 NDFREE(&nd, NDF_ONLY_PNBUF); 1495 return (error); 1496 } 1497 error = vn_start_write(vp, &mp, V_NOWAIT); 1498 if (error != 0) { 1499 vput(vp); 1500 vput(nd.ni_dvp); 1501 NDFREE(&nd, NDF_ONLY_PNBUF); 1502 error = vn_start_write(NULL, &mp, 1503 V_XSLEEP | PCATCH); 1504 if (error != 0) 1505 return (error); 1506 goto again; 1507 } 1508 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1509 VOP_UNLOCK(vp, 0); 1510 vput(nd.ni_dvp); 1511 vn_finished_write(mp); 1512 NDFREE(&nd, NDF_ONLY_PNBUF); 1513 } else { 1514 vput(nd.ni_dvp); 1515 NDFREE(&nd, NDF_ONLY_PNBUF); 1516 vrele(vp); 1517 goto again; 1518 } 1519 } 1520 vrele(vp); 1521 return (error); 1522 } 1523 1524 /* 1525 * Make a symbolic link. 1526 */ 1527 #ifndef _SYS_SYSPROTO_H_ 1528 struct symlink_args { 1529 char *path; 1530 char *link; 1531 }; 1532 #endif 1533 int 1534 sys_symlink(td, uap) 1535 struct thread *td; 1536 register struct symlink_args /* { 1537 char *path; 1538 char *link; 1539 } */ *uap; 1540 { 1541 1542 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1543 UIO_USERSPACE)); 1544 } 1545 1546 #ifndef _SYS_SYSPROTO_H_ 1547 struct symlinkat_args { 1548 char *path; 1549 int fd; 1550 char *path2; 1551 }; 1552 #endif 1553 int 1554 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1555 { 1556 1557 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1558 UIO_USERSPACE)); 1559 } 1560 1561 int 1562 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1563 enum uio_seg segflg) 1564 { 1565 struct mount *mp; 1566 struct vattr vattr; 1567 char *syspath; 1568 struct nameidata nd; 1569 int error; 1570 cap_rights_t rights; 1571 1572 if (segflg == UIO_SYSSPACE) { 1573 syspath = path1; 1574 } else { 1575 syspath = uma_zalloc(namei_zone, M_WAITOK); 1576 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1577 goto out; 1578 } 1579 AUDIT_ARG_TEXT(syspath); 1580 restart: 1581 bwillwrite(); 1582 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1583 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1584 td); 1585 if ((error = namei(&nd)) != 0) 1586 goto out; 1587 if (nd.ni_vp) { 1588 NDFREE(&nd, NDF_ONLY_PNBUF); 1589 if (nd.ni_vp == nd.ni_dvp) 1590 vrele(nd.ni_dvp); 1591 else 1592 vput(nd.ni_dvp); 1593 vrele(nd.ni_vp); 1594 error = EEXIST; 1595 goto out; 1596 } 1597 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1598 NDFREE(&nd, NDF_ONLY_PNBUF); 1599 vput(nd.ni_dvp); 1600 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1601 goto out; 1602 goto restart; 1603 } 1604 VATTR_NULL(&vattr); 1605 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1606 #ifdef MAC 1607 vattr.va_type = VLNK; 1608 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1609 &vattr); 1610 if (error != 0) 1611 goto out2; 1612 #endif 1613 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1614 if (error == 0) 1615 vput(nd.ni_vp); 1616 #ifdef MAC 1617 out2: 1618 #endif 1619 NDFREE(&nd, NDF_ONLY_PNBUF); 1620 vput(nd.ni_dvp); 1621 vn_finished_write(mp); 1622 out: 1623 if (segflg != UIO_SYSSPACE) 1624 uma_zfree(namei_zone, syspath); 1625 return (error); 1626 } 1627 1628 /* 1629 * Delete a whiteout from the filesystem. 1630 */ 1631 int 1632 sys_undelete(td, uap) 1633 struct thread *td; 1634 register struct undelete_args /* { 1635 char *path; 1636 } */ *uap; 1637 { 1638 struct mount *mp; 1639 struct nameidata nd; 1640 int error; 1641 1642 restart: 1643 bwillwrite(); 1644 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1645 UIO_USERSPACE, uap->path, td); 1646 error = namei(&nd); 1647 if (error != 0) 1648 return (error); 1649 1650 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1651 NDFREE(&nd, NDF_ONLY_PNBUF); 1652 if (nd.ni_vp == nd.ni_dvp) 1653 vrele(nd.ni_dvp); 1654 else 1655 vput(nd.ni_dvp); 1656 if (nd.ni_vp) 1657 vrele(nd.ni_vp); 1658 return (EEXIST); 1659 } 1660 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1661 NDFREE(&nd, NDF_ONLY_PNBUF); 1662 vput(nd.ni_dvp); 1663 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1664 return (error); 1665 goto restart; 1666 } 1667 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1668 NDFREE(&nd, NDF_ONLY_PNBUF); 1669 vput(nd.ni_dvp); 1670 vn_finished_write(mp); 1671 return (error); 1672 } 1673 1674 /* 1675 * Delete a name from the filesystem. 1676 */ 1677 #ifndef _SYS_SYSPROTO_H_ 1678 struct unlink_args { 1679 char *path; 1680 }; 1681 #endif 1682 int 1683 sys_unlink(td, uap) 1684 struct thread *td; 1685 struct unlink_args /* { 1686 char *path; 1687 } */ *uap; 1688 { 1689 1690 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1691 } 1692 1693 #ifndef _SYS_SYSPROTO_H_ 1694 struct unlinkat_args { 1695 int fd; 1696 char *path; 1697 int flag; 1698 }; 1699 #endif 1700 int 1701 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1702 { 1703 int flag = uap->flag; 1704 int fd = uap->fd; 1705 char *path = uap->path; 1706 1707 if (flag & ~AT_REMOVEDIR) 1708 return (EINVAL); 1709 1710 if (flag & AT_REMOVEDIR) 1711 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1712 else 1713 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1714 } 1715 1716 int 1717 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1718 ino_t oldinum) 1719 { 1720 struct mount *mp; 1721 struct vnode *vp; 1722 struct nameidata nd; 1723 struct stat sb; 1724 cap_rights_t rights; 1725 int error; 1726 1727 restart: 1728 bwillwrite(); 1729 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1730 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1731 if ((error = namei(&nd)) != 0) 1732 return (error == EINVAL ? EPERM : error); 1733 vp = nd.ni_vp; 1734 if (vp->v_type == VDIR && oldinum == 0) { 1735 error = EPERM; /* POSIX */ 1736 } else if (oldinum != 0 && 1737 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1738 sb.st_ino != oldinum) { 1739 error = EIDRM; /* Identifier removed */ 1740 } else { 1741 /* 1742 * The root of a mounted filesystem cannot be deleted. 1743 * 1744 * XXX: can this only be a VDIR case? 1745 */ 1746 if (vp->v_vflag & VV_ROOT) 1747 error = EBUSY; 1748 } 1749 if (error == 0) { 1750 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1751 NDFREE(&nd, NDF_ONLY_PNBUF); 1752 vput(nd.ni_dvp); 1753 if (vp == nd.ni_dvp) 1754 vrele(vp); 1755 else 1756 vput(vp); 1757 if ((error = vn_start_write(NULL, &mp, 1758 V_XSLEEP | PCATCH)) != 0) 1759 return (error); 1760 goto restart; 1761 } 1762 #ifdef MAC 1763 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1764 &nd.ni_cnd); 1765 if (error != 0) 1766 goto out; 1767 #endif 1768 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1769 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1770 #ifdef MAC 1771 out: 1772 #endif 1773 vn_finished_write(mp); 1774 } 1775 NDFREE(&nd, NDF_ONLY_PNBUF); 1776 vput(nd.ni_dvp); 1777 if (vp == nd.ni_dvp) 1778 vrele(vp); 1779 else 1780 vput(vp); 1781 return (error); 1782 } 1783 1784 /* 1785 * Reposition read/write file offset. 1786 */ 1787 #ifndef _SYS_SYSPROTO_H_ 1788 struct lseek_args { 1789 int fd; 1790 int pad; 1791 off_t offset; 1792 int whence; 1793 }; 1794 #endif 1795 int 1796 sys_lseek(td, uap) 1797 struct thread *td; 1798 register struct lseek_args /* { 1799 int fd; 1800 int pad; 1801 off_t offset; 1802 int whence; 1803 } */ *uap; 1804 { 1805 struct file *fp; 1806 cap_rights_t rights; 1807 int error; 1808 1809 AUDIT_ARG_FD(uap->fd); 1810 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1811 if (error != 0) 1812 return (error); 1813 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1814 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1815 fdrop(fp, td); 1816 return (error); 1817 } 1818 1819 #if defined(COMPAT_43) 1820 /* 1821 * Reposition read/write file offset. 1822 */ 1823 #ifndef _SYS_SYSPROTO_H_ 1824 struct olseek_args { 1825 int fd; 1826 long offset; 1827 int whence; 1828 }; 1829 #endif 1830 int 1831 olseek(td, uap) 1832 struct thread *td; 1833 register struct olseek_args /* { 1834 int fd; 1835 long offset; 1836 int whence; 1837 } */ *uap; 1838 { 1839 struct lseek_args /* { 1840 int fd; 1841 int pad; 1842 off_t offset; 1843 int whence; 1844 } */ nuap; 1845 1846 nuap.fd = uap->fd; 1847 nuap.offset = uap->offset; 1848 nuap.whence = uap->whence; 1849 return (sys_lseek(td, &nuap)); 1850 } 1851 #endif /* COMPAT_43 */ 1852 1853 #if defined(COMPAT_FREEBSD6) 1854 /* Version with the 'pad' argument */ 1855 int 1856 freebsd6_lseek(td, uap) 1857 struct thread *td; 1858 register struct freebsd6_lseek_args *uap; 1859 { 1860 struct lseek_args ouap; 1861 1862 ouap.fd = uap->fd; 1863 ouap.offset = uap->offset; 1864 ouap.whence = uap->whence; 1865 return (sys_lseek(td, &ouap)); 1866 } 1867 #endif 1868 1869 /* 1870 * Check access permissions using passed credentials. 1871 */ 1872 static int 1873 vn_access(vp, user_flags, cred, td) 1874 struct vnode *vp; 1875 int user_flags; 1876 struct ucred *cred; 1877 struct thread *td; 1878 { 1879 accmode_t accmode; 1880 int error; 1881 1882 /* Flags == 0 means only check for existence. */ 1883 if (user_flags == 0) 1884 return (0); 1885 1886 accmode = 0; 1887 if (user_flags & R_OK) 1888 accmode |= VREAD; 1889 if (user_flags & W_OK) 1890 accmode |= VWRITE; 1891 if (user_flags & X_OK) 1892 accmode |= VEXEC; 1893 #ifdef MAC 1894 error = mac_vnode_check_access(cred, vp, accmode); 1895 if (error != 0) 1896 return (error); 1897 #endif 1898 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1899 error = VOP_ACCESS(vp, accmode, cred, td); 1900 return (error); 1901 } 1902 1903 /* 1904 * Check access permissions using "real" credentials. 1905 */ 1906 #ifndef _SYS_SYSPROTO_H_ 1907 struct access_args { 1908 char *path; 1909 int amode; 1910 }; 1911 #endif 1912 int 1913 sys_access(td, uap) 1914 struct thread *td; 1915 register struct access_args /* { 1916 char *path; 1917 int amode; 1918 } */ *uap; 1919 { 1920 1921 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1922 0, uap->amode)); 1923 } 1924 1925 #ifndef _SYS_SYSPROTO_H_ 1926 struct faccessat_args { 1927 int dirfd; 1928 char *path; 1929 int amode; 1930 int flag; 1931 } 1932 #endif 1933 int 1934 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1935 { 1936 1937 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1938 uap->amode)); 1939 } 1940 1941 int 1942 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1943 int flag, int amode) 1944 { 1945 struct ucred *cred, *usecred; 1946 struct vnode *vp; 1947 struct nameidata nd; 1948 cap_rights_t rights; 1949 int error; 1950 1951 if (flag & ~AT_EACCESS) 1952 return (EINVAL); 1953 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1954 return (EINVAL); 1955 1956 /* 1957 * Create and modify a temporary credential instead of one that 1958 * is potentially shared (if we need one). 1959 */ 1960 cred = td->td_ucred; 1961 if ((flag & AT_EACCESS) == 0 && 1962 ((cred->cr_uid != cred->cr_ruid || 1963 cred->cr_rgid != cred->cr_groups[0]))) { 1964 usecred = crdup(cred); 1965 usecred->cr_uid = cred->cr_ruid; 1966 usecred->cr_groups[0] = cred->cr_rgid; 1967 td->td_ucred = usecred; 1968 } else 1969 usecred = cred; 1970 AUDIT_ARG_VALUE(amode); 1971 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1972 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1973 td); 1974 if ((error = namei(&nd)) != 0) 1975 goto out; 1976 vp = nd.ni_vp; 1977 1978 error = vn_access(vp, amode, usecred, td); 1979 NDFREE(&nd, NDF_ONLY_PNBUF); 1980 vput(vp); 1981 out: 1982 if (usecred != cred) { 1983 td->td_ucred = cred; 1984 crfree(usecred); 1985 } 1986 return (error); 1987 } 1988 1989 /* 1990 * Check access permissions using "effective" credentials. 1991 */ 1992 #ifndef _SYS_SYSPROTO_H_ 1993 struct eaccess_args { 1994 char *path; 1995 int amode; 1996 }; 1997 #endif 1998 int 1999 sys_eaccess(td, uap) 2000 struct thread *td; 2001 register struct eaccess_args /* { 2002 char *path; 2003 int amode; 2004 } */ *uap; 2005 { 2006 2007 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2008 AT_EACCESS, uap->amode)); 2009 } 2010 2011 #if defined(COMPAT_43) 2012 /* 2013 * Get file status; this version follows links. 2014 */ 2015 #ifndef _SYS_SYSPROTO_H_ 2016 struct ostat_args { 2017 char *path; 2018 struct ostat *ub; 2019 }; 2020 #endif 2021 int 2022 ostat(td, uap) 2023 struct thread *td; 2024 register struct ostat_args /* { 2025 char *path; 2026 struct ostat *ub; 2027 } */ *uap; 2028 { 2029 struct stat sb; 2030 struct ostat osb; 2031 int error; 2032 2033 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2034 &sb, NULL); 2035 if (error != 0) 2036 return (error); 2037 cvtstat(&sb, &osb); 2038 return (copyout(&osb, uap->ub, sizeof (osb))); 2039 } 2040 2041 /* 2042 * Get file status; this version does not follow links. 2043 */ 2044 #ifndef _SYS_SYSPROTO_H_ 2045 struct olstat_args { 2046 char *path; 2047 struct ostat *ub; 2048 }; 2049 #endif 2050 int 2051 olstat(td, uap) 2052 struct thread *td; 2053 register struct olstat_args /* { 2054 char *path; 2055 struct ostat *ub; 2056 } */ *uap; 2057 { 2058 struct stat sb; 2059 struct ostat osb; 2060 int error; 2061 2062 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2063 UIO_USERSPACE, &sb, NULL); 2064 if (error != 0) 2065 return (error); 2066 cvtstat(&sb, &osb); 2067 return (copyout(&osb, uap->ub, sizeof (osb))); 2068 } 2069 2070 /* 2071 * Convert from an old to a new stat structure. 2072 */ 2073 void 2074 cvtstat(st, ost) 2075 struct stat *st; 2076 struct ostat *ost; 2077 { 2078 2079 ost->st_dev = st->st_dev; 2080 ost->st_ino = st->st_ino; 2081 ost->st_mode = st->st_mode; 2082 ost->st_nlink = st->st_nlink; 2083 ost->st_uid = st->st_uid; 2084 ost->st_gid = st->st_gid; 2085 ost->st_rdev = st->st_rdev; 2086 if (st->st_size < (quad_t)1 << 32) 2087 ost->st_size = st->st_size; 2088 else 2089 ost->st_size = -2; 2090 ost->st_atim = st->st_atim; 2091 ost->st_mtim = st->st_mtim; 2092 ost->st_ctim = st->st_ctim; 2093 ost->st_blksize = st->st_blksize; 2094 ost->st_blocks = st->st_blocks; 2095 ost->st_flags = st->st_flags; 2096 ost->st_gen = st->st_gen; 2097 } 2098 #endif /* COMPAT_43 */ 2099 2100 /* 2101 * Get file status; this version follows links. 2102 */ 2103 #ifndef _SYS_SYSPROTO_H_ 2104 struct stat_args { 2105 char *path; 2106 struct stat *ub; 2107 }; 2108 #endif 2109 int 2110 sys_stat(td, uap) 2111 struct thread *td; 2112 register struct stat_args /* { 2113 char *path; 2114 struct stat *ub; 2115 } */ *uap; 2116 { 2117 struct stat sb; 2118 int error; 2119 2120 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2121 &sb, NULL); 2122 if (error == 0) 2123 error = copyout(&sb, uap->ub, sizeof (sb)); 2124 return (error); 2125 } 2126 2127 #ifndef _SYS_SYSPROTO_H_ 2128 struct fstatat_args { 2129 int fd; 2130 char *path; 2131 struct stat *buf; 2132 int flag; 2133 } 2134 #endif 2135 int 2136 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2137 { 2138 struct stat sb; 2139 int error; 2140 2141 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2142 UIO_USERSPACE, &sb, NULL); 2143 if (error == 0) 2144 error = copyout(&sb, uap->buf, sizeof (sb)); 2145 return (error); 2146 } 2147 2148 int 2149 kern_statat(struct thread *td, int flag, int fd, char *path, 2150 enum uio_seg pathseg, struct stat *sbp, 2151 void (*hook)(struct vnode *vp, struct stat *sbp)) 2152 { 2153 struct nameidata nd; 2154 struct stat sb; 2155 cap_rights_t rights; 2156 int error; 2157 2158 if (flag & ~AT_SYMLINK_NOFOLLOW) 2159 return (EINVAL); 2160 2161 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2162 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2163 cap_rights_init(&rights, CAP_FSTAT), td); 2164 2165 if ((error = namei(&nd)) != 0) 2166 return (error); 2167 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2168 if (error == 0) { 2169 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2170 if (S_ISREG(sb.st_mode)) 2171 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2172 if (__predict_false(hook != NULL)) 2173 hook(nd.ni_vp, &sb); 2174 } 2175 NDFREE(&nd, NDF_ONLY_PNBUF); 2176 vput(nd.ni_vp); 2177 if (error != 0) 2178 return (error); 2179 *sbp = sb; 2180 #ifdef KTRACE 2181 if (KTRPOINT(td, KTR_STRUCT)) 2182 ktrstat(&sb); 2183 #endif 2184 return (0); 2185 } 2186 2187 /* 2188 * Get file status; this version does not follow links. 2189 */ 2190 #ifndef _SYS_SYSPROTO_H_ 2191 struct lstat_args { 2192 char *path; 2193 struct stat *ub; 2194 }; 2195 #endif 2196 int 2197 sys_lstat(td, uap) 2198 struct thread *td; 2199 register struct lstat_args /* { 2200 char *path; 2201 struct stat *ub; 2202 } */ *uap; 2203 { 2204 struct stat sb; 2205 int error; 2206 2207 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2208 UIO_USERSPACE, &sb, NULL); 2209 if (error == 0) 2210 error = copyout(&sb, uap->ub, sizeof (sb)); 2211 return (error); 2212 } 2213 2214 /* 2215 * Implementation of the NetBSD [l]stat() functions. 2216 */ 2217 void 2218 cvtnstat(sb, nsb) 2219 struct stat *sb; 2220 struct nstat *nsb; 2221 { 2222 2223 bzero(nsb, sizeof *nsb); 2224 nsb->st_dev = sb->st_dev; 2225 nsb->st_ino = sb->st_ino; 2226 nsb->st_mode = sb->st_mode; 2227 nsb->st_nlink = sb->st_nlink; 2228 nsb->st_uid = sb->st_uid; 2229 nsb->st_gid = sb->st_gid; 2230 nsb->st_rdev = sb->st_rdev; 2231 nsb->st_atim = sb->st_atim; 2232 nsb->st_mtim = sb->st_mtim; 2233 nsb->st_ctim = sb->st_ctim; 2234 nsb->st_size = sb->st_size; 2235 nsb->st_blocks = sb->st_blocks; 2236 nsb->st_blksize = sb->st_blksize; 2237 nsb->st_flags = sb->st_flags; 2238 nsb->st_gen = sb->st_gen; 2239 nsb->st_birthtim = sb->st_birthtim; 2240 } 2241 2242 #ifndef _SYS_SYSPROTO_H_ 2243 struct nstat_args { 2244 char *path; 2245 struct nstat *ub; 2246 }; 2247 #endif 2248 int 2249 sys_nstat(td, uap) 2250 struct thread *td; 2251 register struct nstat_args /* { 2252 char *path; 2253 struct nstat *ub; 2254 } */ *uap; 2255 { 2256 struct stat sb; 2257 struct nstat nsb; 2258 int error; 2259 2260 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2261 &sb, NULL); 2262 if (error != 0) 2263 return (error); 2264 cvtnstat(&sb, &nsb); 2265 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2266 } 2267 2268 /* 2269 * NetBSD lstat. Get file status; this version does not follow links. 2270 */ 2271 #ifndef _SYS_SYSPROTO_H_ 2272 struct lstat_args { 2273 char *path; 2274 struct stat *ub; 2275 }; 2276 #endif 2277 int 2278 sys_nlstat(td, uap) 2279 struct thread *td; 2280 register struct nlstat_args /* { 2281 char *path; 2282 struct nstat *ub; 2283 } */ *uap; 2284 { 2285 struct stat sb; 2286 struct nstat nsb; 2287 int error; 2288 2289 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2290 UIO_USERSPACE, &sb, NULL); 2291 if (error != 0) 2292 return (error); 2293 cvtnstat(&sb, &nsb); 2294 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2295 } 2296 2297 /* 2298 * Get configurable pathname variables. 2299 */ 2300 #ifndef _SYS_SYSPROTO_H_ 2301 struct pathconf_args { 2302 char *path; 2303 int name; 2304 }; 2305 #endif 2306 int 2307 sys_pathconf(td, uap) 2308 struct thread *td; 2309 register struct pathconf_args /* { 2310 char *path; 2311 int name; 2312 } */ *uap; 2313 { 2314 2315 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2316 } 2317 2318 #ifndef _SYS_SYSPROTO_H_ 2319 struct lpathconf_args { 2320 char *path; 2321 int name; 2322 }; 2323 #endif 2324 int 2325 sys_lpathconf(td, uap) 2326 struct thread *td; 2327 register struct lpathconf_args /* { 2328 char *path; 2329 int name; 2330 } */ *uap; 2331 { 2332 2333 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2334 NOFOLLOW)); 2335 } 2336 2337 int 2338 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2339 u_long flags) 2340 { 2341 struct nameidata nd; 2342 int error; 2343 2344 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2345 pathseg, path, td); 2346 if ((error = namei(&nd)) != 0) 2347 return (error); 2348 NDFREE(&nd, NDF_ONLY_PNBUF); 2349 2350 /* If asynchronous I/O is available, it works for all files. */ 2351 if (name == _PC_ASYNC_IO) 2352 td->td_retval[0] = async_io_version; 2353 else 2354 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2355 vput(nd.ni_vp); 2356 return (error); 2357 } 2358 2359 /* 2360 * Return target name of a symbolic link. 2361 */ 2362 #ifndef _SYS_SYSPROTO_H_ 2363 struct readlink_args { 2364 char *path; 2365 char *buf; 2366 size_t count; 2367 }; 2368 #endif 2369 int 2370 sys_readlink(td, uap) 2371 struct thread *td; 2372 register struct readlink_args /* { 2373 char *path; 2374 char *buf; 2375 size_t count; 2376 } */ *uap; 2377 { 2378 2379 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2380 uap->buf, UIO_USERSPACE, uap->count)); 2381 } 2382 #ifndef _SYS_SYSPROTO_H_ 2383 struct readlinkat_args { 2384 int fd; 2385 char *path; 2386 char *buf; 2387 size_t bufsize; 2388 }; 2389 #endif 2390 int 2391 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2392 { 2393 2394 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2395 uap->buf, UIO_USERSPACE, uap->bufsize)); 2396 } 2397 2398 int 2399 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2400 char *buf, enum uio_seg bufseg, size_t count) 2401 { 2402 struct vnode *vp; 2403 struct iovec aiov; 2404 struct uio auio; 2405 struct nameidata nd; 2406 int error; 2407 2408 if (count > IOSIZE_MAX) 2409 return (EINVAL); 2410 2411 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2412 pathseg, path, fd, td); 2413 2414 if ((error = namei(&nd)) != 0) 2415 return (error); 2416 NDFREE(&nd, NDF_ONLY_PNBUF); 2417 vp = nd.ni_vp; 2418 #ifdef MAC 2419 error = mac_vnode_check_readlink(td->td_ucred, vp); 2420 if (error != 0) { 2421 vput(vp); 2422 return (error); 2423 } 2424 #endif 2425 if (vp->v_type != VLNK) 2426 error = EINVAL; 2427 else { 2428 aiov.iov_base = buf; 2429 aiov.iov_len = count; 2430 auio.uio_iov = &aiov; 2431 auio.uio_iovcnt = 1; 2432 auio.uio_offset = 0; 2433 auio.uio_rw = UIO_READ; 2434 auio.uio_segflg = bufseg; 2435 auio.uio_td = td; 2436 auio.uio_resid = count; 2437 error = VOP_READLINK(vp, &auio, td->td_ucred); 2438 td->td_retval[0] = count - auio.uio_resid; 2439 } 2440 vput(vp); 2441 return (error); 2442 } 2443 2444 /* 2445 * Common implementation code for chflags() and fchflags(). 2446 */ 2447 static int 2448 setfflags(td, vp, flags) 2449 struct thread *td; 2450 struct vnode *vp; 2451 u_long flags; 2452 { 2453 struct mount *mp; 2454 struct vattr vattr; 2455 int error; 2456 2457 /* We can't support the value matching VNOVAL. */ 2458 if (flags == VNOVAL) 2459 return (EOPNOTSUPP); 2460 2461 /* 2462 * Prevent non-root users from setting flags on devices. When 2463 * a device is reused, users can retain ownership of the device 2464 * if they are allowed to set flags and programs assume that 2465 * chown can't fail when done as root. 2466 */ 2467 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2468 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2469 if (error != 0) 2470 return (error); 2471 } 2472 2473 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2474 return (error); 2475 VATTR_NULL(&vattr); 2476 vattr.va_flags = flags; 2477 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2478 #ifdef MAC 2479 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2480 if (error == 0) 2481 #endif 2482 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2483 VOP_UNLOCK(vp, 0); 2484 vn_finished_write(mp); 2485 return (error); 2486 } 2487 2488 /* 2489 * Change flags of a file given a path name. 2490 */ 2491 #ifndef _SYS_SYSPROTO_H_ 2492 struct chflags_args { 2493 const char *path; 2494 u_long flags; 2495 }; 2496 #endif 2497 int 2498 sys_chflags(td, uap) 2499 struct thread *td; 2500 register struct chflags_args /* { 2501 const char *path; 2502 u_long flags; 2503 } */ *uap; 2504 { 2505 2506 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2507 uap->flags, 0)); 2508 } 2509 2510 #ifndef _SYS_SYSPROTO_H_ 2511 struct chflagsat_args { 2512 int fd; 2513 const char *path; 2514 u_long flags; 2515 int atflag; 2516 } 2517 #endif 2518 int 2519 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2520 { 2521 int fd = uap->fd; 2522 const char *path = uap->path; 2523 u_long flags = uap->flags; 2524 int atflag = uap->atflag; 2525 2526 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2527 return (EINVAL); 2528 2529 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2530 } 2531 2532 /* 2533 * Same as chflags() but doesn't follow symlinks. 2534 */ 2535 int 2536 sys_lchflags(td, uap) 2537 struct thread *td; 2538 register struct lchflags_args /* { 2539 const char *path; 2540 u_long flags; 2541 } */ *uap; 2542 { 2543 2544 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2545 uap->flags, AT_SYMLINK_NOFOLLOW)); 2546 } 2547 2548 static int 2549 kern_chflagsat(struct thread *td, int fd, const char *path, 2550 enum uio_seg pathseg, u_long flags, int atflag) 2551 { 2552 struct nameidata nd; 2553 cap_rights_t rights; 2554 int error, follow; 2555 2556 AUDIT_ARG_FFLAGS(flags); 2557 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2558 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2559 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2560 if ((error = namei(&nd)) != 0) 2561 return (error); 2562 NDFREE(&nd, NDF_ONLY_PNBUF); 2563 error = setfflags(td, nd.ni_vp, flags); 2564 vrele(nd.ni_vp); 2565 return (error); 2566 } 2567 2568 /* 2569 * Change flags of a file given a file descriptor. 2570 */ 2571 #ifndef _SYS_SYSPROTO_H_ 2572 struct fchflags_args { 2573 int fd; 2574 u_long flags; 2575 }; 2576 #endif 2577 int 2578 sys_fchflags(td, uap) 2579 struct thread *td; 2580 register struct fchflags_args /* { 2581 int fd; 2582 u_long flags; 2583 } */ *uap; 2584 { 2585 struct file *fp; 2586 cap_rights_t rights; 2587 int error; 2588 2589 AUDIT_ARG_FD(uap->fd); 2590 AUDIT_ARG_FFLAGS(uap->flags); 2591 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2592 &fp); 2593 if (error != 0) 2594 return (error); 2595 #ifdef AUDIT 2596 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2597 AUDIT_ARG_VNODE1(fp->f_vnode); 2598 VOP_UNLOCK(fp->f_vnode, 0); 2599 #endif 2600 error = setfflags(td, fp->f_vnode, uap->flags); 2601 fdrop(fp, td); 2602 return (error); 2603 } 2604 2605 /* 2606 * Common implementation code for chmod(), lchmod() and fchmod(). 2607 */ 2608 int 2609 setfmode(td, cred, vp, mode) 2610 struct thread *td; 2611 struct ucred *cred; 2612 struct vnode *vp; 2613 int mode; 2614 { 2615 struct mount *mp; 2616 struct vattr vattr; 2617 int error; 2618 2619 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2620 return (error); 2621 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2622 VATTR_NULL(&vattr); 2623 vattr.va_mode = mode & ALLPERMS; 2624 #ifdef MAC 2625 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2626 if (error == 0) 2627 #endif 2628 error = VOP_SETATTR(vp, &vattr, cred); 2629 VOP_UNLOCK(vp, 0); 2630 vn_finished_write(mp); 2631 return (error); 2632 } 2633 2634 /* 2635 * Change mode of a file given path name. 2636 */ 2637 #ifndef _SYS_SYSPROTO_H_ 2638 struct chmod_args { 2639 char *path; 2640 int mode; 2641 }; 2642 #endif 2643 int 2644 sys_chmod(td, uap) 2645 struct thread *td; 2646 register struct chmod_args /* { 2647 char *path; 2648 int mode; 2649 } */ *uap; 2650 { 2651 2652 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2653 uap->mode, 0)); 2654 } 2655 2656 #ifndef _SYS_SYSPROTO_H_ 2657 struct fchmodat_args { 2658 int dirfd; 2659 char *path; 2660 mode_t mode; 2661 int flag; 2662 } 2663 #endif 2664 int 2665 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2666 { 2667 int flag = uap->flag; 2668 int fd = uap->fd; 2669 char *path = uap->path; 2670 mode_t mode = uap->mode; 2671 2672 if (flag & ~AT_SYMLINK_NOFOLLOW) 2673 return (EINVAL); 2674 2675 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2676 } 2677 2678 /* 2679 * Change mode of a file given path name (don't follow links.) 2680 */ 2681 #ifndef _SYS_SYSPROTO_H_ 2682 struct lchmod_args { 2683 char *path; 2684 int mode; 2685 }; 2686 #endif 2687 int 2688 sys_lchmod(td, uap) 2689 struct thread *td; 2690 register struct lchmod_args /* { 2691 char *path; 2692 int mode; 2693 } */ *uap; 2694 { 2695 2696 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2697 uap->mode, AT_SYMLINK_NOFOLLOW)); 2698 } 2699 2700 int 2701 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2702 mode_t mode, int flag) 2703 { 2704 struct nameidata nd; 2705 cap_rights_t rights; 2706 int error, follow; 2707 2708 AUDIT_ARG_MODE(mode); 2709 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2710 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2711 cap_rights_init(&rights, CAP_FCHMOD), td); 2712 if ((error = namei(&nd)) != 0) 2713 return (error); 2714 NDFREE(&nd, NDF_ONLY_PNBUF); 2715 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2716 vrele(nd.ni_vp); 2717 return (error); 2718 } 2719 2720 /* 2721 * Change mode of a file given a file descriptor. 2722 */ 2723 #ifndef _SYS_SYSPROTO_H_ 2724 struct fchmod_args { 2725 int fd; 2726 int mode; 2727 }; 2728 #endif 2729 int 2730 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2731 { 2732 struct file *fp; 2733 cap_rights_t rights; 2734 int error; 2735 2736 AUDIT_ARG_FD(uap->fd); 2737 AUDIT_ARG_MODE(uap->mode); 2738 2739 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2740 if (error != 0) 2741 return (error); 2742 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2743 fdrop(fp, td); 2744 return (error); 2745 } 2746 2747 /* 2748 * Common implementation for chown(), lchown(), and fchown() 2749 */ 2750 int 2751 setfown(td, cred, vp, uid, gid) 2752 struct thread *td; 2753 struct ucred *cred; 2754 struct vnode *vp; 2755 uid_t uid; 2756 gid_t gid; 2757 { 2758 struct mount *mp; 2759 struct vattr vattr; 2760 int error; 2761 2762 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2763 return (error); 2764 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2765 VATTR_NULL(&vattr); 2766 vattr.va_uid = uid; 2767 vattr.va_gid = gid; 2768 #ifdef MAC 2769 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2770 vattr.va_gid); 2771 if (error == 0) 2772 #endif 2773 error = VOP_SETATTR(vp, &vattr, cred); 2774 VOP_UNLOCK(vp, 0); 2775 vn_finished_write(mp); 2776 return (error); 2777 } 2778 2779 /* 2780 * Set ownership given a path name. 2781 */ 2782 #ifndef _SYS_SYSPROTO_H_ 2783 struct chown_args { 2784 char *path; 2785 int uid; 2786 int gid; 2787 }; 2788 #endif 2789 int 2790 sys_chown(td, uap) 2791 struct thread *td; 2792 register struct chown_args /* { 2793 char *path; 2794 int uid; 2795 int gid; 2796 } */ *uap; 2797 { 2798 2799 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2800 uap->gid, 0)); 2801 } 2802 2803 #ifndef _SYS_SYSPROTO_H_ 2804 struct fchownat_args { 2805 int fd; 2806 const char * path; 2807 uid_t uid; 2808 gid_t gid; 2809 int flag; 2810 }; 2811 #endif 2812 int 2813 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2814 { 2815 int flag; 2816 2817 flag = uap->flag; 2818 if (flag & ~AT_SYMLINK_NOFOLLOW) 2819 return (EINVAL); 2820 2821 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2822 uap->gid, uap->flag)); 2823 } 2824 2825 int 2826 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2827 int uid, int gid, int flag) 2828 { 2829 struct nameidata nd; 2830 cap_rights_t rights; 2831 int error, follow; 2832 2833 AUDIT_ARG_OWNER(uid, gid); 2834 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2835 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2836 cap_rights_init(&rights, CAP_FCHOWN), td); 2837 2838 if ((error = namei(&nd)) != 0) 2839 return (error); 2840 NDFREE(&nd, NDF_ONLY_PNBUF); 2841 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2842 vrele(nd.ni_vp); 2843 return (error); 2844 } 2845 2846 /* 2847 * Set ownership given a path name, do not cross symlinks. 2848 */ 2849 #ifndef _SYS_SYSPROTO_H_ 2850 struct lchown_args { 2851 char *path; 2852 int uid; 2853 int gid; 2854 }; 2855 #endif 2856 int 2857 sys_lchown(td, uap) 2858 struct thread *td; 2859 register struct lchown_args /* { 2860 char *path; 2861 int uid; 2862 int gid; 2863 } */ *uap; 2864 { 2865 2866 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2867 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2868 } 2869 2870 /* 2871 * Set ownership given a file descriptor. 2872 */ 2873 #ifndef _SYS_SYSPROTO_H_ 2874 struct fchown_args { 2875 int fd; 2876 int uid; 2877 int gid; 2878 }; 2879 #endif 2880 int 2881 sys_fchown(td, uap) 2882 struct thread *td; 2883 register struct fchown_args /* { 2884 int fd; 2885 int uid; 2886 int gid; 2887 } */ *uap; 2888 { 2889 struct file *fp; 2890 cap_rights_t rights; 2891 int error; 2892 2893 AUDIT_ARG_FD(uap->fd); 2894 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2895 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2896 if (error != 0) 2897 return (error); 2898 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2899 fdrop(fp, td); 2900 return (error); 2901 } 2902 2903 /* 2904 * Common implementation code for utimes(), lutimes(), and futimes(). 2905 */ 2906 static int 2907 getutimes(usrtvp, tvpseg, tsp) 2908 const struct timeval *usrtvp; 2909 enum uio_seg tvpseg; 2910 struct timespec *tsp; 2911 { 2912 struct timeval tv[2]; 2913 const struct timeval *tvp; 2914 int error; 2915 2916 if (usrtvp == NULL) { 2917 vfs_timestamp(&tsp[0]); 2918 tsp[1] = tsp[0]; 2919 } else { 2920 if (tvpseg == UIO_SYSSPACE) { 2921 tvp = usrtvp; 2922 } else { 2923 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2924 return (error); 2925 tvp = tv; 2926 } 2927 2928 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2929 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2930 return (EINVAL); 2931 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2932 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2933 } 2934 return (0); 2935 } 2936 2937 /* 2938 * Common implementation code for futimens(), utimensat(). 2939 */ 2940 #define UTIMENS_NULL 0x1 2941 #define UTIMENS_EXIT 0x2 2942 static int 2943 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2944 struct timespec *tsp, int *retflags) 2945 { 2946 struct timespec tsnow; 2947 int error; 2948 2949 vfs_timestamp(&tsnow); 2950 *retflags = 0; 2951 if (usrtsp == NULL) { 2952 tsp[0] = tsnow; 2953 tsp[1] = tsnow; 2954 *retflags |= UTIMENS_NULL; 2955 return (0); 2956 } 2957 if (tspseg == UIO_SYSSPACE) { 2958 tsp[0] = usrtsp[0]; 2959 tsp[1] = usrtsp[1]; 2960 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2961 return (error); 2962 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2963 *retflags |= UTIMENS_EXIT; 2964 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2965 *retflags |= UTIMENS_NULL; 2966 if (tsp[0].tv_nsec == UTIME_OMIT) 2967 tsp[0].tv_sec = VNOVAL; 2968 else if (tsp[0].tv_nsec == UTIME_NOW) 2969 tsp[0] = tsnow; 2970 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2971 return (EINVAL); 2972 if (tsp[1].tv_nsec == UTIME_OMIT) 2973 tsp[1].tv_sec = VNOVAL; 2974 else if (tsp[1].tv_nsec == UTIME_NOW) 2975 tsp[1] = tsnow; 2976 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2977 return (EINVAL); 2978 2979 return (0); 2980 } 2981 2982 /* 2983 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2984 * and utimensat(). 2985 */ 2986 static int 2987 setutimes(td, vp, ts, numtimes, nullflag) 2988 struct thread *td; 2989 struct vnode *vp; 2990 const struct timespec *ts; 2991 int numtimes; 2992 int nullflag; 2993 { 2994 struct mount *mp; 2995 struct vattr vattr; 2996 int error, setbirthtime; 2997 2998 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2999 return (error); 3000 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3001 setbirthtime = 0; 3002 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3003 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3004 setbirthtime = 1; 3005 VATTR_NULL(&vattr); 3006 vattr.va_atime = ts[0]; 3007 vattr.va_mtime = ts[1]; 3008 if (setbirthtime) 3009 vattr.va_birthtime = ts[1]; 3010 if (numtimes > 2) 3011 vattr.va_birthtime = ts[2]; 3012 if (nullflag) 3013 vattr.va_vaflags |= VA_UTIMES_NULL; 3014 #ifdef MAC 3015 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3016 vattr.va_mtime); 3017 #endif 3018 if (error == 0) 3019 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3020 VOP_UNLOCK(vp, 0); 3021 vn_finished_write(mp); 3022 return (error); 3023 } 3024 3025 /* 3026 * Set the access and modification times of a file. 3027 */ 3028 #ifndef _SYS_SYSPROTO_H_ 3029 struct utimes_args { 3030 char *path; 3031 struct timeval *tptr; 3032 }; 3033 #endif 3034 int 3035 sys_utimes(td, uap) 3036 struct thread *td; 3037 register struct utimes_args /* { 3038 char *path; 3039 struct timeval *tptr; 3040 } */ *uap; 3041 { 3042 3043 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3044 uap->tptr, UIO_USERSPACE)); 3045 } 3046 3047 #ifndef _SYS_SYSPROTO_H_ 3048 struct futimesat_args { 3049 int fd; 3050 const char * path; 3051 const struct timeval * times; 3052 }; 3053 #endif 3054 int 3055 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3056 { 3057 3058 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3059 uap->times, UIO_USERSPACE)); 3060 } 3061 3062 int 3063 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3064 struct timeval *tptr, enum uio_seg tptrseg) 3065 { 3066 struct nameidata nd; 3067 struct timespec ts[2]; 3068 cap_rights_t rights; 3069 int error; 3070 3071 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3072 return (error); 3073 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3074 cap_rights_init(&rights, CAP_FUTIMES), td); 3075 3076 if ((error = namei(&nd)) != 0) 3077 return (error); 3078 NDFREE(&nd, NDF_ONLY_PNBUF); 3079 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3080 vrele(nd.ni_vp); 3081 return (error); 3082 } 3083 3084 /* 3085 * Set the access and modification times of a file. 3086 */ 3087 #ifndef _SYS_SYSPROTO_H_ 3088 struct lutimes_args { 3089 char *path; 3090 struct timeval *tptr; 3091 }; 3092 #endif 3093 int 3094 sys_lutimes(td, uap) 3095 struct thread *td; 3096 register struct lutimes_args /* { 3097 char *path; 3098 struct timeval *tptr; 3099 } */ *uap; 3100 { 3101 3102 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3103 UIO_USERSPACE)); 3104 } 3105 3106 int 3107 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3108 struct timeval *tptr, enum uio_seg tptrseg) 3109 { 3110 struct timespec ts[2]; 3111 struct nameidata nd; 3112 int error; 3113 3114 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3115 return (error); 3116 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3117 if ((error = namei(&nd)) != 0) 3118 return (error); 3119 NDFREE(&nd, NDF_ONLY_PNBUF); 3120 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3121 vrele(nd.ni_vp); 3122 return (error); 3123 } 3124 3125 /* 3126 * Set the access and modification times of a file. 3127 */ 3128 #ifndef _SYS_SYSPROTO_H_ 3129 struct futimes_args { 3130 int fd; 3131 struct timeval *tptr; 3132 }; 3133 #endif 3134 int 3135 sys_futimes(td, uap) 3136 struct thread *td; 3137 register struct futimes_args /* { 3138 int fd; 3139 struct timeval *tptr; 3140 } */ *uap; 3141 { 3142 3143 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3144 } 3145 3146 int 3147 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3148 enum uio_seg tptrseg) 3149 { 3150 struct timespec ts[2]; 3151 struct file *fp; 3152 cap_rights_t rights; 3153 int error; 3154 3155 AUDIT_ARG_FD(fd); 3156 error = getutimes(tptr, tptrseg, ts); 3157 if (error != 0) 3158 return (error); 3159 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3160 if (error != 0) 3161 return (error); 3162 #ifdef AUDIT 3163 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3164 AUDIT_ARG_VNODE1(fp->f_vnode); 3165 VOP_UNLOCK(fp->f_vnode, 0); 3166 #endif 3167 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3168 fdrop(fp, td); 3169 return (error); 3170 } 3171 3172 int 3173 sys_futimens(struct thread *td, struct futimens_args *uap) 3174 { 3175 3176 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3177 } 3178 3179 int 3180 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3181 enum uio_seg tptrseg) 3182 { 3183 struct timespec ts[2]; 3184 struct file *fp; 3185 cap_rights_t rights; 3186 int error, flags; 3187 3188 AUDIT_ARG_FD(fd); 3189 error = getutimens(tptr, tptrseg, ts, &flags); 3190 if (error != 0) 3191 return (error); 3192 if (flags & UTIMENS_EXIT) 3193 return (0); 3194 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3195 if (error != 0) 3196 return (error); 3197 #ifdef AUDIT 3198 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3199 AUDIT_ARG_VNODE1(fp->f_vnode); 3200 VOP_UNLOCK(fp->f_vnode, 0); 3201 #endif 3202 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3203 fdrop(fp, td); 3204 return (error); 3205 } 3206 3207 int 3208 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3209 { 3210 3211 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3212 uap->times, UIO_USERSPACE, uap->flag)); 3213 } 3214 3215 int 3216 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3217 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3218 { 3219 struct nameidata nd; 3220 struct timespec ts[2]; 3221 cap_rights_t rights; 3222 int error, flags; 3223 3224 if (flag & ~AT_SYMLINK_NOFOLLOW) 3225 return (EINVAL); 3226 3227 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3228 return (error); 3229 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3230 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3231 cap_rights_init(&rights, CAP_FUTIMES), td); 3232 if ((error = namei(&nd)) != 0) 3233 return (error); 3234 /* 3235 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3236 * POSIX states: 3237 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3238 * "Search permission is denied by a component of the path prefix." 3239 */ 3240 NDFREE(&nd, NDF_ONLY_PNBUF); 3241 if ((flags & UTIMENS_EXIT) == 0) 3242 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3243 vrele(nd.ni_vp); 3244 return (error); 3245 } 3246 3247 /* 3248 * Truncate a file given its path name. 3249 */ 3250 #ifndef _SYS_SYSPROTO_H_ 3251 struct truncate_args { 3252 char *path; 3253 int pad; 3254 off_t length; 3255 }; 3256 #endif 3257 int 3258 sys_truncate(td, uap) 3259 struct thread *td; 3260 register struct truncate_args /* { 3261 char *path; 3262 int pad; 3263 off_t length; 3264 } */ *uap; 3265 { 3266 3267 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3268 } 3269 3270 int 3271 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3272 { 3273 struct mount *mp; 3274 struct vnode *vp; 3275 void *rl_cookie; 3276 struct vattr vattr; 3277 struct nameidata nd; 3278 int error; 3279 3280 if (length < 0) 3281 return(EINVAL); 3282 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3283 if ((error = namei(&nd)) != 0) 3284 return (error); 3285 vp = nd.ni_vp; 3286 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3287 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3288 vn_rangelock_unlock(vp, rl_cookie); 3289 vrele(vp); 3290 return (error); 3291 } 3292 NDFREE(&nd, NDF_ONLY_PNBUF); 3293 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3294 if (vp->v_type == VDIR) 3295 error = EISDIR; 3296 #ifdef MAC 3297 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3298 } 3299 #endif 3300 else if ((error = vn_writechk(vp)) == 0 && 3301 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3302 VATTR_NULL(&vattr); 3303 vattr.va_size = length; 3304 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3305 } 3306 VOP_UNLOCK(vp, 0); 3307 vn_finished_write(mp); 3308 vn_rangelock_unlock(vp, rl_cookie); 3309 vrele(vp); 3310 return (error); 3311 } 3312 3313 #if defined(COMPAT_43) 3314 /* 3315 * Truncate a file given its path name. 3316 */ 3317 #ifndef _SYS_SYSPROTO_H_ 3318 struct otruncate_args { 3319 char *path; 3320 long length; 3321 }; 3322 #endif 3323 int 3324 otruncate(td, uap) 3325 struct thread *td; 3326 register struct otruncate_args /* { 3327 char *path; 3328 long length; 3329 } */ *uap; 3330 { 3331 struct truncate_args /* { 3332 char *path; 3333 int pad; 3334 off_t length; 3335 } */ nuap; 3336 3337 nuap.path = uap->path; 3338 nuap.length = uap->length; 3339 return (sys_truncate(td, &nuap)); 3340 } 3341 #endif /* COMPAT_43 */ 3342 3343 #if defined(COMPAT_FREEBSD6) 3344 /* Versions with the pad argument */ 3345 int 3346 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3347 { 3348 struct truncate_args ouap; 3349 3350 ouap.path = uap->path; 3351 ouap.length = uap->length; 3352 return (sys_truncate(td, &ouap)); 3353 } 3354 3355 int 3356 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3357 { 3358 struct ftruncate_args ouap; 3359 3360 ouap.fd = uap->fd; 3361 ouap.length = uap->length; 3362 return (sys_ftruncate(td, &ouap)); 3363 } 3364 #endif 3365 3366 /* 3367 * Sync an open file. 3368 */ 3369 #ifndef _SYS_SYSPROTO_H_ 3370 struct fsync_args { 3371 int fd; 3372 }; 3373 #endif 3374 int 3375 sys_fsync(td, uap) 3376 struct thread *td; 3377 struct fsync_args /* { 3378 int fd; 3379 } */ *uap; 3380 { 3381 struct vnode *vp; 3382 struct mount *mp; 3383 struct file *fp; 3384 cap_rights_t rights; 3385 int error, lock_flags; 3386 3387 AUDIT_ARG_FD(uap->fd); 3388 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3389 if (error != 0) 3390 return (error); 3391 vp = fp->f_vnode; 3392 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3393 if (error != 0) 3394 goto drop; 3395 if (MNT_SHARED_WRITES(mp) || 3396 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3397 lock_flags = LK_SHARED; 3398 } else { 3399 lock_flags = LK_EXCLUSIVE; 3400 } 3401 vn_lock(vp, lock_flags | LK_RETRY); 3402 AUDIT_ARG_VNODE1(vp); 3403 if (vp->v_object != NULL) { 3404 VM_OBJECT_WLOCK(vp->v_object); 3405 vm_object_page_clean(vp->v_object, 0, 0, 0); 3406 VM_OBJECT_WUNLOCK(vp->v_object); 3407 } 3408 error = VOP_FSYNC(vp, MNT_WAIT, td); 3409 3410 VOP_UNLOCK(vp, 0); 3411 vn_finished_write(mp); 3412 drop: 3413 fdrop(fp, td); 3414 return (error); 3415 } 3416 3417 /* 3418 * Rename files. Source and destination must either both be directories, or 3419 * both not be directories. If target is a directory, it must be empty. 3420 */ 3421 #ifndef _SYS_SYSPROTO_H_ 3422 struct rename_args { 3423 char *from; 3424 char *to; 3425 }; 3426 #endif 3427 int 3428 sys_rename(td, uap) 3429 struct thread *td; 3430 register struct rename_args /* { 3431 char *from; 3432 char *to; 3433 } */ *uap; 3434 { 3435 3436 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3437 uap->to, UIO_USERSPACE)); 3438 } 3439 3440 #ifndef _SYS_SYSPROTO_H_ 3441 struct renameat_args { 3442 int oldfd; 3443 char *old; 3444 int newfd; 3445 char *new; 3446 }; 3447 #endif 3448 int 3449 sys_renameat(struct thread *td, struct renameat_args *uap) 3450 { 3451 3452 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3453 UIO_USERSPACE)); 3454 } 3455 3456 int 3457 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3458 enum uio_seg pathseg) 3459 { 3460 struct mount *mp = NULL; 3461 struct vnode *tvp, *fvp, *tdvp; 3462 struct nameidata fromnd, tond; 3463 cap_rights_t rights; 3464 int error; 3465 3466 again: 3467 bwillwrite(); 3468 #ifdef MAC 3469 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3470 AUDITVNODE1, pathseg, old, oldfd, 3471 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3472 #else 3473 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3474 pathseg, old, oldfd, 3475 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3476 #endif 3477 3478 if ((error = namei(&fromnd)) != 0) 3479 return (error); 3480 #ifdef MAC 3481 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3482 fromnd.ni_vp, &fromnd.ni_cnd); 3483 VOP_UNLOCK(fromnd.ni_dvp, 0); 3484 if (fromnd.ni_dvp != fromnd.ni_vp) 3485 VOP_UNLOCK(fromnd.ni_vp, 0); 3486 #endif 3487 fvp = fromnd.ni_vp; 3488 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3489 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3490 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3491 if (fromnd.ni_vp->v_type == VDIR) 3492 tond.ni_cnd.cn_flags |= WILLBEDIR; 3493 if ((error = namei(&tond)) != 0) { 3494 /* Translate error code for rename("dir1", "dir2/."). */ 3495 if (error == EISDIR && fvp->v_type == VDIR) 3496 error = EINVAL; 3497 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3498 vrele(fromnd.ni_dvp); 3499 vrele(fvp); 3500 goto out1; 3501 } 3502 tdvp = tond.ni_dvp; 3503 tvp = tond.ni_vp; 3504 error = vn_start_write(fvp, &mp, V_NOWAIT); 3505 if (error != 0) { 3506 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3507 NDFREE(&tond, NDF_ONLY_PNBUF); 3508 if (tvp != NULL) 3509 vput(tvp); 3510 if (tdvp == tvp) 3511 vrele(tdvp); 3512 else 3513 vput(tdvp); 3514 vrele(fromnd.ni_dvp); 3515 vrele(fvp); 3516 vrele(tond.ni_startdir); 3517 if (fromnd.ni_startdir != NULL) 3518 vrele(fromnd.ni_startdir); 3519 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3520 if (error != 0) 3521 return (error); 3522 goto again; 3523 } 3524 if (tvp != NULL) { 3525 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3526 error = ENOTDIR; 3527 goto out; 3528 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3529 error = EISDIR; 3530 goto out; 3531 } 3532 #ifdef CAPABILITIES 3533 if (newfd != AT_FDCWD) { 3534 /* 3535 * If the target already exists we require CAP_UNLINKAT 3536 * from 'newfd'. 3537 */ 3538 error = cap_check(&tond.ni_filecaps.fc_rights, 3539 cap_rights_init(&rights, CAP_UNLINKAT)); 3540 if (error != 0) 3541 goto out; 3542 } 3543 #endif 3544 } 3545 if (fvp == tdvp) { 3546 error = EINVAL; 3547 goto out; 3548 } 3549 /* 3550 * If the source is the same as the destination (that is, if they 3551 * are links to the same vnode), then there is nothing to do. 3552 */ 3553 if (fvp == tvp) 3554 error = -1; 3555 #ifdef MAC 3556 else 3557 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3558 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3559 #endif 3560 out: 3561 if (error == 0) { 3562 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3563 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3564 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3565 NDFREE(&tond, NDF_ONLY_PNBUF); 3566 } else { 3567 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3568 NDFREE(&tond, NDF_ONLY_PNBUF); 3569 if (tvp != NULL) 3570 vput(tvp); 3571 if (tdvp == tvp) 3572 vrele(tdvp); 3573 else 3574 vput(tdvp); 3575 vrele(fromnd.ni_dvp); 3576 vrele(fvp); 3577 } 3578 vrele(tond.ni_startdir); 3579 vn_finished_write(mp); 3580 out1: 3581 if (fromnd.ni_startdir) 3582 vrele(fromnd.ni_startdir); 3583 if (error == -1) 3584 return (0); 3585 return (error); 3586 } 3587 3588 /* 3589 * Make a directory file. 3590 */ 3591 #ifndef _SYS_SYSPROTO_H_ 3592 struct mkdir_args { 3593 char *path; 3594 int mode; 3595 }; 3596 #endif 3597 int 3598 sys_mkdir(td, uap) 3599 struct thread *td; 3600 register struct mkdir_args /* { 3601 char *path; 3602 int mode; 3603 } */ *uap; 3604 { 3605 3606 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3607 uap->mode)); 3608 } 3609 3610 #ifndef _SYS_SYSPROTO_H_ 3611 struct mkdirat_args { 3612 int fd; 3613 char *path; 3614 mode_t mode; 3615 }; 3616 #endif 3617 int 3618 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3619 { 3620 3621 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3622 } 3623 3624 int 3625 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3626 int mode) 3627 { 3628 struct mount *mp; 3629 struct vnode *vp; 3630 struct vattr vattr; 3631 struct nameidata nd; 3632 cap_rights_t rights; 3633 int error; 3634 3635 AUDIT_ARG_MODE(mode); 3636 restart: 3637 bwillwrite(); 3638 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3639 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3640 td); 3641 nd.ni_cnd.cn_flags |= WILLBEDIR; 3642 if ((error = namei(&nd)) != 0) 3643 return (error); 3644 vp = nd.ni_vp; 3645 if (vp != NULL) { 3646 NDFREE(&nd, NDF_ONLY_PNBUF); 3647 /* 3648 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3649 * the strange behaviour of leaving the vnode unlocked 3650 * if the target is the same vnode as the parent. 3651 */ 3652 if (vp == nd.ni_dvp) 3653 vrele(nd.ni_dvp); 3654 else 3655 vput(nd.ni_dvp); 3656 vrele(vp); 3657 return (EEXIST); 3658 } 3659 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3660 NDFREE(&nd, NDF_ONLY_PNBUF); 3661 vput(nd.ni_dvp); 3662 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3663 return (error); 3664 goto restart; 3665 } 3666 VATTR_NULL(&vattr); 3667 vattr.va_type = VDIR; 3668 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3669 #ifdef MAC 3670 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3671 &vattr); 3672 if (error != 0) 3673 goto out; 3674 #endif 3675 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3676 #ifdef MAC 3677 out: 3678 #endif 3679 NDFREE(&nd, NDF_ONLY_PNBUF); 3680 vput(nd.ni_dvp); 3681 if (error == 0) 3682 vput(nd.ni_vp); 3683 vn_finished_write(mp); 3684 return (error); 3685 } 3686 3687 /* 3688 * Remove a directory file. 3689 */ 3690 #ifndef _SYS_SYSPROTO_H_ 3691 struct rmdir_args { 3692 char *path; 3693 }; 3694 #endif 3695 int 3696 sys_rmdir(td, uap) 3697 struct thread *td; 3698 struct rmdir_args /* { 3699 char *path; 3700 } */ *uap; 3701 { 3702 3703 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3704 } 3705 3706 int 3707 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3708 { 3709 struct mount *mp; 3710 struct vnode *vp; 3711 struct nameidata nd; 3712 cap_rights_t rights; 3713 int error; 3714 3715 restart: 3716 bwillwrite(); 3717 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3718 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3719 if ((error = namei(&nd)) != 0) 3720 return (error); 3721 vp = nd.ni_vp; 3722 if (vp->v_type != VDIR) { 3723 error = ENOTDIR; 3724 goto out; 3725 } 3726 /* 3727 * No rmdir "." please. 3728 */ 3729 if (nd.ni_dvp == vp) { 3730 error = EINVAL; 3731 goto out; 3732 } 3733 /* 3734 * The root of a mounted filesystem cannot be deleted. 3735 */ 3736 if (vp->v_vflag & VV_ROOT) { 3737 error = EBUSY; 3738 goto out; 3739 } 3740 #ifdef MAC 3741 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3742 &nd.ni_cnd); 3743 if (error != 0) 3744 goto out; 3745 #endif 3746 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3747 NDFREE(&nd, NDF_ONLY_PNBUF); 3748 vput(vp); 3749 if (nd.ni_dvp == vp) 3750 vrele(nd.ni_dvp); 3751 else 3752 vput(nd.ni_dvp); 3753 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3754 return (error); 3755 goto restart; 3756 } 3757 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3758 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3759 vn_finished_write(mp); 3760 out: 3761 NDFREE(&nd, NDF_ONLY_PNBUF); 3762 vput(vp); 3763 if (nd.ni_dvp == vp) 3764 vrele(nd.ni_dvp); 3765 else 3766 vput(nd.ni_dvp); 3767 return (error); 3768 } 3769 3770 #ifdef COMPAT_43 3771 /* 3772 * Read a block of directory entries in a filesystem independent format. 3773 */ 3774 #ifndef _SYS_SYSPROTO_H_ 3775 struct ogetdirentries_args { 3776 int fd; 3777 char *buf; 3778 u_int count; 3779 long *basep; 3780 }; 3781 #endif 3782 int 3783 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3784 { 3785 long loff; 3786 int error; 3787 3788 error = kern_ogetdirentries(td, uap, &loff); 3789 if (error == 0) 3790 error = copyout(&loff, uap->basep, sizeof(long)); 3791 return (error); 3792 } 3793 3794 int 3795 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3796 long *ploff) 3797 { 3798 struct vnode *vp; 3799 struct file *fp; 3800 struct uio auio, kuio; 3801 struct iovec aiov, kiov; 3802 struct dirent *dp, *edp; 3803 cap_rights_t rights; 3804 caddr_t dirbuf; 3805 int error, eofflag, readcnt; 3806 long loff; 3807 off_t foffset; 3808 3809 /* XXX arbitrary sanity limit on `count'. */ 3810 if (uap->count > 64 * 1024) 3811 return (EINVAL); 3812 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3813 if (error != 0) 3814 return (error); 3815 if ((fp->f_flag & FREAD) == 0) { 3816 fdrop(fp, td); 3817 return (EBADF); 3818 } 3819 vp = fp->f_vnode; 3820 foffset = foffset_lock(fp, 0); 3821 unionread: 3822 if (vp->v_type != VDIR) { 3823 foffset_unlock(fp, foffset, 0); 3824 fdrop(fp, td); 3825 return (EINVAL); 3826 } 3827 aiov.iov_base = uap->buf; 3828 aiov.iov_len = uap->count; 3829 auio.uio_iov = &aiov; 3830 auio.uio_iovcnt = 1; 3831 auio.uio_rw = UIO_READ; 3832 auio.uio_segflg = UIO_USERSPACE; 3833 auio.uio_td = td; 3834 auio.uio_resid = uap->count; 3835 vn_lock(vp, LK_SHARED | LK_RETRY); 3836 loff = auio.uio_offset = foffset; 3837 #ifdef MAC 3838 error = mac_vnode_check_readdir(td->td_ucred, vp); 3839 if (error != 0) { 3840 VOP_UNLOCK(vp, 0); 3841 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3842 fdrop(fp, td); 3843 return (error); 3844 } 3845 #endif 3846 # if (BYTE_ORDER != LITTLE_ENDIAN) 3847 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3848 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3849 NULL, NULL); 3850 foffset = auio.uio_offset; 3851 } else 3852 # endif 3853 { 3854 kuio = auio; 3855 kuio.uio_iov = &kiov; 3856 kuio.uio_segflg = UIO_SYSSPACE; 3857 kiov.iov_len = uap->count; 3858 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3859 kiov.iov_base = dirbuf; 3860 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3861 NULL, NULL); 3862 foffset = kuio.uio_offset; 3863 if (error == 0) { 3864 readcnt = uap->count - kuio.uio_resid; 3865 edp = (struct dirent *)&dirbuf[readcnt]; 3866 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3867 # if (BYTE_ORDER == LITTLE_ENDIAN) 3868 /* 3869 * The expected low byte of 3870 * dp->d_namlen is our dp->d_type. 3871 * The high MBZ byte of dp->d_namlen 3872 * is our dp->d_namlen. 3873 */ 3874 dp->d_type = dp->d_namlen; 3875 dp->d_namlen = 0; 3876 # else 3877 /* 3878 * The dp->d_type is the high byte 3879 * of the expected dp->d_namlen, 3880 * so must be zero'ed. 3881 */ 3882 dp->d_type = 0; 3883 # endif 3884 if (dp->d_reclen > 0) { 3885 dp = (struct dirent *) 3886 ((char *)dp + dp->d_reclen); 3887 } else { 3888 error = EIO; 3889 break; 3890 } 3891 } 3892 if (dp >= edp) 3893 error = uiomove(dirbuf, readcnt, &auio); 3894 } 3895 free(dirbuf, M_TEMP); 3896 } 3897 if (error != 0) { 3898 VOP_UNLOCK(vp, 0); 3899 foffset_unlock(fp, foffset, 0); 3900 fdrop(fp, td); 3901 return (error); 3902 } 3903 if (uap->count == auio.uio_resid && 3904 (vp->v_vflag & VV_ROOT) && 3905 (vp->v_mount->mnt_flag & MNT_UNION)) { 3906 struct vnode *tvp = vp; 3907 vp = vp->v_mount->mnt_vnodecovered; 3908 VREF(vp); 3909 fp->f_vnode = vp; 3910 fp->f_data = vp; 3911 foffset = 0; 3912 vput(tvp); 3913 goto unionread; 3914 } 3915 VOP_UNLOCK(vp, 0); 3916 foffset_unlock(fp, foffset, 0); 3917 fdrop(fp, td); 3918 td->td_retval[0] = uap->count - auio.uio_resid; 3919 if (error == 0) 3920 *ploff = loff; 3921 return (error); 3922 } 3923 #endif /* COMPAT_43 */ 3924 3925 /* 3926 * Read a block of directory entries in a filesystem independent format. 3927 */ 3928 #ifndef _SYS_SYSPROTO_H_ 3929 struct getdirentries_args { 3930 int fd; 3931 char *buf; 3932 u_int count; 3933 long *basep; 3934 }; 3935 #endif 3936 int 3937 sys_getdirentries(td, uap) 3938 struct thread *td; 3939 register struct getdirentries_args /* { 3940 int fd; 3941 char *buf; 3942 u_int count; 3943 long *basep; 3944 } */ *uap; 3945 { 3946 long base; 3947 int error; 3948 3949 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3950 NULL, UIO_USERSPACE); 3951 if (error != 0) 3952 return (error); 3953 if (uap->basep != NULL) 3954 error = copyout(&base, uap->basep, sizeof(long)); 3955 return (error); 3956 } 3957 3958 int 3959 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3960 long *basep, ssize_t *residp, enum uio_seg bufseg) 3961 { 3962 struct vnode *vp; 3963 struct file *fp; 3964 struct uio auio; 3965 struct iovec aiov; 3966 cap_rights_t rights; 3967 long loff; 3968 int error, eofflag; 3969 off_t foffset; 3970 3971 AUDIT_ARG_FD(fd); 3972 if (count > IOSIZE_MAX) 3973 return (EINVAL); 3974 auio.uio_resid = count; 3975 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3976 if (error != 0) 3977 return (error); 3978 if ((fp->f_flag & FREAD) == 0) { 3979 fdrop(fp, td); 3980 return (EBADF); 3981 } 3982 vp = fp->f_vnode; 3983 foffset = foffset_lock(fp, 0); 3984 unionread: 3985 if (vp->v_type != VDIR) { 3986 error = EINVAL; 3987 goto fail; 3988 } 3989 aiov.iov_base = buf; 3990 aiov.iov_len = count; 3991 auio.uio_iov = &aiov; 3992 auio.uio_iovcnt = 1; 3993 auio.uio_rw = UIO_READ; 3994 auio.uio_segflg = bufseg; 3995 auio.uio_td = td; 3996 vn_lock(vp, LK_SHARED | LK_RETRY); 3997 AUDIT_ARG_VNODE1(vp); 3998 loff = auio.uio_offset = foffset; 3999 #ifdef MAC 4000 error = mac_vnode_check_readdir(td->td_ucred, vp); 4001 if (error == 0) 4002 #endif 4003 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4004 NULL); 4005 foffset = auio.uio_offset; 4006 if (error != 0) { 4007 VOP_UNLOCK(vp, 0); 4008 goto fail; 4009 } 4010 if (count == auio.uio_resid && 4011 (vp->v_vflag & VV_ROOT) && 4012 (vp->v_mount->mnt_flag & MNT_UNION)) { 4013 struct vnode *tvp = vp; 4014 4015 vp = vp->v_mount->mnt_vnodecovered; 4016 VREF(vp); 4017 fp->f_vnode = vp; 4018 fp->f_data = vp; 4019 foffset = 0; 4020 vput(tvp); 4021 goto unionread; 4022 } 4023 VOP_UNLOCK(vp, 0); 4024 *basep = loff; 4025 if (residp != NULL) 4026 *residp = auio.uio_resid; 4027 td->td_retval[0] = count - auio.uio_resid; 4028 fail: 4029 foffset_unlock(fp, foffset, 0); 4030 fdrop(fp, td); 4031 return (error); 4032 } 4033 4034 #ifndef _SYS_SYSPROTO_H_ 4035 struct getdents_args { 4036 int fd; 4037 char *buf; 4038 size_t count; 4039 }; 4040 #endif 4041 int 4042 sys_getdents(td, uap) 4043 struct thread *td; 4044 register struct getdents_args /* { 4045 int fd; 4046 char *buf; 4047 u_int count; 4048 } */ *uap; 4049 { 4050 struct getdirentries_args ap; 4051 4052 ap.fd = uap->fd; 4053 ap.buf = uap->buf; 4054 ap.count = uap->count; 4055 ap.basep = NULL; 4056 return (sys_getdirentries(td, &ap)); 4057 } 4058 4059 /* 4060 * Set the mode mask for creation of filesystem nodes. 4061 */ 4062 #ifndef _SYS_SYSPROTO_H_ 4063 struct umask_args { 4064 int newmask; 4065 }; 4066 #endif 4067 int 4068 sys_umask(td, uap) 4069 struct thread *td; 4070 struct umask_args /* { 4071 int newmask; 4072 } */ *uap; 4073 { 4074 struct filedesc *fdp; 4075 4076 fdp = td->td_proc->p_fd; 4077 FILEDESC_XLOCK(fdp); 4078 td->td_retval[0] = fdp->fd_cmask; 4079 fdp->fd_cmask = uap->newmask & ALLPERMS; 4080 FILEDESC_XUNLOCK(fdp); 4081 return (0); 4082 } 4083 4084 /* 4085 * Void all references to file by ripping underlying filesystem away from 4086 * vnode. 4087 */ 4088 #ifndef _SYS_SYSPROTO_H_ 4089 struct revoke_args { 4090 char *path; 4091 }; 4092 #endif 4093 int 4094 sys_revoke(td, uap) 4095 struct thread *td; 4096 register struct revoke_args /* { 4097 char *path; 4098 } */ *uap; 4099 { 4100 struct vnode *vp; 4101 struct vattr vattr; 4102 struct nameidata nd; 4103 int error; 4104 4105 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4106 uap->path, td); 4107 if ((error = namei(&nd)) != 0) 4108 return (error); 4109 vp = nd.ni_vp; 4110 NDFREE(&nd, NDF_ONLY_PNBUF); 4111 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4112 error = EINVAL; 4113 goto out; 4114 } 4115 #ifdef MAC 4116 error = mac_vnode_check_revoke(td->td_ucred, vp); 4117 if (error != 0) 4118 goto out; 4119 #endif 4120 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4121 if (error != 0) 4122 goto out; 4123 if (td->td_ucred->cr_uid != vattr.va_uid) { 4124 error = priv_check(td, PRIV_VFS_ADMIN); 4125 if (error != 0) 4126 goto out; 4127 } 4128 if (vcount(vp) > 1) 4129 VOP_REVOKE(vp, REVOKEALL); 4130 out: 4131 vput(vp); 4132 return (error); 4133 } 4134 4135 /* 4136 * Convert a user file descriptor to a kernel file entry and check that, if it 4137 * is a capability, the correct rights are present. A reference on the file 4138 * entry is held upon returning. 4139 */ 4140 int 4141 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4142 { 4143 struct file *fp; 4144 int error; 4145 4146 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4147 if (error != 0) 4148 return (error); 4149 4150 /* 4151 * The file could be not of the vnode type, or it may be not 4152 * yet fully initialized, in which case the f_vnode pointer 4153 * may be set, but f_ops is still badfileops. E.g., 4154 * devfs_open() transiently create such situation to 4155 * facilitate csw d_fdopen(). 4156 * 4157 * Dupfdopen() handling in kern_openat() installs the 4158 * half-baked file into the process descriptor table, allowing 4159 * other thread to dereference it. Guard against the race by 4160 * checking f_ops. 4161 */ 4162 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4163 fdrop(fp, td); 4164 return (EINVAL); 4165 } 4166 *fpp = fp; 4167 return (0); 4168 } 4169 4170 4171 /* 4172 * Get an (NFS) file handle. 4173 */ 4174 #ifndef _SYS_SYSPROTO_H_ 4175 struct lgetfh_args { 4176 char *fname; 4177 fhandle_t *fhp; 4178 }; 4179 #endif 4180 int 4181 sys_lgetfh(td, uap) 4182 struct thread *td; 4183 register struct lgetfh_args *uap; 4184 { 4185 struct nameidata nd; 4186 fhandle_t fh; 4187 register struct vnode *vp; 4188 int error; 4189 4190 error = priv_check(td, PRIV_VFS_GETFH); 4191 if (error != 0) 4192 return (error); 4193 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4194 uap->fname, td); 4195 error = namei(&nd); 4196 if (error != 0) 4197 return (error); 4198 NDFREE(&nd, NDF_ONLY_PNBUF); 4199 vp = nd.ni_vp; 4200 bzero(&fh, sizeof(fh)); 4201 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4202 error = VOP_VPTOFH(vp, &fh.fh_fid); 4203 vput(vp); 4204 if (error == 0) 4205 error = copyout(&fh, uap->fhp, sizeof (fh)); 4206 return (error); 4207 } 4208 4209 #ifndef _SYS_SYSPROTO_H_ 4210 struct getfh_args { 4211 char *fname; 4212 fhandle_t *fhp; 4213 }; 4214 #endif 4215 int 4216 sys_getfh(td, uap) 4217 struct thread *td; 4218 register struct getfh_args *uap; 4219 { 4220 struct nameidata nd; 4221 fhandle_t fh; 4222 register struct vnode *vp; 4223 int error; 4224 4225 error = priv_check(td, PRIV_VFS_GETFH); 4226 if (error != 0) 4227 return (error); 4228 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4229 uap->fname, td); 4230 error = namei(&nd); 4231 if (error != 0) 4232 return (error); 4233 NDFREE(&nd, NDF_ONLY_PNBUF); 4234 vp = nd.ni_vp; 4235 bzero(&fh, sizeof(fh)); 4236 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4237 error = VOP_VPTOFH(vp, &fh.fh_fid); 4238 vput(vp); 4239 if (error == 0) 4240 error = copyout(&fh, uap->fhp, sizeof (fh)); 4241 return (error); 4242 } 4243 4244 /* 4245 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4246 * open descriptor. 4247 * 4248 * warning: do not remove the priv_check() call or this becomes one giant 4249 * security hole. 4250 */ 4251 #ifndef _SYS_SYSPROTO_H_ 4252 struct fhopen_args { 4253 const struct fhandle *u_fhp; 4254 int flags; 4255 }; 4256 #endif 4257 int 4258 sys_fhopen(td, uap) 4259 struct thread *td; 4260 struct fhopen_args /* { 4261 const struct fhandle *u_fhp; 4262 int flags; 4263 } */ *uap; 4264 { 4265 struct mount *mp; 4266 struct vnode *vp; 4267 struct fhandle fhp; 4268 struct file *fp; 4269 int fmode, error; 4270 int indx; 4271 4272 error = priv_check(td, PRIV_VFS_FHOPEN); 4273 if (error != 0) 4274 return (error); 4275 indx = -1; 4276 fmode = FFLAGS(uap->flags); 4277 /* why not allow a non-read/write open for our lockd? */ 4278 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4279 return (EINVAL); 4280 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4281 if (error != 0) 4282 return(error); 4283 /* find the mount point */ 4284 mp = vfs_busyfs(&fhp.fh_fsid); 4285 if (mp == NULL) 4286 return (ESTALE); 4287 /* now give me my vnode, it gets returned to me locked */ 4288 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4289 vfs_unbusy(mp); 4290 if (error != 0) 4291 return (error); 4292 4293 error = falloc_noinstall(td, &fp); 4294 if (error != 0) { 4295 vput(vp); 4296 return (error); 4297 } 4298 /* 4299 * An extra reference on `fp' has been held for us by 4300 * falloc_noinstall(). 4301 */ 4302 4303 #ifdef INVARIANTS 4304 td->td_dupfd = -1; 4305 #endif 4306 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4307 if (error != 0) { 4308 KASSERT(fp->f_ops == &badfileops, 4309 ("VOP_OPEN in fhopen() set f_ops")); 4310 KASSERT(td->td_dupfd < 0, 4311 ("fhopen() encountered fdopen()")); 4312 4313 vput(vp); 4314 goto bad; 4315 } 4316 #ifdef INVARIANTS 4317 td->td_dupfd = 0; 4318 #endif 4319 fp->f_vnode = vp; 4320 fp->f_seqcount = 1; 4321 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4322 &vnops); 4323 VOP_UNLOCK(vp, 0); 4324 if ((fmode & O_TRUNC) != 0) { 4325 error = fo_truncate(fp, 0, td->td_ucred, td); 4326 if (error != 0) 4327 goto bad; 4328 } 4329 4330 error = finstall(td, fp, &indx, fmode, NULL); 4331 bad: 4332 fdrop(fp, td); 4333 td->td_retval[0] = indx; 4334 return (error); 4335 } 4336 4337 /* 4338 * Stat an (NFS) file handle. 4339 */ 4340 #ifndef _SYS_SYSPROTO_H_ 4341 struct fhstat_args { 4342 struct fhandle *u_fhp; 4343 struct stat *sb; 4344 }; 4345 #endif 4346 int 4347 sys_fhstat(td, uap) 4348 struct thread *td; 4349 register struct fhstat_args /* { 4350 struct fhandle *u_fhp; 4351 struct stat *sb; 4352 } */ *uap; 4353 { 4354 struct stat sb; 4355 struct fhandle fh; 4356 int error; 4357 4358 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4359 if (error != 0) 4360 return (error); 4361 error = kern_fhstat(td, fh, &sb); 4362 if (error == 0) 4363 error = copyout(&sb, uap->sb, sizeof(sb)); 4364 return (error); 4365 } 4366 4367 int 4368 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4369 { 4370 struct mount *mp; 4371 struct vnode *vp; 4372 int error; 4373 4374 error = priv_check(td, PRIV_VFS_FHSTAT); 4375 if (error != 0) 4376 return (error); 4377 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4378 return (ESTALE); 4379 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4380 vfs_unbusy(mp); 4381 if (error != 0) 4382 return (error); 4383 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4384 vput(vp); 4385 return (error); 4386 } 4387 4388 /* 4389 * Implement fstatfs() for (NFS) file handles. 4390 */ 4391 #ifndef _SYS_SYSPROTO_H_ 4392 struct fhstatfs_args { 4393 struct fhandle *u_fhp; 4394 struct statfs *buf; 4395 }; 4396 #endif 4397 int 4398 sys_fhstatfs(td, uap) 4399 struct thread *td; 4400 struct fhstatfs_args /* { 4401 struct fhandle *u_fhp; 4402 struct statfs *buf; 4403 } */ *uap; 4404 { 4405 struct statfs sf; 4406 fhandle_t fh; 4407 int error; 4408 4409 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4410 if (error != 0) 4411 return (error); 4412 error = kern_fhstatfs(td, fh, &sf); 4413 if (error != 0) 4414 return (error); 4415 return (copyout(&sf, uap->buf, sizeof(sf))); 4416 } 4417 4418 int 4419 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4420 { 4421 struct statfs *sp; 4422 struct mount *mp; 4423 struct vnode *vp; 4424 int error; 4425 4426 error = priv_check(td, PRIV_VFS_FHSTATFS); 4427 if (error != 0) 4428 return (error); 4429 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4430 return (ESTALE); 4431 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4432 if (error != 0) { 4433 vfs_unbusy(mp); 4434 return (error); 4435 } 4436 vput(vp); 4437 error = prison_canseemount(td->td_ucred, mp); 4438 if (error != 0) 4439 goto out; 4440 #ifdef MAC 4441 error = mac_mount_check_stat(td->td_ucred, mp); 4442 if (error != 0) 4443 goto out; 4444 #endif 4445 /* 4446 * Set these in case the underlying filesystem fails to do so. 4447 */ 4448 sp = &mp->mnt_stat; 4449 sp->f_version = STATFS_VERSION; 4450 sp->f_namemax = NAME_MAX; 4451 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4452 error = VFS_STATFS(mp, sp); 4453 if (error == 0) 4454 *buf = *sp; 4455 out: 4456 vfs_unbusy(mp); 4457 return (error); 4458 } 4459 4460 int 4461 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4462 { 4463 struct file *fp; 4464 struct mount *mp; 4465 struct vnode *vp; 4466 cap_rights_t rights; 4467 off_t olen, ooffset; 4468 int error; 4469 4470 if (offset < 0 || len <= 0) 4471 return (EINVAL); 4472 /* Check for wrap. */ 4473 if (offset > OFF_MAX - len) 4474 return (EFBIG); 4475 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4476 if (error != 0) 4477 return (error); 4478 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4479 error = ESPIPE; 4480 goto out; 4481 } 4482 if ((fp->f_flag & FWRITE) == 0) { 4483 error = EBADF; 4484 goto out; 4485 } 4486 if (fp->f_type != DTYPE_VNODE) { 4487 error = ENODEV; 4488 goto out; 4489 } 4490 vp = fp->f_vnode; 4491 if (vp->v_type != VREG) { 4492 error = ENODEV; 4493 goto out; 4494 } 4495 4496 /* Allocating blocks may take a long time, so iterate. */ 4497 for (;;) { 4498 olen = len; 4499 ooffset = offset; 4500 4501 bwillwrite(); 4502 mp = NULL; 4503 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4504 if (error != 0) 4505 break; 4506 error = vn_lock(vp, LK_EXCLUSIVE); 4507 if (error != 0) { 4508 vn_finished_write(mp); 4509 break; 4510 } 4511 #ifdef MAC 4512 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4513 if (error == 0) 4514 #endif 4515 error = VOP_ALLOCATE(vp, &offset, &len); 4516 VOP_UNLOCK(vp, 0); 4517 vn_finished_write(mp); 4518 4519 if (olen + ooffset != offset + len) { 4520 panic("offset + len changed from %jx/%jx to %jx/%jx", 4521 ooffset, olen, offset, len); 4522 } 4523 if (error != 0 || len == 0) 4524 break; 4525 KASSERT(olen > len, ("Iteration did not make progress?")); 4526 maybe_yield(); 4527 } 4528 out: 4529 fdrop(fp, td); 4530 return (error); 4531 } 4532 4533 int 4534 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4535 { 4536 4537 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4538 uap->len); 4539 return (0); 4540 } 4541 4542 /* 4543 * Unlike madvise(2), we do not make a best effort to remember every 4544 * possible caching hint. Instead, we remember the last setting with 4545 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4546 * region of any current setting. 4547 */ 4548 int 4549 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4550 int advice) 4551 { 4552 struct fadvise_info *fa, *new; 4553 struct file *fp; 4554 struct vnode *vp; 4555 cap_rights_t rights; 4556 off_t end; 4557 int error; 4558 4559 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4560 return (EINVAL); 4561 switch (advice) { 4562 case POSIX_FADV_SEQUENTIAL: 4563 case POSIX_FADV_RANDOM: 4564 case POSIX_FADV_NOREUSE: 4565 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4566 break; 4567 case POSIX_FADV_NORMAL: 4568 case POSIX_FADV_WILLNEED: 4569 case POSIX_FADV_DONTNEED: 4570 new = NULL; 4571 break; 4572 default: 4573 return (EINVAL); 4574 } 4575 /* XXX: CAP_POSIX_FADVISE? */ 4576 error = fget(td, fd, cap_rights_init(&rights), &fp); 4577 if (error != 0) 4578 goto out; 4579 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4580 error = ESPIPE; 4581 goto out; 4582 } 4583 if (fp->f_type != DTYPE_VNODE) { 4584 error = ENODEV; 4585 goto out; 4586 } 4587 vp = fp->f_vnode; 4588 if (vp->v_type != VREG) { 4589 error = ENODEV; 4590 goto out; 4591 } 4592 if (len == 0) 4593 end = OFF_MAX; 4594 else 4595 end = offset + len - 1; 4596 switch (advice) { 4597 case POSIX_FADV_SEQUENTIAL: 4598 case POSIX_FADV_RANDOM: 4599 case POSIX_FADV_NOREUSE: 4600 /* 4601 * Try to merge any existing non-standard region with 4602 * this new region if possible, otherwise create a new 4603 * non-standard region for this request. 4604 */ 4605 mtx_pool_lock(mtxpool_sleep, fp); 4606 fa = fp->f_advice; 4607 if (fa != NULL && fa->fa_advice == advice && 4608 ((fa->fa_start <= end && fa->fa_end >= offset) || 4609 (end != OFF_MAX && fa->fa_start == end + 1) || 4610 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4611 if (offset < fa->fa_start) 4612 fa->fa_start = offset; 4613 if (end > fa->fa_end) 4614 fa->fa_end = end; 4615 } else { 4616 new->fa_advice = advice; 4617 new->fa_start = offset; 4618 new->fa_end = end; 4619 fp->f_advice = new; 4620 new = fa; 4621 } 4622 mtx_pool_unlock(mtxpool_sleep, fp); 4623 break; 4624 case POSIX_FADV_NORMAL: 4625 /* 4626 * If a the "normal" region overlaps with an existing 4627 * non-standard region, trim or remove the 4628 * non-standard region. 4629 */ 4630 mtx_pool_lock(mtxpool_sleep, fp); 4631 fa = fp->f_advice; 4632 if (fa != NULL) { 4633 if (offset <= fa->fa_start && end >= fa->fa_end) { 4634 new = fa; 4635 fp->f_advice = NULL; 4636 } else if (offset <= fa->fa_start && 4637 end >= fa->fa_start) 4638 fa->fa_start = end + 1; 4639 else if (offset <= fa->fa_end && end >= fa->fa_end) 4640 fa->fa_end = offset - 1; 4641 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4642 /* 4643 * If the "normal" region is a middle 4644 * portion of the existing 4645 * non-standard region, just remove 4646 * the whole thing rather than picking 4647 * one side or the other to 4648 * preserve. 4649 */ 4650 new = fa; 4651 fp->f_advice = NULL; 4652 } 4653 } 4654 mtx_pool_unlock(mtxpool_sleep, fp); 4655 break; 4656 case POSIX_FADV_WILLNEED: 4657 case POSIX_FADV_DONTNEED: 4658 error = VOP_ADVISE(vp, offset, end, advice); 4659 break; 4660 } 4661 out: 4662 if (fp != NULL) 4663 fdrop(fp, td); 4664 free(new, M_FADVISE); 4665 return (error); 4666 } 4667 4668 int 4669 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4670 { 4671 4672 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4673 uap->len, uap->advice); 4674 return (0); 4675 } 4676