1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(td, uap) 119 struct thread *td; 120 struct sync_args *uap; 121 { 122 struct mount *mp, *nmp; 123 int save; 124 125 mtx_lock(&mountlist_mtx); 126 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 127 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 128 nmp = TAILQ_NEXT(mp, mnt_list); 129 continue; 130 } 131 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 132 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 133 save = curthread_pflags_set(TDP_SYNCIO); 134 vfs_msync(mp, MNT_NOWAIT); 135 VFS_SYNC(mp, MNT_NOWAIT); 136 curthread_pflags_restore(save); 137 vn_finished_write(mp); 138 } 139 mtx_lock(&mountlist_mtx); 140 nmp = TAILQ_NEXT(mp, mnt_list); 141 vfs_unbusy(mp); 142 } 143 mtx_unlock(&mountlist_mtx); 144 return (0); 145 } 146 147 /* 148 * Change filesystem quotas. 149 */ 150 #ifndef _SYS_SYSPROTO_H_ 151 struct quotactl_args { 152 char *path; 153 int cmd; 154 int uid; 155 caddr_t arg; 156 }; 157 #endif 158 int 159 sys_quotactl(td, uap) 160 struct thread *td; 161 register struct quotactl_args /* { 162 char *path; 163 int cmd; 164 int uid; 165 caddr_t arg; 166 } */ *uap; 167 { 168 struct mount *mp; 169 struct nameidata nd; 170 int error; 171 172 AUDIT_ARG_CMD(uap->cmd); 173 AUDIT_ARG_UID(uap->uid); 174 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 175 return (EPERM); 176 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 177 uap->path, td); 178 if ((error = namei(&nd)) != 0) 179 return (error); 180 NDFREE(&nd, NDF_ONLY_PNBUF); 181 mp = nd.ni_vp->v_mount; 182 vfs_ref(mp); 183 vput(nd.ni_vp); 184 error = vfs_busy(mp, 0); 185 vfs_rel(mp); 186 if (error != 0) 187 return (error); 188 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 189 190 /* 191 * Since quota on operation typically needs to open quota 192 * file, the Q_QUOTAON handler needs to unbusy the mount point 193 * before calling into namei. Otherwise, unmount might be 194 * started between two vfs_busy() invocations (first is our, 195 * second is from mount point cross-walk code in lookup()), 196 * causing deadlock. 197 * 198 * Require that Q_QUOTAON handles the vfs_busy() reference on 199 * its own, always returning with ubusied mount point. 200 */ 201 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 202 vfs_unbusy(mp); 203 return (error); 204 } 205 206 /* 207 * Used by statfs conversion routines to scale the block size up if 208 * necessary so that all of the block counts are <= 'max_size'. Note 209 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 210 * value of 'n'. 211 */ 212 void 213 statfs_scale_blocks(struct statfs *sf, long max_size) 214 { 215 uint64_t count; 216 int shift; 217 218 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 219 220 /* 221 * Attempt to scale the block counts to give a more accurate 222 * overview to userland of the ratio of free space to used 223 * space. To do this, find the largest block count and compute 224 * a divisor that lets it fit into a signed integer <= max_size. 225 */ 226 if (sf->f_bavail < 0) 227 count = -sf->f_bavail; 228 else 229 count = sf->f_bavail; 230 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 231 if (count <= max_size) 232 return; 233 234 count >>= flsl(max_size); 235 shift = 0; 236 while (count > 0) { 237 shift++; 238 count >>=1; 239 } 240 241 sf->f_bsize <<= shift; 242 sf->f_blocks >>= shift; 243 sf->f_bfree >>= shift; 244 sf->f_bavail >>= shift; 245 } 246 247 static int 248 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 249 { 250 struct statfs *sp; 251 int error; 252 253 if (mp == NULL) 254 return (EBADF); 255 error = vfs_busy(mp, 0); 256 vfs_rel(mp); 257 if (error != 0) 258 return (error); 259 #ifdef MAC 260 error = mac_mount_check_stat(td->td_ucred, mp); 261 if (error != 0) 262 goto out; 263 #endif 264 /* 265 * Set these in case the underlying filesystem fails to do so. 266 */ 267 sp = &mp->mnt_stat; 268 sp->f_version = STATFS_VERSION; 269 sp->f_namemax = NAME_MAX; 270 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 271 error = VFS_STATFS(mp, sp); 272 if (error != 0) 273 goto out; 274 *buf = *sp; 275 if (priv_check(td, PRIV_VFS_GENERATION)) { 276 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 277 prison_enforce_statfs(td->td_ucred, mp, buf); 278 } 279 out: 280 vfs_unbusy(mp); 281 return (error); 282 } 283 284 /* 285 * Get filesystem statistics. 286 */ 287 #ifndef _SYS_SYSPROTO_H_ 288 struct statfs_args { 289 char *path; 290 struct statfs *buf; 291 }; 292 #endif 293 int 294 sys_statfs(td, uap) 295 struct thread *td; 296 register struct statfs_args /* { 297 char *path; 298 struct statfs *buf; 299 } */ *uap; 300 { 301 struct statfs *sfp; 302 int error; 303 304 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 305 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 306 if (error == 0) 307 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 308 free(sfp, M_STATFS); 309 return (error); 310 } 311 312 int 313 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 314 struct statfs *buf) 315 { 316 struct mount *mp; 317 struct nameidata nd; 318 int error; 319 320 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 321 pathseg, path, td); 322 error = namei(&nd); 323 if (error != 0) 324 return (error); 325 mp = nd.ni_vp->v_mount; 326 vfs_ref(mp); 327 NDFREE(&nd, NDF_ONLY_PNBUF); 328 vput(nd.ni_vp); 329 return (kern_do_statfs(td, mp, buf)); 330 } 331 332 /* 333 * Get filesystem statistics. 334 */ 335 #ifndef _SYS_SYSPROTO_H_ 336 struct fstatfs_args { 337 int fd; 338 struct statfs *buf; 339 }; 340 #endif 341 int 342 sys_fstatfs(td, uap) 343 struct thread *td; 344 register struct fstatfs_args /* { 345 int fd; 346 struct statfs *buf; 347 } */ *uap; 348 { 349 struct statfs *sfp; 350 int error; 351 352 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 353 error = kern_fstatfs(td, uap->fd, sfp); 354 if (error == 0) 355 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 356 free(sfp, M_STATFS); 357 return (error); 358 } 359 360 int 361 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 362 { 363 struct file *fp; 364 struct mount *mp; 365 struct vnode *vp; 366 cap_rights_t rights; 367 int error; 368 369 AUDIT_ARG_FD(fd); 370 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 371 if (error != 0) 372 return (error); 373 vp = fp->f_vnode; 374 vn_lock(vp, LK_SHARED | LK_RETRY); 375 #ifdef AUDIT 376 AUDIT_ARG_VNODE1(vp); 377 #endif 378 mp = vp->v_mount; 379 if (mp != NULL) 380 vfs_ref(mp); 381 VOP_UNLOCK(vp, 0); 382 fdrop(fp, td); 383 return (kern_do_statfs(td, mp, buf)); 384 } 385 386 /* 387 * Get statistics on all filesystems. 388 */ 389 #ifndef _SYS_SYSPROTO_H_ 390 struct getfsstat_args { 391 struct statfs *buf; 392 long bufsize; 393 int mode; 394 }; 395 #endif 396 int 397 sys_getfsstat(td, uap) 398 struct thread *td; 399 register struct getfsstat_args /* { 400 struct statfs *buf; 401 long bufsize; 402 int mode; 403 } */ *uap; 404 { 405 size_t count; 406 int error; 407 408 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 409 return (EINVAL); 410 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 411 UIO_USERSPACE, uap->mode); 412 if (error == 0) 413 td->td_retval[0] = count; 414 return (error); 415 } 416 417 /* 418 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 419 * The caller is responsible for freeing memory which will be allocated 420 * in '*buf'. 421 */ 422 int 423 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 424 size_t *countp, enum uio_seg bufseg, int mode) 425 { 426 struct mount *mp, *nmp; 427 struct statfs *sfsp, *sp, *sptmp, *tofree; 428 size_t count, maxcount; 429 int error; 430 431 switch (mode) { 432 case MNT_WAIT: 433 case MNT_NOWAIT: 434 break; 435 default: 436 if (bufseg == UIO_SYSSPACE) 437 *buf = NULL; 438 return (EINVAL); 439 } 440 restart: 441 maxcount = bufsize / sizeof(struct statfs); 442 if (bufsize == 0) { 443 sfsp = NULL; 444 tofree = NULL; 445 } else if (bufseg == UIO_USERSPACE) { 446 sfsp = *buf; 447 tofree = NULL; 448 } else /* if (bufseg == UIO_SYSSPACE) */ { 449 count = 0; 450 mtx_lock(&mountlist_mtx); 451 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 452 count++; 453 } 454 mtx_unlock(&mountlist_mtx); 455 if (maxcount > count) 456 maxcount = count; 457 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 458 M_STATFS, M_WAITOK); 459 } 460 count = 0; 461 mtx_lock(&mountlist_mtx); 462 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 463 if (prison_canseemount(td->td_ucred, mp) != 0) { 464 nmp = TAILQ_NEXT(mp, mnt_list); 465 continue; 466 } 467 #ifdef MAC 468 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 469 nmp = TAILQ_NEXT(mp, mnt_list); 470 continue; 471 } 472 #endif 473 if (mode == MNT_WAIT) { 474 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 475 /* 476 * If vfs_busy() failed, and MBF_NOWAIT 477 * wasn't passed, then the mp is gone. 478 * Furthermore, because of MBF_MNTLSTLOCK, 479 * the mountlist_mtx was dropped. We have 480 * no other choice than to start over. 481 */ 482 mtx_unlock(&mountlist_mtx); 483 free(tofree, M_STATFS); 484 goto restart; 485 } 486 } else { 487 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 488 nmp = TAILQ_NEXT(mp, mnt_list); 489 continue; 490 } 491 } 492 if (sfsp != NULL && count < maxcount) { 493 sp = &mp->mnt_stat; 494 /* 495 * Set these in case the underlying filesystem 496 * fails to do so. 497 */ 498 sp->f_version = STATFS_VERSION; 499 sp->f_namemax = NAME_MAX; 500 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 501 /* 502 * If MNT_NOWAIT is specified, do not refresh 503 * the fsstat cache. 504 */ 505 if (mode != MNT_NOWAIT) { 506 error = VFS_STATFS(mp, sp); 507 if (error != 0) { 508 mtx_lock(&mountlist_mtx); 509 nmp = TAILQ_NEXT(mp, mnt_list); 510 vfs_unbusy(mp); 511 continue; 512 } 513 } 514 if (priv_check(td, PRIV_VFS_GENERATION)) { 515 sptmp = malloc(sizeof(struct statfs), M_STATFS, 516 M_WAITOK); 517 *sptmp = *sp; 518 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 519 prison_enforce_statfs(td->td_ucred, mp, sptmp); 520 sp = sptmp; 521 } else 522 sptmp = NULL; 523 if (bufseg == UIO_SYSSPACE) { 524 bcopy(sp, sfsp, sizeof(*sp)); 525 free(sptmp, M_STATFS); 526 } else /* if (bufseg == UIO_USERSPACE) */ { 527 error = copyout(sp, sfsp, sizeof(*sp)); 528 free(sptmp, M_STATFS); 529 if (error != 0) { 530 vfs_unbusy(mp); 531 return (error); 532 } 533 } 534 sfsp++; 535 } 536 count++; 537 mtx_lock(&mountlist_mtx); 538 nmp = TAILQ_NEXT(mp, mnt_list); 539 vfs_unbusy(mp); 540 } 541 mtx_unlock(&mountlist_mtx); 542 if (sfsp != NULL && count > maxcount) 543 *countp = maxcount; 544 else 545 *countp = count; 546 return (0); 547 } 548 549 #ifdef COMPAT_FREEBSD4 550 /* 551 * Get old format filesystem statistics. 552 */ 553 static void cvtstatfs(struct statfs *, struct ostatfs *); 554 555 #ifndef _SYS_SYSPROTO_H_ 556 struct freebsd4_statfs_args { 557 char *path; 558 struct ostatfs *buf; 559 }; 560 #endif 561 int 562 freebsd4_statfs(td, uap) 563 struct thread *td; 564 struct freebsd4_statfs_args /* { 565 char *path; 566 struct ostatfs *buf; 567 } */ *uap; 568 { 569 struct ostatfs osb; 570 struct statfs *sfp; 571 int error; 572 573 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 574 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 575 if (error == 0) { 576 cvtstatfs(sfp, &osb); 577 error = copyout(&osb, uap->buf, sizeof(osb)); 578 } 579 free(sfp, M_STATFS); 580 return (error); 581 } 582 583 /* 584 * Get filesystem statistics. 585 */ 586 #ifndef _SYS_SYSPROTO_H_ 587 struct freebsd4_fstatfs_args { 588 int fd; 589 struct ostatfs *buf; 590 }; 591 #endif 592 int 593 freebsd4_fstatfs(td, uap) 594 struct thread *td; 595 struct freebsd4_fstatfs_args /* { 596 int fd; 597 struct ostatfs *buf; 598 } */ *uap; 599 { 600 struct ostatfs osb; 601 struct statfs *sfp; 602 int error; 603 604 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 605 error = kern_fstatfs(td, uap->fd, sfp); 606 if (error == 0) { 607 cvtstatfs(sfp, &osb); 608 error = copyout(&osb, uap->buf, sizeof(osb)); 609 } 610 free(sfp, M_STATFS); 611 return (error); 612 } 613 614 /* 615 * Get statistics on all filesystems. 616 */ 617 #ifndef _SYS_SYSPROTO_H_ 618 struct freebsd4_getfsstat_args { 619 struct ostatfs *buf; 620 long bufsize; 621 int mode; 622 }; 623 #endif 624 int 625 freebsd4_getfsstat(td, uap) 626 struct thread *td; 627 register struct freebsd4_getfsstat_args /* { 628 struct ostatfs *buf; 629 long bufsize; 630 int mode; 631 } */ *uap; 632 { 633 struct statfs *buf, *sp; 634 struct ostatfs osb; 635 size_t count, size; 636 int error; 637 638 if (uap->bufsize < 0) 639 return (EINVAL); 640 count = uap->bufsize / sizeof(struct ostatfs); 641 if (count > SIZE_MAX / sizeof(struct statfs)) 642 return (EINVAL); 643 size = count * sizeof(struct statfs); 644 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 645 uap->mode); 646 td->td_retval[0] = count; 647 if (size != 0) { 648 sp = buf; 649 while (count != 0 && error == 0) { 650 cvtstatfs(sp, &osb); 651 error = copyout(&osb, uap->buf, sizeof(osb)); 652 sp++; 653 uap->buf++; 654 count--; 655 } 656 free(buf, M_STATFS); 657 } 658 return (error); 659 } 660 661 /* 662 * Implement fstatfs() for (NFS) file handles. 663 */ 664 #ifndef _SYS_SYSPROTO_H_ 665 struct freebsd4_fhstatfs_args { 666 struct fhandle *u_fhp; 667 struct ostatfs *buf; 668 }; 669 #endif 670 int 671 freebsd4_fhstatfs(td, uap) 672 struct thread *td; 673 struct freebsd4_fhstatfs_args /* { 674 struct fhandle *u_fhp; 675 struct ostatfs *buf; 676 } */ *uap; 677 { 678 struct ostatfs osb; 679 struct statfs *sfp; 680 fhandle_t fh; 681 int error; 682 683 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 684 if (error != 0) 685 return (error); 686 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 687 error = kern_fhstatfs(td, fh, sfp); 688 if (error == 0) { 689 cvtstatfs(sfp, &osb); 690 error = copyout(&osb, uap->buf, sizeof(osb)); 691 } 692 free(sfp, M_STATFS); 693 return (error); 694 } 695 696 /* 697 * Convert a new format statfs structure to an old format statfs structure. 698 */ 699 static void 700 cvtstatfs(nsp, osp) 701 struct statfs *nsp; 702 struct ostatfs *osp; 703 { 704 705 statfs_scale_blocks(nsp, LONG_MAX); 706 bzero(osp, sizeof(*osp)); 707 osp->f_bsize = nsp->f_bsize; 708 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 709 osp->f_blocks = nsp->f_blocks; 710 osp->f_bfree = nsp->f_bfree; 711 osp->f_bavail = nsp->f_bavail; 712 osp->f_files = MIN(nsp->f_files, LONG_MAX); 713 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 714 osp->f_owner = nsp->f_owner; 715 osp->f_type = nsp->f_type; 716 osp->f_flags = nsp->f_flags; 717 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 718 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 719 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 720 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 721 strlcpy(osp->f_fstypename, nsp->f_fstypename, 722 MIN(MFSNAMELEN, OMFSNAMELEN)); 723 strlcpy(osp->f_mntonname, nsp->f_mntonname, 724 MIN(MNAMELEN, OMNAMELEN)); 725 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 726 MIN(MNAMELEN, OMNAMELEN)); 727 osp->f_fsid = nsp->f_fsid; 728 } 729 #endif /* COMPAT_FREEBSD4 */ 730 731 /* 732 * Change current working directory to a given file descriptor. 733 */ 734 #ifndef _SYS_SYSPROTO_H_ 735 struct fchdir_args { 736 int fd; 737 }; 738 #endif 739 int 740 sys_fchdir(td, uap) 741 struct thread *td; 742 struct fchdir_args /* { 743 int fd; 744 } */ *uap; 745 { 746 struct vnode *vp, *tdp; 747 struct mount *mp; 748 struct file *fp; 749 cap_rights_t rights; 750 int error; 751 752 AUDIT_ARG_FD(uap->fd); 753 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 754 &fp); 755 if (error != 0) 756 return (error); 757 vp = fp->f_vnode; 758 vrefact(vp); 759 fdrop(fp, td); 760 vn_lock(vp, LK_SHARED | LK_RETRY); 761 AUDIT_ARG_VNODE1(vp); 762 error = change_dir(vp, td); 763 while (!error && (mp = vp->v_mountedhere) != NULL) { 764 if (vfs_busy(mp, 0)) 765 continue; 766 error = VFS_ROOT(mp, LK_SHARED, &tdp); 767 vfs_unbusy(mp); 768 if (error != 0) 769 break; 770 vput(vp); 771 vp = tdp; 772 } 773 if (error != 0) { 774 vput(vp); 775 return (error); 776 } 777 VOP_UNLOCK(vp, 0); 778 pwd_chdir(td, vp); 779 return (0); 780 } 781 782 /* 783 * Change current working directory (``.''). 784 */ 785 #ifndef _SYS_SYSPROTO_H_ 786 struct chdir_args { 787 char *path; 788 }; 789 #endif 790 int 791 sys_chdir(td, uap) 792 struct thread *td; 793 struct chdir_args /* { 794 char *path; 795 } */ *uap; 796 { 797 798 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 799 } 800 801 int 802 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 803 { 804 struct nameidata nd; 805 int error; 806 807 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 808 pathseg, path, td); 809 if ((error = namei(&nd)) != 0) 810 return (error); 811 if ((error = change_dir(nd.ni_vp, td)) != 0) { 812 vput(nd.ni_vp); 813 NDFREE(&nd, NDF_ONLY_PNBUF); 814 return (error); 815 } 816 VOP_UNLOCK(nd.ni_vp, 0); 817 NDFREE(&nd, NDF_ONLY_PNBUF); 818 pwd_chdir(td, nd.ni_vp); 819 return (0); 820 } 821 822 /* 823 * Change notion of root (``/'') directory. 824 */ 825 #ifndef _SYS_SYSPROTO_H_ 826 struct chroot_args { 827 char *path; 828 }; 829 #endif 830 int 831 sys_chroot(td, uap) 832 struct thread *td; 833 struct chroot_args /* { 834 char *path; 835 } */ *uap; 836 { 837 struct nameidata nd; 838 int error; 839 840 error = priv_check(td, PRIV_VFS_CHROOT); 841 if (error != 0) 842 return (error); 843 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 844 UIO_USERSPACE, uap->path, td); 845 error = namei(&nd); 846 if (error != 0) 847 goto error; 848 error = change_dir(nd.ni_vp, td); 849 if (error != 0) 850 goto e_vunlock; 851 #ifdef MAC 852 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 853 if (error != 0) 854 goto e_vunlock; 855 #endif 856 VOP_UNLOCK(nd.ni_vp, 0); 857 error = pwd_chroot(td, nd.ni_vp); 858 vrele(nd.ni_vp); 859 NDFREE(&nd, NDF_ONLY_PNBUF); 860 return (error); 861 e_vunlock: 862 vput(nd.ni_vp); 863 error: 864 NDFREE(&nd, NDF_ONLY_PNBUF); 865 return (error); 866 } 867 868 /* 869 * Common routine for chroot and chdir. Callers must provide a locked vnode 870 * instance. 871 */ 872 int 873 change_dir(vp, td) 874 struct vnode *vp; 875 struct thread *td; 876 { 877 #ifdef MAC 878 int error; 879 #endif 880 881 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 882 if (vp->v_type != VDIR) 883 return (ENOTDIR); 884 #ifdef MAC 885 error = mac_vnode_check_chdir(td->td_ucred, vp); 886 if (error != 0) 887 return (error); 888 #endif 889 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 890 } 891 892 static __inline void 893 flags_to_rights(int flags, cap_rights_t *rightsp) 894 { 895 896 if (flags & O_EXEC) { 897 cap_rights_set(rightsp, CAP_FEXECVE); 898 } else { 899 switch ((flags & O_ACCMODE)) { 900 case O_RDONLY: 901 cap_rights_set(rightsp, CAP_READ); 902 break; 903 case O_RDWR: 904 cap_rights_set(rightsp, CAP_READ); 905 /* FALLTHROUGH */ 906 case O_WRONLY: 907 cap_rights_set(rightsp, CAP_WRITE); 908 if (!(flags & (O_APPEND | O_TRUNC))) 909 cap_rights_set(rightsp, CAP_SEEK); 910 break; 911 } 912 } 913 914 if (flags & O_CREAT) 915 cap_rights_set(rightsp, CAP_CREATE); 916 917 if (flags & O_TRUNC) 918 cap_rights_set(rightsp, CAP_FTRUNCATE); 919 920 if (flags & (O_SYNC | O_FSYNC)) 921 cap_rights_set(rightsp, CAP_FSYNC); 922 923 if (flags & (O_EXLOCK | O_SHLOCK)) 924 cap_rights_set(rightsp, CAP_FLOCK); 925 } 926 927 /* 928 * Check permissions, allocate an open file structure, and call the device 929 * open routine if any. 930 */ 931 #ifndef _SYS_SYSPROTO_H_ 932 struct open_args { 933 char *path; 934 int flags; 935 int mode; 936 }; 937 #endif 938 int 939 sys_open(td, uap) 940 struct thread *td; 941 register struct open_args /* { 942 char *path; 943 int flags; 944 int mode; 945 } */ *uap; 946 { 947 948 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 949 uap->flags, uap->mode)); 950 } 951 952 #ifndef _SYS_SYSPROTO_H_ 953 struct openat_args { 954 int fd; 955 char *path; 956 int flag; 957 int mode; 958 }; 959 #endif 960 int 961 sys_openat(struct thread *td, struct openat_args *uap) 962 { 963 964 AUDIT_ARG_FD(uap->fd); 965 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 966 uap->mode)); 967 } 968 969 int 970 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 971 int flags, int mode) 972 { 973 struct proc *p = td->td_proc; 974 struct filedesc *fdp = p->p_fd; 975 struct file *fp; 976 struct vnode *vp; 977 struct nameidata nd; 978 cap_rights_t rights; 979 int cmode, error, indx; 980 981 indx = -1; 982 983 AUDIT_ARG_FFLAGS(flags); 984 AUDIT_ARG_MODE(mode); 985 cap_rights_init(&rights, CAP_LOOKUP); 986 flags_to_rights(flags, &rights); 987 /* 988 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 989 * may be specified. 990 */ 991 if (flags & O_EXEC) { 992 if (flags & O_ACCMODE) 993 return (EINVAL); 994 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 995 return (EINVAL); 996 } else { 997 flags = FFLAGS(flags); 998 } 999 1000 /* 1001 * Allocate a file structure. The descriptor to reference it 1002 * is allocated and set by finstall() below. 1003 */ 1004 error = falloc_noinstall(td, &fp); 1005 if (error != 0) 1006 return (error); 1007 /* 1008 * An extra reference on `fp' has been held for us by 1009 * falloc_noinstall(). 1010 */ 1011 /* Set the flags early so the finit in devfs can pick them up. */ 1012 fp->f_flag = flags & FMASK; 1013 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1014 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1015 &rights, td); 1016 td->td_dupfd = -1; /* XXX check for fdopen */ 1017 error = vn_open(&nd, &flags, cmode, fp); 1018 if (error != 0) { 1019 /* 1020 * If the vn_open replaced the method vector, something 1021 * wonderous happened deep below and we just pass it up 1022 * pretending we know what we do. 1023 */ 1024 if (error == ENXIO && fp->f_ops != &badfileops) 1025 goto success; 1026 1027 /* 1028 * Handle special fdopen() case. bleh. 1029 * 1030 * Don't do this for relative (capability) lookups; we don't 1031 * understand exactly what would happen, and we don't think 1032 * that it ever should. 1033 */ 1034 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1035 (error == ENODEV || error == ENXIO) && 1036 td->td_dupfd >= 0) { 1037 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1038 &indx); 1039 if (error == 0) 1040 goto success; 1041 } 1042 1043 goto bad; 1044 } 1045 td->td_dupfd = 0; 1046 NDFREE(&nd, NDF_ONLY_PNBUF); 1047 vp = nd.ni_vp; 1048 1049 /* 1050 * Store the vnode, for any f_type. Typically, the vnode use 1051 * count is decremented by direct call to vn_closefile() for 1052 * files that switched type in the cdevsw fdopen() method. 1053 */ 1054 fp->f_vnode = vp; 1055 /* 1056 * If the file wasn't claimed by devfs bind it to the normal 1057 * vnode operations here. 1058 */ 1059 if (fp->f_ops == &badfileops) { 1060 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1061 fp->f_seqcount = 1; 1062 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1063 DTYPE_VNODE, vp, &vnops); 1064 } 1065 1066 VOP_UNLOCK(vp, 0); 1067 if (flags & O_TRUNC) { 1068 error = fo_truncate(fp, 0, td->td_ucred, td); 1069 if (error != 0) 1070 goto bad; 1071 } 1072 success: 1073 /* 1074 * If we haven't already installed the FD (for dupfdopen), do so now. 1075 */ 1076 if (indx == -1) { 1077 struct filecaps *fcaps; 1078 1079 #ifdef CAPABILITIES 1080 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1081 fcaps = &nd.ni_filecaps; 1082 else 1083 #endif 1084 fcaps = NULL; 1085 error = finstall(td, fp, &indx, flags, fcaps); 1086 /* On success finstall() consumes fcaps. */ 1087 if (error != 0) { 1088 filecaps_free(&nd.ni_filecaps); 1089 goto bad; 1090 } 1091 } else { 1092 filecaps_free(&nd.ni_filecaps); 1093 } 1094 1095 /* 1096 * Release our private reference, leaving the one associated with 1097 * the descriptor table intact. 1098 */ 1099 fdrop(fp, td); 1100 td->td_retval[0] = indx; 1101 return (0); 1102 bad: 1103 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1104 fdrop(fp, td); 1105 return (error); 1106 } 1107 1108 #ifdef COMPAT_43 1109 /* 1110 * Create a file. 1111 */ 1112 #ifndef _SYS_SYSPROTO_H_ 1113 struct ocreat_args { 1114 char *path; 1115 int mode; 1116 }; 1117 #endif 1118 int 1119 ocreat(td, uap) 1120 struct thread *td; 1121 register struct ocreat_args /* { 1122 char *path; 1123 int mode; 1124 } */ *uap; 1125 { 1126 1127 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1128 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1129 } 1130 #endif /* COMPAT_43 */ 1131 1132 /* 1133 * Create a special file. 1134 */ 1135 #ifndef _SYS_SYSPROTO_H_ 1136 struct mknod_args { 1137 char *path; 1138 int mode; 1139 int dev; 1140 }; 1141 #endif 1142 int 1143 sys_mknod(td, uap) 1144 struct thread *td; 1145 register struct mknod_args /* { 1146 char *path; 1147 int mode; 1148 int dev; 1149 } */ *uap; 1150 { 1151 1152 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1153 uap->mode, uap->dev)); 1154 } 1155 1156 #ifndef _SYS_SYSPROTO_H_ 1157 struct mknodat_args { 1158 int fd; 1159 char *path; 1160 mode_t mode; 1161 dev_t dev; 1162 }; 1163 #endif 1164 int 1165 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1166 { 1167 1168 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1169 uap->dev)); 1170 } 1171 1172 int 1173 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1174 int mode, int dev) 1175 { 1176 struct vnode *vp; 1177 struct mount *mp; 1178 struct vattr vattr; 1179 struct nameidata nd; 1180 cap_rights_t rights; 1181 int error, whiteout = 0; 1182 1183 AUDIT_ARG_MODE(mode); 1184 AUDIT_ARG_DEV(dev); 1185 switch (mode & S_IFMT) { 1186 case S_IFCHR: 1187 case S_IFBLK: 1188 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1189 if (error == 0 && dev == VNOVAL) 1190 error = EINVAL; 1191 break; 1192 case S_IFMT: 1193 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1194 break; 1195 case S_IFWHT: 1196 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1197 break; 1198 case S_IFIFO: 1199 if (dev == 0) 1200 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1201 /* FALLTHROUGH */ 1202 default: 1203 error = EINVAL; 1204 break; 1205 } 1206 if (error != 0) 1207 return (error); 1208 restart: 1209 bwillwrite(); 1210 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1211 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1212 td); 1213 if ((error = namei(&nd)) != 0) 1214 return (error); 1215 vp = nd.ni_vp; 1216 if (vp != NULL) { 1217 NDFREE(&nd, NDF_ONLY_PNBUF); 1218 if (vp == nd.ni_dvp) 1219 vrele(nd.ni_dvp); 1220 else 1221 vput(nd.ni_dvp); 1222 vrele(vp); 1223 return (EEXIST); 1224 } else { 1225 VATTR_NULL(&vattr); 1226 vattr.va_mode = (mode & ALLPERMS) & 1227 ~td->td_proc->p_fd->fd_cmask; 1228 vattr.va_rdev = dev; 1229 whiteout = 0; 1230 1231 switch (mode & S_IFMT) { 1232 case S_IFMT: /* used by badsect to flag bad sectors */ 1233 vattr.va_type = VBAD; 1234 break; 1235 case S_IFCHR: 1236 vattr.va_type = VCHR; 1237 break; 1238 case S_IFBLK: 1239 vattr.va_type = VBLK; 1240 break; 1241 case S_IFWHT: 1242 whiteout = 1; 1243 break; 1244 default: 1245 panic("kern_mknod: invalid mode"); 1246 } 1247 } 1248 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1249 NDFREE(&nd, NDF_ONLY_PNBUF); 1250 vput(nd.ni_dvp); 1251 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1252 return (error); 1253 goto restart; 1254 } 1255 #ifdef MAC 1256 if (error == 0 && !whiteout) 1257 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1258 &nd.ni_cnd, &vattr); 1259 #endif 1260 if (error == 0) { 1261 if (whiteout) 1262 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1263 else { 1264 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1265 &nd.ni_cnd, &vattr); 1266 if (error == 0) 1267 vput(nd.ni_vp); 1268 } 1269 } 1270 NDFREE(&nd, NDF_ONLY_PNBUF); 1271 vput(nd.ni_dvp); 1272 vn_finished_write(mp); 1273 return (error); 1274 } 1275 1276 /* 1277 * Create a named pipe. 1278 */ 1279 #ifndef _SYS_SYSPROTO_H_ 1280 struct mkfifo_args { 1281 char *path; 1282 int mode; 1283 }; 1284 #endif 1285 int 1286 sys_mkfifo(td, uap) 1287 struct thread *td; 1288 register struct mkfifo_args /* { 1289 char *path; 1290 int mode; 1291 } */ *uap; 1292 { 1293 1294 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1295 uap->mode)); 1296 } 1297 1298 #ifndef _SYS_SYSPROTO_H_ 1299 struct mkfifoat_args { 1300 int fd; 1301 char *path; 1302 mode_t mode; 1303 }; 1304 #endif 1305 int 1306 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1307 { 1308 1309 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1310 uap->mode)); 1311 } 1312 1313 int 1314 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1315 int mode) 1316 { 1317 struct mount *mp; 1318 struct vattr vattr; 1319 struct nameidata nd; 1320 cap_rights_t rights; 1321 int error; 1322 1323 AUDIT_ARG_MODE(mode); 1324 restart: 1325 bwillwrite(); 1326 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1327 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), 1328 td); 1329 if ((error = namei(&nd)) != 0) 1330 return (error); 1331 if (nd.ni_vp != NULL) { 1332 NDFREE(&nd, NDF_ONLY_PNBUF); 1333 if (nd.ni_vp == nd.ni_dvp) 1334 vrele(nd.ni_dvp); 1335 else 1336 vput(nd.ni_dvp); 1337 vrele(nd.ni_vp); 1338 return (EEXIST); 1339 } 1340 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1341 NDFREE(&nd, NDF_ONLY_PNBUF); 1342 vput(nd.ni_dvp); 1343 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1344 return (error); 1345 goto restart; 1346 } 1347 VATTR_NULL(&vattr); 1348 vattr.va_type = VFIFO; 1349 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1350 #ifdef MAC 1351 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1352 &vattr); 1353 if (error != 0) 1354 goto out; 1355 #endif 1356 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1357 if (error == 0) 1358 vput(nd.ni_vp); 1359 #ifdef MAC 1360 out: 1361 #endif 1362 vput(nd.ni_dvp); 1363 vn_finished_write(mp); 1364 NDFREE(&nd, NDF_ONLY_PNBUF); 1365 return (error); 1366 } 1367 1368 /* 1369 * Make a hard file link. 1370 */ 1371 #ifndef _SYS_SYSPROTO_H_ 1372 struct link_args { 1373 char *path; 1374 char *link; 1375 }; 1376 #endif 1377 int 1378 sys_link(td, uap) 1379 struct thread *td; 1380 register struct link_args /* { 1381 char *path; 1382 char *link; 1383 } */ *uap; 1384 { 1385 1386 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1387 UIO_USERSPACE, FOLLOW)); 1388 } 1389 1390 #ifndef _SYS_SYSPROTO_H_ 1391 struct linkat_args { 1392 int fd1; 1393 char *path1; 1394 int fd2; 1395 char *path2; 1396 int flag; 1397 }; 1398 #endif 1399 int 1400 sys_linkat(struct thread *td, struct linkat_args *uap) 1401 { 1402 int flag; 1403 1404 flag = uap->flag; 1405 if (flag & ~AT_SYMLINK_FOLLOW) 1406 return (EINVAL); 1407 1408 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1409 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW)); 1410 } 1411 1412 int hardlink_check_uid = 0; 1413 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1414 &hardlink_check_uid, 0, 1415 "Unprivileged processes cannot create hard links to files owned by other " 1416 "users"); 1417 static int hardlink_check_gid = 0; 1418 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1419 &hardlink_check_gid, 0, 1420 "Unprivileged processes cannot create hard links to files owned by other " 1421 "groups"); 1422 1423 static int 1424 can_hardlink(struct vnode *vp, struct ucred *cred) 1425 { 1426 struct vattr va; 1427 int error; 1428 1429 if (!hardlink_check_uid && !hardlink_check_gid) 1430 return (0); 1431 1432 error = VOP_GETATTR(vp, &va, cred); 1433 if (error != 0) 1434 return (error); 1435 1436 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1437 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1438 if (error != 0) 1439 return (error); 1440 } 1441 1442 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1443 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1444 if (error != 0) 1445 return (error); 1446 } 1447 1448 return (0); 1449 } 1450 1451 int 1452 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1453 enum uio_seg segflg, int follow) 1454 { 1455 struct vnode *vp; 1456 struct mount *mp; 1457 struct nameidata nd; 1458 cap_rights_t rights; 1459 int error; 1460 1461 again: 1462 bwillwrite(); 1463 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1464 cap_rights_init(&rights, CAP_LINKAT_SOURCE), td); 1465 1466 if ((error = namei(&nd)) != 0) 1467 return (error); 1468 NDFREE(&nd, NDF_ONLY_PNBUF); 1469 vp = nd.ni_vp; 1470 if (vp->v_type == VDIR) { 1471 vrele(vp); 1472 return (EPERM); /* POSIX */ 1473 } 1474 NDINIT_ATRIGHTS(&nd, CREATE, 1475 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1476 cap_rights_init(&rights, CAP_LINKAT_TARGET), td); 1477 if ((error = namei(&nd)) == 0) { 1478 if (nd.ni_vp != NULL) { 1479 NDFREE(&nd, NDF_ONLY_PNBUF); 1480 if (nd.ni_dvp == nd.ni_vp) 1481 vrele(nd.ni_dvp); 1482 else 1483 vput(nd.ni_dvp); 1484 vrele(nd.ni_vp); 1485 vrele(vp); 1486 return (EEXIST); 1487 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1488 /* 1489 * Cross-device link. No need to recheck 1490 * vp->v_type, since it cannot change, except 1491 * to VBAD. 1492 */ 1493 NDFREE(&nd, NDF_ONLY_PNBUF); 1494 vput(nd.ni_dvp); 1495 vrele(vp); 1496 return (EXDEV); 1497 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1498 error = can_hardlink(vp, td->td_ucred); 1499 #ifdef MAC 1500 if (error == 0) 1501 error = mac_vnode_check_link(td->td_ucred, 1502 nd.ni_dvp, vp, &nd.ni_cnd); 1503 #endif 1504 if (error != 0) { 1505 vput(vp); 1506 vput(nd.ni_dvp); 1507 NDFREE(&nd, NDF_ONLY_PNBUF); 1508 return (error); 1509 } 1510 error = vn_start_write(vp, &mp, V_NOWAIT); 1511 if (error != 0) { 1512 vput(vp); 1513 vput(nd.ni_dvp); 1514 NDFREE(&nd, NDF_ONLY_PNBUF); 1515 error = vn_start_write(NULL, &mp, 1516 V_XSLEEP | PCATCH); 1517 if (error != 0) 1518 return (error); 1519 goto again; 1520 } 1521 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1522 VOP_UNLOCK(vp, 0); 1523 vput(nd.ni_dvp); 1524 vn_finished_write(mp); 1525 NDFREE(&nd, NDF_ONLY_PNBUF); 1526 } else { 1527 vput(nd.ni_dvp); 1528 NDFREE(&nd, NDF_ONLY_PNBUF); 1529 vrele(vp); 1530 goto again; 1531 } 1532 } 1533 vrele(vp); 1534 return (error); 1535 } 1536 1537 /* 1538 * Make a symbolic link. 1539 */ 1540 #ifndef _SYS_SYSPROTO_H_ 1541 struct symlink_args { 1542 char *path; 1543 char *link; 1544 }; 1545 #endif 1546 int 1547 sys_symlink(td, uap) 1548 struct thread *td; 1549 register struct symlink_args /* { 1550 char *path; 1551 char *link; 1552 } */ *uap; 1553 { 1554 1555 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1556 UIO_USERSPACE)); 1557 } 1558 1559 #ifndef _SYS_SYSPROTO_H_ 1560 struct symlinkat_args { 1561 char *path; 1562 int fd; 1563 char *path2; 1564 }; 1565 #endif 1566 int 1567 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1568 { 1569 1570 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1571 UIO_USERSPACE)); 1572 } 1573 1574 int 1575 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1576 enum uio_seg segflg) 1577 { 1578 struct mount *mp; 1579 struct vattr vattr; 1580 char *syspath; 1581 struct nameidata nd; 1582 int error; 1583 cap_rights_t rights; 1584 1585 if (segflg == UIO_SYSSPACE) { 1586 syspath = path1; 1587 } else { 1588 syspath = uma_zalloc(namei_zone, M_WAITOK); 1589 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1590 goto out; 1591 } 1592 AUDIT_ARG_TEXT(syspath); 1593 restart: 1594 bwillwrite(); 1595 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1596 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1597 td); 1598 if ((error = namei(&nd)) != 0) 1599 goto out; 1600 if (nd.ni_vp) { 1601 NDFREE(&nd, NDF_ONLY_PNBUF); 1602 if (nd.ni_vp == nd.ni_dvp) 1603 vrele(nd.ni_dvp); 1604 else 1605 vput(nd.ni_dvp); 1606 vrele(nd.ni_vp); 1607 error = EEXIST; 1608 goto out; 1609 } 1610 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1611 NDFREE(&nd, NDF_ONLY_PNBUF); 1612 vput(nd.ni_dvp); 1613 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1614 goto out; 1615 goto restart; 1616 } 1617 VATTR_NULL(&vattr); 1618 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1619 #ifdef MAC 1620 vattr.va_type = VLNK; 1621 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1622 &vattr); 1623 if (error != 0) 1624 goto out2; 1625 #endif 1626 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1627 if (error == 0) 1628 vput(nd.ni_vp); 1629 #ifdef MAC 1630 out2: 1631 #endif 1632 NDFREE(&nd, NDF_ONLY_PNBUF); 1633 vput(nd.ni_dvp); 1634 vn_finished_write(mp); 1635 out: 1636 if (segflg != UIO_SYSSPACE) 1637 uma_zfree(namei_zone, syspath); 1638 return (error); 1639 } 1640 1641 /* 1642 * Delete a whiteout from the filesystem. 1643 */ 1644 int 1645 sys_undelete(td, uap) 1646 struct thread *td; 1647 register struct undelete_args /* { 1648 char *path; 1649 } */ *uap; 1650 { 1651 struct mount *mp; 1652 struct nameidata nd; 1653 int error; 1654 1655 restart: 1656 bwillwrite(); 1657 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1658 UIO_USERSPACE, uap->path, td); 1659 error = namei(&nd); 1660 if (error != 0) 1661 return (error); 1662 1663 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1664 NDFREE(&nd, NDF_ONLY_PNBUF); 1665 if (nd.ni_vp == nd.ni_dvp) 1666 vrele(nd.ni_dvp); 1667 else 1668 vput(nd.ni_dvp); 1669 if (nd.ni_vp) 1670 vrele(nd.ni_vp); 1671 return (EEXIST); 1672 } 1673 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1674 NDFREE(&nd, NDF_ONLY_PNBUF); 1675 vput(nd.ni_dvp); 1676 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1677 return (error); 1678 goto restart; 1679 } 1680 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1681 NDFREE(&nd, NDF_ONLY_PNBUF); 1682 vput(nd.ni_dvp); 1683 vn_finished_write(mp); 1684 return (error); 1685 } 1686 1687 /* 1688 * Delete a name from the filesystem. 1689 */ 1690 #ifndef _SYS_SYSPROTO_H_ 1691 struct unlink_args { 1692 char *path; 1693 }; 1694 #endif 1695 int 1696 sys_unlink(td, uap) 1697 struct thread *td; 1698 struct unlink_args /* { 1699 char *path; 1700 } */ *uap; 1701 { 1702 1703 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 1704 } 1705 1706 #ifndef _SYS_SYSPROTO_H_ 1707 struct unlinkat_args { 1708 int fd; 1709 char *path; 1710 int flag; 1711 }; 1712 #endif 1713 int 1714 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1715 { 1716 int flag = uap->flag; 1717 int fd = uap->fd; 1718 char *path = uap->path; 1719 1720 if (flag & ~AT_REMOVEDIR) 1721 return (EINVAL); 1722 1723 if (flag & AT_REMOVEDIR) 1724 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1725 else 1726 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1727 } 1728 1729 int 1730 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1731 ino_t oldinum) 1732 { 1733 struct mount *mp; 1734 struct vnode *vp; 1735 struct nameidata nd; 1736 struct stat sb; 1737 cap_rights_t rights; 1738 int error; 1739 1740 restart: 1741 bwillwrite(); 1742 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1743 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1744 if ((error = namei(&nd)) != 0) 1745 return (error == EINVAL ? EPERM : error); 1746 vp = nd.ni_vp; 1747 if (vp->v_type == VDIR && oldinum == 0) { 1748 error = EPERM; /* POSIX */ 1749 } else if (oldinum != 0 && 1750 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1751 sb.st_ino != oldinum) { 1752 error = EIDRM; /* Identifier removed */ 1753 } else { 1754 /* 1755 * The root of a mounted filesystem cannot be deleted. 1756 * 1757 * XXX: can this only be a VDIR case? 1758 */ 1759 if (vp->v_vflag & VV_ROOT) 1760 error = EBUSY; 1761 } 1762 if (error == 0) { 1763 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1764 NDFREE(&nd, NDF_ONLY_PNBUF); 1765 vput(nd.ni_dvp); 1766 if (vp == nd.ni_dvp) 1767 vrele(vp); 1768 else 1769 vput(vp); 1770 if ((error = vn_start_write(NULL, &mp, 1771 V_XSLEEP | PCATCH)) != 0) 1772 return (error); 1773 goto restart; 1774 } 1775 #ifdef MAC 1776 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1777 &nd.ni_cnd); 1778 if (error != 0) 1779 goto out; 1780 #endif 1781 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1782 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1783 #ifdef MAC 1784 out: 1785 #endif 1786 vn_finished_write(mp); 1787 } 1788 NDFREE(&nd, NDF_ONLY_PNBUF); 1789 vput(nd.ni_dvp); 1790 if (vp == nd.ni_dvp) 1791 vrele(vp); 1792 else 1793 vput(vp); 1794 return (error); 1795 } 1796 1797 /* 1798 * Reposition read/write file offset. 1799 */ 1800 #ifndef _SYS_SYSPROTO_H_ 1801 struct lseek_args { 1802 int fd; 1803 int pad; 1804 off_t offset; 1805 int whence; 1806 }; 1807 #endif 1808 int 1809 sys_lseek(td, uap) 1810 struct thread *td; 1811 register struct lseek_args /* { 1812 int fd; 1813 int pad; 1814 off_t offset; 1815 int whence; 1816 } */ *uap; 1817 { 1818 struct file *fp; 1819 cap_rights_t rights; 1820 int error; 1821 1822 AUDIT_ARG_FD(uap->fd); 1823 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1824 if (error != 0) 1825 return (error); 1826 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1827 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1828 fdrop(fp, td); 1829 return (error); 1830 } 1831 1832 #if defined(COMPAT_43) 1833 /* 1834 * Reposition read/write file offset. 1835 */ 1836 #ifndef _SYS_SYSPROTO_H_ 1837 struct olseek_args { 1838 int fd; 1839 long offset; 1840 int whence; 1841 }; 1842 #endif 1843 int 1844 olseek(td, uap) 1845 struct thread *td; 1846 register struct olseek_args /* { 1847 int fd; 1848 long offset; 1849 int whence; 1850 } */ *uap; 1851 { 1852 struct lseek_args /* { 1853 int fd; 1854 int pad; 1855 off_t offset; 1856 int whence; 1857 } */ nuap; 1858 1859 nuap.fd = uap->fd; 1860 nuap.offset = uap->offset; 1861 nuap.whence = uap->whence; 1862 return (sys_lseek(td, &nuap)); 1863 } 1864 #endif /* COMPAT_43 */ 1865 1866 #if defined(COMPAT_FREEBSD6) 1867 /* Version with the 'pad' argument */ 1868 int 1869 freebsd6_lseek(td, uap) 1870 struct thread *td; 1871 register struct freebsd6_lseek_args *uap; 1872 { 1873 struct lseek_args ouap; 1874 1875 ouap.fd = uap->fd; 1876 ouap.offset = uap->offset; 1877 ouap.whence = uap->whence; 1878 return (sys_lseek(td, &ouap)); 1879 } 1880 #endif 1881 1882 /* 1883 * Check access permissions using passed credentials. 1884 */ 1885 static int 1886 vn_access(vp, user_flags, cred, td) 1887 struct vnode *vp; 1888 int user_flags; 1889 struct ucred *cred; 1890 struct thread *td; 1891 { 1892 accmode_t accmode; 1893 int error; 1894 1895 /* Flags == 0 means only check for existence. */ 1896 if (user_flags == 0) 1897 return (0); 1898 1899 accmode = 0; 1900 if (user_flags & R_OK) 1901 accmode |= VREAD; 1902 if (user_flags & W_OK) 1903 accmode |= VWRITE; 1904 if (user_flags & X_OK) 1905 accmode |= VEXEC; 1906 #ifdef MAC 1907 error = mac_vnode_check_access(cred, vp, accmode); 1908 if (error != 0) 1909 return (error); 1910 #endif 1911 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1912 error = VOP_ACCESS(vp, accmode, cred, td); 1913 return (error); 1914 } 1915 1916 /* 1917 * Check access permissions using "real" credentials. 1918 */ 1919 #ifndef _SYS_SYSPROTO_H_ 1920 struct access_args { 1921 char *path; 1922 int amode; 1923 }; 1924 #endif 1925 int 1926 sys_access(td, uap) 1927 struct thread *td; 1928 register struct access_args /* { 1929 char *path; 1930 int amode; 1931 } */ *uap; 1932 { 1933 1934 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1935 0, uap->amode)); 1936 } 1937 1938 #ifndef _SYS_SYSPROTO_H_ 1939 struct faccessat_args { 1940 int dirfd; 1941 char *path; 1942 int amode; 1943 int flag; 1944 } 1945 #endif 1946 int 1947 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1948 { 1949 1950 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1951 uap->amode)); 1952 } 1953 1954 int 1955 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1956 int flag, int amode) 1957 { 1958 struct ucred *cred, *usecred; 1959 struct vnode *vp; 1960 struct nameidata nd; 1961 cap_rights_t rights; 1962 int error; 1963 1964 if (flag & ~AT_EACCESS) 1965 return (EINVAL); 1966 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1967 return (EINVAL); 1968 1969 /* 1970 * Create and modify a temporary credential instead of one that 1971 * is potentially shared (if we need one). 1972 */ 1973 cred = td->td_ucred; 1974 if ((flag & AT_EACCESS) == 0 && 1975 ((cred->cr_uid != cred->cr_ruid || 1976 cred->cr_rgid != cred->cr_groups[0]))) { 1977 usecred = crdup(cred); 1978 usecred->cr_uid = cred->cr_ruid; 1979 usecred->cr_groups[0] = cred->cr_rgid; 1980 td->td_ucred = usecred; 1981 } else 1982 usecred = cred; 1983 AUDIT_ARG_VALUE(amode); 1984 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1985 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1986 td); 1987 if ((error = namei(&nd)) != 0) 1988 goto out; 1989 vp = nd.ni_vp; 1990 1991 error = vn_access(vp, amode, usecred, td); 1992 NDFREE(&nd, NDF_ONLY_PNBUF); 1993 vput(vp); 1994 out: 1995 if (usecred != cred) { 1996 td->td_ucred = cred; 1997 crfree(usecred); 1998 } 1999 return (error); 2000 } 2001 2002 /* 2003 * Check access permissions using "effective" credentials. 2004 */ 2005 #ifndef _SYS_SYSPROTO_H_ 2006 struct eaccess_args { 2007 char *path; 2008 int amode; 2009 }; 2010 #endif 2011 int 2012 sys_eaccess(td, uap) 2013 struct thread *td; 2014 register struct eaccess_args /* { 2015 char *path; 2016 int amode; 2017 } */ *uap; 2018 { 2019 2020 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2021 AT_EACCESS, uap->amode)); 2022 } 2023 2024 #if defined(COMPAT_43) 2025 /* 2026 * Get file status; this version follows links. 2027 */ 2028 #ifndef _SYS_SYSPROTO_H_ 2029 struct ostat_args { 2030 char *path; 2031 struct ostat *ub; 2032 }; 2033 #endif 2034 int 2035 ostat(td, uap) 2036 struct thread *td; 2037 register struct ostat_args /* { 2038 char *path; 2039 struct ostat *ub; 2040 } */ *uap; 2041 { 2042 struct stat sb; 2043 struct ostat osb; 2044 int error; 2045 2046 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2047 &sb, NULL); 2048 if (error != 0) 2049 return (error); 2050 cvtstat(&sb, &osb); 2051 return (copyout(&osb, uap->ub, sizeof (osb))); 2052 } 2053 2054 /* 2055 * Get file status; this version does not follow links. 2056 */ 2057 #ifndef _SYS_SYSPROTO_H_ 2058 struct olstat_args { 2059 char *path; 2060 struct ostat *ub; 2061 }; 2062 #endif 2063 int 2064 olstat(td, uap) 2065 struct thread *td; 2066 register struct olstat_args /* { 2067 char *path; 2068 struct ostat *ub; 2069 } */ *uap; 2070 { 2071 struct stat sb; 2072 struct ostat osb; 2073 int error; 2074 2075 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2076 UIO_USERSPACE, &sb, NULL); 2077 if (error != 0) 2078 return (error); 2079 cvtstat(&sb, &osb); 2080 return (copyout(&osb, uap->ub, sizeof (osb))); 2081 } 2082 2083 /* 2084 * Convert from an old to a new stat structure. 2085 */ 2086 void 2087 cvtstat(st, ost) 2088 struct stat *st; 2089 struct ostat *ost; 2090 { 2091 2092 bzero(ost, sizeof(*ost)); 2093 ost->st_dev = st->st_dev; 2094 ost->st_ino = st->st_ino; 2095 ost->st_mode = st->st_mode; 2096 ost->st_nlink = st->st_nlink; 2097 ost->st_uid = st->st_uid; 2098 ost->st_gid = st->st_gid; 2099 ost->st_rdev = st->st_rdev; 2100 if (st->st_size < (quad_t)1 << 32) 2101 ost->st_size = st->st_size; 2102 else 2103 ost->st_size = -2; 2104 ost->st_atim = st->st_atim; 2105 ost->st_mtim = st->st_mtim; 2106 ost->st_ctim = st->st_ctim; 2107 ost->st_blksize = st->st_blksize; 2108 ost->st_blocks = st->st_blocks; 2109 ost->st_flags = st->st_flags; 2110 ost->st_gen = st->st_gen; 2111 } 2112 #endif /* COMPAT_43 */ 2113 2114 /* 2115 * Get file status; this version follows links. 2116 */ 2117 #ifndef _SYS_SYSPROTO_H_ 2118 struct stat_args { 2119 char *path; 2120 struct stat *ub; 2121 }; 2122 #endif 2123 int 2124 sys_stat(td, uap) 2125 struct thread *td; 2126 register struct stat_args /* { 2127 char *path; 2128 struct stat *ub; 2129 } */ *uap; 2130 { 2131 struct stat sb; 2132 int error; 2133 2134 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2135 &sb, NULL); 2136 if (error == 0) 2137 error = copyout(&sb, uap->ub, sizeof (sb)); 2138 return (error); 2139 } 2140 2141 #ifndef _SYS_SYSPROTO_H_ 2142 struct fstatat_args { 2143 int fd; 2144 char *path; 2145 struct stat *buf; 2146 int flag; 2147 } 2148 #endif 2149 int 2150 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2151 { 2152 struct stat sb; 2153 int error; 2154 2155 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2156 UIO_USERSPACE, &sb, NULL); 2157 if (error == 0) 2158 error = copyout(&sb, uap->buf, sizeof (sb)); 2159 return (error); 2160 } 2161 2162 int 2163 kern_statat(struct thread *td, int flag, int fd, char *path, 2164 enum uio_seg pathseg, struct stat *sbp, 2165 void (*hook)(struct vnode *vp, struct stat *sbp)) 2166 { 2167 struct nameidata nd; 2168 struct stat sb; 2169 cap_rights_t rights; 2170 int error; 2171 2172 if (flag & ~AT_SYMLINK_NOFOLLOW) 2173 return (EINVAL); 2174 2175 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2176 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2177 cap_rights_init(&rights, CAP_FSTAT), td); 2178 2179 if ((error = namei(&nd)) != 0) 2180 return (error); 2181 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2182 if (error == 0) { 2183 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2184 if (S_ISREG(sb.st_mode)) 2185 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2186 if (__predict_false(hook != NULL)) 2187 hook(nd.ni_vp, &sb); 2188 } 2189 NDFREE(&nd, NDF_ONLY_PNBUF); 2190 vput(nd.ni_vp); 2191 if (error != 0) 2192 return (error); 2193 *sbp = sb; 2194 #ifdef KTRACE 2195 if (KTRPOINT(td, KTR_STRUCT)) 2196 ktrstat(&sb); 2197 #endif 2198 return (0); 2199 } 2200 2201 /* 2202 * Get file status; this version does not follow links. 2203 */ 2204 #ifndef _SYS_SYSPROTO_H_ 2205 struct lstat_args { 2206 char *path; 2207 struct stat *ub; 2208 }; 2209 #endif 2210 int 2211 sys_lstat(td, uap) 2212 struct thread *td; 2213 register struct lstat_args /* { 2214 char *path; 2215 struct stat *ub; 2216 } */ *uap; 2217 { 2218 struct stat sb; 2219 int error; 2220 2221 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2222 UIO_USERSPACE, &sb, NULL); 2223 if (error == 0) 2224 error = copyout(&sb, uap->ub, sizeof (sb)); 2225 return (error); 2226 } 2227 2228 /* 2229 * Implementation of the NetBSD [l]stat() functions. 2230 */ 2231 void 2232 cvtnstat(sb, nsb) 2233 struct stat *sb; 2234 struct nstat *nsb; 2235 { 2236 2237 bzero(nsb, sizeof *nsb); 2238 nsb->st_dev = sb->st_dev; 2239 nsb->st_ino = sb->st_ino; 2240 nsb->st_mode = sb->st_mode; 2241 nsb->st_nlink = sb->st_nlink; 2242 nsb->st_uid = sb->st_uid; 2243 nsb->st_gid = sb->st_gid; 2244 nsb->st_rdev = sb->st_rdev; 2245 nsb->st_atim = sb->st_atim; 2246 nsb->st_mtim = sb->st_mtim; 2247 nsb->st_ctim = sb->st_ctim; 2248 nsb->st_size = sb->st_size; 2249 nsb->st_blocks = sb->st_blocks; 2250 nsb->st_blksize = sb->st_blksize; 2251 nsb->st_flags = sb->st_flags; 2252 nsb->st_gen = sb->st_gen; 2253 nsb->st_birthtim = sb->st_birthtim; 2254 } 2255 2256 #ifndef _SYS_SYSPROTO_H_ 2257 struct nstat_args { 2258 char *path; 2259 struct nstat *ub; 2260 }; 2261 #endif 2262 int 2263 sys_nstat(td, uap) 2264 struct thread *td; 2265 register struct nstat_args /* { 2266 char *path; 2267 struct nstat *ub; 2268 } */ *uap; 2269 { 2270 struct stat sb; 2271 struct nstat nsb; 2272 int error; 2273 2274 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2275 &sb, NULL); 2276 if (error != 0) 2277 return (error); 2278 cvtnstat(&sb, &nsb); 2279 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2280 } 2281 2282 /* 2283 * NetBSD lstat. Get file status; this version does not follow links. 2284 */ 2285 #ifndef _SYS_SYSPROTO_H_ 2286 struct lstat_args { 2287 char *path; 2288 struct stat *ub; 2289 }; 2290 #endif 2291 int 2292 sys_nlstat(td, uap) 2293 struct thread *td; 2294 register struct nlstat_args /* { 2295 char *path; 2296 struct nstat *ub; 2297 } */ *uap; 2298 { 2299 struct stat sb; 2300 struct nstat nsb; 2301 int error; 2302 2303 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2304 UIO_USERSPACE, &sb, NULL); 2305 if (error != 0) 2306 return (error); 2307 cvtnstat(&sb, &nsb); 2308 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2309 } 2310 2311 /* 2312 * Get configurable pathname variables. 2313 */ 2314 #ifndef _SYS_SYSPROTO_H_ 2315 struct pathconf_args { 2316 char *path; 2317 int name; 2318 }; 2319 #endif 2320 int 2321 sys_pathconf(td, uap) 2322 struct thread *td; 2323 register struct pathconf_args /* { 2324 char *path; 2325 int name; 2326 } */ *uap; 2327 { 2328 2329 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2330 } 2331 2332 #ifndef _SYS_SYSPROTO_H_ 2333 struct lpathconf_args { 2334 char *path; 2335 int name; 2336 }; 2337 #endif 2338 int 2339 sys_lpathconf(td, uap) 2340 struct thread *td; 2341 register struct lpathconf_args /* { 2342 char *path; 2343 int name; 2344 } */ *uap; 2345 { 2346 2347 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2348 NOFOLLOW)); 2349 } 2350 2351 int 2352 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2353 u_long flags) 2354 { 2355 struct nameidata nd; 2356 int error; 2357 2358 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2359 pathseg, path, td); 2360 if ((error = namei(&nd)) != 0) 2361 return (error); 2362 NDFREE(&nd, NDF_ONLY_PNBUF); 2363 2364 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2365 vput(nd.ni_vp); 2366 return (error); 2367 } 2368 2369 /* 2370 * Return target name of a symbolic link. 2371 */ 2372 #ifndef _SYS_SYSPROTO_H_ 2373 struct readlink_args { 2374 char *path; 2375 char *buf; 2376 size_t count; 2377 }; 2378 #endif 2379 int 2380 sys_readlink(td, uap) 2381 struct thread *td; 2382 register struct readlink_args /* { 2383 char *path; 2384 char *buf; 2385 size_t count; 2386 } */ *uap; 2387 { 2388 2389 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2390 uap->buf, UIO_USERSPACE, uap->count)); 2391 } 2392 #ifndef _SYS_SYSPROTO_H_ 2393 struct readlinkat_args { 2394 int fd; 2395 char *path; 2396 char *buf; 2397 size_t bufsize; 2398 }; 2399 #endif 2400 int 2401 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2402 { 2403 2404 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2405 uap->buf, UIO_USERSPACE, uap->bufsize)); 2406 } 2407 2408 int 2409 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2410 char *buf, enum uio_seg bufseg, size_t count) 2411 { 2412 struct vnode *vp; 2413 struct iovec aiov; 2414 struct uio auio; 2415 struct nameidata nd; 2416 int error; 2417 2418 if (count > IOSIZE_MAX) 2419 return (EINVAL); 2420 2421 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2422 pathseg, path, fd, td); 2423 2424 if ((error = namei(&nd)) != 0) 2425 return (error); 2426 NDFREE(&nd, NDF_ONLY_PNBUF); 2427 vp = nd.ni_vp; 2428 #ifdef MAC 2429 error = mac_vnode_check_readlink(td->td_ucred, vp); 2430 if (error != 0) { 2431 vput(vp); 2432 return (error); 2433 } 2434 #endif 2435 if (vp->v_type != VLNK) 2436 error = EINVAL; 2437 else { 2438 aiov.iov_base = buf; 2439 aiov.iov_len = count; 2440 auio.uio_iov = &aiov; 2441 auio.uio_iovcnt = 1; 2442 auio.uio_offset = 0; 2443 auio.uio_rw = UIO_READ; 2444 auio.uio_segflg = bufseg; 2445 auio.uio_td = td; 2446 auio.uio_resid = count; 2447 error = VOP_READLINK(vp, &auio, td->td_ucred); 2448 td->td_retval[0] = count - auio.uio_resid; 2449 } 2450 vput(vp); 2451 return (error); 2452 } 2453 2454 /* 2455 * Common implementation code for chflags() and fchflags(). 2456 */ 2457 static int 2458 setfflags(td, vp, flags) 2459 struct thread *td; 2460 struct vnode *vp; 2461 u_long flags; 2462 { 2463 struct mount *mp; 2464 struct vattr vattr; 2465 int error; 2466 2467 /* We can't support the value matching VNOVAL. */ 2468 if (flags == VNOVAL) 2469 return (EOPNOTSUPP); 2470 2471 /* 2472 * Prevent non-root users from setting flags on devices. When 2473 * a device is reused, users can retain ownership of the device 2474 * if they are allowed to set flags and programs assume that 2475 * chown can't fail when done as root. 2476 */ 2477 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2478 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2479 if (error != 0) 2480 return (error); 2481 } 2482 2483 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2484 return (error); 2485 VATTR_NULL(&vattr); 2486 vattr.va_flags = flags; 2487 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2488 #ifdef MAC 2489 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2490 if (error == 0) 2491 #endif 2492 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2493 VOP_UNLOCK(vp, 0); 2494 vn_finished_write(mp); 2495 return (error); 2496 } 2497 2498 /* 2499 * Change flags of a file given a path name. 2500 */ 2501 #ifndef _SYS_SYSPROTO_H_ 2502 struct chflags_args { 2503 const char *path; 2504 u_long flags; 2505 }; 2506 #endif 2507 int 2508 sys_chflags(td, uap) 2509 struct thread *td; 2510 register struct chflags_args /* { 2511 const char *path; 2512 u_long flags; 2513 } */ *uap; 2514 { 2515 2516 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2517 uap->flags, 0)); 2518 } 2519 2520 #ifndef _SYS_SYSPROTO_H_ 2521 struct chflagsat_args { 2522 int fd; 2523 const char *path; 2524 u_long flags; 2525 int atflag; 2526 } 2527 #endif 2528 int 2529 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2530 { 2531 int fd = uap->fd; 2532 const char *path = uap->path; 2533 u_long flags = uap->flags; 2534 int atflag = uap->atflag; 2535 2536 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2537 return (EINVAL); 2538 2539 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2540 } 2541 2542 /* 2543 * Same as chflags() but doesn't follow symlinks. 2544 */ 2545 int 2546 sys_lchflags(td, uap) 2547 struct thread *td; 2548 register struct lchflags_args /* { 2549 const char *path; 2550 u_long flags; 2551 } */ *uap; 2552 { 2553 2554 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2555 uap->flags, AT_SYMLINK_NOFOLLOW)); 2556 } 2557 2558 static int 2559 kern_chflagsat(struct thread *td, int fd, const char *path, 2560 enum uio_seg pathseg, u_long flags, int atflag) 2561 { 2562 struct nameidata nd; 2563 cap_rights_t rights; 2564 int error, follow; 2565 2566 AUDIT_ARG_FFLAGS(flags); 2567 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2568 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2569 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2570 if ((error = namei(&nd)) != 0) 2571 return (error); 2572 NDFREE(&nd, NDF_ONLY_PNBUF); 2573 error = setfflags(td, nd.ni_vp, flags); 2574 vrele(nd.ni_vp); 2575 return (error); 2576 } 2577 2578 /* 2579 * Change flags of a file given a file descriptor. 2580 */ 2581 #ifndef _SYS_SYSPROTO_H_ 2582 struct fchflags_args { 2583 int fd; 2584 u_long flags; 2585 }; 2586 #endif 2587 int 2588 sys_fchflags(td, uap) 2589 struct thread *td; 2590 register struct fchflags_args /* { 2591 int fd; 2592 u_long flags; 2593 } */ *uap; 2594 { 2595 struct file *fp; 2596 cap_rights_t rights; 2597 int error; 2598 2599 AUDIT_ARG_FD(uap->fd); 2600 AUDIT_ARG_FFLAGS(uap->flags); 2601 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS), 2602 &fp); 2603 if (error != 0) 2604 return (error); 2605 #ifdef AUDIT 2606 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2607 AUDIT_ARG_VNODE1(fp->f_vnode); 2608 VOP_UNLOCK(fp->f_vnode, 0); 2609 #endif 2610 error = setfflags(td, fp->f_vnode, uap->flags); 2611 fdrop(fp, td); 2612 return (error); 2613 } 2614 2615 /* 2616 * Common implementation code for chmod(), lchmod() and fchmod(). 2617 */ 2618 int 2619 setfmode(td, cred, vp, mode) 2620 struct thread *td; 2621 struct ucred *cred; 2622 struct vnode *vp; 2623 int mode; 2624 { 2625 struct mount *mp; 2626 struct vattr vattr; 2627 int error; 2628 2629 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2630 return (error); 2631 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2632 VATTR_NULL(&vattr); 2633 vattr.va_mode = mode & ALLPERMS; 2634 #ifdef MAC 2635 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2636 if (error == 0) 2637 #endif 2638 error = VOP_SETATTR(vp, &vattr, cred); 2639 VOP_UNLOCK(vp, 0); 2640 vn_finished_write(mp); 2641 return (error); 2642 } 2643 2644 /* 2645 * Change mode of a file given path name. 2646 */ 2647 #ifndef _SYS_SYSPROTO_H_ 2648 struct chmod_args { 2649 char *path; 2650 int mode; 2651 }; 2652 #endif 2653 int 2654 sys_chmod(td, uap) 2655 struct thread *td; 2656 register struct chmod_args /* { 2657 char *path; 2658 int mode; 2659 } */ *uap; 2660 { 2661 2662 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2663 uap->mode, 0)); 2664 } 2665 2666 #ifndef _SYS_SYSPROTO_H_ 2667 struct fchmodat_args { 2668 int dirfd; 2669 char *path; 2670 mode_t mode; 2671 int flag; 2672 } 2673 #endif 2674 int 2675 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2676 { 2677 int flag = uap->flag; 2678 int fd = uap->fd; 2679 char *path = uap->path; 2680 mode_t mode = uap->mode; 2681 2682 if (flag & ~AT_SYMLINK_NOFOLLOW) 2683 return (EINVAL); 2684 2685 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2686 } 2687 2688 /* 2689 * Change mode of a file given path name (don't follow links.) 2690 */ 2691 #ifndef _SYS_SYSPROTO_H_ 2692 struct lchmod_args { 2693 char *path; 2694 int mode; 2695 }; 2696 #endif 2697 int 2698 sys_lchmod(td, uap) 2699 struct thread *td; 2700 register struct lchmod_args /* { 2701 char *path; 2702 int mode; 2703 } */ *uap; 2704 { 2705 2706 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2707 uap->mode, AT_SYMLINK_NOFOLLOW)); 2708 } 2709 2710 int 2711 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2712 mode_t mode, int flag) 2713 { 2714 struct nameidata nd; 2715 cap_rights_t rights; 2716 int error, follow; 2717 2718 AUDIT_ARG_MODE(mode); 2719 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2720 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2721 cap_rights_init(&rights, CAP_FCHMOD), td); 2722 if ((error = namei(&nd)) != 0) 2723 return (error); 2724 NDFREE(&nd, NDF_ONLY_PNBUF); 2725 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2726 vrele(nd.ni_vp); 2727 return (error); 2728 } 2729 2730 /* 2731 * Change mode of a file given a file descriptor. 2732 */ 2733 #ifndef _SYS_SYSPROTO_H_ 2734 struct fchmod_args { 2735 int fd; 2736 int mode; 2737 }; 2738 #endif 2739 int 2740 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2741 { 2742 struct file *fp; 2743 cap_rights_t rights; 2744 int error; 2745 2746 AUDIT_ARG_FD(uap->fd); 2747 AUDIT_ARG_MODE(uap->mode); 2748 2749 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2750 if (error != 0) 2751 return (error); 2752 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2753 fdrop(fp, td); 2754 return (error); 2755 } 2756 2757 /* 2758 * Common implementation for chown(), lchown(), and fchown() 2759 */ 2760 int 2761 setfown(td, cred, vp, uid, gid) 2762 struct thread *td; 2763 struct ucred *cred; 2764 struct vnode *vp; 2765 uid_t uid; 2766 gid_t gid; 2767 { 2768 struct mount *mp; 2769 struct vattr vattr; 2770 int error; 2771 2772 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2773 return (error); 2774 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2775 VATTR_NULL(&vattr); 2776 vattr.va_uid = uid; 2777 vattr.va_gid = gid; 2778 #ifdef MAC 2779 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2780 vattr.va_gid); 2781 if (error == 0) 2782 #endif 2783 error = VOP_SETATTR(vp, &vattr, cred); 2784 VOP_UNLOCK(vp, 0); 2785 vn_finished_write(mp); 2786 return (error); 2787 } 2788 2789 /* 2790 * Set ownership given a path name. 2791 */ 2792 #ifndef _SYS_SYSPROTO_H_ 2793 struct chown_args { 2794 char *path; 2795 int uid; 2796 int gid; 2797 }; 2798 #endif 2799 int 2800 sys_chown(td, uap) 2801 struct thread *td; 2802 register struct chown_args /* { 2803 char *path; 2804 int uid; 2805 int gid; 2806 } */ *uap; 2807 { 2808 2809 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2810 uap->gid, 0)); 2811 } 2812 2813 #ifndef _SYS_SYSPROTO_H_ 2814 struct fchownat_args { 2815 int fd; 2816 const char * path; 2817 uid_t uid; 2818 gid_t gid; 2819 int flag; 2820 }; 2821 #endif 2822 int 2823 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2824 { 2825 int flag; 2826 2827 flag = uap->flag; 2828 if (flag & ~AT_SYMLINK_NOFOLLOW) 2829 return (EINVAL); 2830 2831 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2832 uap->gid, uap->flag)); 2833 } 2834 2835 int 2836 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2837 int uid, int gid, int flag) 2838 { 2839 struct nameidata nd; 2840 cap_rights_t rights; 2841 int error, follow; 2842 2843 AUDIT_ARG_OWNER(uid, gid); 2844 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2845 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2846 cap_rights_init(&rights, CAP_FCHOWN), td); 2847 2848 if ((error = namei(&nd)) != 0) 2849 return (error); 2850 NDFREE(&nd, NDF_ONLY_PNBUF); 2851 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2852 vrele(nd.ni_vp); 2853 return (error); 2854 } 2855 2856 /* 2857 * Set ownership given a path name, do not cross symlinks. 2858 */ 2859 #ifndef _SYS_SYSPROTO_H_ 2860 struct lchown_args { 2861 char *path; 2862 int uid; 2863 int gid; 2864 }; 2865 #endif 2866 int 2867 sys_lchown(td, uap) 2868 struct thread *td; 2869 register struct lchown_args /* { 2870 char *path; 2871 int uid; 2872 int gid; 2873 } */ *uap; 2874 { 2875 2876 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2877 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2878 } 2879 2880 /* 2881 * Set ownership given a file descriptor. 2882 */ 2883 #ifndef _SYS_SYSPROTO_H_ 2884 struct fchown_args { 2885 int fd; 2886 int uid; 2887 int gid; 2888 }; 2889 #endif 2890 int 2891 sys_fchown(td, uap) 2892 struct thread *td; 2893 register struct fchown_args /* { 2894 int fd; 2895 int uid; 2896 int gid; 2897 } */ *uap; 2898 { 2899 struct file *fp; 2900 cap_rights_t rights; 2901 int error; 2902 2903 AUDIT_ARG_FD(uap->fd); 2904 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2905 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2906 if (error != 0) 2907 return (error); 2908 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2909 fdrop(fp, td); 2910 return (error); 2911 } 2912 2913 /* 2914 * Common implementation code for utimes(), lutimes(), and futimes(). 2915 */ 2916 static int 2917 getutimes(usrtvp, tvpseg, tsp) 2918 const struct timeval *usrtvp; 2919 enum uio_seg tvpseg; 2920 struct timespec *tsp; 2921 { 2922 struct timeval tv[2]; 2923 const struct timeval *tvp; 2924 int error; 2925 2926 if (usrtvp == NULL) { 2927 vfs_timestamp(&tsp[0]); 2928 tsp[1] = tsp[0]; 2929 } else { 2930 if (tvpseg == UIO_SYSSPACE) { 2931 tvp = usrtvp; 2932 } else { 2933 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2934 return (error); 2935 tvp = tv; 2936 } 2937 2938 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2939 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2940 return (EINVAL); 2941 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2942 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2943 } 2944 return (0); 2945 } 2946 2947 /* 2948 * Common implementation code for futimens(), utimensat(). 2949 */ 2950 #define UTIMENS_NULL 0x1 2951 #define UTIMENS_EXIT 0x2 2952 static int 2953 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2954 struct timespec *tsp, int *retflags) 2955 { 2956 struct timespec tsnow; 2957 int error; 2958 2959 vfs_timestamp(&tsnow); 2960 *retflags = 0; 2961 if (usrtsp == NULL) { 2962 tsp[0] = tsnow; 2963 tsp[1] = tsnow; 2964 *retflags |= UTIMENS_NULL; 2965 return (0); 2966 } 2967 if (tspseg == UIO_SYSSPACE) { 2968 tsp[0] = usrtsp[0]; 2969 tsp[1] = usrtsp[1]; 2970 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2971 return (error); 2972 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2973 *retflags |= UTIMENS_EXIT; 2974 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2975 *retflags |= UTIMENS_NULL; 2976 if (tsp[0].tv_nsec == UTIME_OMIT) 2977 tsp[0].tv_sec = VNOVAL; 2978 else if (tsp[0].tv_nsec == UTIME_NOW) 2979 tsp[0] = tsnow; 2980 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2981 return (EINVAL); 2982 if (tsp[1].tv_nsec == UTIME_OMIT) 2983 tsp[1].tv_sec = VNOVAL; 2984 else if (tsp[1].tv_nsec == UTIME_NOW) 2985 tsp[1] = tsnow; 2986 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2987 return (EINVAL); 2988 2989 return (0); 2990 } 2991 2992 /* 2993 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2994 * and utimensat(). 2995 */ 2996 static int 2997 setutimes(td, vp, ts, numtimes, nullflag) 2998 struct thread *td; 2999 struct vnode *vp; 3000 const struct timespec *ts; 3001 int numtimes; 3002 int nullflag; 3003 { 3004 struct mount *mp; 3005 struct vattr vattr; 3006 int error, setbirthtime; 3007 3008 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3009 return (error); 3010 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3011 setbirthtime = 0; 3012 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3013 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3014 setbirthtime = 1; 3015 VATTR_NULL(&vattr); 3016 vattr.va_atime = ts[0]; 3017 vattr.va_mtime = ts[1]; 3018 if (setbirthtime) 3019 vattr.va_birthtime = ts[1]; 3020 if (numtimes > 2) 3021 vattr.va_birthtime = ts[2]; 3022 if (nullflag) 3023 vattr.va_vaflags |= VA_UTIMES_NULL; 3024 #ifdef MAC 3025 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3026 vattr.va_mtime); 3027 #endif 3028 if (error == 0) 3029 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3030 VOP_UNLOCK(vp, 0); 3031 vn_finished_write(mp); 3032 return (error); 3033 } 3034 3035 /* 3036 * Set the access and modification times of a file. 3037 */ 3038 #ifndef _SYS_SYSPROTO_H_ 3039 struct utimes_args { 3040 char *path; 3041 struct timeval *tptr; 3042 }; 3043 #endif 3044 int 3045 sys_utimes(td, uap) 3046 struct thread *td; 3047 register struct utimes_args /* { 3048 char *path; 3049 struct timeval *tptr; 3050 } */ *uap; 3051 { 3052 3053 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3054 uap->tptr, UIO_USERSPACE)); 3055 } 3056 3057 #ifndef _SYS_SYSPROTO_H_ 3058 struct futimesat_args { 3059 int fd; 3060 const char * path; 3061 const struct timeval * times; 3062 }; 3063 #endif 3064 int 3065 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3066 { 3067 3068 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3069 uap->times, UIO_USERSPACE)); 3070 } 3071 3072 int 3073 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3074 struct timeval *tptr, enum uio_seg tptrseg) 3075 { 3076 struct nameidata nd; 3077 struct timespec ts[2]; 3078 cap_rights_t rights; 3079 int error; 3080 3081 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3082 return (error); 3083 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3084 cap_rights_init(&rights, CAP_FUTIMES), td); 3085 3086 if ((error = namei(&nd)) != 0) 3087 return (error); 3088 NDFREE(&nd, NDF_ONLY_PNBUF); 3089 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3090 vrele(nd.ni_vp); 3091 return (error); 3092 } 3093 3094 /* 3095 * Set the access and modification times of a file. 3096 */ 3097 #ifndef _SYS_SYSPROTO_H_ 3098 struct lutimes_args { 3099 char *path; 3100 struct timeval *tptr; 3101 }; 3102 #endif 3103 int 3104 sys_lutimes(td, uap) 3105 struct thread *td; 3106 register struct lutimes_args /* { 3107 char *path; 3108 struct timeval *tptr; 3109 } */ *uap; 3110 { 3111 3112 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3113 UIO_USERSPACE)); 3114 } 3115 3116 int 3117 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3118 struct timeval *tptr, enum uio_seg tptrseg) 3119 { 3120 struct timespec ts[2]; 3121 struct nameidata nd; 3122 int error; 3123 3124 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3125 return (error); 3126 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3127 if ((error = namei(&nd)) != 0) 3128 return (error); 3129 NDFREE(&nd, NDF_ONLY_PNBUF); 3130 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3131 vrele(nd.ni_vp); 3132 return (error); 3133 } 3134 3135 /* 3136 * Set the access and modification times of a file. 3137 */ 3138 #ifndef _SYS_SYSPROTO_H_ 3139 struct futimes_args { 3140 int fd; 3141 struct timeval *tptr; 3142 }; 3143 #endif 3144 int 3145 sys_futimes(td, uap) 3146 struct thread *td; 3147 register struct futimes_args /* { 3148 int fd; 3149 struct timeval *tptr; 3150 } */ *uap; 3151 { 3152 3153 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3154 } 3155 3156 int 3157 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3158 enum uio_seg tptrseg) 3159 { 3160 struct timespec ts[2]; 3161 struct file *fp; 3162 cap_rights_t rights; 3163 int error; 3164 3165 AUDIT_ARG_FD(fd); 3166 error = getutimes(tptr, tptrseg, ts); 3167 if (error != 0) 3168 return (error); 3169 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3170 if (error != 0) 3171 return (error); 3172 #ifdef AUDIT 3173 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3174 AUDIT_ARG_VNODE1(fp->f_vnode); 3175 VOP_UNLOCK(fp->f_vnode, 0); 3176 #endif 3177 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3178 fdrop(fp, td); 3179 return (error); 3180 } 3181 3182 int 3183 sys_futimens(struct thread *td, struct futimens_args *uap) 3184 { 3185 3186 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3187 } 3188 3189 int 3190 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3191 enum uio_seg tptrseg) 3192 { 3193 struct timespec ts[2]; 3194 struct file *fp; 3195 cap_rights_t rights; 3196 int error, flags; 3197 3198 AUDIT_ARG_FD(fd); 3199 error = getutimens(tptr, tptrseg, ts, &flags); 3200 if (error != 0) 3201 return (error); 3202 if (flags & UTIMENS_EXIT) 3203 return (0); 3204 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 3205 if (error != 0) 3206 return (error); 3207 #ifdef AUDIT 3208 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3209 AUDIT_ARG_VNODE1(fp->f_vnode); 3210 VOP_UNLOCK(fp->f_vnode, 0); 3211 #endif 3212 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3213 fdrop(fp, td); 3214 return (error); 3215 } 3216 3217 int 3218 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3219 { 3220 3221 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3222 uap->times, UIO_USERSPACE, uap->flag)); 3223 } 3224 3225 int 3226 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3227 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3228 { 3229 struct nameidata nd; 3230 struct timespec ts[2]; 3231 cap_rights_t rights; 3232 int error, flags; 3233 3234 if (flag & ~AT_SYMLINK_NOFOLLOW) 3235 return (EINVAL); 3236 3237 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3238 return (error); 3239 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3240 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 3241 cap_rights_init(&rights, CAP_FUTIMES), td); 3242 if ((error = namei(&nd)) != 0) 3243 return (error); 3244 /* 3245 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3246 * POSIX states: 3247 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3248 * "Search permission is denied by a component of the path prefix." 3249 */ 3250 NDFREE(&nd, NDF_ONLY_PNBUF); 3251 if ((flags & UTIMENS_EXIT) == 0) 3252 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3253 vrele(nd.ni_vp); 3254 return (error); 3255 } 3256 3257 /* 3258 * Truncate a file given its path name. 3259 */ 3260 #ifndef _SYS_SYSPROTO_H_ 3261 struct truncate_args { 3262 char *path; 3263 int pad; 3264 off_t length; 3265 }; 3266 #endif 3267 int 3268 sys_truncate(td, uap) 3269 struct thread *td; 3270 register struct truncate_args /* { 3271 char *path; 3272 int pad; 3273 off_t length; 3274 } */ *uap; 3275 { 3276 3277 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3278 } 3279 3280 int 3281 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3282 { 3283 struct mount *mp; 3284 struct vnode *vp; 3285 void *rl_cookie; 3286 struct vattr vattr; 3287 struct nameidata nd; 3288 int error; 3289 3290 if (length < 0) 3291 return(EINVAL); 3292 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3293 if ((error = namei(&nd)) != 0) 3294 return (error); 3295 vp = nd.ni_vp; 3296 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3297 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3298 vn_rangelock_unlock(vp, rl_cookie); 3299 vrele(vp); 3300 return (error); 3301 } 3302 NDFREE(&nd, NDF_ONLY_PNBUF); 3303 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3304 if (vp->v_type == VDIR) 3305 error = EISDIR; 3306 #ifdef MAC 3307 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3308 } 3309 #endif 3310 else if ((error = vn_writechk(vp)) == 0 && 3311 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3312 VATTR_NULL(&vattr); 3313 vattr.va_size = length; 3314 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3315 } 3316 VOP_UNLOCK(vp, 0); 3317 vn_finished_write(mp); 3318 vn_rangelock_unlock(vp, rl_cookie); 3319 vrele(vp); 3320 return (error); 3321 } 3322 3323 #if defined(COMPAT_43) 3324 /* 3325 * Truncate a file given its path name. 3326 */ 3327 #ifndef _SYS_SYSPROTO_H_ 3328 struct otruncate_args { 3329 char *path; 3330 long length; 3331 }; 3332 #endif 3333 int 3334 otruncate(td, uap) 3335 struct thread *td; 3336 register struct otruncate_args /* { 3337 char *path; 3338 long length; 3339 } */ *uap; 3340 { 3341 struct truncate_args /* { 3342 char *path; 3343 int pad; 3344 off_t length; 3345 } */ nuap; 3346 3347 nuap.path = uap->path; 3348 nuap.length = uap->length; 3349 return (sys_truncate(td, &nuap)); 3350 } 3351 #endif /* COMPAT_43 */ 3352 3353 #if defined(COMPAT_FREEBSD6) 3354 /* Versions with the pad argument */ 3355 int 3356 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3357 { 3358 struct truncate_args ouap; 3359 3360 ouap.path = uap->path; 3361 ouap.length = uap->length; 3362 return (sys_truncate(td, &ouap)); 3363 } 3364 3365 int 3366 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3367 { 3368 struct ftruncate_args ouap; 3369 3370 ouap.fd = uap->fd; 3371 ouap.length = uap->length; 3372 return (sys_ftruncate(td, &ouap)); 3373 } 3374 #endif 3375 3376 int 3377 kern_fsync(struct thread *td, int fd, bool fullsync) 3378 { 3379 struct vnode *vp; 3380 struct mount *mp; 3381 struct file *fp; 3382 cap_rights_t rights; 3383 int error, lock_flags; 3384 3385 AUDIT_ARG_FD(fd); 3386 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3387 if (error != 0) 3388 return (error); 3389 vp = fp->f_vnode; 3390 #if 0 3391 if (!fullsync) 3392 /* XXXKIB: compete outstanding aio writes */; 3393 #endif 3394 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3395 if (error != 0) 3396 goto drop; 3397 if (MNT_SHARED_WRITES(mp) || 3398 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3399 lock_flags = LK_SHARED; 3400 } else { 3401 lock_flags = LK_EXCLUSIVE; 3402 } 3403 vn_lock(vp, lock_flags | LK_RETRY); 3404 AUDIT_ARG_VNODE1(vp); 3405 if (vp->v_object != NULL) { 3406 VM_OBJECT_WLOCK(vp->v_object); 3407 vm_object_page_clean(vp->v_object, 0, 0, 0); 3408 VM_OBJECT_WUNLOCK(vp->v_object); 3409 } 3410 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3411 VOP_UNLOCK(vp, 0); 3412 vn_finished_write(mp); 3413 drop: 3414 fdrop(fp, td); 3415 return (error); 3416 } 3417 3418 /* 3419 * Sync an open file. 3420 */ 3421 #ifndef _SYS_SYSPROTO_H_ 3422 struct fsync_args { 3423 int fd; 3424 }; 3425 #endif 3426 int 3427 sys_fsync(struct thread *td, struct fsync_args *uap) 3428 { 3429 3430 return (kern_fsync(td, uap->fd, true)); 3431 } 3432 3433 int 3434 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3435 { 3436 3437 return (kern_fsync(td, uap->fd, false)); 3438 } 3439 3440 /* 3441 * Rename files. Source and destination must either both be directories, or 3442 * both not be directories. If target is a directory, it must be empty. 3443 */ 3444 #ifndef _SYS_SYSPROTO_H_ 3445 struct rename_args { 3446 char *from; 3447 char *to; 3448 }; 3449 #endif 3450 int 3451 sys_rename(td, uap) 3452 struct thread *td; 3453 register struct rename_args /* { 3454 char *from; 3455 char *to; 3456 } */ *uap; 3457 { 3458 3459 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3460 uap->to, UIO_USERSPACE)); 3461 } 3462 3463 #ifndef _SYS_SYSPROTO_H_ 3464 struct renameat_args { 3465 int oldfd; 3466 char *old; 3467 int newfd; 3468 char *new; 3469 }; 3470 #endif 3471 int 3472 sys_renameat(struct thread *td, struct renameat_args *uap) 3473 { 3474 3475 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3476 UIO_USERSPACE)); 3477 } 3478 3479 int 3480 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3481 enum uio_seg pathseg) 3482 { 3483 struct mount *mp = NULL; 3484 struct vnode *tvp, *fvp, *tdvp; 3485 struct nameidata fromnd, tond; 3486 cap_rights_t rights; 3487 int error; 3488 3489 again: 3490 bwillwrite(); 3491 #ifdef MAC 3492 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3493 AUDITVNODE1, pathseg, old, oldfd, 3494 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3495 #else 3496 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3497 pathseg, old, oldfd, 3498 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3499 #endif 3500 3501 if ((error = namei(&fromnd)) != 0) 3502 return (error); 3503 #ifdef MAC 3504 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3505 fromnd.ni_vp, &fromnd.ni_cnd); 3506 VOP_UNLOCK(fromnd.ni_dvp, 0); 3507 if (fromnd.ni_dvp != fromnd.ni_vp) 3508 VOP_UNLOCK(fromnd.ni_vp, 0); 3509 #endif 3510 fvp = fromnd.ni_vp; 3511 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3512 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3513 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3514 if (fromnd.ni_vp->v_type == VDIR) 3515 tond.ni_cnd.cn_flags |= WILLBEDIR; 3516 if ((error = namei(&tond)) != 0) { 3517 /* Translate error code for rename("dir1", "dir2/."). */ 3518 if (error == EISDIR && fvp->v_type == VDIR) 3519 error = EINVAL; 3520 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3521 vrele(fromnd.ni_dvp); 3522 vrele(fvp); 3523 goto out1; 3524 } 3525 tdvp = tond.ni_dvp; 3526 tvp = tond.ni_vp; 3527 error = vn_start_write(fvp, &mp, V_NOWAIT); 3528 if (error != 0) { 3529 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3530 NDFREE(&tond, NDF_ONLY_PNBUF); 3531 if (tvp != NULL) 3532 vput(tvp); 3533 if (tdvp == tvp) 3534 vrele(tdvp); 3535 else 3536 vput(tdvp); 3537 vrele(fromnd.ni_dvp); 3538 vrele(fvp); 3539 vrele(tond.ni_startdir); 3540 if (fromnd.ni_startdir != NULL) 3541 vrele(fromnd.ni_startdir); 3542 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3543 if (error != 0) 3544 return (error); 3545 goto again; 3546 } 3547 if (tvp != NULL) { 3548 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3549 error = ENOTDIR; 3550 goto out; 3551 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3552 error = EISDIR; 3553 goto out; 3554 } 3555 #ifdef CAPABILITIES 3556 if (newfd != AT_FDCWD) { 3557 /* 3558 * If the target already exists we require CAP_UNLINKAT 3559 * from 'newfd'. 3560 */ 3561 error = cap_check(&tond.ni_filecaps.fc_rights, 3562 cap_rights_init(&rights, CAP_UNLINKAT)); 3563 if (error != 0) 3564 goto out; 3565 } 3566 #endif 3567 } 3568 if (fvp == tdvp) { 3569 error = EINVAL; 3570 goto out; 3571 } 3572 /* 3573 * If the source is the same as the destination (that is, if they 3574 * are links to the same vnode), then there is nothing to do. 3575 */ 3576 if (fvp == tvp) 3577 error = -1; 3578 #ifdef MAC 3579 else 3580 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3581 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3582 #endif 3583 out: 3584 if (error == 0) { 3585 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3586 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3587 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3588 NDFREE(&tond, NDF_ONLY_PNBUF); 3589 } else { 3590 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3591 NDFREE(&tond, NDF_ONLY_PNBUF); 3592 if (tvp != NULL) 3593 vput(tvp); 3594 if (tdvp == tvp) 3595 vrele(tdvp); 3596 else 3597 vput(tdvp); 3598 vrele(fromnd.ni_dvp); 3599 vrele(fvp); 3600 } 3601 vrele(tond.ni_startdir); 3602 vn_finished_write(mp); 3603 out1: 3604 if (fromnd.ni_startdir) 3605 vrele(fromnd.ni_startdir); 3606 if (error == -1) 3607 return (0); 3608 return (error); 3609 } 3610 3611 /* 3612 * Make a directory file. 3613 */ 3614 #ifndef _SYS_SYSPROTO_H_ 3615 struct mkdir_args { 3616 char *path; 3617 int mode; 3618 }; 3619 #endif 3620 int 3621 sys_mkdir(td, uap) 3622 struct thread *td; 3623 register struct mkdir_args /* { 3624 char *path; 3625 int mode; 3626 } */ *uap; 3627 { 3628 3629 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3630 uap->mode)); 3631 } 3632 3633 #ifndef _SYS_SYSPROTO_H_ 3634 struct mkdirat_args { 3635 int fd; 3636 char *path; 3637 mode_t mode; 3638 }; 3639 #endif 3640 int 3641 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3642 { 3643 3644 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3645 } 3646 3647 int 3648 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3649 int mode) 3650 { 3651 struct mount *mp; 3652 struct vnode *vp; 3653 struct vattr vattr; 3654 struct nameidata nd; 3655 cap_rights_t rights; 3656 int error; 3657 3658 AUDIT_ARG_MODE(mode); 3659 restart: 3660 bwillwrite(); 3661 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3662 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3663 td); 3664 nd.ni_cnd.cn_flags |= WILLBEDIR; 3665 if ((error = namei(&nd)) != 0) 3666 return (error); 3667 vp = nd.ni_vp; 3668 if (vp != NULL) { 3669 NDFREE(&nd, NDF_ONLY_PNBUF); 3670 /* 3671 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3672 * the strange behaviour of leaving the vnode unlocked 3673 * if the target is the same vnode as the parent. 3674 */ 3675 if (vp == nd.ni_dvp) 3676 vrele(nd.ni_dvp); 3677 else 3678 vput(nd.ni_dvp); 3679 vrele(vp); 3680 return (EEXIST); 3681 } 3682 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3683 NDFREE(&nd, NDF_ONLY_PNBUF); 3684 vput(nd.ni_dvp); 3685 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3686 return (error); 3687 goto restart; 3688 } 3689 VATTR_NULL(&vattr); 3690 vattr.va_type = VDIR; 3691 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3692 #ifdef MAC 3693 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3694 &vattr); 3695 if (error != 0) 3696 goto out; 3697 #endif 3698 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3699 #ifdef MAC 3700 out: 3701 #endif 3702 NDFREE(&nd, NDF_ONLY_PNBUF); 3703 vput(nd.ni_dvp); 3704 if (error == 0) 3705 vput(nd.ni_vp); 3706 vn_finished_write(mp); 3707 return (error); 3708 } 3709 3710 /* 3711 * Remove a directory file. 3712 */ 3713 #ifndef _SYS_SYSPROTO_H_ 3714 struct rmdir_args { 3715 char *path; 3716 }; 3717 #endif 3718 int 3719 sys_rmdir(td, uap) 3720 struct thread *td; 3721 struct rmdir_args /* { 3722 char *path; 3723 } */ *uap; 3724 { 3725 3726 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3727 } 3728 3729 int 3730 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3731 { 3732 struct mount *mp; 3733 struct vnode *vp; 3734 struct nameidata nd; 3735 cap_rights_t rights; 3736 int error; 3737 3738 restart: 3739 bwillwrite(); 3740 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3741 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3742 if ((error = namei(&nd)) != 0) 3743 return (error); 3744 vp = nd.ni_vp; 3745 if (vp->v_type != VDIR) { 3746 error = ENOTDIR; 3747 goto out; 3748 } 3749 /* 3750 * No rmdir "." please. 3751 */ 3752 if (nd.ni_dvp == vp) { 3753 error = EINVAL; 3754 goto out; 3755 } 3756 /* 3757 * The root of a mounted filesystem cannot be deleted. 3758 */ 3759 if (vp->v_vflag & VV_ROOT) { 3760 error = EBUSY; 3761 goto out; 3762 } 3763 #ifdef MAC 3764 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3765 &nd.ni_cnd); 3766 if (error != 0) 3767 goto out; 3768 #endif 3769 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3770 NDFREE(&nd, NDF_ONLY_PNBUF); 3771 vput(vp); 3772 if (nd.ni_dvp == vp) 3773 vrele(nd.ni_dvp); 3774 else 3775 vput(nd.ni_dvp); 3776 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3777 return (error); 3778 goto restart; 3779 } 3780 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3781 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3782 vn_finished_write(mp); 3783 out: 3784 NDFREE(&nd, NDF_ONLY_PNBUF); 3785 vput(vp); 3786 if (nd.ni_dvp == vp) 3787 vrele(nd.ni_dvp); 3788 else 3789 vput(nd.ni_dvp); 3790 return (error); 3791 } 3792 3793 #ifdef COMPAT_43 3794 /* 3795 * Read a block of directory entries in a filesystem independent format. 3796 */ 3797 #ifndef _SYS_SYSPROTO_H_ 3798 struct ogetdirentries_args { 3799 int fd; 3800 char *buf; 3801 u_int count; 3802 long *basep; 3803 }; 3804 #endif 3805 int 3806 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3807 { 3808 long loff; 3809 int error; 3810 3811 error = kern_ogetdirentries(td, uap, &loff); 3812 if (error == 0) 3813 error = copyout(&loff, uap->basep, sizeof(long)); 3814 return (error); 3815 } 3816 3817 int 3818 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3819 long *ploff) 3820 { 3821 struct vnode *vp; 3822 struct file *fp; 3823 struct uio auio, kuio; 3824 struct iovec aiov, kiov; 3825 struct dirent *dp, *edp; 3826 cap_rights_t rights; 3827 caddr_t dirbuf; 3828 int error, eofflag, readcnt; 3829 long loff; 3830 off_t foffset; 3831 3832 /* XXX arbitrary sanity limit on `count'. */ 3833 if (uap->count > 64 * 1024) 3834 return (EINVAL); 3835 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3836 if (error != 0) 3837 return (error); 3838 if ((fp->f_flag & FREAD) == 0) { 3839 fdrop(fp, td); 3840 return (EBADF); 3841 } 3842 vp = fp->f_vnode; 3843 foffset = foffset_lock(fp, 0); 3844 unionread: 3845 if (vp->v_type != VDIR) { 3846 foffset_unlock(fp, foffset, 0); 3847 fdrop(fp, td); 3848 return (EINVAL); 3849 } 3850 aiov.iov_base = uap->buf; 3851 aiov.iov_len = uap->count; 3852 auio.uio_iov = &aiov; 3853 auio.uio_iovcnt = 1; 3854 auio.uio_rw = UIO_READ; 3855 auio.uio_segflg = UIO_USERSPACE; 3856 auio.uio_td = td; 3857 auio.uio_resid = uap->count; 3858 vn_lock(vp, LK_SHARED | LK_RETRY); 3859 loff = auio.uio_offset = foffset; 3860 #ifdef MAC 3861 error = mac_vnode_check_readdir(td->td_ucred, vp); 3862 if (error != 0) { 3863 VOP_UNLOCK(vp, 0); 3864 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3865 fdrop(fp, td); 3866 return (error); 3867 } 3868 #endif 3869 # if (BYTE_ORDER != LITTLE_ENDIAN) 3870 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3871 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3872 NULL, NULL); 3873 foffset = auio.uio_offset; 3874 } else 3875 # endif 3876 { 3877 kuio = auio; 3878 kuio.uio_iov = &kiov; 3879 kuio.uio_segflg = UIO_SYSSPACE; 3880 kiov.iov_len = uap->count; 3881 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3882 kiov.iov_base = dirbuf; 3883 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3884 NULL, NULL); 3885 foffset = kuio.uio_offset; 3886 if (error == 0) { 3887 readcnt = uap->count - kuio.uio_resid; 3888 edp = (struct dirent *)&dirbuf[readcnt]; 3889 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3890 # if (BYTE_ORDER == LITTLE_ENDIAN) 3891 /* 3892 * The expected low byte of 3893 * dp->d_namlen is our dp->d_type. 3894 * The high MBZ byte of dp->d_namlen 3895 * is our dp->d_namlen. 3896 */ 3897 dp->d_type = dp->d_namlen; 3898 dp->d_namlen = 0; 3899 # else 3900 /* 3901 * The dp->d_type is the high byte 3902 * of the expected dp->d_namlen, 3903 * so must be zero'ed. 3904 */ 3905 dp->d_type = 0; 3906 # endif 3907 if (dp->d_reclen > 0) { 3908 dp = (struct dirent *) 3909 ((char *)dp + dp->d_reclen); 3910 } else { 3911 error = EIO; 3912 break; 3913 } 3914 } 3915 if (dp >= edp) 3916 error = uiomove(dirbuf, readcnt, &auio); 3917 } 3918 free(dirbuf, M_TEMP); 3919 } 3920 if (error != 0) { 3921 VOP_UNLOCK(vp, 0); 3922 foffset_unlock(fp, foffset, 0); 3923 fdrop(fp, td); 3924 return (error); 3925 } 3926 if (uap->count == auio.uio_resid && 3927 (vp->v_vflag & VV_ROOT) && 3928 (vp->v_mount->mnt_flag & MNT_UNION)) { 3929 struct vnode *tvp = vp; 3930 vp = vp->v_mount->mnt_vnodecovered; 3931 VREF(vp); 3932 fp->f_vnode = vp; 3933 fp->f_data = vp; 3934 foffset = 0; 3935 vput(tvp); 3936 goto unionread; 3937 } 3938 VOP_UNLOCK(vp, 0); 3939 foffset_unlock(fp, foffset, 0); 3940 fdrop(fp, td); 3941 td->td_retval[0] = uap->count - auio.uio_resid; 3942 if (error == 0) 3943 *ploff = loff; 3944 return (error); 3945 } 3946 #endif /* COMPAT_43 */ 3947 3948 /* 3949 * Read a block of directory entries in a filesystem independent format. 3950 */ 3951 #ifndef _SYS_SYSPROTO_H_ 3952 struct getdirentries_args { 3953 int fd; 3954 char *buf; 3955 u_int count; 3956 long *basep; 3957 }; 3958 #endif 3959 int 3960 sys_getdirentries(td, uap) 3961 struct thread *td; 3962 register struct getdirentries_args /* { 3963 int fd; 3964 char *buf; 3965 u_int count; 3966 long *basep; 3967 } */ *uap; 3968 { 3969 long base; 3970 int error; 3971 3972 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3973 NULL, UIO_USERSPACE); 3974 if (error != 0) 3975 return (error); 3976 if (uap->basep != NULL) 3977 error = copyout(&base, uap->basep, sizeof(long)); 3978 return (error); 3979 } 3980 3981 int 3982 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3983 long *basep, ssize_t *residp, enum uio_seg bufseg) 3984 { 3985 struct vnode *vp; 3986 struct file *fp; 3987 struct uio auio; 3988 struct iovec aiov; 3989 cap_rights_t rights; 3990 long loff; 3991 int error, eofflag; 3992 off_t foffset; 3993 3994 AUDIT_ARG_FD(fd); 3995 if (count > IOSIZE_MAX) 3996 return (EINVAL); 3997 auio.uio_resid = count; 3998 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3999 if (error != 0) 4000 return (error); 4001 if ((fp->f_flag & FREAD) == 0) { 4002 fdrop(fp, td); 4003 return (EBADF); 4004 } 4005 vp = fp->f_vnode; 4006 foffset = foffset_lock(fp, 0); 4007 unionread: 4008 if (vp->v_type != VDIR) { 4009 error = EINVAL; 4010 goto fail; 4011 } 4012 aiov.iov_base = buf; 4013 aiov.iov_len = count; 4014 auio.uio_iov = &aiov; 4015 auio.uio_iovcnt = 1; 4016 auio.uio_rw = UIO_READ; 4017 auio.uio_segflg = bufseg; 4018 auio.uio_td = td; 4019 vn_lock(vp, LK_SHARED | LK_RETRY); 4020 AUDIT_ARG_VNODE1(vp); 4021 loff = auio.uio_offset = foffset; 4022 #ifdef MAC 4023 error = mac_vnode_check_readdir(td->td_ucred, vp); 4024 if (error == 0) 4025 #endif 4026 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4027 NULL); 4028 foffset = auio.uio_offset; 4029 if (error != 0) { 4030 VOP_UNLOCK(vp, 0); 4031 goto fail; 4032 } 4033 if (count == auio.uio_resid && 4034 (vp->v_vflag & VV_ROOT) && 4035 (vp->v_mount->mnt_flag & MNT_UNION)) { 4036 struct vnode *tvp = vp; 4037 4038 vp = vp->v_mount->mnt_vnodecovered; 4039 VREF(vp); 4040 fp->f_vnode = vp; 4041 fp->f_data = vp; 4042 foffset = 0; 4043 vput(tvp); 4044 goto unionread; 4045 } 4046 VOP_UNLOCK(vp, 0); 4047 *basep = loff; 4048 if (residp != NULL) 4049 *residp = auio.uio_resid; 4050 td->td_retval[0] = count - auio.uio_resid; 4051 fail: 4052 foffset_unlock(fp, foffset, 0); 4053 fdrop(fp, td); 4054 return (error); 4055 } 4056 4057 #ifndef _SYS_SYSPROTO_H_ 4058 struct getdents_args { 4059 int fd; 4060 char *buf; 4061 size_t count; 4062 }; 4063 #endif 4064 int 4065 sys_getdents(td, uap) 4066 struct thread *td; 4067 register struct getdents_args /* { 4068 int fd; 4069 char *buf; 4070 u_int count; 4071 } */ *uap; 4072 { 4073 struct getdirentries_args ap; 4074 4075 ap.fd = uap->fd; 4076 ap.buf = uap->buf; 4077 ap.count = uap->count; 4078 ap.basep = NULL; 4079 return (sys_getdirentries(td, &ap)); 4080 } 4081 4082 /* 4083 * Set the mode mask for creation of filesystem nodes. 4084 */ 4085 #ifndef _SYS_SYSPROTO_H_ 4086 struct umask_args { 4087 int newmask; 4088 }; 4089 #endif 4090 int 4091 sys_umask(td, uap) 4092 struct thread *td; 4093 struct umask_args /* { 4094 int newmask; 4095 } */ *uap; 4096 { 4097 struct filedesc *fdp; 4098 4099 fdp = td->td_proc->p_fd; 4100 FILEDESC_XLOCK(fdp); 4101 td->td_retval[0] = fdp->fd_cmask; 4102 fdp->fd_cmask = uap->newmask & ALLPERMS; 4103 FILEDESC_XUNLOCK(fdp); 4104 return (0); 4105 } 4106 4107 /* 4108 * Void all references to file by ripping underlying filesystem away from 4109 * vnode. 4110 */ 4111 #ifndef _SYS_SYSPROTO_H_ 4112 struct revoke_args { 4113 char *path; 4114 }; 4115 #endif 4116 int 4117 sys_revoke(td, uap) 4118 struct thread *td; 4119 register struct revoke_args /* { 4120 char *path; 4121 } */ *uap; 4122 { 4123 struct vnode *vp; 4124 struct vattr vattr; 4125 struct nameidata nd; 4126 int error; 4127 4128 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4129 uap->path, td); 4130 if ((error = namei(&nd)) != 0) 4131 return (error); 4132 vp = nd.ni_vp; 4133 NDFREE(&nd, NDF_ONLY_PNBUF); 4134 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4135 error = EINVAL; 4136 goto out; 4137 } 4138 #ifdef MAC 4139 error = mac_vnode_check_revoke(td->td_ucred, vp); 4140 if (error != 0) 4141 goto out; 4142 #endif 4143 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4144 if (error != 0) 4145 goto out; 4146 if (td->td_ucred->cr_uid != vattr.va_uid) { 4147 error = priv_check(td, PRIV_VFS_ADMIN); 4148 if (error != 0) 4149 goto out; 4150 } 4151 if (vcount(vp) > 1) 4152 VOP_REVOKE(vp, REVOKEALL); 4153 out: 4154 vput(vp); 4155 return (error); 4156 } 4157 4158 /* 4159 * Convert a user file descriptor to a kernel file entry and check that, if it 4160 * is a capability, the correct rights are present. A reference on the file 4161 * entry is held upon returning. 4162 */ 4163 int 4164 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4165 { 4166 struct file *fp; 4167 int error; 4168 4169 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4170 if (error != 0) 4171 return (error); 4172 4173 /* 4174 * The file could be not of the vnode type, or it may be not 4175 * yet fully initialized, in which case the f_vnode pointer 4176 * may be set, but f_ops is still badfileops. E.g., 4177 * devfs_open() transiently create such situation to 4178 * facilitate csw d_fdopen(). 4179 * 4180 * Dupfdopen() handling in kern_openat() installs the 4181 * half-baked file into the process descriptor table, allowing 4182 * other thread to dereference it. Guard against the race by 4183 * checking f_ops. 4184 */ 4185 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4186 fdrop(fp, td); 4187 return (EINVAL); 4188 } 4189 *fpp = fp; 4190 return (0); 4191 } 4192 4193 4194 /* 4195 * Get an (NFS) file handle. 4196 */ 4197 #ifndef _SYS_SYSPROTO_H_ 4198 struct lgetfh_args { 4199 char *fname; 4200 fhandle_t *fhp; 4201 }; 4202 #endif 4203 int 4204 sys_lgetfh(td, uap) 4205 struct thread *td; 4206 register struct lgetfh_args *uap; 4207 { 4208 struct nameidata nd; 4209 fhandle_t fh; 4210 register struct vnode *vp; 4211 int error; 4212 4213 error = priv_check(td, PRIV_VFS_GETFH); 4214 if (error != 0) 4215 return (error); 4216 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4217 uap->fname, td); 4218 error = namei(&nd); 4219 if (error != 0) 4220 return (error); 4221 NDFREE(&nd, NDF_ONLY_PNBUF); 4222 vp = nd.ni_vp; 4223 bzero(&fh, sizeof(fh)); 4224 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4225 error = VOP_VPTOFH(vp, &fh.fh_fid); 4226 vput(vp); 4227 if (error == 0) 4228 error = copyout(&fh, uap->fhp, sizeof (fh)); 4229 return (error); 4230 } 4231 4232 #ifndef _SYS_SYSPROTO_H_ 4233 struct getfh_args { 4234 char *fname; 4235 fhandle_t *fhp; 4236 }; 4237 #endif 4238 int 4239 sys_getfh(td, uap) 4240 struct thread *td; 4241 register struct getfh_args *uap; 4242 { 4243 struct nameidata nd; 4244 fhandle_t fh; 4245 register struct vnode *vp; 4246 int error; 4247 4248 error = priv_check(td, PRIV_VFS_GETFH); 4249 if (error != 0) 4250 return (error); 4251 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4252 uap->fname, td); 4253 error = namei(&nd); 4254 if (error != 0) 4255 return (error); 4256 NDFREE(&nd, NDF_ONLY_PNBUF); 4257 vp = nd.ni_vp; 4258 bzero(&fh, sizeof(fh)); 4259 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4260 error = VOP_VPTOFH(vp, &fh.fh_fid); 4261 vput(vp); 4262 if (error == 0) 4263 error = copyout(&fh, uap->fhp, sizeof (fh)); 4264 return (error); 4265 } 4266 4267 /* 4268 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4269 * open descriptor. 4270 * 4271 * warning: do not remove the priv_check() call or this becomes one giant 4272 * security hole. 4273 */ 4274 #ifndef _SYS_SYSPROTO_H_ 4275 struct fhopen_args { 4276 const struct fhandle *u_fhp; 4277 int flags; 4278 }; 4279 #endif 4280 int 4281 sys_fhopen(td, uap) 4282 struct thread *td; 4283 struct fhopen_args /* { 4284 const struct fhandle *u_fhp; 4285 int flags; 4286 } */ *uap; 4287 { 4288 struct mount *mp; 4289 struct vnode *vp; 4290 struct fhandle fhp; 4291 struct file *fp; 4292 int fmode, error; 4293 int indx; 4294 4295 error = priv_check(td, PRIV_VFS_FHOPEN); 4296 if (error != 0) 4297 return (error); 4298 indx = -1; 4299 fmode = FFLAGS(uap->flags); 4300 /* why not allow a non-read/write open for our lockd? */ 4301 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4302 return (EINVAL); 4303 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4304 if (error != 0) 4305 return(error); 4306 /* find the mount point */ 4307 mp = vfs_busyfs(&fhp.fh_fsid); 4308 if (mp == NULL) 4309 return (ESTALE); 4310 /* now give me my vnode, it gets returned to me locked */ 4311 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4312 vfs_unbusy(mp); 4313 if (error != 0) 4314 return (error); 4315 4316 error = falloc_noinstall(td, &fp); 4317 if (error != 0) { 4318 vput(vp); 4319 return (error); 4320 } 4321 /* 4322 * An extra reference on `fp' has been held for us by 4323 * falloc_noinstall(). 4324 */ 4325 4326 #ifdef INVARIANTS 4327 td->td_dupfd = -1; 4328 #endif 4329 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4330 if (error != 0) { 4331 KASSERT(fp->f_ops == &badfileops, 4332 ("VOP_OPEN in fhopen() set f_ops")); 4333 KASSERT(td->td_dupfd < 0, 4334 ("fhopen() encountered fdopen()")); 4335 4336 vput(vp); 4337 goto bad; 4338 } 4339 #ifdef INVARIANTS 4340 td->td_dupfd = 0; 4341 #endif 4342 fp->f_vnode = vp; 4343 fp->f_seqcount = 1; 4344 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4345 &vnops); 4346 VOP_UNLOCK(vp, 0); 4347 if ((fmode & O_TRUNC) != 0) { 4348 error = fo_truncate(fp, 0, td->td_ucred, td); 4349 if (error != 0) 4350 goto bad; 4351 } 4352 4353 error = finstall(td, fp, &indx, fmode, NULL); 4354 bad: 4355 fdrop(fp, td); 4356 td->td_retval[0] = indx; 4357 return (error); 4358 } 4359 4360 /* 4361 * Stat an (NFS) file handle. 4362 */ 4363 #ifndef _SYS_SYSPROTO_H_ 4364 struct fhstat_args { 4365 struct fhandle *u_fhp; 4366 struct stat *sb; 4367 }; 4368 #endif 4369 int 4370 sys_fhstat(td, uap) 4371 struct thread *td; 4372 register struct fhstat_args /* { 4373 struct fhandle *u_fhp; 4374 struct stat *sb; 4375 } */ *uap; 4376 { 4377 struct stat sb; 4378 struct fhandle fh; 4379 int error; 4380 4381 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4382 if (error != 0) 4383 return (error); 4384 error = kern_fhstat(td, fh, &sb); 4385 if (error == 0) 4386 error = copyout(&sb, uap->sb, sizeof(sb)); 4387 return (error); 4388 } 4389 4390 int 4391 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4392 { 4393 struct mount *mp; 4394 struct vnode *vp; 4395 int error; 4396 4397 error = priv_check(td, PRIV_VFS_FHSTAT); 4398 if (error != 0) 4399 return (error); 4400 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4401 return (ESTALE); 4402 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4403 vfs_unbusy(mp); 4404 if (error != 0) 4405 return (error); 4406 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4407 vput(vp); 4408 return (error); 4409 } 4410 4411 /* 4412 * Implement fstatfs() for (NFS) file handles. 4413 */ 4414 #ifndef _SYS_SYSPROTO_H_ 4415 struct fhstatfs_args { 4416 struct fhandle *u_fhp; 4417 struct statfs *buf; 4418 }; 4419 #endif 4420 int 4421 sys_fhstatfs(td, uap) 4422 struct thread *td; 4423 struct fhstatfs_args /* { 4424 struct fhandle *u_fhp; 4425 struct statfs *buf; 4426 } */ *uap; 4427 { 4428 struct statfs *sfp; 4429 fhandle_t fh; 4430 int error; 4431 4432 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4433 if (error != 0) 4434 return (error); 4435 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4436 error = kern_fhstatfs(td, fh, sfp); 4437 if (error == 0) 4438 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4439 free(sfp, M_STATFS); 4440 return (error); 4441 } 4442 4443 int 4444 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4445 { 4446 struct statfs *sp; 4447 struct mount *mp; 4448 struct vnode *vp; 4449 int error; 4450 4451 error = priv_check(td, PRIV_VFS_FHSTATFS); 4452 if (error != 0) 4453 return (error); 4454 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4455 return (ESTALE); 4456 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4457 if (error != 0) { 4458 vfs_unbusy(mp); 4459 return (error); 4460 } 4461 vput(vp); 4462 error = prison_canseemount(td->td_ucred, mp); 4463 if (error != 0) 4464 goto out; 4465 #ifdef MAC 4466 error = mac_mount_check_stat(td->td_ucred, mp); 4467 if (error != 0) 4468 goto out; 4469 #endif 4470 /* 4471 * Set these in case the underlying filesystem fails to do so. 4472 */ 4473 sp = &mp->mnt_stat; 4474 sp->f_version = STATFS_VERSION; 4475 sp->f_namemax = NAME_MAX; 4476 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4477 error = VFS_STATFS(mp, sp); 4478 if (error == 0) 4479 *buf = *sp; 4480 out: 4481 vfs_unbusy(mp); 4482 return (error); 4483 } 4484 4485 int 4486 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4487 { 4488 struct file *fp; 4489 struct mount *mp; 4490 struct vnode *vp; 4491 cap_rights_t rights; 4492 off_t olen, ooffset; 4493 int error; 4494 4495 if (offset < 0 || len <= 0) 4496 return (EINVAL); 4497 /* Check for wrap. */ 4498 if (offset > OFF_MAX - len) 4499 return (EFBIG); 4500 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4501 if (error != 0) 4502 return (error); 4503 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4504 error = ESPIPE; 4505 goto out; 4506 } 4507 if ((fp->f_flag & FWRITE) == 0) { 4508 error = EBADF; 4509 goto out; 4510 } 4511 if (fp->f_type != DTYPE_VNODE) { 4512 error = ENODEV; 4513 goto out; 4514 } 4515 vp = fp->f_vnode; 4516 if (vp->v_type != VREG) { 4517 error = ENODEV; 4518 goto out; 4519 } 4520 4521 /* Allocating blocks may take a long time, so iterate. */ 4522 for (;;) { 4523 olen = len; 4524 ooffset = offset; 4525 4526 bwillwrite(); 4527 mp = NULL; 4528 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4529 if (error != 0) 4530 break; 4531 error = vn_lock(vp, LK_EXCLUSIVE); 4532 if (error != 0) { 4533 vn_finished_write(mp); 4534 break; 4535 } 4536 #ifdef MAC 4537 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4538 if (error == 0) 4539 #endif 4540 error = VOP_ALLOCATE(vp, &offset, &len); 4541 VOP_UNLOCK(vp, 0); 4542 vn_finished_write(mp); 4543 4544 if (olen + ooffset != offset + len) { 4545 panic("offset + len changed from %jx/%jx to %jx/%jx", 4546 ooffset, olen, offset, len); 4547 } 4548 if (error != 0 || len == 0) 4549 break; 4550 KASSERT(olen > len, ("Iteration did not make progress?")); 4551 maybe_yield(); 4552 } 4553 out: 4554 fdrop(fp, td); 4555 return (error); 4556 } 4557 4558 int 4559 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4560 { 4561 int error; 4562 4563 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4564 return (kern_posix_error(td, error)); 4565 } 4566 4567 /* 4568 * Unlike madvise(2), we do not make a best effort to remember every 4569 * possible caching hint. Instead, we remember the last setting with 4570 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4571 * region of any current setting. 4572 */ 4573 int 4574 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4575 int advice) 4576 { 4577 struct fadvise_info *fa, *new; 4578 struct file *fp; 4579 struct vnode *vp; 4580 cap_rights_t rights; 4581 off_t end; 4582 int error; 4583 4584 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4585 return (EINVAL); 4586 switch (advice) { 4587 case POSIX_FADV_SEQUENTIAL: 4588 case POSIX_FADV_RANDOM: 4589 case POSIX_FADV_NOREUSE: 4590 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4591 break; 4592 case POSIX_FADV_NORMAL: 4593 case POSIX_FADV_WILLNEED: 4594 case POSIX_FADV_DONTNEED: 4595 new = NULL; 4596 break; 4597 default: 4598 return (EINVAL); 4599 } 4600 /* XXX: CAP_POSIX_FADVISE? */ 4601 error = fget(td, fd, cap_rights_init(&rights), &fp); 4602 if (error != 0) 4603 goto out; 4604 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4605 error = ESPIPE; 4606 goto out; 4607 } 4608 if (fp->f_type != DTYPE_VNODE) { 4609 error = ENODEV; 4610 goto out; 4611 } 4612 vp = fp->f_vnode; 4613 if (vp->v_type != VREG) { 4614 error = ENODEV; 4615 goto out; 4616 } 4617 if (len == 0) 4618 end = OFF_MAX; 4619 else 4620 end = offset + len - 1; 4621 switch (advice) { 4622 case POSIX_FADV_SEQUENTIAL: 4623 case POSIX_FADV_RANDOM: 4624 case POSIX_FADV_NOREUSE: 4625 /* 4626 * Try to merge any existing non-standard region with 4627 * this new region if possible, otherwise create a new 4628 * non-standard region for this request. 4629 */ 4630 mtx_pool_lock(mtxpool_sleep, fp); 4631 fa = fp->f_advice; 4632 if (fa != NULL && fa->fa_advice == advice && 4633 ((fa->fa_start <= end && fa->fa_end >= offset) || 4634 (end != OFF_MAX && fa->fa_start == end + 1) || 4635 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4636 if (offset < fa->fa_start) 4637 fa->fa_start = offset; 4638 if (end > fa->fa_end) 4639 fa->fa_end = end; 4640 } else { 4641 new->fa_advice = advice; 4642 new->fa_start = offset; 4643 new->fa_end = end; 4644 fp->f_advice = new; 4645 new = fa; 4646 } 4647 mtx_pool_unlock(mtxpool_sleep, fp); 4648 break; 4649 case POSIX_FADV_NORMAL: 4650 /* 4651 * If a the "normal" region overlaps with an existing 4652 * non-standard region, trim or remove the 4653 * non-standard region. 4654 */ 4655 mtx_pool_lock(mtxpool_sleep, fp); 4656 fa = fp->f_advice; 4657 if (fa != NULL) { 4658 if (offset <= fa->fa_start && end >= fa->fa_end) { 4659 new = fa; 4660 fp->f_advice = NULL; 4661 } else if (offset <= fa->fa_start && 4662 end >= fa->fa_start) 4663 fa->fa_start = end + 1; 4664 else if (offset <= fa->fa_end && end >= fa->fa_end) 4665 fa->fa_end = offset - 1; 4666 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4667 /* 4668 * If the "normal" region is a middle 4669 * portion of the existing 4670 * non-standard region, just remove 4671 * the whole thing rather than picking 4672 * one side or the other to 4673 * preserve. 4674 */ 4675 new = fa; 4676 fp->f_advice = NULL; 4677 } 4678 } 4679 mtx_pool_unlock(mtxpool_sleep, fp); 4680 break; 4681 case POSIX_FADV_WILLNEED: 4682 case POSIX_FADV_DONTNEED: 4683 error = VOP_ADVISE(vp, offset, end, advice); 4684 break; 4685 } 4686 out: 4687 if (fp != NULL) 4688 fdrop(fp, td); 4689 free(new, M_FADVISE); 4690 return (error); 4691 } 4692 4693 int 4694 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4695 { 4696 int error; 4697 4698 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4699 uap->advice); 4700 return (kern_posix_error(td, error)); 4701 } 4702