1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const 
struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 108 /* 109 * Sync each mounted filesystem. 110 */ 111 #ifndef _SYS_SYSPROTO_H_ 112 struct sync_args { 113 int dummy; 114 }; 115 #endif 116 /* ARGSUSED */ 117 int 118 sys_sync(struct thread *td, struct sync_args *uap) 119 { 120 struct mount *mp, *nmp; 121 int save; 122 123 mtx_lock(&mountlist_mtx); 124 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 125 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 126 nmp = TAILQ_NEXT(mp, mnt_list); 127 continue; 128 } 129 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 130 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 131 save = curthread_pflags_set(TDP_SYNCIO); 132 vfs_msync(mp, MNT_NOWAIT); 133 VFS_SYNC(mp, MNT_NOWAIT); 134 curthread_pflags_restore(save); 135 vn_finished_write(mp); 136 } 137 mtx_lock(&mountlist_mtx); 138 nmp = TAILQ_NEXT(mp, mnt_list); 139 vfs_unbusy(mp); 140 } 141 mtx_unlock(&mountlist_mtx); 142 return (0); 143 } 144 145 /* 146 * Change filesystem quotas. 
147 */ 148 #ifndef _SYS_SYSPROTO_H_ 149 struct quotactl_args { 150 char *path; 151 int cmd; 152 int uid; 153 caddr_t arg; 154 }; 155 #endif 156 int 157 sys_quotactl(struct thread *td, struct quotactl_args *uap) 158 { 159 struct mount *mp; 160 struct nameidata nd; 161 int error; 162 163 AUDIT_ARG_CMD(uap->cmd); 164 AUDIT_ARG_UID(uap->uid); 165 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 166 return (EPERM); 167 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 168 uap->path, td); 169 if ((error = namei(&nd)) != 0) 170 return (error); 171 NDFREE(&nd, NDF_ONLY_PNBUF); 172 mp = nd.ni_vp->v_mount; 173 vfs_ref(mp); 174 vput(nd.ni_vp); 175 error = vfs_busy(mp, 0); 176 vfs_rel(mp); 177 if (error != 0) 178 return (error); 179 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 180 181 /* 182 * Since quota on operation typically needs to open quota 183 * file, the Q_QUOTAON handler needs to unbusy the mount point 184 * before calling into namei. Otherwise, unmount might be 185 * started between two vfs_busy() invocations (first is our, 186 * second is from mount point cross-walk code in lookup()), 187 * causing deadlock. 188 * 189 * Require that Q_QUOTAON handles the vfs_busy() reference on 190 * its own, always returning with ubusied mount point. 191 */ 192 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 193 vfs_unbusy(mp); 194 return (error); 195 } 196 197 /* 198 * Used by statfs conversion routines to scale the block size up if 199 * necessary so that all of the block counts are <= 'max_size'. Note 200 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 201 * value of 'n'. 202 */ 203 void 204 statfs_scale_blocks(struct statfs *sf, long max_size) 205 { 206 uint64_t count; 207 int shift; 208 209 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 210 211 /* 212 * Attempt to scale the block counts to give a more accurate 213 * overview to userland of the ratio of free space to used 214 * space. 
To do this, find the largest block count and compute 215 * a divisor that lets it fit into a signed integer <= max_size. 216 */ 217 if (sf->f_bavail < 0) 218 count = -sf->f_bavail; 219 else 220 count = sf->f_bavail; 221 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 222 if (count <= max_size) 223 return; 224 225 count >>= flsl(max_size); 226 shift = 0; 227 while (count > 0) { 228 shift++; 229 count >>=1; 230 } 231 232 sf->f_bsize <<= shift; 233 sf->f_blocks >>= shift; 234 sf->f_bfree >>= shift; 235 sf->f_bavail >>= shift; 236 } 237 238 static int 239 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 240 { 241 struct statfs *sp; 242 int error; 243 244 if (mp == NULL) 245 return (EBADF); 246 error = vfs_busy(mp, 0); 247 vfs_rel(mp); 248 if (error != 0) 249 return (error); 250 #ifdef MAC 251 error = mac_mount_check_stat(td->td_ucred, mp); 252 if (error != 0) 253 goto out; 254 #endif 255 /* 256 * Set these in case the underlying filesystem fails to do so. 257 */ 258 sp = &mp->mnt_stat; 259 sp->f_version = STATFS_VERSION; 260 sp->f_namemax = NAME_MAX; 261 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 262 error = VFS_STATFS(mp, sp); 263 if (error != 0) 264 goto out; 265 *buf = *sp; 266 if (priv_check(td, PRIV_VFS_GENERATION)) { 267 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 268 prison_enforce_statfs(td->td_ucred, mp, buf); 269 } 270 out: 271 vfs_unbusy(mp); 272 return (error); 273 } 274 275 /* 276 * Get filesystem statistics. 
277 */ 278 #ifndef _SYS_SYSPROTO_H_ 279 struct statfs_args { 280 char *path; 281 struct statfs *buf; 282 }; 283 #endif 284 int 285 sys_statfs(struct thread *td, struct statfs_args *uap) 286 { 287 struct statfs *sfp; 288 int error; 289 290 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 291 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 292 if (error == 0) 293 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 294 free(sfp, M_STATFS); 295 return (error); 296 } 297 298 int 299 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 300 struct statfs *buf) 301 { 302 struct mount *mp; 303 struct nameidata nd; 304 int error; 305 306 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 307 pathseg, path, td); 308 error = namei(&nd); 309 if (error != 0) 310 return (error); 311 mp = nd.ni_vp->v_mount; 312 vfs_ref(mp); 313 NDFREE(&nd, NDF_ONLY_PNBUF); 314 vput(nd.ni_vp); 315 return (kern_do_statfs(td, mp, buf)); 316 } 317 318 /* 319 * Get filesystem statistics. 
320 */ 321 #ifndef _SYS_SYSPROTO_H_ 322 struct fstatfs_args { 323 int fd; 324 struct statfs *buf; 325 }; 326 #endif 327 int 328 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 329 { 330 struct statfs *sfp; 331 int error; 332 333 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 334 error = kern_fstatfs(td, uap->fd, sfp); 335 if (error == 0) 336 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 337 free(sfp, M_STATFS); 338 return (error); 339 } 340 341 int 342 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 343 { 344 struct file *fp; 345 struct mount *mp; 346 struct vnode *vp; 347 cap_rights_t rights; 348 int error; 349 350 AUDIT_ARG_FD(fd); 351 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSTATFS), &fp); 352 if (error != 0) 353 return (error); 354 vp = fp->f_vnode; 355 vn_lock(vp, LK_SHARED | LK_RETRY); 356 #ifdef AUDIT 357 AUDIT_ARG_VNODE1(vp); 358 #endif 359 mp = vp->v_mount; 360 if (mp != NULL) 361 vfs_ref(mp); 362 VOP_UNLOCK(vp, 0); 363 fdrop(fp, td); 364 return (kern_do_statfs(td, mp, buf)); 365 } 366 367 /* 368 * Get statistics on all filesystems. 369 */ 370 #ifndef _SYS_SYSPROTO_H_ 371 struct getfsstat_args { 372 struct statfs *buf; 373 long bufsize; 374 int mode; 375 }; 376 #endif 377 int 378 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 379 { 380 size_t count; 381 int error; 382 383 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 384 return (EINVAL); 385 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 386 UIO_USERSPACE, uap->mode); 387 if (error == 0) 388 td->td_retval[0] = count; 389 return (error); 390 } 391 392 /* 393 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 394 * The caller is responsible for freeing memory which will be allocated 395 * in '*buf'. 
396 */ 397 int 398 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 399 size_t *countp, enum uio_seg bufseg, int mode) 400 { 401 struct mount *mp, *nmp; 402 struct statfs *sfsp, *sp, *sptmp, *tofree; 403 size_t count, maxcount; 404 int error; 405 406 switch (mode) { 407 case MNT_WAIT: 408 case MNT_NOWAIT: 409 break; 410 default: 411 if (bufseg == UIO_SYSSPACE) 412 *buf = NULL; 413 return (EINVAL); 414 } 415 restart: 416 maxcount = bufsize / sizeof(struct statfs); 417 if (bufsize == 0) { 418 sfsp = NULL; 419 tofree = NULL; 420 } else if (bufseg == UIO_USERSPACE) { 421 sfsp = *buf; 422 tofree = NULL; 423 } else /* if (bufseg == UIO_SYSSPACE) */ { 424 count = 0; 425 mtx_lock(&mountlist_mtx); 426 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 427 count++; 428 } 429 mtx_unlock(&mountlist_mtx); 430 if (maxcount > count) 431 maxcount = count; 432 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 433 M_STATFS, M_WAITOK); 434 } 435 count = 0; 436 mtx_lock(&mountlist_mtx); 437 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 438 if (prison_canseemount(td->td_ucred, mp) != 0) { 439 nmp = TAILQ_NEXT(mp, mnt_list); 440 continue; 441 } 442 #ifdef MAC 443 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 444 nmp = TAILQ_NEXT(mp, mnt_list); 445 continue; 446 } 447 #endif 448 if (mode == MNT_WAIT) { 449 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 450 /* 451 * If vfs_busy() failed, and MBF_NOWAIT 452 * wasn't passed, then the mp is gone. 453 * Furthermore, because of MBF_MNTLSTLOCK, 454 * the mountlist_mtx was dropped. We have 455 * no other choice than to start over. 456 */ 457 mtx_unlock(&mountlist_mtx); 458 free(tofree, M_STATFS); 459 goto restart; 460 } 461 } else { 462 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 463 nmp = TAILQ_NEXT(mp, mnt_list); 464 continue; 465 } 466 } 467 if (sfsp != NULL && count < maxcount) { 468 sp = &mp->mnt_stat; 469 /* 470 * Set these in case the underlying filesystem 471 * fails to do so. 
472 */ 473 sp->f_version = STATFS_VERSION; 474 sp->f_namemax = NAME_MAX; 475 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 476 /* 477 * If MNT_NOWAIT is specified, do not refresh 478 * the fsstat cache. 479 */ 480 if (mode != MNT_NOWAIT) { 481 error = VFS_STATFS(mp, sp); 482 if (error != 0) { 483 mtx_lock(&mountlist_mtx); 484 nmp = TAILQ_NEXT(mp, mnt_list); 485 vfs_unbusy(mp); 486 continue; 487 } 488 } 489 if (priv_check(td, PRIV_VFS_GENERATION)) { 490 sptmp = malloc(sizeof(struct statfs), M_STATFS, 491 M_WAITOK); 492 *sptmp = *sp; 493 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 494 prison_enforce_statfs(td->td_ucred, mp, sptmp); 495 sp = sptmp; 496 } else 497 sptmp = NULL; 498 if (bufseg == UIO_SYSSPACE) { 499 bcopy(sp, sfsp, sizeof(*sp)); 500 free(sptmp, M_STATFS); 501 } else /* if (bufseg == UIO_USERSPACE) */ { 502 error = copyout(sp, sfsp, sizeof(*sp)); 503 free(sptmp, M_STATFS); 504 if (error != 0) { 505 vfs_unbusy(mp); 506 return (error); 507 } 508 } 509 sfsp++; 510 } 511 count++; 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 } 516 mtx_unlock(&mountlist_mtx); 517 if (sfsp != NULL && count > maxcount) 518 *countp = maxcount; 519 else 520 *countp = count; 521 return (0); 522 } 523 524 #ifdef COMPAT_FREEBSD4 525 /* 526 * Get old format filesystem statistics. 527 */ 528 static void cvtstatfs(struct statfs *, struct ostatfs *); 529 530 #ifndef _SYS_SYSPROTO_H_ 531 struct freebsd4_statfs_args { 532 char *path; 533 struct ostatfs *buf; 534 }; 535 #endif 536 int 537 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 538 { 539 struct ostatfs osb; 540 struct statfs *sfp; 541 int error; 542 543 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 544 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 545 if (error == 0) { 546 cvtstatfs(sfp, &osb); 547 error = copyout(&osb, uap->buf, sizeof(osb)); 548 } 549 free(sfp, M_STATFS); 550 return (error); 551 } 552 553 /* 554 * Get filesystem statistics. 
555 */ 556 #ifndef _SYS_SYSPROTO_H_ 557 struct freebsd4_fstatfs_args { 558 int fd; 559 struct ostatfs *buf; 560 }; 561 #endif 562 int 563 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 564 { 565 struct ostatfs osb; 566 struct statfs *sfp; 567 int error; 568 569 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 570 error = kern_fstatfs(td, uap->fd, sfp); 571 if (error == 0) { 572 cvtstatfs(sfp, &osb); 573 error = copyout(&osb, uap->buf, sizeof(osb)); 574 } 575 free(sfp, M_STATFS); 576 return (error); 577 } 578 579 /* 580 * Get statistics on all filesystems. 581 */ 582 #ifndef _SYS_SYSPROTO_H_ 583 struct freebsd4_getfsstat_args { 584 struct ostatfs *buf; 585 long bufsize; 586 int mode; 587 }; 588 #endif 589 int 590 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 591 { 592 struct statfs *buf, *sp; 593 struct ostatfs osb; 594 size_t count, size; 595 int error; 596 597 if (uap->bufsize < 0) 598 return (EINVAL); 599 count = uap->bufsize / sizeof(struct ostatfs); 600 if (count > SIZE_MAX / sizeof(struct statfs)) 601 return (EINVAL); 602 size = count * sizeof(struct statfs); 603 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 604 uap->mode); 605 td->td_retval[0] = count; 606 if (size != 0) { 607 sp = buf; 608 while (count != 0 && error == 0) { 609 cvtstatfs(sp, &osb); 610 error = copyout(&osb, uap->buf, sizeof(osb)); 611 sp++; 612 uap->buf++; 613 count--; 614 } 615 free(buf, M_STATFS); 616 } 617 return (error); 618 } 619 620 /* 621 * Implement fstatfs() for (NFS) file handles. 
622 */ 623 #ifndef _SYS_SYSPROTO_H_ 624 struct freebsd4_fhstatfs_args { 625 struct fhandle *u_fhp; 626 struct ostatfs *buf; 627 }; 628 #endif 629 int 630 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 631 { 632 struct ostatfs osb; 633 struct statfs *sfp; 634 fhandle_t fh; 635 int error; 636 637 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 638 if (error != 0) 639 return (error); 640 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 641 error = kern_fhstatfs(td, fh, sfp); 642 if (error == 0) { 643 cvtstatfs(sfp, &osb); 644 error = copyout(&osb, uap->buf, sizeof(osb)); 645 } 646 free(sfp, M_STATFS); 647 return (error); 648 } 649 650 /* 651 * Convert a new format statfs structure to an old format statfs structure. 652 */ 653 static void 654 cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 655 { 656 657 statfs_scale_blocks(nsp, LONG_MAX); 658 bzero(osp, sizeof(*osp)); 659 osp->f_bsize = nsp->f_bsize; 660 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 661 osp->f_blocks = nsp->f_blocks; 662 osp->f_bfree = nsp->f_bfree; 663 osp->f_bavail = nsp->f_bavail; 664 osp->f_files = MIN(nsp->f_files, LONG_MAX); 665 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 666 osp->f_owner = nsp->f_owner; 667 osp->f_type = nsp->f_type; 668 osp->f_flags = nsp->f_flags; 669 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 670 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 671 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 672 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 673 strlcpy(osp->f_fstypename, nsp->f_fstypename, 674 MIN(MFSNAMELEN, OMFSNAMELEN)); 675 strlcpy(osp->f_mntonname, nsp->f_mntonname, 676 MIN(MNAMELEN, OMNAMELEN)); 677 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 678 MIN(MNAMELEN, OMNAMELEN)); 679 osp->f_fsid = nsp->f_fsid; 680 } 681 #endif /* COMPAT_FREEBSD4 */ 682 683 /* 684 * Change current working directory to a given file descriptor. 
685 */ 686 #ifndef _SYS_SYSPROTO_H_ 687 struct fchdir_args { 688 int fd; 689 }; 690 #endif 691 int 692 sys_fchdir(struct thread *td, struct fchdir_args *uap) 693 { 694 struct vnode *vp, *tdp; 695 struct mount *mp; 696 struct file *fp; 697 cap_rights_t rights; 698 int error; 699 700 AUDIT_ARG_FD(uap->fd); 701 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 702 &fp); 703 if (error != 0) 704 return (error); 705 vp = fp->f_vnode; 706 vrefact(vp); 707 fdrop(fp, td); 708 vn_lock(vp, LK_SHARED | LK_RETRY); 709 AUDIT_ARG_VNODE1(vp); 710 error = change_dir(vp, td); 711 while (!error && (mp = vp->v_mountedhere) != NULL) { 712 if (vfs_busy(mp, 0)) 713 continue; 714 error = VFS_ROOT(mp, LK_SHARED, &tdp); 715 vfs_unbusy(mp); 716 if (error != 0) 717 break; 718 vput(vp); 719 vp = tdp; 720 } 721 if (error != 0) { 722 vput(vp); 723 return (error); 724 } 725 VOP_UNLOCK(vp, 0); 726 pwd_chdir(td, vp); 727 return (0); 728 } 729 730 /* 731 * Change current working directory (``.''). 732 */ 733 #ifndef _SYS_SYSPROTO_H_ 734 struct chdir_args { 735 char *path; 736 }; 737 #endif 738 int 739 sys_chdir(struct thread *td, struct chdir_args *uap) 740 { 741 742 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 743 } 744 745 int 746 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 747 { 748 struct nameidata nd; 749 int error; 750 751 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 752 pathseg, path, td); 753 if ((error = namei(&nd)) != 0) 754 return (error); 755 if ((error = change_dir(nd.ni_vp, td)) != 0) { 756 vput(nd.ni_vp); 757 NDFREE(&nd, NDF_ONLY_PNBUF); 758 return (error); 759 } 760 VOP_UNLOCK(nd.ni_vp, 0); 761 NDFREE(&nd, NDF_ONLY_PNBUF); 762 pwd_chdir(td, nd.ni_vp); 763 return (0); 764 } 765 766 /* 767 * Change notion of root (``/'') directory. 
768 */ 769 #ifndef _SYS_SYSPROTO_H_ 770 struct chroot_args { 771 char *path; 772 }; 773 #endif 774 int 775 sys_chroot(struct thread *td, struct chroot_args *uap) 776 { 777 struct nameidata nd; 778 int error; 779 780 error = priv_check(td, PRIV_VFS_CHROOT); 781 if (error != 0) 782 return (error); 783 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 784 UIO_USERSPACE, uap->path, td); 785 error = namei(&nd); 786 if (error != 0) 787 goto error; 788 error = change_dir(nd.ni_vp, td); 789 if (error != 0) 790 goto e_vunlock; 791 #ifdef MAC 792 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 793 if (error != 0) 794 goto e_vunlock; 795 #endif 796 VOP_UNLOCK(nd.ni_vp, 0); 797 error = pwd_chroot(td, nd.ni_vp); 798 vrele(nd.ni_vp); 799 NDFREE(&nd, NDF_ONLY_PNBUF); 800 return (error); 801 e_vunlock: 802 vput(nd.ni_vp); 803 error: 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806 } 807 808 /* 809 * Common routine for chroot and chdir. Callers must provide a locked vnode 810 * instance. 
811 */ 812 int 813 change_dir(struct vnode *vp, struct thread *td) 814 { 815 #ifdef MAC 816 int error; 817 #endif 818 819 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 820 if (vp->v_type != VDIR) 821 return (ENOTDIR); 822 #ifdef MAC 823 error = mac_vnode_check_chdir(td->td_ucred, vp); 824 if (error != 0) 825 return (error); 826 #endif 827 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 828 } 829 830 static __inline void 831 flags_to_rights(int flags, cap_rights_t *rightsp) 832 { 833 834 if (flags & O_EXEC) { 835 cap_rights_set(rightsp, CAP_FEXECVE); 836 } else { 837 switch ((flags & O_ACCMODE)) { 838 case O_RDONLY: 839 cap_rights_set(rightsp, CAP_READ); 840 break; 841 case O_RDWR: 842 cap_rights_set(rightsp, CAP_READ); 843 /* FALLTHROUGH */ 844 case O_WRONLY: 845 cap_rights_set(rightsp, CAP_WRITE); 846 if (!(flags & (O_APPEND | O_TRUNC))) 847 cap_rights_set(rightsp, CAP_SEEK); 848 break; 849 } 850 } 851 852 if (flags & O_CREAT) 853 cap_rights_set(rightsp, CAP_CREATE); 854 855 if (flags & O_TRUNC) 856 cap_rights_set(rightsp, CAP_FTRUNCATE); 857 858 if (flags & (O_SYNC | O_FSYNC)) 859 cap_rights_set(rightsp, CAP_FSYNC); 860 861 if (flags & (O_EXLOCK | O_SHLOCK)) 862 cap_rights_set(rightsp, CAP_FLOCK); 863 } 864 865 /* 866 * Check permissions, allocate an open file structure, and call the device 867 * open routine if any. 
868 */ 869 #ifndef _SYS_SYSPROTO_H_ 870 struct open_args { 871 char *path; 872 int flags; 873 int mode; 874 }; 875 #endif 876 int 877 sys_open(struct thread *td, struct open_args *uap) 878 { 879 880 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 881 uap->flags, uap->mode)); 882 } 883 884 #ifndef _SYS_SYSPROTO_H_ 885 struct openat_args { 886 int fd; 887 char *path; 888 int flag; 889 int mode; 890 }; 891 #endif 892 int 893 sys_openat(struct thread *td, struct openat_args *uap) 894 { 895 896 AUDIT_ARG_FD(uap->fd); 897 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 898 uap->mode)); 899 } 900 901 int 902 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 903 int flags, int mode) 904 { 905 struct proc *p = td->td_proc; 906 struct filedesc *fdp = p->p_fd; 907 struct file *fp; 908 struct vnode *vp; 909 struct nameidata nd; 910 cap_rights_t rights; 911 int cmode, error, indx; 912 913 indx = -1; 914 915 AUDIT_ARG_FFLAGS(flags); 916 AUDIT_ARG_MODE(mode); 917 cap_rights_init(&rights, CAP_LOOKUP); 918 flags_to_rights(flags, &rights); 919 /* 920 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 921 * may be specified. 922 */ 923 if (flags & O_EXEC) { 924 if (flags & O_ACCMODE) 925 return (EINVAL); 926 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 927 return (EINVAL); 928 } else { 929 flags = FFLAGS(flags); 930 } 931 932 /* 933 * Allocate a file structure. The descriptor to reference it 934 * is allocated and set by finstall() below. 935 */ 936 error = falloc_noinstall(td, &fp); 937 if (error != 0) 938 return (error); 939 /* 940 * An extra reference on `fp' has been held for us by 941 * falloc_noinstall(). 942 */ 943 /* Set the flags early so the finit in devfs can pick them up. 
*/ 944 fp->f_flag = flags & FMASK; 945 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 946 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 947 &rights, td); 948 td->td_dupfd = -1; /* XXX check for fdopen */ 949 error = vn_open(&nd, &flags, cmode, fp); 950 if (error != 0) { 951 /* 952 * If the vn_open replaced the method vector, something 953 * wonderous happened deep below and we just pass it up 954 * pretending we know what we do. 955 */ 956 if (error == ENXIO && fp->f_ops != &badfileops) 957 goto success; 958 959 /* 960 * Handle special fdopen() case. bleh. 961 * 962 * Don't do this for relative (capability) lookups; we don't 963 * understand exactly what would happen, and we don't think 964 * that it ever should. 965 */ 966 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 967 (error == ENODEV || error == ENXIO) && 968 td->td_dupfd >= 0) { 969 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 970 &indx); 971 if (error == 0) 972 goto success; 973 } 974 975 goto bad; 976 } 977 td->td_dupfd = 0; 978 NDFREE(&nd, NDF_ONLY_PNBUF); 979 vp = nd.ni_vp; 980 981 /* 982 * Store the vnode, for any f_type. Typically, the vnode use 983 * count is decremented by direct call to vn_closefile() for 984 * files that switched type in the cdevsw fdopen() method. 985 */ 986 fp->f_vnode = vp; 987 /* 988 * If the file wasn't claimed by devfs bind it to the normal 989 * vnode operations here. 990 */ 991 if (fp->f_ops == &badfileops) { 992 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 993 fp->f_seqcount = 1; 994 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 995 DTYPE_VNODE, vp, &vnops); 996 } 997 998 VOP_UNLOCK(vp, 0); 999 if (flags & O_TRUNC) { 1000 error = fo_truncate(fp, 0, td->td_ucred, td); 1001 if (error != 0) 1002 goto bad; 1003 } 1004 success: 1005 /* 1006 * If we haven't already installed the FD (for dupfdopen), do so now. 
1007 */ 1008 if (indx == -1) { 1009 struct filecaps *fcaps; 1010 1011 #ifdef CAPABILITIES 1012 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1013 fcaps = &nd.ni_filecaps; 1014 else 1015 #endif 1016 fcaps = NULL; 1017 error = finstall(td, fp, &indx, flags, fcaps); 1018 /* On success finstall() consumes fcaps. */ 1019 if (error != 0) { 1020 filecaps_free(&nd.ni_filecaps); 1021 goto bad; 1022 } 1023 } else { 1024 filecaps_free(&nd.ni_filecaps); 1025 } 1026 1027 /* 1028 * Release our private reference, leaving the one associated with 1029 * the descriptor table intact. 1030 */ 1031 fdrop(fp, td); 1032 td->td_retval[0] = indx; 1033 return (0); 1034 bad: 1035 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1036 fdrop(fp, td); 1037 return (error); 1038 } 1039 1040 #ifdef COMPAT_43 1041 /* 1042 * Create a file. 1043 */ 1044 #ifndef _SYS_SYSPROTO_H_ 1045 struct ocreat_args { 1046 char *path; 1047 int mode; 1048 }; 1049 #endif 1050 int 1051 ocreat(struct thread *td, struct ocreat_args *uap) 1052 { 1053 1054 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1055 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1056 } 1057 #endif /* COMPAT_43 */ 1058 1059 /* 1060 * Create a special file. 
1061 */ 1062 #ifndef _SYS_SYSPROTO_H_ 1063 struct mknod_args { 1064 char *path; 1065 int mode; 1066 int dev; 1067 }; 1068 #endif 1069 int 1070 sys_mknod(struct thread *td, struct mknod_args *uap) 1071 { 1072 1073 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1074 uap->mode, uap->dev)); 1075 } 1076 1077 #ifndef _SYS_SYSPROTO_H_ 1078 struct mknodat_args { 1079 int fd; 1080 char *path; 1081 mode_t mode; 1082 dev_t dev; 1083 }; 1084 #endif 1085 int 1086 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1087 { 1088 1089 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1090 uap->dev)); 1091 } 1092 1093 int 1094 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1095 int mode, int dev) 1096 { 1097 struct vnode *vp; 1098 struct mount *mp; 1099 struct vattr vattr; 1100 struct nameidata nd; 1101 cap_rights_t rights; 1102 int error, whiteout = 0; 1103 1104 AUDIT_ARG_MODE(mode); 1105 AUDIT_ARG_DEV(dev); 1106 switch (mode & S_IFMT) { 1107 case S_IFCHR: 1108 case S_IFBLK: 1109 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1110 if (error == 0 && dev == VNOVAL) 1111 error = EINVAL; 1112 break; 1113 case S_IFMT: 1114 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1115 break; 1116 case S_IFWHT: 1117 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1118 break; 1119 case S_IFIFO: 1120 if (dev == 0) 1121 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1122 /* FALLTHROUGH */ 1123 default: 1124 error = EINVAL; 1125 break; 1126 } 1127 if (error != 0) 1128 return (error); 1129 restart: 1130 bwillwrite(); 1131 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1132 NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), 1133 td); 1134 if ((error = namei(&nd)) != 0) 1135 return (error); 1136 vp = nd.ni_vp; 1137 if (vp != NULL) { 1138 NDFREE(&nd, NDF_ONLY_PNBUF); 1139 if (vp == nd.ni_dvp) 1140 vrele(nd.ni_dvp); 1141 else 1142 vput(nd.ni_dvp); 1143 vrele(vp); 1144 return (EEXIST); 1145 } else { 1146 
VATTR_NULL(&vattr); 1147 vattr.va_mode = (mode & ALLPERMS) & 1148 ~td->td_proc->p_fd->fd_cmask; 1149 vattr.va_rdev = dev; 1150 whiteout = 0; 1151 1152 switch (mode & S_IFMT) { 1153 case S_IFMT: /* used by badsect to flag bad sectors */ 1154 vattr.va_type = VBAD; 1155 break; 1156 case S_IFCHR: 1157 vattr.va_type = VCHR; 1158 break; 1159 case S_IFBLK: 1160 vattr.va_type = VBLK; 1161 break; 1162 case S_IFWHT: 1163 whiteout = 1; 1164 break; 1165 default: 1166 panic("kern_mknod: invalid mode"); 1167 } 1168 } 1169 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1170 NDFREE(&nd, NDF_ONLY_PNBUF); 1171 vput(nd.ni_dvp); 1172 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1173 return (error); 1174 goto restart; 1175 } 1176 #ifdef MAC 1177 if (error == 0 && !whiteout) 1178 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1179 &nd.ni_cnd, &vattr); 1180 #endif 1181 if (error == 0) { 1182 if (whiteout) 1183 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1184 else { 1185 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1186 &nd.ni_cnd, &vattr); 1187 if (error == 0) 1188 vput(nd.ni_vp); 1189 } 1190 } 1191 NDFREE(&nd, NDF_ONLY_PNBUF); 1192 vput(nd.ni_dvp); 1193 vn_finished_write(mp); 1194 return (error); 1195 } 1196 1197 /* 1198 * Create a named pipe. 
*/
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point.  Forwards to kern_mkfifoat() with AT_FDCWD as
 * the directory descriptor and a user-space path.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point.  Same as sys_mkfifo() except that the
 * directory descriptor comes from the caller.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO named by (fd, path).
 *
 * Returns 0 on success, EEXIST when the lookup finds an existing vnode,
 * or the error reported by namei(), vn_start_write(), the MAC create
 * check or VOP_MKNOD().
 */
int
kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT),
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: give back both vnodes and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The non-sleeping attempt failed.  Drop the parent, retry
		 * with V_XSLEEP | PCATCH and then redo the whole lookup.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	/* Permission bits requested by the caller, minus the process cmask. */
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point.  Both paths are resolved relative to AT_FDCWD and
 * the source lookup is done with FOLLOW.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point.  Any flag bit other than AT_SYMLINK_FOLLOW is
 * rejected with EINVAL; AT_SYMLINK_FOLLOW selects FOLLOW for the source
 * lookup, otherwise NOFOLLOW is used.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{
	int flag;

	flag = uap->flag;
	if (flag & ~AT_SYMLINK_FOLLOW)
		return (EINVAL);

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? FOLLOW : NOFOLLOW));
}

/* Tunables consulted by can_hardlink(); both default to 0 (disabled). */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether cred may create a hard link to vp.
 *
 * Returns 0 immediately when both hardlink_check_uid and
 * hardlink_check_gid are zero.  Otherwise the vnode attributes are
 * fetched, and PRIV_VFS_LINK is required when the uid check is enabled
 * and cred->cr_uid differs from the file owner, or when the gid check is
 * enabled and groupmember() reports that cred is not in the file's group.
 * Returns the VOP_GETATTR() or priv_check_cred() error on failure.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK, 0);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create a hard link: (fd2, path2) becomes a new name for the file found
 * at (fd1, path1).  'follow' is OR-ed into the namei flags of the source
 * lookup.
 *
 * Returns 0 on success; EPERM when the source is a directory; EEXIST when
 * the target name already exists; EXDEV when source and target directory
 * have different v_mount; otherwise the error from namei(),
 * can_hardlink(), the MAC link check, vn_start_write() or VOP_LINK().
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2,
    enum uio_seg segflg, int follow)
{
	struct vnode *vp;
	struct mount *mp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

again:
	bwillwrite();
	/* First lookup: the existing file that is to gain a new name. */
	NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1,
	    cap_rights_init(&rights, CAP_LINKAT_SOURCE), td);

	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;
	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	/* Second lookup reuses nd: the name to create and its parent. */
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2,
	    cap_rights_init(&rights, CAP_LINKAT_TARGET), td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* Target name is already taken. */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
			/* Policy checks run with vp exclusively locked. */
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Same retry scheme as the other create
				 * paths in this file: drop everything, retry
				 * with V_XSLEEP | PCATCH, then start over
				 * from the first lookup.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH);
				if (error != 0)
					return (error);
				goto again;
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			/*
			 * Only unlock vp here; the vrele() at the end of
			 * the function drops it.
			 */
			VOP_UNLOCK(vp, 0);
			vput(nd.ni_dvp);
			vn_finished_write(mp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
		} else {
			/* vn_lock() failed: drop everything and start over. */
			vput(nd.ni_dvp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vrele(vp);
			goto again;
		}
	}
	vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1450 */ 1451 #ifndef _SYS_SYSPROTO_H_ 1452 struct symlink_args { 1453 char *path; 1454 char *link; 1455 }; 1456 #endif 1457 int 1458 sys_symlink(struct thread *td, struct symlink_args *uap) 1459 { 1460 1461 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1462 UIO_USERSPACE)); 1463 } 1464 1465 #ifndef _SYS_SYSPROTO_H_ 1466 struct symlinkat_args { 1467 char *path; 1468 int fd; 1469 char *path2; 1470 }; 1471 #endif 1472 int 1473 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1474 { 1475 1476 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1477 UIO_USERSPACE)); 1478 } 1479 1480 int 1481 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1482 enum uio_seg segflg) 1483 { 1484 struct mount *mp; 1485 struct vattr vattr; 1486 char *syspath; 1487 struct nameidata nd; 1488 int error; 1489 cap_rights_t rights; 1490 1491 if (segflg == UIO_SYSSPACE) { 1492 syspath = path1; 1493 } else { 1494 syspath = uma_zalloc(namei_zone, M_WAITOK); 1495 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1496 goto out; 1497 } 1498 AUDIT_ARG_TEXT(syspath); 1499 restart: 1500 bwillwrite(); 1501 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1502 NOCACHE, segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), 1503 td); 1504 if ((error = namei(&nd)) != 0) 1505 goto out; 1506 if (nd.ni_vp) { 1507 NDFREE(&nd, NDF_ONLY_PNBUF); 1508 if (nd.ni_vp == nd.ni_dvp) 1509 vrele(nd.ni_dvp); 1510 else 1511 vput(nd.ni_dvp); 1512 vrele(nd.ni_vp); 1513 error = EEXIST; 1514 goto out; 1515 } 1516 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1517 NDFREE(&nd, NDF_ONLY_PNBUF); 1518 vput(nd.ni_dvp); 1519 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1520 goto out; 1521 goto restart; 1522 } 1523 VATTR_NULL(&vattr); 1524 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1525 #ifdef MAC 1526 vattr.va_type = VLNK; 1527 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1528 
&vattr); 1529 if (error != 0) 1530 goto out2; 1531 #endif 1532 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1533 if (error == 0) 1534 vput(nd.ni_vp); 1535 #ifdef MAC 1536 out2: 1537 #endif 1538 NDFREE(&nd, NDF_ONLY_PNBUF); 1539 vput(nd.ni_dvp); 1540 vn_finished_write(mp); 1541 out: 1542 if (segflg != UIO_SYSSPACE) 1543 uma_zfree(namei_zone, syspath); 1544 return (error); 1545 } 1546 1547 /* 1548 * Delete a whiteout from the filesystem. 1549 */ 1550 #ifndef _SYS_SYSPROTO_H_ 1551 struct undelete_args { 1552 char *path; 1553 }; 1554 #endif 1555 int 1556 sys_undelete(struct thread *td, struct undelete_args *uap) 1557 { 1558 struct mount *mp; 1559 struct nameidata nd; 1560 int error; 1561 1562 restart: 1563 bwillwrite(); 1564 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1565 UIO_USERSPACE, uap->path, td); 1566 error = namei(&nd); 1567 if (error != 0) 1568 return (error); 1569 1570 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1571 NDFREE(&nd, NDF_ONLY_PNBUF); 1572 if (nd.ni_vp == nd.ni_dvp) 1573 vrele(nd.ni_dvp); 1574 else 1575 vput(nd.ni_dvp); 1576 if (nd.ni_vp) 1577 vrele(nd.ni_vp); 1578 return (EEXIST); 1579 } 1580 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1581 NDFREE(&nd, NDF_ONLY_PNBUF); 1582 vput(nd.ni_dvp); 1583 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1584 return (error); 1585 goto restart; 1586 } 1587 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1588 NDFREE(&nd, NDF_ONLY_PNBUF); 1589 vput(nd.ni_dvp); 1590 vn_finished_write(mp); 1591 return (error); 1592 } 1593 1594 /* 1595 * Delete a name from the filesystem. 
*/
#ifndef _SYS_SYSPROTO_H_
struct unlink_args {
	char	*path;
};
#endif
/*
 * unlink(2) entry point.  Forwards to kern_unlinkat() with AT_FDCWD and
 * no inode number check (oldinum == 0).
 */
int
sys_unlink(struct thread *td, struct unlink_args *uap)
{

	return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
}

#ifndef _SYS_SYSPROTO_H_
struct unlinkat_args {
	int	fd;
	char	*path;
	int	flag;
};
#endif
/*
 * unlinkat(2) entry point.  Any flag bit other than AT_REMOVEDIR yields
 * EINVAL.  With AT_REMOVEDIR the request is handled by kern_rmdirat(),
 * otherwise by kern_unlinkat().
 */
int
sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
{
	int flag = uap->flag;
	int fd = uap->fd;
	char *path = uap->path;

	if (flag & ~AT_REMOVEDIR)
		return (EINVAL);

	if (flag & AT_REMOVEDIR)
		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
	else
		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
}

/*
 * Remove the name (fd, path).
 *
 * When oldinum is non-zero the vnode is stat'ed first and the removal is
 * refused with EIDRM if its inode number differs from oldinum.  When
 * oldinum is zero, directories are refused with EPERM.
 *
 * Returns 0 on success.  An EINVAL from namei() is reported as EPERM.
 * Other possible results are EPERM, EIDRM, EBUSY (vnode has VV_ROOT set),
 * or the error from namei(), vn_stat(), vn_start_write(), the MAC unlink
 * check or VOP_REMOVE().
 */
int
kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    ino_t oldinum)
{
	struct mount *mp;
	struct vnode *vp;
	struct nameidata nd;
	struct stat sb;
	cap_rights_t rights;
	int error;

restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
	if ((error = namei(&nd)) != 0)
		return (error == EINVAL ? EPERM : error);
	vp = nd.ni_vp;
	/* error is 0 here; the chain below may set it before any removal. */
	if (vp->v_type == VDIR && oldinum == 0) {
		error = EPERM;		/* POSIX */
	} else if (oldinum != 0 &&
		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
		  sb.st_ino != oldinum) {
			error = EIDRM;	/* Identifier removed */
	} else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			/*
			 * Drop both vnodes, retry with V_XSLEEP | PCATCH
			 * and redo the lookup.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error != 0)
			goto out;
#endif
		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	/* Common exit: hand back the path buffer, the parent and the leaf. */
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	return (error);
}

/*
 * Reposition read/write file offset.
 */
#ifndef _SYS_SYSPROTO_H_
struct lseek_args {
	int	fd;
	int	pad;
	off_t	offset;
	int	whence;
};
#endif
/*
 * lseek(2) entry point; the 'pad' member is not used.
 */
int
sys_lseek(struct thread *td, struct lseek_args *uap)
{

	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
}

/*
 * Seek on descriptor fd.
 *
 * Obtains the file with the CAP_SEEK right and calls fo_seek() when the
 * file's ops are flagged DFLAG_SEEKABLE; otherwise the result is ESPIPE.
 * Returns the fget() error if the descriptor cannot be obtained.
 */
int
kern_lseek(struct thread *td, int fd, off_t offset, int whence)
{
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(fd);
	error = fget(td, fd, cap_rights_init(&rights, CAP_SEEK), &fp);
	if (error != 0)
		return (error);
	error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
	    fo_seek(fp, offset, whence, td) : ESPIPE;
	fdrop(fp, td);
	return (error);
}

#if defined(COMPAT_43)
/*
 * Reposition read/write file offset.
1738 */ 1739 #ifndef _SYS_SYSPROTO_H_ 1740 struct olseek_args { 1741 int fd; 1742 long offset; 1743 int whence; 1744 }; 1745 #endif 1746 int 1747 olseek(struct thread *td, struct olseek_args *uap) 1748 { 1749 1750 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1751 } 1752 #endif /* COMPAT_43 */ 1753 1754 #if defined(COMPAT_FREEBSD6) 1755 /* Version with the 'pad' argument */ 1756 int 1757 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1758 { 1759 1760 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1761 } 1762 #endif 1763 1764 /* 1765 * Check access permissions using passed credentials. 1766 */ 1767 static int 1768 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1769 struct thread *td) 1770 { 1771 accmode_t accmode; 1772 int error; 1773 1774 /* Flags == 0 means only check for existence. */ 1775 if (user_flags == 0) 1776 return (0); 1777 1778 accmode = 0; 1779 if (user_flags & R_OK) 1780 accmode |= VREAD; 1781 if (user_flags & W_OK) 1782 accmode |= VWRITE; 1783 if (user_flags & X_OK) 1784 accmode |= VEXEC; 1785 #ifdef MAC 1786 error = mac_vnode_check_access(cred, vp, accmode); 1787 if (error != 0) 1788 return (error); 1789 #endif 1790 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1791 error = VOP_ACCESS(vp, accmode, cred, td); 1792 return (error); 1793 } 1794 1795 /* 1796 * Check access permissions using "real" credentials. 
1797 */ 1798 #ifndef _SYS_SYSPROTO_H_ 1799 struct access_args { 1800 char *path; 1801 int amode; 1802 }; 1803 #endif 1804 int 1805 sys_access(struct thread *td, struct access_args *uap) 1806 { 1807 1808 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1809 0, uap->amode)); 1810 } 1811 1812 #ifndef _SYS_SYSPROTO_H_ 1813 struct faccessat_args { 1814 int dirfd; 1815 char *path; 1816 int amode; 1817 int flag; 1818 } 1819 #endif 1820 int 1821 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1822 { 1823 1824 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1825 uap->amode)); 1826 } 1827 1828 int 1829 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1830 int flag, int amode) 1831 { 1832 struct ucred *cred, *usecred; 1833 struct vnode *vp; 1834 struct nameidata nd; 1835 cap_rights_t rights; 1836 int error; 1837 1838 if (flag & ~AT_EACCESS) 1839 return (EINVAL); 1840 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1841 return (EINVAL); 1842 1843 /* 1844 * Create and modify a temporary credential instead of one that 1845 * is potentially shared (if we need one). 
1846 */ 1847 cred = td->td_ucred; 1848 if ((flag & AT_EACCESS) == 0 && 1849 ((cred->cr_uid != cred->cr_ruid || 1850 cred->cr_rgid != cred->cr_groups[0]))) { 1851 usecred = crdup(cred); 1852 usecred->cr_uid = cred->cr_ruid; 1853 usecred->cr_groups[0] = cred->cr_rgid; 1854 td->td_ucred = usecred; 1855 } else 1856 usecred = cred; 1857 AUDIT_ARG_VALUE(amode); 1858 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1859 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 1860 td); 1861 if ((error = namei(&nd)) != 0) 1862 goto out; 1863 vp = nd.ni_vp; 1864 1865 error = vn_access(vp, amode, usecred, td); 1866 NDFREE(&nd, NDF_ONLY_PNBUF); 1867 vput(vp); 1868 out: 1869 if (usecred != cred) { 1870 td->td_ucred = cred; 1871 crfree(usecred); 1872 } 1873 return (error); 1874 } 1875 1876 /* 1877 * Check access permissions using "effective" credentials. 1878 */ 1879 #ifndef _SYS_SYSPROTO_H_ 1880 struct eaccess_args { 1881 char *path; 1882 int amode; 1883 }; 1884 #endif 1885 int 1886 sys_eaccess(struct thread *td, struct eaccess_args *uap) 1887 { 1888 1889 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1890 AT_EACCESS, uap->amode)); 1891 } 1892 1893 #if defined(COMPAT_43) 1894 /* 1895 * Get file status; this version follows links. 1896 */ 1897 #ifndef _SYS_SYSPROTO_H_ 1898 struct ostat_args { 1899 char *path; 1900 struct ostat *ub; 1901 }; 1902 #endif 1903 int 1904 ostat(struct thread *td, struct ostat_args *uap) 1905 { 1906 struct stat sb; 1907 struct ostat osb; 1908 int error; 1909 1910 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1911 &sb, NULL); 1912 if (error != 0) 1913 return (error); 1914 cvtstat(&sb, &osb); 1915 return (copyout(&osb, uap->ub, sizeof (osb))); 1916 } 1917 1918 /* 1919 * Get file status; this version does not follow links. 
1920 */ 1921 #ifndef _SYS_SYSPROTO_H_ 1922 struct olstat_args { 1923 char *path; 1924 struct ostat *ub; 1925 }; 1926 #endif 1927 int 1928 olstat(struct thread *td, struct olstat_args *uap) 1929 { 1930 struct stat sb; 1931 struct ostat osb; 1932 int error; 1933 1934 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 1935 UIO_USERSPACE, &sb, NULL); 1936 if (error != 0) 1937 return (error); 1938 cvtstat(&sb, &osb); 1939 return (copyout(&osb, uap->ub, sizeof (osb))); 1940 } 1941 1942 /* 1943 * Convert from an old to a new stat structure. 1944 */ 1945 void 1946 cvtstat(struct stat *st, struct ostat *ost) 1947 { 1948 1949 bzero(ost, sizeof(*ost)); 1950 ost->st_dev = st->st_dev; 1951 ost->st_ino = st->st_ino; 1952 ost->st_mode = st->st_mode; 1953 ost->st_nlink = st->st_nlink; 1954 ost->st_uid = st->st_uid; 1955 ost->st_gid = st->st_gid; 1956 ost->st_rdev = st->st_rdev; 1957 if (st->st_size < (quad_t)1 << 32) 1958 ost->st_size = st->st_size; 1959 else 1960 ost->st_size = -2; 1961 ost->st_atim = st->st_atim; 1962 ost->st_mtim = st->st_mtim; 1963 ost->st_ctim = st->st_ctim; 1964 ost->st_blksize = st->st_blksize; 1965 ost->st_blocks = st->st_blocks; 1966 ost->st_flags = st->st_flags; 1967 ost->st_gen = st->st_gen; 1968 } 1969 #endif /* COMPAT_43 */ 1970 1971 /* 1972 * Get file status; this version follows links. 
1973 */ 1974 #ifndef _SYS_SYSPROTO_H_ 1975 struct stat_args { 1976 char *path; 1977 struct stat *ub; 1978 }; 1979 #endif 1980 int 1981 sys_stat(struct thread *td, struct stat_args *uap) 1982 { 1983 struct stat sb; 1984 int error; 1985 1986 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 1987 &sb, NULL); 1988 if (error == 0) 1989 error = copyout(&sb, uap->ub, sizeof (sb)); 1990 return (error); 1991 } 1992 1993 #ifndef _SYS_SYSPROTO_H_ 1994 struct fstatat_args { 1995 int fd; 1996 char *path; 1997 struct stat *buf; 1998 int flag; 1999 } 2000 #endif 2001 int 2002 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2003 { 2004 struct stat sb; 2005 int error; 2006 2007 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2008 UIO_USERSPACE, &sb, NULL); 2009 if (error == 0) 2010 error = copyout(&sb, uap->buf, sizeof (sb)); 2011 return (error); 2012 } 2013 2014 int 2015 kern_statat(struct thread *td, int flag, int fd, char *path, 2016 enum uio_seg pathseg, struct stat *sbp, 2017 void (*hook)(struct vnode *vp, struct stat *sbp)) 2018 { 2019 struct nameidata nd; 2020 struct stat sb; 2021 cap_rights_t rights; 2022 int error; 2023 2024 if (flag & ~AT_SYMLINK_NOFOLLOW) 2025 return (EINVAL); 2026 2027 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2028 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2029 cap_rights_init(&rights, CAP_FSTAT), td); 2030 2031 if ((error = namei(&nd)) != 0) 2032 return (error); 2033 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2034 if (error == 0) { 2035 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2036 if (S_ISREG(sb.st_mode)) 2037 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2038 if (__predict_false(hook != NULL)) 2039 hook(nd.ni_vp, &sb); 2040 } 2041 NDFREE(&nd, NDF_ONLY_PNBUF); 2042 vput(nd.ni_vp); 2043 if (error != 0) 2044 return (error); 2045 *sbp = sb; 2046 #ifdef KTRACE 2047 if (KTRPOINT(td, KTR_STRUCT)) 2048 ktrstat(&sb); 2049 #endif 2050 return (0); 2051 } 2052 2053 /* 2054 * Get file status; this version does not follow links. 2055 */ 2056 #ifndef _SYS_SYSPROTO_H_ 2057 struct lstat_args { 2058 char *path; 2059 struct stat *ub; 2060 }; 2061 #endif 2062 int 2063 sys_lstat(struct thread *td, struct lstat_args *uap) 2064 { 2065 struct stat sb; 2066 int error; 2067 2068 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2069 UIO_USERSPACE, &sb, NULL); 2070 if (error == 0) 2071 error = copyout(&sb, uap->ub, sizeof (sb)); 2072 return (error); 2073 } 2074 2075 /* 2076 * Implementation of the NetBSD [l]stat() functions. 
2077 */ 2078 void 2079 cvtnstat( struct stat *sb, struct nstat *nsb) 2080 { 2081 2082 bzero(nsb, sizeof *nsb); 2083 nsb->st_dev = sb->st_dev; 2084 nsb->st_ino = sb->st_ino; 2085 nsb->st_mode = sb->st_mode; 2086 nsb->st_nlink = sb->st_nlink; 2087 nsb->st_uid = sb->st_uid; 2088 nsb->st_gid = sb->st_gid; 2089 nsb->st_rdev = sb->st_rdev; 2090 nsb->st_atim = sb->st_atim; 2091 nsb->st_mtim = sb->st_mtim; 2092 nsb->st_ctim = sb->st_ctim; 2093 nsb->st_size = sb->st_size; 2094 nsb->st_blocks = sb->st_blocks; 2095 nsb->st_blksize = sb->st_blksize; 2096 nsb->st_flags = sb->st_flags; 2097 nsb->st_gen = sb->st_gen; 2098 nsb->st_birthtim = sb->st_birthtim; 2099 } 2100 2101 #ifndef _SYS_SYSPROTO_H_ 2102 struct nstat_args { 2103 char *path; 2104 struct nstat *ub; 2105 }; 2106 #endif 2107 int 2108 sys_nstat(struct thread *td, struct nstat_args *uap) 2109 { 2110 struct stat sb; 2111 struct nstat nsb; 2112 int error; 2113 2114 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2115 &sb, NULL); 2116 if (error != 0) 2117 return (error); 2118 cvtnstat(&sb, &nsb); 2119 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2120 } 2121 2122 /* 2123 * NetBSD lstat. Get file status; this version does not follow links. 2124 */ 2125 #ifndef _SYS_SYSPROTO_H_ 2126 struct lstat_args { 2127 char *path; 2128 struct stat *ub; 2129 }; 2130 #endif 2131 int 2132 sys_nlstat(struct thread *td, struct nlstat_args *uap) 2133 { 2134 struct stat sb; 2135 struct nstat nsb; 2136 int error; 2137 2138 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2139 UIO_USERSPACE, &sb, NULL); 2140 if (error != 0) 2141 return (error); 2142 cvtnstat(&sb, &nsb); 2143 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2144 } 2145 2146 /* 2147 * Get configurable pathname variables. 
2148 */ 2149 #ifndef _SYS_SYSPROTO_H_ 2150 struct pathconf_args { 2151 char *path; 2152 int name; 2153 }; 2154 #endif 2155 int 2156 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2157 { 2158 2159 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2160 } 2161 2162 #ifndef _SYS_SYSPROTO_H_ 2163 struct lpathconf_args { 2164 char *path; 2165 int name; 2166 }; 2167 #endif 2168 int 2169 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2170 { 2171 2172 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2173 NOFOLLOW)); 2174 } 2175 2176 int 2177 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2178 u_long flags) 2179 { 2180 struct nameidata nd; 2181 int error; 2182 2183 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2184 pathseg, path, td); 2185 if ((error = namei(&nd)) != 0) 2186 return (error); 2187 NDFREE(&nd, NDF_ONLY_PNBUF); 2188 2189 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2190 vput(nd.ni_vp); 2191 return (error); 2192 } 2193 2194 /* 2195 * Return target name of a symbolic link. 
2196 */ 2197 #ifndef _SYS_SYSPROTO_H_ 2198 struct readlink_args { 2199 char *path; 2200 char *buf; 2201 size_t count; 2202 }; 2203 #endif 2204 int 2205 sys_readlink(struct thread *td, struct readlink_args *uap) 2206 { 2207 2208 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2209 uap->buf, UIO_USERSPACE, uap->count)); 2210 } 2211 #ifndef _SYS_SYSPROTO_H_ 2212 struct readlinkat_args { 2213 int fd; 2214 char *path; 2215 char *buf; 2216 size_t bufsize; 2217 }; 2218 #endif 2219 int 2220 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2221 { 2222 2223 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2224 uap->buf, UIO_USERSPACE, uap->bufsize)); 2225 } 2226 2227 int 2228 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2229 char *buf, enum uio_seg bufseg, size_t count) 2230 { 2231 struct vnode *vp; 2232 struct iovec aiov; 2233 struct uio auio; 2234 struct nameidata nd; 2235 int error; 2236 2237 if (count > IOSIZE_MAX) 2238 return (EINVAL); 2239 2240 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2241 pathseg, path, fd, td); 2242 2243 if ((error = namei(&nd)) != 0) 2244 return (error); 2245 NDFREE(&nd, NDF_ONLY_PNBUF); 2246 vp = nd.ni_vp; 2247 #ifdef MAC 2248 error = mac_vnode_check_readlink(td->td_ucred, vp); 2249 if (error != 0) { 2250 vput(vp); 2251 return (error); 2252 } 2253 #endif 2254 if (vp->v_type != VLNK) 2255 error = EINVAL; 2256 else { 2257 aiov.iov_base = buf; 2258 aiov.iov_len = count; 2259 auio.uio_iov = &aiov; 2260 auio.uio_iovcnt = 1; 2261 auio.uio_offset = 0; 2262 auio.uio_rw = UIO_READ; 2263 auio.uio_segflg = bufseg; 2264 auio.uio_td = td; 2265 auio.uio_resid = count; 2266 error = VOP_READLINK(vp, &auio, td->td_ucred); 2267 td->td_retval[0] = count - auio.uio_resid; 2268 } 2269 vput(vp); 2270 return (error); 2271 } 2272 2273 /* 2274 * Common implementation code for chflags() and fchflags(). 
*/
/*
 * Set the file flags of vp to 'flags' on behalf of td.
 *
 * Returns EOPNOTSUPP when flags equals VNOVAL, the priv_check() error
 * when vp is a character or block device and the caller lacks
 * PRIV_VFS_CHFLAGS_DEV, or the error from vn_start_write(), the MAC
 * setflags check or VOP_SETATTR().
 */
static int
setfflags(struct thread *td, struct vnode *vp, u_long flags)
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	/* We can't support the value matching VNOVAL. */
	if (flags == VNOVAL)
		return (EOPNOTSUPP);

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
		if (error != 0)
			return (error);
	}

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	/* Only va_flags is set; VATTR_NULL() resets the other fields. */
	VATTR_NULL(&vattr);
	vattr.va_flags = flags;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef MAC
	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
	/* With MAC configured, VOP_SETATTR() runs only if the check passed. */
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	return (error);
}

/*
 * Change flags of a file given a path name.
2316 */ 2317 #ifndef _SYS_SYSPROTO_H_ 2318 struct chflags_args { 2319 const char *path; 2320 u_long flags; 2321 }; 2322 #endif 2323 int 2324 sys_chflags(struct thread *td, struct chflags_args *uap) 2325 { 2326 2327 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2328 uap->flags, 0)); 2329 } 2330 2331 #ifndef _SYS_SYSPROTO_H_ 2332 struct chflagsat_args { 2333 int fd; 2334 const char *path; 2335 u_long flags; 2336 int atflag; 2337 } 2338 #endif 2339 int 2340 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2341 { 2342 int fd = uap->fd; 2343 const char *path = uap->path; 2344 u_long flags = uap->flags; 2345 int atflag = uap->atflag; 2346 2347 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2348 return (EINVAL); 2349 2350 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2351 } 2352 2353 /* 2354 * Same as chflags() but doesn't follow symlinks. 2355 */ 2356 #ifndef _SYS_SYSPROTO_H_ 2357 struct lchflags_args { 2358 const char *path; 2359 u_long flags; 2360 }; 2361 #endif 2362 int 2363 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2364 { 2365 2366 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2367 uap->flags, AT_SYMLINK_NOFOLLOW)); 2368 } 2369 2370 static int 2371 kern_chflagsat(struct thread *td, int fd, const char *path, 2372 enum uio_seg pathseg, u_long flags, int atflag) 2373 { 2374 struct nameidata nd; 2375 cap_rights_t rights; 2376 int error, follow; 2377 2378 AUDIT_ARG_FFLAGS(flags); 2379 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2380 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2381 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2382 if ((error = namei(&nd)) != 0) 2383 return (error); 2384 NDFREE(&nd, NDF_ONLY_PNBUF); 2385 error = setfflags(td, nd.ni_vp, flags); 2386 vrele(nd.ni_vp); 2387 return (error); 2388 } 2389 2390 /* 2391 * Change flags of a file given a file descriptor. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fchflags_args {
	int	fd;
	u_long	flags;
};
#endif
/*
 * fchflags(2) entry point.  Obtains the file behind uap->fd with the
 * CAP_FCHFLAGS right and applies the flags to its vnode through
 * setfflags().  Returns the error from getvnode() or setfflags().
 */
int
sys_fchflags(struct thread *td, struct fchflags_args *uap)
{
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_FFLAGS(uap->flags);
	error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_FCHFLAGS),
	    &fp);
	if (error != 0)
		return (error);
#ifdef AUDIT
	/* The vnode is locked shared only around the audit record call. */
	vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY);
	AUDIT_ARG_VNODE1(fp->f_vnode);
	VOP_UNLOCK(fp->f_vnode, 0);
#endif
	error = setfflags(td, fp->f_vnode, uap->flags);
	fdrop(fp, td);
	return (error);
}

/*
 * Common implementation code for chmod(), lchmod() and fchmod().
 *
 * Sets the mode of vp to (mode & ALLPERMS) using cred.  Returns the
 * error from vn_start_write(), the MAC setmode check or VOP_SETATTR().
 */
int
setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* Only va_mode is set; VATTR_NULL() resets the other fields. */
	VATTR_NULL(&vattr);
	vattr.va_mode = mode & ALLPERMS;
#ifdef MAC
	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
	/* With MAC configured, VOP_SETATTR() runs only if the check passed. */
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	return (error);
}

/*
 * Change mode of a file given path name.
2449 */ 2450 #ifndef _SYS_SYSPROTO_H_ 2451 struct chmod_args { 2452 char *path; 2453 int mode; 2454 }; 2455 #endif 2456 int 2457 sys_chmod(struct thread *td, struct chmod_args *uap) 2458 { 2459 2460 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2461 uap->mode, 0)); 2462 } 2463 2464 #ifndef _SYS_SYSPROTO_H_ 2465 struct fchmodat_args { 2466 int dirfd; 2467 char *path; 2468 mode_t mode; 2469 int flag; 2470 } 2471 #endif 2472 int 2473 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2474 { 2475 int flag = uap->flag; 2476 int fd = uap->fd; 2477 char *path = uap->path; 2478 mode_t mode = uap->mode; 2479 2480 if (flag & ~AT_SYMLINK_NOFOLLOW) 2481 return (EINVAL); 2482 2483 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2484 } 2485 2486 /* 2487 * Change mode of a file given path name (don't follow links.) 2488 */ 2489 #ifndef _SYS_SYSPROTO_H_ 2490 struct lchmod_args { 2491 char *path; 2492 int mode; 2493 }; 2494 #endif 2495 int 2496 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2497 { 2498 2499 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2500 uap->mode, AT_SYMLINK_NOFOLLOW)); 2501 } 2502 2503 int 2504 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2505 mode_t mode, int flag) 2506 { 2507 struct nameidata nd; 2508 cap_rights_t rights; 2509 int error, follow; 2510 2511 AUDIT_ARG_MODE(mode); 2512 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2513 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2514 cap_rights_init(&rights, CAP_FCHMOD), td); 2515 if ((error = namei(&nd)) != 0) 2516 return (error); 2517 NDFREE(&nd, NDF_ONLY_PNBUF); 2518 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2519 vrele(nd.ni_vp); 2520 return (error); 2521 } 2522 2523 /* 2524 * Change mode of a file given a file descriptor. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fchmod_args {
	int	fd;
	int	mode;
};
#endif
/*
 * fchmod(2) entry point.  Obtains the file behind uap->fd with the
 * CAP_FCHMOD right and delegates to fo_chmod().  Returns the error from
 * fget() or fo_chmod().
 */
int
sys_fchmod(struct thread *td, struct fchmod_args *uap)
{
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(uap->fd);
	AUDIT_ARG_MODE(uap->mode);

	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp);
	if (error != 0)
		return (error);
	error = fo_chmod(fp, uap->mode, td->td_ucred, td);
	fdrop(fp, td);
	return (error);
}

/*
 * Common implementation for chown(), lchown(), and fchown()
 *
 * Sets the owner and group of vp to uid and gid using cred.  Returns the
 * error from vn_start_write(), the MAC setowner check or VOP_SETATTR().
 */
int
setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
    gid_t gid)
{
	struct mount *mp;
	struct vattr vattr;
	int error;

	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
		return (error);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* Only va_uid and va_gid are set; VATTR_NULL() resets the rest. */
	VATTR_NULL(&vattr);
	vattr.va_uid = uid;
	vattr.va_gid = gid;
#ifdef MAC
	error = mac_vnode_check_setowner(cred, vp, vattr.va_uid,
	    vattr.va_gid);
	/* With MAC configured, VOP_SETATTR() runs only if the check passed. */
	if (error == 0)
#endif
		error = VOP_SETATTR(vp, &vattr, cred);
	VOP_UNLOCK(vp, 0);
	vn_finished_write(mp);
	return (error);
}

/*
 * Set ownership given a path name.
2580 */ 2581 #ifndef _SYS_SYSPROTO_H_ 2582 struct chown_args { 2583 char *path; 2584 int uid; 2585 int gid; 2586 }; 2587 #endif 2588 int 2589 sys_chown(struct thread *td, struct chown_args *uap) 2590 { 2591 2592 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2593 uap->gid, 0)); 2594 } 2595 2596 #ifndef _SYS_SYSPROTO_H_ 2597 struct fchownat_args { 2598 int fd; 2599 const char * path; 2600 uid_t uid; 2601 gid_t gid; 2602 int flag; 2603 }; 2604 #endif 2605 int 2606 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2607 { 2608 int flag; 2609 2610 flag = uap->flag; 2611 if (flag & ~AT_SYMLINK_NOFOLLOW) 2612 return (EINVAL); 2613 2614 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2615 uap->gid, uap->flag)); 2616 } 2617 2618 int 2619 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2620 int uid, int gid, int flag) 2621 { 2622 struct nameidata nd; 2623 cap_rights_t rights; 2624 int error, follow; 2625 2626 AUDIT_ARG_OWNER(uid, gid); 2627 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2628 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2629 cap_rights_init(&rights, CAP_FCHOWN), td); 2630 2631 if ((error = namei(&nd)) != 0) 2632 return (error); 2633 NDFREE(&nd, NDF_ONLY_PNBUF); 2634 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2635 vrele(nd.ni_vp); 2636 return (error); 2637 } 2638 2639 /* 2640 * Set ownership given a path name, do not cross symlinks. 2641 */ 2642 #ifndef _SYS_SYSPROTO_H_ 2643 struct lchown_args { 2644 char *path; 2645 int uid; 2646 int gid; 2647 }; 2648 #endif 2649 int 2650 sys_lchown(struct thread *td, struct lchown_args *uap) 2651 { 2652 2653 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2654 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2655 } 2656 2657 /* 2658 * Set ownership given a file descriptor. 
2659 */ 2660 #ifndef _SYS_SYSPROTO_H_ 2661 struct fchown_args { 2662 int fd; 2663 int uid; 2664 int gid; 2665 }; 2666 #endif 2667 int 2668 sys_fchown(struct thread *td, struct fchown_args *uap) 2669 { 2670 struct file *fp; 2671 cap_rights_t rights; 2672 int error; 2673 2674 AUDIT_ARG_FD(uap->fd); 2675 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2676 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 2677 if (error != 0) 2678 return (error); 2679 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2680 fdrop(fp, td); 2681 return (error); 2682 } 2683 2684 /* 2685 * Common implementation code for utimes(), lutimes(), and futimes(). 2686 */ 2687 static int 2688 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 2689 struct timespec *tsp) 2690 { 2691 struct timeval tv[2]; 2692 const struct timeval *tvp; 2693 int error; 2694 2695 if (usrtvp == NULL) { 2696 vfs_timestamp(&tsp[0]); 2697 tsp[1] = tsp[0]; 2698 } else { 2699 if (tvpseg == UIO_SYSSPACE) { 2700 tvp = usrtvp; 2701 } else { 2702 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2703 return (error); 2704 tvp = tv; 2705 } 2706 2707 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2708 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2709 return (EINVAL); 2710 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2711 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2712 } 2713 return (0); 2714 } 2715 2716 /* 2717 * Common implementation code for futimens(), utimensat(). 
2718 */ 2719 #define UTIMENS_NULL 0x1 2720 #define UTIMENS_EXIT 0x2 2721 static int 2722 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2723 struct timespec *tsp, int *retflags) 2724 { 2725 struct timespec tsnow; 2726 int error; 2727 2728 vfs_timestamp(&tsnow); 2729 *retflags = 0; 2730 if (usrtsp == NULL) { 2731 tsp[0] = tsnow; 2732 tsp[1] = tsnow; 2733 *retflags |= UTIMENS_NULL; 2734 return (0); 2735 } 2736 if (tspseg == UIO_SYSSPACE) { 2737 tsp[0] = usrtsp[0]; 2738 tsp[1] = usrtsp[1]; 2739 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2740 return (error); 2741 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2742 *retflags |= UTIMENS_EXIT; 2743 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2744 *retflags |= UTIMENS_NULL; 2745 if (tsp[0].tv_nsec == UTIME_OMIT) 2746 tsp[0].tv_sec = VNOVAL; 2747 else if (tsp[0].tv_nsec == UTIME_NOW) 2748 tsp[0] = tsnow; 2749 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2750 return (EINVAL); 2751 if (tsp[1].tv_nsec == UTIME_OMIT) 2752 tsp[1].tv_sec = VNOVAL; 2753 else if (tsp[1].tv_nsec == UTIME_NOW) 2754 tsp[1] = tsnow; 2755 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 2756 return (EINVAL); 2757 2758 return (0); 2759 } 2760 2761 /* 2762 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 2763 * and utimensat(). 
2764 */ 2765 static int 2766 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 2767 int numtimes, int nullflag) 2768 { 2769 struct mount *mp; 2770 struct vattr vattr; 2771 int error, setbirthtime; 2772 2773 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2774 return (error); 2775 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2776 setbirthtime = 0; 2777 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 2778 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 2779 setbirthtime = 1; 2780 VATTR_NULL(&vattr); 2781 vattr.va_atime = ts[0]; 2782 vattr.va_mtime = ts[1]; 2783 if (setbirthtime) 2784 vattr.va_birthtime = ts[1]; 2785 if (numtimes > 2) 2786 vattr.va_birthtime = ts[2]; 2787 if (nullflag) 2788 vattr.va_vaflags |= VA_UTIMES_NULL; 2789 #ifdef MAC 2790 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 2791 vattr.va_mtime); 2792 #endif 2793 if (error == 0) 2794 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2795 VOP_UNLOCK(vp, 0); 2796 vn_finished_write(mp); 2797 return (error); 2798 } 2799 2800 /* 2801 * Set the access and modification times of a file. 
2802 */ 2803 #ifndef _SYS_SYSPROTO_H_ 2804 struct utimes_args { 2805 char *path; 2806 struct timeval *tptr; 2807 }; 2808 #endif 2809 int 2810 sys_utimes(struct thread *td, struct utimes_args *uap) 2811 { 2812 2813 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2814 uap->tptr, UIO_USERSPACE)); 2815 } 2816 2817 #ifndef _SYS_SYSPROTO_H_ 2818 struct futimesat_args { 2819 int fd; 2820 const char * path; 2821 const struct timeval * times; 2822 }; 2823 #endif 2824 int 2825 sys_futimesat(struct thread *td, struct futimesat_args *uap) 2826 { 2827 2828 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 2829 uap->times, UIO_USERSPACE)); 2830 } 2831 2832 int 2833 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2834 struct timeval *tptr, enum uio_seg tptrseg) 2835 { 2836 struct nameidata nd; 2837 struct timespec ts[2]; 2838 cap_rights_t rights; 2839 int error; 2840 2841 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2842 return (error); 2843 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 2844 cap_rights_init(&rights, CAP_FUTIMES), td); 2845 2846 if ((error = namei(&nd)) != 0) 2847 return (error); 2848 NDFREE(&nd, NDF_ONLY_PNBUF); 2849 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2850 vrele(nd.ni_vp); 2851 return (error); 2852 } 2853 2854 /* 2855 * Set the access and modification times of a file. 
2856 */ 2857 #ifndef _SYS_SYSPROTO_H_ 2858 struct lutimes_args { 2859 char *path; 2860 struct timeval *tptr; 2861 }; 2862 #endif 2863 int 2864 sys_lutimes(struct thread *td, struct lutimes_args *uap) 2865 { 2866 2867 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 2868 UIO_USERSPACE)); 2869 } 2870 2871 int 2872 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 2873 struct timeval *tptr, enum uio_seg tptrseg) 2874 { 2875 struct timespec ts[2]; 2876 struct nameidata nd; 2877 int error; 2878 2879 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 2880 return (error); 2881 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 2882 if ((error = namei(&nd)) != 0) 2883 return (error); 2884 NDFREE(&nd, NDF_ONLY_PNBUF); 2885 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 2886 vrele(nd.ni_vp); 2887 return (error); 2888 } 2889 2890 /* 2891 * Set the access and modification times of a file. 2892 */ 2893 #ifndef _SYS_SYSPROTO_H_ 2894 struct futimes_args { 2895 int fd; 2896 struct timeval *tptr; 2897 }; 2898 #endif 2899 int 2900 sys_futimes(struct thread *td, struct futimes_args *uap) 2901 { 2902 2903 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 2904 } 2905 2906 int 2907 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 2908 enum uio_seg tptrseg) 2909 { 2910 struct timespec ts[2]; 2911 struct file *fp; 2912 cap_rights_t rights; 2913 int error; 2914 2915 AUDIT_ARG_FD(fd); 2916 error = getutimes(tptr, tptrseg, ts); 2917 if (error != 0) 2918 return (error); 2919 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2920 if (error != 0) 2921 return (error); 2922 #ifdef AUDIT 2923 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2924 AUDIT_ARG_VNODE1(fp->f_vnode); 2925 VOP_UNLOCK(fp->f_vnode, 0); 2926 #endif 2927 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 2928 fdrop(fp, td); 2929 return (error); 2930 } 2931 2932 int 2933 sys_futimens(struct thread *td, struct futimens_args *uap) 2934 { 
2935 2936 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 2937 } 2938 2939 int 2940 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 2941 enum uio_seg tptrseg) 2942 { 2943 struct timespec ts[2]; 2944 struct file *fp; 2945 cap_rights_t rights; 2946 int error, flags; 2947 2948 AUDIT_ARG_FD(fd); 2949 error = getutimens(tptr, tptrseg, ts, &flags); 2950 if (error != 0) 2951 return (error); 2952 if (flags & UTIMENS_EXIT) 2953 return (0); 2954 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FUTIMES), &fp); 2955 if (error != 0) 2956 return (error); 2957 #ifdef AUDIT 2958 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2959 AUDIT_ARG_VNODE1(fp->f_vnode); 2960 VOP_UNLOCK(fp->f_vnode, 0); 2961 #endif 2962 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 2963 fdrop(fp, td); 2964 return (error); 2965 } 2966 2967 int 2968 sys_utimensat(struct thread *td, struct utimensat_args *uap) 2969 { 2970 2971 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 2972 uap->times, UIO_USERSPACE, uap->flag)); 2973 } 2974 2975 int 2976 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2977 struct timespec *tptr, enum uio_seg tptrseg, int flag) 2978 { 2979 struct nameidata nd; 2980 struct timespec ts[2]; 2981 cap_rights_t rights; 2982 int error, flags; 2983 2984 if (flag & ~AT_SYMLINK_NOFOLLOW) 2985 return (EINVAL); 2986 2987 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 2988 return (error); 2989 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 2990 FOLLOW) | AUDITVNODE1, pathseg, path, fd, 2991 cap_rights_init(&rights, CAP_FUTIMES), td); 2992 if ((error = namei(&nd)) != 0) 2993 return (error); 2994 /* 2995 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 2996 * POSIX states: 2997 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 2998 * "Search permission is denied by a component of the path prefix." 
2999 */ 3000 NDFREE(&nd, NDF_ONLY_PNBUF); 3001 if ((flags & UTIMENS_EXIT) == 0) 3002 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3003 vrele(nd.ni_vp); 3004 return (error); 3005 } 3006 3007 /* 3008 * Truncate a file given its path name. 3009 */ 3010 #ifndef _SYS_SYSPROTO_H_ 3011 struct truncate_args { 3012 char *path; 3013 int pad; 3014 off_t length; 3015 }; 3016 #endif 3017 int 3018 sys_truncate(struct thread *td, struct truncate_args *uap) 3019 { 3020 3021 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3022 } 3023 3024 int 3025 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3026 { 3027 struct mount *mp; 3028 struct vnode *vp; 3029 void *rl_cookie; 3030 struct vattr vattr; 3031 struct nameidata nd; 3032 int error; 3033 3034 if (length < 0) 3035 return(EINVAL); 3036 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3037 if ((error = namei(&nd)) != 0) 3038 return (error); 3039 vp = nd.ni_vp; 3040 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3041 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3042 vn_rangelock_unlock(vp, rl_cookie); 3043 vrele(vp); 3044 return (error); 3045 } 3046 NDFREE(&nd, NDF_ONLY_PNBUF); 3047 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3048 if (vp->v_type == VDIR) 3049 error = EISDIR; 3050 #ifdef MAC 3051 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3052 } 3053 #endif 3054 else if ((error = vn_writechk(vp)) == 0 && 3055 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3056 VATTR_NULL(&vattr); 3057 vattr.va_size = length; 3058 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3059 } 3060 VOP_UNLOCK(vp, 0); 3061 vn_finished_write(mp); 3062 vn_rangelock_unlock(vp, rl_cookie); 3063 vrele(vp); 3064 return (error); 3065 } 3066 3067 #if defined(COMPAT_43) 3068 /* 3069 * Truncate a file given its path name. 
3070 */ 3071 #ifndef _SYS_SYSPROTO_H_ 3072 struct otruncate_args { 3073 char *path; 3074 long length; 3075 }; 3076 #endif 3077 int 3078 otruncate(struct thread *td, struct otruncate_args *uap) 3079 { 3080 3081 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3082 } 3083 #endif /* COMPAT_43 */ 3084 3085 #if defined(COMPAT_FREEBSD6) 3086 /* Versions with the pad argument */ 3087 int 3088 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3089 { 3090 3091 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3092 } 3093 3094 int 3095 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3096 { 3097 3098 return (kern_ftruncate(td, uap->fd, uap->length)); 3099 } 3100 #endif 3101 3102 int 3103 kern_fsync(struct thread *td, int fd, bool fullsync) 3104 { 3105 struct vnode *vp; 3106 struct mount *mp; 3107 struct file *fp; 3108 cap_rights_t rights; 3109 int error, lock_flags; 3110 3111 AUDIT_ARG_FD(fd); 3112 error = getvnode(td, fd, cap_rights_init(&rights, CAP_FSYNC), &fp); 3113 if (error != 0) 3114 return (error); 3115 vp = fp->f_vnode; 3116 #if 0 3117 if (!fullsync) 3118 /* XXXKIB: compete outstanding aio writes */; 3119 #endif 3120 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3121 if (error != 0) 3122 goto drop; 3123 if (MNT_SHARED_WRITES(mp) || 3124 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3125 lock_flags = LK_SHARED; 3126 } else { 3127 lock_flags = LK_EXCLUSIVE; 3128 } 3129 vn_lock(vp, lock_flags | LK_RETRY); 3130 AUDIT_ARG_VNODE1(vp); 3131 if (vp->v_object != NULL) { 3132 VM_OBJECT_WLOCK(vp->v_object); 3133 vm_object_page_clean(vp->v_object, 0, 0, 0); 3134 VM_OBJECT_WUNLOCK(vp->v_object); 3135 } 3136 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3137 VOP_UNLOCK(vp, 0); 3138 vn_finished_write(mp); 3139 drop: 3140 fdrop(fp, td); 3141 return (error); 3142 } 3143 3144 /* 3145 * Sync an open file. 
3146 */ 3147 #ifndef _SYS_SYSPROTO_H_ 3148 struct fsync_args { 3149 int fd; 3150 }; 3151 #endif 3152 int 3153 sys_fsync(struct thread *td, struct fsync_args *uap) 3154 { 3155 3156 return (kern_fsync(td, uap->fd, true)); 3157 } 3158 3159 int 3160 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3161 { 3162 3163 return (kern_fsync(td, uap->fd, false)); 3164 } 3165 3166 /* 3167 * Rename files. Source and destination must either both be directories, or 3168 * both not be directories. If target is a directory, it must be empty. 3169 */ 3170 #ifndef _SYS_SYSPROTO_H_ 3171 struct rename_args { 3172 char *from; 3173 char *to; 3174 }; 3175 #endif 3176 int 3177 sys_rename(struct thread *td, struct rename_args *uap) 3178 { 3179 3180 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3181 uap->to, UIO_USERSPACE)); 3182 } 3183 3184 #ifndef _SYS_SYSPROTO_H_ 3185 struct renameat_args { 3186 int oldfd; 3187 char *old; 3188 int newfd; 3189 char *new; 3190 }; 3191 #endif 3192 int 3193 sys_renameat(struct thread *td, struct renameat_args *uap) 3194 { 3195 3196 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3197 UIO_USERSPACE)); 3198 } 3199 3200 int 3201 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3202 enum uio_seg pathseg) 3203 { 3204 struct mount *mp = NULL; 3205 struct vnode *tvp, *fvp, *tdvp; 3206 struct nameidata fromnd, tond; 3207 cap_rights_t rights; 3208 int error; 3209 3210 again: 3211 bwillwrite(); 3212 #ifdef MAC 3213 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3214 AUDITVNODE1, pathseg, old, oldfd, 3215 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3216 #else 3217 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3218 pathseg, old, oldfd, 3219 cap_rights_init(&rights, CAP_RENAMEAT_SOURCE), td); 3220 #endif 3221 3222 if ((error = namei(&fromnd)) != 0) 3223 return (error); 3224 #ifdef MAC 3225 error = mac_vnode_check_rename_from(td->td_ucred, 
fromnd.ni_dvp, 3226 fromnd.ni_vp, &fromnd.ni_cnd); 3227 VOP_UNLOCK(fromnd.ni_dvp, 0); 3228 if (fromnd.ni_dvp != fromnd.ni_vp) 3229 VOP_UNLOCK(fromnd.ni_vp, 0); 3230 #endif 3231 fvp = fromnd.ni_vp; 3232 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3233 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3234 cap_rights_init(&rights, CAP_RENAMEAT_TARGET), td); 3235 if (fromnd.ni_vp->v_type == VDIR) 3236 tond.ni_cnd.cn_flags |= WILLBEDIR; 3237 if ((error = namei(&tond)) != 0) { 3238 /* Translate error code for rename("dir1", "dir2/."). */ 3239 if (error == EISDIR && fvp->v_type == VDIR) 3240 error = EINVAL; 3241 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3242 vrele(fromnd.ni_dvp); 3243 vrele(fvp); 3244 goto out1; 3245 } 3246 tdvp = tond.ni_dvp; 3247 tvp = tond.ni_vp; 3248 error = vn_start_write(fvp, &mp, V_NOWAIT); 3249 if (error != 0) { 3250 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3251 NDFREE(&tond, NDF_ONLY_PNBUF); 3252 if (tvp != NULL) 3253 vput(tvp); 3254 if (tdvp == tvp) 3255 vrele(tdvp); 3256 else 3257 vput(tdvp); 3258 vrele(fromnd.ni_dvp); 3259 vrele(fvp); 3260 vrele(tond.ni_startdir); 3261 if (fromnd.ni_startdir != NULL) 3262 vrele(fromnd.ni_startdir); 3263 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3264 if (error != 0) 3265 return (error); 3266 goto again; 3267 } 3268 if (tvp != NULL) { 3269 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3270 error = ENOTDIR; 3271 goto out; 3272 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3273 error = EISDIR; 3274 goto out; 3275 } 3276 #ifdef CAPABILITIES 3277 if (newfd != AT_FDCWD) { 3278 /* 3279 * If the target already exists we require CAP_UNLINKAT 3280 * from 'newfd'. 
3281 */ 3282 error = cap_check(&tond.ni_filecaps.fc_rights, 3283 cap_rights_init(&rights, CAP_UNLINKAT)); 3284 if (error != 0) 3285 goto out; 3286 } 3287 #endif 3288 } 3289 if (fvp == tdvp) { 3290 error = EINVAL; 3291 goto out; 3292 } 3293 /* 3294 * If the source is the same as the destination (that is, if they 3295 * are links to the same vnode), then there is nothing to do. 3296 */ 3297 if (fvp == tvp) 3298 error = -1; 3299 #ifdef MAC 3300 else 3301 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3302 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3303 #endif 3304 out: 3305 if (error == 0) { 3306 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3307 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3308 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3309 NDFREE(&tond, NDF_ONLY_PNBUF); 3310 } else { 3311 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3312 NDFREE(&tond, NDF_ONLY_PNBUF); 3313 if (tvp != NULL) 3314 vput(tvp); 3315 if (tdvp == tvp) 3316 vrele(tdvp); 3317 else 3318 vput(tdvp); 3319 vrele(fromnd.ni_dvp); 3320 vrele(fvp); 3321 } 3322 vrele(tond.ni_startdir); 3323 vn_finished_write(mp); 3324 out1: 3325 if (fromnd.ni_startdir) 3326 vrele(fromnd.ni_startdir); 3327 if (error == -1) 3328 return (0); 3329 return (error); 3330 } 3331 3332 /* 3333 * Make a directory file. 
3334 */ 3335 #ifndef _SYS_SYSPROTO_H_ 3336 struct mkdir_args { 3337 char *path; 3338 int mode; 3339 }; 3340 #endif 3341 int 3342 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3343 { 3344 3345 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3346 uap->mode)); 3347 } 3348 3349 #ifndef _SYS_SYSPROTO_H_ 3350 struct mkdirat_args { 3351 int fd; 3352 char *path; 3353 mode_t mode; 3354 }; 3355 #endif 3356 int 3357 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3358 { 3359 3360 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3361 } 3362 3363 int 3364 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3365 int mode) 3366 { 3367 struct mount *mp; 3368 struct vnode *vp; 3369 struct vattr vattr; 3370 struct nameidata nd; 3371 cap_rights_t rights; 3372 int error; 3373 3374 AUDIT_ARG_MODE(mode); 3375 restart: 3376 bwillwrite(); 3377 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3378 NOCACHE, segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), 3379 td); 3380 nd.ni_cnd.cn_flags |= WILLBEDIR; 3381 if ((error = namei(&nd)) != 0) 3382 return (error); 3383 vp = nd.ni_vp; 3384 if (vp != NULL) { 3385 NDFREE(&nd, NDF_ONLY_PNBUF); 3386 /* 3387 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3388 * the strange behaviour of leaving the vnode unlocked 3389 * if the target is the same vnode as the parent. 
3390 */ 3391 if (vp == nd.ni_dvp) 3392 vrele(nd.ni_dvp); 3393 else 3394 vput(nd.ni_dvp); 3395 vrele(vp); 3396 return (EEXIST); 3397 } 3398 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3399 NDFREE(&nd, NDF_ONLY_PNBUF); 3400 vput(nd.ni_dvp); 3401 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3402 return (error); 3403 goto restart; 3404 } 3405 VATTR_NULL(&vattr); 3406 vattr.va_type = VDIR; 3407 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3408 #ifdef MAC 3409 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3410 &vattr); 3411 if (error != 0) 3412 goto out; 3413 #endif 3414 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3415 #ifdef MAC 3416 out: 3417 #endif 3418 NDFREE(&nd, NDF_ONLY_PNBUF); 3419 vput(nd.ni_dvp); 3420 if (error == 0) 3421 vput(nd.ni_vp); 3422 vn_finished_write(mp); 3423 return (error); 3424 } 3425 3426 /* 3427 * Remove a directory file. 3428 */ 3429 #ifndef _SYS_SYSPROTO_H_ 3430 struct rmdir_args { 3431 char *path; 3432 }; 3433 #endif 3434 int 3435 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3436 { 3437 3438 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE)); 3439 } 3440 3441 int 3442 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3443 { 3444 struct mount *mp; 3445 struct vnode *vp; 3446 struct nameidata nd; 3447 cap_rights_t rights; 3448 int error; 3449 3450 restart: 3451 bwillwrite(); 3452 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3453 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3454 if ((error = namei(&nd)) != 0) 3455 return (error); 3456 vp = nd.ni_vp; 3457 if (vp->v_type != VDIR) { 3458 error = ENOTDIR; 3459 goto out; 3460 } 3461 /* 3462 * No rmdir "." please. 3463 */ 3464 if (nd.ni_dvp == vp) { 3465 error = EINVAL; 3466 goto out; 3467 } 3468 /* 3469 * The root of a mounted filesystem cannot be deleted. 
3470 */ 3471 if (vp->v_vflag & VV_ROOT) { 3472 error = EBUSY; 3473 goto out; 3474 } 3475 #ifdef MAC 3476 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3477 &nd.ni_cnd); 3478 if (error != 0) 3479 goto out; 3480 #endif 3481 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3482 NDFREE(&nd, NDF_ONLY_PNBUF); 3483 vput(vp); 3484 if (nd.ni_dvp == vp) 3485 vrele(nd.ni_dvp); 3486 else 3487 vput(nd.ni_dvp); 3488 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3489 return (error); 3490 goto restart; 3491 } 3492 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3493 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3494 vn_finished_write(mp); 3495 out: 3496 NDFREE(&nd, NDF_ONLY_PNBUF); 3497 vput(vp); 3498 if (nd.ni_dvp == vp) 3499 vrele(nd.ni_dvp); 3500 else 3501 vput(nd.ni_dvp); 3502 return (error); 3503 } 3504 3505 #ifdef COMPAT_43 3506 /* 3507 * Read a block of directory entries in a filesystem independent format. 3508 */ 3509 #ifndef _SYS_SYSPROTO_H_ 3510 struct ogetdirentries_args { 3511 int fd; 3512 char *buf; 3513 u_int count; 3514 long *basep; 3515 }; 3516 #endif 3517 int 3518 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3519 { 3520 long loff; 3521 int error; 3522 3523 error = kern_ogetdirentries(td, uap, &loff); 3524 if (error == 0) 3525 error = copyout(&loff, uap->basep, sizeof(long)); 3526 return (error); 3527 } 3528 3529 int 3530 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3531 long *ploff) 3532 { 3533 struct vnode *vp; 3534 struct file *fp; 3535 struct uio auio, kuio; 3536 struct iovec aiov, kiov; 3537 struct dirent *dp, *edp; 3538 cap_rights_t rights; 3539 caddr_t dirbuf; 3540 int error, eofflag, readcnt; 3541 long loff; 3542 off_t foffset; 3543 3544 /* XXX arbitrary sanity limit on `count'. 
*/ 3545 if (uap->count > 64 * 1024) 3546 return (EINVAL); 3547 error = getvnode(td, uap->fd, cap_rights_init(&rights, CAP_READ), &fp); 3548 if (error != 0) 3549 return (error); 3550 if ((fp->f_flag & FREAD) == 0) { 3551 fdrop(fp, td); 3552 return (EBADF); 3553 } 3554 vp = fp->f_vnode; 3555 foffset = foffset_lock(fp, 0); 3556 unionread: 3557 if (vp->v_type != VDIR) { 3558 foffset_unlock(fp, foffset, 0); 3559 fdrop(fp, td); 3560 return (EINVAL); 3561 } 3562 aiov.iov_base = uap->buf; 3563 aiov.iov_len = uap->count; 3564 auio.uio_iov = &aiov; 3565 auio.uio_iovcnt = 1; 3566 auio.uio_rw = UIO_READ; 3567 auio.uio_segflg = UIO_USERSPACE; 3568 auio.uio_td = td; 3569 auio.uio_resid = uap->count; 3570 vn_lock(vp, LK_SHARED | LK_RETRY); 3571 loff = auio.uio_offset = foffset; 3572 #ifdef MAC 3573 error = mac_vnode_check_readdir(td->td_ucred, vp); 3574 if (error != 0) { 3575 VOP_UNLOCK(vp, 0); 3576 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3577 fdrop(fp, td); 3578 return (error); 3579 } 3580 #endif 3581 # if (BYTE_ORDER != LITTLE_ENDIAN) 3582 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3583 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3584 NULL, NULL); 3585 foffset = auio.uio_offset; 3586 } else 3587 # endif 3588 { 3589 kuio = auio; 3590 kuio.uio_iov = &kiov; 3591 kuio.uio_segflg = UIO_SYSSPACE; 3592 kiov.iov_len = uap->count; 3593 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3594 kiov.iov_base = dirbuf; 3595 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3596 NULL, NULL); 3597 foffset = kuio.uio_offset; 3598 if (error == 0) { 3599 readcnt = uap->count - kuio.uio_resid; 3600 edp = (struct dirent *)&dirbuf[readcnt]; 3601 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3602 # if (BYTE_ORDER == LITTLE_ENDIAN) 3603 /* 3604 * The expected low byte of 3605 * dp->d_namlen is our dp->d_type. 3606 * The high MBZ byte of dp->d_namlen 3607 * is our dp->d_namlen. 
3608 */ 3609 dp->d_type = dp->d_namlen; 3610 dp->d_namlen = 0; 3611 # else 3612 /* 3613 * The dp->d_type is the high byte 3614 * of the expected dp->d_namlen, 3615 * so must be zero'ed. 3616 */ 3617 dp->d_type = 0; 3618 # endif 3619 if (dp->d_reclen > 0) { 3620 dp = (struct dirent *) 3621 ((char *)dp + dp->d_reclen); 3622 } else { 3623 error = EIO; 3624 break; 3625 } 3626 } 3627 if (dp >= edp) 3628 error = uiomove(dirbuf, readcnt, &auio); 3629 } 3630 free(dirbuf, M_TEMP); 3631 } 3632 if (error != 0) { 3633 VOP_UNLOCK(vp, 0); 3634 foffset_unlock(fp, foffset, 0); 3635 fdrop(fp, td); 3636 return (error); 3637 } 3638 if (uap->count == auio.uio_resid && 3639 (vp->v_vflag & VV_ROOT) && 3640 (vp->v_mount->mnt_flag & MNT_UNION)) { 3641 struct vnode *tvp = vp; 3642 vp = vp->v_mount->mnt_vnodecovered; 3643 VREF(vp); 3644 fp->f_vnode = vp; 3645 fp->f_data = vp; 3646 foffset = 0; 3647 vput(tvp); 3648 goto unionread; 3649 } 3650 VOP_UNLOCK(vp, 0); 3651 foffset_unlock(fp, foffset, 0); 3652 fdrop(fp, td); 3653 td->td_retval[0] = uap->count - auio.uio_resid; 3654 if (error == 0) 3655 *ploff = loff; 3656 return (error); 3657 } 3658 #endif /* COMPAT_43 */ 3659 3660 /* 3661 * Read a block of directory entries in a filesystem independent format. 
3662 */ 3663 #ifndef _SYS_SYSPROTO_H_ 3664 struct getdirentries_args { 3665 int fd; 3666 char *buf; 3667 u_int count; 3668 long *basep; 3669 }; 3670 #endif 3671 int 3672 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 3673 { 3674 long base; 3675 int error; 3676 3677 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3678 NULL, UIO_USERSPACE); 3679 if (error != 0) 3680 return (error); 3681 if (uap->basep != NULL) 3682 error = copyout(&base, uap->basep, sizeof(long)); 3683 return (error); 3684 } 3685 3686 int 3687 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3688 long *basep, ssize_t *residp, enum uio_seg bufseg) 3689 { 3690 struct vnode *vp; 3691 struct file *fp; 3692 struct uio auio; 3693 struct iovec aiov; 3694 cap_rights_t rights; 3695 long loff; 3696 int error, eofflag; 3697 off_t foffset; 3698 3699 AUDIT_ARG_FD(fd); 3700 if (count > IOSIZE_MAX) 3701 return (EINVAL); 3702 auio.uio_resid = count; 3703 error = getvnode(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 3704 if (error != 0) 3705 return (error); 3706 if ((fp->f_flag & FREAD) == 0) { 3707 fdrop(fp, td); 3708 return (EBADF); 3709 } 3710 vp = fp->f_vnode; 3711 foffset = foffset_lock(fp, 0); 3712 unionread: 3713 if (vp->v_type != VDIR) { 3714 error = EINVAL; 3715 goto fail; 3716 } 3717 aiov.iov_base = buf; 3718 aiov.iov_len = count; 3719 auio.uio_iov = &aiov; 3720 auio.uio_iovcnt = 1; 3721 auio.uio_rw = UIO_READ; 3722 auio.uio_segflg = bufseg; 3723 auio.uio_td = td; 3724 vn_lock(vp, LK_SHARED | LK_RETRY); 3725 AUDIT_ARG_VNODE1(vp); 3726 loff = auio.uio_offset = foffset; 3727 #ifdef MAC 3728 error = mac_vnode_check_readdir(td->td_ucred, vp); 3729 if (error == 0) 3730 #endif 3731 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3732 NULL); 3733 foffset = auio.uio_offset; 3734 if (error != 0) { 3735 VOP_UNLOCK(vp, 0); 3736 goto fail; 3737 } 3738 if (count == auio.uio_resid && 3739 (vp->v_vflag & VV_ROOT) && 3740 (vp->v_mount->mnt_flag 
& MNT_UNION)) { 3741 struct vnode *tvp = vp; 3742 3743 vp = vp->v_mount->mnt_vnodecovered; 3744 VREF(vp); 3745 fp->f_vnode = vp; 3746 fp->f_data = vp; 3747 foffset = 0; 3748 vput(tvp); 3749 goto unionread; 3750 } 3751 VOP_UNLOCK(vp, 0); 3752 *basep = loff; 3753 if (residp != NULL) 3754 *residp = auio.uio_resid; 3755 td->td_retval[0] = count - auio.uio_resid; 3756 fail: 3757 foffset_unlock(fp, foffset, 0); 3758 fdrop(fp, td); 3759 return (error); 3760 } 3761 3762 #ifndef _SYS_SYSPROTO_H_ 3763 struct getdents_args { 3764 int fd; 3765 char *buf; 3766 size_t count; 3767 }; 3768 #endif 3769 int 3770 sys_getdents(struct thread *td, struct getdents_args *uap) 3771 { 3772 struct getdirentries_args ap; 3773 3774 ap.fd = uap->fd; 3775 ap.buf = uap->buf; 3776 ap.count = uap->count; 3777 ap.basep = NULL; 3778 return (sys_getdirentries(td, &ap)); 3779 } 3780 3781 /* 3782 * Set the mode mask for creation of filesystem nodes. 3783 */ 3784 #ifndef _SYS_SYSPROTO_H_ 3785 struct umask_args { 3786 int newmask; 3787 }; 3788 #endif 3789 int 3790 sys_umask(struct thread *td, struct umask_args *uap) 3791 { 3792 struct filedesc *fdp; 3793 3794 fdp = td->td_proc->p_fd; 3795 FILEDESC_XLOCK(fdp); 3796 td->td_retval[0] = fdp->fd_cmask; 3797 fdp->fd_cmask = uap->newmask & ALLPERMS; 3798 FILEDESC_XUNLOCK(fdp); 3799 return (0); 3800 } 3801 3802 /* 3803 * Void all references to file by ripping underlying filesystem away from 3804 * vnode. 
3805 */ 3806 #ifndef _SYS_SYSPROTO_H_ 3807 struct revoke_args { 3808 char *path; 3809 }; 3810 #endif 3811 int 3812 sys_revoke(struct thread *td, struct revoke_args *uap) 3813 { 3814 struct vnode *vp; 3815 struct vattr vattr; 3816 struct nameidata nd; 3817 int error; 3818 3819 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3820 uap->path, td); 3821 if ((error = namei(&nd)) != 0) 3822 return (error); 3823 vp = nd.ni_vp; 3824 NDFREE(&nd, NDF_ONLY_PNBUF); 3825 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 3826 error = EINVAL; 3827 goto out; 3828 } 3829 #ifdef MAC 3830 error = mac_vnode_check_revoke(td->td_ucred, vp); 3831 if (error != 0) 3832 goto out; 3833 #endif 3834 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 3835 if (error != 0) 3836 goto out; 3837 if (td->td_ucred->cr_uid != vattr.va_uid) { 3838 error = priv_check(td, PRIV_VFS_ADMIN); 3839 if (error != 0) 3840 goto out; 3841 } 3842 if (vcount(vp) > 1) 3843 VOP_REVOKE(vp, REVOKEALL); 3844 out: 3845 vput(vp); 3846 return (error); 3847 } 3848 3849 /* 3850 * Convert a user file descriptor to a kernel file entry and check that, if it 3851 * is a capability, the correct rights are present. A reference on the file 3852 * entry is held upon returning. 3853 */ 3854 int 3855 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 3856 { 3857 struct file *fp; 3858 int error; 3859 3860 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 3861 if (error != 0) 3862 return (error); 3863 3864 /* 3865 * The file could be not of the vnode type, or it may be not 3866 * yet fully initialized, in which case the f_vnode pointer 3867 * may be set, but f_ops is still badfileops. E.g., 3868 * devfs_open() transiently create such situation to 3869 * facilitate csw d_fdopen(). 3870 * 3871 * Dupfdopen() handling in kern_openat() installs the 3872 * half-baked file into the process descriptor table, allowing 3873 * other thread to dereference it. 
Guard against the race by 3874 * checking f_ops. 3875 */ 3876 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 3877 fdrop(fp, td); 3878 return (EINVAL); 3879 } 3880 *fpp = fp; 3881 return (0); 3882 } 3883 3884 3885 /* 3886 * Get an (NFS) file handle. 3887 */ 3888 #ifndef _SYS_SYSPROTO_H_ 3889 struct lgetfh_args { 3890 char *fname; 3891 fhandle_t *fhp; 3892 }; 3893 #endif 3894 int 3895 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 3896 { 3897 struct nameidata nd; 3898 fhandle_t fh; 3899 struct vnode *vp; 3900 int error; 3901 3902 error = priv_check(td, PRIV_VFS_GETFH); 3903 if (error != 0) 3904 return (error); 3905 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3906 uap->fname, td); 3907 error = namei(&nd); 3908 if (error != 0) 3909 return (error); 3910 NDFREE(&nd, NDF_ONLY_PNBUF); 3911 vp = nd.ni_vp; 3912 bzero(&fh, sizeof(fh)); 3913 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3914 error = VOP_VPTOFH(vp, &fh.fh_fid); 3915 vput(vp); 3916 if (error == 0) 3917 error = copyout(&fh, uap->fhp, sizeof (fh)); 3918 return (error); 3919 } 3920 3921 #ifndef _SYS_SYSPROTO_H_ 3922 struct getfh_args { 3923 char *fname; 3924 fhandle_t *fhp; 3925 }; 3926 #endif 3927 int 3928 sys_getfh(struct thread *td, struct getfh_args *uap) 3929 { 3930 struct nameidata nd; 3931 fhandle_t fh; 3932 struct vnode *vp; 3933 int error; 3934 3935 error = priv_check(td, PRIV_VFS_GETFH); 3936 if (error != 0) 3937 return (error); 3938 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 3939 uap->fname, td); 3940 error = namei(&nd); 3941 if (error != 0) 3942 return (error); 3943 NDFREE(&nd, NDF_ONLY_PNBUF); 3944 vp = nd.ni_vp; 3945 bzero(&fh, sizeof(fh)); 3946 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3947 error = VOP_VPTOFH(vp, &fh.fh_fid); 3948 vput(vp); 3949 if (error == 0) 3950 error = copyout(&fh, uap->fhp, sizeof (fh)); 3951 return (error); 3952 } 3953 3954 /* 3955 * syscall for the rpc.lockd to use to translate a NFS file handle into an 3956 * 
open descriptor. 3957 * 3958 * warning: do not remove the priv_check() call or this becomes one giant 3959 * security hole. 3960 */ 3961 #ifndef _SYS_SYSPROTO_H_ 3962 struct fhopen_args { 3963 const struct fhandle *u_fhp; 3964 int flags; 3965 }; 3966 #endif 3967 int 3968 sys_fhopen(struct thread *td, struct fhopen_args *uap) 3969 { 3970 struct mount *mp; 3971 struct vnode *vp; 3972 struct fhandle fhp; 3973 struct file *fp; 3974 int fmode, error; 3975 int indx; 3976 3977 error = priv_check(td, PRIV_VFS_FHOPEN); 3978 if (error != 0) 3979 return (error); 3980 indx = -1; 3981 fmode = FFLAGS(uap->flags); 3982 /* why not allow a non-read/write open for our lockd? */ 3983 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3984 return (EINVAL); 3985 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3986 if (error != 0) 3987 return(error); 3988 /* find the mount point */ 3989 mp = vfs_busyfs(&fhp.fh_fsid); 3990 if (mp == NULL) 3991 return (ESTALE); 3992 /* now give me my vnode, it gets returned to me locked */ 3993 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 3994 vfs_unbusy(mp); 3995 if (error != 0) 3996 return (error); 3997 3998 error = falloc_noinstall(td, &fp); 3999 if (error != 0) { 4000 vput(vp); 4001 return (error); 4002 } 4003 /* 4004 * An extra reference on `fp' has been held for us by 4005 * falloc_noinstall(). 
4006 */ 4007 4008 #ifdef INVARIANTS 4009 td->td_dupfd = -1; 4010 #endif 4011 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4012 if (error != 0) { 4013 KASSERT(fp->f_ops == &badfileops, 4014 ("VOP_OPEN in fhopen() set f_ops")); 4015 KASSERT(td->td_dupfd < 0, 4016 ("fhopen() encountered fdopen()")); 4017 4018 vput(vp); 4019 goto bad; 4020 } 4021 #ifdef INVARIANTS 4022 td->td_dupfd = 0; 4023 #endif 4024 fp->f_vnode = vp; 4025 fp->f_seqcount = 1; 4026 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4027 &vnops); 4028 VOP_UNLOCK(vp, 0); 4029 if ((fmode & O_TRUNC) != 0) { 4030 error = fo_truncate(fp, 0, td->td_ucred, td); 4031 if (error != 0) 4032 goto bad; 4033 } 4034 4035 error = finstall(td, fp, &indx, fmode, NULL); 4036 bad: 4037 fdrop(fp, td); 4038 td->td_retval[0] = indx; 4039 return (error); 4040 } 4041 4042 /* 4043 * Stat an (NFS) file handle. 4044 */ 4045 #ifndef _SYS_SYSPROTO_H_ 4046 struct fhstat_args { 4047 struct fhandle *u_fhp; 4048 struct stat *sb; 4049 }; 4050 #endif 4051 int 4052 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4053 { 4054 struct stat sb; 4055 struct fhandle fh; 4056 int error; 4057 4058 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4059 if (error != 0) 4060 return (error); 4061 error = kern_fhstat(td, fh, &sb); 4062 if (error == 0) 4063 error = copyout(&sb, uap->sb, sizeof(sb)); 4064 return (error); 4065 } 4066 4067 int 4068 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4069 { 4070 struct mount *mp; 4071 struct vnode *vp; 4072 int error; 4073 4074 error = priv_check(td, PRIV_VFS_FHSTAT); 4075 if (error != 0) 4076 return (error); 4077 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4078 return (ESTALE); 4079 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4080 vfs_unbusy(mp); 4081 if (error != 0) 4082 return (error); 4083 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4084 vput(vp); 4085 return (error); 4086 } 4087 4088 /* 4089 * Implement fstatfs() for (NFS) file handles. 
4090 */ 4091 #ifndef _SYS_SYSPROTO_H_ 4092 struct fhstatfs_args { 4093 struct fhandle *u_fhp; 4094 struct statfs *buf; 4095 }; 4096 #endif 4097 int 4098 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4099 { 4100 struct statfs *sfp; 4101 fhandle_t fh; 4102 int error; 4103 4104 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4105 if (error != 0) 4106 return (error); 4107 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4108 error = kern_fhstatfs(td, fh, sfp); 4109 if (error == 0) 4110 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4111 free(sfp, M_STATFS); 4112 return (error); 4113 } 4114 4115 int 4116 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4117 { 4118 struct statfs *sp; 4119 struct mount *mp; 4120 struct vnode *vp; 4121 int error; 4122 4123 error = priv_check(td, PRIV_VFS_FHSTATFS); 4124 if (error != 0) 4125 return (error); 4126 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4127 return (ESTALE); 4128 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4129 if (error != 0) { 4130 vfs_unbusy(mp); 4131 return (error); 4132 } 4133 vput(vp); 4134 error = prison_canseemount(td->td_ucred, mp); 4135 if (error != 0) 4136 goto out; 4137 #ifdef MAC 4138 error = mac_mount_check_stat(td->td_ucred, mp); 4139 if (error != 0) 4140 goto out; 4141 #endif 4142 /* 4143 * Set these in case the underlying filesystem fails to do so. 
4144 */ 4145 sp = &mp->mnt_stat; 4146 sp->f_version = STATFS_VERSION; 4147 sp->f_namemax = NAME_MAX; 4148 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4149 error = VFS_STATFS(mp, sp); 4150 if (error == 0) 4151 *buf = *sp; 4152 out: 4153 vfs_unbusy(mp); 4154 return (error); 4155 } 4156 4157 int 4158 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4159 { 4160 struct file *fp; 4161 struct mount *mp; 4162 struct vnode *vp; 4163 cap_rights_t rights; 4164 off_t olen, ooffset; 4165 int error; 4166 #ifdef AUDIT 4167 int audited_vnode1 = 0; 4168 #endif 4169 4170 AUDIT_ARG_FD(fd); 4171 if (offset < 0 || len <= 0) 4172 return (EINVAL); 4173 /* Check for wrap. */ 4174 if (offset > OFF_MAX - len) 4175 return (EFBIG); 4176 AUDIT_ARG_FD(fd); 4177 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4178 if (error != 0) 4179 return (error); 4180 AUDIT_ARG_FILE(td->td_proc, fp); 4181 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4182 error = ESPIPE; 4183 goto out; 4184 } 4185 if ((fp->f_flag & FWRITE) == 0) { 4186 error = EBADF; 4187 goto out; 4188 } 4189 if (fp->f_type != DTYPE_VNODE) { 4190 error = ENODEV; 4191 goto out; 4192 } 4193 vp = fp->f_vnode; 4194 if (vp->v_type != VREG) { 4195 error = ENODEV; 4196 goto out; 4197 } 4198 4199 /* Allocating blocks may take a long time, so iterate. 
*/ 4200 for (;;) { 4201 olen = len; 4202 ooffset = offset; 4203 4204 bwillwrite(); 4205 mp = NULL; 4206 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4207 if (error != 0) 4208 break; 4209 error = vn_lock(vp, LK_EXCLUSIVE); 4210 if (error != 0) { 4211 vn_finished_write(mp); 4212 break; 4213 } 4214 #ifdef AUDIT 4215 if (!audited_vnode1) { 4216 AUDIT_ARG_VNODE1(vp); 4217 audited_vnode1 = 1; 4218 } 4219 #endif 4220 #ifdef MAC 4221 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4222 if (error == 0) 4223 #endif 4224 error = VOP_ALLOCATE(vp, &offset, &len); 4225 VOP_UNLOCK(vp, 0); 4226 vn_finished_write(mp); 4227 4228 if (olen + ooffset != offset + len) { 4229 panic("offset + len changed from %jx/%jx to %jx/%jx", 4230 ooffset, olen, offset, len); 4231 } 4232 if (error != 0 || len == 0) 4233 break; 4234 KASSERT(olen > len, ("Iteration did not make progress?")); 4235 maybe_yield(); 4236 } 4237 out: 4238 fdrop(fp, td); 4239 return (error); 4240 } 4241 4242 int 4243 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4244 { 4245 int error; 4246 4247 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4248 return (kern_posix_error(td, error)); 4249 } 4250 4251 /* 4252 * Unlike madvise(2), we do not make a best effort to remember every 4253 * possible caching hint. Instead, we remember the last setting with 4254 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4255 * region of any current setting. 
4256 */ 4257 int 4258 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4259 int advice) 4260 { 4261 struct fadvise_info *fa, *new; 4262 struct file *fp; 4263 struct vnode *vp; 4264 cap_rights_t rights; 4265 off_t end; 4266 int error; 4267 4268 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4269 return (EINVAL); 4270 AUDIT_ARG_VALUE(advice); 4271 switch (advice) { 4272 case POSIX_FADV_SEQUENTIAL: 4273 case POSIX_FADV_RANDOM: 4274 case POSIX_FADV_NOREUSE: 4275 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4276 break; 4277 case POSIX_FADV_NORMAL: 4278 case POSIX_FADV_WILLNEED: 4279 case POSIX_FADV_DONTNEED: 4280 new = NULL; 4281 break; 4282 default: 4283 return (EINVAL); 4284 } 4285 /* XXX: CAP_POSIX_FADVISE? */ 4286 AUDIT_ARG_FD(fd); 4287 error = fget(td, fd, cap_rights_init(&rights), &fp); 4288 if (error != 0) 4289 goto out; 4290 AUDIT_ARG_FILE(td->td_proc, fp); 4291 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4292 error = ESPIPE; 4293 goto out; 4294 } 4295 if (fp->f_type != DTYPE_VNODE) { 4296 error = ENODEV; 4297 goto out; 4298 } 4299 vp = fp->f_vnode; 4300 if (vp->v_type != VREG) { 4301 error = ENODEV; 4302 goto out; 4303 } 4304 if (len == 0) 4305 end = OFF_MAX; 4306 else 4307 end = offset + len - 1; 4308 switch (advice) { 4309 case POSIX_FADV_SEQUENTIAL: 4310 case POSIX_FADV_RANDOM: 4311 case POSIX_FADV_NOREUSE: 4312 /* 4313 * Try to merge any existing non-standard region with 4314 * this new region if possible, otherwise create a new 4315 * non-standard region for this request. 
4316 */ 4317 mtx_pool_lock(mtxpool_sleep, fp); 4318 fa = fp->f_advice; 4319 if (fa != NULL && fa->fa_advice == advice && 4320 ((fa->fa_start <= end && fa->fa_end >= offset) || 4321 (end != OFF_MAX && fa->fa_start == end + 1) || 4322 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4323 if (offset < fa->fa_start) 4324 fa->fa_start = offset; 4325 if (end > fa->fa_end) 4326 fa->fa_end = end; 4327 } else { 4328 new->fa_advice = advice; 4329 new->fa_start = offset; 4330 new->fa_end = end; 4331 fp->f_advice = new; 4332 new = fa; 4333 } 4334 mtx_pool_unlock(mtxpool_sleep, fp); 4335 break; 4336 case POSIX_FADV_NORMAL: 4337 /* 4338 * If a the "normal" region overlaps with an existing 4339 * non-standard region, trim or remove the 4340 * non-standard region. 4341 */ 4342 mtx_pool_lock(mtxpool_sleep, fp); 4343 fa = fp->f_advice; 4344 if (fa != NULL) { 4345 if (offset <= fa->fa_start && end >= fa->fa_end) { 4346 new = fa; 4347 fp->f_advice = NULL; 4348 } else if (offset <= fa->fa_start && 4349 end >= fa->fa_start) 4350 fa->fa_start = end + 1; 4351 else if (offset <= fa->fa_end && end >= fa->fa_end) 4352 fa->fa_end = offset - 1; 4353 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4354 /* 4355 * If the "normal" region is a middle 4356 * portion of the existing 4357 * non-standard region, just remove 4358 * the whole thing rather than picking 4359 * one side or the other to 4360 * preserve. 
4361 */ 4362 new = fa; 4363 fp->f_advice = NULL; 4364 } 4365 } 4366 mtx_pool_unlock(mtxpool_sleep, fp); 4367 break; 4368 case POSIX_FADV_WILLNEED: 4369 case POSIX_FADV_DONTNEED: 4370 error = VOP_ADVISE(vp, offset, end, advice); 4371 break; 4372 } 4373 out: 4374 if (fp != NULL) 4375 fdrop(fp, td); 4376 free(new, M_FADVISE); 4377 return (error); 4378 } 4379 4380 int 4381 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4382 { 4383 int error; 4384 4385 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4386 uap->advice); 4387 return (kern_posix_error(td, error)); 4388 } 4389