1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capsicum.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 99 static int kern_chflags(struct thread *td, const char *path, 100 enum uio_seg pathseg, u_long flags); 101 static int kern_chflagsat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, u_long flags, int atflag); 103 static int setfflags(struct thread *td, struct vnode *, u_long); 104 
static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 #ifdef DEBUG 118 static int syncprt = 0; 119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 120 #endif 121 122 /* 123 * Sync each mounted filesystem. 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct sync_args { 127 int dummy; 128 }; 129 #endif 130 /* ARGSUSED */ 131 int 132 sys_sync(td, uap) 133 struct thread *td; 134 struct sync_args *uap; 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_msync(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Change filesystem quotas. 
163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct quotactl_args { 166 char *path; 167 int cmd; 168 int uid; 169 caddr_t arg; 170 }; 171 #endif 172 int 173 sys_quotactl(td, uap) 174 struct thread *td; 175 register struct quotactl_args /* { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 } */ *uap; 181 { 182 struct mount *mp; 183 struct nameidata nd; 184 int error; 185 186 AUDIT_ARG_CMD(uap->cmd); 187 AUDIT_ARG_UID(uap->uid); 188 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 189 return (EPERM); 190 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 191 uap->path, td); 192 if ((error = namei(&nd)) != 0) 193 return (error); 194 NDFREE(&nd, NDF_ONLY_PNBUF); 195 mp = nd.ni_vp->v_mount; 196 vfs_ref(mp); 197 vput(nd.ni_vp); 198 error = vfs_busy(mp, 0); 199 vfs_rel(mp); 200 if (error != 0) 201 return (error); 202 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 203 204 /* 205 * Since quota on operation typically needs to open quota 206 * file, the Q_QUOTAON handler needs to unbusy the mount point 207 * before calling into namei. Otherwise, unmount might be 208 * started between two vfs_busy() invocations (first is our, 209 * second is from mount point cross-walk code in lookup()), 210 * causing deadlock. 211 * 212 * Require that Q_QUOTAON handles the vfs_busy() reference on 213 * its own, always returning with ubusied mount point. 214 */ 215 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 216 vfs_unbusy(mp); 217 return (error); 218 } 219 220 /* 221 * Used by statfs conversion routines to scale the block size up if 222 * necessary so that all of the block counts are <= 'max_size'. Note 223 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 224 * value of 'n'. 
225 */ 226 void 227 statfs_scale_blocks(struct statfs *sf, long max_size) 228 { 229 uint64_t count; 230 int shift; 231 232 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 233 234 /* 235 * Attempt to scale the block counts to give a more accurate 236 * overview to userland of the ratio of free space to used 237 * space. To do this, find the largest block count and compute 238 * a divisor that lets it fit into a signed integer <= max_size. 239 */ 240 if (sf->f_bavail < 0) 241 count = -sf->f_bavail; 242 else 243 count = sf->f_bavail; 244 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 245 if (count <= max_size) 246 return; 247 248 count >>= flsl(max_size); 249 shift = 0; 250 while (count > 0) { 251 shift++; 252 count >>=1; 253 } 254 255 sf->f_bsize <<= shift; 256 sf->f_blocks >>= shift; 257 sf->f_bfree >>= shift; 258 sf->f_bavail >>= shift; 259 } 260 261 /* 262 * Get filesystem statistics. 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct statfs_args { 266 char *path; 267 struct statfs *buf; 268 }; 269 #endif 270 int 271 sys_statfs(td, uap) 272 struct thread *td; 273 register struct statfs_args /* { 274 char *path; 275 struct statfs *buf; 276 } */ *uap; 277 { 278 struct statfs sf; 279 int error; 280 281 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 282 if (error == 0) 283 error = copyout(&sf, uap->buf, sizeof(sf)); 284 return (error); 285 } 286 287 int 288 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 289 struct statfs *buf) 290 { 291 struct mount *mp; 292 struct statfs *sp, sb; 293 struct nameidata nd; 294 int error; 295 296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 297 pathseg, path, td); 298 error = namei(&nd); 299 if (error != 0) 300 return (error); 301 mp = nd.ni_vp->v_mount; 302 vfs_ref(mp); 303 NDFREE(&nd, NDF_ONLY_PNBUF); 304 vput(nd.ni_vp); 305 error = vfs_busy(mp, 0); 306 vfs_rel(mp); 307 if (error != 0) 308 return (error); 309 #ifdef MAC 310 error = mac_mount_check_stat(td->td_ucred, 
mp); 311 if (error != 0) 312 goto out; 313 #endif 314 /* 315 * Set these in case the underlying filesystem fails to do so. 316 */ 317 sp = &mp->mnt_stat; 318 sp->f_version = STATFS_VERSION; 319 sp->f_namemax = NAME_MAX; 320 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 321 error = VFS_STATFS(mp, sp); 322 if (error != 0) 323 goto out; 324 if (priv_check(td, PRIV_VFS_GENERATION)) { 325 bcopy(sp, &sb, sizeof(sb)); 326 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 327 prison_enforce_statfs(td->td_ucred, mp, &sb); 328 sp = &sb; 329 } 330 *buf = *sp; 331 out: 332 vfs_unbusy(mp); 333 return (error); 334 } 335 336 /* 337 * Get filesystem statistics. 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct fstatfs_args { 341 int fd; 342 struct statfs *buf; 343 }; 344 #endif 345 int 346 sys_fstatfs(td, uap) 347 struct thread *td; 348 register struct fstatfs_args /* { 349 int fd; 350 struct statfs *buf; 351 } */ *uap; 352 { 353 struct statfs sf; 354 int error; 355 356 error = kern_fstatfs(td, uap->fd, &sf); 357 if (error == 0) 358 error = copyout(&sf, uap->buf, sizeof(sf)); 359 return (error); 360 } 361 362 int 363 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 364 { 365 struct file *fp; 366 struct mount *mp; 367 struct statfs *sp, sb; 368 struct vnode *vp; 369 cap_rights_t rights; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode(td->td_proc->p_fd, fd, 374 cap_rights_init(&rights, CAP_FSTATFS), &fp); 375 if (error != 0) 376 return (error); 377 vp = fp->f_vnode; 378 vn_lock(vp, LK_SHARED | LK_RETRY); 379 #ifdef AUDIT 380 AUDIT_ARG_VNODE1(vp); 381 #endif 382 mp = vp->v_mount; 383 if (mp) 384 vfs_ref(mp); 385 VOP_UNLOCK(vp, 0); 386 fdrop(fp, td); 387 if (mp == NULL) { 388 error = EBADF; 389 goto out; 390 } 391 error = vfs_busy(mp, 0); 392 vfs_rel(mp); 393 if (error != 0) 394 return (error); 395 #ifdef MAC 396 error = mac_mount_check_stat(td->td_ucred, mp); 397 if (error != 0) 398 goto out; 399 #endif 400 /* 401 * Set these in case the underlying filesystem fails to do 
so. 402 */ 403 sp = &mp->mnt_stat; 404 sp->f_version = STATFS_VERSION; 405 sp->f_namemax = NAME_MAX; 406 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 407 error = VFS_STATFS(mp, sp); 408 if (error != 0) 409 goto out; 410 if (priv_check(td, PRIV_VFS_GENERATION)) { 411 bcopy(sp, &sb, sizeof(sb)); 412 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 413 prison_enforce_statfs(td->td_ucred, mp, &sb); 414 sp = &sb; 415 } 416 *buf = *sp; 417 out: 418 if (mp) 419 vfs_unbusy(mp); 420 return (error); 421 } 422 423 /* 424 * Get statistics on all filesystems. 425 */ 426 #ifndef _SYS_SYSPROTO_H_ 427 struct getfsstat_args { 428 struct statfs *buf; 429 long bufsize; 430 int flags; 431 }; 432 #endif 433 int 434 sys_getfsstat(td, uap) 435 struct thread *td; 436 register struct getfsstat_args /* { 437 struct statfs *buf; 438 long bufsize; 439 int flags; 440 } */ *uap; 441 { 442 443 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 444 uap->flags)); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 
451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 
508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 td->td_retval[0] = maxcount; 542 else 543 td->td_retval[0] = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 
580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 count = uap->bufsize / sizeof(struct ostatfs); 631 size = count * sizeof(struct statfs); 632 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 633 if (size > 0) { 634 count = td->td_retval[0]; 635 sp = buf; 636 while (count > 0 && error == 0) { 637 cvtstatfs(sp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 sp++; 640 uap->buf++; 641 count--; 642 } 643 free(buf, M_TEMP); 644 } 645 return (error); 646 } 647 648 /* 649 * Implement fstatfs() for (NFS) file handles. 
650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct freebsd4_fhstatfs_args { 653 struct fhandle *u_fhp; 654 struct ostatfs *buf; 655 }; 656 #endif 657 int 658 freebsd4_fhstatfs(td, uap) 659 struct thread *td; 660 struct freebsd4_fhstatfs_args /* { 661 struct fhandle *u_fhp; 662 struct ostatfs *buf; 663 } */ *uap; 664 { 665 struct ostatfs osb; 666 struct statfs sf; 667 fhandle_t fh; 668 int error; 669 670 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 671 if (error != 0) 672 return (error); 673 error = kern_fhstatfs(td, fh, &sf); 674 if (error != 0) 675 return (error); 676 cvtstatfs(&sf, &osb); 677 return (copyout(&osb, uap->buf, sizeof(osb))); 678 } 679 680 /* 681 * Convert a new format statfs structure to an old format statfs structure. 682 */ 683 static void 684 cvtstatfs(nsp, osp) 685 struct statfs *nsp; 686 struct ostatfs *osp; 687 { 688 689 statfs_scale_blocks(nsp, LONG_MAX); 690 bzero(osp, sizeof(*osp)); 691 osp->f_bsize = nsp->f_bsize; 692 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 693 osp->f_blocks = nsp->f_blocks; 694 osp->f_bfree = nsp->f_bfree; 695 osp->f_bavail = nsp->f_bavail; 696 osp->f_files = MIN(nsp->f_files, LONG_MAX); 697 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 698 osp->f_owner = nsp->f_owner; 699 osp->f_type = nsp->f_type; 700 osp->f_flags = nsp->f_flags; 701 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 702 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 703 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 704 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 705 strlcpy(osp->f_fstypename, nsp->f_fstypename, 706 MIN(MFSNAMELEN, OMFSNAMELEN)); 707 strlcpy(osp->f_mntonname, nsp->f_mntonname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 osp->f_fsid = nsp->f_fsid; 712 } 713 #endif /* COMPAT_FREEBSD4 */ 714 715 /* 716 * Change current working directory to a given file descriptor. 
717 */ 718 #ifndef _SYS_SYSPROTO_H_ 719 struct fchdir_args { 720 int fd; 721 }; 722 #endif 723 int 724 sys_fchdir(td, uap) 725 struct thread *td; 726 struct fchdir_args /* { 727 int fd; 728 } */ *uap; 729 { 730 register struct filedesc *fdp = td->td_proc->p_fd; 731 struct vnode *vp, *tdp, *vpold; 732 struct mount *mp; 733 struct file *fp; 734 cap_rights_t rights; 735 int error; 736 737 AUDIT_ARG_FD(uap->fd); 738 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 739 &fp); 740 if (error != 0) 741 return (error); 742 vp = fp->f_vnode; 743 VREF(vp); 744 fdrop(fp, td); 745 vn_lock(vp, LK_SHARED | LK_RETRY); 746 AUDIT_ARG_VNODE1(vp); 747 error = change_dir(vp, td); 748 while (!error && (mp = vp->v_mountedhere) != NULL) { 749 if (vfs_busy(mp, 0)) 750 continue; 751 error = VFS_ROOT(mp, LK_SHARED, &tdp); 752 vfs_unbusy(mp); 753 if (error != 0) 754 break; 755 vput(vp); 756 vp = tdp; 757 } 758 if (error != 0) { 759 vput(vp); 760 return (error); 761 } 762 VOP_UNLOCK(vp, 0); 763 FILEDESC_XLOCK(fdp); 764 vpold = fdp->fd_cdir; 765 fdp->fd_cdir = vp; 766 FILEDESC_XUNLOCK(fdp); 767 vrele(vpold); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 
773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 register struct filedesc *fdp = td->td_proc->p_fd; 794 struct nameidata nd; 795 struct vnode *vp; 796 int error; 797 798 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 799 pathseg, path, td); 800 if ((error = namei(&nd)) != 0) 801 return (error); 802 if ((error = change_dir(nd.ni_vp, td)) != 0) { 803 vput(nd.ni_vp); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806 } 807 VOP_UNLOCK(nd.ni_vp, 0); 808 NDFREE(&nd, NDF_ONLY_PNBUF); 809 FILEDESC_XLOCK(fdp); 810 vp = fdp->fd_cdir; 811 fdp->fd_cdir = nd.ni_vp; 812 FILEDESC_XUNLOCK(fdp); 813 vrele(vp); 814 return (0); 815 } 816 817 /* 818 * Helper function for raised chroot(2) security function: Refuse if 819 * any filedescriptors are open directories. 820 */ 821 static int 822 chroot_refuse_vdir_fds(fdp) 823 struct filedesc *fdp; 824 { 825 struct vnode *vp; 826 struct file *fp; 827 int fd; 828 829 FILEDESC_LOCK_ASSERT(fdp); 830 831 for (fd = 0; fd <= fdp->fd_lastfile; fd++) { 832 fp = fget_locked(fdp, fd); 833 if (fp == NULL) 834 continue; 835 if (fp->f_type == DTYPE_VNODE) { 836 vp = fp->f_vnode; 837 if (vp->v_type == VDIR) 838 return (EPERM); 839 } 840 } 841 return (0); 842 } 843 844 /* 845 * This sysctl determines if we will allow a process to chroot(2) if it 846 * has a directory open: 847 * 0: disallowed for all processes. 848 * 1: allowed for processes that were not already chroot(2)'ed. 849 * 2: allowed for all processes. 
850 */ 851 852 static int chroot_allow_open_directories = 1; 853 854 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 855 &chroot_allow_open_directories, 0, 856 "Allow a process to chroot(2) if it has a directory open"); 857 858 /* 859 * Change notion of root (``/'') directory. 860 */ 861 #ifndef _SYS_SYSPROTO_H_ 862 struct chroot_args { 863 char *path; 864 }; 865 #endif 866 int 867 sys_chroot(td, uap) 868 struct thread *td; 869 struct chroot_args /* { 870 char *path; 871 } */ *uap; 872 { 873 struct nameidata nd; 874 int error; 875 876 error = priv_check(td, PRIV_VFS_CHROOT); 877 if (error != 0) 878 return (error); 879 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 880 UIO_USERSPACE, uap->path, td); 881 error = namei(&nd); 882 if (error != 0) 883 goto error; 884 error = change_dir(nd.ni_vp, td); 885 if (error != 0) 886 goto e_vunlock; 887 #ifdef MAC 888 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 889 if (error != 0) 890 goto e_vunlock; 891 #endif 892 VOP_UNLOCK(nd.ni_vp, 0); 893 error = change_root(nd.ni_vp, td); 894 vrele(nd.ni_vp); 895 NDFREE(&nd, NDF_ONLY_PNBUF); 896 return (error); 897 e_vunlock: 898 vput(nd.ni_vp); 899 error: 900 NDFREE(&nd, NDF_ONLY_PNBUF); 901 return (error); 902 } 903 904 /* 905 * Common routine for chroot and chdir. Callers must provide a locked vnode 906 * instance. 907 */ 908 int 909 change_dir(vp, td) 910 struct vnode *vp; 911 struct thread *td; 912 { 913 #ifdef MAC 914 int error; 915 #endif 916 917 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 918 if (vp->v_type != VDIR) 919 return (ENOTDIR); 920 #ifdef MAC 921 error = mac_vnode_check_chdir(td->td_ucred, vp); 922 if (error != 0) 923 return (error); 924 #endif 925 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 926 } 927 928 /* 929 * Common routine for kern_chroot() and jail_attach(). The caller is 930 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 931 * authorize this operation. 
932 */ 933 int 934 change_root(vp, td) 935 struct vnode *vp; 936 struct thread *td; 937 { 938 struct filedesc *fdp; 939 struct vnode *oldvp; 940 int error; 941 942 fdp = td->td_proc->p_fd; 943 FILEDESC_XLOCK(fdp); 944 if (chroot_allow_open_directories == 0 || 945 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 946 error = chroot_refuse_vdir_fds(fdp); 947 if (error != 0) { 948 FILEDESC_XUNLOCK(fdp); 949 return (error); 950 } 951 } 952 oldvp = fdp->fd_rdir; 953 fdp->fd_rdir = vp; 954 VREF(fdp->fd_rdir); 955 if (!fdp->fd_jdir) { 956 fdp->fd_jdir = vp; 957 VREF(fdp->fd_jdir); 958 } 959 FILEDESC_XUNLOCK(fdp); 960 vrele(oldvp); 961 return (0); 962 } 963 964 static __inline void 965 flags_to_rights(int flags, cap_rights_t *rightsp) 966 { 967 968 if (flags & O_EXEC) { 969 cap_rights_set(rightsp, CAP_FEXECVE); 970 } else { 971 switch ((flags & O_ACCMODE)) { 972 case O_RDONLY: 973 cap_rights_set(rightsp, CAP_READ); 974 break; 975 case O_RDWR: 976 cap_rights_set(rightsp, CAP_READ); 977 /* FALLTHROUGH */ 978 case O_WRONLY: 979 cap_rights_set(rightsp, CAP_WRITE); 980 if (!(flags & (O_APPEND | O_TRUNC))) 981 cap_rights_set(rightsp, CAP_SEEK); 982 break; 983 } 984 } 985 986 if (flags & O_CREAT) 987 cap_rights_set(rightsp, CAP_CREATE); 988 989 if (flags & O_TRUNC) 990 cap_rights_set(rightsp, CAP_FTRUNCATE); 991 992 if (flags & (O_SYNC | O_FSYNC)) 993 cap_rights_set(rightsp, CAP_FSYNC); 994 995 if (flags & (O_EXLOCK | O_SHLOCK)) 996 cap_rights_set(rightsp, CAP_FLOCK); 997 } 998 999 /* 1000 * Check permissions, allocate an open file structure, and call the device 1001 * open routine if any. 
1002 */ 1003 #ifndef _SYS_SYSPROTO_H_ 1004 struct open_args { 1005 char *path; 1006 int flags; 1007 int mode; 1008 }; 1009 #endif 1010 int 1011 sys_open(td, uap) 1012 struct thread *td; 1013 register struct open_args /* { 1014 char *path; 1015 int flags; 1016 int mode; 1017 } */ *uap; 1018 { 1019 1020 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1021 } 1022 1023 #ifndef _SYS_SYSPROTO_H_ 1024 struct openat_args { 1025 int fd; 1026 char *path; 1027 int flag; 1028 int mode; 1029 }; 1030 #endif 1031 int 1032 sys_openat(struct thread *td, struct openat_args *uap) 1033 { 1034 1035 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1036 uap->mode)); 1037 } 1038 1039 int 1040 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1041 int mode) 1042 { 1043 1044 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1045 } 1046 1047 int 1048 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1049 int flags, int mode) 1050 { 1051 struct proc *p = td->td_proc; 1052 struct filedesc *fdp = p->p_fd; 1053 struct file *fp; 1054 struct vnode *vp; 1055 struct nameidata nd; 1056 cap_rights_t rights; 1057 int cmode, error, indx; 1058 1059 indx = -1; 1060 1061 AUDIT_ARG_FFLAGS(flags); 1062 AUDIT_ARG_MODE(mode); 1063 /* XXX: audit dirfd */ 1064 cap_rights_init(&rights, CAP_LOOKUP); 1065 flags_to_rights(flags, &rights); 1066 /* 1067 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1068 * may be specified. 1069 */ 1070 if (flags & O_EXEC) { 1071 if (flags & O_ACCMODE) 1072 return (EINVAL); 1073 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1074 return (EINVAL); 1075 } else { 1076 flags = FFLAGS(flags); 1077 } 1078 1079 /* 1080 * Allocate the file descriptor, but don't install a descriptor yet. 1081 */ 1082 error = falloc_noinstall(td, &fp); 1083 if (error != 0) 1084 return (error); 1085 /* 1086 * An extra reference on `fp' has been held for us by 1087 * falloc_noinstall(). 
1088 */ 1089 /* Set the flags early so the finit in devfs can pick them up. */ 1090 fp->f_flag = flags & FMASK; 1091 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1092 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1093 &rights, td); 1094 td->td_dupfd = -1; /* XXX check for fdopen */ 1095 error = vn_open(&nd, &flags, cmode, fp); 1096 if (error != 0) { 1097 /* 1098 * If the vn_open replaced the method vector, something 1099 * wonderous happened deep below and we just pass it up 1100 * pretending we know what we do. 1101 */ 1102 if (error == ENXIO && fp->f_ops != &badfileops) 1103 goto success; 1104 1105 /* 1106 * Handle special fdopen() case. bleh. 1107 * 1108 * Don't do this for relative (capability) lookups; we don't 1109 * understand exactly what would happen, and we don't think 1110 * that it ever should. 1111 */ 1112 if (nd.ni_strictrelative == 0 && 1113 (error == ENODEV || error == ENXIO) && 1114 td->td_dupfd >= 0) { 1115 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1116 &indx); 1117 if (error == 0) 1118 goto success; 1119 } 1120 1121 goto bad; 1122 } 1123 td->td_dupfd = 0; 1124 NDFREE(&nd, NDF_ONLY_PNBUF); 1125 vp = nd.ni_vp; 1126 1127 /* 1128 * Store the vnode, for any f_type. Typically, the vnode use 1129 * count is decremented by direct call to vn_closefile() for 1130 * files that switched type in the cdevsw fdopen() method. 1131 */ 1132 fp->f_vnode = vp; 1133 /* 1134 * If the file wasn't claimed by devfs bind it to the normal 1135 * vnode operations here. 
1136 */ 1137 if (fp->f_ops == &badfileops) { 1138 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1139 fp->f_seqcount = 1; 1140 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1141 DTYPE_VNODE, vp, &vnops); 1142 } 1143 1144 VOP_UNLOCK(vp, 0); 1145 if (flags & O_TRUNC) { 1146 error = fo_truncate(fp, 0, td->td_ucred, td); 1147 if (error != 0) 1148 goto bad; 1149 } 1150 success: 1151 /* 1152 * If we haven't already installed the FD (for dupfdopen), do so now. 1153 */ 1154 if (indx == -1) { 1155 struct filecaps *fcaps; 1156 1157 #ifdef CAPABILITIES 1158 if (nd.ni_strictrelative == 1) 1159 fcaps = &nd.ni_filecaps; 1160 else 1161 #endif 1162 fcaps = NULL; 1163 error = finstall(td, fp, &indx, flags, fcaps); 1164 /* On success finstall() consumes fcaps. */ 1165 if (error != 0) { 1166 filecaps_free(&nd.ni_filecaps); 1167 goto bad; 1168 } 1169 } else { 1170 filecaps_free(&nd.ni_filecaps); 1171 } 1172 1173 /* 1174 * Release our private reference, leaving the one associated with 1175 * the descriptor table intact. 1176 */ 1177 fdrop(fp, td); 1178 td->td_retval[0] = indx; 1179 return (0); 1180 bad: 1181 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1182 fdrop(fp, td); 1183 return (error); 1184 } 1185 1186 #ifdef COMPAT_43 1187 /* 1188 * Create a file. 1189 */ 1190 #ifndef _SYS_SYSPROTO_H_ 1191 struct ocreat_args { 1192 char *path; 1193 int mode; 1194 }; 1195 #endif 1196 int 1197 ocreat(td, uap) 1198 struct thread *td; 1199 register struct ocreat_args /* { 1200 char *path; 1201 int mode; 1202 } */ *uap; 1203 { 1204 1205 return (kern_open(td, uap->path, UIO_USERSPACE, 1206 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1207 } 1208 #endif /* COMPAT_43 */ 1209 1210 /* 1211 * Create a special file. 
1212 */ 1213 #ifndef _SYS_SYSPROTO_H_ 1214 struct mknod_args { 1215 char *path; 1216 int mode; 1217 int dev; 1218 }; 1219 #endif 1220 int 1221 sys_mknod(td, uap) 1222 struct thread *td; 1223 register struct mknod_args /* { 1224 char *path; 1225 int mode; 1226 int dev; 1227 } */ *uap; 1228 { 1229 1230 return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev)); 1231 } 1232 1233 #ifndef _SYS_SYSPROTO_H_ 1234 struct mknodat_args { 1235 int fd; 1236 char *path; 1237 mode_t mode; 1238 dev_t dev; 1239 }; 1240 #endif 1241 int 1242 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1243 { 1244 1245 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1246 uap->dev)); 1247 } 1248 1249 int 1250 kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode, 1251 int dev) 1252 { 1253 1254 return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev)); 1255 } 1256 1257 int 1258 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1259 int mode, int dev) 1260 { 1261 struct vnode *vp; 1262 struct mount *mp; 1263 struct vattr vattr; 1264 struct nameidata nd; 1265 cap_rights_t rights; 1266 int error, whiteout = 0; 1267 1268 AUDIT_ARG_MODE(mode); 1269 AUDIT_ARG_DEV(dev); 1270 switch (mode & S_IFMT) { 1271 case S_IFCHR: 1272 case S_IFBLK: 1273 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1274 break; 1275 case S_IFMT: 1276 error = priv_check(td, PRIV_VFS_MKNOD_BAD); 1277 break; 1278 case S_IFWHT: 1279 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1280 break; 1281 case S_IFIFO: 1282 if (dev == 0) 1283 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1284 /* FALLTHROUGH */ 1285 default: 1286 error = EINVAL; 1287 break; 1288 } 1289 if (error != 0) 1290 return (error); 1291 restart: 1292 bwillwrite(); 1293 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1294 pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td); 1295 if ((error = namei(&nd)) != 0) 1296 return (error); 1297 vp = nd.ni_vp; 1298 if 
(vp != NULL) { 1299 NDFREE(&nd, NDF_ONLY_PNBUF); 1300 if (vp == nd.ni_dvp) 1301 vrele(nd.ni_dvp); 1302 else 1303 vput(nd.ni_dvp); 1304 vrele(vp); 1305 return (EEXIST); 1306 } else { 1307 VATTR_NULL(&vattr); 1308 vattr.va_mode = (mode & ALLPERMS) & 1309 ~td->td_proc->p_fd->fd_cmask; 1310 vattr.va_rdev = dev; 1311 whiteout = 0; 1312 1313 switch (mode & S_IFMT) { 1314 case S_IFMT: /* used by badsect to flag bad sectors */ 1315 vattr.va_type = VBAD; 1316 break; 1317 case S_IFCHR: 1318 vattr.va_type = VCHR; 1319 break; 1320 case S_IFBLK: 1321 vattr.va_type = VBLK; 1322 break; 1323 case S_IFWHT: 1324 whiteout = 1; 1325 break; 1326 default: 1327 panic("kern_mknod: invalid mode"); 1328 } 1329 } 1330 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1331 NDFREE(&nd, NDF_ONLY_PNBUF); 1332 vput(nd.ni_dvp); 1333 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1334 return (error); 1335 goto restart; 1336 } 1337 #ifdef MAC 1338 if (error == 0 && !whiteout) 1339 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1340 &nd.ni_cnd, &vattr); 1341 #endif 1342 if (error == 0) { 1343 if (whiteout) 1344 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1345 else { 1346 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1347 &nd.ni_cnd, &vattr); 1348 if (error == 0) 1349 vput(nd.ni_vp); 1350 } 1351 } 1352 NDFREE(&nd, NDF_ONLY_PNBUF); 1353 vput(nd.ni_dvp); 1354 vn_finished_write(mp); 1355 return (error); 1356 } 1357 1358 /* 1359 * Create a named pipe. 
1360 */ 1361 #ifndef _SYS_SYSPROTO_H_ 1362 struct mkfifo_args { 1363 char *path; 1364 int mode; 1365 }; 1366 #endif 1367 int 1368 sys_mkfifo(td, uap) 1369 struct thread *td; 1370 register struct mkfifo_args /* { 1371 char *path; 1372 int mode; 1373 } */ *uap; 1374 { 1375 1376 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1377 } 1378 1379 #ifndef _SYS_SYSPROTO_H_ 1380 struct mkfifoat_args { 1381 int fd; 1382 char *path; 1383 mode_t mode; 1384 }; 1385 #endif 1386 int 1387 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1388 { 1389 1390 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1391 uap->mode)); 1392 } 1393 1394 int 1395 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1396 { 1397 1398 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1399 } 1400 1401 int 1402 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1403 int mode) 1404 { 1405 struct mount *mp; 1406 struct vattr vattr; 1407 struct nameidata nd; 1408 cap_rights_t rights; 1409 int error; 1410 1411 AUDIT_ARG_MODE(mode); 1412 restart: 1413 bwillwrite(); 1414 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1415 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437 #ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error 
!= 0) 1441 goto out; 1442 #endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446 #ifdef MAC 1447 out: 1448 #endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, NDF_ONLY_PNBUF); 1452 return (error); 1453 } 1454 1455 /* 1456 * Make a hard file link. 1457 */ 1458 #ifndef _SYS_SYSPROTO_H_ 1459 struct link_args { 1460 char *path; 1461 char *link; 1462 }; 1463 #endif 1464 int 1465 sys_link(td, uap) 1466 struct thread *td; 1467 register struct link_args /* { 1468 char *path; 1469 char *link; 1470 } */ *uap; 1471 { 1472 1473 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1474 } 1475 1476 #ifndef _SYS_SYSPROTO_H_ 1477 struct linkat_args { 1478 int fd1; 1479 char *path1; 1480 int fd2; 1481 char *path2; 1482 int flag; 1483 }; 1484 #endif 1485 int 1486 sys_linkat(struct thread *td, struct linkat_args *uap) 1487 { 1488 int flag; 1489 1490 flag = uap->flag; 1491 if (flag & ~AT_SYMLINK_FOLLOW) 1492 return (EINVAL); 1493 1494 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1495 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1496 } 1497 1498 int hardlink_check_uid = 0; 1499 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1500 &hardlink_check_uid, 0, 1501 "Unprivileged processes cannot create hard links to files owned by other " 1502 "users"); 1503 static int hardlink_check_gid = 0; 1504 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1505 &hardlink_check_gid, 0, 1506 "Unprivileged processes cannot create hard links to files owned by other " 1507 "groups"); 1508 1509 static int 1510 can_hardlink(struct vnode *vp, struct ucred *cred) 1511 { 1512 struct vattr va; 1513 int error; 1514 1515 if (!hardlink_check_uid && !hardlink_check_gid) 1516 return (0); 1517 1518 error = VOP_GETATTR(vp, &va, cred); 1519 if (error != 0) 1520 return (error); 1521 1522 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1529 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1530 if (error != 0) 1531 return (error); 1532 } 1533 1534 return (0); 1535 } 1536 1537 int 1538 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1539 { 1540 1541 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1542 } 1543 1544 int 1545 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1546 enum uio_seg segflg, int follow) 1547 { 1548 struct vnode *vp; 1549 struct mount *mp; 1550 struct nameidata nd; 1551 cap_rights_t rights; 1552 int error; 1553 1554 again: 1555 bwillwrite(); 1556 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1557 1558 if ((error = namei(&nd)) != 0) 1559 return (error); 1560 NDFREE(&nd, NDF_ONLY_PNBUF); 1561 vp = nd.ni_vp; 1562 if (vp->v_type == VDIR) { 1563 vrele(vp); 1564 return (EPERM); /* POSIX */ 1565 } 1566 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1567 segflg, 
path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1568 if ((error = namei(&nd)) == 0) { 1569 if (nd.ni_vp != NULL) { 1570 NDFREE(&nd, NDF_ONLY_PNBUF); 1571 if (nd.ni_dvp == nd.ni_vp) 1572 vrele(nd.ni_dvp); 1573 else 1574 vput(nd.ni_dvp); 1575 vrele(nd.ni_vp); 1576 vrele(vp); 1577 return (EEXIST); 1578 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1579 /* 1580 * Cross-device link. No need to recheck 1581 * vp->v_type, since it cannot change, except 1582 * to VBAD. 1583 */ 1584 NDFREE(&nd, NDF_ONLY_PNBUF); 1585 vput(nd.ni_dvp); 1586 vrele(vp); 1587 return (EXDEV); 1588 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1589 error = can_hardlink(vp, td->td_ucred); 1590 #ifdef MAC 1591 if (error == 0) 1592 error = mac_vnode_check_link(td->td_ucred, 1593 nd.ni_dvp, vp, &nd.ni_cnd); 1594 #endif 1595 if (error != 0) { 1596 vput(vp); 1597 vput(nd.ni_dvp); 1598 NDFREE(&nd, NDF_ONLY_PNBUF); 1599 return (error); 1600 } 1601 error = vn_start_write(vp, &mp, V_NOWAIT); 1602 if (error != 0) { 1603 vput(vp); 1604 vput(nd.ni_dvp); 1605 NDFREE(&nd, NDF_ONLY_PNBUF); 1606 error = vn_start_write(NULL, &mp, 1607 V_XSLEEP | PCATCH); 1608 if (error != 0) 1609 return (error); 1610 goto again; 1611 } 1612 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1613 VOP_UNLOCK(vp, 0); 1614 vput(nd.ni_dvp); 1615 vn_finished_write(mp); 1616 NDFREE(&nd, NDF_ONLY_PNBUF); 1617 } else { 1618 vput(nd.ni_dvp); 1619 NDFREE(&nd, NDF_ONLY_PNBUF); 1620 vrele(vp); 1621 goto again; 1622 } 1623 } 1624 vrele(vp); 1625 return (error); 1626 } 1627 1628 /* 1629 * Make a symbolic link. 
1630 */ 1631 #ifndef _SYS_SYSPROTO_H_ 1632 struct symlink_args { 1633 char *path; 1634 char *link; 1635 }; 1636 #endif 1637 int 1638 sys_symlink(td, uap) 1639 struct thread *td; 1640 register struct symlink_args /* { 1641 char *path; 1642 char *link; 1643 } */ *uap; 1644 { 1645 1646 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1647 } 1648 1649 #ifndef _SYS_SYSPROTO_H_ 1650 struct symlinkat_args { 1651 char *path; 1652 int fd; 1653 char *path2; 1654 }; 1655 #endif 1656 int 1657 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1658 { 1659 1660 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1661 UIO_USERSPACE)); 1662 } 1663 1664 int 1665 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1666 { 1667 1668 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1669 } 1670 1671 int 1672 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1673 enum uio_seg segflg) 1674 { 1675 struct mount *mp; 1676 struct vattr vattr; 1677 char *syspath; 1678 struct nameidata nd; 1679 int error; 1680 cap_rights_t rights; 1681 1682 if (segflg == UIO_SYSSPACE) { 1683 syspath = path1; 1684 } else { 1685 syspath = uma_zalloc(namei_zone, M_WAITOK); 1686 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1687 goto out; 1688 } 1689 AUDIT_ARG_TEXT(syspath); 1690 restart: 1691 bwillwrite(); 1692 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1693 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1694 if ((error = namei(&nd)) != 0) 1695 goto out; 1696 if (nd.ni_vp) { 1697 NDFREE(&nd, NDF_ONLY_PNBUF); 1698 if (nd.ni_vp == nd.ni_dvp) 1699 vrele(nd.ni_dvp); 1700 else 1701 vput(nd.ni_dvp); 1702 vrele(nd.ni_vp); 1703 error = EEXIST; 1704 goto out; 1705 } 1706 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1707 NDFREE(&nd, NDF_ONLY_PNBUF); 1708 vput(nd.ni_dvp); 1709 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1710 goto out; 1711 goto restart; 
1712 } 1713 VATTR_NULL(&vattr); 1714 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1715 #ifdef MAC 1716 vattr.va_type = VLNK; 1717 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1718 &vattr); 1719 if (error != 0) 1720 goto out2; 1721 #endif 1722 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1723 if (error == 0) 1724 vput(nd.ni_vp); 1725 #ifdef MAC 1726 out2: 1727 #endif 1728 NDFREE(&nd, NDF_ONLY_PNBUF); 1729 vput(nd.ni_dvp); 1730 vn_finished_write(mp); 1731 out: 1732 if (segflg != UIO_SYSSPACE) 1733 uma_zfree(namei_zone, syspath); 1734 return (error); 1735 } 1736 1737 /* 1738 * Delete a whiteout from the filesystem. 1739 */ 1740 int 1741 sys_undelete(td, uap) 1742 struct thread *td; 1743 register struct undelete_args /* { 1744 char *path; 1745 } */ *uap; 1746 { 1747 struct mount *mp; 1748 struct nameidata nd; 1749 int error; 1750 1751 restart: 1752 bwillwrite(); 1753 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1754 UIO_USERSPACE, uap->path, td); 1755 error = namei(&nd); 1756 if (error != 0) 1757 return (error); 1758 1759 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1760 NDFREE(&nd, NDF_ONLY_PNBUF); 1761 if (nd.ni_vp == nd.ni_dvp) 1762 vrele(nd.ni_dvp); 1763 else 1764 vput(nd.ni_dvp); 1765 if (nd.ni_vp) 1766 vrele(nd.ni_vp); 1767 return (EEXIST); 1768 } 1769 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1770 NDFREE(&nd, NDF_ONLY_PNBUF); 1771 vput(nd.ni_dvp); 1772 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1773 return (error); 1774 goto restart; 1775 } 1776 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1777 NDFREE(&nd, NDF_ONLY_PNBUF); 1778 vput(nd.ni_dvp); 1779 vn_finished_write(mp); 1780 return (error); 1781 } 1782 1783 /* 1784 * Delete a name from the filesystem. 
1785 */ 1786 #ifndef _SYS_SYSPROTO_H_ 1787 struct unlink_args { 1788 char *path; 1789 }; 1790 #endif 1791 int 1792 sys_unlink(td, uap) 1793 struct thread *td; 1794 struct unlink_args /* { 1795 char *path; 1796 } */ *uap; 1797 { 1798 1799 return (kern_unlink(td, uap->path, UIO_USERSPACE)); 1800 } 1801 1802 #ifndef _SYS_SYSPROTO_H_ 1803 struct unlinkat_args { 1804 int fd; 1805 char *path; 1806 int flag; 1807 }; 1808 #endif 1809 int 1810 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1811 { 1812 int flag = uap->flag; 1813 int fd = uap->fd; 1814 char *path = uap->path; 1815 1816 if (flag & ~AT_REMOVEDIR) 1817 return (EINVAL); 1818 1819 if (flag & AT_REMOVEDIR) 1820 return (kern_rmdirat(td, fd, path, UIO_USERSPACE)); 1821 else 1822 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0)); 1823 } 1824 1825 int 1826 kern_unlink(struct thread *td, char *path, enum uio_seg pathseg) 1827 { 1828 1829 return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0)); 1830 } 1831 1832 int 1833 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1834 ino_t oldinum) 1835 { 1836 struct mount *mp; 1837 struct vnode *vp; 1838 struct nameidata nd; 1839 struct stat sb; 1840 cap_rights_t rights; 1841 int error; 1842 1843 restart: 1844 bwillwrite(); 1845 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 1846 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 1847 if ((error = namei(&nd)) != 0) 1848 return (error == EINVAL ? EPERM : error); 1849 vp = nd.ni_vp; 1850 if (vp->v_type == VDIR && oldinum == 0) { 1851 error = EPERM; /* POSIX */ 1852 } else if (oldinum != 0 && 1853 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1854 sb.st_ino != oldinum) { 1855 error = EIDRM; /* Identifier removed */ 1856 } else { 1857 /* 1858 * The root of a mounted filesystem cannot be deleted. 1859 * 1860 * XXX: can this only be a VDIR case? 
1861 */ 1862 if (vp->v_vflag & VV_ROOT) 1863 error = EBUSY; 1864 } 1865 if (error == 0) { 1866 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1867 NDFREE(&nd, NDF_ONLY_PNBUF); 1868 vput(nd.ni_dvp); 1869 if (vp == nd.ni_dvp) 1870 vrele(vp); 1871 else 1872 vput(vp); 1873 if ((error = vn_start_write(NULL, &mp, 1874 V_XSLEEP | PCATCH)) != 0) 1875 return (error); 1876 goto restart; 1877 } 1878 #ifdef MAC 1879 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1880 &nd.ni_cnd); 1881 if (error != 0) 1882 goto out; 1883 #endif 1884 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1885 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1886 #ifdef MAC 1887 out: 1888 #endif 1889 vn_finished_write(mp); 1890 } 1891 NDFREE(&nd, NDF_ONLY_PNBUF); 1892 vput(nd.ni_dvp); 1893 if (vp == nd.ni_dvp) 1894 vrele(vp); 1895 else 1896 vput(vp); 1897 return (error); 1898 } 1899 1900 /* 1901 * Reposition read/write file offset. 1902 */ 1903 #ifndef _SYS_SYSPROTO_H_ 1904 struct lseek_args { 1905 int fd; 1906 int pad; 1907 off_t offset; 1908 int whence; 1909 }; 1910 #endif 1911 int 1912 sys_lseek(td, uap) 1913 struct thread *td; 1914 register struct lseek_args /* { 1915 int fd; 1916 int pad; 1917 off_t offset; 1918 int whence; 1919 } */ *uap; 1920 { 1921 struct file *fp; 1922 cap_rights_t rights; 1923 int error; 1924 1925 AUDIT_ARG_FD(uap->fd); 1926 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp); 1927 if (error != 0) 1928 return (error); 1929 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1930 fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE; 1931 fdrop(fp, td); 1932 return (error); 1933 } 1934 1935 #if defined(COMPAT_43) 1936 /* 1937 * Reposition read/write file offset. 
1938 */ 1939 #ifndef _SYS_SYSPROTO_H_ 1940 struct olseek_args { 1941 int fd; 1942 long offset; 1943 int whence; 1944 }; 1945 #endif 1946 int 1947 olseek(td, uap) 1948 struct thread *td; 1949 register struct olseek_args /* { 1950 int fd; 1951 long offset; 1952 int whence; 1953 } */ *uap; 1954 { 1955 struct lseek_args /* { 1956 int fd; 1957 int pad; 1958 off_t offset; 1959 int whence; 1960 } */ nuap; 1961 1962 nuap.fd = uap->fd; 1963 nuap.offset = uap->offset; 1964 nuap.whence = uap->whence; 1965 return (sys_lseek(td, &nuap)); 1966 } 1967 #endif /* COMPAT_43 */ 1968 1969 /* Version with the 'pad' argument */ 1970 int 1971 freebsd6_lseek(td, uap) 1972 struct thread *td; 1973 register struct freebsd6_lseek_args *uap; 1974 { 1975 struct lseek_args ouap; 1976 1977 ouap.fd = uap->fd; 1978 ouap.offset = uap->offset; 1979 ouap.whence = uap->whence; 1980 return (sys_lseek(td, &ouap)); 1981 } 1982 1983 /* 1984 * Check access permissions using passed credentials. 1985 */ 1986 static int 1987 vn_access(vp, user_flags, cred, td) 1988 struct vnode *vp; 1989 int user_flags; 1990 struct ucred *cred; 1991 struct thread *td; 1992 { 1993 accmode_t accmode; 1994 int error; 1995 1996 /* Flags == 0 means only check for existence. */ 1997 error = 0; 1998 if (user_flags) { 1999 accmode = 0; 2000 if (user_flags & R_OK) 2001 accmode |= VREAD; 2002 if (user_flags & W_OK) 2003 accmode |= VWRITE; 2004 if (user_flags & X_OK) 2005 accmode |= VEXEC; 2006 #ifdef MAC 2007 error = mac_vnode_check_access(cred, vp, accmode); 2008 if (error != 0) 2009 return (error); 2010 #endif 2011 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2012 error = VOP_ACCESS(vp, accmode, cred, td); 2013 } 2014 return (error); 2015 } 2016 2017 /* 2018 * Check access permissions using "real" credentials. 
2019 */ 2020 #ifndef _SYS_SYSPROTO_H_ 2021 struct access_args { 2022 char *path; 2023 int amode; 2024 }; 2025 #endif 2026 int 2027 sys_access(td, uap) 2028 struct thread *td; 2029 register struct access_args /* { 2030 char *path; 2031 int amode; 2032 } */ *uap; 2033 { 2034 2035 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2036 } 2037 2038 #ifndef _SYS_SYSPROTO_H_ 2039 struct faccessat_args { 2040 int dirfd; 2041 char *path; 2042 int amode; 2043 int flag; 2044 } 2045 #endif 2046 int 2047 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2048 { 2049 2050 if (uap->flag & ~AT_EACCESS) 2051 return (EINVAL); 2052 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2053 uap->amode)); 2054 } 2055 2056 int 2057 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2058 { 2059 2060 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2061 } 2062 2063 int 2064 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2065 int flag, int amode) 2066 { 2067 struct ucred *cred, *tmpcred; 2068 struct vnode *vp; 2069 struct nameidata nd; 2070 cap_rights_t rights; 2071 int error; 2072 2073 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2074 return (EINVAL); 2075 2076 /* 2077 * Create and modify a temporary credential instead of one that 2078 * is potentially shared. 
2079 */ 2080 if (!(flag & AT_EACCESS)) { 2081 cred = td->td_ucred; 2082 tmpcred = crdup(cred); 2083 tmpcred->cr_uid = cred->cr_ruid; 2084 tmpcred->cr_groups[0] = cred->cr_rgid; 2085 td->td_ucred = tmpcred; 2086 } else 2087 cred = tmpcred = td->td_ucred; 2088 AUDIT_ARG_VALUE(amode); 2089 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2090 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2091 td); 2092 if ((error = namei(&nd)) != 0) 2093 goto out1; 2094 vp = nd.ni_vp; 2095 2096 error = vn_access(vp, amode, tmpcred, td); 2097 NDFREE(&nd, NDF_ONLY_PNBUF); 2098 vput(vp); 2099 out1: 2100 if (!(flag & AT_EACCESS)) { 2101 td->td_ucred = cred; 2102 crfree(tmpcred); 2103 } 2104 return (error); 2105 } 2106 2107 /* 2108 * Check access permissions using "effective" credentials. 2109 */ 2110 #ifndef _SYS_SYSPROTO_H_ 2111 struct eaccess_args { 2112 char *path; 2113 int amode; 2114 }; 2115 #endif 2116 int 2117 sys_eaccess(td, uap) 2118 struct thread *td; 2119 register struct eaccess_args /* { 2120 char *path; 2121 int amode; 2122 } */ *uap; 2123 { 2124 2125 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2126 } 2127 2128 int 2129 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2130 { 2131 2132 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2133 } 2134 2135 #if defined(COMPAT_43) 2136 /* 2137 * Get file status; this version follows links. 
2138 */ 2139 #ifndef _SYS_SYSPROTO_H_ 2140 struct ostat_args { 2141 char *path; 2142 struct ostat *ub; 2143 }; 2144 #endif 2145 int 2146 ostat(td, uap) 2147 struct thread *td; 2148 register struct ostat_args /* { 2149 char *path; 2150 struct ostat *ub; 2151 } */ *uap; 2152 { 2153 struct stat sb; 2154 struct ostat osb; 2155 int error; 2156 2157 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2158 if (error != 0) 2159 return (error); 2160 cvtstat(&sb, &osb); 2161 return (copyout(&osb, uap->ub, sizeof (osb))); 2162 } 2163 2164 /* 2165 * Get file status; this version does not follow links. 2166 */ 2167 #ifndef _SYS_SYSPROTO_H_ 2168 struct olstat_args { 2169 char *path; 2170 struct ostat *ub; 2171 }; 2172 #endif 2173 int 2174 olstat(td, uap) 2175 struct thread *td; 2176 register struct olstat_args /* { 2177 char *path; 2178 struct ostat *ub; 2179 } */ *uap; 2180 { 2181 struct stat sb; 2182 struct ostat osb; 2183 int error; 2184 2185 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2186 if (error != 0) 2187 return (error); 2188 cvtstat(&sb, &osb); 2189 return (copyout(&osb, uap->ub, sizeof (osb))); 2190 } 2191 2192 /* 2193 * Convert from an old to a new stat structure. 2194 */ 2195 void 2196 cvtstat(st, ost) 2197 struct stat *st; 2198 struct ostat *ost; 2199 { 2200 2201 ost->st_dev = st->st_dev; 2202 ost->st_ino = st->st_ino; 2203 ost->st_mode = st->st_mode; 2204 ost->st_nlink = st->st_nlink; 2205 ost->st_uid = st->st_uid; 2206 ost->st_gid = st->st_gid; 2207 ost->st_rdev = st->st_rdev; 2208 if (st->st_size < (quad_t)1 << 32) 2209 ost->st_size = st->st_size; 2210 else 2211 ost->st_size = -2; 2212 ost->st_atim = st->st_atim; 2213 ost->st_mtim = st->st_mtim; 2214 ost->st_ctim = st->st_ctim; 2215 ost->st_blksize = st->st_blksize; 2216 ost->st_blocks = st->st_blocks; 2217 ost->st_flags = st->st_flags; 2218 ost->st_gen = st->st_gen; 2219 } 2220 #endif /* COMPAT_43 */ 2221 2222 /* 2223 * Get file status; this version follows links. 
2224 */ 2225 #ifndef _SYS_SYSPROTO_H_ 2226 struct stat_args { 2227 char *path; 2228 struct stat *ub; 2229 }; 2230 #endif 2231 int 2232 sys_stat(td, uap) 2233 struct thread *td; 2234 register struct stat_args /* { 2235 char *path; 2236 struct stat *ub; 2237 } */ *uap; 2238 { 2239 struct stat sb; 2240 int error; 2241 2242 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2243 if (error == 0) 2244 error = copyout(&sb, uap->ub, sizeof (sb)); 2245 return (error); 2246 } 2247 2248 #ifndef _SYS_SYSPROTO_H_ 2249 struct fstatat_args { 2250 int fd; 2251 char *path; 2252 struct stat *buf; 2253 int flag; 2254 } 2255 #endif 2256 int 2257 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2258 { 2259 struct stat sb; 2260 int error; 2261 2262 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2263 UIO_USERSPACE, &sb); 2264 if (error == 0) 2265 error = copyout(&sb, uap->buf, sizeof (sb)); 2266 return (error); 2267 } 2268 2269 int 2270 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2271 { 2272 2273 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2274 } 2275 2276 int 2277 kern_statat(struct thread *td, int flag, int fd, char *path, 2278 enum uio_seg pathseg, struct stat *sbp) 2279 { 2280 2281 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2282 } 2283 2284 int 2285 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2286 enum uio_seg pathseg, struct stat *sbp, 2287 void (*hook)(struct vnode *vp, struct stat *sbp)) 2288 { 2289 struct nameidata nd; 2290 struct stat sb; 2291 cap_rights_t rights; 2292 int error; 2293 2294 if (flag & ~AT_SYMLINK_NOFOLLOW) 2295 return (EINVAL); 2296 2297 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2298 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2299 cap_rights_init(&rights, CAP_FSTAT), td); 2300 2301 if ((error = namei(&nd)) != 0) 2302 return (error); 2303 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2304 if (error == 0) { 2305 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2306 if (S_ISREG(sb.st_mode)) 2307 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2308 if (__predict_false(hook != NULL)) 2309 hook(nd.ni_vp, &sb); 2310 } 2311 NDFREE(&nd, NDF_ONLY_PNBUF); 2312 vput(nd.ni_vp); 2313 if (error != 0) 2314 return (error); 2315 *sbp = sb; 2316 #ifdef KTRACE 2317 if (KTRPOINT(td, KTR_STRUCT)) 2318 ktrstat(&sb); 2319 #endif 2320 return (0); 2321 } 2322 2323 /* 2324 * Get file status; this version does not follow links. 2325 */ 2326 #ifndef _SYS_SYSPROTO_H_ 2327 struct lstat_args { 2328 char *path; 2329 struct stat *ub; 2330 }; 2331 #endif 2332 int 2333 sys_lstat(td, uap) 2334 struct thread *td; 2335 register struct lstat_args /* { 2336 char *path; 2337 struct stat *ub; 2338 } */ *uap; 2339 { 2340 struct stat sb; 2341 int error; 2342 2343 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2344 if (error == 0) 2345 error = copyout(&sb, uap->ub, sizeof (sb)); 2346 return (error); 2347 } 2348 2349 int 2350 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2351 { 2352 2353 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2354 sbp)); 2355 } 2356 2357 /* 2358 * Implementation of the NetBSD [l]stat() functions. 
2359 */ 2360 void 2361 cvtnstat(sb, nsb) 2362 struct stat *sb; 2363 struct nstat *nsb; 2364 { 2365 2366 bzero(nsb, sizeof *nsb); 2367 nsb->st_dev = sb->st_dev; 2368 nsb->st_ino = sb->st_ino; 2369 nsb->st_mode = sb->st_mode; 2370 nsb->st_nlink = sb->st_nlink; 2371 nsb->st_uid = sb->st_uid; 2372 nsb->st_gid = sb->st_gid; 2373 nsb->st_rdev = sb->st_rdev; 2374 nsb->st_atim = sb->st_atim; 2375 nsb->st_mtim = sb->st_mtim; 2376 nsb->st_ctim = sb->st_ctim; 2377 nsb->st_size = sb->st_size; 2378 nsb->st_blocks = sb->st_blocks; 2379 nsb->st_blksize = sb->st_blksize; 2380 nsb->st_flags = sb->st_flags; 2381 nsb->st_gen = sb->st_gen; 2382 nsb->st_birthtim = sb->st_birthtim; 2383 } 2384 2385 #ifndef _SYS_SYSPROTO_H_ 2386 struct nstat_args { 2387 char *path; 2388 struct nstat *ub; 2389 }; 2390 #endif 2391 int 2392 sys_nstat(td, uap) 2393 struct thread *td; 2394 register struct nstat_args /* { 2395 char *path; 2396 struct nstat *ub; 2397 } */ *uap; 2398 { 2399 struct stat sb; 2400 struct nstat nsb; 2401 int error; 2402 2403 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2404 if (error != 0) 2405 return (error); 2406 cvtnstat(&sb, &nsb); 2407 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2408 } 2409 2410 /* 2411 * NetBSD lstat. Get file status; this version does not follow links. 2412 */ 2413 #ifndef _SYS_SYSPROTO_H_ 2414 struct lstat_args { 2415 char *path; 2416 struct stat *ub; 2417 }; 2418 #endif 2419 int 2420 sys_nlstat(td, uap) 2421 struct thread *td; 2422 register struct nlstat_args /* { 2423 char *path; 2424 struct nstat *ub; 2425 } */ *uap; 2426 { 2427 struct stat sb; 2428 struct nstat nsb; 2429 int error; 2430 2431 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2432 if (error != 0) 2433 return (error); 2434 cvtnstat(&sb, &nsb); 2435 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2436 } 2437 2438 /* 2439 * Get configurable pathname variables. 
2440 */ 2441 #ifndef _SYS_SYSPROTO_H_ 2442 struct pathconf_args { 2443 char *path; 2444 int name; 2445 }; 2446 #endif 2447 int 2448 sys_pathconf(td, uap) 2449 struct thread *td; 2450 register struct pathconf_args /* { 2451 char *path; 2452 int name; 2453 } */ *uap; 2454 { 2455 2456 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2457 } 2458 2459 #ifndef _SYS_SYSPROTO_H_ 2460 struct lpathconf_args { 2461 char *path; 2462 int name; 2463 }; 2464 #endif 2465 int 2466 sys_lpathconf(td, uap) 2467 struct thread *td; 2468 register struct lpathconf_args /* { 2469 char *path; 2470 int name; 2471 } */ *uap; 2472 { 2473 2474 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2475 NOFOLLOW)); 2476 } 2477 2478 int 2479 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2480 u_long flags) 2481 { 2482 struct nameidata nd; 2483 int error; 2484 2485 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2486 pathseg, path, td); 2487 if ((error = namei(&nd)) != 0) 2488 return (error); 2489 NDFREE(&nd, NDF_ONLY_PNBUF); 2490 2491 /* If asynchronous I/O is available, it works for all files. */ 2492 if (name == _PC_ASYNC_IO) 2493 td->td_retval[0] = async_io_version; 2494 else 2495 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2496 vput(nd.ni_vp); 2497 return (error); 2498 } 2499 2500 /* 2501 * Return target name of a symbolic link. 
2502 */ 2503 #ifndef _SYS_SYSPROTO_H_ 2504 struct readlink_args { 2505 char *path; 2506 char *buf; 2507 size_t count; 2508 }; 2509 #endif 2510 int 2511 sys_readlink(td, uap) 2512 struct thread *td; 2513 register struct readlink_args /* { 2514 char *path; 2515 char *buf; 2516 size_t count; 2517 } */ *uap; 2518 { 2519 2520 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2521 UIO_USERSPACE, uap->count)); 2522 } 2523 #ifndef _SYS_SYSPROTO_H_ 2524 struct readlinkat_args { 2525 int fd; 2526 char *path; 2527 char *buf; 2528 size_t bufsize; 2529 }; 2530 #endif 2531 int 2532 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2533 { 2534 2535 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2536 uap->buf, UIO_USERSPACE, uap->bufsize)); 2537 } 2538 2539 int 2540 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2541 enum uio_seg bufseg, size_t count) 2542 { 2543 2544 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2545 count)); 2546 } 2547 2548 int 2549 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2550 char *buf, enum uio_seg bufseg, size_t count) 2551 { 2552 struct vnode *vp; 2553 struct iovec aiov; 2554 struct uio auio; 2555 struct nameidata nd; 2556 int error; 2557 2558 if (count > IOSIZE_MAX) 2559 return (EINVAL); 2560 2561 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2562 pathseg, path, fd, td); 2563 2564 if ((error = namei(&nd)) != 0) 2565 return (error); 2566 NDFREE(&nd, NDF_ONLY_PNBUF); 2567 vp = nd.ni_vp; 2568 #ifdef MAC 2569 error = mac_vnode_check_readlink(td->td_ucred, vp); 2570 if (error != 0) { 2571 vput(vp); 2572 return (error); 2573 } 2574 #endif 2575 if (vp->v_type != VLNK) 2576 error = EINVAL; 2577 else { 2578 aiov.iov_base = buf; 2579 aiov.iov_len = count; 2580 auio.uio_iov = &aiov; 2581 auio.uio_iovcnt = 1; 2582 auio.uio_offset = 0; 2583 auio.uio_rw = UIO_READ; 2584 auio.uio_segflg = bufseg; 2585 auio.uio_td = td; 
2586 auio.uio_resid = count; 2587 error = VOP_READLINK(vp, &auio, td->td_ucred); 2588 td->td_retval[0] = count - auio.uio_resid; 2589 } 2590 vput(vp); 2591 return (error); 2592 } 2593 2594 /* 2595 * Common implementation code for chflags() and fchflags(). 2596 */ 2597 static int 2598 setfflags(td, vp, flags) 2599 struct thread *td; 2600 struct vnode *vp; 2601 u_long flags; 2602 { 2603 struct mount *mp; 2604 struct vattr vattr; 2605 int error; 2606 2607 /* We can't support the value matching VNOVAL. */ 2608 if (flags == VNOVAL) 2609 return (EOPNOTSUPP); 2610 2611 /* 2612 * Prevent non-root users from setting flags on devices. When 2613 * a device is reused, users can retain ownership of the device 2614 * if they are allowed to set flags and programs assume that 2615 * chown can't fail when done as root. 2616 */ 2617 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2618 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2619 if (error != 0) 2620 return (error); 2621 } 2622 2623 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2624 return (error); 2625 VATTR_NULL(&vattr); 2626 vattr.va_flags = flags; 2627 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2628 #ifdef MAC 2629 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2630 if (error == 0) 2631 #endif 2632 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2633 VOP_UNLOCK(vp, 0); 2634 vn_finished_write(mp); 2635 return (error); 2636 } 2637 2638 /* 2639 * Change flags of a file given a path name. 
2640 */ 2641 #ifndef _SYS_SYSPROTO_H_ 2642 struct chflags_args { 2643 const char *path; 2644 u_long flags; 2645 }; 2646 #endif 2647 int 2648 sys_chflags(td, uap) 2649 struct thread *td; 2650 register struct chflags_args /* { 2651 const char *path; 2652 u_long flags; 2653 } */ *uap; 2654 { 2655 2656 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2657 } 2658 2659 #ifndef _SYS_SYSPROTO_H_ 2660 struct chflagsat_args { 2661 int fd; 2662 const char *path; 2663 u_long flags; 2664 int atflag; 2665 } 2666 #endif 2667 int 2668 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2669 { 2670 int fd = uap->fd; 2671 const char *path = uap->path; 2672 u_long flags = uap->flags; 2673 int atflag = uap->atflag; 2674 2675 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2676 return (EINVAL); 2677 2678 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2679 } 2680 2681 static int 2682 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2683 u_long flags) 2684 { 2685 2686 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2687 } 2688 2689 /* 2690 * Same as chflags() but doesn't follow symlinks. 2691 */ 2692 int 2693 sys_lchflags(td, uap) 2694 struct thread *td; 2695 register struct lchflags_args /* { 2696 const char *path; 2697 u_long flags; 2698 } */ *uap; 2699 { 2700 2701 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2702 uap->flags, AT_SYMLINK_NOFOLLOW)); 2703 } 2704 2705 static int 2706 kern_chflagsat(struct thread *td, int fd, const char *path, 2707 enum uio_seg pathseg, u_long flags, int atflag) 2708 { 2709 struct nameidata nd; 2710 cap_rights_t rights; 2711 int error, follow; 2712 2713 AUDIT_ARG_FFLAGS(flags); 2714 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2715 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2716 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2717 if ((error = namei(&nd)) != 0) 2718 return (error); 2719 NDFREE(&nd, NDF_ONLY_PNBUF); 2720 error = setfflags(td, nd.ni_vp, flags); 2721 vrele(nd.ni_vp); 2722 return (error); 2723 } 2724 2725 /* 2726 * Change flags of a file given a file descriptor. 2727 */ 2728 #ifndef _SYS_SYSPROTO_H_ 2729 struct fchflags_args { 2730 int fd; 2731 u_long flags; 2732 }; 2733 #endif 2734 int 2735 sys_fchflags(td, uap) 2736 struct thread *td; 2737 register struct fchflags_args /* { 2738 int fd; 2739 u_long flags; 2740 } */ *uap; 2741 { 2742 struct file *fp; 2743 cap_rights_t rights; 2744 int error; 2745 2746 AUDIT_ARG_FD(uap->fd); 2747 AUDIT_ARG_FFLAGS(uap->flags); 2748 error = getvnode(td->td_proc->p_fd, uap->fd, 2749 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2750 if (error != 0) 2751 return (error); 2752 #ifdef AUDIT 2753 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2754 AUDIT_ARG_VNODE1(fp->f_vnode); 2755 VOP_UNLOCK(fp->f_vnode, 0); 2756 #endif 2757 error = setfflags(td, fp->f_vnode, uap->flags); 2758 fdrop(fp, td); 2759 return (error); 2760 } 2761 2762 /* 2763 * Common implementation code for chmod(), lchmod() and fchmod(). 2764 */ 2765 int 2766 setfmode(td, cred, vp, mode) 2767 struct thread *td; 2768 struct ucred *cred; 2769 struct vnode *vp; 2770 int mode; 2771 { 2772 struct mount *mp; 2773 struct vattr vattr; 2774 int error; 2775 2776 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2777 return (error); 2778 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2779 VATTR_NULL(&vattr); 2780 vattr.va_mode = mode & ALLPERMS; 2781 #ifdef MAC 2782 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2783 if (error == 0) 2784 #endif 2785 error = VOP_SETATTR(vp, &vattr, cred); 2786 VOP_UNLOCK(vp, 0); 2787 vn_finished_write(mp); 2788 return (error); 2789 } 2790 2791 /* 2792 * Change mode of a file given path name. 
2793 */ 2794 #ifndef _SYS_SYSPROTO_H_ 2795 struct chmod_args { 2796 char *path; 2797 int mode; 2798 }; 2799 #endif 2800 int 2801 sys_chmod(td, uap) 2802 struct thread *td; 2803 register struct chmod_args /* { 2804 char *path; 2805 int mode; 2806 } */ *uap; 2807 { 2808 2809 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2810 } 2811 2812 #ifndef _SYS_SYSPROTO_H_ 2813 struct fchmodat_args { 2814 int dirfd; 2815 char *path; 2816 mode_t mode; 2817 int flag; 2818 } 2819 #endif 2820 int 2821 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2822 { 2823 int flag = uap->flag; 2824 int fd = uap->fd; 2825 char *path = uap->path; 2826 mode_t mode = uap->mode; 2827 2828 if (flag & ~AT_SYMLINK_NOFOLLOW) 2829 return (EINVAL); 2830 2831 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2832 } 2833 2834 int 2835 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2836 { 2837 2838 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2839 } 2840 2841 /* 2842 * Change mode of a file given path name (don't follow links.) 2843 */ 2844 #ifndef _SYS_SYSPROTO_H_ 2845 struct lchmod_args { 2846 char *path; 2847 int mode; 2848 }; 2849 #endif 2850 int 2851 sys_lchmod(td, uap) 2852 struct thread *td; 2853 register struct lchmod_args /* { 2854 char *path; 2855 int mode; 2856 } */ *uap; 2857 { 2858 2859 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2860 uap->mode, AT_SYMLINK_NOFOLLOW)); 2861 } 2862 2863 int 2864 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2865 mode_t mode, int flag) 2866 { 2867 struct nameidata nd; 2868 cap_rights_t rights; 2869 int error, follow; 2870 2871 AUDIT_ARG_MODE(mode); 2872 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2873 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2874 cap_rights_init(&rights, CAP_FCHMOD), td); 2875 if ((error = namei(&nd)) != 0) 2876 return (error); 2877 NDFREE(&nd, NDF_ONLY_PNBUF); 2878 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2879 vrele(nd.ni_vp); 2880 return (error); 2881 } 2882 2883 /* 2884 * Change mode of a file given a file descriptor. 2885 */ 2886 #ifndef _SYS_SYSPROTO_H_ 2887 struct fchmod_args { 2888 int fd; 2889 int mode; 2890 }; 2891 #endif 2892 int 2893 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2894 { 2895 struct file *fp; 2896 cap_rights_t rights; 2897 int error; 2898 2899 AUDIT_ARG_FD(uap->fd); 2900 AUDIT_ARG_MODE(uap->mode); 2901 2902 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2903 if (error != 0) 2904 return (error); 2905 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2906 fdrop(fp, td); 2907 return (error); 2908 } 2909 2910 /* 2911 * Common implementation for chown(), lchown(), and fchown() 2912 */ 2913 int 2914 setfown(td, cred, vp, uid, gid) 2915 struct thread *td; 2916 struct ucred *cred; 2917 struct vnode *vp; 2918 uid_t uid; 2919 gid_t gid; 2920 { 2921 struct mount *mp; 2922 struct vattr vattr; 2923 int error; 2924 2925 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2926 return (error); 2927 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2928 VATTR_NULL(&vattr); 2929 vattr.va_uid = uid; 2930 vattr.va_gid = gid; 2931 #ifdef MAC 2932 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2933 vattr.va_gid); 2934 if (error == 0) 2935 #endif 2936 error = VOP_SETATTR(vp, &vattr, cred); 2937 VOP_UNLOCK(vp, 0); 2938 vn_finished_write(mp); 2939 return (error); 2940 } 2941 2942 /* 2943 * Set ownership given a path name. 
2944 */ 2945 #ifndef _SYS_SYSPROTO_H_ 2946 struct chown_args { 2947 char *path; 2948 int uid; 2949 int gid; 2950 }; 2951 #endif 2952 int 2953 sys_chown(td, uap) 2954 struct thread *td; 2955 register struct chown_args /* { 2956 char *path; 2957 int uid; 2958 int gid; 2959 } */ *uap; 2960 { 2961 2962 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2963 } 2964 2965 #ifndef _SYS_SYSPROTO_H_ 2966 struct fchownat_args { 2967 int fd; 2968 const char * path; 2969 uid_t uid; 2970 gid_t gid; 2971 int flag; 2972 }; 2973 #endif 2974 int 2975 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2976 { 2977 int flag; 2978 2979 flag = uap->flag; 2980 if (flag & ~AT_SYMLINK_NOFOLLOW) 2981 return (EINVAL); 2982 2983 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2984 uap->gid, uap->flag)); 2985 } 2986 2987 int 2988 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2989 int gid) 2990 { 2991 2992 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2993 } 2994 2995 int 2996 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2997 int uid, int gid, int flag) 2998 { 2999 struct nameidata nd; 3000 cap_rights_t rights; 3001 int error, follow; 3002 3003 AUDIT_ARG_OWNER(uid, gid); 3004 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 3005 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 3006 cap_rights_init(&rights, CAP_FCHOWN), td); 3007 3008 if ((error = namei(&nd)) != 0) 3009 return (error); 3010 NDFREE(&nd, NDF_ONLY_PNBUF); 3011 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3012 vrele(nd.ni_vp); 3013 return (error); 3014 } 3015 3016 /* 3017 * Set ownership given a path name, do not cross symlinks. 
3018 */ 3019 #ifndef _SYS_SYSPROTO_H_ 3020 struct lchown_args { 3021 char *path; 3022 int uid; 3023 int gid; 3024 }; 3025 #endif 3026 int 3027 sys_lchown(td, uap) 3028 struct thread *td; 3029 register struct lchown_args /* { 3030 char *path; 3031 int uid; 3032 int gid; 3033 } */ *uap; 3034 { 3035 3036 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3037 } 3038 3039 int 3040 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3041 int gid) 3042 { 3043 3044 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3045 AT_SYMLINK_NOFOLLOW)); 3046 } 3047 3048 /* 3049 * Set ownership given a file descriptor. 3050 */ 3051 #ifndef _SYS_SYSPROTO_H_ 3052 struct fchown_args { 3053 int fd; 3054 int uid; 3055 int gid; 3056 }; 3057 #endif 3058 int 3059 sys_fchown(td, uap) 3060 struct thread *td; 3061 register struct fchown_args /* { 3062 int fd; 3063 int uid; 3064 int gid; 3065 } */ *uap; 3066 { 3067 struct file *fp; 3068 cap_rights_t rights; 3069 int error; 3070 3071 AUDIT_ARG_FD(uap->fd); 3072 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3073 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3074 if (error != 0) 3075 return (error); 3076 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3077 fdrop(fp, td); 3078 return (error); 3079 } 3080 3081 /* 3082 * Common implementation code for utimes(), lutimes(), and futimes(). 
3083 */ 3084 static int 3085 getutimes(usrtvp, tvpseg, tsp) 3086 const struct timeval *usrtvp; 3087 enum uio_seg tvpseg; 3088 struct timespec *tsp; 3089 { 3090 struct timeval tv[2]; 3091 const struct timeval *tvp; 3092 int error; 3093 3094 if (usrtvp == NULL) { 3095 vfs_timestamp(&tsp[0]); 3096 tsp[1] = tsp[0]; 3097 } else { 3098 if (tvpseg == UIO_SYSSPACE) { 3099 tvp = usrtvp; 3100 } else { 3101 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3102 return (error); 3103 tvp = tv; 3104 } 3105 3106 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3107 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3108 return (EINVAL); 3109 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3110 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3111 } 3112 return (0); 3113 } 3114 3115 /* 3116 * Common implementation code for utimes(), lutimes(), and futimes(). 3117 */ 3118 static int 3119 setutimes(td, vp, ts, numtimes, nullflag) 3120 struct thread *td; 3121 struct vnode *vp; 3122 const struct timespec *ts; 3123 int numtimes; 3124 int nullflag; 3125 { 3126 struct mount *mp; 3127 struct vattr vattr; 3128 int error, setbirthtime; 3129 3130 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3131 return (error); 3132 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3133 setbirthtime = 0; 3134 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3135 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3136 setbirthtime = 1; 3137 VATTR_NULL(&vattr); 3138 vattr.va_atime = ts[0]; 3139 vattr.va_mtime = ts[1]; 3140 if (setbirthtime) 3141 vattr.va_birthtime = ts[1]; 3142 if (numtimes > 2) 3143 vattr.va_birthtime = ts[2]; 3144 if (nullflag) 3145 vattr.va_vaflags |= VA_UTIMES_NULL; 3146 #ifdef MAC 3147 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3148 vattr.va_mtime); 3149 #endif 3150 if (error == 0) 3151 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3152 VOP_UNLOCK(vp, 0); 3153 vn_finished_write(mp); 3154 return (error); 3155 } 3156 3157 /* 3158 * Set the access and modification 
times of a file. 3159 */ 3160 #ifndef _SYS_SYSPROTO_H_ 3161 struct utimes_args { 3162 char *path; 3163 struct timeval *tptr; 3164 }; 3165 #endif 3166 int 3167 sys_utimes(td, uap) 3168 struct thread *td; 3169 register struct utimes_args /* { 3170 char *path; 3171 struct timeval *tptr; 3172 } */ *uap; 3173 { 3174 3175 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3176 UIO_USERSPACE)); 3177 } 3178 3179 #ifndef _SYS_SYSPROTO_H_ 3180 struct futimesat_args { 3181 int fd; 3182 const char * path; 3183 const struct timeval * times; 3184 }; 3185 #endif 3186 int 3187 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3188 { 3189 3190 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3191 uap->times, UIO_USERSPACE)); 3192 } 3193 3194 int 3195 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3196 struct timeval *tptr, enum uio_seg tptrseg) 3197 { 3198 3199 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3200 } 3201 3202 int 3203 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3204 struct timeval *tptr, enum uio_seg tptrseg) 3205 { 3206 struct nameidata nd; 3207 struct timespec ts[2]; 3208 cap_rights_t rights; 3209 int error; 3210 3211 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3212 return (error); 3213 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3214 cap_rights_init(&rights, CAP_FUTIMES), td); 3215 3216 if ((error = namei(&nd)) != 0) 3217 return (error); 3218 NDFREE(&nd, NDF_ONLY_PNBUF); 3219 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3220 vrele(nd.ni_vp); 3221 return (error); 3222 } 3223 3224 /* 3225 * Set the access and modification times of a file. 
3226 */ 3227 #ifndef _SYS_SYSPROTO_H_ 3228 struct lutimes_args { 3229 char *path; 3230 struct timeval *tptr; 3231 }; 3232 #endif 3233 int 3234 sys_lutimes(td, uap) 3235 struct thread *td; 3236 register struct lutimes_args /* { 3237 char *path; 3238 struct timeval *tptr; 3239 } */ *uap; 3240 { 3241 3242 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3243 UIO_USERSPACE)); 3244 } 3245 3246 int 3247 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3248 struct timeval *tptr, enum uio_seg tptrseg) 3249 { 3250 struct timespec ts[2]; 3251 struct nameidata nd; 3252 int error; 3253 3254 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3255 return (error); 3256 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3257 if ((error = namei(&nd)) != 0) 3258 return (error); 3259 NDFREE(&nd, NDF_ONLY_PNBUF); 3260 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3261 vrele(nd.ni_vp); 3262 return (error); 3263 } 3264 3265 /* 3266 * Set the access and modification times of a file. 
3267 */ 3268 #ifndef _SYS_SYSPROTO_H_ 3269 struct futimes_args { 3270 int fd; 3271 struct timeval *tptr; 3272 }; 3273 #endif 3274 int 3275 sys_futimes(td, uap) 3276 struct thread *td; 3277 register struct futimes_args /* { 3278 int fd; 3279 struct timeval *tptr; 3280 } */ *uap; 3281 { 3282 3283 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3284 } 3285 3286 int 3287 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3288 enum uio_seg tptrseg) 3289 { 3290 struct timespec ts[2]; 3291 struct file *fp; 3292 cap_rights_t rights; 3293 int error; 3294 3295 AUDIT_ARG_FD(fd); 3296 error = getutimes(tptr, tptrseg, ts); 3297 if (error != 0) 3298 return (error); 3299 error = getvnode(td->td_proc->p_fd, fd, 3300 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3301 if (error != 0) 3302 return (error); 3303 #ifdef AUDIT 3304 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3305 AUDIT_ARG_VNODE1(fp->f_vnode); 3306 VOP_UNLOCK(fp->f_vnode, 0); 3307 #endif 3308 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3309 fdrop(fp, td); 3310 return (error); 3311 } 3312 3313 /* 3314 * Truncate a file given its path name. 
3315 */ 3316 #ifndef _SYS_SYSPROTO_H_ 3317 struct truncate_args { 3318 char *path; 3319 int pad; 3320 off_t length; 3321 }; 3322 #endif 3323 int 3324 sys_truncate(td, uap) 3325 struct thread *td; 3326 register struct truncate_args /* { 3327 char *path; 3328 int pad; 3329 off_t length; 3330 } */ *uap; 3331 { 3332 3333 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3334 } 3335 3336 int 3337 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3338 { 3339 struct mount *mp; 3340 struct vnode *vp; 3341 void *rl_cookie; 3342 struct vattr vattr; 3343 struct nameidata nd; 3344 int error; 3345 3346 if (length < 0) 3347 return(EINVAL); 3348 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3349 if ((error = namei(&nd)) != 0) 3350 return (error); 3351 vp = nd.ni_vp; 3352 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3353 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3354 vn_rangelock_unlock(vp, rl_cookie); 3355 vrele(vp); 3356 return (error); 3357 } 3358 NDFREE(&nd, NDF_ONLY_PNBUF); 3359 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3360 if (vp->v_type == VDIR) 3361 error = EISDIR; 3362 #ifdef MAC 3363 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3364 } 3365 #endif 3366 else if ((error = vn_writechk(vp)) == 0 && 3367 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3368 VATTR_NULL(&vattr); 3369 vattr.va_size = length; 3370 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3371 } 3372 VOP_UNLOCK(vp, 0); 3373 vn_finished_write(mp); 3374 vn_rangelock_unlock(vp, rl_cookie); 3375 vrele(vp); 3376 return (error); 3377 } 3378 3379 #if defined(COMPAT_43) 3380 /* 3381 * Truncate a file given its path name. 
3382 */ 3383 #ifndef _SYS_SYSPROTO_H_ 3384 struct otruncate_args { 3385 char *path; 3386 long length; 3387 }; 3388 #endif 3389 int 3390 otruncate(td, uap) 3391 struct thread *td; 3392 register struct otruncate_args /* { 3393 char *path; 3394 long length; 3395 } */ *uap; 3396 { 3397 struct truncate_args /* { 3398 char *path; 3399 int pad; 3400 off_t length; 3401 } */ nuap; 3402 3403 nuap.path = uap->path; 3404 nuap.length = uap->length; 3405 return (sys_truncate(td, &nuap)); 3406 } 3407 #endif /* COMPAT_43 */ 3408 3409 /* Versions with the pad argument */ 3410 int 3411 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3412 { 3413 struct truncate_args ouap; 3414 3415 ouap.path = uap->path; 3416 ouap.length = uap->length; 3417 return (sys_truncate(td, &ouap)); 3418 } 3419 3420 int 3421 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3422 { 3423 struct ftruncate_args ouap; 3424 3425 ouap.fd = uap->fd; 3426 ouap.length = uap->length; 3427 return (sys_ftruncate(td, &ouap)); 3428 } 3429 3430 /* 3431 * Sync an open file. 
3432 */ 3433 #ifndef _SYS_SYSPROTO_H_ 3434 struct fsync_args { 3435 int fd; 3436 }; 3437 #endif 3438 int 3439 sys_fsync(td, uap) 3440 struct thread *td; 3441 struct fsync_args /* { 3442 int fd; 3443 } */ *uap; 3444 { 3445 struct vnode *vp; 3446 struct mount *mp; 3447 struct file *fp; 3448 cap_rights_t rights; 3449 int error, lock_flags; 3450 3451 AUDIT_ARG_FD(uap->fd); 3452 error = getvnode(td->td_proc->p_fd, uap->fd, 3453 cap_rights_init(&rights, CAP_FSYNC), &fp); 3454 if (error != 0) 3455 return (error); 3456 vp = fp->f_vnode; 3457 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3458 if (error != 0) 3459 goto drop; 3460 if (MNT_SHARED_WRITES(mp) || 3461 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3462 lock_flags = LK_SHARED; 3463 } else { 3464 lock_flags = LK_EXCLUSIVE; 3465 } 3466 vn_lock(vp, lock_flags | LK_RETRY); 3467 AUDIT_ARG_VNODE1(vp); 3468 if (vp->v_object != NULL) { 3469 VM_OBJECT_WLOCK(vp->v_object); 3470 vm_object_page_clean(vp->v_object, 0, 0, 0); 3471 VM_OBJECT_WUNLOCK(vp->v_object); 3472 } 3473 error = VOP_FSYNC(vp, MNT_WAIT, td); 3474 3475 VOP_UNLOCK(vp, 0); 3476 vn_finished_write(mp); 3477 drop: 3478 fdrop(fp, td); 3479 return (error); 3480 } 3481 3482 /* 3483 * Rename files. Source and destination must either both be directories, or 3484 * both not be directories. If target is a directory, it must be empty. 
3485 */ 3486 #ifndef _SYS_SYSPROTO_H_ 3487 struct rename_args { 3488 char *from; 3489 char *to; 3490 }; 3491 #endif 3492 int 3493 sys_rename(td, uap) 3494 struct thread *td; 3495 register struct rename_args /* { 3496 char *from; 3497 char *to; 3498 } */ *uap; 3499 { 3500 3501 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3502 } 3503 3504 #ifndef _SYS_SYSPROTO_H_ 3505 struct renameat_args { 3506 int oldfd; 3507 char *old; 3508 int newfd; 3509 char *new; 3510 }; 3511 #endif 3512 int 3513 sys_renameat(struct thread *td, struct renameat_args *uap) 3514 { 3515 3516 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3517 UIO_USERSPACE)); 3518 } 3519 3520 int 3521 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3522 { 3523 3524 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3525 } 3526 3527 int 3528 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3529 enum uio_seg pathseg) 3530 { 3531 struct mount *mp = NULL; 3532 struct vnode *tvp, *fvp, *tdvp; 3533 struct nameidata fromnd, tond; 3534 cap_rights_t rights; 3535 int error; 3536 3537 again: 3538 bwillwrite(); 3539 #ifdef MAC 3540 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3541 AUDITVNODE1, pathseg, old, oldfd, 3542 cap_rights_init(&rights, CAP_RENAMEAT), td); 3543 #else 3544 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3545 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3546 #endif 3547 3548 if ((error = namei(&fromnd)) != 0) 3549 return (error); 3550 #ifdef MAC 3551 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3552 fromnd.ni_vp, &fromnd.ni_cnd); 3553 VOP_UNLOCK(fromnd.ni_dvp, 0); 3554 if (fromnd.ni_dvp != fromnd.ni_vp) 3555 VOP_UNLOCK(fromnd.ni_vp, 0); 3556 #endif 3557 fvp = fromnd.ni_vp; 3558 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3559 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3560 
cap_rights_init(&rights, CAP_LINKAT), td); 3561 if (fromnd.ni_vp->v_type == VDIR) 3562 tond.ni_cnd.cn_flags |= WILLBEDIR; 3563 if ((error = namei(&tond)) != 0) { 3564 /* Translate error code for rename("dir1", "dir2/."). */ 3565 if (error == EISDIR && fvp->v_type == VDIR) 3566 error = EINVAL; 3567 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3568 vrele(fromnd.ni_dvp); 3569 vrele(fvp); 3570 goto out1; 3571 } 3572 tdvp = tond.ni_dvp; 3573 tvp = tond.ni_vp; 3574 error = vn_start_write(fvp, &mp, V_NOWAIT); 3575 if (error != 0) { 3576 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3577 NDFREE(&tond, NDF_ONLY_PNBUF); 3578 if (tvp != NULL) 3579 vput(tvp); 3580 if (tdvp == tvp) 3581 vrele(tdvp); 3582 else 3583 vput(tdvp); 3584 vrele(fromnd.ni_dvp); 3585 vrele(fvp); 3586 vrele(tond.ni_startdir); 3587 if (fromnd.ni_startdir != NULL) 3588 vrele(fromnd.ni_startdir); 3589 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3590 if (error != 0) 3591 return (error); 3592 goto again; 3593 } 3594 if (tvp != NULL) { 3595 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3596 error = ENOTDIR; 3597 goto out; 3598 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3599 error = EISDIR; 3600 goto out; 3601 } 3602 #ifdef CAPABILITIES 3603 if (newfd != AT_FDCWD) { 3604 /* 3605 * If the target already exists we require CAP_UNLINKAT 3606 * from 'newfd'. 3607 */ 3608 error = cap_check(&tond.ni_filecaps.fc_rights, 3609 cap_rights_init(&rights, CAP_UNLINKAT)); 3610 if (error != 0) 3611 goto out; 3612 } 3613 #endif 3614 } 3615 if (fvp == tdvp) { 3616 error = EINVAL; 3617 goto out; 3618 } 3619 /* 3620 * If the source is the same as the destination (that is, if they 3621 * are links to the same vnode), then there is nothing to do. 
3622 */ 3623 if (fvp == tvp) 3624 error = -1; 3625 #ifdef MAC 3626 else 3627 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3628 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3629 #endif 3630 out: 3631 if (error == 0) { 3632 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3633 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3634 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3635 NDFREE(&tond, NDF_ONLY_PNBUF); 3636 } else { 3637 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3638 NDFREE(&tond, NDF_ONLY_PNBUF); 3639 if (tvp != NULL) 3640 vput(tvp); 3641 if (tdvp == tvp) 3642 vrele(tdvp); 3643 else 3644 vput(tdvp); 3645 vrele(fromnd.ni_dvp); 3646 vrele(fvp); 3647 } 3648 vrele(tond.ni_startdir); 3649 vn_finished_write(mp); 3650 out1: 3651 if (fromnd.ni_startdir) 3652 vrele(fromnd.ni_startdir); 3653 if (error == -1) 3654 return (0); 3655 return (error); 3656 } 3657 3658 /* 3659 * Make a directory file. 3660 */ 3661 #ifndef _SYS_SYSPROTO_H_ 3662 struct mkdir_args { 3663 char *path; 3664 int mode; 3665 }; 3666 #endif 3667 int 3668 sys_mkdir(td, uap) 3669 struct thread *td; 3670 register struct mkdir_args /* { 3671 char *path; 3672 int mode; 3673 } */ *uap; 3674 { 3675 3676 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3677 } 3678 3679 #ifndef _SYS_SYSPROTO_H_ 3680 struct mkdirat_args { 3681 int fd; 3682 char *path; 3683 mode_t mode; 3684 }; 3685 #endif 3686 int 3687 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3688 { 3689 3690 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3691 } 3692 3693 int 3694 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3695 { 3696 3697 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3698 } 3699 3700 int 3701 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3702 int mode) 3703 { 3704 struct mount *mp; 3705 struct vnode *vp; 3706 struct vattr vattr; 3707 struct nameidata nd; 3708 cap_rights_t rights; 3709 int error; 3710 3711 
AUDIT_ARG_MODE(mode); 3712 restart: 3713 bwillwrite(); 3714 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3715 segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); 3716 nd.ni_cnd.cn_flags |= WILLBEDIR; 3717 if ((error = namei(&nd)) != 0) 3718 return (error); 3719 vp = nd.ni_vp; 3720 if (vp != NULL) { 3721 NDFREE(&nd, NDF_ONLY_PNBUF); 3722 /* 3723 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3724 * the strange behaviour of leaving the vnode unlocked 3725 * if the target is the same vnode as the parent. 3726 */ 3727 if (vp == nd.ni_dvp) 3728 vrele(nd.ni_dvp); 3729 else 3730 vput(nd.ni_dvp); 3731 vrele(vp); 3732 return (EEXIST); 3733 } 3734 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3735 NDFREE(&nd, NDF_ONLY_PNBUF); 3736 vput(nd.ni_dvp); 3737 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3738 return (error); 3739 goto restart; 3740 } 3741 VATTR_NULL(&vattr); 3742 vattr.va_type = VDIR; 3743 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3744 #ifdef MAC 3745 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3746 &vattr); 3747 if (error != 0) 3748 goto out; 3749 #endif 3750 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3751 #ifdef MAC 3752 out: 3753 #endif 3754 NDFREE(&nd, NDF_ONLY_PNBUF); 3755 vput(nd.ni_dvp); 3756 if (error == 0) 3757 vput(nd.ni_vp); 3758 vn_finished_write(mp); 3759 return (error); 3760 } 3761 3762 /* 3763 * Remove a directory file. 
3764 */ 3765 #ifndef _SYS_SYSPROTO_H_ 3766 struct rmdir_args { 3767 char *path; 3768 }; 3769 #endif 3770 int 3771 sys_rmdir(td, uap) 3772 struct thread *td; 3773 struct rmdir_args /* { 3774 char *path; 3775 } */ *uap; 3776 { 3777 3778 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3779 } 3780 3781 int 3782 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3783 { 3784 3785 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3786 } 3787 3788 int 3789 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3790 { 3791 struct mount *mp; 3792 struct vnode *vp; 3793 struct nameidata nd; 3794 cap_rights_t rights; 3795 int error; 3796 3797 restart: 3798 bwillwrite(); 3799 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3800 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3801 if ((error = namei(&nd)) != 0) 3802 return (error); 3803 vp = nd.ni_vp; 3804 if (vp->v_type != VDIR) { 3805 error = ENOTDIR; 3806 goto out; 3807 } 3808 /* 3809 * No rmdir "." please. 3810 */ 3811 if (nd.ni_dvp == vp) { 3812 error = EINVAL; 3813 goto out; 3814 } 3815 /* 3816 * The root of a mounted filesystem cannot be deleted. 
*/
	if (vp->v_vflag & VV_ROOT) {
		error = EBUSY;
		goto out;
	}
#ifdef MAC
	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
	    &nd.ni_cnd);
	if (error != 0)
		goto out;
#endif
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(vp);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
	vn_finished_write(mp);
out:
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(vp);
	if (nd.ni_dvp == vp)
		vrele(nd.ni_dvp);
	else
		vput(nd.ni_dvp);
	return (error);
}

#ifdef COMPAT_43
/*
 * Read a block of directory entries in a filesystem independent format.
 *
 * Old (COMPAT_43) entry point: the work is done by kern_ogetdirentries();
 * on success the offset at which the read started is copied out to
 * uap->basep.
 */
#ifndef _SYS_SYSPROTO_H_
struct ogetdirentries_args {
	int	fd;
	char	*buf;
	u_int	count;
	long	*basep;
};
#endif
int
ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
{
	long loff;
	int error;

	error = kern_ogetdirentries(td, uap, &loff);
	/* Unlike sys_getdirentries(), basep is copied out unconditionally. */
	if (error == 0)
		error = copyout(&loff, uap->basep, sizeof(long));
	return (error);
}

/*
 * Backend of ogetdirentries().
 *
 * Reads at most uap->count bytes of directory entries from the directory
 * open on descriptor uap->fd into the user buffer uap->buf, rewriting the
 * d_type/d_namlen fields of every entry into the layout the old interface
 * expects.  On success the number of bytes transferred is left in
 * td->td_retval[0] and the file offset at which the read started is stored
 * in *ploff.
 *
 * Returns 0 or an errno value: EINVAL if count exceeds 64K or the
 * descriptor is not a directory, EBADF if the descriptor is not open for
 * reading, EIO if a returned entry has a zero record length, otherwise
 * whatever getvnode(), the MAC check, VOP_READDIR() or uiomove() report.
 */
int
kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
    long *ploff)
{
	struct vnode *vp;
	struct file *fp;
	struct uio auio, kuio;
	struct iovec aiov, kiov;
	struct dirent *dp, *edp;
	cap_rights_t rights;
	caddr_t dirbuf;
	int error, eofflag, readcnt;
	long loff;
	off_t foffset;

	/*
	 * XXX arbitrary sanity limit on `count'.  It also bounds the size
	 * of the temporary kernel buffer allocated below.
	 */
	if (uap->count > 64 * 1024)
		return (EINVAL);
	error = getvnode(td->td_proc->p_fd, uap->fd,
	    cap_rights_init(&rights, CAP_READ), &fp);
	if (error != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	vp = fp->f_vnode;
	foffset = foffset_lock(fp, 0);
unionread:
	/* The vnode is not locked yet here, so there is no VOP_UNLOCK(). */
	if (vp->v_type != VDIR) {
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (EINVAL);
	}
	/* Describe the caller's buffer; rebuilt on every unionread pass. */
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_td = td;
	auio.uio_resid = uap->count;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* Remember where this read starts; reported through *ploff. */
	loff = auio.uio_offset = foffset;
#ifdef MAC
	error = mac_vnode_check_readdir(td->td_ucred, vp);
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		/* Nothing was read: this path alone passes FOF_NOUPDATE. */
		foffset_unlock(fp, foffset, FOF_NOUPDATE);
		fdrop(fp, td);
		return (error);
	}
#endif
#	if (BYTE_ORDER != LITTLE_ENDIAN)
		/*
		 * Read straight into the user buffer, with no per-entry
		 * conversion.
		 * NOTE(review): presumably mnt_maxsymlinklen <= 0 marks a
		 * filesystem that already uses the old entry layout --
		 * confirm.
		 */
		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
			    NULL, NULL);
			foffset = auio.uio_offset;
		} else
#	endif
	{
		/*
		 * Read into a temporary kernel buffer, convert each entry
		 * in place, then copy the result out to the user buffer.
		 */
		kuio = auio;
		kuio.uio_iov = &kiov;
		kuio.uio_segflg = UIO_SYSSPACE;
		kiov.iov_len = uap->count;
		dirbuf = malloc(uap->count, M_TEMP, M_WAITOK);
		kiov.iov_base = dirbuf;
		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
			    NULL, NULL);
		foffset = kuio.uio_offset;
		if (error == 0) {
			readcnt = uap->count - kuio.uio_resid;
			edp = (struct dirent *)&dirbuf[readcnt];
			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
#				if (BYTE_ORDER == LITTLE_ENDIAN)
					/*
					 * The expected low byte of
					 * dp->d_namlen is our dp->d_type.
					 * The high MBZ byte of dp->d_namlen
					 * is our dp->d_namlen.
					 */
					dp->d_type = dp->d_namlen;
					dp->d_namlen = 0;
#				else
					/*
					 * The dp->d_type is the high byte
					 * of the expected dp->d_namlen,
					 * so must be zero'ed.
					 */
					dp->d_type = 0;
#				endif
				/*
				 * A zero record length would never advance
				 * dp; fail with EIO instead of looping.
				 */
				if (dp->d_reclen > 0) {
					dp = (struct dirent *)
					    ((char *)dp + dp->d_reclen);
				} else {
					error = EIO;
					break;
				}
			}
			/* Copy out only if the whole buffer was walked. */
			if (dp >= edp)
				error = uiomove(dirbuf, readcnt, &auio);
		}
		free(dirbuf, M_TEMP);
	}
	if (error != 0) {
		VOP_UNLOCK(vp, 0);
		foffset_unlock(fp, foffset, 0);
		fdrop(fp, td);
		return (error);
	}
	/*
	 * Nothing was transferred from the root of a union mount: switch
	 * the descriptor to the covered vnode and retry from offset 0.
	 */
	if (uap->count == auio.uio_resid &&
	    (vp->v_vflag & VV_ROOT) &&
	    (vp->v_mount->mnt_flag & MNT_UNION)) {
		struct vnode *tvp = vp;
		vp = vp->v_mount->mnt_vnodecovered;
		VREF(vp);
		fp->f_vnode = vp;
		fp->f_data = vp;
		foffset = 0;
		vput(tvp);
		goto unionread;
	}
	VOP_UNLOCK(vp, 0);
	foffset_unlock(fp, foffset, 0);
	fdrop(fp, td);
	td->td_retval[0] = uap->count - auio.uio_resid;
	/* error is always 0 here; the failure paths returned above. */
	if (error == 0)
		*ploff = loff;
	return (error);
}
#endif /* COMPAT_43 */

/*
 * Read a block of directory entries in a filesystem independent format.
4010 */ 4011 #ifndef _SYS_SYSPROTO_H_ 4012 struct getdirentries_args { 4013 int fd; 4014 char *buf; 4015 u_int count; 4016 long *basep; 4017 }; 4018 #endif 4019 int 4020 sys_getdirentries(td, uap) 4021 struct thread *td; 4022 register struct getdirentries_args /* { 4023 int fd; 4024 char *buf; 4025 u_int count; 4026 long *basep; 4027 } */ *uap; 4028 { 4029 long base; 4030 int error; 4031 4032 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4033 NULL, UIO_USERSPACE); 4034 if (error != 0) 4035 return (error); 4036 if (uap->basep != NULL) 4037 error = copyout(&base, uap->basep, sizeof(long)); 4038 return (error); 4039 } 4040 4041 int 4042 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 4043 long *basep, ssize_t *residp, enum uio_seg bufseg) 4044 { 4045 struct vnode *vp; 4046 struct file *fp; 4047 struct uio auio; 4048 struct iovec aiov; 4049 cap_rights_t rights; 4050 long loff; 4051 int error, eofflag; 4052 off_t foffset; 4053 4054 AUDIT_ARG_FD(fd); 4055 if (count > IOSIZE_MAX) 4056 return (EINVAL); 4057 auio.uio_resid = count; 4058 error = getvnode(td->td_proc->p_fd, fd, 4059 cap_rights_init(&rights, CAP_READ), &fp); 4060 if (error != 0) 4061 return (error); 4062 if ((fp->f_flag & FREAD) == 0) { 4063 fdrop(fp, td); 4064 return (EBADF); 4065 } 4066 vp = fp->f_vnode; 4067 foffset = foffset_lock(fp, 0); 4068 unionread: 4069 if (vp->v_type != VDIR) { 4070 error = EINVAL; 4071 goto fail; 4072 } 4073 aiov.iov_base = buf; 4074 aiov.iov_len = count; 4075 auio.uio_iov = &aiov; 4076 auio.uio_iovcnt = 1; 4077 auio.uio_rw = UIO_READ; 4078 auio.uio_segflg = bufseg; 4079 auio.uio_td = td; 4080 vn_lock(vp, LK_SHARED | LK_RETRY); 4081 AUDIT_ARG_VNODE1(vp); 4082 loff = auio.uio_offset = foffset; 4083 #ifdef MAC 4084 error = mac_vnode_check_readdir(td->td_ucred, vp); 4085 if (error == 0) 4086 #endif 4087 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4088 NULL); 4089 foffset = auio.uio_offset; 4090 if (error != 0) { 4091 
VOP_UNLOCK(vp, 0); 4092 goto fail; 4093 } 4094 if (count == auio.uio_resid && 4095 (vp->v_vflag & VV_ROOT) && 4096 (vp->v_mount->mnt_flag & MNT_UNION)) { 4097 struct vnode *tvp = vp; 4098 4099 vp = vp->v_mount->mnt_vnodecovered; 4100 VREF(vp); 4101 fp->f_vnode = vp; 4102 fp->f_data = vp; 4103 foffset = 0; 4104 vput(tvp); 4105 goto unionread; 4106 } 4107 VOP_UNLOCK(vp, 0); 4108 *basep = loff; 4109 if (residp != NULL) 4110 *residp = auio.uio_resid; 4111 td->td_retval[0] = count - auio.uio_resid; 4112 fail: 4113 foffset_unlock(fp, foffset, 0); 4114 fdrop(fp, td); 4115 return (error); 4116 } 4117 4118 #ifndef _SYS_SYSPROTO_H_ 4119 struct getdents_args { 4120 int fd; 4121 char *buf; 4122 size_t count; 4123 }; 4124 #endif 4125 int 4126 sys_getdents(td, uap) 4127 struct thread *td; 4128 register struct getdents_args /* { 4129 int fd; 4130 char *buf; 4131 u_int count; 4132 } */ *uap; 4133 { 4134 struct getdirentries_args ap; 4135 4136 ap.fd = uap->fd; 4137 ap.buf = uap->buf; 4138 ap.count = uap->count; 4139 ap.basep = NULL; 4140 return (sys_getdirentries(td, &ap)); 4141 } 4142 4143 /* 4144 * Set the mode mask for creation of filesystem nodes. 4145 */ 4146 #ifndef _SYS_SYSPROTO_H_ 4147 struct umask_args { 4148 int newmask; 4149 }; 4150 #endif 4151 int 4152 sys_umask(td, uap) 4153 struct thread *td; 4154 struct umask_args /* { 4155 int newmask; 4156 } */ *uap; 4157 { 4158 register struct filedesc *fdp; 4159 4160 FILEDESC_XLOCK(td->td_proc->p_fd); 4161 fdp = td->td_proc->p_fd; 4162 td->td_retval[0] = fdp->fd_cmask; 4163 fdp->fd_cmask = uap->newmask & ALLPERMS; 4164 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4165 return (0); 4166 } 4167 4168 /* 4169 * Void all references to file by ripping underlying filesystem away from 4170 * vnode. 
4171 */ 4172 #ifndef _SYS_SYSPROTO_H_ 4173 struct revoke_args { 4174 char *path; 4175 }; 4176 #endif 4177 int 4178 sys_revoke(td, uap) 4179 struct thread *td; 4180 register struct revoke_args /* { 4181 char *path; 4182 } */ *uap; 4183 { 4184 struct vnode *vp; 4185 struct vattr vattr; 4186 struct nameidata nd; 4187 int error; 4188 4189 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4190 uap->path, td); 4191 if ((error = namei(&nd)) != 0) 4192 return (error); 4193 vp = nd.ni_vp; 4194 NDFREE(&nd, NDF_ONLY_PNBUF); 4195 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4196 error = EINVAL; 4197 goto out; 4198 } 4199 #ifdef MAC 4200 error = mac_vnode_check_revoke(td->td_ucred, vp); 4201 if (error != 0) 4202 goto out; 4203 #endif 4204 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4205 if (error != 0) 4206 goto out; 4207 if (td->td_ucred->cr_uid != vattr.va_uid) { 4208 error = priv_check(td, PRIV_VFS_ADMIN); 4209 if (error != 0) 4210 goto out; 4211 } 4212 if (vcount(vp) > 1) 4213 VOP_REVOKE(vp, REVOKEALL); 4214 out: 4215 vput(vp); 4216 return (error); 4217 } 4218 4219 /* 4220 * Convert a user file descriptor to a kernel file entry and check that, if it 4221 * is a capability, the correct rights are present. A reference on the file 4222 * entry is held upon returning. 4223 */ 4224 int 4225 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4226 { 4227 struct file *fp; 4228 int error; 4229 4230 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4231 if (error != 0) 4232 return (error); 4233 4234 /* 4235 * The file could be not of the vnode type, or it may be not 4236 * yet fully initialized, in which case the f_vnode pointer 4237 * may be set, but f_ops is still badfileops. E.g., 4238 * devfs_open() transiently create such situation to 4239 * facilitate csw d_fdopen(). 
4240 * 4241 * Dupfdopen() handling in kern_openat() installs the 4242 * half-baked file into the process descriptor table, allowing 4243 * other thread to dereference it. Guard against the race by 4244 * checking f_ops. 4245 */ 4246 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4247 fdrop(fp, curthread); 4248 return (EINVAL); 4249 } 4250 *fpp = fp; 4251 return (0); 4252 } 4253 4254 4255 /* 4256 * Get an (NFS) file handle. 4257 */ 4258 #ifndef _SYS_SYSPROTO_H_ 4259 struct lgetfh_args { 4260 char *fname; 4261 fhandle_t *fhp; 4262 }; 4263 #endif 4264 int 4265 sys_lgetfh(td, uap) 4266 struct thread *td; 4267 register struct lgetfh_args *uap; 4268 { 4269 struct nameidata nd; 4270 fhandle_t fh; 4271 register struct vnode *vp; 4272 int error; 4273 4274 error = priv_check(td, PRIV_VFS_GETFH); 4275 if (error != 0) 4276 return (error); 4277 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4278 uap->fname, td); 4279 error = namei(&nd); 4280 if (error != 0) 4281 return (error); 4282 NDFREE(&nd, NDF_ONLY_PNBUF); 4283 vp = nd.ni_vp; 4284 bzero(&fh, sizeof(fh)); 4285 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4286 error = VOP_VPTOFH(vp, &fh.fh_fid); 4287 vput(vp); 4288 if (error == 0) 4289 error = copyout(&fh, uap->fhp, sizeof (fh)); 4290 return (error); 4291 } 4292 4293 #ifndef _SYS_SYSPROTO_H_ 4294 struct getfh_args { 4295 char *fname; 4296 fhandle_t *fhp; 4297 }; 4298 #endif 4299 int 4300 sys_getfh(td, uap) 4301 struct thread *td; 4302 register struct getfh_args *uap; 4303 { 4304 struct nameidata nd; 4305 fhandle_t fh; 4306 register struct vnode *vp; 4307 int error; 4308 4309 error = priv_check(td, PRIV_VFS_GETFH); 4310 if (error != 0) 4311 return (error); 4312 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4313 uap->fname, td); 4314 error = namei(&nd); 4315 if (error != 0) 4316 return (error); 4317 NDFREE(&nd, NDF_ONLY_PNBUF); 4318 vp = nd.ni_vp; 4319 bzero(&fh, sizeof(fh)); 4320 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 
4321 error = VOP_VPTOFH(vp, &fh.fh_fid); 4322 vput(vp); 4323 if (error == 0) 4324 error = copyout(&fh, uap->fhp, sizeof (fh)); 4325 return (error); 4326 } 4327 4328 /* 4329 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4330 * open descriptor. 4331 * 4332 * warning: do not remove the priv_check() call or this becomes one giant 4333 * security hole. 4334 */ 4335 #ifndef _SYS_SYSPROTO_H_ 4336 struct fhopen_args { 4337 const struct fhandle *u_fhp; 4338 int flags; 4339 }; 4340 #endif 4341 int 4342 sys_fhopen(td, uap) 4343 struct thread *td; 4344 struct fhopen_args /* { 4345 const struct fhandle *u_fhp; 4346 int flags; 4347 } */ *uap; 4348 { 4349 struct mount *mp; 4350 struct vnode *vp; 4351 struct fhandle fhp; 4352 struct file *fp; 4353 int fmode, error; 4354 int indx; 4355 4356 error = priv_check(td, PRIV_VFS_FHOPEN); 4357 if (error != 0) 4358 return (error); 4359 indx = -1; 4360 fmode = FFLAGS(uap->flags); 4361 /* why not allow a non-read/write open for our lockd? */ 4362 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4363 return (EINVAL); 4364 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4365 if (error != 0) 4366 return(error); 4367 /* find the mount point */ 4368 mp = vfs_busyfs(&fhp.fh_fsid); 4369 if (mp == NULL) 4370 return (ESTALE); 4371 /* now give me my vnode, it gets returned to me locked */ 4372 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4373 vfs_unbusy(mp); 4374 if (error != 0) 4375 return (error); 4376 4377 error = falloc_noinstall(td, &fp); 4378 if (error != 0) { 4379 vput(vp); 4380 return (error); 4381 } 4382 /* 4383 * An extra reference on `fp' has been held for us by 4384 * falloc_noinstall(). 
4385 */ 4386 4387 #ifdef INVARIANTS 4388 td->td_dupfd = -1; 4389 #endif 4390 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4391 if (error != 0) { 4392 KASSERT(fp->f_ops == &badfileops, 4393 ("VOP_OPEN in fhopen() set f_ops")); 4394 KASSERT(td->td_dupfd < 0, 4395 ("fhopen() encountered fdopen()")); 4396 4397 vput(vp); 4398 goto bad; 4399 } 4400 #ifdef INVARIANTS 4401 td->td_dupfd = 0; 4402 #endif 4403 fp->f_vnode = vp; 4404 fp->f_seqcount = 1; 4405 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4406 &vnops); 4407 VOP_UNLOCK(vp, 0); 4408 if ((fmode & O_TRUNC) != 0) { 4409 error = fo_truncate(fp, 0, td->td_ucred, td); 4410 if (error != 0) 4411 goto bad; 4412 } 4413 4414 error = finstall(td, fp, &indx, fmode, NULL); 4415 bad: 4416 fdrop(fp, td); 4417 td->td_retval[0] = indx; 4418 return (error); 4419 } 4420 4421 /* 4422 * Stat an (NFS) file handle. 4423 */ 4424 #ifndef _SYS_SYSPROTO_H_ 4425 struct fhstat_args { 4426 struct fhandle *u_fhp; 4427 struct stat *sb; 4428 }; 4429 #endif 4430 int 4431 sys_fhstat(td, uap) 4432 struct thread *td; 4433 register struct fhstat_args /* { 4434 struct fhandle *u_fhp; 4435 struct stat *sb; 4436 } */ *uap; 4437 { 4438 struct stat sb; 4439 struct fhandle fh; 4440 int error; 4441 4442 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4443 if (error != 0) 4444 return (error); 4445 error = kern_fhstat(td, fh, &sb); 4446 if (error == 0) 4447 error = copyout(&sb, uap->sb, sizeof(sb)); 4448 return (error); 4449 } 4450 4451 int 4452 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4453 { 4454 struct mount *mp; 4455 struct vnode *vp; 4456 int error; 4457 4458 error = priv_check(td, PRIV_VFS_FHSTAT); 4459 if (error != 0) 4460 return (error); 4461 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4462 return (ESTALE); 4463 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4464 vfs_unbusy(mp); 4465 if (error != 0) 4466 return (error); 4467 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4468 vput(vp); 
4469 return (error); 4470 } 4471 4472 /* 4473 * Implement fstatfs() for (NFS) file handles. 4474 */ 4475 #ifndef _SYS_SYSPROTO_H_ 4476 struct fhstatfs_args { 4477 struct fhandle *u_fhp; 4478 struct statfs *buf; 4479 }; 4480 #endif 4481 int 4482 sys_fhstatfs(td, uap) 4483 struct thread *td; 4484 struct fhstatfs_args /* { 4485 struct fhandle *u_fhp; 4486 struct statfs *buf; 4487 } */ *uap; 4488 { 4489 struct statfs sf; 4490 fhandle_t fh; 4491 int error; 4492 4493 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4494 if (error != 0) 4495 return (error); 4496 error = kern_fhstatfs(td, fh, &sf); 4497 if (error != 0) 4498 return (error); 4499 return (copyout(&sf, uap->buf, sizeof(sf))); 4500 } 4501 4502 int 4503 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4504 { 4505 struct statfs *sp; 4506 struct mount *mp; 4507 struct vnode *vp; 4508 int error; 4509 4510 error = priv_check(td, PRIV_VFS_FHSTATFS); 4511 if (error != 0) 4512 return (error); 4513 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4514 return (ESTALE); 4515 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4516 if (error != 0) { 4517 vfs_unbusy(mp); 4518 return (error); 4519 } 4520 vput(vp); 4521 error = prison_canseemount(td->td_ucred, mp); 4522 if (error != 0) 4523 goto out; 4524 #ifdef MAC 4525 error = mac_mount_check_stat(td->td_ucred, mp); 4526 if (error != 0) 4527 goto out; 4528 #endif 4529 /* 4530 * Set these in case the underlying filesystem fails to do so. 
4531 */ 4532 sp = &mp->mnt_stat; 4533 sp->f_version = STATFS_VERSION; 4534 sp->f_namemax = NAME_MAX; 4535 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4536 error = VFS_STATFS(mp, sp); 4537 if (error == 0) 4538 *buf = *sp; 4539 out: 4540 vfs_unbusy(mp); 4541 return (error); 4542 } 4543 4544 int 4545 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4546 { 4547 struct file *fp; 4548 struct mount *mp; 4549 struct vnode *vp; 4550 cap_rights_t rights; 4551 off_t olen, ooffset; 4552 int error; 4553 4554 fp = NULL; 4555 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4556 if (error != 0) 4557 goto out; 4558 4559 switch (fp->f_type) { 4560 case DTYPE_VNODE: 4561 break; 4562 case DTYPE_PIPE: 4563 case DTYPE_FIFO: 4564 error = ESPIPE; 4565 goto out; 4566 default: 4567 error = ENODEV; 4568 goto out; 4569 } 4570 if ((fp->f_flag & FWRITE) == 0) { 4571 error = EBADF; 4572 goto out; 4573 } 4574 vp = fp->f_vnode; 4575 if (vp->v_type != VREG) { 4576 error = ENODEV; 4577 goto out; 4578 } 4579 if (offset < 0 || len <= 0) { 4580 error = EINVAL; 4581 goto out; 4582 } 4583 /* Check for wrap. */ 4584 if (offset > OFF_MAX - len) { 4585 error = EFBIG; 4586 goto out; 4587 } 4588 4589 /* Allocating blocks may take a long time, so iterate. 
*/ 4590 for (;;) { 4591 olen = len; 4592 ooffset = offset; 4593 4594 bwillwrite(); 4595 mp = NULL; 4596 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4597 if (error != 0) 4598 break; 4599 error = vn_lock(vp, LK_EXCLUSIVE); 4600 if (error != 0) { 4601 vn_finished_write(mp); 4602 break; 4603 } 4604 #ifdef MAC 4605 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4606 if (error == 0) 4607 #endif 4608 error = VOP_ALLOCATE(vp, &offset, &len); 4609 VOP_UNLOCK(vp, 0); 4610 vn_finished_write(mp); 4611 4612 if (olen + ooffset != offset + len) { 4613 panic("offset + len changed from %jx/%jx to %jx/%jx", 4614 ooffset, olen, offset, len); 4615 } 4616 if (error != 0 || len == 0) 4617 break; 4618 KASSERT(olen > len, ("Iteration did not make progress?")); 4619 maybe_yield(); 4620 } 4621 out: 4622 if (fp != NULL) 4623 fdrop(fp, td); 4624 return (error); 4625 } 4626 4627 int 4628 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4629 { 4630 4631 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4632 uap->len); 4633 return (0); 4634 } 4635 4636 /* 4637 * Unlike madvise(2), we do not make a best effort to remember every 4638 * possible caching hint. Instead, we remember the last setting with 4639 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4640 * region of any current setting. 
4641 */ 4642 int 4643 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4644 int advice) 4645 { 4646 struct fadvise_info *fa, *new; 4647 struct file *fp; 4648 struct vnode *vp; 4649 cap_rights_t rights; 4650 off_t end; 4651 int error; 4652 4653 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4654 return (EINVAL); 4655 switch (advice) { 4656 case POSIX_FADV_SEQUENTIAL: 4657 case POSIX_FADV_RANDOM: 4658 case POSIX_FADV_NOREUSE: 4659 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4660 break; 4661 case POSIX_FADV_NORMAL: 4662 case POSIX_FADV_WILLNEED: 4663 case POSIX_FADV_DONTNEED: 4664 new = NULL; 4665 break; 4666 default: 4667 return (EINVAL); 4668 } 4669 /* XXX: CAP_POSIX_FADVISE? */ 4670 error = fget(td, fd, cap_rights_init(&rights), &fp); 4671 if (error != 0) 4672 goto out; 4673 4674 switch (fp->f_type) { 4675 case DTYPE_VNODE: 4676 break; 4677 case DTYPE_PIPE: 4678 case DTYPE_FIFO: 4679 error = ESPIPE; 4680 goto out; 4681 default: 4682 error = ENODEV; 4683 goto out; 4684 } 4685 vp = fp->f_vnode; 4686 if (vp->v_type != VREG) { 4687 error = ENODEV; 4688 goto out; 4689 } 4690 if (len == 0) 4691 end = OFF_MAX; 4692 else 4693 end = offset + len - 1; 4694 switch (advice) { 4695 case POSIX_FADV_SEQUENTIAL: 4696 case POSIX_FADV_RANDOM: 4697 case POSIX_FADV_NOREUSE: 4698 /* 4699 * Try to merge any existing non-standard region with 4700 * this new region if possible, otherwise create a new 4701 * non-standard region for this request. 
4702 */ 4703 mtx_pool_lock(mtxpool_sleep, fp); 4704 fa = fp->f_advice; 4705 if (fa != NULL && fa->fa_advice == advice && 4706 ((fa->fa_start <= end && fa->fa_end >= offset) || 4707 (end != OFF_MAX && fa->fa_start == end + 1) || 4708 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4709 if (offset < fa->fa_start) 4710 fa->fa_start = offset; 4711 if (end > fa->fa_end) 4712 fa->fa_end = end; 4713 } else { 4714 new->fa_advice = advice; 4715 new->fa_start = offset; 4716 new->fa_end = end; 4717 new->fa_prevstart = 0; 4718 new->fa_prevend = 0; 4719 fp->f_advice = new; 4720 new = fa; 4721 } 4722 mtx_pool_unlock(mtxpool_sleep, fp); 4723 break; 4724 case POSIX_FADV_NORMAL: 4725 /* 4726 * If a the "normal" region overlaps with an existing 4727 * non-standard region, trim or remove the 4728 * non-standard region. 4729 */ 4730 mtx_pool_lock(mtxpool_sleep, fp); 4731 fa = fp->f_advice; 4732 if (fa != NULL) { 4733 if (offset <= fa->fa_start && end >= fa->fa_end) { 4734 new = fa; 4735 fp->f_advice = NULL; 4736 } else if (offset <= fa->fa_start && 4737 end >= fa->fa_start) 4738 fa->fa_start = end + 1; 4739 else if (offset <= fa->fa_end && end >= fa->fa_end) 4740 fa->fa_end = offset - 1; 4741 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4742 /* 4743 * If the "normal" region is a middle 4744 * portion of the existing 4745 * non-standard region, just remove 4746 * the whole thing rather than picking 4747 * one side or the other to 4748 * preserve. 
4749 */ 4750 new = fa; 4751 fp->f_advice = NULL; 4752 } 4753 } 4754 mtx_pool_unlock(mtxpool_sleep, fp); 4755 break; 4756 case POSIX_FADV_WILLNEED: 4757 case POSIX_FADV_DONTNEED: 4758 error = VOP_ADVISE(vp, offset, end, advice); 4759 break; 4760 } 4761 out: 4762 if (fp != NULL) 4763 fdrop(fp, td); 4764 free(new, M_FADVISE); 4765 return (error); 4766 } 4767 4768 int 4769 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4770 { 4771 4772 td->td_retval[0] = kern_posix_fadvise(td, uap->fd, uap->offset, 4773 uap->len, uap->advice); 4774 return (0); 4775 } 4776