1 /*- 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_capsicum.h" 41 #include "opt_compat.h" 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/bio.h> 47 #include <sys/buf.h> 48 #include <sys/capability.h> 49 #include <sys/disk.h> 50 #include <sys/sysent.h> 51 #include <sys/malloc.h> 52 #include <sys/mount.h> 53 #include <sys/mutex.h> 54 #include <sys/sysproto.h> 55 #include <sys/namei.h> 56 #include <sys/filedesc.h> 57 #include <sys/kernel.h> 58 #include <sys/fcntl.h> 59 #include <sys/file.h> 60 #include <sys/filio.h> 61 #include <sys/limits.h> 62 #include <sys/linker.h> 63 #include <sys/rwlock.h> 64 #include <sys/sdt.h> 65 #include <sys/stat.h> 66 #include <sys/sx.h> 67 #include <sys/unistd.h> 68 #include <sys/vnode.h> 69 #include <sys/priv.h> 70 #include <sys/proc.h> 71 #include <sys/dirent.h> 72 #include <sys/jail.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #ifdef KTRACE 76 #include <sys/ktrace.h> 77 #endif 78 79 #include <machine/stdarg.h> 80 81 #include <security/audit/audit.h> 82 #include <security/mac/mac_framework.h> 83 84 #include <vm/vm.h> 85 #include <vm/vm_object.h> 86 #include <vm/vm_page.h> 87 #include <vm/uma.h> 88 89 #include <ufs/ufs/quota.h> 90 91 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 92 93 SDT_PROVIDER_DEFINE(vfs); 94 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 95 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 96 97 static int chroot_refuse_vdir_fds(struct filedesc *fdp); 98 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 99 static int kern_chflags(struct thread *td, const char *path, 100 enum uio_seg pathseg, u_long flags); 101 static int kern_chflagsat(struct thread *td, int fd, const char *path, 102 enum uio_seg pathseg, u_long flags, int atflag); 103 static int setfflags(struct thread *td, struct vnode *, u_long); 
104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * The module initialization routine for POSIX asynchronous I/O will 111 * set this to the version of AIO that it implements. (Zero means 112 * that it is not implemented.) This value is used here by pathconf() 113 * and in kern_descrip.c by fpathconf(). 114 */ 115 int async_io_version; 116 117 #ifdef DEBUG 118 static int syncprt = 0; 119 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 120 #endif 121 122 /* 123 * Sync each mounted filesystem. 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct sync_args { 127 int dummy; 128 }; 129 #endif 130 /* ARGSUSED */ 131 int 132 sys_sync(td, uap) 133 struct thread *td; 134 struct sync_args *uap; 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_msync(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Change filesystem quotas. 
163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct quotactl_args { 166 char *path; 167 int cmd; 168 int uid; 169 caddr_t arg; 170 }; 171 #endif 172 int 173 sys_quotactl(td, uap) 174 struct thread *td; 175 register struct quotactl_args /* { 176 char *path; 177 int cmd; 178 int uid; 179 caddr_t arg; 180 } */ *uap; 181 { 182 struct mount *mp; 183 struct nameidata nd; 184 int error; 185 186 AUDIT_ARG_CMD(uap->cmd); 187 AUDIT_ARG_UID(uap->uid); 188 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 189 return (EPERM); 190 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 191 uap->path, td); 192 if ((error = namei(&nd)) != 0) 193 return (error); 194 NDFREE(&nd, NDF_ONLY_PNBUF); 195 mp = nd.ni_vp->v_mount; 196 vfs_ref(mp); 197 vput(nd.ni_vp); 198 error = vfs_busy(mp, 0); 199 vfs_rel(mp); 200 if (error != 0) 201 return (error); 202 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 203 204 /* 205 * Since quota on operation typically needs to open quota 206 * file, the Q_QUOTAON handler needs to unbusy the mount point 207 * before calling into namei. Otherwise, unmount might be 208 * started between two vfs_busy() invocations (first is our, 209 * second is from mount point cross-walk code in lookup()), 210 * causing deadlock. 211 * 212 * Require that Q_QUOTAON handles the vfs_busy() reference on 213 * its own, always returning with ubusied mount point. 214 */ 215 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON) 216 vfs_unbusy(mp); 217 return (error); 218 } 219 220 /* 221 * Used by statfs conversion routines to scale the block size up if 222 * necessary so that all of the block counts are <= 'max_size'. Note 223 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 224 * value of 'n'. 
225 */ 226 void 227 statfs_scale_blocks(struct statfs *sf, long max_size) 228 { 229 uint64_t count; 230 int shift; 231 232 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 233 234 /* 235 * Attempt to scale the block counts to give a more accurate 236 * overview to userland of the ratio of free space to used 237 * space. To do this, find the largest block count and compute 238 * a divisor that lets it fit into a signed integer <= max_size. 239 */ 240 if (sf->f_bavail < 0) 241 count = -sf->f_bavail; 242 else 243 count = sf->f_bavail; 244 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 245 if (count <= max_size) 246 return; 247 248 count >>= flsl(max_size); 249 shift = 0; 250 while (count > 0) { 251 shift++; 252 count >>=1; 253 } 254 255 sf->f_bsize <<= shift; 256 sf->f_blocks >>= shift; 257 sf->f_bfree >>= shift; 258 sf->f_bavail >>= shift; 259 } 260 261 /* 262 * Get filesystem statistics. 263 */ 264 #ifndef _SYS_SYSPROTO_H_ 265 struct statfs_args { 266 char *path; 267 struct statfs *buf; 268 }; 269 #endif 270 int 271 sys_statfs(td, uap) 272 struct thread *td; 273 register struct statfs_args /* { 274 char *path; 275 struct statfs *buf; 276 } */ *uap; 277 { 278 struct statfs sf; 279 int error; 280 281 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 282 if (error == 0) 283 error = copyout(&sf, uap->buf, sizeof(sf)); 284 return (error); 285 } 286 287 int 288 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 289 struct statfs *buf) 290 { 291 struct mount *mp; 292 struct statfs *sp, sb; 293 struct nameidata nd; 294 int error; 295 296 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 297 pathseg, path, td); 298 error = namei(&nd); 299 if (error != 0) 300 return (error); 301 mp = nd.ni_vp->v_mount; 302 vfs_ref(mp); 303 NDFREE(&nd, NDF_ONLY_PNBUF); 304 vput(nd.ni_vp); 305 error = vfs_busy(mp, 0); 306 vfs_rel(mp); 307 if (error != 0) 308 return (error); 309 #ifdef MAC 310 error = mac_mount_check_stat(td->td_ucred, 
mp); 311 if (error != 0) 312 goto out; 313 #endif 314 /* 315 * Set these in case the underlying filesystem fails to do so. 316 */ 317 sp = &mp->mnt_stat; 318 sp->f_version = STATFS_VERSION; 319 sp->f_namemax = NAME_MAX; 320 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 321 error = VFS_STATFS(mp, sp); 322 if (error != 0) 323 goto out; 324 if (priv_check(td, PRIV_VFS_GENERATION)) { 325 bcopy(sp, &sb, sizeof(sb)); 326 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 327 prison_enforce_statfs(td->td_ucred, mp, &sb); 328 sp = &sb; 329 } 330 *buf = *sp; 331 out: 332 vfs_unbusy(mp); 333 return (error); 334 } 335 336 /* 337 * Get filesystem statistics. 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct fstatfs_args { 341 int fd; 342 struct statfs *buf; 343 }; 344 #endif 345 int 346 sys_fstatfs(td, uap) 347 struct thread *td; 348 register struct fstatfs_args /* { 349 int fd; 350 struct statfs *buf; 351 } */ *uap; 352 { 353 struct statfs sf; 354 int error; 355 356 error = kern_fstatfs(td, uap->fd, &sf); 357 if (error == 0) 358 error = copyout(&sf, uap->buf, sizeof(sf)); 359 return (error); 360 } 361 362 int 363 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 364 { 365 struct file *fp; 366 struct mount *mp; 367 struct statfs *sp, sb; 368 struct vnode *vp; 369 cap_rights_t rights; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode(td->td_proc->p_fd, fd, 374 cap_rights_init(&rights, CAP_FSTATFS), &fp); 375 if (error != 0) 376 return (error); 377 vp = fp->f_vnode; 378 vn_lock(vp, LK_SHARED | LK_RETRY); 379 #ifdef AUDIT 380 AUDIT_ARG_VNODE1(vp); 381 #endif 382 mp = vp->v_mount; 383 if (mp) 384 vfs_ref(mp); 385 VOP_UNLOCK(vp, 0); 386 fdrop(fp, td); 387 if (mp == NULL) { 388 error = EBADF; 389 goto out; 390 } 391 error = vfs_busy(mp, 0); 392 vfs_rel(mp); 393 if (error != 0) 394 return (error); 395 #ifdef MAC 396 error = mac_mount_check_stat(td->td_ucred, mp); 397 if (error != 0) 398 goto out; 399 #endif 400 /* 401 * Set these in case the underlying filesystem fails to do 
so. 402 */ 403 sp = &mp->mnt_stat; 404 sp->f_version = STATFS_VERSION; 405 sp->f_namemax = NAME_MAX; 406 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 407 error = VFS_STATFS(mp, sp); 408 if (error != 0) 409 goto out; 410 if (priv_check(td, PRIV_VFS_GENERATION)) { 411 bcopy(sp, &sb, sizeof(sb)); 412 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 413 prison_enforce_statfs(td->td_ucred, mp, &sb); 414 sp = &sb; 415 } 416 *buf = *sp; 417 out: 418 if (mp) 419 vfs_unbusy(mp); 420 return (error); 421 } 422 423 /* 424 * Get statistics on all filesystems. 425 */ 426 #ifndef _SYS_SYSPROTO_H_ 427 struct getfsstat_args { 428 struct statfs *buf; 429 long bufsize; 430 int flags; 431 }; 432 #endif 433 int 434 sys_getfsstat(td, uap) 435 struct thread *td; 436 register struct getfsstat_args /* { 437 struct statfs *buf; 438 long bufsize; 439 int flags; 440 } */ *uap; 441 { 442 443 return (kern_getfsstat(td, &uap->buf, uap->bufsize, UIO_USERSPACE, 444 uap->flags)); 445 } 446 447 /* 448 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 449 * The caller is responsible for freeing memory which will be allocated 450 * in '*buf'. 
451 */ 452 int 453 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 454 enum uio_seg bufseg, int flags) 455 { 456 struct mount *mp, *nmp; 457 struct statfs *sfsp, *sp, sb; 458 size_t count, maxcount; 459 int error; 460 461 maxcount = bufsize / sizeof(struct statfs); 462 if (bufsize == 0) 463 sfsp = NULL; 464 else if (bufseg == UIO_USERSPACE) 465 sfsp = *buf; 466 else /* if (bufseg == UIO_SYSSPACE) */ { 467 count = 0; 468 mtx_lock(&mountlist_mtx); 469 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 470 count++; 471 } 472 mtx_unlock(&mountlist_mtx); 473 if (maxcount > count) 474 maxcount = count; 475 sfsp = *buf = malloc(maxcount * sizeof(struct statfs), M_TEMP, 476 M_WAITOK); 477 } 478 count = 0; 479 mtx_lock(&mountlist_mtx); 480 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 481 if (prison_canseemount(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #ifdef MAC 486 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 continue; 489 } 490 #endif 491 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 492 nmp = TAILQ_NEXT(mp, mnt_list); 493 continue; 494 } 495 if (sfsp && count < maxcount) { 496 sp = &mp->mnt_stat; 497 /* 498 * Set these in case the underlying filesystem 499 * fails to do so. 500 */ 501 sp->f_version = STATFS_VERSION; 502 sp->f_namemax = NAME_MAX; 503 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 504 /* 505 * If MNT_NOWAIT or MNT_LAZY is specified, do not 506 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 507 * overrides MNT_WAIT. 
508 */ 509 if (((flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 510 (flags & MNT_WAIT)) && 511 (error = VFS_STATFS(mp, sp))) { 512 mtx_lock(&mountlist_mtx); 513 nmp = TAILQ_NEXT(mp, mnt_list); 514 vfs_unbusy(mp); 515 continue; 516 } 517 if (priv_check(td, PRIV_VFS_GENERATION)) { 518 bcopy(sp, &sb, sizeof(sb)); 519 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 520 prison_enforce_statfs(td->td_ucred, mp, &sb); 521 sp = &sb; 522 } 523 if (bufseg == UIO_SYSSPACE) 524 bcopy(sp, sfsp, sizeof(*sp)); 525 else /* if (bufseg == UIO_USERSPACE) */ { 526 error = copyout(sp, sfsp, sizeof(*sp)); 527 if (error != 0) { 528 vfs_unbusy(mp); 529 return (error); 530 } 531 } 532 sfsp++; 533 } 534 count++; 535 mtx_lock(&mountlist_mtx); 536 nmp = TAILQ_NEXT(mp, mnt_list); 537 vfs_unbusy(mp); 538 } 539 mtx_unlock(&mountlist_mtx); 540 if (sfsp && count > maxcount) 541 td->td_retval[0] = maxcount; 542 else 543 td->td_retval[0] = count; 544 return (0); 545 } 546 547 #ifdef COMPAT_FREEBSD4 548 /* 549 * Get old format filesystem statistics. 550 */ 551 static void cvtstatfs(struct statfs *, struct ostatfs *); 552 553 #ifndef _SYS_SYSPROTO_H_ 554 struct freebsd4_statfs_args { 555 char *path; 556 struct ostatfs *buf; 557 }; 558 #endif 559 int 560 freebsd4_statfs(td, uap) 561 struct thread *td; 562 struct freebsd4_statfs_args /* { 563 char *path; 564 struct ostatfs *buf; 565 } */ *uap; 566 { 567 struct ostatfs osb; 568 struct statfs sf; 569 int error; 570 571 error = kern_statfs(td, uap->path, UIO_USERSPACE, &sf); 572 if (error != 0) 573 return (error); 574 cvtstatfs(&sf, &osb); 575 return (copyout(&osb, uap->buf, sizeof(osb))); 576 } 577 578 /* 579 * Get filesystem statistics. 
580 */ 581 #ifndef _SYS_SYSPROTO_H_ 582 struct freebsd4_fstatfs_args { 583 int fd; 584 struct ostatfs *buf; 585 }; 586 #endif 587 int 588 freebsd4_fstatfs(td, uap) 589 struct thread *td; 590 struct freebsd4_fstatfs_args /* { 591 int fd; 592 struct ostatfs *buf; 593 } */ *uap; 594 { 595 struct ostatfs osb; 596 struct statfs sf; 597 int error; 598 599 error = kern_fstatfs(td, uap->fd, &sf); 600 if (error != 0) 601 return (error); 602 cvtstatfs(&sf, &osb); 603 return (copyout(&osb, uap->buf, sizeof(osb))); 604 } 605 606 /* 607 * Get statistics on all filesystems. 608 */ 609 #ifndef _SYS_SYSPROTO_H_ 610 struct freebsd4_getfsstat_args { 611 struct ostatfs *buf; 612 long bufsize; 613 int flags; 614 }; 615 #endif 616 int 617 freebsd4_getfsstat(td, uap) 618 struct thread *td; 619 register struct freebsd4_getfsstat_args /* { 620 struct ostatfs *buf; 621 long bufsize; 622 int flags; 623 } */ *uap; 624 { 625 struct statfs *buf, *sp; 626 struct ostatfs osb; 627 size_t count, size; 628 int error; 629 630 count = uap->bufsize / sizeof(struct ostatfs); 631 size = count * sizeof(struct statfs); 632 error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); 633 if (size > 0) { 634 count = td->td_retval[0]; 635 sp = buf; 636 while (count > 0 && error == 0) { 637 cvtstatfs(sp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 sp++; 640 uap->buf++; 641 count--; 642 } 643 free(buf, M_TEMP); 644 } 645 return (error); 646 } 647 648 /* 649 * Implement fstatfs() for (NFS) file handles. 
650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct freebsd4_fhstatfs_args { 653 struct fhandle *u_fhp; 654 struct ostatfs *buf; 655 }; 656 #endif 657 int 658 freebsd4_fhstatfs(td, uap) 659 struct thread *td; 660 struct freebsd4_fhstatfs_args /* { 661 struct fhandle *u_fhp; 662 struct ostatfs *buf; 663 } */ *uap; 664 { 665 struct ostatfs osb; 666 struct statfs sf; 667 fhandle_t fh; 668 int error; 669 670 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 671 if (error != 0) 672 return (error); 673 error = kern_fhstatfs(td, fh, &sf); 674 if (error != 0) 675 return (error); 676 cvtstatfs(&sf, &osb); 677 return (copyout(&osb, uap->buf, sizeof(osb))); 678 } 679 680 /* 681 * Convert a new format statfs structure to an old format statfs structure. 682 */ 683 static void 684 cvtstatfs(nsp, osp) 685 struct statfs *nsp; 686 struct ostatfs *osp; 687 { 688 689 statfs_scale_blocks(nsp, LONG_MAX); 690 bzero(osp, sizeof(*osp)); 691 osp->f_bsize = nsp->f_bsize; 692 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 693 osp->f_blocks = nsp->f_blocks; 694 osp->f_bfree = nsp->f_bfree; 695 osp->f_bavail = nsp->f_bavail; 696 osp->f_files = MIN(nsp->f_files, LONG_MAX); 697 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 698 osp->f_owner = nsp->f_owner; 699 osp->f_type = nsp->f_type; 700 osp->f_flags = nsp->f_flags; 701 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 702 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 703 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 704 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 705 strlcpy(osp->f_fstypename, nsp->f_fstypename, 706 MIN(MFSNAMELEN, OMFSNAMELEN)); 707 strlcpy(osp->f_mntonname, nsp->f_mntonname, 708 MIN(MNAMELEN, OMNAMELEN)); 709 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 710 MIN(MNAMELEN, OMNAMELEN)); 711 osp->f_fsid = nsp->f_fsid; 712 } 713 #endif /* COMPAT_FREEBSD4 */ 714 715 /* 716 * Change current working directory to a given file descriptor. 
717 */ 718 #ifndef _SYS_SYSPROTO_H_ 719 struct fchdir_args { 720 int fd; 721 }; 722 #endif 723 int 724 sys_fchdir(td, uap) 725 struct thread *td; 726 struct fchdir_args /* { 727 int fd; 728 } */ *uap; 729 { 730 register struct filedesc *fdp = td->td_proc->p_fd; 731 struct vnode *vp, *tdp, *vpold; 732 struct mount *mp; 733 struct file *fp; 734 cap_rights_t rights; 735 int error; 736 737 AUDIT_ARG_FD(uap->fd); 738 error = getvnode(fdp, uap->fd, cap_rights_init(&rights, CAP_FCHDIR), 739 &fp); 740 if (error != 0) 741 return (error); 742 vp = fp->f_vnode; 743 VREF(vp); 744 fdrop(fp, td); 745 vn_lock(vp, LK_SHARED | LK_RETRY); 746 AUDIT_ARG_VNODE1(vp); 747 error = change_dir(vp, td); 748 while (!error && (mp = vp->v_mountedhere) != NULL) { 749 if (vfs_busy(mp, 0)) 750 continue; 751 error = VFS_ROOT(mp, LK_SHARED, &tdp); 752 vfs_unbusy(mp); 753 if (error != 0) 754 break; 755 vput(vp); 756 vp = tdp; 757 } 758 if (error != 0) { 759 vput(vp); 760 return (error); 761 } 762 VOP_UNLOCK(vp, 0); 763 FILEDESC_XLOCK(fdp); 764 vpold = fdp->fd_cdir; 765 fdp->fd_cdir = vp; 766 FILEDESC_XUNLOCK(fdp); 767 vrele(vpold); 768 return (0); 769 } 770 771 /* 772 * Change current working directory (``.''). 
773 */ 774 #ifndef _SYS_SYSPROTO_H_ 775 struct chdir_args { 776 char *path; 777 }; 778 #endif 779 int 780 sys_chdir(td, uap) 781 struct thread *td; 782 struct chdir_args /* { 783 char *path; 784 } */ *uap; 785 { 786 787 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 788 } 789 790 int 791 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 792 { 793 register struct filedesc *fdp = td->td_proc->p_fd; 794 struct nameidata nd; 795 struct vnode *vp; 796 int error; 797 798 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 799 pathseg, path, td); 800 if ((error = namei(&nd)) != 0) 801 return (error); 802 if ((error = change_dir(nd.ni_vp, td)) != 0) { 803 vput(nd.ni_vp); 804 NDFREE(&nd, NDF_ONLY_PNBUF); 805 return (error); 806 } 807 VOP_UNLOCK(nd.ni_vp, 0); 808 NDFREE(&nd, NDF_ONLY_PNBUF); 809 FILEDESC_XLOCK(fdp); 810 vp = fdp->fd_cdir; 811 fdp->fd_cdir = nd.ni_vp; 812 FILEDESC_XUNLOCK(fdp); 813 vrele(vp); 814 return (0); 815 } 816 817 /* 818 * Helper function for raised chroot(2) security function: Refuse if 819 * any filedescriptors are open directories. 820 */ 821 static int 822 chroot_refuse_vdir_fds(fdp) 823 struct filedesc *fdp; 824 { 825 struct vnode *vp; 826 struct file *fp; 827 int fd; 828 829 FILEDESC_LOCK_ASSERT(fdp); 830 831 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 832 fp = fget_locked(fdp, fd); 833 if (fp == NULL) 834 continue; 835 if (fp->f_type == DTYPE_VNODE) { 836 vp = fp->f_vnode; 837 if (vp->v_type == VDIR) 838 return (EPERM); 839 } 840 } 841 return (0); 842 } 843 844 /* 845 * This sysctl determines if we will allow a process to chroot(2) if it 846 * has a directory open: 847 * 0: disallowed for all processes. 848 * 1: allowed for processes that were not already chroot(2)'ed. 849 * 2: allowed for all processes. 
850 */ 851 852 static int chroot_allow_open_directories = 1; 853 854 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 855 &chroot_allow_open_directories, 0, 856 "Allow a process to chroot(2) if it has a directory open"); 857 858 /* 859 * Change notion of root (``/'') directory. 860 */ 861 #ifndef _SYS_SYSPROTO_H_ 862 struct chroot_args { 863 char *path; 864 }; 865 #endif 866 int 867 sys_chroot(td, uap) 868 struct thread *td; 869 struct chroot_args /* { 870 char *path; 871 } */ *uap; 872 { 873 struct nameidata nd; 874 int error; 875 876 error = priv_check(td, PRIV_VFS_CHROOT); 877 if (error != 0) 878 return (error); 879 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 880 UIO_USERSPACE, uap->path, td); 881 error = namei(&nd); 882 if (error != 0) 883 goto error; 884 error = change_dir(nd.ni_vp, td); 885 if (error != 0) 886 goto e_vunlock; 887 #ifdef MAC 888 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 889 if (error != 0) 890 goto e_vunlock; 891 #endif 892 VOP_UNLOCK(nd.ni_vp, 0); 893 error = change_root(nd.ni_vp, td); 894 vrele(nd.ni_vp); 895 NDFREE(&nd, NDF_ONLY_PNBUF); 896 return (error); 897 e_vunlock: 898 vput(nd.ni_vp); 899 error: 900 NDFREE(&nd, NDF_ONLY_PNBUF); 901 return (error); 902 } 903 904 /* 905 * Common routine for chroot and chdir. Callers must provide a locked vnode 906 * instance. 907 */ 908 int 909 change_dir(vp, td) 910 struct vnode *vp; 911 struct thread *td; 912 { 913 #ifdef MAC 914 int error; 915 #endif 916 917 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 918 if (vp->v_type != VDIR) 919 return (ENOTDIR); 920 #ifdef MAC 921 error = mac_vnode_check_chdir(td->td_ucred, vp); 922 if (error != 0) 923 return (error); 924 #endif 925 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 926 } 927 928 /* 929 * Common routine for kern_chroot() and jail_attach(). The caller is 930 * responsible for invoking priv_check() and mac_vnode_check_chroot() to 931 * authorize this operation. 
932 */ 933 int 934 change_root(vp, td) 935 struct vnode *vp; 936 struct thread *td; 937 { 938 struct filedesc *fdp; 939 struct vnode *oldvp; 940 int error; 941 942 fdp = td->td_proc->p_fd; 943 FILEDESC_XLOCK(fdp); 944 if (chroot_allow_open_directories == 0 || 945 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 946 error = chroot_refuse_vdir_fds(fdp); 947 if (error != 0) { 948 FILEDESC_XUNLOCK(fdp); 949 return (error); 950 } 951 } 952 oldvp = fdp->fd_rdir; 953 fdp->fd_rdir = vp; 954 VREF(fdp->fd_rdir); 955 if (!fdp->fd_jdir) { 956 fdp->fd_jdir = vp; 957 VREF(fdp->fd_jdir); 958 } 959 FILEDESC_XUNLOCK(fdp); 960 vrele(oldvp); 961 return (0); 962 } 963 964 static __inline void 965 flags_to_rights(int flags, cap_rights_t *rightsp) 966 { 967 968 if (flags & O_EXEC) { 969 cap_rights_set(rightsp, CAP_FEXECVE); 970 } else { 971 switch ((flags & O_ACCMODE)) { 972 case O_RDONLY: 973 cap_rights_set(rightsp, CAP_READ); 974 break; 975 case O_RDWR: 976 cap_rights_set(rightsp, CAP_READ); 977 /* FALLTHROUGH */ 978 case O_WRONLY: 979 cap_rights_set(rightsp, CAP_WRITE); 980 if (!(flags & (O_APPEND | O_TRUNC))) 981 cap_rights_set(rightsp, CAP_SEEK); 982 break; 983 } 984 } 985 986 if (flags & O_CREAT) 987 cap_rights_set(rightsp, CAP_CREATE); 988 989 if (flags & O_TRUNC) 990 cap_rights_set(rightsp, CAP_FTRUNCATE); 991 992 if (flags & (O_SYNC | O_FSYNC)) 993 cap_rights_set(rightsp, CAP_FSYNC); 994 995 if (flags & (O_EXLOCK | O_SHLOCK)) 996 cap_rights_set(rightsp, CAP_FLOCK); 997 } 998 999 /* 1000 * Check permissions, allocate an open file structure, and call the device 1001 * open routine if any. 
1002 */ 1003 #ifndef _SYS_SYSPROTO_H_ 1004 struct open_args { 1005 char *path; 1006 int flags; 1007 int mode; 1008 }; 1009 #endif 1010 int 1011 sys_open(td, uap) 1012 struct thread *td; 1013 register struct open_args /* { 1014 char *path; 1015 int flags; 1016 int mode; 1017 } */ *uap; 1018 { 1019 1020 return (kern_open(td, uap->path, UIO_USERSPACE, uap->flags, uap->mode)); 1021 } 1022 1023 #ifndef _SYS_SYSPROTO_H_ 1024 struct openat_args { 1025 int fd; 1026 char *path; 1027 int flag; 1028 int mode; 1029 }; 1030 #endif 1031 int 1032 sys_openat(struct thread *td, struct openat_args *uap) 1033 { 1034 1035 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1036 uap->mode)); 1037 } 1038 1039 int 1040 kern_open(struct thread *td, char *path, enum uio_seg pathseg, int flags, 1041 int mode) 1042 { 1043 1044 return (kern_openat(td, AT_FDCWD, path, pathseg, flags, mode)); 1045 } 1046 1047 int 1048 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1049 int flags, int mode) 1050 { 1051 struct proc *p = td->td_proc; 1052 struct filedesc *fdp = p->p_fd; 1053 struct file *fp; 1054 struct vnode *vp; 1055 struct nameidata nd; 1056 cap_rights_t rights; 1057 int cmode, error, indx; 1058 1059 indx = -1; 1060 1061 AUDIT_ARG_FFLAGS(flags); 1062 AUDIT_ARG_MODE(mode); 1063 /* XXX: audit dirfd */ 1064 cap_rights_init(&rights, CAP_LOOKUP); 1065 flags_to_rights(flags, &rights); 1066 /* 1067 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1068 * may be specified. 1069 */ 1070 if (flags & O_EXEC) { 1071 if (flags & O_ACCMODE) 1072 return (EINVAL); 1073 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1074 return (EINVAL); 1075 } else { 1076 flags = FFLAGS(flags); 1077 } 1078 1079 /* 1080 * Allocate the file descriptor, but don't install a descriptor yet. 1081 */ 1082 error = falloc_noinstall(td, &fp); 1083 if (error != 0) 1084 return (error); 1085 /* 1086 * An extra reference on `fp' has been held for us by 1087 * falloc_noinstall(). 
1088 */ 1089 /* Set the flags early so the finit in devfs can pick them up. */ 1090 fp->f_flag = flags & FMASK; 1091 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1092 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1093 &rights, td); 1094 td->td_dupfd = -1; /* XXX check for fdopen */ 1095 error = vn_open(&nd, &flags, cmode, fp); 1096 if (error != 0) { 1097 /* 1098 * If the vn_open replaced the method vector, something 1099 * wonderous happened deep below and we just pass it up 1100 * pretending we know what we do. 1101 */ 1102 if (error == ENXIO && fp->f_ops != &badfileops) 1103 goto success; 1104 1105 /* 1106 * Handle special fdopen() case. bleh. 1107 * 1108 * Don't do this for relative (capability) lookups; we don't 1109 * understand exactly what would happen, and we don't think 1110 * that it ever should. 1111 */ 1112 if (nd.ni_strictrelative == 0 && 1113 (error == ENODEV || error == ENXIO) && 1114 td->td_dupfd >= 0) { 1115 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1116 &indx); 1117 if (error == 0) 1118 goto success; 1119 } 1120 1121 goto bad; 1122 } 1123 td->td_dupfd = 0; 1124 NDFREE(&nd, NDF_ONLY_PNBUF); 1125 vp = nd.ni_vp; 1126 1127 /* 1128 * Store the vnode, for any f_type. Typically, the vnode use 1129 * count is decremented by direct call to vn_closefile() for 1130 * files that switched type in the cdevsw fdopen() method. 1131 */ 1132 fp->f_vnode = vp; 1133 /* 1134 * If the file wasn't claimed by devfs bind it to the normal 1135 * vnode operations here. 
1136 */ 1137 if (fp->f_ops == &badfileops) { 1138 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1139 fp->f_seqcount = 1; 1140 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1141 DTYPE_VNODE, vp, &vnops); 1142 } 1143 1144 VOP_UNLOCK(vp, 0); 1145 if (flags & O_TRUNC) { 1146 error = fo_truncate(fp, 0, td->td_ucred, td); 1147 if (error != 0) 1148 goto bad; 1149 } 1150 success: 1151 /* 1152 * If we haven't already installed the FD (for dupfdopen), do so now. 1153 */ 1154 if (indx == -1) { 1155 struct filecaps *fcaps; 1156 1157 #ifdef CAPABILITIES 1158 if (nd.ni_strictrelative == 1) 1159 fcaps = &nd.ni_filecaps; 1160 else 1161 #endif 1162 fcaps = NULL; 1163 error = finstall(td, fp, &indx, flags, fcaps); 1164 /* On success finstall() consumes fcaps. */ 1165 if (error != 0) { 1166 filecaps_free(&nd.ni_filecaps); 1167 goto bad; 1168 } 1169 } else { 1170 filecaps_free(&nd.ni_filecaps); 1171 } 1172 1173 /* 1174 * Release our private reference, leaving the one associated with 1175 * the descriptor table intact. 1176 */ 1177 fdrop(fp, td); 1178 td->td_retval[0] = indx; 1179 return (0); 1180 bad: 1181 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1182 fdrop(fp, td); 1183 return (error); 1184 } 1185 1186 #ifdef COMPAT_43 1187 /* 1188 * Create a file. 1189 */ 1190 #ifndef _SYS_SYSPROTO_H_ 1191 struct ocreat_args { 1192 char *path; 1193 int mode; 1194 }; 1195 #endif 1196 int 1197 ocreat(td, uap) 1198 struct thread *td; 1199 register struct ocreat_args /* { 1200 char *path; 1201 int mode; 1202 } */ *uap; 1203 { 1204 1205 return (kern_open(td, uap->path, UIO_USERSPACE, 1206 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1207 } 1208 #endif /* COMPAT_43 */ 1209 1210 /* 1211 * Create a special file. 
 */
#ifndef _SYS_SYSPROTO_H_
struct mknod_args {
	char	*path;
	int	mode;
	int	dev;
};
#endif
/*
 * mknod(2) entry point: forwards the user-space path, mode and dev to
 * kern_mknod().
 */
int
sys_mknod(td, uap)
	struct thread *td;
	register struct mknod_args /* {
		char *path;
		int mode;
		int dev;
	} */ *uap;
{

	return (kern_mknod(td, uap->path, UIO_USERSPACE, uap->mode, uap->dev));
}

#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/*
 * mknodat(2) entry point: like sys_mknod() but the path is resolved
 * relative to the descriptor uap->fd.
 */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

/*
 * In-kernel mknod: kern_mknodat() with AT_FDCWD as the directory descriptor.
 */
int
kern_mknod(struct thread *td, char *path, enum uio_seg pathseg, int mode,
    int dev)
{

	return (kern_mknodat(td, AT_FDCWD, path, pathseg, mode, dev));
}

/*
 * Common implementation of mknod(2) and mknodat(2).
 *
 * The file type encoded in 'mode' selects the privilege that is checked
 * (device, bad-sector marker or whiteout).  S_IFIFO with dev == 0 is handed
 * to kern_mkfifoat(); every other type yields EINVAL.  Returns 0 or an
 * errno value; EEXIST if the name already resolves to a vnode.
 */
int
kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    int mode, int dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	cap_rights_t rights;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Check the type-specific privilege before doing any lookup. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		break;
	case S_IFMT:
		error = priv_check(td, PRIV_VFS_MKNOD_BAD);
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_MKNODAT), td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: drop everything and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else {
		/* Permission bits requested by the caller, minus the umask. */
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFMT:	/* used by badsect to flag bad sectors */
			vattr.va_type = VBAD;
			break;
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* The first switch already rejected every other type. */
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The non-sleeping attempt failed: release what namei()
		 * returned, retry with V_XSLEEP | PCATCH and then redo the
		 * lookup from the top.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* Whiteouts are not submitted to the MAC create check. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode is not needed by the caller. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Create a named pipe.
1360 */ 1361 #ifndef _SYS_SYSPROTO_H_ 1362 struct mkfifo_args { 1363 char *path; 1364 int mode; 1365 }; 1366 #endif 1367 int 1368 sys_mkfifo(td, uap) 1369 struct thread *td; 1370 register struct mkfifo_args /* { 1371 char *path; 1372 int mode; 1373 } */ *uap; 1374 { 1375 1376 return (kern_mkfifo(td, uap->path, UIO_USERSPACE, uap->mode)); 1377 } 1378 1379 #ifndef _SYS_SYSPROTO_H_ 1380 struct mkfifoat_args { 1381 int fd; 1382 char *path; 1383 mode_t mode; 1384 }; 1385 #endif 1386 int 1387 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1388 { 1389 1390 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1391 uap->mode)); 1392 } 1393 1394 int 1395 kern_mkfifo(struct thread *td, char *path, enum uio_seg pathseg, int mode) 1396 { 1397 1398 return (kern_mkfifoat(td, AT_FDCWD, path, pathseg, mode)); 1399 } 1400 1401 int 1402 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1403 int mode) 1404 { 1405 struct mount *mp; 1406 struct vattr vattr; 1407 struct nameidata nd; 1408 cap_rights_t rights; 1409 int error; 1410 1411 AUDIT_ARG_MODE(mode); 1412 restart: 1413 bwillwrite(); 1414 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1415 pathseg, path, fd, cap_rights_init(&rights, CAP_MKFIFOAT), td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437 #ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error 
!= 0) 1441 goto out; 1442 #endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446 #ifdef MAC 1447 out: 1448 #endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, NDF_ONLY_PNBUF); 1452 return (error); 1453 } 1454 1455 /* 1456 * Make a hard file link. 1457 */ 1458 #ifndef _SYS_SYSPROTO_H_ 1459 struct link_args { 1460 char *path; 1461 char *link; 1462 }; 1463 #endif 1464 int 1465 sys_link(td, uap) 1466 struct thread *td; 1467 register struct link_args /* { 1468 char *path; 1469 char *link; 1470 } */ *uap; 1471 { 1472 1473 return (kern_link(td, uap->path, uap->link, UIO_USERSPACE)); 1474 } 1475 1476 #ifndef _SYS_SYSPROTO_H_ 1477 struct linkat_args { 1478 int fd1; 1479 char *path1; 1480 int fd2; 1481 char *path2; 1482 int flag; 1483 }; 1484 #endif 1485 int 1486 sys_linkat(struct thread *td, struct linkat_args *uap) 1487 { 1488 int flag; 1489 1490 flag = uap->flag; 1491 if (flag & ~AT_SYMLINK_FOLLOW) 1492 return (EINVAL); 1493 1494 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1495 UIO_USERSPACE, (flag & AT_SYMLINK_FOLLOW) ? 
FOLLOW : NOFOLLOW)); 1496 } 1497 1498 int hardlink_check_uid = 0; 1499 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1500 &hardlink_check_uid, 0, 1501 "Unprivileged processes cannot create hard links to files owned by other " 1502 "users"); 1503 static int hardlink_check_gid = 0; 1504 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1505 &hardlink_check_gid, 0, 1506 "Unprivileged processes cannot create hard links to files owned by other " 1507 "groups"); 1508 1509 static int 1510 can_hardlink(struct vnode *vp, struct ucred *cred) 1511 { 1512 struct vattr va; 1513 int error; 1514 1515 if (!hardlink_check_uid && !hardlink_check_gid) 1516 return (0); 1517 1518 error = VOP_GETATTR(vp, &va, cred); 1519 if (error != 0) 1520 return (error); 1521 1522 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1529 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1530 if (error != 0) 1531 return (error); 1532 } 1533 1534 return (0); 1535 } 1536 1537 int 1538 kern_link(struct thread *td, char *path, char *link, enum uio_seg segflg) 1539 { 1540 1541 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, path,link, segflg, FOLLOW)); 1542 } 1543 1544 int 1545 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1546 enum uio_seg segflg, int follow) 1547 { 1548 struct vnode *vp; 1549 struct mount *mp; 1550 struct nameidata nd; 1551 cap_rights_t rights; 1552 int error; 1553 1554 bwillwrite(); 1555 NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); 1556 1557 if ((error = namei(&nd)) != 0) 1558 return (error); 1559 NDFREE(&nd, NDF_ONLY_PNBUF); 1560 vp = nd.ni_vp; 1561 if (vp->v_type == VDIR) { 1562 vrele(vp); 1563 return (EPERM); /* POSIX */ 1564 } 1565 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 1566 vrele(vp); 1567 return 
(error); 1568 } 1569 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, 1570 segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); 1571 if ((error = namei(&nd)) == 0) { 1572 if (nd.ni_vp != NULL) { 1573 if (nd.ni_dvp == nd.ni_vp) 1574 vrele(nd.ni_dvp); 1575 else 1576 vput(nd.ni_dvp); 1577 vrele(nd.ni_vp); 1578 error = EEXIST; 1579 } else if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) 1580 == 0) { 1581 error = can_hardlink(vp, td->td_ucred); 1582 if (error == 0) 1583 #ifdef MAC 1584 error = mac_vnode_check_link(td->td_ucred, 1585 nd.ni_dvp, vp, &nd.ni_cnd); 1586 if (error == 0) 1587 #endif 1588 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1589 VOP_UNLOCK(vp, 0); 1590 vput(nd.ni_dvp); 1591 } 1592 NDFREE(&nd, NDF_ONLY_PNBUF); 1593 } 1594 vrele(vp); 1595 vn_finished_write(mp); 1596 return (error); 1597 } 1598 1599 /* 1600 * Make a symbolic link. 1601 */ 1602 #ifndef _SYS_SYSPROTO_H_ 1603 struct symlink_args { 1604 char *path; 1605 char *link; 1606 }; 1607 #endif 1608 int 1609 sys_symlink(td, uap) 1610 struct thread *td; 1611 register struct symlink_args /* { 1612 char *path; 1613 char *link; 1614 } */ *uap; 1615 { 1616 1617 return (kern_symlink(td, uap->path, uap->link, UIO_USERSPACE)); 1618 } 1619 1620 #ifndef _SYS_SYSPROTO_H_ 1621 struct symlinkat_args { 1622 char *path; 1623 int fd; 1624 char *path2; 1625 }; 1626 #endif 1627 int 1628 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1629 { 1630 1631 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1632 UIO_USERSPACE)); 1633 } 1634 1635 int 1636 kern_symlink(struct thread *td, char *path, char *link, enum uio_seg segflg) 1637 { 1638 1639 return (kern_symlinkat(td, path, AT_FDCWD, link, segflg)); 1640 } 1641 1642 int 1643 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1644 enum uio_seg segflg) 1645 { 1646 struct mount *mp; 1647 struct vattr vattr; 1648 char *syspath; 1649 struct nameidata nd; 1650 int error; 1651 cap_rights_t rights; 1652 1653 if 
(segflg == UIO_SYSSPACE) { 1654 syspath = path1; 1655 } else { 1656 syspath = uma_zalloc(namei_zone, M_WAITOK); 1657 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1658 goto out; 1659 } 1660 AUDIT_ARG_TEXT(syspath); 1661 restart: 1662 bwillwrite(); 1663 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 1664 segflg, path2, fd, cap_rights_init(&rights, CAP_SYMLINKAT), td); 1665 if ((error = namei(&nd)) != 0) 1666 goto out; 1667 if (nd.ni_vp) { 1668 NDFREE(&nd, NDF_ONLY_PNBUF); 1669 if (nd.ni_vp == nd.ni_dvp) 1670 vrele(nd.ni_dvp); 1671 else 1672 vput(nd.ni_dvp); 1673 vrele(nd.ni_vp); 1674 error = EEXIST; 1675 goto out; 1676 } 1677 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1678 NDFREE(&nd, NDF_ONLY_PNBUF); 1679 vput(nd.ni_dvp); 1680 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1681 goto out; 1682 goto restart; 1683 } 1684 VATTR_NULL(&vattr); 1685 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1686 #ifdef MAC 1687 vattr.va_type = VLNK; 1688 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1689 &vattr); 1690 if (error != 0) 1691 goto out2; 1692 #endif 1693 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1694 if (error == 0) 1695 vput(nd.ni_vp); 1696 #ifdef MAC 1697 out2: 1698 #endif 1699 NDFREE(&nd, NDF_ONLY_PNBUF); 1700 vput(nd.ni_dvp); 1701 vn_finished_write(mp); 1702 out: 1703 if (segflg != UIO_SYSSPACE) 1704 uma_zfree(namei_zone, syspath); 1705 return (error); 1706 } 1707 1708 /* 1709 * Delete a whiteout from the filesystem. 
 *
 * The lookup is done with DELETE | DOWHITEOUT.  The call fails with EEXIST
 * when the name resolves to a vnode or when the lookup did not set
 * ISWHITEOUT; otherwise the whiteout is removed with VOP_WHITEOUT(DELETE).
 * Returns 0 or an errno value.
 */
int
sys_undelete(td, uap)
	struct thread *td;
	register struct undelete_args /* {
		char *path;
	} */ *uap;
{
	struct mount *mp;
	struct nameidata nd;
	int error;

restart:
	bwillwrite();
	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	error = namei(&nd);
	if (error != 0)
		return (error);

	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
		/* Not a whiteout: release the lookup results and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (nd.ni_vp)
			vrele(nd.ni_vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * The non-sleeping attempt failed: release what namei()
		 * returned, retry with V_XSLEEP | PCATCH and redo the lookup.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	return (error);
}

/*
 * Delete a name from the filesystem.
 */
#ifndef _SYS_SYSPROTO_H_
struct unlink_args {
	char	*path;
};
#endif
/*
 * unlink(2) entry point: forwards the user-space path to kern_unlink().
 */
int
sys_unlink(td, uap)
	struct thread *td;
	struct unlink_args /* {
		char *path;
	} */ *uap;
{

	return (kern_unlink(td, uap->path, UIO_USERSPACE));
}

#ifndef _SYS_SYSPROTO_H_
struct unlinkat_args {
	int	fd;
	char	*path;
	int	flag;
};
#endif
/*
 * unlinkat(2) entry point.  Only AT_REMOVEDIR is accepted in uap->flag
 * (anything else yields EINVAL); with it the request goes to kern_rmdirat(),
 * without it to kern_unlinkat().
 */
int
sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
{
	int flag = uap->flag;
	int fd = uap->fd;
	char *path = uap->path;

	if (flag & ~AT_REMOVEDIR)
		return (EINVAL);

	if (flag & AT_REMOVEDIR)
		return (kern_rmdirat(td, fd, path, UIO_USERSPACE));
	else
		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, 0));
}

/*
 * In-kernel unlink: kern_unlinkat() with AT_FDCWD and no inode check.
 */
int
kern_unlink(struct thread *td, char *path, enum uio_seg pathseg)
{

	return (kern_unlinkat(td, AT_FDCWD, path, pathseg, 0));
}

/*
 * Common implementation of unlink(2) and unlinkat(2).
 *
 * With oldinum == 0 directories are refused with EPERM.  With a non-zero
 * oldinum the vnode is stat'ed and the call fails with EIDRM when its inode
 * number differs.  A vnode flagged VV_ROOT yields EBUSY.  An EINVAL from
 * namei() is reported as EPERM.  Returns 0 or an errno value.
 */
int
kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg,
    ino_t oldinum)
{
	struct mount *mp;
	struct vnode *vp;
	struct nameidata nd;
	struct stat sb;
	cap_rights_t rights;
	int error;

restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td);
	if ((error = namei(&nd)) != 0)
		return (error == EINVAL ? EPERM : error);
	vp = nd.ni_vp;
	/* 'error' is 0 here; the chain below only ever sets it to a failure. */
	if (vp->v_type == VDIR && oldinum == 0) {
		error = EPERM;		/* POSIX */
	} else if (oldinum != 0 &&
		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
		  sb.st_ino != oldinum) {
			error = EIDRM;	/* Identifier removed */
	} else {
		/*
		 * The root of a mounted filesystem cannot be deleted.
		 *
		 * XXX: can this only be a VDIR case?
		 */
		if (vp->v_vflag & VV_ROOT)
			error = EBUSY;
	}
	if (error == 0) {
		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
			/*
			 * The non-sleeping attempt failed: release both
			 * vnodes, retry with V_XSLEEP | PCATCH and redo the
			 * lookup.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			if (vp == nd.ni_dvp)
				vrele(vp);
			else
				vput(vp);
			if ((error = vn_start_write(NULL, &mp,
			    V_XSLEEP | PCATCH)) != 0)
				return (error);
			goto restart;
		}
#ifdef MAC
		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
		    &nd.ni_cnd);
		if (error != 0)
			goto out;
#endif
		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
#ifdef MAC
out:
#endif
		vn_finished_write(mp);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* When parent and leaf are the same vnode only a reference remains. */
	if (vp == nd.ni_dvp)
		vrele(vp);
	else
		vput(vp);
	return (error);
}

/*
 * Reposition read/write file offset.
 */
#ifndef _SYS_SYSPROTO_H_
struct lseek_args {
	int	fd;
	int	pad;
	off_t	offset;
	int	whence;
};
#endif
/*
 * lseek(2) entry point.  The descriptor is fetched with CAP_SEEK; if its
 * file operations are not flagged DFLAG_SEEKABLE the call fails with ESPIPE,
 * otherwise the request is passed to fo_seek().
 */
int
sys_lseek(td, uap)
	struct thread *td;
	register struct lseek_args /* {
		int fd;
		int pad;
		off_t offset;
		int whence;
	} */ *uap;
{
	struct file *fp;
	cap_rights_t rights;
	int error;

	AUDIT_ARG_FD(uap->fd);
	error = fget(td, uap->fd, cap_rights_init(&rights, CAP_SEEK), &fp);
	if (error != 0)
		return (error);
	error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ?
	    fo_seek(fp, uap->offset, uap->whence, td) : ESPIPE;
	fdrop(fp, td);
	return (error);
}

#if defined(COMPAT_43)
/*
 * Reposition read/write file offset.
1909 */ 1910 #ifndef _SYS_SYSPROTO_H_ 1911 struct olseek_args { 1912 int fd; 1913 long offset; 1914 int whence; 1915 }; 1916 #endif 1917 int 1918 olseek(td, uap) 1919 struct thread *td; 1920 register struct olseek_args /* { 1921 int fd; 1922 long offset; 1923 int whence; 1924 } */ *uap; 1925 { 1926 struct lseek_args /* { 1927 int fd; 1928 int pad; 1929 off_t offset; 1930 int whence; 1931 } */ nuap; 1932 1933 nuap.fd = uap->fd; 1934 nuap.offset = uap->offset; 1935 nuap.whence = uap->whence; 1936 return (sys_lseek(td, &nuap)); 1937 } 1938 #endif /* COMPAT_43 */ 1939 1940 /* Version with the 'pad' argument */ 1941 int 1942 freebsd6_lseek(td, uap) 1943 struct thread *td; 1944 register struct freebsd6_lseek_args *uap; 1945 { 1946 struct lseek_args ouap; 1947 1948 ouap.fd = uap->fd; 1949 ouap.offset = uap->offset; 1950 ouap.whence = uap->whence; 1951 return (sys_lseek(td, &ouap)); 1952 } 1953 1954 /* 1955 * Check access permissions using passed credentials. 1956 */ 1957 static int 1958 vn_access(vp, user_flags, cred, td) 1959 struct vnode *vp; 1960 int user_flags; 1961 struct ucred *cred; 1962 struct thread *td; 1963 { 1964 accmode_t accmode; 1965 int error; 1966 1967 /* Flags == 0 means only check for existence. */ 1968 error = 0; 1969 if (user_flags) { 1970 accmode = 0; 1971 if (user_flags & R_OK) 1972 accmode |= VREAD; 1973 if (user_flags & W_OK) 1974 accmode |= VWRITE; 1975 if (user_flags & X_OK) 1976 accmode |= VEXEC; 1977 #ifdef MAC 1978 error = mac_vnode_check_access(cred, vp, accmode); 1979 if (error != 0) 1980 return (error); 1981 #endif 1982 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1983 error = VOP_ACCESS(vp, accmode, cred, td); 1984 } 1985 return (error); 1986 } 1987 1988 /* 1989 * Check access permissions using "real" credentials. 
1990 */ 1991 #ifndef _SYS_SYSPROTO_H_ 1992 struct access_args { 1993 char *path; 1994 int amode; 1995 }; 1996 #endif 1997 int 1998 sys_access(td, uap) 1999 struct thread *td; 2000 register struct access_args /* { 2001 char *path; 2002 int amode; 2003 } */ *uap; 2004 { 2005 2006 return (kern_access(td, uap->path, UIO_USERSPACE, uap->amode)); 2007 } 2008 2009 #ifndef _SYS_SYSPROTO_H_ 2010 struct faccessat_args { 2011 int dirfd; 2012 char *path; 2013 int amode; 2014 int flag; 2015 } 2016 #endif 2017 int 2018 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2019 { 2020 2021 if (uap->flag & ~AT_EACCESS) 2022 return (EINVAL); 2023 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2024 uap->amode)); 2025 } 2026 2027 int 2028 kern_access(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2029 { 2030 2031 return (kern_accessat(td, AT_FDCWD, path, pathseg, 0, amode)); 2032 } 2033 2034 int 2035 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2036 int flag, int amode) 2037 { 2038 struct ucred *cred, *tmpcred; 2039 struct vnode *vp; 2040 struct nameidata nd; 2041 cap_rights_t rights; 2042 int error; 2043 2044 /* 2045 * Create and modify a temporary credential instead of one that 2046 * is potentially shared. 
2047 */ 2048 if (!(flag & AT_EACCESS)) { 2049 cred = td->td_ucred; 2050 tmpcred = crdup(cred); 2051 tmpcred->cr_uid = cred->cr_ruid; 2052 tmpcred->cr_groups[0] = cred->cr_rgid; 2053 td->td_ucred = tmpcred; 2054 } else 2055 cred = tmpcred = td->td_ucred; 2056 AUDIT_ARG_VALUE(amode); 2057 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2058 AUDITVNODE1, pathseg, path, fd, cap_rights_init(&rights, CAP_FSTAT), 2059 td); 2060 if ((error = namei(&nd)) != 0) 2061 goto out1; 2062 vp = nd.ni_vp; 2063 2064 error = vn_access(vp, amode, tmpcred, td); 2065 NDFREE(&nd, NDF_ONLY_PNBUF); 2066 vput(vp); 2067 out1: 2068 if (!(flag & AT_EACCESS)) { 2069 td->td_ucred = cred; 2070 crfree(tmpcred); 2071 } 2072 return (error); 2073 } 2074 2075 /* 2076 * Check access permissions using "effective" credentials. 2077 */ 2078 #ifndef _SYS_SYSPROTO_H_ 2079 struct eaccess_args { 2080 char *path; 2081 int amode; 2082 }; 2083 #endif 2084 int 2085 sys_eaccess(td, uap) 2086 struct thread *td; 2087 register struct eaccess_args /* { 2088 char *path; 2089 int amode; 2090 } */ *uap; 2091 { 2092 2093 return (kern_eaccess(td, uap->path, UIO_USERSPACE, uap->amode)); 2094 } 2095 2096 int 2097 kern_eaccess(struct thread *td, char *path, enum uio_seg pathseg, int amode) 2098 { 2099 2100 return (kern_accessat(td, AT_FDCWD, path, pathseg, AT_EACCESS, amode)); 2101 } 2102 2103 #if defined(COMPAT_43) 2104 /* 2105 * Get file status; this version follows links. 
2106 */ 2107 #ifndef _SYS_SYSPROTO_H_ 2108 struct ostat_args { 2109 char *path; 2110 struct ostat *ub; 2111 }; 2112 #endif 2113 int 2114 ostat(td, uap) 2115 struct thread *td; 2116 register struct ostat_args /* { 2117 char *path; 2118 struct ostat *ub; 2119 } */ *uap; 2120 { 2121 struct stat sb; 2122 struct ostat osb; 2123 int error; 2124 2125 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2126 if (error != 0) 2127 return (error); 2128 cvtstat(&sb, &osb); 2129 return (copyout(&osb, uap->ub, sizeof (osb))); 2130 } 2131 2132 /* 2133 * Get file status; this version does not follow links. 2134 */ 2135 #ifndef _SYS_SYSPROTO_H_ 2136 struct olstat_args { 2137 char *path; 2138 struct ostat *ub; 2139 }; 2140 #endif 2141 int 2142 olstat(td, uap) 2143 struct thread *td; 2144 register struct olstat_args /* { 2145 char *path; 2146 struct ostat *ub; 2147 } */ *uap; 2148 { 2149 struct stat sb; 2150 struct ostat osb; 2151 int error; 2152 2153 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2154 if (error != 0) 2155 return (error); 2156 cvtstat(&sb, &osb); 2157 return (copyout(&osb, uap->ub, sizeof (osb))); 2158 } 2159 2160 /* 2161 * Convert from an old to a new stat structure. 2162 */ 2163 void 2164 cvtstat(st, ost) 2165 struct stat *st; 2166 struct ostat *ost; 2167 { 2168 2169 ost->st_dev = st->st_dev; 2170 ost->st_ino = st->st_ino; 2171 ost->st_mode = st->st_mode; 2172 ost->st_nlink = st->st_nlink; 2173 ost->st_uid = st->st_uid; 2174 ost->st_gid = st->st_gid; 2175 ost->st_rdev = st->st_rdev; 2176 if (st->st_size < (quad_t)1 << 32) 2177 ost->st_size = st->st_size; 2178 else 2179 ost->st_size = -2; 2180 ost->st_atim = st->st_atim; 2181 ost->st_mtim = st->st_mtim; 2182 ost->st_ctim = st->st_ctim; 2183 ost->st_blksize = st->st_blksize; 2184 ost->st_blocks = st->st_blocks; 2185 ost->st_flags = st->st_flags; 2186 ost->st_gen = st->st_gen; 2187 } 2188 #endif /* COMPAT_43 */ 2189 2190 /* 2191 * Get file status; this version follows links. 
2192 */ 2193 #ifndef _SYS_SYSPROTO_H_ 2194 struct stat_args { 2195 char *path; 2196 struct stat *ub; 2197 }; 2198 #endif 2199 int 2200 sys_stat(td, uap) 2201 struct thread *td; 2202 register struct stat_args /* { 2203 char *path; 2204 struct stat *ub; 2205 } */ *uap; 2206 { 2207 struct stat sb; 2208 int error; 2209 2210 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2211 if (error == 0) 2212 error = copyout(&sb, uap->ub, sizeof (sb)); 2213 return (error); 2214 } 2215 2216 #ifndef _SYS_SYSPROTO_H_ 2217 struct fstatat_args { 2218 int fd; 2219 char *path; 2220 struct stat *buf; 2221 int flag; 2222 } 2223 #endif 2224 int 2225 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2226 { 2227 struct stat sb; 2228 int error; 2229 2230 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2231 UIO_USERSPACE, &sb); 2232 if (error == 0) 2233 error = copyout(&sb, uap->buf, sizeof (sb)); 2234 return (error); 2235 } 2236 2237 int 2238 kern_stat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2239 { 2240 2241 return (kern_statat(td, 0, AT_FDCWD, path, pathseg, sbp)); 2242 } 2243 2244 int 2245 kern_statat(struct thread *td, int flag, int fd, char *path, 2246 enum uio_seg pathseg, struct stat *sbp) 2247 { 2248 2249 return (kern_statat_vnhook(td, flag, fd, path, pathseg, sbp, NULL)); 2250 } 2251 2252 int 2253 kern_statat_vnhook(struct thread *td, int flag, int fd, char *path, 2254 enum uio_seg pathseg, struct stat *sbp, 2255 void (*hook)(struct vnode *vp, struct stat *sbp)) 2256 { 2257 struct nameidata nd; 2258 struct stat sb; 2259 cap_rights_t rights; 2260 int error; 2261 2262 if (flag & ~AT_SYMLINK_NOFOLLOW) 2263 return (EINVAL); 2264 2265 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : 2266 FOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2267 cap_rights_init(&rights, CAP_FSTAT), td); 2268 2269 if ((error = namei(&nd)) != 0) 2270 return (error); 2271 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2272 if (error == 0) { 2273 SDT_PROBE(vfs, , stat, mode, path, sb.st_mode, 0, 0, 0); 2274 if (S_ISREG(sb.st_mode)) 2275 SDT_PROBE(vfs, , stat, reg, path, pathseg, 0, 0, 0); 2276 if (__predict_false(hook != NULL)) 2277 hook(nd.ni_vp, &sb); 2278 } 2279 NDFREE(&nd, NDF_ONLY_PNBUF); 2280 vput(nd.ni_vp); 2281 if (error != 0) 2282 return (error); 2283 *sbp = sb; 2284 #ifdef KTRACE 2285 if (KTRPOINT(td, KTR_STRUCT)) 2286 ktrstat(&sb); 2287 #endif 2288 return (0); 2289 } 2290 2291 /* 2292 * Get file status; this version does not follow links. 2293 */ 2294 #ifndef _SYS_SYSPROTO_H_ 2295 struct lstat_args { 2296 char *path; 2297 struct stat *ub; 2298 }; 2299 #endif 2300 int 2301 sys_lstat(td, uap) 2302 struct thread *td; 2303 register struct lstat_args /* { 2304 char *path; 2305 struct stat *ub; 2306 } */ *uap; 2307 { 2308 struct stat sb; 2309 int error; 2310 2311 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2312 if (error == 0) 2313 error = copyout(&sb, uap->ub, sizeof (sb)); 2314 return (error); 2315 } 2316 2317 int 2318 kern_lstat(struct thread *td, char *path, enum uio_seg pathseg, struct stat *sbp) 2319 { 2320 2321 return (kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, path, pathseg, 2322 sbp)); 2323 } 2324 2325 /* 2326 * Implementation of the NetBSD [l]stat() functions. 
2327 */ 2328 void 2329 cvtnstat(sb, nsb) 2330 struct stat *sb; 2331 struct nstat *nsb; 2332 { 2333 2334 bzero(nsb, sizeof *nsb); 2335 nsb->st_dev = sb->st_dev; 2336 nsb->st_ino = sb->st_ino; 2337 nsb->st_mode = sb->st_mode; 2338 nsb->st_nlink = sb->st_nlink; 2339 nsb->st_uid = sb->st_uid; 2340 nsb->st_gid = sb->st_gid; 2341 nsb->st_rdev = sb->st_rdev; 2342 nsb->st_atim = sb->st_atim; 2343 nsb->st_mtim = sb->st_mtim; 2344 nsb->st_ctim = sb->st_ctim; 2345 nsb->st_size = sb->st_size; 2346 nsb->st_blocks = sb->st_blocks; 2347 nsb->st_blksize = sb->st_blksize; 2348 nsb->st_flags = sb->st_flags; 2349 nsb->st_gen = sb->st_gen; 2350 nsb->st_birthtim = sb->st_birthtim; 2351 } 2352 2353 #ifndef _SYS_SYSPROTO_H_ 2354 struct nstat_args { 2355 char *path; 2356 struct nstat *ub; 2357 }; 2358 #endif 2359 int 2360 sys_nstat(td, uap) 2361 struct thread *td; 2362 register struct nstat_args /* { 2363 char *path; 2364 struct nstat *ub; 2365 } */ *uap; 2366 { 2367 struct stat sb; 2368 struct nstat nsb; 2369 int error; 2370 2371 error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); 2372 if (error != 0) 2373 return (error); 2374 cvtnstat(&sb, &nsb); 2375 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2376 } 2377 2378 /* 2379 * NetBSD lstat. Get file status; this version does not follow links. 2380 */ 2381 #ifndef _SYS_SYSPROTO_H_ 2382 struct lstat_args { 2383 char *path; 2384 struct stat *ub; 2385 }; 2386 #endif 2387 int 2388 sys_nlstat(td, uap) 2389 struct thread *td; 2390 register struct nlstat_args /* { 2391 char *path; 2392 struct nstat *ub; 2393 } */ *uap; 2394 { 2395 struct stat sb; 2396 struct nstat nsb; 2397 int error; 2398 2399 error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); 2400 if (error != 0) 2401 return (error); 2402 cvtnstat(&sb, &nsb); 2403 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2404 } 2405 2406 /* 2407 * Get configurable pathname variables. 
2408 */ 2409 #ifndef _SYS_SYSPROTO_H_ 2410 struct pathconf_args { 2411 char *path; 2412 int name; 2413 }; 2414 #endif 2415 int 2416 sys_pathconf(td, uap) 2417 struct thread *td; 2418 register struct pathconf_args /* { 2419 char *path; 2420 int name; 2421 } */ *uap; 2422 { 2423 2424 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW)); 2425 } 2426 2427 #ifndef _SYS_SYSPROTO_H_ 2428 struct lpathconf_args { 2429 char *path; 2430 int name; 2431 }; 2432 #endif 2433 int 2434 sys_lpathconf(td, uap) 2435 struct thread *td; 2436 register struct lpathconf_args /* { 2437 char *path; 2438 int name; 2439 } */ *uap; 2440 { 2441 2442 return (kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2443 NOFOLLOW)); 2444 } 2445 2446 int 2447 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2448 u_long flags) 2449 { 2450 struct nameidata nd; 2451 int error; 2452 2453 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2454 pathseg, path, td); 2455 if ((error = namei(&nd)) != 0) 2456 return (error); 2457 NDFREE(&nd, NDF_ONLY_PNBUF); 2458 2459 /* If asynchronous I/O is available, it works for all files. */ 2460 if (name == _PC_ASYNC_IO) 2461 td->td_retval[0] = async_io_version; 2462 else 2463 error = VOP_PATHCONF(nd.ni_vp, name, td->td_retval); 2464 vput(nd.ni_vp); 2465 return (error); 2466 } 2467 2468 /* 2469 * Return target name of a symbolic link. 
2470 */ 2471 #ifndef _SYS_SYSPROTO_H_ 2472 struct readlink_args { 2473 char *path; 2474 char *buf; 2475 size_t count; 2476 }; 2477 #endif 2478 int 2479 sys_readlink(td, uap) 2480 struct thread *td; 2481 register struct readlink_args /* { 2482 char *path; 2483 char *buf; 2484 size_t count; 2485 } */ *uap; 2486 { 2487 2488 return (kern_readlink(td, uap->path, UIO_USERSPACE, uap->buf, 2489 UIO_USERSPACE, uap->count)); 2490 } 2491 #ifndef _SYS_SYSPROTO_H_ 2492 struct readlinkat_args { 2493 int fd; 2494 char *path; 2495 char *buf; 2496 size_t bufsize; 2497 }; 2498 #endif 2499 int 2500 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2501 { 2502 2503 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2504 uap->buf, UIO_USERSPACE, uap->bufsize)); 2505 } 2506 2507 int 2508 kern_readlink(struct thread *td, char *path, enum uio_seg pathseg, char *buf, 2509 enum uio_seg bufseg, size_t count) 2510 { 2511 2512 return (kern_readlinkat(td, AT_FDCWD, path, pathseg, buf, bufseg, 2513 count)); 2514 } 2515 2516 int 2517 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2518 char *buf, enum uio_seg bufseg, size_t count) 2519 { 2520 struct vnode *vp; 2521 struct iovec aiov; 2522 struct uio auio; 2523 struct nameidata nd; 2524 int error; 2525 2526 if (count > IOSIZE_MAX) 2527 return (EINVAL); 2528 2529 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2530 pathseg, path, fd, td); 2531 2532 if ((error = namei(&nd)) != 0) 2533 return (error); 2534 NDFREE(&nd, NDF_ONLY_PNBUF); 2535 vp = nd.ni_vp; 2536 #ifdef MAC 2537 error = mac_vnode_check_readlink(td->td_ucred, vp); 2538 if (error != 0) { 2539 vput(vp); 2540 return (error); 2541 } 2542 #endif 2543 if (vp->v_type != VLNK) 2544 error = EINVAL; 2545 else { 2546 aiov.iov_base = buf; 2547 aiov.iov_len = count; 2548 auio.uio_iov = &aiov; 2549 auio.uio_iovcnt = 1; 2550 auio.uio_offset = 0; 2551 auio.uio_rw = UIO_READ; 2552 auio.uio_segflg = bufseg; 2553 auio.uio_td = td; 
2554 auio.uio_resid = count; 2555 error = VOP_READLINK(vp, &auio, td->td_ucred); 2556 } 2557 vput(vp); 2558 td->td_retval[0] = count - auio.uio_resid; 2559 return (error); 2560 } 2561 2562 /* 2563 * Common implementation code for chflags() and fchflags(). 2564 */ 2565 static int 2566 setfflags(td, vp, flags) 2567 struct thread *td; 2568 struct vnode *vp; 2569 u_long flags; 2570 { 2571 struct mount *mp; 2572 struct vattr vattr; 2573 int error; 2574 2575 /* We can't support the value matching VNOVAL. */ 2576 if (flags == VNOVAL) 2577 return (EOPNOTSUPP); 2578 2579 /* 2580 * Prevent non-root users from setting flags on devices. When 2581 * a device is reused, users can retain ownership of the device 2582 * if they are allowed to set flags and programs assume that 2583 * chown can't fail when done as root. 2584 */ 2585 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2586 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2587 if (error != 0) 2588 return (error); 2589 } 2590 2591 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2592 return (error); 2593 VATTR_NULL(&vattr); 2594 vattr.va_flags = flags; 2595 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2596 #ifdef MAC 2597 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2598 if (error == 0) 2599 #endif 2600 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2601 VOP_UNLOCK(vp, 0); 2602 vn_finished_write(mp); 2603 return (error); 2604 } 2605 2606 /* 2607 * Change flags of a file given a path name. 
2608 */ 2609 #ifndef _SYS_SYSPROTO_H_ 2610 struct chflags_args { 2611 const char *path; 2612 u_long flags; 2613 }; 2614 #endif 2615 int 2616 sys_chflags(td, uap) 2617 struct thread *td; 2618 register struct chflags_args /* { 2619 const char *path; 2620 u_long flags; 2621 } */ *uap; 2622 { 2623 2624 return (kern_chflags(td, uap->path, UIO_USERSPACE, uap->flags)); 2625 } 2626 2627 #ifndef _SYS_SYSPROTO_H_ 2628 struct chflagsat_args { 2629 int fd; 2630 const char *path; 2631 u_long flags; 2632 int atflag; 2633 } 2634 #endif 2635 int 2636 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2637 { 2638 int fd = uap->fd; 2639 const char *path = uap->path; 2640 u_long flags = uap->flags; 2641 int atflag = uap->atflag; 2642 2643 if (atflag & ~AT_SYMLINK_NOFOLLOW) 2644 return (EINVAL); 2645 2646 return (kern_chflagsat(td, fd, path, UIO_USERSPACE, flags, atflag)); 2647 } 2648 2649 static int 2650 kern_chflags(struct thread *td, const char *path, enum uio_seg pathseg, 2651 u_long flags) 2652 { 2653 2654 return (kern_chflagsat(td, AT_FDCWD, path, pathseg, flags, 0)); 2655 } 2656 2657 /* 2658 * Same as chflags() but doesn't follow symlinks. 2659 */ 2660 int 2661 sys_lchflags(td, uap) 2662 struct thread *td; 2663 register struct lchflags_args /* { 2664 const char *path; 2665 u_long flags; 2666 } */ *uap; 2667 { 2668 2669 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2670 uap->flags, AT_SYMLINK_NOFOLLOW)); 2671 } 2672 2673 static int 2674 kern_chflagsat(struct thread *td, int fd, const char *path, 2675 enum uio_seg pathseg, u_long flags, int atflag) 2676 { 2677 struct nameidata nd; 2678 cap_rights_t rights; 2679 int error, follow; 2680 2681 AUDIT_ARG_FFLAGS(flags); 2682 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2683 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2684 cap_rights_init(&rights, CAP_FCHFLAGS), td); 2685 if ((error = namei(&nd)) != 0) 2686 return (error); 2687 NDFREE(&nd, NDF_ONLY_PNBUF); 2688 error = setfflags(td, nd.ni_vp, flags); 2689 vrele(nd.ni_vp); 2690 return (error); 2691 } 2692 2693 /* 2694 * Change flags of a file given a file descriptor. 2695 */ 2696 #ifndef _SYS_SYSPROTO_H_ 2697 struct fchflags_args { 2698 int fd; 2699 u_long flags; 2700 }; 2701 #endif 2702 int 2703 sys_fchflags(td, uap) 2704 struct thread *td; 2705 register struct fchflags_args /* { 2706 int fd; 2707 u_long flags; 2708 } */ *uap; 2709 { 2710 struct file *fp; 2711 cap_rights_t rights; 2712 int error; 2713 2714 AUDIT_ARG_FD(uap->fd); 2715 AUDIT_ARG_FFLAGS(uap->flags); 2716 error = getvnode(td->td_proc->p_fd, uap->fd, 2717 cap_rights_init(&rights, CAP_FCHFLAGS), &fp); 2718 if (error != 0) 2719 return (error); 2720 #ifdef AUDIT 2721 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2722 AUDIT_ARG_VNODE1(fp->f_vnode); 2723 VOP_UNLOCK(fp->f_vnode, 0); 2724 #endif 2725 error = setfflags(td, fp->f_vnode, uap->flags); 2726 fdrop(fp, td); 2727 return (error); 2728 } 2729 2730 /* 2731 * Common implementation code for chmod(), lchmod() and fchmod(). 2732 */ 2733 int 2734 setfmode(td, cred, vp, mode) 2735 struct thread *td; 2736 struct ucred *cred; 2737 struct vnode *vp; 2738 int mode; 2739 { 2740 struct mount *mp; 2741 struct vattr vattr; 2742 int error; 2743 2744 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2745 return (error); 2746 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2747 VATTR_NULL(&vattr); 2748 vattr.va_mode = mode & ALLPERMS; 2749 #ifdef MAC 2750 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2751 if (error == 0) 2752 #endif 2753 error = VOP_SETATTR(vp, &vattr, cred); 2754 VOP_UNLOCK(vp, 0); 2755 vn_finished_write(mp); 2756 return (error); 2757 } 2758 2759 /* 2760 * Change mode of a file given path name. 
2761 */ 2762 #ifndef _SYS_SYSPROTO_H_ 2763 struct chmod_args { 2764 char *path; 2765 int mode; 2766 }; 2767 #endif 2768 int 2769 sys_chmod(td, uap) 2770 struct thread *td; 2771 register struct chmod_args /* { 2772 char *path; 2773 int mode; 2774 } */ *uap; 2775 { 2776 2777 return (kern_chmod(td, uap->path, UIO_USERSPACE, uap->mode)); 2778 } 2779 2780 #ifndef _SYS_SYSPROTO_H_ 2781 struct fchmodat_args { 2782 int dirfd; 2783 char *path; 2784 mode_t mode; 2785 int flag; 2786 } 2787 #endif 2788 int 2789 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2790 { 2791 int flag = uap->flag; 2792 int fd = uap->fd; 2793 char *path = uap->path; 2794 mode_t mode = uap->mode; 2795 2796 if (flag & ~AT_SYMLINK_NOFOLLOW) 2797 return (EINVAL); 2798 2799 return (kern_fchmodat(td, fd, path, UIO_USERSPACE, mode, flag)); 2800 } 2801 2802 int 2803 kern_chmod(struct thread *td, char *path, enum uio_seg pathseg, int mode) 2804 { 2805 2806 return (kern_fchmodat(td, AT_FDCWD, path, pathseg, mode, 0)); 2807 } 2808 2809 /* 2810 * Change mode of a file given path name (don't follow links.) 2811 */ 2812 #ifndef _SYS_SYSPROTO_H_ 2813 struct lchmod_args { 2814 char *path; 2815 int mode; 2816 }; 2817 #endif 2818 int 2819 sys_lchmod(td, uap) 2820 struct thread *td; 2821 register struct lchmod_args /* { 2822 char *path; 2823 int mode; 2824 } */ *uap; 2825 { 2826 2827 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2828 uap->mode, AT_SYMLINK_NOFOLLOW)); 2829 } 2830 2831 int 2832 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2833 mode_t mode, int flag) 2834 { 2835 struct nameidata nd; 2836 cap_rights_t rights; 2837 int error, follow; 2838 2839 AUDIT_ARG_MODE(mode); 2840 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 
NOFOLLOW : FOLLOW; 2841 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2842 cap_rights_init(&rights, CAP_FCHMOD), td); 2843 if ((error = namei(&nd)) != 0) 2844 return (error); 2845 NDFREE(&nd, NDF_ONLY_PNBUF); 2846 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2847 vrele(nd.ni_vp); 2848 return (error); 2849 } 2850 2851 /* 2852 * Change mode of a file given a file descriptor. 2853 */ 2854 #ifndef _SYS_SYSPROTO_H_ 2855 struct fchmod_args { 2856 int fd; 2857 int mode; 2858 }; 2859 #endif 2860 int 2861 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2862 { 2863 struct file *fp; 2864 cap_rights_t rights; 2865 int error; 2866 2867 AUDIT_ARG_FD(uap->fd); 2868 AUDIT_ARG_MODE(uap->mode); 2869 2870 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHMOD), &fp); 2871 if (error != 0) 2872 return (error); 2873 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2874 fdrop(fp, td); 2875 return (error); 2876 } 2877 2878 /* 2879 * Common implementation for chown(), lchown(), and fchown() 2880 */ 2881 int 2882 setfown(td, cred, vp, uid, gid) 2883 struct thread *td; 2884 struct ucred *cred; 2885 struct vnode *vp; 2886 uid_t uid; 2887 gid_t gid; 2888 { 2889 struct mount *mp; 2890 struct vattr vattr; 2891 int error; 2892 2893 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2894 return (error); 2895 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2896 VATTR_NULL(&vattr); 2897 vattr.va_uid = uid; 2898 vattr.va_gid = gid; 2899 #ifdef MAC 2900 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2901 vattr.va_gid); 2902 if (error == 0) 2903 #endif 2904 error = VOP_SETATTR(vp, &vattr, cred); 2905 VOP_UNLOCK(vp, 0); 2906 vn_finished_write(mp); 2907 return (error); 2908 } 2909 2910 /* 2911 * Set ownership given a path name. 
2912 */ 2913 #ifndef _SYS_SYSPROTO_H_ 2914 struct chown_args { 2915 char *path; 2916 int uid; 2917 int gid; 2918 }; 2919 #endif 2920 int 2921 sys_chown(td, uap) 2922 struct thread *td; 2923 register struct chown_args /* { 2924 char *path; 2925 int uid; 2926 int gid; 2927 } */ *uap; 2928 { 2929 2930 return (kern_chown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 2931 } 2932 2933 #ifndef _SYS_SYSPROTO_H_ 2934 struct fchownat_args { 2935 int fd; 2936 const char * path; 2937 uid_t uid; 2938 gid_t gid; 2939 int flag; 2940 }; 2941 #endif 2942 int 2943 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2944 { 2945 int flag; 2946 2947 flag = uap->flag; 2948 if (flag & ~AT_SYMLINK_NOFOLLOW) 2949 return (EINVAL); 2950 2951 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2952 uap->gid, uap->flag)); 2953 } 2954 2955 int 2956 kern_chown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 2957 int gid) 2958 { 2959 2960 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 0)); 2961 } 2962 2963 int 2964 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2965 int uid, int gid, int flag) 2966 { 2967 struct nameidata nd; 2968 cap_rights_t rights; 2969 int error, follow; 2970 2971 AUDIT_ARG_OWNER(uid, gid); 2972 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2973 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2974 cap_rights_init(&rights, CAP_FCHOWN), td); 2975 2976 if ((error = namei(&nd)) != 0) 2977 return (error); 2978 NDFREE(&nd, NDF_ONLY_PNBUF); 2979 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2980 vrele(nd.ni_vp); 2981 return (error); 2982 } 2983 2984 /* 2985 * Set ownership given a path name, do not cross symlinks. 
2986 */ 2987 #ifndef _SYS_SYSPROTO_H_ 2988 struct lchown_args { 2989 char *path; 2990 int uid; 2991 int gid; 2992 }; 2993 #endif 2994 int 2995 sys_lchown(td, uap) 2996 struct thread *td; 2997 register struct lchown_args /* { 2998 char *path; 2999 int uid; 3000 int gid; 3001 } */ *uap; 3002 { 3003 3004 return (kern_lchown(td, uap->path, UIO_USERSPACE, uap->uid, uap->gid)); 3005 } 3006 3007 int 3008 kern_lchown(struct thread *td, char *path, enum uio_seg pathseg, int uid, 3009 int gid) 3010 { 3011 3012 return (kern_fchownat(td, AT_FDCWD, path, pathseg, uid, gid, 3013 AT_SYMLINK_NOFOLLOW)); 3014 } 3015 3016 /* 3017 * Set ownership given a file descriptor. 3018 */ 3019 #ifndef _SYS_SYSPROTO_H_ 3020 struct fchown_args { 3021 int fd; 3022 int uid; 3023 int gid; 3024 }; 3025 #endif 3026 int 3027 sys_fchown(td, uap) 3028 struct thread *td; 3029 register struct fchown_args /* { 3030 int fd; 3031 int uid; 3032 int gid; 3033 } */ *uap; 3034 { 3035 struct file *fp; 3036 cap_rights_t rights; 3037 int error; 3038 3039 AUDIT_ARG_FD(uap->fd); 3040 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3041 error = fget(td, uap->fd, cap_rights_init(&rights, CAP_FCHOWN), &fp); 3042 if (error != 0) 3043 return (error); 3044 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3045 fdrop(fp, td); 3046 return (error); 3047 } 3048 3049 /* 3050 * Common implementation code for utimes(), lutimes(), and futimes(). 
3051 */ 3052 static int 3053 getutimes(usrtvp, tvpseg, tsp) 3054 const struct timeval *usrtvp; 3055 enum uio_seg tvpseg; 3056 struct timespec *tsp; 3057 { 3058 struct timeval tv[2]; 3059 const struct timeval *tvp; 3060 int error; 3061 3062 if (usrtvp == NULL) { 3063 vfs_timestamp(&tsp[0]); 3064 tsp[1] = tsp[0]; 3065 } else { 3066 if (tvpseg == UIO_SYSSPACE) { 3067 tvp = usrtvp; 3068 } else { 3069 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3070 return (error); 3071 tvp = tv; 3072 } 3073 3074 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3075 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3076 return (EINVAL); 3077 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3078 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3079 } 3080 return (0); 3081 } 3082 3083 /* 3084 * Common implementation code for utimes(), lutimes(), and futimes(). 3085 */ 3086 static int 3087 setutimes(td, vp, ts, numtimes, nullflag) 3088 struct thread *td; 3089 struct vnode *vp; 3090 const struct timespec *ts; 3091 int numtimes; 3092 int nullflag; 3093 { 3094 struct mount *mp; 3095 struct vattr vattr; 3096 int error, setbirthtime; 3097 3098 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3099 return (error); 3100 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3101 setbirthtime = 0; 3102 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3103 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3104 setbirthtime = 1; 3105 VATTR_NULL(&vattr); 3106 vattr.va_atime = ts[0]; 3107 vattr.va_mtime = ts[1]; 3108 if (setbirthtime) 3109 vattr.va_birthtime = ts[1]; 3110 if (numtimes > 2) 3111 vattr.va_birthtime = ts[2]; 3112 if (nullflag) 3113 vattr.va_vaflags |= VA_UTIMES_NULL; 3114 #ifdef MAC 3115 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3116 vattr.va_mtime); 3117 #endif 3118 if (error == 0) 3119 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3120 VOP_UNLOCK(vp, 0); 3121 vn_finished_write(mp); 3122 return (error); 3123 } 3124 3125 /* 3126 * Set the access and modification 
times of a file. 3127 */ 3128 #ifndef _SYS_SYSPROTO_H_ 3129 struct utimes_args { 3130 char *path; 3131 struct timeval *tptr; 3132 }; 3133 #endif 3134 int 3135 sys_utimes(td, uap) 3136 struct thread *td; 3137 register struct utimes_args /* { 3138 char *path; 3139 struct timeval *tptr; 3140 } */ *uap; 3141 { 3142 3143 return (kern_utimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3144 UIO_USERSPACE)); 3145 } 3146 3147 #ifndef _SYS_SYSPROTO_H_ 3148 struct futimesat_args { 3149 int fd; 3150 const char * path; 3151 const struct timeval * times; 3152 }; 3153 #endif 3154 int 3155 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3156 { 3157 3158 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3159 uap->times, UIO_USERSPACE)); 3160 } 3161 3162 int 3163 kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, 3164 struct timeval *tptr, enum uio_seg tptrseg) 3165 { 3166 3167 return (kern_utimesat(td, AT_FDCWD, path, pathseg, tptr, tptrseg)); 3168 } 3169 3170 int 3171 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3172 struct timeval *tptr, enum uio_seg tptrseg) 3173 { 3174 struct nameidata nd; 3175 struct timespec ts[2]; 3176 cap_rights_t rights; 3177 int error; 3178 3179 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3180 return (error); 3181 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3182 cap_rights_init(&rights, CAP_FUTIMES), td); 3183 3184 if ((error = namei(&nd)) != 0) 3185 return (error); 3186 NDFREE(&nd, NDF_ONLY_PNBUF); 3187 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3188 vrele(nd.ni_vp); 3189 return (error); 3190 } 3191 3192 /* 3193 * Set the access and modification times of a file. 
3194 */ 3195 #ifndef _SYS_SYSPROTO_H_ 3196 struct lutimes_args { 3197 char *path; 3198 struct timeval *tptr; 3199 }; 3200 #endif 3201 int 3202 sys_lutimes(td, uap) 3203 struct thread *td; 3204 register struct lutimes_args /* { 3205 char *path; 3206 struct timeval *tptr; 3207 } */ *uap; 3208 { 3209 3210 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3211 UIO_USERSPACE)); 3212 } 3213 3214 int 3215 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3216 struct timeval *tptr, enum uio_seg tptrseg) 3217 { 3218 struct timespec ts[2]; 3219 struct nameidata nd; 3220 int error; 3221 3222 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3223 return (error); 3224 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3225 if ((error = namei(&nd)) != 0) 3226 return (error); 3227 NDFREE(&nd, NDF_ONLY_PNBUF); 3228 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3229 vrele(nd.ni_vp); 3230 return (error); 3231 } 3232 3233 /* 3234 * Set the access and modification times of a file. 
3235 */ 3236 #ifndef _SYS_SYSPROTO_H_ 3237 struct futimes_args { 3238 int fd; 3239 struct timeval *tptr; 3240 }; 3241 #endif 3242 int 3243 sys_futimes(td, uap) 3244 struct thread *td; 3245 register struct futimes_args /* { 3246 int fd; 3247 struct timeval *tptr; 3248 } */ *uap; 3249 { 3250 3251 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3252 } 3253 3254 int 3255 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3256 enum uio_seg tptrseg) 3257 { 3258 struct timespec ts[2]; 3259 struct file *fp; 3260 cap_rights_t rights; 3261 int error; 3262 3263 AUDIT_ARG_FD(fd); 3264 error = getutimes(tptr, tptrseg, ts); 3265 if (error != 0) 3266 return (error); 3267 error = getvnode(td->td_proc->p_fd, fd, 3268 cap_rights_init(&rights, CAP_FUTIMES), &fp); 3269 if (error != 0) 3270 return (error); 3271 #ifdef AUDIT 3272 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3273 AUDIT_ARG_VNODE1(fp->f_vnode); 3274 VOP_UNLOCK(fp->f_vnode, 0); 3275 #endif 3276 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3277 fdrop(fp, td); 3278 return (error); 3279 } 3280 3281 /* 3282 * Truncate a file given its path name. 
3283 */ 3284 #ifndef _SYS_SYSPROTO_H_ 3285 struct truncate_args { 3286 char *path; 3287 int pad; 3288 off_t length; 3289 }; 3290 #endif 3291 int 3292 sys_truncate(td, uap) 3293 struct thread *td; 3294 register struct truncate_args /* { 3295 char *path; 3296 int pad; 3297 off_t length; 3298 } */ *uap; 3299 { 3300 3301 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3302 } 3303 3304 int 3305 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3306 { 3307 struct mount *mp; 3308 struct vnode *vp; 3309 void *rl_cookie; 3310 struct vattr vattr; 3311 struct nameidata nd; 3312 int error; 3313 3314 if (length < 0) 3315 return(EINVAL); 3316 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3317 if ((error = namei(&nd)) != 0) 3318 return (error); 3319 vp = nd.ni_vp; 3320 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3321 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3322 vn_rangelock_unlock(vp, rl_cookie); 3323 vrele(vp); 3324 return (error); 3325 } 3326 NDFREE(&nd, NDF_ONLY_PNBUF); 3327 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3328 if (vp->v_type == VDIR) 3329 error = EISDIR; 3330 #ifdef MAC 3331 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3332 } 3333 #endif 3334 else if ((error = vn_writechk(vp)) == 0 && 3335 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3336 VATTR_NULL(&vattr); 3337 vattr.va_size = length; 3338 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3339 } 3340 VOP_UNLOCK(vp, 0); 3341 vn_finished_write(mp); 3342 vn_rangelock_unlock(vp, rl_cookie); 3343 vrele(vp); 3344 return (error); 3345 } 3346 3347 #if defined(COMPAT_43) 3348 /* 3349 * Truncate a file given its path name. 
3350 */ 3351 #ifndef _SYS_SYSPROTO_H_ 3352 struct otruncate_args { 3353 char *path; 3354 long length; 3355 }; 3356 #endif 3357 int 3358 otruncate(td, uap) 3359 struct thread *td; 3360 register struct otruncate_args /* { 3361 char *path; 3362 long length; 3363 } */ *uap; 3364 { 3365 struct truncate_args /* { 3366 char *path; 3367 int pad; 3368 off_t length; 3369 } */ nuap; 3370 3371 nuap.path = uap->path; 3372 nuap.length = uap->length; 3373 return (sys_truncate(td, &nuap)); 3374 } 3375 #endif /* COMPAT_43 */ 3376 3377 /* Versions with the pad argument */ 3378 int 3379 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3380 { 3381 struct truncate_args ouap; 3382 3383 ouap.path = uap->path; 3384 ouap.length = uap->length; 3385 return (sys_truncate(td, &ouap)); 3386 } 3387 3388 int 3389 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3390 { 3391 struct ftruncate_args ouap; 3392 3393 ouap.fd = uap->fd; 3394 ouap.length = uap->length; 3395 return (sys_ftruncate(td, &ouap)); 3396 } 3397 3398 /* 3399 * Sync an open file. 
3400 */ 3401 #ifndef _SYS_SYSPROTO_H_ 3402 struct fsync_args { 3403 int fd; 3404 }; 3405 #endif 3406 int 3407 sys_fsync(td, uap) 3408 struct thread *td; 3409 struct fsync_args /* { 3410 int fd; 3411 } */ *uap; 3412 { 3413 struct vnode *vp; 3414 struct mount *mp; 3415 struct file *fp; 3416 cap_rights_t rights; 3417 int error, lock_flags; 3418 3419 AUDIT_ARG_FD(uap->fd); 3420 error = getvnode(td->td_proc->p_fd, uap->fd, 3421 cap_rights_init(&rights, CAP_FSYNC), &fp); 3422 if (error != 0) 3423 return (error); 3424 vp = fp->f_vnode; 3425 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3426 if (error != 0) 3427 goto drop; 3428 if (MNT_SHARED_WRITES(mp) || 3429 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3430 lock_flags = LK_SHARED; 3431 } else { 3432 lock_flags = LK_EXCLUSIVE; 3433 } 3434 vn_lock(vp, lock_flags | LK_RETRY); 3435 AUDIT_ARG_VNODE1(vp); 3436 if (vp->v_object != NULL) { 3437 VM_OBJECT_WLOCK(vp->v_object); 3438 vm_object_page_clean(vp->v_object, 0, 0, 0); 3439 VM_OBJECT_WUNLOCK(vp->v_object); 3440 } 3441 error = VOP_FSYNC(vp, MNT_WAIT, td); 3442 3443 VOP_UNLOCK(vp, 0); 3444 vn_finished_write(mp); 3445 drop: 3446 fdrop(fp, td); 3447 return (error); 3448 } 3449 3450 /* 3451 * Rename files. Source and destination must either both be directories, or 3452 * both not be directories. If target is a directory, it must be empty. 
3453 */ 3454 #ifndef _SYS_SYSPROTO_H_ 3455 struct rename_args { 3456 char *from; 3457 char *to; 3458 }; 3459 #endif 3460 int 3461 sys_rename(td, uap) 3462 struct thread *td; 3463 register struct rename_args /* { 3464 char *from; 3465 char *to; 3466 } */ *uap; 3467 { 3468 3469 return (kern_rename(td, uap->from, uap->to, UIO_USERSPACE)); 3470 } 3471 3472 #ifndef _SYS_SYSPROTO_H_ 3473 struct renameat_args { 3474 int oldfd; 3475 char *old; 3476 int newfd; 3477 char *new; 3478 }; 3479 #endif 3480 int 3481 sys_renameat(struct thread *td, struct renameat_args *uap) 3482 { 3483 3484 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3485 UIO_USERSPACE)); 3486 } 3487 3488 int 3489 kern_rename(struct thread *td, char *from, char *to, enum uio_seg pathseg) 3490 { 3491 3492 return (kern_renameat(td, AT_FDCWD, from, AT_FDCWD, to, pathseg)); 3493 } 3494 3495 int 3496 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3497 enum uio_seg pathseg) 3498 { 3499 struct mount *mp = NULL; 3500 struct vnode *tvp, *fvp, *tdvp; 3501 struct nameidata fromnd, tond; 3502 cap_rights_t rights; 3503 int error; 3504 3505 bwillwrite(); 3506 #ifdef MAC 3507 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3508 AUDITVNODE1, pathseg, old, oldfd, 3509 cap_rights_init(&rights, CAP_RENAMEAT), td); 3510 #else 3511 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3512 pathseg, old, oldfd, cap_rights_init(&rights, CAP_RENAMEAT), td); 3513 #endif 3514 3515 if ((error = namei(&fromnd)) != 0) 3516 return (error); 3517 #ifdef MAC 3518 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3519 fromnd.ni_vp, &fromnd.ni_cnd); 3520 VOP_UNLOCK(fromnd.ni_dvp, 0); 3521 if (fromnd.ni_dvp != fromnd.ni_vp) 3522 VOP_UNLOCK(fromnd.ni_vp, 0); 3523 #endif 3524 fvp = fromnd.ni_vp; 3525 if (error == 0) 3526 error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); 3527 if (error != 0) { 3528 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3529 
vrele(fromnd.ni_dvp); 3530 vrele(fvp); 3531 goto out1; 3532 } 3533 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3534 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3535 cap_rights_init(&rights, CAP_LINKAT), td); 3536 if (fromnd.ni_vp->v_type == VDIR) 3537 tond.ni_cnd.cn_flags |= WILLBEDIR; 3538 if ((error = namei(&tond)) != 0) { 3539 /* Translate error code for rename("dir1", "dir2/."). */ 3540 if (error == EISDIR && fvp->v_type == VDIR) 3541 error = EINVAL; 3542 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3543 vrele(fromnd.ni_dvp); 3544 vrele(fvp); 3545 vn_finished_write(mp); 3546 goto out1; 3547 } 3548 tdvp = tond.ni_dvp; 3549 tvp = tond.ni_vp; 3550 if (tvp != NULL) { 3551 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3552 error = ENOTDIR; 3553 goto out; 3554 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3555 error = EISDIR; 3556 goto out; 3557 } 3558 #ifdef CAPABILITIES 3559 if (newfd != AT_FDCWD) { 3560 /* 3561 * If the target already exists we require CAP_UNLINKAT 3562 * from 'newfd'. 3563 */ 3564 error = cap_check(&tond.ni_filecaps.fc_rights, 3565 cap_rights_init(&rights, CAP_UNLINKAT)); 3566 if (error != 0) 3567 goto out; 3568 } 3569 #endif 3570 } 3571 if (fvp == tdvp) { 3572 error = EINVAL; 3573 goto out; 3574 } 3575 /* 3576 * If the source is the same as the destination (that is, if they 3577 * are links to the same vnode), then there is nothing to do. 
3578 */ 3579 if (fvp == tvp) 3580 error = -1; 3581 #ifdef MAC 3582 else 3583 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3584 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3585 #endif 3586 out: 3587 if (error == 0) { 3588 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3589 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3590 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3591 NDFREE(&tond, NDF_ONLY_PNBUF); 3592 } else { 3593 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3594 NDFREE(&tond, NDF_ONLY_PNBUF); 3595 if (tvp != NULL) 3596 vput(tvp); 3597 if (tdvp == tvp) 3598 vrele(tdvp); 3599 else 3600 vput(tdvp); 3601 vrele(fromnd.ni_dvp); 3602 vrele(fvp); 3603 } 3604 vrele(tond.ni_startdir); 3605 vn_finished_write(mp); 3606 out1: 3607 if (fromnd.ni_startdir) 3608 vrele(fromnd.ni_startdir); 3609 if (error == -1) 3610 return (0); 3611 return (error); 3612 } 3613 3614 /* 3615 * Make a directory file. 3616 */ 3617 #ifndef _SYS_SYSPROTO_H_ 3618 struct mkdir_args { 3619 char *path; 3620 int mode; 3621 }; 3622 #endif 3623 int 3624 sys_mkdir(td, uap) 3625 struct thread *td; 3626 register struct mkdir_args /* { 3627 char *path; 3628 int mode; 3629 } */ *uap; 3630 { 3631 3632 return (kern_mkdir(td, uap->path, UIO_USERSPACE, uap->mode)); 3633 } 3634 3635 #ifndef _SYS_SYSPROTO_H_ 3636 struct mkdirat_args { 3637 int fd; 3638 char *path; 3639 mode_t mode; 3640 }; 3641 #endif 3642 int 3643 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3644 { 3645 3646 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3647 } 3648 3649 int 3650 kern_mkdir(struct thread *td, char *path, enum uio_seg segflg, int mode) 3651 { 3652 3653 return (kern_mkdirat(td, AT_FDCWD, path, segflg, mode)); 3654 } 3655 3656 int 3657 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3658 int mode) 3659 { 3660 struct mount *mp; 3661 struct vnode *vp; 3662 struct vattr vattr; 3663 struct nameidata nd; 3664 cap_rights_t rights; 3665 int error; 3666 3667 
AUDIT_ARG_MODE(mode); 3668 restart: 3669 bwillwrite(); 3670 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1, 3671 segflg, path, fd, cap_rights_init(&rights, CAP_MKDIRAT), td); 3672 nd.ni_cnd.cn_flags |= WILLBEDIR; 3673 if ((error = namei(&nd)) != 0) 3674 return (error); 3675 vp = nd.ni_vp; 3676 if (vp != NULL) { 3677 NDFREE(&nd, NDF_ONLY_PNBUF); 3678 /* 3679 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3680 * the strange behaviour of leaving the vnode unlocked 3681 * if the target is the same vnode as the parent. 3682 */ 3683 if (vp == nd.ni_dvp) 3684 vrele(nd.ni_dvp); 3685 else 3686 vput(nd.ni_dvp); 3687 vrele(vp); 3688 return (EEXIST); 3689 } 3690 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3691 NDFREE(&nd, NDF_ONLY_PNBUF); 3692 vput(nd.ni_dvp); 3693 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3694 return (error); 3695 goto restart; 3696 } 3697 VATTR_NULL(&vattr); 3698 vattr.va_type = VDIR; 3699 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3700 #ifdef MAC 3701 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3702 &vattr); 3703 if (error != 0) 3704 goto out; 3705 #endif 3706 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3707 #ifdef MAC 3708 out: 3709 #endif 3710 NDFREE(&nd, NDF_ONLY_PNBUF); 3711 vput(nd.ni_dvp); 3712 if (error == 0) 3713 vput(nd.ni_vp); 3714 vn_finished_write(mp); 3715 return (error); 3716 } 3717 3718 /* 3719 * Remove a directory file. 
3720 */ 3721 #ifndef _SYS_SYSPROTO_H_ 3722 struct rmdir_args { 3723 char *path; 3724 }; 3725 #endif 3726 int 3727 sys_rmdir(td, uap) 3728 struct thread *td; 3729 struct rmdir_args /* { 3730 char *path; 3731 } */ *uap; 3732 { 3733 3734 return (kern_rmdir(td, uap->path, UIO_USERSPACE)); 3735 } 3736 3737 int 3738 kern_rmdir(struct thread *td, char *path, enum uio_seg pathseg) 3739 { 3740 3741 return (kern_rmdirat(td, AT_FDCWD, path, pathseg)); 3742 } 3743 3744 int 3745 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg) 3746 { 3747 struct mount *mp; 3748 struct vnode *vp; 3749 struct nameidata nd; 3750 cap_rights_t rights; 3751 int error; 3752 3753 restart: 3754 bwillwrite(); 3755 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3756 pathseg, path, fd, cap_rights_init(&rights, CAP_UNLINKAT), td); 3757 if ((error = namei(&nd)) != 0) 3758 return (error); 3759 vp = nd.ni_vp; 3760 if (vp->v_type != VDIR) { 3761 error = ENOTDIR; 3762 goto out; 3763 } 3764 /* 3765 * No rmdir "." please. 3766 */ 3767 if (nd.ni_dvp == vp) { 3768 error = EINVAL; 3769 goto out; 3770 } 3771 /* 3772 * The root of a mounted filesystem cannot be deleted. 
3773 */ 3774 if (vp->v_vflag & VV_ROOT) { 3775 error = EBUSY; 3776 goto out; 3777 } 3778 #ifdef MAC 3779 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3780 &nd.ni_cnd); 3781 if (error != 0) 3782 goto out; 3783 #endif 3784 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3785 NDFREE(&nd, NDF_ONLY_PNBUF); 3786 vput(vp); 3787 if (nd.ni_dvp == vp) 3788 vrele(nd.ni_dvp); 3789 else 3790 vput(nd.ni_dvp); 3791 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3792 return (error); 3793 goto restart; 3794 } 3795 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3796 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3797 vn_finished_write(mp); 3798 out: 3799 NDFREE(&nd, NDF_ONLY_PNBUF); 3800 vput(vp); 3801 if (nd.ni_dvp == vp) 3802 vrele(nd.ni_dvp); 3803 else 3804 vput(nd.ni_dvp); 3805 return (error); 3806 } 3807 3808 #ifdef COMPAT_43 3809 /* 3810 * Read a block of directory entries in a filesystem independent format. 3811 */ 3812 #ifndef _SYS_SYSPROTO_H_ 3813 struct ogetdirentries_args { 3814 int fd; 3815 char *buf; 3816 u_int count; 3817 long *basep; 3818 }; 3819 #endif 3820 int 3821 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3822 { 3823 long loff; 3824 int error; 3825 3826 error = kern_ogetdirentries(td, uap, &loff); 3827 if (error == 0) 3828 error = copyout(&loff, uap->basep, sizeof(long)); 3829 return (error); 3830 } 3831 3832 int 3833 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3834 long *ploff) 3835 { 3836 struct vnode *vp; 3837 struct file *fp; 3838 struct uio auio, kuio; 3839 struct iovec aiov, kiov; 3840 struct dirent *dp, *edp; 3841 cap_rights_t rights; 3842 caddr_t dirbuf; 3843 int error, eofflag, readcnt; 3844 long loff; 3845 off_t foffset; 3846 3847 /* XXX arbitrary sanity limit on `count'. 
*/ 3848 if (uap->count > 64 * 1024) 3849 return (EINVAL); 3850 error = getvnode(td->td_proc->p_fd, uap->fd, 3851 cap_rights_init(&rights, CAP_READ), &fp); 3852 if (error != 0) 3853 return (error); 3854 if ((fp->f_flag & FREAD) == 0) { 3855 fdrop(fp, td); 3856 return (EBADF); 3857 } 3858 vp = fp->f_vnode; 3859 foffset = foffset_lock(fp, 0); 3860 unionread: 3861 if (vp->v_type != VDIR) { 3862 foffset_unlock(fp, foffset, 0); 3863 fdrop(fp, td); 3864 return (EINVAL); 3865 } 3866 aiov.iov_base = uap->buf; 3867 aiov.iov_len = uap->count; 3868 auio.uio_iov = &aiov; 3869 auio.uio_iovcnt = 1; 3870 auio.uio_rw = UIO_READ; 3871 auio.uio_segflg = UIO_USERSPACE; 3872 auio.uio_td = td; 3873 auio.uio_resid = uap->count; 3874 vn_lock(vp, LK_SHARED | LK_RETRY); 3875 loff = auio.uio_offset = foffset; 3876 #ifdef MAC 3877 error = mac_vnode_check_readdir(td->td_ucred, vp); 3878 if (error != 0) { 3879 VOP_UNLOCK(vp, 0); 3880 foffset_unlock(fp, foffset, FOF_NOUPDATE); 3881 fdrop(fp, td); 3882 return (error); 3883 } 3884 #endif 3885 # if (BYTE_ORDER != LITTLE_ENDIAN) 3886 if (vp->v_mount->mnt_maxsymlinklen <= 0) { 3887 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, 3888 NULL, NULL); 3889 foffset = auio.uio_offset; 3890 } else 3891 # endif 3892 { 3893 kuio = auio; 3894 kuio.uio_iov = &kiov; 3895 kuio.uio_segflg = UIO_SYSSPACE; 3896 kiov.iov_len = uap->count; 3897 dirbuf = malloc(uap->count, M_TEMP, M_WAITOK); 3898 kiov.iov_base = dirbuf; 3899 error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, 3900 NULL, NULL); 3901 foffset = kuio.uio_offset; 3902 if (error == 0) { 3903 readcnt = uap->count - kuio.uio_resid; 3904 edp = (struct dirent *)&dirbuf[readcnt]; 3905 for (dp = (struct dirent *)dirbuf; dp < edp; ) { 3906 # if (BYTE_ORDER == LITTLE_ENDIAN) 3907 /* 3908 * The expected low byte of 3909 * dp->d_namlen is our dp->d_type. 3910 * The high MBZ byte of dp->d_namlen 3911 * is our dp->d_namlen. 
3912 */ 3913 dp->d_type = dp->d_namlen; 3914 dp->d_namlen = 0; 3915 # else 3916 /* 3917 * The dp->d_type is the high byte 3918 * of the expected dp->d_namlen, 3919 * so must be zero'ed. 3920 */ 3921 dp->d_type = 0; 3922 # endif 3923 if (dp->d_reclen > 0) { 3924 dp = (struct dirent *) 3925 ((char *)dp + dp->d_reclen); 3926 } else { 3927 error = EIO; 3928 break; 3929 } 3930 } 3931 if (dp >= edp) 3932 error = uiomove(dirbuf, readcnt, &auio); 3933 } 3934 free(dirbuf, M_TEMP); 3935 } 3936 if (error != 0) { 3937 VOP_UNLOCK(vp, 0); 3938 foffset_unlock(fp, foffset, 0); 3939 fdrop(fp, td); 3940 return (error); 3941 } 3942 if (uap->count == auio.uio_resid && 3943 (vp->v_vflag & VV_ROOT) && 3944 (vp->v_mount->mnt_flag & MNT_UNION)) { 3945 struct vnode *tvp = vp; 3946 vp = vp->v_mount->mnt_vnodecovered; 3947 VREF(vp); 3948 fp->f_vnode = vp; 3949 fp->f_data = vp; 3950 foffset = 0; 3951 vput(tvp); 3952 goto unionread; 3953 } 3954 VOP_UNLOCK(vp, 0); 3955 foffset_unlock(fp, foffset, 0); 3956 fdrop(fp, td); 3957 td->td_retval[0] = uap->count - auio.uio_resid; 3958 if (error == 0) 3959 *ploff = loff; 3960 return (error); 3961 } 3962 #endif /* COMPAT_43 */ 3963 3964 /* 3965 * Read a block of directory entries in a filesystem independent format. 
3966 */ 3967 #ifndef _SYS_SYSPROTO_H_ 3968 struct getdirentries_args { 3969 int fd; 3970 char *buf; 3971 u_int count; 3972 long *basep; 3973 }; 3974 #endif 3975 int 3976 sys_getdirentries(td, uap) 3977 struct thread *td; 3978 register struct getdirentries_args /* { 3979 int fd; 3980 char *buf; 3981 u_int count; 3982 long *basep; 3983 } */ *uap; 3984 { 3985 long base; 3986 int error; 3987 3988 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3989 NULL, UIO_USERSPACE); 3990 if (error != 0) 3991 return (error); 3992 if (uap->basep != NULL) 3993 error = copyout(&base, uap->basep, sizeof(long)); 3994 return (error); 3995 } 3996 3997 int 3998 kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, 3999 long *basep, ssize_t *residp, enum uio_seg bufseg) 4000 { 4001 struct vnode *vp; 4002 struct file *fp; 4003 struct uio auio; 4004 struct iovec aiov; 4005 cap_rights_t rights; 4006 long loff; 4007 int error, eofflag; 4008 off_t foffset; 4009 4010 AUDIT_ARG_FD(fd); 4011 if (count > IOSIZE_MAX) 4012 return (EINVAL); 4013 auio.uio_resid = count; 4014 error = getvnode(td->td_proc->p_fd, fd, 4015 cap_rights_init(&rights, CAP_READ), &fp); 4016 if (error != 0) 4017 return (error); 4018 if ((fp->f_flag & FREAD) == 0) { 4019 fdrop(fp, td); 4020 return (EBADF); 4021 } 4022 vp = fp->f_vnode; 4023 foffset = foffset_lock(fp, 0); 4024 unionread: 4025 if (vp->v_type != VDIR) { 4026 error = EINVAL; 4027 goto fail; 4028 } 4029 aiov.iov_base = buf; 4030 aiov.iov_len = count; 4031 auio.uio_iov = &aiov; 4032 auio.uio_iovcnt = 1; 4033 auio.uio_rw = UIO_READ; 4034 auio.uio_segflg = bufseg; 4035 auio.uio_td = td; 4036 vn_lock(vp, LK_SHARED | LK_RETRY); 4037 AUDIT_ARG_VNODE1(vp); 4038 loff = auio.uio_offset = foffset; 4039 #ifdef MAC 4040 error = mac_vnode_check_readdir(td->td_ucred, vp); 4041 if (error == 0) 4042 #endif 4043 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4044 NULL); 4045 foffset = auio.uio_offset; 4046 if (error != 0) { 4047 
VOP_UNLOCK(vp, 0); 4048 goto fail; 4049 } 4050 if (count == auio.uio_resid && 4051 (vp->v_vflag & VV_ROOT) && 4052 (vp->v_mount->mnt_flag & MNT_UNION)) { 4053 struct vnode *tvp = vp; 4054 4055 vp = vp->v_mount->mnt_vnodecovered; 4056 VREF(vp); 4057 fp->f_vnode = vp; 4058 fp->f_data = vp; 4059 foffset = 0; 4060 vput(tvp); 4061 goto unionread; 4062 } 4063 VOP_UNLOCK(vp, 0); 4064 *basep = loff; 4065 if (residp != NULL) 4066 *residp = auio.uio_resid; 4067 td->td_retval[0] = count - auio.uio_resid; 4068 fail: 4069 foffset_unlock(fp, foffset, 0); 4070 fdrop(fp, td); 4071 return (error); 4072 } 4073 4074 #ifndef _SYS_SYSPROTO_H_ 4075 struct getdents_args { 4076 int fd; 4077 char *buf; 4078 size_t count; 4079 }; 4080 #endif 4081 int 4082 sys_getdents(td, uap) 4083 struct thread *td; 4084 register struct getdents_args /* { 4085 int fd; 4086 char *buf; 4087 u_int count; 4088 } */ *uap; 4089 { 4090 struct getdirentries_args ap; 4091 4092 ap.fd = uap->fd; 4093 ap.buf = uap->buf; 4094 ap.count = uap->count; 4095 ap.basep = NULL; 4096 return (sys_getdirentries(td, &ap)); 4097 } 4098 4099 /* 4100 * Set the mode mask for creation of filesystem nodes. 4101 */ 4102 #ifndef _SYS_SYSPROTO_H_ 4103 struct umask_args { 4104 int newmask; 4105 }; 4106 #endif 4107 int 4108 sys_umask(td, uap) 4109 struct thread *td; 4110 struct umask_args /* { 4111 int newmask; 4112 } */ *uap; 4113 { 4114 register struct filedesc *fdp; 4115 4116 FILEDESC_XLOCK(td->td_proc->p_fd); 4117 fdp = td->td_proc->p_fd; 4118 td->td_retval[0] = fdp->fd_cmask; 4119 fdp->fd_cmask = uap->newmask & ALLPERMS; 4120 FILEDESC_XUNLOCK(td->td_proc->p_fd); 4121 return (0); 4122 } 4123 4124 /* 4125 * Void all references to file by ripping underlying filesystem away from 4126 * vnode. 
4127 */ 4128 #ifndef _SYS_SYSPROTO_H_ 4129 struct revoke_args { 4130 char *path; 4131 }; 4132 #endif 4133 int 4134 sys_revoke(td, uap) 4135 struct thread *td; 4136 register struct revoke_args /* { 4137 char *path; 4138 } */ *uap; 4139 { 4140 struct vnode *vp; 4141 struct vattr vattr; 4142 struct nameidata nd; 4143 int error; 4144 4145 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4146 uap->path, td); 4147 if ((error = namei(&nd)) != 0) 4148 return (error); 4149 vp = nd.ni_vp; 4150 NDFREE(&nd, NDF_ONLY_PNBUF); 4151 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4152 error = EINVAL; 4153 goto out; 4154 } 4155 #ifdef MAC 4156 error = mac_vnode_check_revoke(td->td_ucred, vp); 4157 if (error != 0) 4158 goto out; 4159 #endif 4160 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4161 if (error != 0) 4162 goto out; 4163 if (td->td_ucred->cr_uid != vattr.va_uid) { 4164 error = priv_check(td, PRIV_VFS_ADMIN); 4165 if (error != 0) 4166 goto out; 4167 } 4168 if (vcount(vp) > 1) 4169 VOP_REVOKE(vp, REVOKEALL); 4170 out: 4171 vput(vp); 4172 return (error); 4173 } 4174 4175 /* 4176 * Convert a user file descriptor to a kernel file entry and check that, if it 4177 * is a capability, the correct rights are present. A reference on the file 4178 * entry is held upon returning. 4179 */ 4180 int 4181 getvnode(struct filedesc *fdp, int fd, cap_rights_t *rightsp, struct file **fpp) 4182 { 4183 struct file *fp; 4184 int error; 4185 4186 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 4187 if (error != 0) 4188 return (error); 4189 4190 /* 4191 * The file could be not of the vnode type, or it may be not 4192 * yet fully initialized, in which case the f_vnode pointer 4193 * may be set, but f_ops is still badfileops. E.g., 4194 * devfs_open() transiently create such situation to 4195 * facilitate csw d_fdopen(). 
4196 * 4197 * Dupfdopen() handling in kern_openat() installs the 4198 * half-baked file into the process descriptor table, allowing 4199 * other thread to dereference it. Guard against the race by 4200 * checking f_ops. 4201 */ 4202 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4203 fdrop(fp, curthread); 4204 return (EINVAL); 4205 } 4206 *fpp = fp; 4207 return (0); 4208 } 4209 4210 4211 /* 4212 * Get an (NFS) file handle. 4213 */ 4214 #ifndef _SYS_SYSPROTO_H_ 4215 struct lgetfh_args { 4216 char *fname; 4217 fhandle_t *fhp; 4218 }; 4219 #endif 4220 int 4221 sys_lgetfh(td, uap) 4222 struct thread *td; 4223 register struct lgetfh_args *uap; 4224 { 4225 struct nameidata nd; 4226 fhandle_t fh; 4227 register struct vnode *vp; 4228 int error; 4229 4230 error = priv_check(td, PRIV_VFS_GETFH); 4231 if (error != 0) 4232 return (error); 4233 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4234 uap->fname, td); 4235 error = namei(&nd); 4236 if (error != 0) 4237 return (error); 4238 NDFREE(&nd, NDF_ONLY_PNBUF); 4239 vp = nd.ni_vp; 4240 bzero(&fh, sizeof(fh)); 4241 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4242 error = VOP_VPTOFH(vp, &fh.fh_fid); 4243 vput(vp); 4244 if (error == 0) 4245 error = copyout(&fh, uap->fhp, sizeof (fh)); 4246 return (error); 4247 } 4248 4249 #ifndef _SYS_SYSPROTO_H_ 4250 struct getfh_args { 4251 char *fname; 4252 fhandle_t *fhp; 4253 }; 4254 #endif 4255 int 4256 sys_getfh(td, uap) 4257 struct thread *td; 4258 register struct getfh_args *uap; 4259 { 4260 struct nameidata nd; 4261 fhandle_t fh; 4262 register struct vnode *vp; 4263 int error; 4264 4265 error = priv_check(td, PRIV_VFS_GETFH); 4266 if (error != 0) 4267 return (error); 4268 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4269 uap->fname, td); 4270 error = namei(&nd); 4271 if (error != 0) 4272 return (error); 4273 NDFREE(&nd, NDF_ONLY_PNBUF); 4274 vp = nd.ni_vp; 4275 bzero(&fh, sizeof(fh)); 4276 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 
4277 error = VOP_VPTOFH(vp, &fh.fh_fid); 4278 vput(vp); 4279 if (error == 0) 4280 error = copyout(&fh, uap->fhp, sizeof (fh)); 4281 return (error); 4282 } 4283 4284 /* 4285 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4286 * open descriptor. 4287 * 4288 * warning: do not remove the priv_check() call or this becomes one giant 4289 * security hole. 4290 */ 4291 #ifndef _SYS_SYSPROTO_H_ 4292 struct fhopen_args { 4293 const struct fhandle *u_fhp; 4294 int flags; 4295 }; 4296 #endif 4297 int 4298 sys_fhopen(td, uap) 4299 struct thread *td; 4300 struct fhopen_args /* { 4301 const struct fhandle *u_fhp; 4302 int flags; 4303 } */ *uap; 4304 { 4305 struct mount *mp; 4306 struct vnode *vp; 4307 struct fhandle fhp; 4308 struct file *fp; 4309 int fmode, error; 4310 int indx; 4311 4312 error = priv_check(td, PRIV_VFS_FHOPEN); 4313 if (error != 0) 4314 return (error); 4315 indx = -1; 4316 fmode = FFLAGS(uap->flags); 4317 /* why not allow a non-read/write open for our lockd? */ 4318 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4319 return (EINVAL); 4320 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4321 if (error != 0) 4322 return(error); 4323 /* find the mount point */ 4324 mp = vfs_busyfs(&fhp.fh_fsid); 4325 if (mp == NULL) 4326 return (ESTALE); 4327 /* now give me my vnode, it gets returned to me locked */ 4328 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4329 vfs_unbusy(mp); 4330 if (error != 0) 4331 return (error); 4332 4333 error = falloc_noinstall(td, &fp); 4334 if (error != 0) { 4335 vput(vp); 4336 return (error); 4337 } 4338 /* 4339 * An extra reference on `fp' has been held for us by 4340 * falloc_noinstall(). 
4341 */ 4342 4343 #ifdef INVARIANTS 4344 td->td_dupfd = -1; 4345 #endif 4346 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4347 if (error != 0) { 4348 KASSERT(fp->f_ops == &badfileops, 4349 ("VOP_OPEN in fhopen() set f_ops")); 4350 KASSERT(td->td_dupfd < 0, 4351 ("fhopen() encountered fdopen()")); 4352 4353 vput(vp); 4354 goto bad; 4355 } 4356 #ifdef INVARIANTS 4357 td->td_dupfd = 0; 4358 #endif 4359 fp->f_vnode = vp; 4360 fp->f_seqcount = 1; 4361 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4362 &vnops); 4363 VOP_UNLOCK(vp, 0); 4364 if ((fmode & O_TRUNC) != 0) { 4365 error = fo_truncate(fp, 0, td->td_ucred, td); 4366 if (error != 0) 4367 goto bad; 4368 } 4369 4370 error = finstall(td, fp, &indx, fmode, NULL); 4371 bad: 4372 fdrop(fp, td); 4373 td->td_retval[0] = indx; 4374 return (error); 4375 } 4376 4377 /* 4378 * Stat an (NFS) file handle. 4379 */ 4380 #ifndef _SYS_SYSPROTO_H_ 4381 struct fhstat_args { 4382 struct fhandle *u_fhp; 4383 struct stat *sb; 4384 }; 4385 #endif 4386 int 4387 sys_fhstat(td, uap) 4388 struct thread *td; 4389 register struct fhstat_args /* { 4390 struct fhandle *u_fhp; 4391 struct stat *sb; 4392 } */ *uap; 4393 { 4394 struct stat sb; 4395 struct fhandle fh; 4396 int error; 4397 4398 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4399 if (error != 0) 4400 return (error); 4401 error = kern_fhstat(td, fh, &sb); 4402 if (error == 0) 4403 error = copyout(&sb, uap->sb, sizeof(sb)); 4404 return (error); 4405 } 4406 4407 int 4408 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4409 { 4410 struct mount *mp; 4411 struct vnode *vp; 4412 int error; 4413 4414 error = priv_check(td, PRIV_VFS_FHSTAT); 4415 if (error != 0) 4416 return (error); 4417 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4418 return (ESTALE); 4419 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4420 vfs_unbusy(mp); 4421 if (error != 0) 4422 return (error); 4423 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4424 vput(vp); 
4425 return (error); 4426 } 4427 4428 /* 4429 * Implement fstatfs() for (NFS) file handles. 4430 */ 4431 #ifndef _SYS_SYSPROTO_H_ 4432 struct fhstatfs_args { 4433 struct fhandle *u_fhp; 4434 struct statfs *buf; 4435 }; 4436 #endif 4437 int 4438 sys_fhstatfs(td, uap) 4439 struct thread *td; 4440 struct fhstatfs_args /* { 4441 struct fhandle *u_fhp; 4442 struct statfs *buf; 4443 } */ *uap; 4444 { 4445 struct statfs sf; 4446 fhandle_t fh; 4447 int error; 4448 4449 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4450 if (error != 0) 4451 return (error); 4452 error = kern_fhstatfs(td, fh, &sf); 4453 if (error != 0) 4454 return (error); 4455 return (copyout(&sf, uap->buf, sizeof(sf))); 4456 } 4457 4458 int 4459 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4460 { 4461 struct statfs *sp; 4462 struct mount *mp; 4463 struct vnode *vp; 4464 int error; 4465 4466 error = priv_check(td, PRIV_VFS_FHSTATFS); 4467 if (error != 0) 4468 return (error); 4469 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4470 return (ESTALE); 4471 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4472 if (error != 0) { 4473 vfs_unbusy(mp); 4474 return (error); 4475 } 4476 vput(vp); 4477 error = prison_canseemount(td->td_ucred, mp); 4478 if (error != 0) 4479 goto out; 4480 #ifdef MAC 4481 error = mac_mount_check_stat(td->td_ucred, mp); 4482 if (error != 0) 4483 goto out; 4484 #endif 4485 /* 4486 * Set these in case the underlying filesystem fails to do so. 
4487 */ 4488 sp = &mp->mnt_stat; 4489 sp->f_version = STATFS_VERSION; 4490 sp->f_namemax = NAME_MAX; 4491 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4492 error = VFS_STATFS(mp, sp); 4493 if (error == 0) 4494 *buf = *sp; 4495 out: 4496 vfs_unbusy(mp); 4497 return (error); 4498 } 4499 4500 int 4501 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4502 { 4503 struct file *fp; 4504 struct mount *mp; 4505 struct vnode *vp; 4506 cap_rights_t rights; 4507 off_t olen, ooffset; 4508 int error; 4509 4510 fp = NULL; 4511 error = fget(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 4512 if (error != 0) 4513 goto out; 4514 4515 switch (fp->f_type) { 4516 case DTYPE_VNODE: 4517 break; 4518 case DTYPE_PIPE: 4519 case DTYPE_FIFO: 4520 error = ESPIPE; 4521 goto out; 4522 default: 4523 error = ENODEV; 4524 goto out; 4525 } 4526 if ((fp->f_flag & FWRITE) == 0) { 4527 error = EBADF; 4528 goto out; 4529 } 4530 vp = fp->f_vnode; 4531 if (vp->v_type != VREG) { 4532 error = ENODEV; 4533 goto out; 4534 } 4535 if (offset < 0 || len <= 0) { 4536 error = EINVAL; 4537 goto out; 4538 } 4539 /* Check for wrap. */ 4540 if (offset > OFF_MAX - len) { 4541 error = EFBIG; 4542 goto out; 4543 } 4544 4545 /* Allocating blocks may take a long time, so iterate. 
*/ 4546 for (;;) { 4547 olen = len; 4548 ooffset = offset; 4549 4550 bwillwrite(); 4551 mp = NULL; 4552 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4553 if (error != 0) 4554 break; 4555 error = vn_lock(vp, LK_EXCLUSIVE); 4556 if (error != 0) { 4557 vn_finished_write(mp); 4558 break; 4559 } 4560 #ifdef MAC 4561 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4562 if (error == 0) 4563 #endif 4564 error = VOP_ALLOCATE(vp, &offset, &len); 4565 VOP_UNLOCK(vp, 0); 4566 vn_finished_write(mp); 4567 4568 if (olen + ooffset != offset + len) { 4569 panic("offset + len changed from %jx/%jx to %jx/%jx", 4570 ooffset, olen, offset, len); 4571 } 4572 if (error != 0 || len == 0) 4573 break; 4574 KASSERT(olen > len, ("Iteration did not make progress?")); 4575 maybe_yield(); 4576 } 4577 out: 4578 if (fp != NULL) 4579 fdrop(fp, td); 4580 return (error); 4581 } 4582 4583 int 4584 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4585 { 4586 4587 td->td_retval[0] = kern_posix_fallocate(td, uap->fd, uap->offset, 4588 uap->len); 4589 return (0); 4590 } 4591 4592 /* 4593 * Unlike madvise(2), we do not make a best effort to remember every 4594 * possible caching hint. Instead, we remember the last setting with 4595 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4596 * region of any current setting. 
4597 */ 4598 int 4599 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4600 int advice) 4601 { 4602 struct fadvise_info *fa, *new; 4603 struct file *fp; 4604 struct vnode *vp; 4605 cap_rights_t rights; 4606 off_t end; 4607 int error; 4608 4609 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4610 return (EINVAL); 4611 switch (advice) { 4612 case POSIX_FADV_SEQUENTIAL: 4613 case POSIX_FADV_RANDOM: 4614 case POSIX_FADV_NOREUSE: 4615 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4616 break; 4617 case POSIX_FADV_NORMAL: 4618 case POSIX_FADV_WILLNEED: 4619 case POSIX_FADV_DONTNEED: 4620 new = NULL; 4621 break; 4622 default: 4623 return (EINVAL); 4624 } 4625 /* XXX: CAP_POSIX_FADVISE? */ 4626 error = fget(td, fd, cap_rights_init(&rights), &fp); 4627 if (error != 0) 4628 goto out; 4629 4630 switch (fp->f_type) { 4631 case DTYPE_VNODE: 4632 break; 4633 case DTYPE_PIPE: 4634 case DTYPE_FIFO: 4635 error = ESPIPE; 4636 goto out; 4637 default: 4638 error = ENODEV; 4639 goto out; 4640 } 4641 vp = fp->f_vnode; 4642 if (vp->v_type != VREG) { 4643 error = ENODEV; 4644 goto out; 4645 } 4646 if (len == 0) 4647 end = OFF_MAX; 4648 else 4649 end = offset + len - 1; 4650 switch (advice) { 4651 case POSIX_FADV_SEQUENTIAL: 4652 case POSIX_FADV_RANDOM: 4653 case POSIX_FADV_NOREUSE: 4654 /* 4655 * Try to merge any existing non-standard region with 4656 * this new region if possible, otherwise create a new 4657 * non-standard region for this request. 
4658 */ 4659 mtx_pool_lock(mtxpool_sleep, fp); 4660 fa = fp->f_advice; 4661 if (fa != NULL && fa->fa_advice == advice && 4662 ((fa->fa_start <= end && fa->fa_end >= offset) || 4663 (end != OFF_MAX && fa->fa_start == end + 1) || 4664 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4665 if (offset < fa->fa_start) 4666 fa->fa_start = offset; 4667 if (end > fa->fa_end) 4668 fa->fa_end = end; 4669 } else { 4670 new->fa_advice = advice; 4671 new->fa_start = offset; 4672 new->fa_end = end; 4673 new->fa_prevstart = 0; 4674 new->fa_prevend = 0; 4675 fp->f_advice = new; 4676 new = fa; 4677 } 4678 mtx_pool_unlock(mtxpool_sleep, fp); 4679 break; 4680 case POSIX_FADV_NORMAL: 4681 /* 4682 * If a the "normal" region overlaps with an existing 4683 * non-standard region, trim or remove the 4684 * non-standard region. 4685 */ 4686 mtx_pool_lock(mtxpool_sleep, fp); 4687 fa = fp->f_advice; 4688 if (fa != NULL) { 4689 if (offset <= fa->fa_start && end >= fa->fa_end) { 4690 new = fa; 4691 fp->f_advice = NULL; 4692 } else if (offset <= fa->fa_start && 4693 end >= fa->fa_start) 4694 fa->fa_start = end + 1; 4695 else if (offset <= fa->fa_end && end >= fa->fa_end) 4696 fa->fa_end = offset - 1; 4697 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4698 /* 4699 * If the "normal" region is a middle 4700 * portion of the existing 4701 * non-standard region, just remove 4702 * the whole thing rather than picking 4703 * one side or the other to 4704 * preserve. 
4705 */ 4706 new = fa; 4707 fp->f_advice = NULL; 4708 } 4709 } 4710 mtx_pool_unlock(mtxpool_sleep, fp); 4711 break; 4712 case POSIX_FADV_WILLNEED: 4713 case POSIX_FADV_DONTNEED: 4714 error = VOP_ADVISE(vp, offset, end, advice); 4715 break; 4716 } 4717 out: 4718 if (fp != NULL) 4719 fdrop(fp, td); 4720 free(new, M_FADVISE); 4721 return (error); 4722 } 4723 4724 int 4725 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4726 { 4727 4728 return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4729 uap->advice)); 4730 } 4731