1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capsicum.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <ufs/ufs/quota.h> 91 92 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 93 94 SDT_PROVIDER_DEFINE(vfs); 95 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int"); 96 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int"); 97 98 static int kern_chflagsat(struct thread *td, int fd, const char *path, 99 enum uio_seg pathseg, u_long flags, int atflag); 100 static int setfflags(struct thread *td, struct vnode *, u_long); 101 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 102 static int getutimens(const struct timespec *, enum uio_seg, 103 struct timespec *, int *); 104 static int setutimes(struct thread *td, struct vnode *, 105 const struct timespec *, int, int); 106 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 107 struct thread *td); 108 109 /* 110 * Sync each mounted filesystem. 111 */ 112 #ifndef _SYS_SYSPROTO_H_ 113 struct sync_args { 114 int dummy; 115 }; 116 #endif 117 /* ARGSUSED */ 118 int 119 sys_sync(struct thread *td, struct sync_args *uap) 120 { 121 struct mount *mp, *nmp; 122 int save; 123 124 mtx_lock(&mountlist_mtx); 125 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 126 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 127 nmp = TAILQ_NEXT(mp, mnt_list); 128 continue; 129 } 130 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 131 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 132 save = curthread_pflags_set(TDP_SYNCIO); 133 vfs_msync(mp, MNT_NOWAIT); 134 VFS_SYNC(mp, MNT_NOWAIT); 135 curthread_pflags_restore(save); 136 vn_finished_write(mp); 137 } 138 mtx_lock(&mountlist_mtx); 139 nmp = TAILQ_NEXT(mp, mnt_list); 140 vfs_unbusy(mp); 141 } 142 mtx_unlock(&mountlist_mtx); 143 return (0); 144 } 145 146 /* 147 * Change filesystem quotas. 148 */ 149 #ifndef _SYS_SYSPROTO_H_ 150 struct quotactl_args { 151 char *path; 152 int cmd; 153 int uid; 154 caddr_t arg; 155 }; 156 #endif 157 int 158 sys_quotactl(struct thread *td, struct quotactl_args *uap) 159 { 160 struct mount *mp; 161 struct nameidata nd; 162 int error; 163 164 AUDIT_ARG_CMD(uap->cmd); 165 AUDIT_ARG_UID(uap->uid); 166 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 167 return (EPERM); 168 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 169 uap->path, td); 170 if ((error = namei(&nd)) != 0) 171 return (error); 172 NDFREE(&nd, NDF_ONLY_PNBUF); 173 mp = nd.ni_vp->v_mount; 174 vfs_ref(mp); 175 vput(nd.ni_vp); 176 error = vfs_busy(mp, 0); 177 vfs_rel(mp); 178 if (error != 0) 179 return (error); 180 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 181 182 /* 183 * Since quota on operation typically needs to open quota 184 * file, the Q_QUOTAON handler needs to unbusy the mount point 185 * before calling into namei. Otherwise, unmount might be 186 * started between two vfs_busy() invocations (first is our, 187 * second is from mount point cross-walk code in lookup()), 188 * causing deadlock. 189 * 190 * Require that Q_QUOTAON handles the vfs_busy() reference on 191 * its own, always returning with ubusied mount point. 192 */ 193 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 194 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 195 vfs_unbusy(mp); 196 return (error); 197 } 198 199 /* 200 * Used by statfs conversion routines to scale the block size up if 201 * necessary so that all of the block counts are <= 'max_size'. Note 202 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 203 * value of 'n'. 204 */ 205 void 206 statfs_scale_blocks(struct statfs *sf, long max_size) 207 { 208 uint64_t count; 209 int shift; 210 211 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 212 213 /* 214 * Attempt to scale the block counts to give a more accurate 215 * overview to userland of the ratio of free space to used 216 * space. To do this, find the largest block count and compute 217 * a divisor that lets it fit into a signed integer <= max_size. 218 */ 219 if (sf->f_bavail < 0) 220 count = -sf->f_bavail; 221 else 222 count = sf->f_bavail; 223 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 224 if (count <= max_size) 225 return; 226 227 count >>= flsl(max_size); 228 shift = 0; 229 while (count > 0) { 230 shift++; 231 count >>=1; 232 } 233 234 sf->f_bsize <<= shift; 235 sf->f_blocks >>= shift; 236 sf->f_bfree >>= shift; 237 sf->f_bavail >>= shift; 238 } 239 240 static int 241 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 242 { 243 struct statfs *sp; 244 int error; 245 246 if (mp == NULL) 247 return (EBADF); 248 error = vfs_busy(mp, 0); 249 vfs_rel(mp); 250 if (error != 0) 251 return (error); 252 #ifdef MAC 253 error = mac_mount_check_stat(td->td_ucred, mp); 254 if (error != 0) 255 goto out; 256 #endif 257 /* 258 * Set these in case the underlying filesystem fails to do so. 259 */ 260 sp = &mp->mnt_stat; 261 sp->f_version = STATFS_VERSION; 262 sp->f_namemax = NAME_MAX; 263 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 264 error = VFS_STATFS(mp, sp); 265 if (error != 0) 266 goto out; 267 *buf = *sp; 268 if (priv_check(td, PRIV_VFS_GENERATION)) { 269 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 270 prison_enforce_statfs(td->td_ucred, mp, buf); 271 } 272 out: 273 vfs_unbusy(mp); 274 return (error); 275 } 276 277 /* 278 * Get filesystem statistics. 279 */ 280 #ifndef _SYS_SYSPROTO_H_ 281 struct statfs_args { 282 char *path; 283 struct statfs *buf; 284 }; 285 #endif 286 int 287 sys_statfs(struct thread *td, struct statfs_args *uap) 288 { 289 struct statfs *sfp; 290 int error; 291 292 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 293 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 294 if (error == 0) 295 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 296 free(sfp, M_STATFS); 297 return (error); 298 } 299 300 int 301 kern_statfs(struct thread *td, char *path, enum uio_seg pathseg, 302 struct statfs *buf) 303 { 304 struct mount *mp; 305 struct nameidata nd; 306 int error; 307 308 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 309 pathseg, path, td); 310 error = namei(&nd); 311 if (error != 0) 312 return (error); 313 mp = nd.ni_vp->v_mount; 314 vfs_ref(mp); 315 NDFREE(&nd, NDF_ONLY_PNBUF); 316 vput(nd.ni_vp); 317 return (kern_do_statfs(td, mp, buf)); 318 } 319 320 /* 321 * Get filesystem statistics. 322 */ 323 #ifndef _SYS_SYSPROTO_H_ 324 struct fstatfs_args { 325 int fd; 326 struct statfs *buf; 327 }; 328 #endif 329 int 330 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 331 { 332 struct statfs *sfp; 333 int error; 334 335 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 336 error = kern_fstatfs(td, uap->fd, sfp); 337 if (error == 0) 338 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 339 free(sfp, M_STATFS); 340 return (error); 341 } 342 343 int 344 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 345 { 346 struct file *fp; 347 struct mount *mp; 348 struct vnode *vp; 349 int error; 350 351 AUDIT_ARG_FD(fd); 352 error = getvnode(td, fd, &cap_fstatfs_rights, &fp); 353 if (error != 0) 354 return (error); 355 vp = fp->f_vnode; 356 vn_lock(vp, LK_SHARED | LK_RETRY); 357 #ifdef AUDIT 358 AUDIT_ARG_VNODE1(vp); 359 #endif 360 mp = vp->v_mount; 361 if (mp != NULL) 362 vfs_ref(mp); 363 VOP_UNLOCK(vp, 0); 364 fdrop(fp, td); 365 return (kern_do_statfs(td, mp, buf)); 366 } 367 368 /* 369 * Get statistics on all filesystems. 370 */ 371 #ifndef _SYS_SYSPROTO_H_ 372 struct getfsstat_args { 373 struct statfs *buf; 374 long bufsize; 375 int mode; 376 }; 377 #endif 378 int 379 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 380 { 381 size_t count; 382 int error; 383 384 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 385 return (EINVAL); 386 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 387 UIO_USERSPACE, uap->mode); 388 if (error == 0) 389 td->td_retval[0] = count; 390 return (error); 391 } 392 393 /* 394 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 395 * The caller is responsible for freeing memory which will be allocated 396 * in '*buf'. 397 */ 398 int 399 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 400 size_t *countp, enum uio_seg bufseg, int mode) 401 { 402 struct mount *mp, *nmp; 403 struct statfs *sfsp, *sp, *sptmp, *tofree; 404 size_t count, maxcount; 405 int error; 406 407 switch (mode) { 408 case MNT_WAIT: 409 case MNT_NOWAIT: 410 break; 411 default: 412 if (bufseg == UIO_SYSSPACE) 413 *buf = NULL; 414 return (EINVAL); 415 } 416 restart: 417 maxcount = bufsize / sizeof(struct statfs); 418 if (bufsize == 0) { 419 sfsp = NULL; 420 tofree = NULL; 421 } else if (bufseg == UIO_USERSPACE) { 422 sfsp = *buf; 423 tofree = NULL; 424 } else /* if (bufseg == UIO_SYSSPACE) */ { 425 count = 0; 426 mtx_lock(&mountlist_mtx); 427 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 428 count++; 429 } 430 mtx_unlock(&mountlist_mtx); 431 if (maxcount > count) 432 maxcount = count; 433 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 434 M_STATFS, M_WAITOK); 435 } 436 count = 0; 437 mtx_lock(&mountlist_mtx); 438 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 439 if (prison_canseemount(td->td_ucred, mp) != 0) { 440 nmp = TAILQ_NEXT(mp, mnt_list); 441 continue; 442 } 443 #ifdef MAC 444 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 445 nmp = TAILQ_NEXT(mp, mnt_list); 446 continue; 447 } 448 #endif 449 if (mode == MNT_WAIT) { 450 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 451 /* 452 * If vfs_busy() failed, and MBF_NOWAIT 453 * wasn't passed, then the mp is gone. 454 * Furthermore, because of MBF_MNTLSTLOCK, 455 * the mountlist_mtx was dropped. We have 456 * no other choice than to start over. 457 */ 458 mtx_unlock(&mountlist_mtx); 459 free(tofree, M_STATFS); 460 goto restart; 461 } 462 } else { 463 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 464 nmp = TAILQ_NEXT(mp, mnt_list); 465 continue; 466 } 467 } 468 if (sfsp != NULL && count < maxcount) { 469 sp = &mp->mnt_stat; 470 /* 471 * Set these in case the underlying filesystem 472 * fails to do so. 473 */ 474 sp->f_version = STATFS_VERSION; 475 sp->f_namemax = NAME_MAX; 476 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 477 /* 478 * If MNT_NOWAIT is specified, do not refresh 479 * the fsstat cache. 480 */ 481 if (mode != MNT_NOWAIT) { 482 error = VFS_STATFS(mp, sp); 483 if (error != 0) { 484 mtx_lock(&mountlist_mtx); 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 vfs_unbusy(mp); 487 continue; 488 } 489 } 490 if (priv_check(td, PRIV_VFS_GENERATION)) { 491 sptmp = malloc(sizeof(struct statfs), M_STATFS, 492 M_WAITOK); 493 *sptmp = *sp; 494 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 495 prison_enforce_statfs(td->td_ucred, mp, sptmp); 496 sp = sptmp; 497 } else 498 sptmp = NULL; 499 if (bufseg == UIO_SYSSPACE) { 500 bcopy(sp, sfsp, sizeof(*sp)); 501 free(sptmp, M_STATFS); 502 } else /* if (bufseg == UIO_USERSPACE) */ { 503 error = copyout(sp, sfsp, sizeof(*sp)); 504 free(sptmp, M_STATFS); 505 if (error != 0) { 506 vfs_unbusy(mp); 507 return (error); 508 } 509 } 510 sfsp++; 511 } 512 count++; 513 mtx_lock(&mountlist_mtx); 514 nmp = TAILQ_NEXT(mp, mnt_list); 515 vfs_unbusy(mp); 516 } 517 mtx_unlock(&mountlist_mtx); 518 if (sfsp != NULL && count > maxcount) 519 *countp = maxcount; 520 else 521 *countp = count; 522 return (0); 523 } 524 525 #ifdef COMPAT_FREEBSD4 526 /* 527 * Get old format filesystem statistics. 528 */ 529 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 530 531 #ifndef _SYS_SYSPROTO_H_ 532 struct freebsd4_statfs_args { 533 char *path; 534 struct ostatfs *buf; 535 }; 536 #endif 537 int 538 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 539 { 540 struct ostatfs osb; 541 struct statfs *sfp; 542 int error; 543 544 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 545 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 546 if (error == 0) { 547 freebsd4_cvtstatfs(sfp, &osb); 548 error = copyout(&osb, uap->buf, sizeof(osb)); 549 } 550 free(sfp, M_STATFS); 551 return (error); 552 } 553 554 /* 555 * Get filesystem statistics. 556 */ 557 #ifndef _SYS_SYSPROTO_H_ 558 struct freebsd4_fstatfs_args { 559 int fd; 560 struct ostatfs *buf; 561 }; 562 #endif 563 int 564 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 565 { 566 struct ostatfs osb; 567 struct statfs *sfp; 568 int error; 569 570 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 571 error = kern_fstatfs(td, uap->fd, sfp); 572 if (error == 0) { 573 freebsd4_cvtstatfs(sfp, &osb); 574 error = copyout(&osb, uap->buf, sizeof(osb)); 575 } 576 free(sfp, M_STATFS); 577 return (error); 578 } 579 580 /* 581 * Get statistics on all filesystems. 582 */ 583 #ifndef _SYS_SYSPROTO_H_ 584 struct freebsd4_getfsstat_args { 585 struct ostatfs *buf; 586 long bufsize; 587 int mode; 588 }; 589 #endif 590 int 591 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 592 { 593 struct statfs *buf, *sp; 594 struct ostatfs osb; 595 size_t count, size; 596 int error; 597 598 if (uap->bufsize < 0) 599 return (EINVAL); 600 count = uap->bufsize / sizeof(struct ostatfs); 601 if (count > SIZE_MAX / sizeof(struct statfs)) 602 return (EINVAL); 603 size = count * sizeof(struct statfs); 604 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 605 uap->mode); 606 if (error == 0) 607 td->td_retval[0] = count; 608 if (size != 0) { 609 sp = buf; 610 while (count != 0 && error == 0) { 611 freebsd4_cvtstatfs(sp, &osb); 612 error = copyout(&osb, uap->buf, sizeof(osb)); 613 sp++; 614 uap->buf++; 615 count--; 616 } 617 free(buf, M_STATFS); 618 } 619 return (error); 620 } 621 622 /* 623 * Implement fstatfs() for (NFS) file handles. 624 */ 625 #ifndef _SYS_SYSPROTO_H_ 626 struct freebsd4_fhstatfs_args { 627 struct fhandle *u_fhp; 628 struct ostatfs *buf; 629 }; 630 #endif 631 int 632 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 633 { 634 struct ostatfs osb; 635 struct statfs *sfp; 636 fhandle_t fh; 637 int error; 638 639 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 640 if (error != 0) 641 return (error); 642 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 643 error = kern_fhstatfs(td, fh, sfp); 644 if (error == 0) { 645 freebsd4_cvtstatfs(sfp, &osb); 646 error = copyout(&osb, uap->buf, sizeof(osb)); 647 } 648 free(sfp, M_STATFS); 649 return (error); 650 } 651 652 /* 653 * Convert a new format statfs structure to an old format statfs structure. 654 */ 655 static void 656 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 657 { 658 659 statfs_scale_blocks(nsp, LONG_MAX); 660 bzero(osp, sizeof(*osp)); 661 osp->f_bsize = nsp->f_bsize; 662 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 663 osp->f_blocks = nsp->f_blocks; 664 osp->f_bfree = nsp->f_bfree; 665 osp->f_bavail = nsp->f_bavail; 666 osp->f_files = MIN(nsp->f_files, LONG_MAX); 667 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 668 osp->f_owner = nsp->f_owner; 669 osp->f_type = nsp->f_type; 670 osp->f_flags = nsp->f_flags; 671 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 672 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 673 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 674 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 675 strlcpy(osp->f_fstypename, nsp->f_fstypename, 676 MIN(MFSNAMELEN, OMFSNAMELEN)); 677 strlcpy(osp->f_mntonname, nsp->f_mntonname, 678 MIN(MNAMELEN, OMNAMELEN)); 679 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 680 MIN(MNAMELEN, OMNAMELEN)); 681 osp->f_fsid = nsp->f_fsid; 682 } 683 #endif /* COMPAT_FREEBSD4 */ 684 685 #if defined(COMPAT_FREEBSD11) 686 /* 687 * Get old format filesystem statistics. 688 */ 689 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 690 691 int 692 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 693 { 694 struct freebsd11_statfs osb; 695 struct statfs *sfp; 696 int error; 697 698 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 699 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 700 if (error == 0) { 701 freebsd11_cvtstatfs(sfp, &osb); 702 error = copyout(&osb, uap->buf, sizeof(osb)); 703 } 704 free(sfp, M_STATFS); 705 return (error); 706 } 707 708 /* 709 * Get filesystem statistics. 710 */ 711 int 712 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 713 { 714 struct freebsd11_statfs osb; 715 struct statfs *sfp; 716 int error; 717 718 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 719 error = kern_fstatfs(td, uap->fd, sfp); 720 if (error == 0) { 721 freebsd11_cvtstatfs(sfp, &osb); 722 error = copyout(&osb, uap->buf, sizeof(osb)); 723 } 724 free(sfp, M_STATFS); 725 return (error); 726 } 727 728 /* 729 * Get statistics on all filesystems. 730 */ 731 int 732 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 733 { 734 struct freebsd11_statfs osb; 735 struct statfs *buf, *sp; 736 size_t count, size; 737 int error; 738 739 count = uap->bufsize / sizeof(struct ostatfs); 740 size = count * sizeof(struct statfs); 741 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 742 uap->mode); 743 if (error == 0) 744 td->td_retval[0] = count; 745 if (size > 0) { 746 sp = buf; 747 while (count > 0 && error == 0) { 748 freebsd11_cvtstatfs(sp, &osb); 749 error = copyout(&osb, uap->buf, sizeof(osb)); 750 sp++; 751 uap->buf++; 752 count--; 753 } 754 free(buf, M_STATFS); 755 } 756 return (error); 757 } 758 759 /* 760 * Implement fstatfs() for (NFS) file handles. 761 */ 762 int 763 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 764 { 765 struct freebsd11_statfs osb; 766 struct statfs *sfp; 767 fhandle_t fh; 768 int error; 769 770 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 771 if (error) 772 return (error); 773 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 774 error = kern_fhstatfs(td, fh, sfp); 775 if (error == 0) { 776 freebsd11_cvtstatfs(sfp, &osb); 777 error = copyout(&osb, uap->buf, sizeof(osb)); 778 } 779 free(sfp, M_STATFS); 780 return (error); 781 } 782 783 /* 784 * Convert a new format statfs structure to an old format statfs structure. 785 */ 786 static void 787 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 788 { 789 790 bzero(osp, sizeof(*osp)); 791 osp->f_version = FREEBSD11_STATFS_VERSION; 792 osp->f_type = nsp->f_type; 793 osp->f_flags = nsp->f_flags; 794 osp->f_bsize = nsp->f_bsize; 795 osp->f_iosize = nsp->f_iosize; 796 osp->f_blocks = nsp->f_blocks; 797 osp->f_bfree = nsp->f_bfree; 798 osp->f_bavail = nsp->f_bavail; 799 osp->f_files = nsp->f_files; 800 osp->f_ffree = nsp->f_ffree; 801 osp->f_syncwrites = nsp->f_syncwrites; 802 osp->f_asyncwrites = nsp->f_asyncwrites; 803 osp->f_syncreads = nsp->f_syncreads; 804 osp->f_asyncreads = nsp->f_asyncreads; 805 osp->f_namemax = nsp->f_namemax; 806 osp->f_owner = nsp->f_owner; 807 osp->f_fsid = nsp->f_fsid; 808 strlcpy(osp->f_fstypename, nsp->f_fstypename, 809 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 810 strlcpy(osp->f_mntonname, nsp->f_mntonname, 811 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 812 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 813 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 814 } 815 #endif /* COMPAT_FREEBSD11 */ 816 817 /* 818 * Change current working directory to a given file descriptor. 819 */ 820 #ifndef _SYS_SYSPROTO_H_ 821 struct fchdir_args { 822 int fd; 823 }; 824 #endif 825 int 826 sys_fchdir(struct thread *td, struct fchdir_args *uap) 827 { 828 struct vnode *vp, *tdp; 829 struct mount *mp; 830 struct file *fp; 831 int error; 832 833 AUDIT_ARG_FD(uap->fd); 834 error = getvnode(td, uap->fd, &cap_fchdir_rights, 835 &fp); 836 if (error != 0) 837 return (error); 838 vp = fp->f_vnode; 839 vrefact(vp); 840 fdrop(fp, td); 841 vn_lock(vp, LK_SHARED | LK_RETRY); 842 AUDIT_ARG_VNODE1(vp); 843 error = change_dir(vp, td); 844 while (!error && (mp = vp->v_mountedhere) != NULL) { 845 if (vfs_busy(mp, 0)) 846 continue; 847 error = VFS_ROOT(mp, LK_SHARED, &tdp); 848 vfs_unbusy(mp); 849 if (error != 0) 850 break; 851 vput(vp); 852 vp = tdp; 853 } 854 if (error != 0) { 855 vput(vp); 856 return (error); 857 } 858 VOP_UNLOCK(vp, 0); 859 pwd_chdir(td, vp); 860 return (0); 861 } 862 863 /* 864 * Change current working directory (``.''). 865 */ 866 #ifndef _SYS_SYSPROTO_H_ 867 struct chdir_args { 868 char *path; 869 }; 870 #endif 871 int 872 sys_chdir(struct thread *td, struct chdir_args *uap) 873 { 874 875 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 876 } 877 878 int 879 kern_chdir(struct thread *td, char *path, enum uio_seg pathseg) 880 { 881 struct nameidata nd; 882 int error; 883 884 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 885 pathseg, path, td); 886 if ((error = namei(&nd)) != 0) 887 return (error); 888 if ((error = change_dir(nd.ni_vp, td)) != 0) { 889 vput(nd.ni_vp); 890 NDFREE(&nd, NDF_ONLY_PNBUF); 891 return (error); 892 } 893 VOP_UNLOCK(nd.ni_vp, 0); 894 NDFREE(&nd, NDF_ONLY_PNBUF); 895 pwd_chdir(td, nd.ni_vp); 896 return (0); 897 } 898 899 /* 900 * Change notion of root (``/'') directory. 901 */ 902 #ifndef _SYS_SYSPROTO_H_ 903 struct chroot_args { 904 char *path; 905 }; 906 #endif 907 int 908 sys_chroot(struct thread *td, struct chroot_args *uap) 909 { 910 struct nameidata nd; 911 int error; 912 913 error = priv_check(td, PRIV_VFS_CHROOT); 914 if (error != 0) 915 return (error); 916 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 917 UIO_USERSPACE, uap->path, td); 918 error = namei(&nd); 919 if (error != 0) 920 goto error; 921 error = change_dir(nd.ni_vp, td); 922 if (error != 0) 923 goto e_vunlock; 924 #ifdef MAC 925 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 926 if (error != 0) 927 goto e_vunlock; 928 #endif 929 VOP_UNLOCK(nd.ni_vp, 0); 930 error = pwd_chroot(td, nd.ni_vp); 931 vrele(nd.ni_vp); 932 NDFREE(&nd, NDF_ONLY_PNBUF); 933 return (error); 934 e_vunlock: 935 vput(nd.ni_vp); 936 error: 937 NDFREE(&nd, NDF_ONLY_PNBUF); 938 return (error); 939 } 940 941 /* 942 * Common routine for chroot and chdir. Callers must provide a locked vnode 943 * instance. 944 */ 945 int 946 change_dir(struct vnode *vp, struct thread *td) 947 { 948 #ifdef MAC 949 int error; 950 #endif 951 952 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 953 if (vp->v_type != VDIR) 954 return (ENOTDIR); 955 #ifdef MAC 956 error = mac_vnode_check_chdir(td->td_ucred, vp); 957 if (error != 0) 958 return (error); 959 #endif 960 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 961 } 962 963 static __inline void 964 flags_to_rights(int flags, cap_rights_t *rightsp) 965 { 966 967 if (flags & O_EXEC) { 968 cap_rights_set(rightsp, CAP_FEXECVE); 969 } else { 970 switch ((flags & O_ACCMODE)) { 971 case O_RDONLY: 972 cap_rights_set(rightsp, CAP_READ); 973 break; 974 case O_RDWR: 975 cap_rights_set(rightsp, CAP_READ); 976 /* FALLTHROUGH */ 977 case O_WRONLY: 978 cap_rights_set(rightsp, CAP_WRITE); 979 if (!(flags & (O_APPEND | O_TRUNC))) 980 cap_rights_set(rightsp, CAP_SEEK); 981 break; 982 } 983 } 984 985 if (flags & O_CREAT) 986 cap_rights_set(rightsp, CAP_CREATE); 987 988 if (flags & O_TRUNC) 989 cap_rights_set(rightsp, CAP_FTRUNCATE); 990 991 if (flags & (O_SYNC | O_FSYNC)) 992 cap_rights_set(rightsp, CAP_FSYNC); 993 994 if (flags & (O_EXLOCK | O_SHLOCK)) 995 cap_rights_set(rightsp, CAP_FLOCK); 996 } 997 998 /* 999 * Check permissions, allocate an open file structure, and call the device 1000 * open routine if any. 1001 */ 1002 #ifndef _SYS_SYSPROTO_H_ 1003 struct open_args { 1004 char *path; 1005 int flags; 1006 int mode; 1007 }; 1008 #endif 1009 int 1010 sys_open(struct thread *td, struct open_args *uap) 1011 { 1012 1013 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1014 uap->flags, uap->mode)); 1015 } 1016 1017 #ifndef _SYS_SYSPROTO_H_ 1018 struct openat_args { 1019 int fd; 1020 char *path; 1021 int flag; 1022 int mode; 1023 }; 1024 #endif 1025 int 1026 sys_openat(struct thread *td, struct openat_args *uap) 1027 { 1028 1029 AUDIT_ARG_FD(uap->fd); 1030 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1031 uap->mode)); 1032 } 1033 1034 int 1035 kern_openat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1036 int flags, int mode) 1037 { 1038 struct proc *p = td->td_proc; 1039 struct filedesc *fdp = p->p_fd; 1040 struct file *fp; 1041 struct vnode *vp; 1042 struct nameidata nd; 1043 cap_rights_t rights; 1044 int cmode, error, indx; 1045 1046 indx = -1; 1047 1048 AUDIT_ARG_FFLAGS(flags); 1049 AUDIT_ARG_MODE(mode); 1050 cap_rights_init(&rights, CAP_LOOKUP); 1051 flags_to_rights(flags, &rights); 1052 /* 1053 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1054 * may be specified. 1055 */ 1056 if (flags & O_EXEC) { 1057 if (flags & O_ACCMODE) 1058 return (EINVAL); 1059 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1060 return (EINVAL); 1061 } else { 1062 flags = FFLAGS(flags); 1063 } 1064 1065 /* 1066 * Allocate a file structure. The descriptor to reference it 1067 * is allocated and set by finstall() below. 1068 */ 1069 error = falloc_noinstall(td, &fp); 1070 if (error != 0) 1071 return (error); 1072 /* 1073 * An extra reference on `fp' has been held for us by 1074 * falloc_noinstall(). 1075 */ 1076 /* Set the flags early so the finit in devfs can pick them up. */ 1077 fp->f_flag = flags & FMASK; 1078 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1079 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1080 &rights, td); 1081 td->td_dupfd = -1; /* XXX check for fdopen */ 1082 error = vn_open(&nd, &flags, cmode, fp); 1083 if (error != 0) { 1084 /* 1085 * If the vn_open replaced the method vector, something 1086 * wonderous happened deep below and we just pass it up 1087 * pretending we know what we do. 1088 */ 1089 if (error == ENXIO && fp->f_ops != &badfileops) 1090 goto success; 1091 1092 /* 1093 * Handle special fdopen() case. bleh. 1094 * 1095 * Don't do this for relative (capability) lookups; we don't 1096 * understand exactly what would happen, and we don't think 1097 * that it ever should. 1098 */ 1099 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1100 (error == ENODEV || error == ENXIO) && 1101 td->td_dupfd >= 0) { 1102 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1103 &indx); 1104 if (error == 0) 1105 goto success; 1106 } 1107 1108 goto bad; 1109 } 1110 td->td_dupfd = 0; 1111 NDFREE(&nd, NDF_ONLY_PNBUF); 1112 vp = nd.ni_vp; 1113 1114 /* 1115 * Store the vnode, for any f_type. Typically, the vnode use 1116 * count is decremented by direct call to vn_closefile() for 1117 * files that switched type in the cdevsw fdopen() method. 1118 */ 1119 fp->f_vnode = vp; 1120 /* 1121 * If the file wasn't claimed by devfs bind it to the normal 1122 * vnode operations here. 1123 */ 1124 if (fp->f_ops == &badfileops) { 1125 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1126 fp->f_seqcount = 1; 1127 finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK), 1128 DTYPE_VNODE, vp, &vnops); 1129 } 1130 1131 VOP_UNLOCK(vp, 0); 1132 if (flags & O_TRUNC) { 1133 error = fo_truncate(fp, 0, td->td_ucred, td); 1134 if (error != 0) 1135 goto bad; 1136 } 1137 success: 1138 /* 1139 * If we haven't already installed the FD (for dupfdopen), do so now. 1140 */ 1141 if (indx == -1) { 1142 struct filecaps *fcaps; 1143 1144 #ifdef CAPABILITIES 1145 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1146 fcaps = &nd.ni_filecaps; 1147 else 1148 #endif 1149 fcaps = NULL; 1150 error = finstall(td, fp, &indx, flags, fcaps); 1151 /* On success finstall() consumes fcaps. */ 1152 if (error != 0) { 1153 filecaps_free(&nd.ni_filecaps); 1154 goto bad; 1155 } 1156 } else { 1157 filecaps_free(&nd.ni_filecaps); 1158 } 1159 1160 /* 1161 * Release our private reference, leaving the one associated with 1162 * the descriptor table intact. 1163 */ 1164 fdrop(fp, td); 1165 td->td_retval[0] = indx; 1166 return (0); 1167 bad: 1168 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1169 fdrop(fp, td); 1170 return (error); 1171 } 1172 1173 #ifdef COMPAT_43 1174 /* 1175 * Create a file. 1176 */ 1177 #ifndef _SYS_SYSPROTO_H_ 1178 struct ocreat_args { 1179 char *path; 1180 int mode; 1181 }; 1182 #endif 1183 int 1184 ocreat(struct thread *td, struct ocreat_args *uap) 1185 { 1186 1187 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1188 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1189 } 1190 #endif /* COMPAT_43 */ 1191 1192 /* 1193 * Create a special file. 1194 */ 1195 #ifndef _SYS_SYSPROTO_H_ 1196 struct mknodat_args { 1197 int fd; 1198 char *path; 1199 mode_t mode; 1200 dev_t dev; 1201 }; 1202 #endif 1203 int 1204 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1205 { 1206 1207 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1208 uap->dev)); 1209 } 1210 1211 #if defined(COMPAT_FREEBSD11) 1212 int 1213 freebsd11_mknod(struct thread *td, 1214 struct freebsd11_mknod_args *uap) 1215 { 1216 1217 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1218 uap->mode, uap->dev)); 1219 } 1220 1221 int 1222 freebsd11_mknodat(struct thread *td, 1223 struct freebsd11_mknodat_args *uap) 1224 { 1225 1226 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1227 uap->dev)); 1228 } 1229 #endif /* COMPAT_FREEBSD11 */ 1230 1231 int 1232 kern_mknodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1233 int mode, dev_t dev) 1234 { 1235 struct vnode *vp; 1236 struct mount *mp; 1237 struct vattr vattr; 1238 struct nameidata nd; 1239 int error, whiteout = 0; 1240 1241 AUDIT_ARG_MODE(mode); 1242 AUDIT_ARG_DEV(dev); 1243 switch (mode & S_IFMT) { 1244 case S_IFCHR: 1245 case S_IFBLK: 1246 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1247 if (error == 0 && dev == VNOVAL) 1248 error = EINVAL; 1249 break; 1250 case S_IFWHT: 1251 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1252 break; 1253 case S_IFIFO: 1254 if (dev == 0) 1255 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1256 /* FALLTHROUGH */ 1257 default: 1258 error = EINVAL; 1259 break; 1260 } 1261 if (error != 0) 1262 return (error); 1263 restart: 1264 bwillwrite(); 1265 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1266 NOCACHE, pathseg, path, fd, &cap_mknodat_rights, 1267 td); 1268 if ((error = namei(&nd)) != 0) 1269 return (error); 1270 vp = nd.ni_vp; 1271 if (vp != NULL) { 1272 NDFREE(&nd, NDF_ONLY_PNBUF); 1273 if (vp == nd.ni_dvp) 1274 vrele(nd.ni_dvp); 1275 else 1276 vput(nd.ni_dvp); 1277 vrele(vp); 1278 return (EEXIST); 1279 } else { 1280 VATTR_NULL(&vattr); 1281 vattr.va_mode = (mode & ALLPERMS) & 1282 ~td->td_proc->p_fd->fd_cmask; 1283 vattr.va_rdev = dev; 1284 whiteout = 0; 1285 1286 switch (mode & S_IFMT) { 1287 case S_IFCHR: 1288 vattr.va_type = VCHR; 1289 break; 1290 case S_IFBLK: 1291 vattr.va_type = VBLK; 1292 break; 1293 case S_IFWHT: 1294 whiteout = 1; 1295 break; 1296 default: 1297 panic("kern_mknod: invalid mode"); 1298 } 1299 } 1300 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1301 NDFREE(&nd, NDF_ONLY_PNBUF); 1302 vput(nd.ni_dvp); 1303 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1304 return (error); 1305 goto restart; 1306 } 1307 #ifdef MAC 1308 if (error == 0 && !whiteout) 1309 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1310 &nd.ni_cnd, &vattr); 1311 #endif 1312 if (error == 0) { 1313 if (whiteout) 1314 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1315 else { 1316 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1317 &nd.ni_cnd, &vattr); 1318 if (error == 0) 1319 vput(nd.ni_vp); 1320 } 1321 } 1322 NDFREE(&nd, NDF_ONLY_PNBUF); 1323 vput(nd.ni_dvp); 1324 vn_finished_write(mp); 1325 return (error); 1326 } 1327 1328 /* 1329 * Create a named pipe. 1330 */ 1331 #ifndef _SYS_SYSPROTO_H_ 1332 struct mkfifo_args { 1333 char *path; 1334 int mode; 1335 }; 1336 #endif 1337 int 1338 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1339 { 1340 1341 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1342 uap->mode)); 1343 } 1344 1345 #ifndef _SYS_SYSPROTO_H_ 1346 struct mkfifoat_args { 1347 int fd; 1348 char *path; 1349 mode_t mode; 1350 }; 1351 #endif 1352 int 1353 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1354 { 1355 1356 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1357 uap->mode)); 1358 } 1359 1360 int 1361 kern_mkfifoat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1362 int mode) 1363 { 1364 struct mount *mp; 1365 struct vattr vattr; 1366 struct nameidata nd; 1367 int error; 1368 1369 AUDIT_ARG_MODE(mode); 1370 restart: 1371 bwillwrite(); 1372 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1373 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights, 1374 td); 1375 if ((error = namei(&nd)) != 0) 1376 return (error); 1377 if (nd.ni_vp != NULL) { 1378 NDFREE(&nd, NDF_ONLY_PNBUF); 1379 if (nd.ni_vp == nd.ni_dvp) 1380 vrele(nd.ni_dvp); 1381 else 1382 vput(nd.ni_dvp); 1383 vrele(nd.ni_vp); 1384 return (EEXIST); 1385 } 1386 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1387 NDFREE(&nd, NDF_ONLY_PNBUF); 1388 vput(nd.ni_dvp); 1389 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1390 return (error); 1391 goto restart; 1392 } 1393 VATTR_NULL(&vattr); 1394 vattr.va_type = VFIFO; 1395 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1396 #ifdef MAC 1397 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1398 &vattr); 1399 if (error != 0) 1400 goto out; 1401 #endif 1402 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1403 if (error == 0) 1404 vput(nd.ni_vp); 1405 #ifdef MAC 1406 out: 1407 #endif 1408 vput(nd.ni_dvp); 1409 vn_finished_write(mp); 1410 NDFREE(&nd, NDF_ONLY_PNBUF); 1411 return (error); 1412 } 1413 1414 /* 1415 * Make a hard file link. 1416 */ 1417 #ifndef _SYS_SYSPROTO_H_ 1418 struct link_args { 1419 char *path; 1420 char *link; 1421 }; 1422 #endif 1423 int 1424 sys_link(struct thread *td, struct link_args *uap) 1425 { 1426 1427 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1428 UIO_USERSPACE, FOLLOW)); 1429 } 1430 1431 #ifndef _SYS_SYSPROTO_H_ 1432 struct linkat_args { 1433 int fd1; 1434 char *path1; 1435 int fd2; 1436 char *path2; 1437 int flag; 1438 }; 1439 #endif 1440 int 1441 sys_linkat(struct thread *td, struct linkat_args *uap) 1442 { 1443 int flag; 1444 1445 flag = uap->flag; 1446 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_BENEATH)) != 0) 1447 return (EINVAL); 1448 1449 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1450 UIO_USERSPACE, ((flag & AT_SYMLINK_FOLLOW) != 0 ? FOLLOW : 1451 NOFOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0))); 1452 } 1453 1454 int hardlink_check_uid = 0; 1455 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1456 &hardlink_check_uid, 0, 1457 "Unprivileged processes cannot create hard links to files owned by other " 1458 "users"); 1459 static int hardlink_check_gid = 0; 1460 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1461 &hardlink_check_gid, 0, 1462 "Unprivileged processes cannot create hard links to files owned by other " 1463 "groups"); 1464 1465 static int 1466 can_hardlink(struct vnode *vp, struct ucred *cred) 1467 { 1468 struct vattr va; 1469 int error; 1470 1471 if (!hardlink_check_uid && !hardlink_check_gid) 1472 return (0); 1473 1474 error = VOP_GETATTR(vp, &va, cred); 1475 if (error != 0) 1476 return (error); 1477 1478 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1479 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1480 if (error != 0) 1481 return (error); 1482 } 1483 1484 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1485 error = priv_check_cred(cred, PRIV_VFS_LINK, 0); 1486 if (error != 0) 1487 return (error); 1488 } 1489 1490 return (0); 1491 } 1492 1493 int 1494 kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, 1495 enum uio_seg segflg, int follow) 1496 { 1497 struct vnode *vp; 1498 struct mount *mp; 1499 struct nameidata nd; 1500 int error; 1501 1502 again: 1503 bwillwrite(); 1504 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, 1505 &cap_linkat_source_rights, td); 1506 1507 if ((error = namei(&nd)) != 0) 1508 return (error); 1509 NDFREE(&nd, NDF_ONLY_PNBUF); 1510 vp = nd.ni_vp; 1511 if (vp->v_type == VDIR) { 1512 vrele(vp); 1513 return (EPERM); /* POSIX */ 1514 } 1515 NDINIT_ATRIGHTS(&nd, CREATE, 1516 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflg, path2, fd2, 1517 &cap_linkat_target_rights, td); 1518 if ((error = namei(&nd)) == 0) { 1519 if (nd.ni_vp != NULL) { 1520 NDFREE(&nd, NDF_ONLY_PNBUF); 1521 if (nd.ni_dvp == nd.ni_vp) 1522 vrele(nd.ni_dvp); 1523 else 1524 vput(nd.ni_dvp); 1525 vrele(nd.ni_vp); 1526 vrele(vp); 1527 return (EEXIST); 1528 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1529 /* 1530 * Cross-device link. No need to recheck 1531 * vp->v_type, since it cannot change, except 1532 * to VBAD. 1533 */ 1534 NDFREE(&nd, NDF_ONLY_PNBUF); 1535 vput(nd.ni_dvp); 1536 vrele(vp); 1537 return (EXDEV); 1538 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1539 error = can_hardlink(vp, td->td_ucred); 1540 #ifdef MAC 1541 if (error == 0) 1542 error = mac_vnode_check_link(td->td_ucred, 1543 nd.ni_dvp, vp, &nd.ni_cnd); 1544 #endif 1545 if (error != 0) { 1546 vput(vp); 1547 vput(nd.ni_dvp); 1548 NDFREE(&nd, NDF_ONLY_PNBUF); 1549 return (error); 1550 } 1551 error = vn_start_write(vp, &mp, V_NOWAIT); 1552 if (error != 0) { 1553 vput(vp); 1554 vput(nd.ni_dvp); 1555 NDFREE(&nd, NDF_ONLY_PNBUF); 1556 error = vn_start_write(NULL, &mp, 1557 V_XSLEEP | PCATCH); 1558 if (error != 0) 1559 return (error); 1560 goto again; 1561 } 1562 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1563 VOP_UNLOCK(vp, 0); 1564 vput(nd.ni_dvp); 1565 vn_finished_write(mp); 1566 NDFREE(&nd, NDF_ONLY_PNBUF); 1567 } else { 1568 vput(nd.ni_dvp); 1569 NDFREE(&nd, NDF_ONLY_PNBUF); 1570 vrele(vp); 1571 goto again; 1572 } 1573 } 1574 vrele(vp); 1575 return (error); 1576 } 1577 1578 /* 1579 * Make a symbolic link. 1580 */ 1581 #ifndef _SYS_SYSPROTO_H_ 1582 struct symlink_args { 1583 char *path; 1584 char *link; 1585 }; 1586 #endif 1587 int 1588 sys_symlink(struct thread *td, struct symlink_args *uap) 1589 { 1590 1591 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1592 UIO_USERSPACE)); 1593 } 1594 1595 #ifndef _SYS_SYSPROTO_H_ 1596 struct symlinkat_args { 1597 char *path; 1598 int fd; 1599 char *path2; 1600 }; 1601 #endif 1602 int 1603 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1604 { 1605 1606 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1607 UIO_USERSPACE)); 1608 } 1609 1610 int 1611 kern_symlinkat(struct thread *td, char *path1, int fd, char *path2, 1612 enum uio_seg segflg) 1613 { 1614 struct mount *mp; 1615 struct vattr vattr; 1616 char *syspath; 1617 struct nameidata nd; 1618 int error; 1619 1620 if (segflg == UIO_SYSSPACE) { 1621 syspath = path1; 1622 } else { 1623 syspath = uma_zalloc(namei_zone, M_WAITOK); 1624 if ((error = copyinstr(path1, syspath, MAXPATHLEN, NULL)) != 0) 1625 goto out; 1626 } 1627 AUDIT_ARG_TEXT(syspath); 1628 restart: 1629 bwillwrite(); 1630 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1631 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1632 td); 1633 if ((error = namei(&nd)) != 0) 1634 goto out; 1635 if (nd.ni_vp) { 1636 NDFREE(&nd, NDF_ONLY_PNBUF); 1637 if (nd.ni_vp == nd.ni_dvp) 1638 vrele(nd.ni_dvp); 1639 else 1640 vput(nd.ni_dvp); 1641 vrele(nd.ni_vp); 1642 error = EEXIST; 1643 goto out; 1644 } 1645 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1646 NDFREE(&nd, NDF_ONLY_PNBUF); 1647 vput(nd.ni_dvp); 1648 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1649 goto out; 1650 goto restart; 1651 } 1652 VATTR_NULL(&vattr); 1653 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1654 #ifdef MAC 1655 vattr.va_type = VLNK; 1656 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1657 &vattr); 1658 if (error != 0) 1659 goto out2; 1660 #endif 1661 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1662 if (error == 0) 1663 vput(nd.ni_vp); 1664 #ifdef MAC 1665 out2: 1666 #endif 1667 NDFREE(&nd, NDF_ONLY_PNBUF); 1668 vput(nd.ni_dvp); 1669 vn_finished_write(mp); 1670 out: 1671 if (segflg != UIO_SYSSPACE) 1672 uma_zfree(namei_zone, syspath); 1673 return (error); 1674 } 1675 1676 /* 1677 * Delete a whiteout from the filesystem. 1678 */ 1679 #ifndef _SYS_SYSPROTO_H_ 1680 struct undelete_args { 1681 char *path; 1682 }; 1683 #endif 1684 int 1685 sys_undelete(struct thread *td, struct undelete_args *uap) 1686 { 1687 struct mount *mp; 1688 struct nameidata nd; 1689 int error; 1690 1691 restart: 1692 bwillwrite(); 1693 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1694 UIO_USERSPACE, uap->path, td); 1695 error = namei(&nd); 1696 if (error != 0) 1697 return (error); 1698 1699 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1700 NDFREE(&nd, NDF_ONLY_PNBUF); 1701 if (nd.ni_vp == nd.ni_dvp) 1702 vrele(nd.ni_dvp); 1703 else 1704 vput(nd.ni_dvp); 1705 if (nd.ni_vp) 1706 vrele(nd.ni_vp); 1707 return (EEXIST); 1708 } 1709 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1710 NDFREE(&nd, NDF_ONLY_PNBUF); 1711 vput(nd.ni_dvp); 1712 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1713 return (error); 1714 goto restart; 1715 } 1716 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1717 NDFREE(&nd, NDF_ONLY_PNBUF); 1718 vput(nd.ni_dvp); 1719 vn_finished_write(mp); 1720 return (error); 1721 } 1722 1723 /* 1724 * Delete a name from the filesystem. 1725 */ 1726 #ifndef _SYS_SYSPROTO_H_ 1727 struct unlink_args { 1728 char *path; 1729 }; 1730 #endif 1731 int 1732 sys_unlink(struct thread *td, struct unlink_args *uap) 1733 { 1734 1735 return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0, 0)); 1736 } 1737 1738 #ifndef _SYS_SYSPROTO_H_ 1739 struct unlinkat_args { 1740 int fd; 1741 char *path; 1742 int flag; 1743 }; 1744 #endif 1745 int 1746 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1747 { 1748 int fd, flag; 1749 char *path; 1750 1751 flag = uap->flag; 1752 fd = uap->fd; 1753 path = uap->path; 1754 1755 if ((flag & ~(AT_REMOVEDIR | AT_BENEATH)) != 0) 1756 return (EINVAL); 1757 1758 if ((uap->flag & AT_REMOVEDIR) != 0) 1759 return (kern_rmdirat(td, fd, path, UIO_USERSPACE, flag)); 1760 else 1761 return (kern_unlinkat(td, fd, path, UIO_USERSPACE, flag, 0)); 1762 } 1763 1764 int 1765 kern_unlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1766 int flag, ino_t oldinum) 1767 { 1768 struct mount *mp; 1769 struct vnode *vp; 1770 struct nameidata nd; 1771 struct stat sb; 1772 int error; 1773 1774 restart: 1775 bwillwrite(); 1776 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1777 ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 1778 pathseg, path, fd, &cap_unlinkat_rights, td); 1779 if ((error = namei(&nd)) != 0) 1780 return (error == EINVAL ? EPERM : error); 1781 vp = nd.ni_vp; 1782 if (vp->v_type == VDIR && oldinum == 0) { 1783 error = EPERM; /* POSIX */ 1784 } else if (oldinum != 0 && 1785 ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1786 sb.st_ino != oldinum) { 1787 error = EIDRM; /* Identifier removed */ 1788 } else { 1789 /* 1790 * The root of a mounted filesystem cannot be deleted. 1791 * 1792 * XXX: can this only be a VDIR case? 1793 */ 1794 if (vp->v_vflag & VV_ROOT) 1795 error = EBUSY; 1796 } 1797 if (error == 0) { 1798 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1799 NDFREE(&nd, NDF_ONLY_PNBUF); 1800 vput(nd.ni_dvp); 1801 if (vp == nd.ni_dvp) 1802 vrele(vp); 1803 else 1804 vput(vp); 1805 if ((error = vn_start_write(NULL, &mp, 1806 V_XSLEEP | PCATCH)) != 0) 1807 return (error); 1808 goto restart; 1809 } 1810 #ifdef MAC 1811 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1812 &nd.ni_cnd); 1813 if (error != 0) 1814 goto out; 1815 #endif 1816 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1817 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1818 #ifdef MAC 1819 out: 1820 #endif 1821 vn_finished_write(mp); 1822 } 1823 NDFREE(&nd, NDF_ONLY_PNBUF); 1824 vput(nd.ni_dvp); 1825 if (vp == nd.ni_dvp) 1826 vrele(vp); 1827 else 1828 vput(vp); 1829 return (error); 1830 } 1831 1832 /* 1833 * Reposition read/write file offset. 1834 */ 1835 #ifndef _SYS_SYSPROTO_H_ 1836 struct lseek_args { 1837 int fd; 1838 int pad; 1839 off_t offset; 1840 int whence; 1841 }; 1842 #endif 1843 int 1844 sys_lseek(struct thread *td, struct lseek_args *uap) 1845 { 1846 1847 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1848 } 1849 1850 int 1851 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1852 { 1853 struct file *fp; 1854 int error; 1855 1856 AUDIT_ARG_FD(fd); 1857 error = fget(td, fd, &cap_seek_rights, &fp); 1858 if (error != 0) 1859 return (error); 1860 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1861 fo_seek(fp, offset, whence, td) : ESPIPE; 1862 fdrop(fp, td); 1863 return (error); 1864 } 1865 1866 #if defined(COMPAT_43) 1867 /* 1868 * Reposition read/write file offset. 1869 */ 1870 #ifndef _SYS_SYSPROTO_H_ 1871 struct olseek_args { 1872 int fd; 1873 long offset; 1874 int whence; 1875 }; 1876 #endif 1877 int 1878 olseek(struct thread *td, struct olseek_args *uap) 1879 { 1880 1881 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1882 } 1883 #endif /* COMPAT_43 */ 1884 1885 #if defined(COMPAT_FREEBSD6) 1886 /* Version with the 'pad' argument */ 1887 int 1888 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1889 { 1890 1891 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1892 } 1893 #endif 1894 1895 /* 1896 * Check access permissions using passed credentials. 1897 */ 1898 static int 1899 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1900 struct thread *td) 1901 { 1902 accmode_t accmode; 1903 int error; 1904 1905 /* Flags == 0 means only check for existence. */ 1906 if (user_flags == 0) 1907 return (0); 1908 1909 accmode = 0; 1910 if (user_flags & R_OK) 1911 accmode |= VREAD; 1912 if (user_flags & W_OK) 1913 accmode |= VWRITE; 1914 if (user_flags & X_OK) 1915 accmode |= VEXEC; 1916 #ifdef MAC 1917 error = mac_vnode_check_access(cred, vp, accmode); 1918 if (error != 0) 1919 return (error); 1920 #endif 1921 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1922 error = VOP_ACCESS(vp, accmode, cred, td); 1923 return (error); 1924 } 1925 1926 /* 1927 * Check access permissions using "real" credentials. 1928 */ 1929 #ifndef _SYS_SYSPROTO_H_ 1930 struct access_args { 1931 char *path; 1932 int amode; 1933 }; 1934 #endif 1935 int 1936 sys_access(struct thread *td, struct access_args *uap) 1937 { 1938 1939 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1940 0, uap->amode)); 1941 } 1942 1943 #ifndef _SYS_SYSPROTO_H_ 1944 struct faccessat_args { 1945 int dirfd; 1946 char *path; 1947 int amode; 1948 int flag; 1949 } 1950 #endif 1951 int 1952 sys_faccessat(struct thread *td, struct faccessat_args *uap) 1953 { 1954 1955 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1956 uap->amode)); 1957 } 1958 1959 int 1960 kern_accessat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 1961 int flag, int amode) 1962 { 1963 struct ucred *cred, *usecred; 1964 struct vnode *vp; 1965 struct nameidata nd; 1966 int error; 1967 1968 if ((flag & ~(AT_EACCESS | AT_BENEATH)) != 0) 1969 return (EINVAL); 1970 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 1971 return (EINVAL); 1972 1973 /* 1974 * Create and modify a temporary credential instead of one that 1975 * is potentially shared (if we need one). 1976 */ 1977 cred = td->td_ucred; 1978 if ((flag & AT_EACCESS) == 0 && 1979 ((cred->cr_uid != cred->cr_ruid || 1980 cred->cr_rgid != cred->cr_groups[0]))) { 1981 usecred = crdup(cred); 1982 usecred->cr_uid = cred->cr_ruid; 1983 usecred->cr_groups[0] = cred->cr_rgid; 1984 td->td_ucred = usecred; 1985 } else 1986 usecred = cred; 1987 AUDIT_ARG_VALUE(amode); 1988 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 1989 AUDITVNODE1 | ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 1990 pathseg, path, fd, &cap_fstat_rights, td); 1991 if ((error = namei(&nd)) != 0) 1992 goto out; 1993 vp = nd.ni_vp; 1994 1995 error = vn_access(vp, amode, usecred, td); 1996 NDFREE(&nd, NDF_ONLY_PNBUF); 1997 vput(vp); 1998 out: 1999 if (usecred != cred) { 2000 td->td_ucred = cred; 2001 crfree(usecred); 2002 } 2003 return (error); 2004 } 2005 2006 /* 2007 * Check access permissions using "effective" credentials. 2008 */ 2009 #ifndef _SYS_SYSPROTO_H_ 2010 struct eaccess_args { 2011 char *path; 2012 int amode; 2013 }; 2014 #endif 2015 int 2016 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2017 { 2018 2019 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2020 AT_EACCESS, uap->amode)); 2021 } 2022 2023 #if defined(COMPAT_43) 2024 /* 2025 * Get file status; this version follows links. 2026 */ 2027 #ifndef _SYS_SYSPROTO_H_ 2028 struct ostat_args { 2029 char *path; 2030 struct ostat *ub; 2031 }; 2032 #endif 2033 int 2034 ostat(struct thread *td, struct ostat_args *uap) 2035 { 2036 struct stat sb; 2037 struct ostat osb; 2038 int error; 2039 2040 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2041 &sb, NULL); 2042 if (error != 0) 2043 return (error); 2044 cvtstat(&sb, &osb); 2045 return (copyout(&osb, uap->ub, sizeof (osb))); 2046 } 2047 2048 /* 2049 * Get file status; this version does not follow links. 2050 */ 2051 #ifndef _SYS_SYSPROTO_H_ 2052 struct olstat_args { 2053 char *path; 2054 struct ostat *ub; 2055 }; 2056 #endif 2057 int 2058 olstat(struct thread *td, struct olstat_args *uap) 2059 { 2060 struct stat sb; 2061 struct ostat osb; 2062 int error; 2063 2064 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2065 UIO_USERSPACE, &sb, NULL); 2066 if (error != 0) 2067 return (error); 2068 cvtstat(&sb, &osb); 2069 return (copyout(&osb, uap->ub, sizeof (osb))); 2070 } 2071 2072 /* 2073 * Convert from an old to a new stat structure. 2074 * XXX: many values are blindly truncated. 2075 */ 2076 void 2077 cvtstat(struct stat *st, struct ostat *ost) 2078 { 2079 2080 bzero(ost, sizeof(*ost)); 2081 ost->st_dev = st->st_dev; 2082 ost->st_ino = st->st_ino; 2083 ost->st_mode = st->st_mode; 2084 ost->st_nlink = st->st_nlink; 2085 ost->st_uid = st->st_uid; 2086 ost->st_gid = st->st_gid; 2087 ost->st_rdev = st->st_rdev; 2088 ost->st_size = MIN(st->st_size, INT32_MAX); 2089 ost->st_atim = st->st_atim; 2090 ost->st_mtim = st->st_mtim; 2091 ost->st_ctim = st->st_ctim; 2092 ost->st_blksize = st->st_blksize; 2093 ost->st_blocks = st->st_blocks; 2094 ost->st_flags = st->st_flags; 2095 ost->st_gen = st->st_gen; 2096 } 2097 #endif /* COMPAT_43 */ 2098 2099 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2100 int ino64_trunc_error; 2101 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2102 &ino64_trunc_error, 0, 2103 "Error on truncation of device, file or inode number, or link count"); 2104 2105 int 2106 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2107 { 2108 2109 ost->st_dev = st->st_dev; 2110 if (ost->st_dev != st->st_dev) { 2111 switch (ino64_trunc_error) { 2112 default: 2113 /* 2114 * Since dev_t is almost raw, don't clamp to the 2115 * maximum for case 2, but ignore the error. 2116 */ 2117 break; 2118 case 1: 2119 return (EOVERFLOW); 2120 } 2121 } 2122 ost->st_ino = st->st_ino; 2123 if (ost->st_ino != st->st_ino) { 2124 switch (ino64_trunc_error) { 2125 default: 2126 case 0: 2127 break; 2128 case 1: 2129 return (EOVERFLOW); 2130 case 2: 2131 ost->st_ino = UINT32_MAX; 2132 break; 2133 } 2134 } 2135 ost->st_mode = st->st_mode; 2136 ost->st_nlink = st->st_nlink; 2137 if (ost->st_nlink != st->st_nlink) { 2138 switch (ino64_trunc_error) { 2139 default: 2140 case 0: 2141 break; 2142 case 1: 2143 return (EOVERFLOW); 2144 case 2: 2145 ost->st_nlink = UINT16_MAX; 2146 break; 2147 } 2148 } 2149 ost->st_uid = st->st_uid; 2150 ost->st_gid = st->st_gid; 2151 ost->st_rdev = st->st_rdev; 2152 if (ost->st_rdev != st->st_rdev) { 2153 switch (ino64_trunc_error) { 2154 default: 2155 break; 2156 case 1: 2157 return (EOVERFLOW); 2158 } 2159 } 2160 ost->st_atim = st->st_atim; 2161 ost->st_mtim = st->st_mtim; 2162 ost->st_ctim = st->st_ctim; 2163 ost->st_size = st->st_size; 2164 ost->st_blocks = st->st_blocks; 2165 ost->st_blksize = st->st_blksize; 2166 ost->st_flags = st->st_flags; 2167 ost->st_gen = st->st_gen; 2168 ost->st_lspare = 0; 2169 ost->st_birthtim = st->st_birthtim; 2170 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2171 sizeof(*ost) - offsetof(struct freebsd11_stat, 2172 st_birthtim) - sizeof(ost->st_birthtim)); 2173 return (0); 2174 } 2175 2176 int 2177 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2178 { 2179 struct stat sb; 2180 struct freebsd11_stat osb; 2181 int error; 2182 2183 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2184 &sb, NULL); 2185 if (error != 0) 2186 return (error); 2187 error = freebsd11_cvtstat(&sb, &osb); 2188 if (error == 0) 2189 error = copyout(&osb, uap->ub, sizeof(osb)); 2190 return (error); 2191 } 2192 2193 int 2194 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2195 { 2196 struct stat sb; 2197 struct freebsd11_stat osb; 2198 int error; 2199 2200 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2201 UIO_USERSPACE, &sb, NULL); 2202 if (error != 0) 2203 return (error); 2204 error = freebsd11_cvtstat(&sb, &osb); 2205 if (error == 0) 2206 error = copyout(&osb, uap->ub, sizeof(osb)); 2207 return (error); 2208 } 2209 2210 int 2211 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2212 { 2213 struct fhandle fh; 2214 struct stat sb; 2215 struct freebsd11_stat osb; 2216 int error; 2217 2218 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2219 if (error != 0) 2220 return (error); 2221 error = kern_fhstat(td, fh, &sb); 2222 if (error != 0) 2223 return (error); 2224 error = freebsd11_cvtstat(&sb, &osb); 2225 if (error == 0) 2226 error = copyout(&osb, uap->sb, sizeof(osb)); 2227 return (error); 2228 } 2229 2230 int 2231 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2232 { 2233 struct stat sb; 2234 struct freebsd11_stat osb; 2235 int error; 2236 2237 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2238 UIO_USERSPACE, &sb, NULL); 2239 if (error != 0) 2240 return (error); 2241 error = freebsd11_cvtstat(&sb, &osb); 2242 if (error == 0) 2243 error = copyout(&osb, uap->buf, sizeof(osb)); 2244 return (error); 2245 } 2246 #endif /* COMPAT_FREEBSD11 */ 2247 2248 /* 2249 * Get file status 2250 */ 2251 #ifndef _SYS_SYSPROTO_H_ 2252 struct fstatat_args { 2253 int fd; 2254 char *path; 2255 struct stat *buf; 2256 int flag; 2257 } 2258 #endif 2259 int 2260 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2261 { 2262 struct stat sb; 2263 int error; 2264 2265 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2266 UIO_USERSPACE, &sb, NULL); 2267 if (error == 0) 2268 error = copyout(&sb, uap->buf, sizeof (sb)); 2269 return (error); 2270 } 2271 2272 int 2273 kern_statat(struct thread *td, int flag, int fd, char *path, 2274 enum uio_seg pathseg, struct stat *sbp, 2275 void (*hook)(struct vnode *vp, struct stat *sbp)) 2276 { 2277 struct nameidata nd; 2278 struct stat sb; 2279 int error; 2280 2281 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2282 return (EINVAL); 2283 2284 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) != 0 ? 2285 NOFOLLOW : FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | 2286 LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2287 &cap_fstat_rights, td); 2288 2289 if ((error = namei(&nd)) != 0) 2290 return (error); 2291 error = vn_stat(nd.ni_vp, &sb, td->td_ucred, NOCRED, td); 2292 if (error == 0) { 2293 SDT_PROBE2(vfs, , stat, mode, path, sb.st_mode); 2294 if (S_ISREG(sb.st_mode)) 2295 SDT_PROBE2(vfs, , stat, reg, path, pathseg); 2296 if (__predict_false(hook != NULL)) 2297 hook(nd.ni_vp, &sb); 2298 } 2299 NDFREE(&nd, NDF_ONLY_PNBUF); 2300 vput(nd.ni_vp); 2301 if (error != 0) 2302 return (error); 2303 #ifdef __STAT_TIME_T_EXT 2304 sb.st_atim_ext = 0; 2305 sb.st_mtim_ext = 0; 2306 sb.st_ctim_ext = 0; 2307 sb.st_btim_ext = 0; 2308 #endif 2309 *sbp = sb; 2310 #ifdef KTRACE 2311 if (KTRPOINT(td, KTR_STRUCT)) 2312 ktrstat(&sb); 2313 #endif 2314 return (0); 2315 } 2316 2317 #if defined(COMPAT_FREEBSD11) 2318 /* 2319 * Implementation of the NetBSD [l]stat() functions. 2320 */ 2321 void 2322 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2323 { 2324 2325 bzero(nsb, sizeof(*nsb)); 2326 nsb->st_dev = sb->st_dev; 2327 nsb->st_ino = sb->st_ino; 2328 nsb->st_mode = sb->st_mode; 2329 nsb->st_nlink = sb->st_nlink; 2330 nsb->st_uid = sb->st_uid; 2331 nsb->st_gid = sb->st_gid; 2332 nsb->st_rdev = sb->st_rdev; 2333 nsb->st_atim = sb->st_atim; 2334 nsb->st_mtim = sb->st_mtim; 2335 nsb->st_ctim = sb->st_ctim; 2336 nsb->st_size = sb->st_size; 2337 nsb->st_blocks = sb->st_blocks; 2338 nsb->st_blksize = sb->st_blksize; 2339 nsb->st_flags = sb->st_flags; 2340 nsb->st_gen = sb->st_gen; 2341 nsb->st_birthtim = sb->st_birthtim; 2342 } 2343 2344 #ifndef _SYS_SYSPROTO_H_ 2345 struct freebsd11_nstat_args { 2346 char *path; 2347 struct nstat *ub; 2348 }; 2349 #endif 2350 int 2351 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2352 { 2353 struct stat sb; 2354 struct nstat nsb; 2355 int error; 2356 2357 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2358 &sb, NULL); 2359 if (error != 0) 2360 return (error); 2361 freebsd11_cvtnstat(&sb, &nsb); 2362 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2363 } 2364 2365 /* 2366 * NetBSD lstat. Get file status; this version does not follow links. 2367 */ 2368 #ifndef _SYS_SYSPROTO_H_ 2369 struct freebsd11_nlstat_args { 2370 char *path; 2371 struct nstat *ub; 2372 }; 2373 #endif 2374 int 2375 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2376 { 2377 struct stat sb; 2378 struct nstat nsb; 2379 int error; 2380 2381 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2382 UIO_USERSPACE, &sb, NULL); 2383 if (error != 0) 2384 return (error); 2385 freebsd11_cvtnstat(&sb, &nsb); 2386 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2387 } 2388 #endif /* COMPAT_FREEBSD11 */ 2389 2390 /* 2391 * Get configurable pathname variables. 2392 */ 2393 #ifndef _SYS_SYSPROTO_H_ 2394 struct pathconf_args { 2395 char *path; 2396 int name; 2397 }; 2398 #endif 2399 int 2400 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2401 { 2402 long value; 2403 int error; 2404 2405 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2406 &value); 2407 if (error == 0) 2408 td->td_retval[0] = value; 2409 return (error); 2410 } 2411 2412 #ifndef _SYS_SYSPROTO_H_ 2413 struct lpathconf_args { 2414 char *path; 2415 int name; 2416 }; 2417 #endif 2418 int 2419 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2420 { 2421 long value; 2422 int error; 2423 2424 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2425 NOFOLLOW, &value); 2426 if (error == 0) 2427 td->td_retval[0] = value; 2428 return (error); 2429 } 2430 2431 int 2432 kern_pathconf(struct thread *td, char *path, enum uio_seg pathseg, int name, 2433 u_long flags, long *valuep) 2434 { 2435 struct nameidata nd; 2436 int error; 2437 2438 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2439 pathseg, path, td); 2440 if ((error = namei(&nd)) != 0) 2441 return (error); 2442 NDFREE(&nd, NDF_ONLY_PNBUF); 2443 2444 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2445 vput(nd.ni_vp); 2446 return (error); 2447 } 2448 2449 /* 2450 * Return target name of a symbolic link. 2451 */ 2452 #ifndef _SYS_SYSPROTO_H_ 2453 struct readlink_args { 2454 char *path; 2455 char *buf; 2456 size_t count; 2457 }; 2458 #endif 2459 int 2460 sys_readlink(struct thread *td, struct readlink_args *uap) 2461 { 2462 2463 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2464 uap->buf, UIO_USERSPACE, uap->count)); 2465 } 2466 #ifndef _SYS_SYSPROTO_H_ 2467 struct readlinkat_args { 2468 int fd; 2469 char *path; 2470 char *buf; 2471 size_t bufsize; 2472 }; 2473 #endif 2474 int 2475 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2476 { 2477 2478 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2479 uap->buf, UIO_USERSPACE, uap->bufsize)); 2480 } 2481 2482 int 2483 kern_readlinkat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2484 char *buf, enum uio_seg bufseg, size_t count) 2485 { 2486 struct vnode *vp; 2487 struct iovec aiov; 2488 struct uio auio; 2489 struct nameidata nd; 2490 int error; 2491 2492 if (count > IOSIZE_MAX) 2493 return (EINVAL); 2494 2495 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2496 pathseg, path, fd, td); 2497 2498 if ((error = namei(&nd)) != 0) 2499 return (error); 2500 NDFREE(&nd, NDF_ONLY_PNBUF); 2501 vp = nd.ni_vp; 2502 #ifdef MAC 2503 error = mac_vnode_check_readlink(td->td_ucred, vp); 2504 if (error != 0) { 2505 vput(vp); 2506 return (error); 2507 } 2508 #endif 2509 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2510 error = EINVAL; 2511 else { 2512 aiov.iov_base = buf; 2513 aiov.iov_len = count; 2514 auio.uio_iov = &aiov; 2515 auio.uio_iovcnt = 1; 2516 auio.uio_offset = 0; 2517 auio.uio_rw = UIO_READ; 2518 auio.uio_segflg = bufseg; 2519 auio.uio_td = td; 2520 auio.uio_resid = count; 2521 error = VOP_READLINK(vp, &auio, td->td_ucred); 2522 td->td_retval[0] = count - auio.uio_resid; 2523 } 2524 vput(vp); 2525 return (error); 2526 } 2527 2528 /* 2529 * Common implementation code for chflags() and fchflags(). 2530 */ 2531 static int 2532 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2533 { 2534 struct mount *mp; 2535 struct vattr vattr; 2536 int error; 2537 2538 /* We can't support the value matching VNOVAL. */ 2539 if (flags == VNOVAL) 2540 return (EOPNOTSUPP); 2541 2542 /* 2543 * Prevent non-root users from setting flags on devices. When 2544 * a device is reused, users can retain ownership of the device 2545 * if they are allowed to set flags and programs assume that 2546 * chown can't fail when done as root. 2547 */ 2548 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2549 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2550 if (error != 0) 2551 return (error); 2552 } 2553 2554 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2555 return (error); 2556 VATTR_NULL(&vattr); 2557 vattr.va_flags = flags; 2558 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2559 #ifdef MAC 2560 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2561 if (error == 0) 2562 #endif 2563 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2564 VOP_UNLOCK(vp, 0); 2565 vn_finished_write(mp); 2566 return (error); 2567 } 2568 2569 /* 2570 * Change flags of a file given a path name. 2571 */ 2572 #ifndef _SYS_SYSPROTO_H_ 2573 struct chflags_args { 2574 const char *path; 2575 u_long flags; 2576 }; 2577 #endif 2578 int 2579 sys_chflags(struct thread *td, struct chflags_args *uap) 2580 { 2581 2582 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2583 uap->flags, 0)); 2584 } 2585 2586 #ifndef _SYS_SYSPROTO_H_ 2587 struct chflagsat_args { 2588 int fd; 2589 const char *path; 2590 u_long flags; 2591 int atflag; 2592 } 2593 #endif 2594 int 2595 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2596 { 2597 2598 if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2599 return (EINVAL); 2600 2601 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2602 uap->flags, uap->atflag)); 2603 } 2604 2605 /* 2606 * Same as chflags() but doesn't follow symlinks. 2607 */ 2608 #ifndef _SYS_SYSPROTO_H_ 2609 struct lchflags_args { 2610 const char *path; 2611 u_long flags; 2612 }; 2613 #endif 2614 int 2615 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2616 { 2617 2618 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2619 uap->flags, AT_SYMLINK_NOFOLLOW)); 2620 } 2621 2622 static int 2623 kern_chflagsat(struct thread *td, int fd, const char *path, 2624 enum uio_seg pathseg, u_long flags, int atflag) 2625 { 2626 struct nameidata nd; 2627 int error, follow; 2628 2629 AUDIT_ARG_FFLAGS(flags); 2630 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2631 follow |= (atflag & AT_BENEATH) != 0 ? BENEATH : 0; 2632 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2633 &cap_fchflags_rights, td); 2634 if ((error = namei(&nd)) != 0) 2635 return (error); 2636 NDFREE(&nd, NDF_ONLY_PNBUF); 2637 error = setfflags(td, nd.ni_vp, flags); 2638 vrele(nd.ni_vp); 2639 return (error); 2640 } 2641 2642 /* 2643 * Change flags of a file given a file descriptor. 2644 */ 2645 #ifndef _SYS_SYSPROTO_H_ 2646 struct fchflags_args { 2647 int fd; 2648 u_long flags; 2649 }; 2650 #endif 2651 int 2652 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2653 { 2654 struct file *fp; 2655 int error; 2656 2657 AUDIT_ARG_FD(uap->fd); 2658 AUDIT_ARG_FFLAGS(uap->flags); 2659 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2660 &fp); 2661 if (error != 0) 2662 return (error); 2663 #ifdef AUDIT 2664 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2665 AUDIT_ARG_VNODE1(fp->f_vnode); 2666 VOP_UNLOCK(fp->f_vnode, 0); 2667 #endif 2668 error = setfflags(td, fp->f_vnode, uap->flags); 2669 fdrop(fp, td); 2670 return (error); 2671 } 2672 2673 /* 2674 * Common implementation code for chmod(), lchmod() and fchmod(). 2675 */ 2676 int 2677 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2678 { 2679 struct mount *mp; 2680 struct vattr vattr; 2681 int error; 2682 2683 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2684 return (error); 2685 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2686 VATTR_NULL(&vattr); 2687 vattr.va_mode = mode & ALLPERMS; 2688 #ifdef MAC 2689 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2690 if (error == 0) 2691 #endif 2692 error = VOP_SETATTR(vp, &vattr, cred); 2693 VOP_UNLOCK(vp, 0); 2694 vn_finished_write(mp); 2695 return (error); 2696 } 2697 2698 /* 2699 * Change mode of a file given path name. 2700 */ 2701 #ifndef _SYS_SYSPROTO_H_ 2702 struct chmod_args { 2703 char *path; 2704 int mode; 2705 }; 2706 #endif 2707 int 2708 sys_chmod(struct thread *td, struct chmod_args *uap) 2709 { 2710 2711 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2712 uap->mode, 0)); 2713 } 2714 2715 #ifndef _SYS_SYSPROTO_H_ 2716 struct fchmodat_args { 2717 int dirfd; 2718 char *path; 2719 mode_t mode; 2720 int flag; 2721 } 2722 #endif 2723 int 2724 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2725 { 2726 2727 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2728 return (EINVAL); 2729 2730 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2731 uap->mode, uap->flag)); 2732 } 2733 2734 /* 2735 * Change mode of a file given path name (don't follow links.) 2736 */ 2737 #ifndef _SYS_SYSPROTO_H_ 2738 struct lchmod_args { 2739 char *path; 2740 int mode; 2741 }; 2742 #endif 2743 int 2744 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2745 { 2746 2747 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2748 uap->mode, AT_SYMLINK_NOFOLLOW)); 2749 } 2750 2751 int 2752 kern_fchmodat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2753 mode_t mode, int flag) 2754 { 2755 struct nameidata nd; 2756 int error, follow; 2757 2758 AUDIT_ARG_MODE(mode); 2759 follow = (flag & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : FOLLOW; 2760 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2761 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2762 &cap_fchmod_rights, td); 2763 if ((error = namei(&nd)) != 0) 2764 return (error); 2765 NDFREE(&nd, NDF_ONLY_PNBUF); 2766 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2767 vrele(nd.ni_vp); 2768 return (error); 2769 } 2770 2771 /* 2772 * Change mode of a file given a file descriptor. 2773 */ 2774 #ifndef _SYS_SYSPROTO_H_ 2775 struct fchmod_args { 2776 int fd; 2777 int mode; 2778 }; 2779 #endif 2780 int 2781 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2782 { 2783 struct file *fp; 2784 int error; 2785 2786 AUDIT_ARG_FD(uap->fd); 2787 AUDIT_ARG_MODE(uap->mode); 2788 2789 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2790 if (error != 0) 2791 return (error); 2792 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2793 fdrop(fp, td); 2794 return (error); 2795 } 2796 2797 /* 2798 * Common implementation for chown(), lchown(), and fchown() 2799 */ 2800 int 2801 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2802 gid_t gid) 2803 { 2804 struct mount *mp; 2805 struct vattr vattr; 2806 int error; 2807 2808 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2809 return (error); 2810 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2811 VATTR_NULL(&vattr); 2812 vattr.va_uid = uid; 2813 vattr.va_gid = gid; 2814 #ifdef MAC 2815 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2816 vattr.va_gid); 2817 if (error == 0) 2818 #endif 2819 error = VOP_SETATTR(vp, &vattr, cred); 2820 VOP_UNLOCK(vp, 0); 2821 vn_finished_write(mp); 2822 return (error); 2823 } 2824 2825 /* 2826 * Set ownership given a path name. 2827 */ 2828 #ifndef _SYS_SYSPROTO_H_ 2829 struct chown_args { 2830 char *path; 2831 int uid; 2832 int gid; 2833 }; 2834 #endif 2835 int 2836 sys_chown(struct thread *td, struct chown_args *uap) 2837 { 2838 2839 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2840 uap->gid, 0)); 2841 } 2842 2843 #ifndef _SYS_SYSPROTO_H_ 2844 struct fchownat_args { 2845 int fd; 2846 const char * path; 2847 uid_t uid; 2848 gid_t gid; 2849 int flag; 2850 }; 2851 #endif 2852 int 2853 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2854 { 2855 2856 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2857 return (EINVAL); 2858 2859 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2860 uap->gid, uap->flag)); 2861 } 2862 2863 int 2864 kern_fchownat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 2865 int uid, int gid, int flag) 2866 { 2867 struct nameidata nd; 2868 int error, follow; 2869 2870 AUDIT_ARG_OWNER(uid, gid); 2871 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2872 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2873 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2874 &cap_fchown_rights, td); 2875 2876 if ((error = namei(&nd)) != 0) 2877 return (error); 2878 NDFREE(&nd, NDF_ONLY_PNBUF); 2879 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2880 vrele(nd.ni_vp); 2881 return (error); 2882 } 2883 2884 /* 2885 * Set ownership given a path name, do not cross symlinks. 2886 */ 2887 #ifndef _SYS_SYSPROTO_H_ 2888 struct lchown_args { 2889 char *path; 2890 int uid; 2891 int gid; 2892 }; 2893 #endif 2894 int 2895 sys_lchown(struct thread *td, struct lchown_args *uap) 2896 { 2897 2898 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2899 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2900 } 2901 2902 /* 2903 * Set ownership given a file descriptor. 2904 */ 2905 #ifndef _SYS_SYSPROTO_H_ 2906 struct fchown_args { 2907 int fd; 2908 int uid; 2909 int gid; 2910 }; 2911 #endif 2912 int 2913 sys_fchown(struct thread *td, struct fchown_args *uap) 2914 { 2915 struct file *fp; 2916 int error; 2917 2918 AUDIT_ARG_FD(uap->fd); 2919 AUDIT_ARG_OWNER(uap->uid, uap->gid); 2920 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 2921 if (error != 0) 2922 return (error); 2923 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 2924 fdrop(fp, td); 2925 return (error); 2926 } 2927 2928 /* 2929 * Common implementation code for utimes(), lutimes(), and futimes(). 2930 */ 2931 static int 2932 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 2933 struct timespec *tsp) 2934 { 2935 struct timeval tv[2]; 2936 const struct timeval *tvp; 2937 int error; 2938 2939 if (usrtvp == NULL) { 2940 vfs_timestamp(&tsp[0]); 2941 tsp[1] = tsp[0]; 2942 } else { 2943 if (tvpseg == UIO_SYSSPACE) { 2944 tvp = usrtvp; 2945 } else { 2946 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 2947 return (error); 2948 tvp = tv; 2949 } 2950 2951 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 2952 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 2953 return (EINVAL); 2954 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2955 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2956 } 2957 return (0); 2958 } 2959 2960 /* 2961 * Common implementation code for futimens(), utimensat(). 2962 */ 2963 #define UTIMENS_NULL 0x1 2964 #define UTIMENS_EXIT 0x2 2965 static int 2966 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 2967 struct timespec *tsp, int *retflags) 2968 { 2969 struct timespec tsnow; 2970 int error; 2971 2972 vfs_timestamp(&tsnow); 2973 *retflags = 0; 2974 if (usrtsp == NULL) { 2975 tsp[0] = tsnow; 2976 tsp[1] = tsnow; 2977 *retflags |= UTIMENS_NULL; 2978 return (0); 2979 } 2980 if (tspseg == UIO_SYSSPACE) { 2981 tsp[0] = usrtsp[0]; 2982 tsp[1] = usrtsp[1]; 2983 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 2984 return (error); 2985 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 2986 *retflags |= UTIMENS_EXIT; 2987 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 2988 *retflags |= UTIMENS_NULL; 2989 if (tsp[0].tv_nsec == UTIME_OMIT) 2990 tsp[0].tv_sec = VNOVAL; 2991 else if (tsp[0].tv_nsec == UTIME_NOW) 2992 tsp[0] = tsnow; 2993 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 2994 return (EINVAL); 2995 if (tsp[1].tv_nsec == UTIME_OMIT) 2996 tsp[1].tv_sec = VNOVAL; 2997 else if (tsp[1].tv_nsec == UTIME_NOW) 2998 tsp[1] = tsnow; 2999 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3000 return (EINVAL); 3001 3002 return (0); 3003 } 3004 3005 /* 3006 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3007 * and utimensat(). 3008 */ 3009 static int 3010 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3011 int numtimes, int nullflag) 3012 { 3013 struct mount *mp; 3014 struct vattr vattr; 3015 int error, setbirthtime; 3016 3017 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3018 return (error); 3019 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3020 setbirthtime = 0; 3021 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3022 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3023 setbirthtime = 1; 3024 VATTR_NULL(&vattr); 3025 vattr.va_atime = ts[0]; 3026 vattr.va_mtime = ts[1]; 3027 if (setbirthtime) 3028 vattr.va_birthtime = ts[1]; 3029 if (numtimes > 2) 3030 vattr.va_birthtime = ts[2]; 3031 if (nullflag) 3032 vattr.va_vaflags |= VA_UTIMES_NULL; 3033 #ifdef MAC 3034 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3035 vattr.va_mtime); 3036 #endif 3037 if (error == 0) 3038 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3039 VOP_UNLOCK(vp, 0); 3040 vn_finished_write(mp); 3041 return (error); 3042 } 3043 3044 /* 3045 * Set the access and modification times of a file. 3046 */ 3047 #ifndef _SYS_SYSPROTO_H_ 3048 struct utimes_args { 3049 char *path; 3050 struct timeval *tptr; 3051 }; 3052 #endif 3053 int 3054 sys_utimes(struct thread *td, struct utimes_args *uap) 3055 { 3056 3057 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3058 uap->tptr, UIO_USERSPACE)); 3059 } 3060 3061 #ifndef _SYS_SYSPROTO_H_ 3062 struct futimesat_args { 3063 int fd; 3064 const char * path; 3065 const struct timeval * times; 3066 }; 3067 #endif 3068 int 3069 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3070 { 3071 3072 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3073 uap->times, UIO_USERSPACE)); 3074 } 3075 3076 int 3077 kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3078 struct timeval *tptr, enum uio_seg tptrseg) 3079 { 3080 struct nameidata nd; 3081 struct timespec ts[2]; 3082 int error; 3083 3084 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3085 return (error); 3086 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3087 &cap_futimes_rights, td); 3088 3089 if ((error = namei(&nd)) != 0) 3090 return (error); 3091 NDFREE(&nd, NDF_ONLY_PNBUF); 3092 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3093 vrele(nd.ni_vp); 3094 return (error); 3095 } 3096 3097 /* 3098 * Set the access and modification times of a file. 3099 */ 3100 #ifndef _SYS_SYSPROTO_H_ 3101 struct lutimes_args { 3102 char *path; 3103 struct timeval *tptr; 3104 }; 3105 #endif 3106 int 3107 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3108 { 3109 3110 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3111 UIO_USERSPACE)); 3112 } 3113 3114 int 3115 kern_lutimes(struct thread *td, char *path, enum uio_seg pathseg, 3116 struct timeval *tptr, enum uio_seg tptrseg) 3117 { 3118 struct timespec ts[2]; 3119 struct nameidata nd; 3120 int error; 3121 3122 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3123 return (error); 3124 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3125 if ((error = namei(&nd)) != 0) 3126 return (error); 3127 NDFREE(&nd, NDF_ONLY_PNBUF); 3128 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3129 vrele(nd.ni_vp); 3130 return (error); 3131 } 3132 3133 /* 3134 * Set the access and modification times of a file. 3135 */ 3136 #ifndef _SYS_SYSPROTO_H_ 3137 struct futimes_args { 3138 int fd; 3139 struct timeval *tptr; 3140 }; 3141 #endif 3142 int 3143 sys_futimes(struct thread *td, struct futimes_args *uap) 3144 { 3145 3146 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3147 } 3148 3149 int 3150 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3151 enum uio_seg tptrseg) 3152 { 3153 struct timespec ts[2]; 3154 struct file *fp; 3155 int error; 3156 3157 AUDIT_ARG_FD(fd); 3158 error = getutimes(tptr, tptrseg, ts); 3159 if (error != 0) 3160 return (error); 3161 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3162 if (error != 0) 3163 return (error); 3164 #ifdef AUDIT 3165 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3166 AUDIT_ARG_VNODE1(fp->f_vnode); 3167 VOP_UNLOCK(fp->f_vnode, 0); 3168 #endif 3169 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3170 fdrop(fp, td); 3171 return (error); 3172 } 3173 3174 int 3175 sys_futimens(struct thread *td, struct futimens_args *uap) 3176 { 3177 3178 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3179 } 3180 3181 int 3182 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3183 enum uio_seg tptrseg) 3184 { 3185 struct timespec ts[2]; 3186 struct file *fp; 3187 int error, flags; 3188 3189 AUDIT_ARG_FD(fd); 3190 error = getutimens(tptr, tptrseg, ts, &flags); 3191 if (error != 0) 3192 return (error); 3193 if (flags & UTIMENS_EXIT) 3194 return (0); 3195 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3196 if (error != 0) 3197 return (error); 3198 #ifdef AUDIT 3199 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3200 AUDIT_ARG_VNODE1(fp->f_vnode); 3201 VOP_UNLOCK(fp->f_vnode, 0); 3202 #endif 3203 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3204 fdrop(fp, td); 3205 return (error); 3206 } 3207 3208 int 3209 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3210 { 3211 3212 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3213 uap->times, UIO_USERSPACE, uap->flag)); 3214 } 3215 3216 int 3217 kern_utimensat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3218 struct timespec *tptr, enum uio_seg tptrseg, int flag) 3219 { 3220 struct nameidata nd; 3221 struct timespec ts[2]; 3222 int error, flags; 3223 3224 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 3225 return (EINVAL); 3226 3227 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3228 return (error); 3229 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3230 FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | AUDITVNODE1, 3231 pathseg, path, fd, &cap_futimes_rights, td); 3232 if ((error = namei(&nd)) != 0) 3233 return (error); 3234 /* 3235 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3236 * POSIX states: 3237 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3238 * "Search permission is denied by a component of the path prefix." 3239 */ 3240 NDFREE(&nd, NDF_ONLY_PNBUF); 3241 if ((flags & UTIMENS_EXIT) == 0) 3242 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3243 vrele(nd.ni_vp); 3244 return (error); 3245 } 3246 3247 /* 3248 * Truncate a file given its path name. 3249 */ 3250 #ifndef _SYS_SYSPROTO_H_ 3251 struct truncate_args { 3252 char *path; 3253 int pad; 3254 off_t length; 3255 }; 3256 #endif 3257 int 3258 sys_truncate(struct thread *td, struct truncate_args *uap) 3259 { 3260 3261 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3262 } 3263 3264 int 3265 kern_truncate(struct thread *td, char *path, enum uio_seg pathseg, off_t length) 3266 { 3267 struct mount *mp; 3268 struct vnode *vp; 3269 void *rl_cookie; 3270 struct vattr vattr; 3271 struct nameidata nd; 3272 int error; 3273 3274 if (length < 0) 3275 return(EINVAL); 3276 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3277 if ((error = namei(&nd)) != 0) 3278 return (error); 3279 vp = nd.ni_vp; 3280 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3281 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3282 vn_rangelock_unlock(vp, rl_cookie); 3283 vrele(vp); 3284 return (error); 3285 } 3286 NDFREE(&nd, NDF_ONLY_PNBUF); 3287 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3288 if (vp->v_type == VDIR) 3289 error = EISDIR; 3290 #ifdef MAC 3291 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3292 } 3293 #endif 3294 else if ((error = vn_writechk(vp)) == 0 && 3295 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3296 VATTR_NULL(&vattr); 3297 vattr.va_size = length; 3298 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3299 } 3300 VOP_UNLOCK(vp, 0); 3301 vn_finished_write(mp); 3302 vn_rangelock_unlock(vp, rl_cookie); 3303 vrele(vp); 3304 return (error); 3305 } 3306 3307 #if defined(COMPAT_43) 3308 /* 3309 * Truncate a file given its path name. 3310 */ 3311 #ifndef _SYS_SYSPROTO_H_ 3312 struct otruncate_args { 3313 char *path; 3314 long length; 3315 }; 3316 #endif 3317 int 3318 otruncate(struct thread *td, struct otruncate_args *uap) 3319 { 3320 3321 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3322 } 3323 #endif /* COMPAT_43 */ 3324 3325 #if defined(COMPAT_FREEBSD6) 3326 /* Versions with the pad argument */ 3327 int 3328 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3329 { 3330 3331 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3332 } 3333 3334 int 3335 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3336 { 3337 3338 return (kern_ftruncate(td, uap->fd, uap->length)); 3339 } 3340 #endif 3341 3342 int 3343 kern_fsync(struct thread *td, int fd, bool fullsync) 3344 { 3345 struct vnode *vp; 3346 struct mount *mp; 3347 struct file *fp; 3348 int error, lock_flags; 3349 3350 AUDIT_ARG_FD(fd); 3351 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3352 if (error != 0) 3353 return (error); 3354 vp = fp->f_vnode; 3355 #if 0 3356 if (!fullsync) 3357 /* XXXKIB: compete outstanding aio writes */; 3358 #endif 3359 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3360 if (error != 0) 3361 goto drop; 3362 if (MNT_SHARED_WRITES(mp) || 3363 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3364 lock_flags = LK_SHARED; 3365 } else { 3366 lock_flags = LK_EXCLUSIVE; 3367 } 3368 vn_lock(vp, lock_flags | LK_RETRY); 3369 AUDIT_ARG_VNODE1(vp); 3370 if (vp->v_object != NULL) { 3371 VM_OBJECT_WLOCK(vp->v_object); 3372 vm_object_page_clean(vp->v_object, 0, 0, 0); 3373 VM_OBJECT_WUNLOCK(vp->v_object); 3374 } 3375 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3376 VOP_UNLOCK(vp, 0); 3377 vn_finished_write(mp); 3378 drop: 3379 fdrop(fp, td); 3380 return (error); 3381 } 3382 3383 /* 3384 * Sync an open file. 3385 */ 3386 #ifndef _SYS_SYSPROTO_H_ 3387 struct fsync_args { 3388 int fd; 3389 }; 3390 #endif 3391 int 3392 sys_fsync(struct thread *td, struct fsync_args *uap) 3393 { 3394 3395 return (kern_fsync(td, uap->fd, true)); 3396 } 3397 3398 int 3399 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3400 { 3401 3402 return (kern_fsync(td, uap->fd, false)); 3403 } 3404 3405 /* 3406 * Rename files. Source and destination must either both be directories, or 3407 * both not be directories. If target is a directory, it must be empty. 3408 */ 3409 #ifndef _SYS_SYSPROTO_H_ 3410 struct rename_args { 3411 char *from; 3412 char *to; 3413 }; 3414 #endif 3415 int 3416 sys_rename(struct thread *td, struct rename_args *uap) 3417 { 3418 3419 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3420 uap->to, UIO_USERSPACE)); 3421 } 3422 3423 #ifndef _SYS_SYSPROTO_H_ 3424 struct renameat_args { 3425 int oldfd; 3426 char *old; 3427 int newfd; 3428 char *new; 3429 }; 3430 #endif 3431 int 3432 sys_renameat(struct thread *td, struct renameat_args *uap) 3433 { 3434 3435 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3436 UIO_USERSPACE)); 3437 } 3438 3439 int 3440 kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, 3441 enum uio_seg pathseg) 3442 { 3443 struct mount *mp = NULL; 3444 struct vnode *tvp, *fvp, *tdvp; 3445 struct nameidata fromnd, tond; 3446 int error; 3447 3448 again: 3449 bwillwrite(); 3450 #ifdef MAC 3451 NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3452 AUDITVNODE1, pathseg, old, oldfd, 3453 &cap_renameat_source_rights, td); 3454 #else 3455 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3456 pathseg, old, oldfd, 3457 &cap_renameat_source_rights, td); 3458 #endif 3459 3460 if ((error = namei(&fromnd)) != 0) 3461 return (error); 3462 #ifdef MAC 3463 error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp, 3464 fromnd.ni_vp, &fromnd.ni_cnd); 3465 VOP_UNLOCK(fromnd.ni_dvp, 0); 3466 if (fromnd.ni_dvp != fromnd.ni_vp) 3467 VOP_UNLOCK(fromnd.ni_vp, 0); 3468 #endif 3469 fvp = fromnd.ni_vp; 3470 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3471 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3472 &cap_renameat_target_rights, td); 3473 if (fromnd.ni_vp->v_type == VDIR) 3474 tond.ni_cnd.cn_flags |= WILLBEDIR; 3475 if ((error = namei(&tond)) != 0) { 3476 /* Translate error code for rename("dir1", "dir2/."). */ 3477 if (error == EISDIR && fvp->v_type == VDIR) 3478 error = EINVAL; 3479 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3480 vrele(fromnd.ni_dvp); 3481 vrele(fvp); 3482 goto out1; 3483 } 3484 tdvp = tond.ni_dvp; 3485 tvp = tond.ni_vp; 3486 error = vn_start_write(fvp, &mp, V_NOWAIT); 3487 if (error != 0) { 3488 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3489 NDFREE(&tond, NDF_ONLY_PNBUF); 3490 if (tvp != NULL) 3491 vput(tvp); 3492 if (tdvp == tvp) 3493 vrele(tdvp); 3494 else 3495 vput(tdvp); 3496 vrele(fromnd.ni_dvp); 3497 vrele(fvp); 3498 vrele(tond.ni_startdir); 3499 if (fromnd.ni_startdir != NULL) 3500 vrele(fromnd.ni_startdir); 3501 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3502 if (error != 0) 3503 return (error); 3504 goto again; 3505 } 3506 if (tvp != NULL) { 3507 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3508 error = ENOTDIR; 3509 goto out; 3510 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3511 error = EISDIR; 3512 goto out; 3513 } 3514 #ifdef CAPABILITIES 3515 if (newfd != AT_FDCWD) { 3516 /* 3517 * If the target already exists we require CAP_UNLINKAT 3518 * from 'newfd'. 3519 */ 3520 error = cap_check(&tond.ni_filecaps.fc_rights, 3521 &cap_unlinkat_rights); 3522 if (error != 0) 3523 goto out; 3524 } 3525 #endif 3526 } 3527 if (fvp == tdvp) { 3528 error = EINVAL; 3529 goto out; 3530 } 3531 /* 3532 * If the source is the same as the destination (that is, if they 3533 * are links to the same vnode), then there is nothing to do. 3534 */ 3535 if (fvp == tvp) 3536 error = -1; 3537 #ifdef MAC 3538 else 3539 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3540 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3541 #endif 3542 out: 3543 if (error == 0) { 3544 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3545 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3546 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3547 NDFREE(&tond, NDF_ONLY_PNBUF); 3548 } else { 3549 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3550 NDFREE(&tond, NDF_ONLY_PNBUF); 3551 if (tvp != NULL) 3552 vput(tvp); 3553 if (tdvp == tvp) 3554 vrele(tdvp); 3555 else 3556 vput(tdvp); 3557 vrele(fromnd.ni_dvp); 3558 vrele(fvp); 3559 } 3560 vrele(tond.ni_startdir); 3561 vn_finished_write(mp); 3562 out1: 3563 if (fromnd.ni_startdir) 3564 vrele(fromnd.ni_startdir); 3565 if (error == -1) 3566 return (0); 3567 return (error); 3568 } 3569 3570 /* 3571 * Make a directory file. 3572 */ 3573 #ifndef _SYS_SYSPROTO_H_ 3574 struct mkdir_args { 3575 char *path; 3576 int mode; 3577 }; 3578 #endif 3579 int 3580 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3581 { 3582 3583 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3584 uap->mode)); 3585 } 3586 3587 #ifndef _SYS_SYSPROTO_H_ 3588 struct mkdirat_args { 3589 int fd; 3590 char *path; 3591 mode_t mode; 3592 }; 3593 #endif 3594 int 3595 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3596 { 3597 3598 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3599 } 3600 3601 int 3602 kern_mkdirat(struct thread *td, int fd, char *path, enum uio_seg segflg, 3603 int mode) 3604 { 3605 struct mount *mp; 3606 struct vnode *vp; 3607 struct vattr vattr; 3608 struct nameidata nd; 3609 int error; 3610 3611 AUDIT_ARG_MODE(mode); 3612 restart: 3613 bwillwrite(); 3614 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3615 NOCACHE, segflg, path, fd, &cap_mkdirat_rights, 3616 td); 3617 nd.ni_cnd.cn_flags |= WILLBEDIR; 3618 if ((error = namei(&nd)) != 0) 3619 return (error); 3620 vp = nd.ni_vp; 3621 if (vp != NULL) { 3622 NDFREE(&nd, NDF_ONLY_PNBUF); 3623 /* 3624 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3625 * the strange behaviour of leaving the vnode unlocked 3626 * if the target is the same vnode as the parent. 3627 */ 3628 if (vp == nd.ni_dvp) 3629 vrele(nd.ni_dvp); 3630 else 3631 vput(nd.ni_dvp); 3632 vrele(vp); 3633 return (EEXIST); 3634 } 3635 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3636 NDFREE(&nd, NDF_ONLY_PNBUF); 3637 vput(nd.ni_dvp); 3638 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3639 return (error); 3640 goto restart; 3641 } 3642 VATTR_NULL(&vattr); 3643 vattr.va_type = VDIR; 3644 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3645 #ifdef MAC 3646 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3647 &vattr); 3648 if (error != 0) 3649 goto out; 3650 #endif 3651 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3652 #ifdef MAC 3653 out: 3654 #endif 3655 NDFREE(&nd, NDF_ONLY_PNBUF); 3656 vput(nd.ni_dvp); 3657 if (error == 0) 3658 vput(nd.ni_vp); 3659 vn_finished_write(mp); 3660 return (error); 3661 } 3662 3663 /* 3664 * Remove a directory file. 3665 */ 3666 #ifndef _SYS_SYSPROTO_H_ 3667 struct rmdir_args { 3668 char *path; 3669 }; 3670 #endif 3671 int 3672 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3673 { 3674 3675 return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0)); 3676 } 3677 3678 int 3679 kern_rmdirat(struct thread *td, int fd, char *path, enum uio_seg pathseg, 3680 int flag) 3681 { 3682 struct mount *mp; 3683 struct vnode *vp; 3684 struct nameidata nd; 3685 int error; 3686 3687 restart: 3688 bwillwrite(); 3689 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3690 ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 3691 pathseg, path, fd, &cap_unlinkat_rights, td); 3692 if ((error = namei(&nd)) != 0) 3693 return (error); 3694 vp = nd.ni_vp; 3695 if (vp->v_type != VDIR) { 3696 error = ENOTDIR; 3697 goto out; 3698 } 3699 /* 3700 * No rmdir "." please. 3701 */ 3702 if (nd.ni_dvp == vp) { 3703 error = EINVAL; 3704 goto out; 3705 } 3706 /* 3707 * The root of a mounted filesystem cannot be deleted. 3708 */ 3709 if (vp->v_vflag & VV_ROOT) { 3710 error = EBUSY; 3711 goto out; 3712 } 3713 #ifdef MAC 3714 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3715 &nd.ni_cnd); 3716 if (error != 0) 3717 goto out; 3718 #endif 3719 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3720 NDFREE(&nd, NDF_ONLY_PNBUF); 3721 vput(vp); 3722 if (nd.ni_dvp == vp) 3723 vrele(nd.ni_dvp); 3724 else 3725 vput(nd.ni_dvp); 3726 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3727 return (error); 3728 goto restart; 3729 } 3730 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3731 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3732 vn_finished_write(mp); 3733 out: 3734 NDFREE(&nd, NDF_ONLY_PNBUF); 3735 vput(vp); 3736 if (nd.ni_dvp == vp) 3737 vrele(nd.ni_dvp); 3738 else 3739 vput(nd.ni_dvp); 3740 return (error); 3741 } 3742 3743 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3744 int 3745 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3746 long *basep, void (*func)(struct freebsd11_dirent *)) 3747 { 3748 struct freebsd11_dirent dstdp; 3749 struct dirent *dp, *edp; 3750 char *dirbuf; 3751 off_t base; 3752 ssize_t resid, ucount; 3753 int error; 3754 3755 /* XXX arbitrary sanity limit on `count'. */ 3756 count = min(count, 64 * 1024); 3757 3758 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3759 3760 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3761 UIO_SYSSPACE); 3762 if (error != 0) 3763 goto done; 3764 if (basep != NULL) 3765 *basep = base; 3766 3767 ucount = 0; 3768 for (dp = (struct dirent *)dirbuf, 3769 edp = (struct dirent *)&dirbuf[count - resid]; 3770 ucount < count && dp < edp; ) { 3771 if (dp->d_reclen == 0) 3772 break; 3773 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3774 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3775 continue; 3776 dstdp.d_type = dp->d_type; 3777 dstdp.d_namlen = dp->d_namlen; 3778 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3779 if (dstdp.d_fileno != dp->d_fileno) { 3780 switch (ino64_trunc_error) { 3781 default: 3782 case 0: 3783 break; 3784 case 1: 3785 error = EOVERFLOW; 3786 goto done; 3787 case 2: 3788 dstdp.d_fileno = UINT32_MAX; 3789 break; 3790 } 3791 } 3792 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 3793 ((dp->d_namlen + 1 + 3) &~ 3); 3794 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 3795 bzero(dstdp.d_name + dstdp.d_namlen, 3796 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 3797 dstdp.d_namlen); 3798 MPASS(dstdp.d_reclen <= dp->d_reclen); 3799 MPASS(ucount + dstdp.d_reclen <= count); 3800 if (func != NULL) 3801 func(&dstdp); 3802 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 3803 if (error != 0) 3804 break; 3805 dp = (struct dirent *)((char *)dp + dp->d_reclen); 3806 ucount += dstdp.d_reclen; 3807 } 3808 3809 done: 3810 free(dirbuf, M_TEMP); 3811 if (error == 0) 3812 td->td_retval[0] = ucount; 3813 return (error); 3814 } 3815 #endif /* COMPAT */ 3816 3817 #ifdef COMPAT_43 3818 static void 3819 ogetdirentries_cvt(struct freebsd11_dirent *dp) 3820 { 3821 #if (BYTE_ORDER == LITTLE_ENDIAN) 3822 /* 3823 * The expected low byte of dp->d_namlen is our dp->d_type. 3824 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 3825 */ 3826 dp->d_type = dp->d_namlen; 3827 dp->d_namlen = 0; 3828 #else 3829 /* 3830 * The dp->d_type is the high byte of the expected dp->d_namlen, 3831 * so must be zero'ed. 3832 */ 3833 dp->d_type = 0; 3834 #endif 3835 } 3836 3837 /* 3838 * Read a block of directory entries in a filesystem independent format. 3839 */ 3840 #ifndef _SYS_SYSPROTO_H_ 3841 struct ogetdirentries_args { 3842 int fd; 3843 char *buf; 3844 u_int count; 3845 long *basep; 3846 }; 3847 #endif 3848 int 3849 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3850 { 3851 long loff; 3852 int error; 3853 3854 error = kern_ogetdirentries(td, uap, &loff); 3855 if (error == 0) 3856 error = copyout(&loff, uap->basep, sizeof(long)); 3857 return (error); 3858 } 3859 3860 int 3861 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 3862 long *ploff) 3863 { 3864 long base; 3865 int error; 3866 3867 /* XXX arbitrary sanity limit on `count'. */ 3868 if (uap->count > 64 * 1024) 3869 return (EINVAL); 3870 3871 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 3872 &base, ogetdirentries_cvt); 3873 3874 if (error == 0 && uap->basep != NULL) 3875 error = copyout(&base, uap->basep, sizeof(long)); 3876 3877 return (error); 3878 } 3879 #endif /* COMPAT_43 */ 3880 3881 #if defined(COMPAT_FREEBSD11) 3882 #ifndef _SYS_SYSPROTO_H_ 3883 struct freebsd11_getdirentries_args { 3884 int fd; 3885 char *buf; 3886 u_int count; 3887 long *basep; 3888 }; 3889 #endif 3890 int 3891 freebsd11_getdirentries(struct thread *td, 3892 struct freebsd11_getdirentries_args *uap) 3893 { 3894 long base; 3895 int error; 3896 3897 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 3898 &base, NULL); 3899 3900 if (error == 0 && uap->basep != NULL) 3901 error = copyout(&base, uap->basep, sizeof(long)); 3902 return (error); 3903 } 3904 3905 int 3906 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 3907 { 3908 struct freebsd11_getdirentries_args ap; 3909 3910 ap.fd = uap->fd; 3911 ap.buf = uap->buf; 3912 ap.count = uap->count; 3913 ap.basep = NULL; 3914 return (freebsd11_getdirentries(td, &ap)); 3915 } 3916 #endif /* COMPAT_FREEBSD11 */ 3917 3918 /* 3919 * Read a block of directory entries in a filesystem independent format. 3920 */ 3921 int 3922 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 3923 { 3924 off_t base; 3925 int error; 3926 3927 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 3928 NULL, UIO_USERSPACE); 3929 if (error != 0) 3930 return (error); 3931 if (uap->basep != NULL) 3932 error = copyout(&base, uap->basep, sizeof(off_t)); 3933 return (error); 3934 } 3935 3936 int 3937 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 3938 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 3939 { 3940 struct vnode *vp; 3941 struct file *fp; 3942 struct uio auio; 3943 struct iovec aiov; 3944 off_t loff; 3945 int error, eofflag; 3946 off_t foffset; 3947 3948 AUDIT_ARG_FD(fd); 3949 if (count > IOSIZE_MAX) 3950 return (EINVAL); 3951 auio.uio_resid = count; 3952 error = getvnode(td, fd, &cap_read_rights, &fp); 3953 if (error != 0) 3954 return (error); 3955 if ((fp->f_flag & FREAD) == 0) { 3956 fdrop(fp, td); 3957 return (EBADF); 3958 } 3959 vp = fp->f_vnode; 3960 foffset = foffset_lock(fp, 0); 3961 unionread: 3962 if (vp->v_type != VDIR) { 3963 error = EINVAL; 3964 goto fail; 3965 } 3966 aiov.iov_base = buf; 3967 aiov.iov_len = count; 3968 auio.uio_iov = &aiov; 3969 auio.uio_iovcnt = 1; 3970 auio.uio_rw = UIO_READ; 3971 auio.uio_segflg = bufseg; 3972 auio.uio_td = td; 3973 vn_lock(vp, LK_SHARED | LK_RETRY); 3974 AUDIT_ARG_VNODE1(vp); 3975 loff = auio.uio_offset = foffset; 3976 #ifdef MAC 3977 error = mac_vnode_check_readdir(td->td_ucred, vp); 3978 if (error == 0) 3979 #endif 3980 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 3981 NULL); 3982 foffset = auio.uio_offset; 3983 if (error != 0) { 3984 VOP_UNLOCK(vp, 0); 3985 goto fail; 3986 } 3987 if (count == auio.uio_resid && 3988 (vp->v_vflag & VV_ROOT) && 3989 (vp->v_mount->mnt_flag & MNT_UNION)) { 3990 struct vnode *tvp = vp; 3991 3992 vp = vp->v_mount->mnt_vnodecovered; 3993 VREF(vp); 3994 fp->f_vnode = vp; 3995 fp->f_data = vp; 3996 foffset = 0; 3997 vput(tvp); 3998 goto unionread; 3999 } 4000 VOP_UNLOCK(vp, 0); 4001 *basep = loff; 4002 if (residp != NULL) 4003 *residp = auio.uio_resid; 4004 td->td_retval[0] = count - auio.uio_resid; 4005 fail: 4006 foffset_unlock(fp, foffset, 0); 4007 fdrop(fp, td); 4008 return (error); 4009 } 4010 4011 /* 4012 * Set the mode mask for creation of filesystem nodes. 4013 */ 4014 #ifndef _SYS_SYSPROTO_H_ 4015 struct umask_args { 4016 int newmask; 4017 }; 4018 #endif 4019 int 4020 sys_umask(struct thread *td, struct umask_args *uap) 4021 { 4022 struct filedesc *fdp; 4023 4024 fdp = td->td_proc->p_fd; 4025 FILEDESC_XLOCK(fdp); 4026 td->td_retval[0] = fdp->fd_cmask; 4027 fdp->fd_cmask = uap->newmask & ALLPERMS; 4028 FILEDESC_XUNLOCK(fdp); 4029 return (0); 4030 } 4031 4032 /* 4033 * Void all references to file by ripping underlying filesystem away from 4034 * vnode. 4035 */ 4036 #ifndef _SYS_SYSPROTO_H_ 4037 struct revoke_args { 4038 char *path; 4039 }; 4040 #endif 4041 int 4042 sys_revoke(struct thread *td, struct revoke_args *uap) 4043 { 4044 struct vnode *vp; 4045 struct vattr vattr; 4046 struct nameidata nd; 4047 int error; 4048 4049 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4050 uap->path, td); 4051 if ((error = namei(&nd)) != 0) 4052 return (error); 4053 vp = nd.ni_vp; 4054 NDFREE(&nd, NDF_ONLY_PNBUF); 4055 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4056 error = EINVAL; 4057 goto out; 4058 } 4059 #ifdef MAC 4060 error = mac_vnode_check_revoke(td->td_ucred, vp); 4061 if (error != 0) 4062 goto out; 4063 #endif 4064 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4065 if (error != 0) 4066 goto out; 4067 if (td->td_ucred->cr_uid != vattr.va_uid) { 4068 error = priv_check(td, PRIV_VFS_ADMIN); 4069 if (error != 0) 4070 goto out; 4071 } 4072 if (vcount(vp) > 1) 4073 VOP_REVOKE(vp, REVOKEALL); 4074 out: 4075 vput(vp); 4076 return (error); 4077 } 4078 4079 /* 4080 * Convert a user file descriptor to a kernel file entry and check that, if it 4081 * is a capability, the correct rights are present. A reference on the file 4082 * entry is held upon returning. 4083 */ 4084 int 4085 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4086 { 4087 struct file *fp; 4088 int error; 4089 4090 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL); 4091 if (error != 0) 4092 return (error); 4093 4094 /* 4095 * The file could be not of the vnode type, or it may be not 4096 * yet fully initialized, in which case the f_vnode pointer 4097 * may be set, but f_ops is still badfileops. E.g., 4098 * devfs_open() transiently create such situation to 4099 * facilitate csw d_fdopen(). 4100 * 4101 * Dupfdopen() handling in kern_openat() installs the 4102 * half-baked file into the process descriptor table, allowing 4103 * other thread to dereference it. Guard against the race by 4104 * checking f_ops. 4105 */ 4106 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4107 fdrop(fp, td); 4108 return (EINVAL); 4109 } 4110 *fpp = fp; 4111 return (0); 4112 } 4113 4114 4115 /* 4116 * Get an (NFS) file handle. 4117 */ 4118 #ifndef _SYS_SYSPROTO_H_ 4119 struct lgetfh_args { 4120 char *fname; 4121 fhandle_t *fhp; 4122 }; 4123 #endif 4124 int 4125 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4126 { 4127 struct nameidata nd; 4128 fhandle_t fh; 4129 struct vnode *vp; 4130 int error; 4131 4132 error = priv_check(td, PRIV_VFS_GETFH); 4133 if (error != 0) 4134 return (error); 4135 NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4136 uap->fname, td); 4137 error = namei(&nd); 4138 if (error != 0) 4139 return (error); 4140 NDFREE(&nd, NDF_ONLY_PNBUF); 4141 vp = nd.ni_vp; 4142 bzero(&fh, sizeof(fh)); 4143 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4144 error = VOP_VPTOFH(vp, &fh.fh_fid); 4145 vput(vp); 4146 if (error == 0) 4147 error = copyout(&fh, uap->fhp, sizeof (fh)); 4148 return (error); 4149 } 4150 4151 #ifndef _SYS_SYSPROTO_H_ 4152 struct getfh_args { 4153 char *fname; 4154 fhandle_t *fhp; 4155 }; 4156 #endif 4157 int 4158 sys_getfh(struct thread *td, struct getfh_args *uap) 4159 { 4160 struct nameidata nd; 4161 fhandle_t fh; 4162 struct vnode *vp; 4163 int error; 4164 4165 error = priv_check(td, PRIV_VFS_GETFH); 4166 if (error != 0) 4167 return (error); 4168 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4169 uap->fname, td); 4170 error = namei(&nd); 4171 if (error != 0) 4172 return (error); 4173 NDFREE(&nd, NDF_ONLY_PNBUF); 4174 vp = nd.ni_vp; 4175 bzero(&fh, sizeof(fh)); 4176 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4177 error = VOP_VPTOFH(vp, &fh.fh_fid); 4178 vput(vp); 4179 if (error == 0) 4180 error = copyout(&fh, uap->fhp, sizeof (fh)); 4181 return (error); 4182 } 4183 4184 /* 4185 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4186 * open descriptor. 4187 * 4188 * warning: do not remove the priv_check() call or this becomes one giant 4189 * security hole. 4190 */ 4191 #ifndef _SYS_SYSPROTO_H_ 4192 struct fhopen_args { 4193 const struct fhandle *u_fhp; 4194 int flags; 4195 }; 4196 #endif 4197 int 4198 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4199 { 4200 struct mount *mp; 4201 struct vnode *vp; 4202 struct fhandle fhp; 4203 struct file *fp; 4204 int fmode, error; 4205 int indx; 4206 4207 error = priv_check(td, PRIV_VFS_FHOPEN); 4208 if (error != 0) 4209 return (error); 4210 indx = -1; 4211 fmode = FFLAGS(uap->flags); 4212 /* why not allow a non-read/write open for our lockd? */ 4213 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4214 return (EINVAL); 4215 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4216 if (error != 0) 4217 return(error); 4218 /* find the mount point */ 4219 mp = vfs_busyfs(&fhp.fh_fsid); 4220 if (mp == NULL) 4221 return (ESTALE); 4222 /* now give me my vnode, it gets returned to me locked */ 4223 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4224 vfs_unbusy(mp); 4225 if (error != 0) 4226 return (error); 4227 4228 error = falloc_noinstall(td, &fp); 4229 if (error != 0) { 4230 vput(vp); 4231 return (error); 4232 } 4233 /* 4234 * An extra reference on `fp' has been held for us by 4235 * falloc_noinstall(). 4236 */ 4237 4238 #ifdef INVARIANTS 4239 td->td_dupfd = -1; 4240 #endif 4241 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4242 if (error != 0) { 4243 KASSERT(fp->f_ops == &badfileops, 4244 ("VOP_OPEN in fhopen() set f_ops")); 4245 KASSERT(td->td_dupfd < 0, 4246 ("fhopen() encountered fdopen()")); 4247 4248 vput(vp); 4249 goto bad; 4250 } 4251 #ifdef INVARIANTS 4252 td->td_dupfd = 0; 4253 #endif 4254 fp->f_vnode = vp; 4255 fp->f_seqcount = 1; 4256 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4257 &vnops); 4258 VOP_UNLOCK(vp, 0); 4259 if ((fmode & O_TRUNC) != 0) { 4260 error = fo_truncate(fp, 0, td->td_ucred, td); 4261 if (error != 0) 4262 goto bad; 4263 } 4264 4265 error = finstall(td, fp, &indx, fmode, NULL); 4266 bad: 4267 fdrop(fp, td); 4268 td->td_retval[0] = indx; 4269 return (error); 4270 } 4271 4272 /* 4273 * Stat an (NFS) file handle. 4274 */ 4275 #ifndef _SYS_SYSPROTO_H_ 4276 struct fhstat_args { 4277 struct fhandle *u_fhp; 4278 struct stat *sb; 4279 }; 4280 #endif 4281 int 4282 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4283 { 4284 struct stat sb; 4285 struct fhandle fh; 4286 int error; 4287 4288 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4289 if (error != 0) 4290 return (error); 4291 error = kern_fhstat(td, fh, &sb); 4292 if (error == 0) 4293 error = copyout(&sb, uap->sb, sizeof(sb)); 4294 return (error); 4295 } 4296 4297 int 4298 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4299 { 4300 struct mount *mp; 4301 struct vnode *vp; 4302 int error; 4303 4304 error = priv_check(td, PRIV_VFS_FHSTAT); 4305 if (error != 0) 4306 return (error); 4307 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4308 return (ESTALE); 4309 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4310 vfs_unbusy(mp); 4311 if (error != 0) 4312 return (error); 4313 error = vn_stat(vp, sb, td->td_ucred, NOCRED, td); 4314 vput(vp); 4315 return (error); 4316 } 4317 4318 /* 4319 * Implement fstatfs() for (NFS) file handles. 4320 */ 4321 #ifndef _SYS_SYSPROTO_H_ 4322 struct fhstatfs_args { 4323 struct fhandle *u_fhp; 4324 struct statfs *buf; 4325 }; 4326 #endif 4327 int 4328 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4329 { 4330 struct statfs *sfp; 4331 fhandle_t fh; 4332 int error; 4333 4334 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4335 if (error != 0) 4336 return (error); 4337 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4338 error = kern_fhstatfs(td, fh, sfp); 4339 if (error == 0) 4340 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4341 free(sfp, M_STATFS); 4342 return (error); 4343 } 4344 4345 int 4346 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4347 { 4348 struct statfs *sp; 4349 struct mount *mp; 4350 struct vnode *vp; 4351 int error; 4352 4353 error = priv_check(td, PRIV_VFS_FHSTATFS); 4354 if (error != 0) 4355 return (error); 4356 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4357 return (ESTALE); 4358 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4359 if (error != 0) { 4360 vfs_unbusy(mp); 4361 return (error); 4362 } 4363 vput(vp); 4364 error = prison_canseemount(td->td_ucred, mp); 4365 if (error != 0) 4366 goto out; 4367 #ifdef MAC 4368 error = mac_mount_check_stat(td->td_ucred, mp); 4369 if (error != 0) 4370 goto out; 4371 #endif 4372 /* 4373 * Set these in case the underlying filesystem fails to do so. 4374 */ 4375 sp = &mp->mnt_stat; 4376 sp->f_version = STATFS_VERSION; 4377 sp->f_namemax = NAME_MAX; 4378 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4379 error = VFS_STATFS(mp, sp); 4380 if (error == 0) 4381 *buf = *sp; 4382 out: 4383 vfs_unbusy(mp); 4384 return (error); 4385 } 4386 4387 int 4388 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) 4389 { 4390 struct file *fp; 4391 struct mount *mp; 4392 struct vnode *vp; 4393 off_t olen, ooffset; 4394 int error; 4395 #ifdef AUDIT 4396 int audited_vnode1 = 0; 4397 #endif 4398 4399 AUDIT_ARG_FD(fd); 4400 if (offset < 0 || len <= 0) 4401 return (EINVAL); 4402 /* Check for wrap. */ 4403 if (offset > OFF_MAX - len) 4404 return (EFBIG); 4405 AUDIT_ARG_FD(fd); 4406 error = fget(td, fd, &cap_pwrite_rights, &fp); 4407 if (error != 0) 4408 return (error); 4409 AUDIT_ARG_FILE(td->td_proc, fp); 4410 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4411 error = ESPIPE; 4412 goto out; 4413 } 4414 if ((fp->f_flag & FWRITE) == 0) { 4415 error = EBADF; 4416 goto out; 4417 } 4418 if (fp->f_type != DTYPE_VNODE) { 4419 error = ENODEV; 4420 goto out; 4421 } 4422 vp = fp->f_vnode; 4423 if (vp->v_type != VREG) { 4424 error = ENODEV; 4425 goto out; 4426 } 4427 4428 /* Allocating blocks may take a long time, so iterate. */ 4429 for (;;) { 4430 olen = len; 4431 ooffset = offset; 4432 4433 bwillwrite(); 4434 mp = NULL; 4435 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 4436 if (error != 0) 4437 break; 4438 error = vn_lock(vp, LK_EXCLUSIVE); 4439 if (error != 0) { 4440 vn_finished_write(mp); 4441 break; 4442 } 4443 #ifdef AUDIT 4444 if (!audited_vnode1) { 4445 AUDIT_ARG_VNODE1(vp); 4446 audited_vnode1 = 1; 4447 } 4448 #endif 4449 #ifdef MAC 4450 error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); 4451 if (error == 0) 4452 #endif 4453 error = VOP_ALLOCATE(vp, &offset, &len); 4454 VOP_UNLOCK(vp, 0); 4455 vn_finished_write(mp); 4456 4457 if (olen + ooffset != offset + len) { 4458 panic("offset + len changed from %jx/%jx to %jx/%jx", 4459 ooffset, olen, offset, len); 4460 } 4461 if (error != 0 || len == 0) 4462 break; 4463 KASSERT(olen > len, ("Iteration did not make progress?")); 4464 maybe_yield(); 4465 } 4466 out: 4467 fdrop(fp, td); 4468 return (error); 4469 } 4470 4471 int 4472 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) 4473 { 4474 int error; 4475 4476 error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len); 4477 return (kern_posix_error(td, error)); 4478 } 4479 4480 /* 4481 * Unlike madvise(2), we do not make a best effort to remember every 4482 * possible caching hint. Instead, we remember the last setting with 4483 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4484 * region of any current setting. 4485 */ 4486 int 4487 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4488 int advice) 4489 { 4490 struct fadvise_info *fa, *new; 4491 struct file *fp; 4492 struct vnode *vp; 4493 off_t end; 4494 int error; 4495 4496 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4497 return (EINVAL); 4498 AUDIT_ARG_VALUE(advice); 4499 switch (advice) { 4500 case POSIX_FADV_SEQUENTIAL: 4501 case POSIX_FADV_RANDOM: 4502 case POSIX_FADV_NOREUSE: 4503 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4504 break; 4505 case POSIX_FADV_NORMAL: 4506 case POSIX_FADV_WILLNEED: 4507 case POSIX_FADV_DONTNEED: 4508 new = NULL; 4509 break; 4510 default: 4511 return (EINVAL); 4512 } 4513 /* XXX: CAP_POSIX_FADVISE? */ 4514 AUDIT_ARG_FD(fd); 4515 error = fget(td, fd, &cap_no_rights, &fp); 4516 if (error != 0) 4517 goto out; 4518 AUDIT_ARG_FILE(td->td_proc, fp); 4519 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4520 error = ESPIPE; 4521 goto out; 4522 } 4523 if (fp->f_type != DTYPE_VNODE) { 4524 error = ENODEV; 4525 goto out; 4526 } 4527 vp = fp->f_vnode; 4528 if (vp->v_type != VREG) { 4529 error = ENODEV; 4530 goto out; 4531 } 4532 if (len == 0) 4533 end = OFF_MAX; 4534 else 4535 end = offset + len - 1; 4536 switch (advice) { 4537 case POSIX_FADV_SEQUENTIAL: 4538 case POSIX_FADV_RANDOM: 4539 case POSIX_FADV_NOREUSE: 4540 /* 4541 * Try to merge any existing non-standard region with 4542 * this new region if possible, otherwise create a new 4543 * non-standard region for this request. 4544 */ 4545 mtx_pool_lock(mtxpool_sleep, fp); 4546 fa = fp->f_advice; 4547 if (fa != NULL && fa->fa_advice == advice && 4548 ((fa->fa_start <= end && fa->fa_end >= offset) || 4549 (end != OFF_MAX && fa->fa_start == end + 1) || 4550 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4551 if (offset < fa->fa_start) 4552 fa->fa_start = offset; 4553 if (end > fa->fa_end) 4554 fa->fa_end = end; 4555 } else { 4556 new->fa_advice = advice; 4557 new->fa_start = offset; 4558 new->fa_end = end; 4559 fp->f_advice = new; 4560 new = fa; 4561 } 4562 mtx_pool_unlock(mtxpool_sleep, fp); 4563 break; 4564 case POSIX_FADV_NORMAL: 4565 /* 4566 * If a the "normal" region overlaps with an existing 4567 * non-standard region, trim or remove the 4568 * non-standard region. 4569 */ 4570 mtx_pool_lock(mtxpool_sleep, fp); 4571 fa = fp->f_advice; 4572 if (fa != NULL) { 4573 if (offset <= fa->fa_start && end >= fa->fa_end) { 4574 new = fa; 4575 fp->f_advice = NULL; 4576 } else if (offset <= fa->fa_start && 4577 end >= fa->fa_start) 4578 fa->fa_start = end + 1; 4579 else if (offset <= fa->fa_end && end >= fa->fa_end) 4580 fa->fa_end = offset - 1; 4581 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4582 /* 4583 * If the "normal" region is a middle 4584 * portion of the existing 4585 * non-standard region, just remove 4586 * the whole thing rather than picking 4587 * one side or the other to 4588 * preserve. 4589 */ 4590 new = fa; 4591 fp->f_advice = NULL; 4592 } 4593 } 4594 mtx_pool_unlock(mtxpool_sleep, fp); 4595 break; 4596 case POSIX_FADV_WILLNEED: 4597 case POSIX_FADV_DONTNEED: 4598 error = VOP_ADVISE(vp, offset, end, advice); 4599 break; 4600 } 4601 out: 4602 if (fp != NULL) 4603 fdrop(fp, td); 4604 free(new, M_FADVISE); 4605 return (error); 4606 } 4607 4608 int 4609 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4610 { 4611 int error; 4612 4613 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4614 uap->advice); 4615 return (kern_posix_error(td, error)); 4616 } 4617