1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/param.h> 29 #include <sys/t_lock.h> 30 #include <sys/systm.h> 31 #include <sys/sysmacros.h> 32 #include <sys/user.h> 33 #include <sys/time.h> 34 #include <sys/vfs.h> 35 #include <sys/vfs_opreg.h> 36 #include <sys/vnode.h> 37 #include <sys/file.h> 38 #include <sys/fcntl.h> 39 #include <sys/flock.h> 40 #include <sys/kmem.h> 41 #include <sys/uio.h> 42 #include <sys/errno.h> 43 #include <sys/stat.h> 44 #include <sys/cred.h> 45 #include <sys/dirent.h> 46 #include <sys/pathname.h> 47 #include <sys/vmsystm.h> 48 #include <sys/fs/tmp.h> 49 #include <sys/fs/tmpnode.h> 50 #include <sys/mman.h> 51 #include <vm/hat.h> 52 #include <vm/seg_vn.h> 53 #include <vm/seg_map.h> 54 #include <vm/seg.h> 55 #include <vm/anon.h> 56 #include <vm/as.h> 57 #include <vm/page.h> 58 #include <vm/pvn.h> 59 #include <sys/cmn_err.h> 60 #include <sys/debug.h> 61 #include <sys/swap.h> 62 #include <sys/buf.h> 63 #include <sys/vm.h> 64 #include <sys/vtrace.h> 65 #include <sys/policy.h> 66 #include <fs/fs_subr.h> 67 68 static int tmp_getapage(struct vnode *, u_offset_t, size_t, uint_t *, 69 page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *); 70 static int tmp_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, 71 int, struct cred *); 72 73 /* ARGSUSED1 */ 74 static int 75 tmp_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct) 76 { 77 /* 78 * swapon to a tmpfs file is not supported so access 79 * is denied on open if VISSWAP is set. 80 */ 81 if ((*vpp)->v_flag & VISSWAP) 82 return (EINVAL); 83 return (0); 84 } 85 86 /* ARGSUSED1 */ 87 static int 88 tmp_close( 89 struct vnode *vp, 90 int flag, 91 int count, 92 offset_t offset, 93 struct cred *cred, 94 caller_context_t *ct) 95 { 96 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 97 cleanshares(vp, ttoproc(curthread)->p_pid); 98 return (0); 99 } 100 101 /* 102 * wrtmp does the real work of write requests for tmpfs. 103 */ 104 static int 105 wrtmp( 106 struct tmount *tm, 107 struct tmpnode *tp, 108 struct uio *uio, 109 struct cred *cr, 110 struct caller_context *ct) 111 { 112 pgcnt_t pageoffset; /* offset in pages */ 113 ulong_t segmap_offset; /* pagesize byte offset into segmap */ 114 caddr_t base; /* base of segmap */ 115 ssize_t bytes; /* bytes to uiomove */ 116 pfn_t pagenumber; /* offset in pages into tmp file */ 117 struct vnode *vp; 118 int error = 0; 119 int pagecreate; /* == 1 if we allocated a page */ 120 int newpage; 121 rlim64_t limit = uio->uio_llimit; 122 long oresid = uio->uio_resid; 123 timestruc_t now; 124 125 long tn_size_changed = 0; 126 long old_tn_size; 127 long new_tn_size; 128 129 vp = TNTOV(tp); 130 ASSERT(vp->v_type == VREG); 131 132 TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START, 133 "tmp_wrtmp_start:vp %p", vp); 134 135 ASSERT(RW_WRITE_HELD(&tp->tn_contents)); 136 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 137 138 if (MANDLOCK(vp, tp->tn_mode)) { 139 rw_exit(&tp->tn_contents); 140 /* 141 * tmp_getattr ends up being called by chklock 142 */ 143 error = chklock(vp, FWRITE, uio->uio_loffset, uio->uio_resid, 144 uio->uio_fmode, ct); 145 rw_enter(&tp->tn_contents, RW_WRITER); 146 if (error != 0) { 147 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 148 "tmp_wrtmp_end:vp %p error %d", vp, error); 149 return (error); 150 } 151 } 152 153 if (uio->uio_loffset < 0) 154 return (EINVAL); 155 156 if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) 157 limit = MAXOFFSET_T; 158 159 if (uio->uio_loffset >= limit) { 160 proc_t *p = ttoproc(curthread); 161 162 mutex_enter(&p->p_lock); 163 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls, 164 p, RCA_UNSAFE_SIGINFO); 165 mutex_exit(&p->p_lock); 166 return (EFBIG); 167 } 168 169 if (uio->uio_loffset >= MAXOFF_T) { 170 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 171 "tmp_wrtmp_end:vp %p error %d", vp, EINVAL); 172 return (EFBIG); 173 } 174 175 if (uio->uio_resid == 0) { 176 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 177 "tmp_wrtmp_end:vp %p error %d", vp, 0); 178 return (0); 179 } 180 181 if (limit > MAXOFF_T) 182 limit = MAXOFF_T; 183 184 do { 185 long offset; 186 long delta; 187 188 offset = (long)uio->uio_offset; 189 pageoffset = offset & PAGEOFFSET; 190 /* 191 * A maximum of PAGESIZE bytes of data is transferred 192 * each pass through this loop 193 */ 194 bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid); 195 196 if (offset + bytes >= limit) { 197 if (offset >= limit) { 198 error = EFBIG; 199 goto out; 200 } 201 bytes = limit - offset; 202 } 203 pagenumber = btop(offset); 204 205 /* 206 * delta is the amount of anonymous memory 207 * to reserve for the file. 208 * We always reserve in pagesize increments so 209 * unless we're extending the file into a new page, 210 * we don't need to call tmp_resv. 211 */ 212 delta = offset + bytes - 213 P2ROUNDUP_TYPED(tp->tn_size, PAGESIZE, u_offset_t); 214 if (delta > 0) { 215 pagecreate = 1; 216 if (tmp_resv(tm, tp, delta, pagecreate)) { 217 /* 218 * Log file system full in the zone that owns 219 * the tmpfs mount, as well as in the global 220 * zone if necessary. 221 */ 222 zcmn_err(tm->tm_vfsp->vfs_zone->zone_id, 223 CE_WARN, "%s: File system full, " 224 "swap space limit exceeded", 225 tm->tm_mntpath); 226 227 if (tm->tm_vfsp->vfs_zone->zone_id != 228 GLOBAL_ZONEID) { 229 230 vfs_t *vfs = tm->tm_vfsp; 231 232 zcmn_err(GLOBAL_ZONEID, 233 CE_WARN, "%s: File system full, " 234 "swap space limit exceeded", 235 vfs->vfs_vnodecovered->v_path); 236 } 237 error = ENOSPC; 238 break; 239 } 240 tmpnode_growmap(tp, (ulong_t)offset + bytes); 241 } 242 /* grow the file to the new length */ 243 if (offset + bytes > tp->tn_size) { 244 tn_size_changed = 1; 245 old_tn_size = tp->tn_size; 246 /* 247 * Postpone updating tp->tn_size until uiomove() is 248 * done. 249 */ 250 new_tn_size = offset + bytes; 251 } 252 if (bytes == PAGESIZE) { 253 /* 254 * Writing whole page so reading from disk 255 * is a waste 256 */ 257 pagecreate = 1; 258 } else { 259 pagecreate = 0; 260 } 261 /* 262 * If writing past EOF or filling in a hole 263 * we need to allocate an anon slot. 264 */ 265 if (anon_get_ptr(tp->tn_anon, pagenumber) == NULL) { 266 (void) anon_set_ptr(tp->tn_anon, pagenumber, 267 anon_alloc(vp, ptob(pagenumber)), ANON_SLEEP); 268 pagecreate = 1; 269 tp->tn_nblocks++; 270 } 271 272 /* 273 * We have to drop the contents lock to allow the VM 274 * system to reacquire it in tmp_getpage() 275 */ 276 rw_exit(&tp->tn_contents); 277 278 /* 279 * Touch the page and fault it in if it is not in core 280 * before segmap_getmapflt or vpm_data_copy can lock it. 281 * This is to avoid the deadlock if the buffer is mapped 282 * to the same file through mmap which we want to write. 283 */ 284 uio_prefaultpages((long)bytes, uio); 285 286 newpage = 0; 287 if (vpm_enable) { 288 /* 289 * Copy data. If new pages are created, part of 290 * the page that is not written will be initizliazed 291 * with zeros. 292 */ 293 error = vpm_data_copy(vp, offset, bytes, uio, 294 !pagecreate, &newpage, 1, S_WRITE); 295 } else { 296 /* Get offset within the segmap mapping */ 297 segmap_offset = (offset & PAGEMASK) & MAXBOFFSET; 298 base = segmap_getmapflt(segkmap, vp, 299 (offset & MAXBMASK), PAGESIZE, !pagecreate, 300 S_WRITE); 301 } 302 303 304 if (!vpm_enable && pagecreate) { 305 /* 306 * segmap_pagecreate() returns 1 if it calls 307 * page_create_va() to allocate any pages. 308 */ 309 newpage = segmap_pagecreate(segkmap, 310 base + segmap_offset, (size_t)PAGESIZE, 0); 311 /* 312 * Clear from the beginning of the page to the starting 313 * offset of the data. 314 */ 315 if (pageoffset != 0) 316 (void) kzero(base + segmap_offset, 317 (size_t)pageoffset); 318 } 319 320 if (!vpm_enable) { 321 error = uiomove(base + segmap_offset + pageoffset, 322 (long)bytes, UIO_WRITE, uio); 323 } 324 325 if (!vpm_enable && pagecreate && 326 uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) { 327 long zoffset; /* zero from offset into page */ 328 /* 329 * We created pages w/o initializing them completely, 330 * thus we need to zero the part that wasn't set up. 331 * This happens on most EOF write cases and if 332 * we had some sort of error during the uiomove. 333 */ 334 long nmoved; 335 336 nmoved = uio->uio_offset - offset; 337 ASSERT((nmoved + pageoffset) <= PAGESIZE); 338 339 /* 340 * Zero from the end of data in the page to the 341 * end of the page. 342 */ 343 if ((zoffset = pageoffset + nmoved) < PAGESIZE) 344 (void) kzero(base + segmap_offset + zoffset, 345 (size_t)PAGESIZE - zoffset); 346 } 347 348 /* 349 * Unlock the pages which have been allocated by 350 * page_create_va() in segmap_pagecreate() 351 */ 352 if (!vpm_enable && newpage) { 353 segmap_pageunlock(segkmap, base + segmap_offset, 354 (size_t)PAGESIZE, S_WRITE); 355 } 356 357 if (error) { 358 /* 359 * If we failed on a write, we must 360 * be sure to invalidate any pages that may have 361 * been allocated. 362 */ 363 if (vpm_enable) { 364 (void) vpm_sync_pages(vp, offset, PAGESIZE, 365 SM_INVAL); 366 } else { 367 (void) segmap_release(segkmap, base, SM_INVAL); 368 } 369 } else { 370 if (vpm_enable) { 371 error = vpm_sync_pages(vp, offset, PAGESIZE, 372 0); 373 } else { 374 error = segmap_release(segkmap, base, 0); 375 } 376 } 377 378 /* 379 * Re-acquire contents lock. 380 */ 381 rw_enter(&tp->tn_contents, RW_WRITER); 382 383 /* 384 * Update tn_size. 385 */ 386 if (tn_size_changed) 387 tp->tn_size = new_tn_size; 388 389 /* 390 * If the uiomove failed, fix up tn_size. 391 */ 392 if (error) { 393 if (tn_size_changed) { 394 /* 395 * The uiomove failed, and we 396 * allocated blocks,so get rid 397 * of them. 398 */ 399 (void) tmpnode_trunc(tm, tp, 400 (ulong_t)old_tn_size); 401 } 402 } else { 403 /* 404 * XXX - Can this be out of the loop? 405 */ 406 if ((tp->tn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) && 407 (tp->tn_mode & (S_ISUID | S_ISGID)) && 408 secpolicy_vnode_setid_retain(cr, 409 (tp->tn_mode & S_ISUID) != 0 && tp->tn_uid == 0)) { 410 /* 411 * Clear Set-UID & Set-GID bits on 412 * successful write if not privileged 413 * and at least one of the execute bits 414 * is set. If we always clear Set-GID, 415 * mandatory file and record locking is 416 * unuseable. 417 */ 418 tp->tn_mode &= ~(S_ISUID | S_ISGID); 419 } 420 gethrestime(&now); 421 tp->tn_mtime = now; 422 tp->tn_ctime = now; 423 } 424 } while (error == 0 && uio->uio_resid > 0 && bytes != 0); 425 426 out: 427 /* 428 * If we've already done a partial-write, terminate 429 * the write but return no error. 430 */ 431 if (oresid != uio->uio_resid) 432 error = 0; 433 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 434 "tmp_wrtmp_end:vp %p error %d", vp, error); 435 return (error); 436 } 437 438 /* 439 * rdtmp does the real work of read requests for tmpfs. 440 */ 441 static int 442 rdtmp( 443 struct tmount *tm, 444 struct tmpnode *tp, 445 struct uio *uio, 446 struct caller_context *ct) 447 { 448 ulong_t pageoffset; /* offset in tmpfs file (uio_offset) */ 449 ulong_t segmap_offset; /* pagesize byte offset into segmap */ 450 caddr_t base; /* base of segmap */ 451 ssize_t bytes; /* bytes to uiomove */ 452 struct vnode *vp; 453 int error; 454 long oresid = uio->uio_resid; 455 456 #if defined(lint) 457 tm = tm; 458 #endif 459 vp = TNTOV(tp); 460 461 TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START, "tmp_rdtmp_start:vp %p", 462 vp); 463 464 ASSERT(RW_LOCK_HELD(&tp->tn_contents)); 465 466 if (MANDLOCK(vp, tp->tn_mode)) { 467 rw_exit(&tp->tn_contents); 468 /* 469 * tmp_getattr ends up being called by chklock 470 */ 471 error = chklock(vp, FREAD, uio->uio_loffset, uio->uio_resid, 472 uio->uio_fmode, ct); 473 rw_enter(&tp->tn_contents, RW_READER); 474 if (error != 0) { 475 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 476 "tmp_rdtmp_end:vp %p error %d", vp, error); 477 return (error); 478 } 479 } 480 ASSERT(tp->tn_type == VREG); 481 482 if (uio->uio_loffset >= MAXOFF_T) { 483 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 484 "tmp_rdtmp_end:vp %p error %d", vp, EINVAL); 485 return (0); 486 } 487 if (uio->uio_loffset < 0) 488 return (EINVAL); 489 if (uio->uio_resid == 0) { 490 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 491 "tmp_rdtmp_end:vp %p error %d", vp, 0); 492 return (0); 493 } 494 495 vp = TNTOV(tp); 496 497 do { 498 long diff; 499 long offset; 500 501 offset = uio->uio_offset; 502 pageoffset = offset & PAGEOFFSET; 503 bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid); 504 505 diff = tp->tn_size - offset; 506 507 if (diff <= 0) { 508 error = 0; 509 goto out; 510 } 511 if (diff < bytes) 512 bytes = diff; 513 514 /* 515 * We have to drop the contents lock to allow the VM system 516 * to reacquire it in tmp_getpage() should the uiomove cause a 517 * pagefault. 518 */ 519 rw_exit(&tp->tn_contents); 520 521 if (vpm_enable) { 522 /* 523 * Copy data. 524 */ 525 error = vpm_data_copy(vp, offset, bytes, uio, 1, NULL, 526 0, S_READ); 527 } else { 528 segmap_offset = (offset & PAGEMASK) & MAXBOFFSET; 529 base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, 530 bytes, 1, S_READ); 531 532 error = uiomove(base + segmap_offset + pageoffset, 533 (long)bytes, UIO_READ, uio); 534 } 535 536 if (error) { 537 if (vpm_enable) { 538 (void) vpm_sync_pages(vp, offset, PAGESIZE, 0); 539 } else { 540 (void) segmap_release(segkmap, base, 0); 541 } 542 } else { 543 if (vpm_enable) { 544 error = vpm_sync_pages(vp, offset, PAGESIZE, 545 0); 546 } else { 547 error = segmap_release(segkmap, base, 0); 548 } 549 } 550 551 /* 552 * Re-acquire contents lock. 553 */ 554 rw_enter(&tp->tn_contents, RW_READER); 555 556 } while (error == 0 && uio->uio_resid > 0); 557 558 out: 559 gethrestime(&tp->tn_atime); 560 561 /* 562 * If we've already done a partial read, terminate 563 * the read but return no error. 564 */ 565 if (oresid != uio->uio_resid) 566 error = 0; 567 568 TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END, 569 "tmp_rdtmp_end:vp %x error %d", vp, error); 570 return (error); 571 } 572 573 /* ARGSUSED2 */ 574 static int 575 tmp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred, 576 struct caller_context *ct) 577 { 578 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 579 struct tmount *tm = (struct tmount *)VTOTM(vp); 580 int error; 581 582 /* 583 * We don't currently support reading non-regular files 584 */ 585 if (vp->v_type == VDIR) 586 return (EISDIR); 587 if (vp->v_type != VREG) 588 return (EINVAL); 589 /* 590 * tmp_rwlock should have already been called from layers above 591 */ 592 ASSERT(RW_READ_HELD(&tp->tn_rwlock)); 593 594 rw_enter(&tp->tn_contents, RW_READER); 595 596 error = rdtmp(tm, tp, uiop, ct); 597 598 rw_exit(&tp->tn_contents); 599 600 return (error); 601 } 602 603 static int 604 tmp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred, 605 struct caller_context *ct) 606 { 607 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 608 struct tmount *tm = (struct tmount *)VTOTM(vp); 609 int error; 610 611 /* 612 * We don't currently support writing to non-regular files 613 */ 614 if (vp->v_type != VREG) 615 return (EINVAL); /* XXX EISDIR? */ 616 617 /* 618 * tmp_rwlock should have already been called from layers above 619 */ 620 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 621 622 rw_enter(&tp->tn_contents, RW_WRITER); 623 624 if (ioflag & FAPPEND) { 625 /* 626 * In append mode start at end of file. 627 */ 628 uiop->uio_loffset = tp->tn_size; 629 } 630 631 error = wrtmp(tm, tp, uiop, cred, ct); 632 633 rw_exit(&tp->tn_contents); 634 635 return (error); 636 } 637 638 /* ARGSUSED */ 639 static int 640 tmp_ioctl( 641 struct vnode *vp, 642 int com, 643 intptr_t data, 644 int flag, 645 struct cred *cred, 646 int *rvalp, 647 caller_context_t *ct) 648 { 649 return (ENOTTY); 650 } 651 652 /* ARGSUSED2 */ 653 static int 654 tmp_getattr( 655 struct vnode *vp, 656 struct vattr *vap, 657 int flags, 658 struct cred *cred, 659 caller_context_t *ct) 660 { 661 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 662 struct vnode *mvp; 663 struct vattr va; 664 int attrs = 1; 665 666 /* 667 * A special case to handle the root tnode on a diskless nfs 668 * client who may have had its uid and gid inherited 669 * from an nfs vnode with nobody ownership. Likely the 670 * root filesystem. After nfs is fully functional the uid/gid 671 * may be mapable so ask again. 672 * vfsp can't get unmounted because we hold vp. 673 */ 674 if (vp->v_flag & VROOT && 675 (mvp = vp->v_vfsp->vfs_vnodecovered) != NULL) { 676 mutex_enter(&tp->tn_tlock); 677 if (tp->tn_uid == UID_NOBODY || tp->tn_gid == GID_NOBODY) { 678 mutex_exit(&tp->tn_tlock); 679 bzero(&va, sizeof (struct vattr)); 680 va.va_mask = AT_UID|AT_GID; 681 attrs = VOP_GETATTR(mvp, &va, 0, cred, ct); 682 } else { 683 mutex_exit(&tp->tn_tlock); 684 } 685 } 686 mutex_enter(&tp->tn_tlock); 687 if (attrs == 0) { 688 tp->tn_uid = va.va_uid; 689 tp->tn_gid = va.va_gid; 690 } 691 vap->va_type = vp->v_type; 692 vap->va_mode = tp->tn_mode & MODEMASK; 693 vap->va_uid = tp->tn_uid; 694 vap->va_gid = tp->tn_gid; 695 vap->va_fsid = tp->tn_fsid; 696 vap->va_nodeid = (ino64_t)tp->tn_nodeid; 697 vap->va_nlink = tp->tn_nlink; 698 vap->va_size = (u_offset_t)tp->tn_size; 699 vap->va_atime = tp->tn_atime; 700 vap->va_mtime = tp->tn_mtime; 701 vap->va_ctime = tp->tn_ctime; 702 vap->va_blksize = PAGESIZE; 703 vap->va_rdev = tp->tn_rdev; 704 vap->va_seq = tp->tn_seq; 705 706 /* 707 * XXX Holes are not taken into account. We could take the time to 708 * run through the anon array looking for allocated slots... 709 */ 710 vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size))); 711 mutex_exit(&tp->tn_tlock); 712 return (0); 713 } 714 715 /*ARGSUSED4*/ 716 static int 717 tmp_setattr( 718 struct vnode *vp, 719 struct vattr *vap, 720 int flags, 721 struct cred *cred, 722 caller_context_t *ct) 723 { 724 struct tmount *tm = (struct tmount *)VTOTM(vp); 725 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 726 int error = 0; 727 struct vattr *get; 728 long mask; 729 730 /* 731 * Cannot set these attributes 732 */ 733 if ((vap->va_mask & AT_NOSET) || (vap->va_mask & AT_XVATTR)) 734 return (EINVAL); 735 736 mutex_enter(&tp->tn_tlock); 737 738 get = &tp->tn_attr; 739 /* 740 * Change file access modes. Must be owner or have sufficient 741 * privileges. 742 */ 743 error = secpolicy_vnode_setattr(cred, vp, vap, get, flags, tmp_taccess, 744 tp); 745 746 if (error) 747 goto out; 748 749 mask = vap->va_mask; 750 751 if (mask & AT_MODE) { 752 get->va_mode &= S_IFMT; 753 get->va_mode |= vap->va_mode & ~S_IFMT; 754 } 755 756 if (mask & AT_UID) 757 get->va_uid = vap->va_uid; 758 if (mask & AT_GID) 759 get->va_gid = vap->va_gid; 760 if (mask & AT_ATIME) 761 get->va_atime = vap->va_atime; 762 if (mask & AT_MTIME) 763 get->va_mtime = vap->va_mtime; 764 765 if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME)) 766 gethrestime(&tp->tn_ctime); 767 768 if (mask & AT_SIZE) { 769 ASSERT(vp->v_type != VDIR); 770 771 /* Don't support large files. */ 772 if (vap->va_size > MAXOFF_T) { 773 error = EFBIG; 774 goto out; 775 } 776 mutex_exit(&tp->tn_tlock); 777 778 rw_enter(&tp->tn_rwlock, RW_WRITER); 779 rw_enter(&tp->tn_contents, RW_WRITER); 780 error = tmpnode_trunc(tm, tp, (ulong_t)vap->va_size); 781 rw_exit(&tp->tn_contents); 782 rw_exit(&tp->tn_rwlock); 783 goto out1; 784 } 785 out: 786 mutex_exit(&tp->tn_tlock); 787 out1: 788 return (error); 789 } 790 791 /* ARGSUSED2 */ 792 static int 793 tmp_access( 794 struct vnode *vp, 795 int mode, 796 int flags, 797 struct cred *cred, 798 caller_context_t *ct) 799 { 800 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 801 int error; 802 803 mutex_enter(&tp->tn_tlock); 804 error = tmp_taccess(tp, mode, cred); 805 mutex_exit(&tp->tn_tlock); 806 return (error); 807 } 808 809 /* ARGSUSED3 */ 810 static int 811 tmp_lookup( 812 struct vnode *dvp, 813 char *nm, 814 struct vnode **vpp, 815 struct pathname *pnp, 816 int flags, 817 struct vnode *rdir, 818 struct cred *cred, 819 caller_context_t *ct, 820 int *direntflags, 821 pathname_t *realpnp) 822 { 823 struct tmpnode *tp = (struct tmpnode *)VTOTN(dvp); 824 struct tmpnode *ntp = NULL; 825 int error; 826 827 828 /* allow cd into @ dir */ 829 if (flags & LOOKUP_XATTR) { 830 struct tmpnode *xdp; 831 struct tmount *tm; 832 833 /* 834 * don't allow attributes if not mounted XATTR support 835 */ 836 if (!(dvp->v_vfsp->vfs_flag & VFS_XATTR)) 837 return (EINVAL); 838 839 if (tp->tn_flags & ISXATTR) 840 /* No attributes on attributes */ 841 return (EINVAL); 842 843 rw_enter(&tp->tn_rwlock, RW_WRITER); 844 if (tp->tn_xattrdp == NULL) { 845 if (!(flags & CREATE_XATTR_DIR)) { 846 rw_exit(&tp->tn_rwlock); 847 return (ENOENT); 848 } 849 850 /* 851 * No attribute directory exists for this 852 * node - create the attr dir as a side effect 853 * of this lookup. 854 */ 855 856 /* 857 * Make sure we have adequate permission... 858 */ 859 860 if ((error = tmp_taccess(tp, VWRITE, cred)) != 0) { 861 rw_exit(&tp->tn_rwlock); 862 return (error); 863 } 864 865 xdp = tmp_memalloc(sizeof (struct tmpnode), 866 TMP_MUSTHAVE); 867 tm = VTOTM(dvp); 868 tmpnode_init(tm, xdp, &tp->tn_attr, NULL); 869 /* 870 * Fix-up fields unique to attribute directories. 871 */ 872 xdp->tn_flags = ISXATTR; 873 xdp->tn_type = VDIR; 874 if (tp->tn_type == VDIR) { 875 xdp->tn_mode = tp->tn_attr.va_mode; 876 } else { 877 xdp->tn_mode = 0700; 878 if (tp->tn_attr.va_mode & 0040) 879 xdp->tn_mode |= 0750; 880 if (tp->tn_attr.va_mode & 0004) 881 xdp->tn_mode |= 0705; 882 } 883 xdp->tn_vnode->v_type = VDIR; 884 xdp->tn_vnode->v_flag |= V_XATTRDIR; 885 tdirinit(tp, xdp); 886 tp->tn_xattrdp = xdp; 887 } else { 888 VN_HOLD(tp->tn_xattrdp->tn_vnode); 889 } 890 *vpp = TNTOV(tp->tn_xattrdp); 891 rw_exit(&tp->tn_rwlock); 892 return (0); 893 } 894 895 /* 896 * Null component name is a synonym for directory being searched. 897 */ 898 if (*nm == '\0') { 899 VN_HOLD(dvp); 900 *vpp = dvp; 901 return (0); 902 } 903 ASSERT(tp); 904 905 error = tdirlookup(tp, nm, &ntp, cred); 906 907 if (error == 0) { 908 ASSERT(ntp); 909 *vpp = TNTOV(ntp); 910 /* 911 * If vnode is a device return special vnode instead 912 */ 913 if (IS_DEVVP(*vpp)) { 914 struct vnode *newvp; 915 916 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, 917 cred); 918 VN_RELE(*vpp); 919 *vpp = newvp; 920 } 921 } 922 TRACE_4(TR_FAC_TMPFS, TR_TMPFS_LOOKUP, 923 "tmpfs lookup:vp %p name %s vpp %p error %d", 924 dvp, nm, vpp, error); 925 return (error); 926 } 927 928 /*ARGSUSED7*/ 929 static int 930 tmp_create( 931 struct vnode *dvp, 932 char *nm, 933 struct vattr *vap, 934 enum vcexcl exclusive, 935 int mode, 936 struct vnode **vpp, 937 struct cred *cred, 938 int flag, 939 caller_context_t *ct, 940 vsecattr_t *vsecp) 941 { 942 struct tmpnode *parent; 943 struct tmount *tm; 944 struct tmpnode *self; 945 int error; 946 struct tmpnode *oldtp; 947 948 again: 949 parent = (struct tmpnode *)VTOTN(dvp); 950 tm = (struct tmount *)VTOTM(dvp); 951 self = NULL; 952 error = 0; 953 oldtp = NULL; 954 955 /* device files not allowed in ext. attr dirs */ 956 if ((parent->tn_flags & ISXATTR) && 957 (vap->va_type == VBLK || vap->va_type == VCHR || 958 vap->va_type == VFIFO || vap->va_type == VDOOR || 959 vap->va_type == VSOCK || vap->va_type == VPORT)) 960 return (EINVAL); 961 962 if (vap->va_type == VREG && (vap->va_mode & VSVTX)) { 963 /* Must be privileged to set sticky bit */ 964 if (secpolicy_vnode_stky_modify(cred)) 965 vap->va_mode &= ~VSVTX; 966 } else if (vap->va_type == VNON) { 967 return (EINVAL); 968 } 969 970 /* 971 * Null component name is a synonym for directory being searched. 972 */ 973 if (*nm == '\0') { 974 VN_HOLD(dvp); 975 oldtp = parent; 976 } else { 977 error = tdirlookup(parent, nm, &oldtp, cred); 978 } 979 980 if (error == 0) { /* name found */ 981 ASSERT(oldtp); 982 983 rw_enter(&oldtp->tn_rwlock, RW_WRITER); 984 985 /* 986 * if create/read-only an existing 987 * directory, allow it 988 */ 989 if (exclusive == EXCL) 990 error = EEXIST; 991 else if ((oldtp->tn_type == VDIR) && (mode & VWRITE)) 992 error = EISDIR; 993 else { 994 error = tmp_taccess(oldtp, mode, cred); 995 } 996 997 if (error) { 998 rw_exit(&oldtp->tn_rwlock); 999 tmpnode_rele(oldtp); 1000 return (error); 1001 } 1002 *vpp = TNTOV(oldtp); 1003 if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) && 1004 vap->va_size == 0) { 1005 rw_enter(&oldtp->tn_contents, RW_WRITER); 1006 (void) tmpnode_trunc(tm, oldtp, 0); 1007 rw_exit(&oldtp->tn_contents); 1008 } 1009 rw_exit(&oldtp->tn_rwlock); 1010 if (IS_DEVVP(*vpp)) { 1011 struct vnode *newvp; 1012 1013 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, 1014 cred); 1015 VN_RELE(*vpp); 1016 if (newvp == NULL) { 1017 return (ENOSYS); 1018 } 1019 *vpp = newvp; 1020 } 1021 1022 if (error == 0) { 1023 vnevent_create(*vpp, ct); 1024 } 1025 return (0); 1026 } 1027 1028 if (error != ENOENT) 1029 return (error); 1030 1031 rw_enter(&parent->tn_rwlock, RW_WRITER); 1032 error = tdirenter(tm, parent, nm, DE_CREATE, 1033 (struct tmpnode *)NULL, (struct tmpnode *)NULL, 1034 vap, &self, cred, ct); 1035 rw_exit(&parent->tn_rwlock); 1036 1037 if (error) { 1038 if (self) 1039 tmpnode_rele(self); 1040 1041 if (error == EEXIST) { 1042 /* 1043 * This means that the file was created sometime 1044 * after we checked and did not find it and when 1045 * we went to create it. 1046 * Since creat() is supposed to truncate a file 1047 * that already exits go back to the begining 1048 * of the function. This time we will find it 1049 * and go down the tmp_trunc() path 1050 */ 1051 goto again; 1052 } 1053 return (error); 1054 } 1055 1056 *vpp = TNTOV(self); 1057 1058 if (!error && IS_DEVVP(*vpp)) { 1059 struct vnode *newvp; 1060 1061 newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cred); 1062 VN_RELE(*vpp); 1063 if (newvp == NULL) 1064 return (ENOSYS); 1065 *vpp = newvp; 1066 } 1067 TRACE_3(TR_FAC_TMPFS, TR_TMPFS_CREATE, 1068 "tmpfs create:dvp %p nm %s vpp %p", dvp, nm, vpp); 1069 return (0); 1070 } 1071 1072 /* ARGSUSED3 */ 1073 static int 1074 tmp_remove( 1075 struct vnode *dvp, 1076 char *nm, 1077 struct cred *cred, 1078 caller_context_t *ct, 1079 int flags) 1080 { 1081 struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp); 1082 int error; 1083 struct tmpnode *tp = NULL; 1084 1085 error = tdirlookup(parent, nm, &tp, cred); 1086 if (error) 1087 return (error); 1088 1089 ASSERT(tp); 1090 rw_enter(&parent->tn_rwlock, RW_WRITER); 1091 rw_enter(&tp->tn_rwlock, RW_WRITER); 1092 1093 if (tp->tn_type != VDIR || 1094 (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0) 1095 error = tdirdelete(parent, tp, nm, DR_REMOVE, cred); 1096 1097 rw_exit(&tp->tn_rwlock); 1098 rw_exit(&parent->tn_rwlock); 1099 vnevent_remove(TNTOV(tp), dvp, nm, ct); 1100 tmpnode_rele(tp); 1101 1102 TRACE_3(TR_FAC_TMPFS, TR_TMPFS_REMOVE, 1103 "tmpfs remove:dvp %p nm %s error %d", dvp, nm, error); 1104 return (error); 1105 } 1106 1107 /* ARGSUSED4 */ 1108 static int 1109 tmp_link( 1110 struct vnode *dvp, 1111 struct vnode *srcvp, 1112 char *tnm, 1113 struct cred *cred, 1114 caller_context_t *ct, 1115 int flags) 1116 { 1117 struct tmpnode *parent; 1118 struct tmpnode *from; 1119 struct tmount *tm = (struct tmount *)VTOTM(dvp); 1120 int error; 1121 struct tmpnode *found = NULL; 1122 struct vnode *realvp; 1123 1124 if (VOP_REALVP(srcvp, &realvp, ct) == 0) 1125 srcvp = realvp; 1126 1127 parent = (struct tmpnode *)VTOTN(dvp); 1128 from = (struct tmpnode *)VTOTN(srcvp); 1129 1130 if ((srcvp->v_type == VDIR && 1131 secpolicy_fs_linkdir(cred, dvp->v_vfsp)) || 1132 (from->tn_uid != crgetuid(cred) && secpolicy_basic_link(cred))) 1133 return (EPERM); 1134 1135 /* 1136 * Make sure link for extended attributes is valid 1137 * We only support hard linking of xattr's in xattrdir to an xattrdir 1138 */ 1139 if ((from->tn_flags & ISXATTR) != (parent->tn_flags & ISXATTR)) 1140 return (EINVAL); 1141 1142 error = tdirlookup(parent, tnm, &found, cred); 1143 if (error == 0) { 1144 ASSERT(found); 1145 tmpnode_rele(found); 1146 return (EEXIST); 1147 } 1148 1149 if (error != ENOENT) 1150 return (error); 1151 1152 rw_enter(&parent->tn_rwlock, RW_WRITER); 1153 error = tdirenter(tm, parent, tnm, DE_LINK, (struct tmpnode *)NULL, 1154 from, NULL, (struct tmpnode **)NULL, cred, ct); 1155 rw_exit(&parent->tn_rwlock); 1156 if (error == 0) { 1157 vnevent_link(srcvp, ct); 1158 } 1159 return (error); 1160 } 1161 1162 /* ARGSUSED5 */ 1163 static int 1164 tmp_rename( 1165 struct vnode *odvp, /* source parent vnode */ 1166 char *onm, /* source name */ 1167 struct vnode *ndvp, /* destination parent vnode */ 1168 char *nnm, /* destination name */ 1169 struct cred *cred, 1170 caller_context_t *ct, 1171 int flags) 1172 { 1173 struct tmpnode *fromparent; 1174 struct tmpnode *toparent; 1175 struct tmpnode *fromtp = NULL; /* source tmpnode */ 1176 struct tmount *tm = (struct tmount *)VTOTM(odvp); 1177 int error; 1178 int samedir = 0; /* set if odvp == ndvp */ 1179 struct vnode *realvp; 1180 1181 if (VOP_REALVP(ndvp, &realvp, ct) == 0) 1182 ndvp = realvp; 1183 1184 fromparent = (struct tmpnode *)VTOTN(odvp); 1185 toparent = (struct tmpnode *)VTOTN(ndvp); 1186 1187 if ((fromparent->tn_flags & ISXATTR) != (toparent->tn_flags & ISXATTR)) 1188 return (EINVAL); 1189 1190 mutex_enter(&tm->tm_renamelck); 1191 1192 /* 1193 * Look up tmpnode of file we're supposed to rename. 1194 */ 1195 error = tdirlookup(fromparent, onm, &fromtp, cred); 1196 if (error) { 1197 mutex_exit(&tm->tm_renamelck); 1198 return (error); 1199 } 1200 1201 /* 1202 * Make sure we can delete the old (source) entry. This 1203 * requires write permission on the containing directory. If 1204 * that directory is "sticky" it requires further checks. 1205 */ 1206 if (((error = tmp_taccess(fromparent, VWRITE, cred)) != 0) || 1207 (error = tmp_sticky_remove_access(fromparent, fromtp, cred)) != 0) 1208 goto done; 1209 1210 /* 1211 * Check for renaming to or from '.' or '..' or that 1212 * fromtp == fromparent 1213 */ 1214 if ((onm[0] == '.' && 1215 (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || 1216 (nnm[0] == '.' && 1217 (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) || 1218 (fromparent == fromtp)) { 1219 error = EINVAL; 1220 goto done; 1221 } 1222 1223 samedir = (fromparent == toparent); 1224 /* 1225 * Make sure we can search and rename into the new 1226 * (destination) directory. 1227 */ 1228 if (!samedir) { 1229 error = tmp_taccess(toparent, VEXEC|VWRITE, cred); 1230 if (error) 1231 goto done; 1232 } 1233 1234 /* 1235 * Link source to new target 1236 */ 1237 rw_enter(&toparent->tn_rwlock, RW_WRITER); 1238 error = tdirenter(tm, toparent, nnm, DE_RENAME, 1239 fromparent, fromtp, (struct vattr *)NULL, 1240 (struct tmpnode **)NULL, cred, ct); 1241 rw_exit(&toparent->tn_rwlock); 1242 1243 if (error) { 1244 /* 1245 * ESAME isn't really an error; it indicates that the 1246 * operation should not be done because the source and target 1247 * are the same file, but that no error should be reported. 1248 */ 1249 if (error == ESAME) 1250 error = 0; 1251 goto done; 1252 } 1253 vnevent_rename_src(TNTOV(fromtp), odvp, onm, ct); 1254 1255 /* 1256 * Notify the target directory if not same as 1257 * source directory. 1258 */ 1259 if (ndvp != odvp) { 1260 vnevent_rename_dest_dir(ndvp, ct); 1261 } 1262 1263 /* 1264 * Unlink from source. 1265 */ 1266 rw_enter(&fromparent->tn_rwlock, RW_WRITER); 1267 rw_enter(&fromtp->tn_rwlock, RW_WRITER); 1268 1269 error = tdirdelete(fromparent, fromtp, onm, DR_RENAME, cred); 1270 1271 /* 1272 * The following handles the case where our source tmpnode was 1273 * removed before we got to it. 1274 * 1275 * XXX We should also cleanup properly in the case where tdirdelete 1276 * fails for some other reason. Currently this case shouldn't happen. 1277 * (see 1184991). 1278 */ 1279 if (error == ENOENT) 1280 error = 0; 1281 1282 rw_exit(&fromtp->tn_rwlock); 1283 rw_exit(&fromparent->tn_rwlock); 1284 done: 1285 tmpnode_rele(fromtp); 1286 mutex_exit(&tm->tm_renamelck); 1287 1288 TRACE_5(TR_FAC_TMPFS, TR_TMPFS_RENAME, 1289 "tmpfs rename:ovp %p onm %s nvp %p nnm %s error %d", odvp, onm, 1290 ndvp, nnm, error); 1291 return (error); 1292 } 1293 1294 /* ARGSUSED5 */ 1295 static int 1296 tmp_mkdir( 1297 struct vnode *dvp, 1298 char *nm, 1299 struct vattr *va, 1300 struct vnode **vpp, 1301 struct cred *cred, 1302 caller_context_t *ct, 1303 int flags, 1304 vsecattr_t *vsecp) 1305 { 1306 struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp); 1307 struct tmpnode *self = NULL; 1308 struct tmount *tm = (struct tmount *)VTOTM(dvp); 1309 int error; 1310 1311 /* no new dirs allowed in xattr dirs */ 1312 if (parent->tn_flags & ISXATTR) 1313 return (EINVAL); 1314 1315 /* 1316 * Might be dangling directory. Catch it here, 1317 * because a ENOENT return from tdirlookup() is 1318 * an "o.k. return". 1319 */ 1320 if (parent->tn_nlink == 0) 1321 return (ENOENT); 1322 1323 error = tdirlookup(parent, nm, &self, cred); 1324 if (error == 0) { 1325 ASSERT(self); 1326 tmpnode_rele(self); 1327 return (EEXIST); 1328 } 1329 if (error != ENOENT) 1330 return (error); 1331 1332 rw_enter(&parent->tn_rwlock, RW_WRITER); 1333 error = tdirenter(tm, parent, nm, DE_MKDIR, (struct tmpnode *)NULL, 1334 (struct tmpnode *)NULL, va, &self, cred, ct); 1335 if (error) { 1336 rw_exit(&parent->tn_rwlock); 1337 if (self) 1338 tmpnode_rele(self); 1339 return (error); 1340 } 1341 rw_exit(&parent->tn_rwlock); 1342 *vpp = TNTOV(self); 1343 return (0); 1344 } 1345 1346 /* ARGSUSED4 */ 1347 static int 1348 tmp_rmdir( 1349 struct vnode *dvp, 1350 char *nm, 1351 struct vnode *cdir, 1352 struct cred *cred, 1353 caller_context_t *ct, 1354 int flags) 1355 { 1356 struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp); 1357 struct tmpnode *self = NULL; 1358 struct vnode *vp; 1359 int error = 0; 1360 1361 /* 1362 * Return error when removing . and .. 1363 */ 1364 if (strcmp(nm, ".") == 0) 1365 return (EINVAL); 1366 if (strcmp(nm, "..") == 0) 1367 return (EEXIST); /* Should be ENOTEMPTY */ 1368 error = tdirlookup(parent, nm, &self, cred); 1369 if (error) 1370 return (error); 1371 1372 rw_enter(&parent->tn_rwlock, RW_WRITER); 1373 rw_enter(&self->tn_rwlock, RW_WRITER); 1374 1375 vp = TNTOV(self); 1376 if (vp == dvp || vp == cdir) { 1377 error = EINVAL; 1378 goto done1; 1379 } 1380 if (self->tn_type != VDIR) { 1381 error = ENOTDIR; 1382 goto done1; 1383 } 1384 1385 mutex_enter(&self->tn_tlock); 1386 if (self->tn_nlink > 2) { 1387 mutex_exit(&self->tn_tlock); 1388 error = EEXIST; 1389 goto done1; 1390 } 1391 mutex_exit(&self->tn_tlock); 1392 1393 if (vn_vfswlock(vp)) { 1394 error = EBUSY; 1395 goto done1; 1396 } 1397 if (vn_mountedvfs(vp) != NULL) { 1398 error = EBUSY; 1399 goto done; 1400 } 1401 1402 /* 1403 * Check for an empty directory 1404 * i.e. only includes entries for "." and ".." 1405 */ 1406 if (self->tn_dirents > 2) { 1407 error = EEXIST; /* SIGH should be ENOTEMPTY */ 1408 /* 1409 * Update atime because checking tn_dirents is logically 1410 * equivalent to reading the directory 1411 */ 1412 gethrestime(&self->tn_atime); 1413 goto done; 1414 } 1415 1416 error = tdirdelete(parent, self, nm, DR_RMDIR, cred); 1417 done: 1418 vn_vfsunlock(vp); 1419 done1: 1420 rw_exit(&self->tn_rwlock); 1421 rw_exit(&parent->tn_rwlock); 1422 vnevent_rmdir(TNTOV(self), dvp, nm, ct); 1423 tmpnode_rele(self); 1424 1425 return (error); 1426 } 1427 1428 /* ARGSUSED2 */ 1429 static int 1430 tmp_readdir( 1431 struct vnode *vp, 1432 struct uio *uiop, 1433 struct cred *cred, 1434 int *eofp, 1435 caller_context_t *ct, 1436 int flags) 1437 { 1438 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 1439 struct tdirent *tdp; 1440 int error = 0; 1441 size_t namelen; 1442 struct dirent64 *dp; 1443 ulong_t offset; 1444 ulong_t total_bytes_wanted; 1445 long outcount = 0; 1446 long bufsize; 1447 int reclen; 1448 caddr_t outbuf; 1449 1450 if (uiop->uio_loffset >= MAXOFF_T) { 1451 if (eofp) 1452 *eofp = 1; 1453 return (0); 1454 } 1455 /* 1456 * assuming system call has already called tmp_rwlock 1457 */ 1458 ASSERT(RW_READ_HELD(&tp->tn_rwlock)); 1459 1460 if (uiop->uio_iovcnt != 1) 1461 return (EINVAL); 1462 1463 if (vp->v_type != VDIR) 1464 return (ENOTDIR); 1465 1466 /* 1467 * There's a window here where someone could have removed 1468 * all the entries in the directory after we put a hold on the 1469 * vnode but before we grabbed the rwlock. Just return. 1470 */ 1471 if (tp->tn_dir == NULL) { 1472 if (tp->tn_nlink) { 1473 panic("empty directory 0x%p", (void *)tp); 1474 /*NOTREACHED*/ 1475 } 1476 return (0); 1477 } 1478 1479 /* 1480 * Get space for multiple directory entries 1481 */ 1482 total_bytes_wanted = uiop->uio_iov->iov_len; 1483 bufsize = total_bytes_wanted + sizeof (struct dirent64); 1484 outbuf = kmem_alloc(bufsize, KM_SLEEP); 1485 1486 dp = (struct dirent64 *)outbuf; 1487 1488 1489 offset = 0; 1490 tdp = tp->tn_dir; 1491 while (tdp) { 1492 namelen = strlen(tdp->td_name); /* no +1 needed */ 1493 offset = tdp->td_offset; 1494 if (offset >= uiop->uio_offset) { 1495 reclen = (int)DIRENT64_RECLEN(namelen); 1496 if (outcount + reclen > total_bytes_wanted) { 1497 if (!outcount) 1498 /* 1499 * Buffer too small for any entries. 1500 */ 1501 error = EINVAL; 1502 break; 1503 } 1504 ASSERT(tdp->td_tmpnode != NULL); 1505 1506 /* use strncpy(9f) to zero out uninitialized bytes */ 1507 1508 (void) strncpy(dp->d_name, tdp->td_name, 1509 DIRENT64_NAMELEN(reclen)); 1510 dp->d_reclen = (ushort_t)reclen; 1511 dp->d_ino = (ino64_t)tdp->td_tmpnode->tn_nodeid; 1512 dp->d_off = (offset_t)tdp->td_offset + 1; 1513 dp = (struct dirent64 *) 1514 ((uintptr_t)dp + dp->d_reclen); 1515 outcount += reclen; 1516 ASSERT(outcount <= bufsize); 1517 } 1518 tdp = tdp->td_next; 1519 } 1520 1521 if (!error) 1522 error = uiomove(outbuf, outcount, UIO_READ, uiop); 1523 1524 if (!error) { 1525 /* If we reached the end of the list our offset */ 1526 /* should now be just past the end. */ 1527 if (!tdp) { 1528 offset += 1; 1529 if (eofp) 1530 *eofp = 1; 1531 } else if (eofp) 1532 *eofp = 0; 1533 uiop->uio_offset = offset; 1534 } 1535 gethrestime(&tp->tn_atime); 1536 kmem_free(outbuf, bufsize); 1537 return (error); 1538 } 1539 1540 /* ARGSUSED5 */ 1541 static int 1542 tmp_symlink( 1543 struct vnode *dvp, 1544 char *lnm, 1545 struct vattr *tva, 1546 char *tnm, 1547 struct cred *cred, 1548 caller_context_t *ct, 1549 int flags) 1550 { 1551 struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp); 1552 struct tmpnode *self = (struct tmpnode *)NULL; 1553 struct tmount *tm = (struct tmount *)VTOTM(dvp); 1554 char *cp = NULL; 1555 int error; 1556 size_t len; 1557 1558 /* no symlinks allowed to files in xattr dirs */ 1559 if (parent->tn_flags & ISXATTR) 1560 return (EINVAL); 1561 1562 error = tdirlookup(parent, lnm, &self, cred); 1563 if (error == 0) { 1564 /* 1565 * The entry already exists 1566 */ 1567 tmpnode_rele(self); 1568 return (EEXIST); /* was 0 */ 1569 } 1570 1571 if (error != ENOENT) { 1572 if (self != NULL) 1573 tmpnode_rele(self); 1574 return (error); 1575 } 1576 1577 rw_enter(&parent->tn_rwlock, RW_WRITER); 1578 error = tdirenter(tm, parent, lnm, DE_CREATE, (struct tmpnode *)NULL, 1579 (struct tmpnode *)NULL, tva, &self, cred, ct); 1580 rw_exit(&parent->tn_rwlock); 1581 1582 if (error) { 1583 if (self) 1584 tmpnode_rele(self); 1585 return (error); 1586 } 1587 len = strlen(tnm) + 1; 1588 cp = tmp_memalloc(len, 0); 1589 if (cp == NULL) { 1590 tmpnode_rele(self); 1591 return (ENOSPC); 1592 } 1593 (void) strcpy(cp, tnm); 1594 1595 self->tn_symlink = cp; 1596 self->tn_size = len - 1; 1597 tmpnode_rele(self); 1598 return (error); 1599 } 1600 1601 /* ARGSUSED2 */ 1602 static int 1603 tmp_readlink( 1604 struct vnode *vp, 1605 struct uio *uiop, 1606 struct cred *cred, 1607 caller_context_t *ct) 1608 { 1609 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 1610 int error = 0; 1611 1612 if (vp->v_type != VLNK) 1613 return (EINVAL); 1614 1615 rw_enter(&tp->tn_rwlock, RW_READER); 1616 rw_enter(&tp->tn_contents, RW_READER); 1617 error = uiomove(tp->tn_symlink, tp->tn_size, UIO_READ, uiop); 1618 gethrestime(&tp->tn_atime); 1619 rw_exit(&tp->tn_contents); 1620 rw_exit(&tp->tn_rwlock); 1621 return (error); 1622 } 1623 1624 /* ARGSUSED */ 1625 static int 1626 tmp_fsync( 1627 struct vnode *vp, 1628 int syncflag, 1629 struct cred *cred, 1630 caller_context_t *ct) 1631 { 1632 return (0); 1633 } 1634 1635 /* ARGSUSED */ 1636 static void 1637 tmp_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1638 { 1639 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 1640 struct tmount *tm = (struct tmount *)VFSTOTM(vp->v_vfsp); 1641 1642 rw_enter(&tp->tn_rwlock, RW_WRITER); 1643 top: 1644 mutex_enter(&tp->tn_tlock); 1645 mutex_enter(&vp->v_lock); 1646 ASSERT(vp->v_count >= 1); 1647 1648 /* 1649 * If we don't have the last hold or the link count is non-zero, 1650 * there's little to do -- just drop our hold. 1651 */ 1652 if (vp->v_count > 1 || tp->tn_nlink != 0) { 1653 vp->v_count--; 1654 mutex_exit(&vp->v_lock); 1655 mutex_exit(&tp->tn_tlock); 1656 rw_exit(&tp->tn_rwlock); 1657 return; 1658 } 1659 1660 /* 1661 * We have the last hold *and* the link count is zero, so this 1662 * tmpnode is dead from the filesystem's viewpoint. However, 1663 * if the tmpnode has any pages associated with it (i.e. if it's 1664 * a normal file with non-zero size), the tmpnode can still be 1665 * discovered by pageout or fsflush via the page vnode pointers. 1666 * In this case we must drop all our locks, truncate the tmpnode, 1667 * and try the whole dance again. 1668 */ 1669 if (tp->tn_size != 0) { 1670 if (tp->tn_type == VREG) { 1671 mutex_exit(&vp->v_lock); 1672 mutex_exit(&tp->tn_tlock); 1673 rw_enter(&tp->tn_contents, RW_WRITER); 1674 (void) tmpnode_trunc(tm, tp, 0); 1675 rw_exit(&tp->tn_contents); 1676 ASSERT(tp->tn_size == 0); 1677 ASSERT(tp->tn_nblocks == 0); 1678 goto top; 1679 } 1680 if (tp->tn_type == VLNK) 1681 tmp_memfree(tp->tn_symlink, tp->tn_size + 1); 1682 } 1683 1684 /* 1685 * Remove normal file/dir's xattr dir and xattrs. 1686 */ 1687 if (tp->tn_xattrdp) { 1688 struct tmpnode *xtp = tp->tn_xattrdp; 1689 1690 ASSERT(xtp->tn_flags & ISXATTR); 1691 tmpnode_hold(xtp); 1692 rw_enter(&xtp->tn_rwlock, RW_WRITER); 1693 tdirtrunc(xtp); 1694 DECR_COUNT(&xtp->tn_nlink, &xtp->tn_tlock); 1695 tp->tn_xattrdp = NULL; 1696 rw_exit(&xtp->tn_rwlock); 1697 tmpnode_rele(xtp); 1698 } 1699 1700 mutex_exit(&vp->v_lock); 1701 mutex_exit(&tp->tn_tlock); 1702 /* Here's our chance to send invalid event while we're between locks */ 1703 vn_invalid(TNTOV(tp)); 1704 mutex_enter(&tm->tm_contents); 1705 if (tp->tn_forw == NULL) 1706 tm->tm_rootnode->tn_back = tp->tn_back; 1707 else 1708 tp->tn_forw->tn_back = tp->tn_back; 1709 tp->tn_back->tn_forw = tp->tn_forw; 1710 mutex_exit(&tm->tm_contents); 1711 rw_exit(&tp->tn_rwlock); 1712 rw_destroy(&tp->tn_rwlock); 1713 mutex_destroy(&tp->tn_tlock); 1714 vn_free(TNTOV(tp)); 1715 tmp_memfree(tp, sizeof (struct tmpnode)); 1716 } 1717 1718 /* ARGSUSED2 */ 1719 static int 1720 tmp_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1721 { 1722 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 1723 struct tfid *tfid; 1724 1725 if (fidp->fid_len < (sizeof (struct tfid) - sizeof (ushort_t))) { 1726 fidp->fid_len = sizeof (struct tfid) - sizeof (ushort_t); 1727 return (ENOSPC); 1728 } 1729 1730 tfid = (struct tfid *)fidp; 1731 bzero(tfid, sizeof (struct tfid)); 1732 tfid->tfid_len = (int)sizeof (struct tfid) - sizeof (ushort_t); 1733 1734 tfid->tfid_ino = tp->tn_nodeid; 1735 tfid->tfid_gen = tp->tn_gen; 1736 1737 return (0); 1738 } 1739 1740 1741 /* 1742 * Return all the pages from [off..off+len] in given file 1743 */ 1744 /* ARGSUSED */ 1745 static int 1746 tmp_getpage( 1747 struct vnode *vp, 1748 offset_t off, 1749 size_t len, 1750 uint_t *protp, 1751 page_t *pl[], 1752 size_t plsz, 1753 struct seg *seg, 1754 caddr_t addr, 1755 enum seg_rw rw, 1756 struct cred *cr, 1757 caller_context_t *ct) 1758 { 1759 int err = 0; 1760 struct tmpnode *tp = VTOTN(vp); 1761 anoff_t toff = (anoff_t)off; 1762 size_t tlen = len; 1763 u_offset_t tmpoff; 1764 timestruc_t now; 1765 1766 rw_enter(&tp->tn_contents, RW_READER); 1767 1768 if (off + len > tp->tn_size + PAGEOFFSET) { 1769 err = EFAULT; 1770 goto out; 1771 } 1772 /* 1773 * Look for holes (no anon slot) in faulting range. If there are 1774 * holes we have to switch to a write lock and fill them in. Swap 1775 * space for holes was already reserved when the file was grown. 1776 */ 1777 tmpoff = toff; 1778 if (non_anon(tp->tn_anon, btop(off), &tmpoff, &tlen)) { 1779 if (!rw_tryupgrade(&tp->tn_contents)) { 1780 rw_exit(&tp->tn_contents); 1781 rw_enter(&tp->tn_contents, RW_WRITER); 1782 /* Size may have changed when lock was dropped */ 1783 if (off + len > tp->tn_size + PAGEOFFSET) { 1784 err = EFAULT; 1785 goto out; 1786 } 1787 } 1788 for (toff = (anoff_t)off; toff < (anoff_t)off + len; 1789 toff += PAGESIZE) { 1790 if (anon_get_ptr(tp->tn_anon, btop(toff)) == NULL) { 1791 /* XXX - may allocate mem w. write lock held */ 1792 (void) anon_set_ptr(tp->tn_anon, btop(toff), 1793 anon_alloc(vp, toff), ANON_SLEEP); 1794 tp->tn_nblocks++; 1795 } 1796 } 1797 rw_downgrade(&tp->tn_contents); 1798 } 1799 1800 1801 if (len <= PAGESIZE) 1802 err = tmp_getapage(vp, (u_offset_t)off, len, protp, pl, plsz, 1803 seg, addr, rw, cr); 1804 else 1805 err = pvn_getpages(tmp_getapage, vp, (u_offset_t)off, len, 1806 protp, pl, plsz, seg, addr, rw, cr); 1807 1808 gethrestime(&now); 1809 tp->tn_atime = now; 1810 if (rw == S_WRITE) 1811 tp->tn_mtime = now; 1812 1813 out: 1814 rw_exit(&tp->tn_contents); 1815 return (err); 1816 } 1817 1818 /* 1819 * Called from pvn_getpages or swap_getpage to get a particular page. 1820 */ 1821 /*ARGSUSED*/ 1822 static int 1823 tmp_getapage( 1824 struct vnode *vp, 1825 u_offset_t off, 1826 size_t len, 1827 uint_t *protp, 1828 page_t *pl[], 1829 size_t plsz, 1830 struct seg *seg, 1831 caddr_t addr, 1832 enum seg_rw rw, 1833 struct cred *cr) 1834 { 1835 struct page *pp; 1836 int flags; 1837 int err = 0; 1838 struct vnode *pvp; 1839 u_offset_t poff; 1840 1841 if (protp != NULL) 1842 *protp = PROT_ALL; 1843 again: 1844 if (pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED)) { 1845 if (pl) { 1846 pl[0] = pp; 1847 pl[1] = NULL; 1848 } else { 1849 page_unlock(pp); 1850 } 1851 } else { 1852 pp = page_create_va(vp, off, PAGESIZE, 1853 PG_WAIT | PG_EXCL, seg, addr); 1854 /* 1855 * Someone raced in and created the page after we did the 1856 * lookup but before we did the create, so go back and 1857 * try to look it up again. 1858 */ 1859 if (pp == NULL) 1860 goto again; 1861 /* 1862 * Fill page from backing store, if any. If none, then 1863 * either this is a newly filled hole or page must have 1864 * been unmodified and freed so just zero it out. 1865 */ 1866 err = swap_getphysname(vp, off, &pvp, &poff); 1867 if (err) { 1868 panic("tmp_getapage: no anon slot vp %p " 1869 "off %llx pp %p\n", (void *)vp, off, (void *)pp); 1870 } 1871 if (pvp) { 1872 flags = (pl == NULL ? B_ASYNC|B_READ : B_READ); 1873 err = VOP_PAGEIO(pvp, pp, (u_offset_t)poff, PAGESIZE, 1874 flags, cr, NULL); 1875 if (flags & B_ASYNC) 1876 pp = NULL; 1877 } else if (rw != S_CREATE) { 1878 pagezero(pp, 0, PAGESIZE); 1879 } 1880 if (err && pp) 1881 pvn_read_done(pp, B_ERROR); 1882 if (err == 0) { 1883 if (pl) 1884 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw); 1885 else 1886 pvn_io_done(pp); 1887 } 1888 } 1889 return (err); 1890 } 1891 1892 1893 /* 1894 * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}. 1895 * If len == 0, do from off to EOF. 1896 */ 1897 static int tmp_nopage = 0; /* Don't do tmp_putpage's if set */ 1898 1899 /* ARGSUSED */ 1900 int 1901 tmp_putpage( 1902 register struct vnode *vp, 1903 offset_t off, 1904 size_t len, 1905 int flags, 1906 struct cred *cr, 1907 caller_context_t *ct) 1908 { 1909 register page_t *pp; 1910 u_offset_t io_off; 1911 size_t io_len = 0; 1912 int err = 0; 1913 struct tmpnode *tp = VTOTN(vp); 1914 int dolock; 1915 1916 if (tmp_nopage) 1917 return (0); 1918 1919 ASSERT(vp->v_count != 0); 1920 1921 if (vp->v_flag & VNOMAP) 1922 return (ENOSYS); 1923 1924 /* 1925 * This being tmpfs, we don't ever do i/o unless we really 1926 * have to (when we're low on memory and pageout calls us 1927 * with B_ASYNC | B_FREE or the user explicitly asks for it with 1928 * B_DONTNEED). 1929 * XXX to approximately track the mod time like ufs we should 1930 * update the times here. The problem is, once someone does a 1931 * store we never clear the mod bit and do i/o, thus fsflush 1932 * will keep calling us every 30 seconds to do the i/o and we'll 1933 * continually update the mod time. At least we update the mod 1934 * time on the first store because this results in a call to getpage. 1935 */ 1936 if (flags != (B_ASYNC | B_FREE) && (flags & B_INVAL) == 0 && 1937 (flags & B_DONTNEED) == 0) 1938 return (0); 1939 /* 1940 * If this thread owns the lock, i.e., this thread grabbed it 1941 * as writer somewhere above, then we don't need to grab the 1942 * lock as reader in this routine. 1943 */ 1944 dolock = (rw_owner(&tp->tn_contents) != curthread); 1945 1946 /* 1947 * If this is pageout don't block on the lock as you could deadlock 1948 * when freemem == 0 (another thread has the read lock and is blocked 1949 * creating a page, and a third thread is waiting to get the writers 1950 * lock - waiting writers priority blocks us from getting the read 1951 * lock). Of course, if the only freeable pages are on this tmpnode 1952 * we're hosed anyways. A better solution might be a new lock type. 1953 * Note: ufs has the same problem. 1954 */ 1955 if (curproc == proc_pageout) { 1956 if (!rw_tryenter(&tp->tn_contents, RW_READER)) 1957 return (ENOMEM); 1958 } else if (dolock) 1959 rw_enter(&tp->tn_contents, RW_READER); 1960 1961 if (!vn_has_cached_data(vp)) 1962 goto out; 1963 1964 if (len == 0) { 1965 if (curproc == proc_pageout) { 1966 panic("tmp: pageout can't block"); 1967 /*NOTREACHED*/ 1968 } 1969 1970 /* Search the entire vp list for pages >= off. */ 1971 err = pvn_vplist_dirty(vp, (u_offset_t)off, tmp_putapage, 1972 flags, cr); 1973 } else { 1974 u_offset_t eoff; 1975 1976 /* 1977 * Loop over all offsets in the range [off...off + len] 1978 * looking for pages to deal with. 1979 */ 1980 eoff = MIN(off + len, tp->tn_size); 1981 for (io_off = off; io_off < eoff; io_off += io_len) { 1982 /* 1983 * If we are not invalidating, synchronously 1984 * freeing or writing pages use the routine 1985 * page_lookup_nowait() to prevent reclaiming 1986 * them from the free list. 1987 */ 1988 if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) { 1989 pp = page_lookup(vp, io_off, 1990 (flags & (B_INVAL | B_FREE)) ? 1991 SE_EXCL : SE_SHARED); 1992 } else { 1993 pp = page_lookup_nowait(vp, io_off, 1994 (flags & B_FREE) ? SE_EXCL : SE_SHARED); 1995 } 1996 1997 if (pp == NULL || pvn_getdirty(pp, flags) == 0) 1998 io_len = PAGESIZE; 1999 else { 2000 err = tmp_putapage(vp, pp, &io_off, &io_len, 2001 flags, cr); 2002 if (err != 0) 2003 break; 2004 } 2005 } 2006 } 2007 /* If invalidating, verify all pages on vnode list are gone. */ 2008 if (err == 0 && off == 0 && len == 0 && 2009 (flags & B_INVAL) && vn_has_cached_data(vp)) { 2010 panic("tmp_putpage: B_INVAL, pages not gone"); 2011 /*NOTREACHED*/ 2012 } 2013 out: 2014 if ((curproc == proc_pageout) || dolock) 2015 rw_exit(&tp->tn_contents); 2016 /* 2017 * Only reason putapage is going to give us SE_NOSWAP as error 2018 * is when we ask a page to be written to physical backing store 2019 * and there is none. Ignore this because we might be dealing 2020 * with a swap page which does not have any backing store 2021 * on disk. In any other case we won't get this error over here. 2022 */ 2023 if (err == SE_NOSWAP) 2024 err = 0; 2025 return (err); 2026 } 2027 2028 long tmp_putpagecnt, tmp_pagespushed; 2029 2030 /* 2031 * Write out a single page. 2032 * For tmpfs this means choose a physical swap slot and write the page 2033 * out using VOP_PAGEIO. For performance, we attempt to kluster; i.e., 2034 * we try to find a bunch of other dirty pages adjacent in the file 2035 * and a bunch of contiguous swap slots, and then write all the pages 2036 * out in a single i/o. 2037 */ 2038 /*ARGSUSED*/ 2039 static int 2040 tmp_putapage( 2041 struct vnode *vp, 2042 page_t *pp, 2043 u_offset_t *offp, 2044 size_t *lenp, 2045 int flags, 2046 struct cred *cr) 2047 { 2048 int err; 2049 ulong_t klstart, kllen; 2050 page_t *pplist, *npplist; 2051 extern int klustsize; 2052 long tmp_klustsize; 2053 struct tmpnode *tp; 2054 size_t pp_off, pp_len; 2055 u_offset_t io_off; 2056 size_t io_len; 2057 struct vnode *pvp; 2058 u_offset_t pstart; 2059 u_offset_t offset; 2060 u_offset_t tmpoff; 2061 2062 ASSERT(PAGE_LOCKED(pp)); 2063 2064 /* Kluster in tmp_klustsize chunks */ 2065 tp = VTOTN(vp); 2066 tmp_klustsize = klustsize; 2067 offset = pp->p_offset; 2068 klstart = (offset / tmp_klustsize) * tmp_klustsize; 2069 kllen = MIN(tmp_klustsize, tp->tn_size - klstart); 2070 2071 /* Get a kluster of pages */ 2072 pplist = 2073 pvn_write_kluster(vp, pp, &tmpoff, &pp_len, klstart, kllen, flags); 2074 2075 pp_off = (size_t)tmpoff; 2076 2077 /* 2078 * Get a cluster of physical offsets for the pages; the amount we 2079 * get may be some subrange of what we ask for (io_off, io_len). 2080 */ 2081 io_off = pp_off; 2082 io_len = pp_len; 2083 err = swap_newphysname(vp, offset, &io_off, &io_len, &pvp, &pstart); 2084 ASSERT(err != SE_NOANON); /* anon slot must have been filled */ 2085 if (err) { 2086 pvn_write_done(pplist, B_ERROR | B_WRITE | flags); 2087 /* 2088 * If this routine is called as a result of segvn_sync 2089 * operation and we have no physical swap then we can get an 2090 * error here. In such case we would return SE_NOSWAP as error. 2091 * At this point, we expect only SE_NOSWAP. 2092 */ 2093 ASSERT(err == SE_NOSWAP); 2094 if (flags & B_INVAL) 2095 err = ENOMEM; 2096 goto out; 2097 } 2098 ASSERT(pp_off <= io_off && io_off + io_len <= pp_off + pp_len); 2099 ASSERT(io_off <= offset && offset < io_off + io_len); 2100 2101 /* Toss pages at front/rear that we couldn't get physical backing for */ 2102 if (io_off != pp_off) { 2103 npplist = NULL; 2104 page_list_break(&pplist, &npplist, btop(io_off - pp_off)); 2105 ASSERT(pplist->p_offset == pp_off); 2106 ASSERT(pplist->p_prev->p_offset == io_off - PAGESIZE); 2107 pvn_write_done(pplist, B_ERROR | B_WRITE | flags); 2108 pplist = npplist; 2109 } 2110 if (io_off + io_len < pp_off + pp_len) { 2111 npplist = NULL; 2112 page_list_break(&pplist, &npplist, btop(io_len)); 2113 ASSERT(npplist->p_offset == io_off + io_len); 2114 ASSERT(npplist->p_prev->p_offset == pp_off + pp_len - PAGESIZE); 2115 pvn_write_done(npplist, B_ERROR | B_WRITE | flags); 2116 } 2117 2118 ASSERT(pplist->p_offset == io_off); 2119 ASSERT(pplist->p_prev->p_offset == io_off + io_len - PAGESIZE); 2120 ASSERT(btopr(io_len) <= btopr(kllen)); 2121 2122 /* Do i/o on the remaining kluster */ 2123 err = VOP_PAGEIO(pvp, pplist, (u_offset_t)pstart, io_len, 2124 B_WRITE | flags, cr, NULL); 2125 2126 if ((flags & B_ASYNC) == 0) { 2127 pvn_write_done(pplist, ((err) ? B_ERROR : 0) | B_WRITE | flags); 2128 } 2129 out: 2130 if (!err) { 2131 if (offp) 2132 *offp = io_off; 2133 if (lenp) 2134 *lenp = io_len; 2135 tmp_putpagecnt++; 2136 tmp_pagespushed += btop(io_len); 2137 } 2138 if (err && err != ENOMEM && err != SE_NOSWAP) 2139 cmn_err(CE_WARN, "tmp_putapage: err %d\n", err); 2140 return (err); 2141 } 2142 2143 /* ARGSUSED */ 2144 static int 2145 tmp_map( 2146 struct vnode *vp, 2147 offset_t off, 2148 struct as *as, 2149 caddr_t *addrp, 2150 size_t len, 2151 uchar_t prot, 2152 uchar_t maxprot, 2153 uint_t flags, 2154 struct cred *cred, 2155 caller_context_t *ct) 2156 { 2157 struct segvn_crargs vn_a; 2158 struct tmpnode *tp = (struct tmpnode *)VTOTN(vp); 2159 int error; 2160 2161 #ifdef _ILP32 2162 if (len > MAXOFF_T) 2163 return (ENOMEM); 2164 #endif 2165 2166 if (vp->v_flag & VNOMAP) 2167 return (ENOSYS); 2168 2169 if (off < 0 || (offset_t)(off + len) < 0 || 2170 off > MAXOFF_T || (off + len) > MAXOFF_T) 2171 return (ENXIO); 2172 2173 if (vp->v_type != VREG) 2174 return (ENODEV); 2175 2176 /* 2177 * Don't allow mapping to locked file 2178 */ 2179 if (vn_has_mandatory_locks(vp, tp->tn_mode)) { 2180 return (EAGAIN); 2181 } 2182 2183 as_rangelock(as); 2184 error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags); 2185 if (error != 0) { 2186 as_rangeunlock(as); 2187 return (error); 2188 } 2189 2190 vn_a.vp = vp; 2191 vn_a.offset = (u_offset_t)off; 2192 vn_a.type = flags & MAP_TYPE; 2193 vn_a.prot = prot; 2194 vn_a.maxprot = maxprot; 2195 vn_a.flags = flags & ~MAP_TYPE; 2196 vn_a.cred = cred; 2197 vn_a.amp = NULL; 2198 vn_a.szc = 0; 2199 vn_a.lgrp_mem_policy_flags = 0; 2200 2201 error = as_map(as, *addrp, len, segvn_create, &vn_a); 2202 as_rangeunlock(as); 2203 return (error); 2204 } 2205 2206 /* 2207 * tmp_addmap and tmp_delmap can't be called since the vp 2208 * maintained in the segvn mapping is NULL. 2209 */ 2210 /* ARGSUSED */ 2211 static int 2212 tmp_addmap( 2213 struct vnode *vp, 2214 offset_t off, 2215 struct as *as, 2216 caddr_t addr, 2217 size_t len, 2218 uchar_t prot, 2219 uchar_t maxprot, 2220 uint_t flags, 2221 struct cred *cred, 2222 caller_context_t *ct) 2223 { 2224 return (0); 2225 } 2226 2227 /* ARGSUSED */ 2228 static int 2229 tmp_delmap( 2230 struct vnode *vp, 2231 offset_t off, 2232 struct as *as, 2233 caddr_t addr, 2234 size_t len, 2235 uint_t prot, 2236 uint_t maxprot, 2237 uint_t flags, 2238 struct cred *cred, 2239 caller_context_t *ct) 2240 { 2241 return (0); 2242 } 2243 2244 static int 2245 tmp_freesp(struct vnode *vp, struct flock64 *lp, int flag) 2246 { 2247 register int i; 2248 register struct tmpnode *tp = VTOTN(vp); 2249 int error; 2250 2251 ASSERT(vp->v_type == VREG); 2252 ASSERT(lp->l_start >= 0); 2253 2254 if (lp->l_len != 0) 2255 return (EINVAL); 2256 2257 rw_enter(&tp->tn_rwlock, RW_WRITER); 2258 if (tp->tn_size == lp->l_start) { 2259 rw_exit(&tp->tn_rwlock); 2260 return (0); 2261 } 2262 2263 /* 2264 * Check for any mandatory locks on the range 2265 */ 2266 if (MANDLOCK(vp, tp->tn_mode)) { 2267 long save_start; 2268 2269 save_start = lp->l_start; 2270 2271 if (tp->tn_size < lp->l_start) { 2272 /* 2273 * "Truncate up" case: need to make sure there 2274 * is no lock beyond current end-of-file. To 2275 * do so, we need to set l_start to the size 2276 * of the file temporarily. 2277 */ 2278 lp->l_start = tp->tn_size; 2279 } 2280 lp->l_type = F_WRLCK; 2281 lp->l_sysid = 0; 2282 lp->l_pid = ttoproc(curthread)->p_pid; 2283 i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK; 2284 if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 || 2285 lp->l_type != F_UNLCK) { 2286 rw_exit(&tp->tn_rwlock); 2287 return (i ? i : EAGAIN); 2288 } 2289 2290 lp->l_start = save_start; 2291 } 2292 VFSTOTM(vp->v_vfsp); 2293 2294 rw_enter(&tp->tn_contents, RW_WRITER); 2295 error = tmpnode_trunc((struct tmount *)VFSTOTM(vp->v_vfsp), 2296 tp, (ulong_t)lp->l_start); 2297 rw_exit(&tp->tn_contents); 2298 rw_exit(&tp->tn_rwlock); 2299 return (error); 2300 } 2301 2302 /* ARGSUSED */ 2303 static int 2304 tmp_space( 2305 struct vnode *vp, 2306 int cmd, 2307 struct flock64 *bfp, 2308 int flag, 2309 offset_t offset, 2310 cred_t *cred, 2311 caller_context_t *ct) 2312 { 2313 int error; 2314 2315 if (cmd != F_FREESP) 2316 return (EINVAL); 2317 if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) { 2318 if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T)) 2319 return (EFBIG); 2320 error = tmp_freesp(vp, bfp, flag); 2321 } 2322 return (error); 2323 } 2324 2325 /* ARGSUSED */ 2326 static int 2327 tmp_seek( 2328 struct vnode *vp, 2329 offset_t ooff, 2330 offset_t *noffp, 2331 caller_context_t *ct) 2332 { 2333 return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0); 2334 } 2335 2336 /* ARGSUSED2 */ 2337 static int 2338 tmp_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp) 2339 { 2340 struct tmpnode *tp = VTOTN(vp); 2341 2342 if (write_lock) { 2343 rw_enter(&tp->tn_rwlock, RW_WRITER); 2344 } else { 2345 rw_enter(&tp->tn_rwlock, RW_READER); 2346 } 2347 return (write_lock); 2348 } 2349 2350 /* ARGSUSED1 */ 2351 static void 2352 tmp_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp) 2353 { 2354 struct tmpnode *tp = VTOTN(vp); 2355 2356 rw_exit(&tp->tn_rwlock); 2357 } 2358 2359 static int 2360 tmp_pathconf( 2361 struct vnode *vp, 2362 int cmd, 2363 ulong_t *valp, 2364 cred_t *cr, 2365 caller_context_t *ct) 2366 { 2367 struct tmpnode *tp = NULL; 2368 int error; 2369 2370 switch (cmd) { 2371 case _PC_XATTR_EXISTS: 2372 if (vp->v_vfsp->vfs_flag & VFS_XATTR) { 2373 *valp = 0; /* assume no attributes */ 2374 error = 0; /* okay to ask */ 2375 tp = VTOTN(vp); 2376 rw_enter(&tp->tn_rwlock, RW_READER); 2377 if (tp->tn_xattrdp) { 2378 rw_enter(&tp->tn_xattrdp->tn_rwlock, RW_READER); 2379 /* do not count "." and ".." */ 2380 if (tp->tn_xattrdp->tn_dirents > 2) 2381 *valp = 1; 2382 rw_exit(&tp->tn_xattrdp->tn_rwlock); 2383 } 2384 rw_exit(&tp->tn_rwlock); 2385 } else { 2386 error = EINVAL; 2387 } 2388 break; 2389 case _PC_SATTR_ENABLED: 2390 case _PC_SATTR_EXISTS: 2391 *valp = vfs_has_feature(vp->v_vfsp, VFSFT_SYSATTR_VIEWS) && 2392 (vp->v_type == VREG || vp->v_type == VDIR); 2393 error = 0; 2394 break; 2395 case _PC_TIMESTAMP_RESOLUTION: 2396 /* nanosecond timestamp resolution */ 2397 *valp = 1L; 2398 error = 0; 2399 break; 2400 default: 2401 error = fs_pathconf(vp, cmd, valp, cr, ct); 2402 } 2403 return (error); 2404 } 2405 2406 2407 struct vnodeops *tmp_vnodeops; 2408 2409 const fs_operation_def_t tmp_vnodeops_template[] = { 2410 VOPNAME_OPEN, { .vop_open = tmp_open }, 2411 VOPNAME_CLOSE, { .vop_close = tmp_close }, 2412 VOPNAME_READ, { .vop_read = tmp_read }, 2413 VOPNAME_WRITE, { .vop_write = tmp_write }, 2414 VOPNAME_IOCTL, { .vop_ioctl = tmp_ioctl }, 2415 VOPNAME_GETATTR, { .vop_getattr = tmp_getattr }, 2416 VOPNAME_SETATTR, { .vop_setattr = tmp_setattr }, 2417 VOPNAME_ACCESS, { .vop_access = tmp_access }, 2418 VOPNAME_LOOKUP, { .vop_lookup = tmp_lookup }, 2419 VOPNAME_CREATE, { .vop_create = tmp_create }, 2420 VOPNAME_REMOVE, { .vop_remove = tmp_remove }, 2421 VOPNAME_LINK, { .vop_link = tmp_link }, 2422 VOPNAME_RENAME, { .vop_rename = tmp_rename }, 2423 VOPNAME_MKDIR, { .vop_mkdir = tmp_mkdir }, 2424 VOPNAME_RMDIR, { .vop_rmdir = tmp_rmdir }, 2425 VOPNAME_READDIR, { .vop_readdir = tmp_readdir }, 2426 VOPNAME_SYMLINK, { .vop_symlink = tmp_symlink }, 2427 VOPNAME_READLINK, { .vop_readlink = tmp_readlink }, 2428 VOPNAME_FSYNC, { .vop_fsync = tmp_fsync }, 2429 VOPNAME_INACTIVE, { .vop_inactive = tmp_inactive }, 2430 VOPNAME_FID, { .vop_fid = tmp_fid }, 2431 VOPNAME_RWLOCK, { .vop_rwlock = tmp_rwlock }, 2432 VOPNAME_RWUNLOCK, { .vop_rwunlock = tmp_rwunlock }, 2433 VOPNAME_SEEK, { .vop_seek = tmp_seek }, 2434 VOPNAME_SPACE, { .vop_space = tmp_space }, 2435 VOPNAME_GETPAGE, { .vop_getpage = tmp_getpage }, 2436 VOPNAME_PUTPAGE, { .vop_putpage = tmp_putpage }, 2437 VOPNAME_MAP, { .vop_map = tmp_map }, 2438 VOPNAME_ADDMAP, { .vop_addmap = tmp_addmap }, 2439 VOPNAME_DELMAP, { .vop_delmap = tmp_delmap }, 2440 VOPNAME_PATHCONF, { .vop_pathconf = tmp_pathconf }, 2441 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_support }, 2442 NULL, NULL 2443 }; 2444