/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/user.h>
#include <sys/time.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <sys/dirent.h>
#include <sys/pathname.h>
#include <sys/vmsystm.h>
#include <sys/fs/tmp.h>
#include <sys/fs/tmpnode.h>
#include <sys/mman.h>
#include <vm/hat.h>
#include <vm/seg_vn.h>
#include <vm/seg_map.h>
#include <vm/seg.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/swap.h>
#include <sys/buf.h>
#include <sys/vm.h>
#include <sys/vtrace.h>
#include <sys/policy.h>
#include <fs/fs_subr.h>

static int	tmp_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
	page_t **, size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
static int	tmp_putapage(struct vnode *, page_t *, u_offset_t *, size_t *,
	int, struct cred *);

/* ARGSUSED1 */
static int
tmp_open(struct vnode **vpp, int flag, struct cred *cred)
{
	/*
	 * swapon to a tmpfs file is not supported so access
	 * is denied on open if VISSWAP is set.
	 */
	if ((*vpp)->v_flag & VISSWAP)
		return (EINVAL);
	return (0);
}

/* ARGSUSED1 */
static int
tmp_close(struct vnode *vp, int flag, int count,
	offset_t offset, struct cred *cred)
{
	cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
	cleanshares(vp, ttoproc(curthread)->p_pid);
	return (0);
}

/*
 * wrtmp does the real work of write requests for tmpfs.
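 * It is entered with both tn_rwlock and tn_contents held as writer
 * (see the ASSERTs below) and moves at most PAGESIZE bytes per pass
 * through its loop, going through segmap or, when vpm_enable is set,
 * through vpm/kpm mappings.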
 */
static int
wrtmp(
	struct tmount *tm,
	struct tmpnode *tp,
	struct uio *uio,
	struct cred *cr,
	struct caller_context *ct)
{
	pgcnt_t pageoffset;	/* offset in pages */
	ulong_t segmap_offset;	/* pagesize byte offset into segmap */
	caddr_t base;		/* base of segmap */
	ssize_t bytes;		/* bytes to uiomove */
	pfn_t pagenumber;	/* offset in pages into tmp file */
	struct vnode *vp;
	int error = 0;
	int pagecreate;		/* == 1 if we allocated a page */
	int newpage;
	rlim64_t limit = uio->uio_llimit;
	long oresid = uio->uio_resid;
	timestruc_t now;

	/*
	 * tp->tn_size is incremented before the uiomove
	 * is done on a write.  If the move fails (bad user
	 * address) reset tp->tn_size.
	 * The better way would be to increment tp->tn_size
	 * only if the uiomove succeeds.
	 */
	long tn_size_changed = 0;
	long old_tn_size;

	vp = TNTOV(tp);
	ASSERT(vp->v_type == VREG);

	TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START,
	    "tmp_wrtmp_start:vp %p", vp);

	ASSERT(RW_WRITE_HELD(&tp->tn_contents));
	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

	if (MANDLOCK(vp, tp->tn_mode)) {
		rw_exit(&tp->tn_contents);
		/*
		 * tmp_getattr ends up being called by chklock
		 */
		error = chklock(vp, FWRITE,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct);
		rw_enter(&tp->tn_contents, RW_WRITER);
		if (error != 0) {
			TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
			    "tmp_wrtmp_end:vp %p error %d", vp, error);
			return (error);
		}
	}

	if (uio->uio_loffset < 0)
		return (EINVAL);

	if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
		limit = MAXOFFSET_T;

	if (uio->uio_loffset >= limit) {
		proc_t *p = ttoproc(curthread);

		mutex_enter(&p->p_lock);
		(void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
		    p, RCA_UNSAFE_SIGINFO);
		mutex_exit(&p->p_lock);
		return (EFBIG);
	}

	if (uio->uio_loffset >= MAXOFF_T) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_wrtmp_end:vp %p error %d", vp, EINVAL);
		return (EFBIG);
	}

	if (uio->uio_resid == 0) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_wrtmp_end:vp %p error %d", vp, 0);
		return (0);
	}

	if (limit > MAXOFF_T)
		limit = MAXOFF_T;

	do {
		long offset;
		long delta;

		offset = (long)uio->uio_offset;
		pageoffset = offset & PAGEOFFSET;
		/*
		 * A maximum of PAGESIZE bytes of data is transferred
		 * each pass through this loop
		 */
		bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

		if (offset + bytes >= limit) {
			if (offset >= limit) {
				error = EFBIG;
				goto out;
			}
			bytes = limit - offset;
		}
		pagenumber = btop(offset);

		/*
		 * delta is the amount of anonymous memory
		 * to reserve for the file.
		 * We always reserve in pagesize increments so
		 * unless we're extending the file into a new page,
		 * we don't need to call tmp_resv.
		 */
		delta = offset + bytes -
		    P2ROUNDUP_TYPED(tp->tn_size, PAGESIZE, u_offset_t);
		if (delta > 0) {
			pagecreate = 1;
			if (tmp_resv(tm, tp, delta, pagecreate)) {
				/*
				 * Log file system full in the zone that owns
				 * the tmpfs mount, as well as in the global
				 * zone if necessary.
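				 * The message logged to the global zone
				 * below uses the covered vnode's path
				 * rather than tm_mntpath, presumably
				 * because the latter is relative to the
				 * zone's root.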
				 */
				zcmn_err(tm->tm_vfsp->vfs_zone->zone_id,
				    CE_WARN, "%s: File system full, "
				    "swap space limit exceeded",
				    tm->tm_mntpath);

				if (tm->tm_vfsp->vfs_zone->zone_id !=
				    GLOBAL_ZONEID) {

					vfs_t *vfs = tm->tm_vfsp;

					zcmn_err(GLOBAL_ZONEID,
					    CE_WARN, "%s: File system full, "
					    "swap space limit exceeded",
					    vfs->vfs_vnodecovered->v_path);
				}
				error = ENOSPC;
				break;
			}
			tmpnode_growmap(tp, (ulong_t)offset + bytes);
		}
		/* grow the file to the new length */
		if (offset + bytes > tp->tn_size) {
			tn_size_changed = 1;
			old_tn_size = tp->tn_size;
			tp->tn_size = offset + bytes;
		}
		if (bytes == PAGESIZE) {
			/*
			 * Writing whole page so reading from disk
			 * is a waste
			 */
			pagecreate = 1;
		} else {
			pagecreate = 0;
		}
		/*
		 * If writing past EOF or filling in a hole
		 * we need to allocate an anon slot.
		 */
		if (anon_get_ptr(tp->tn_anon, pagenumber) == NULL) {
			(void) anon_set_ptr(tp->tn_anon, pagenumber,
			    anon_alloc(vp, ptob(pagenumber)), ANON_SLEEP);
			pagecreate = 1;
			tp->tn_nblocks++;
		}

		/*
		 * We have to drop the contents lock to prevent the VM
		 * system from trying to reacquire it in tmp_getpage()
		 * should the uiomove cause a pagefault.  If we're doing
		 * a pagecreate segmap creates the page without calling
		 * the filesystem so we need to hold onto the lock until
		 * the page is created.
		 */
		if (!pagecreate)
			rw_exit(&tp->tn_contents);

		newpage = 0;
		if (vpm_enable) {
			/*
			 * XXX Why do we need to hold the contents lock?
			 * The kpm mappings will not cause a fault.
			 *
			 * Copy data.  If new pages are created, part of
			 * the page that is not written will be initialized
			 * with zeros.
			 */
			error = vpm_data_copy(vp, offset, bytes, uio,
			    !pagecreate, &newpage, 1, S_WRITE);

			if (pagecreate) {
				rw_exit(&tp->tn_contents);
			}
		} else {
			/* Get offset within the segmap mapping */
			segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
			base = segmap_getmapflt(segkmap, vp,
			    (offset & MAXBMASK),
			    PAGESIZE, !pagecreate, S_WRITE);
		}


		if (!vpm_enable && pagecreate) {
			rw_downgrade(&tp->tn_contents);

			/*
			 * segmap_pagecreate() returns 1 if it calls
			 * page_create_va() to allocate any pages.
			 */
			newpage = segmap_pagecreate(segkmap,
			    base + segmap_offset, (size_t)PAGESIZE, 0);
			rw_exit(&tp->tn_contents);
			/*
			 * Clear from the beginning of the page to the starting
			 * offset of the data.
			 */
			if (pageoffset != 0)
				(void) kzero(base + segmap_offset,
				    (size_t)pageoffset);
		}

		if (!vpm_enable) {
			error = uiomove(base + segmap_offset + pageoffset,
			    (long)bytes, UIO_WRITE, uio);
		}

		if (!vpm_enable && pagecreate &&
		    uio->uio_offset < P2ROUNDUP(offset + bytes, PAGESIZE)) {
			long zoffset; /* zero from offset into page */
			/*
			 * We created pages w/o initializing them completely,
			 * thus we need to zero the part that wasn't set up.
			 * This happens on most EOF write cases and if
			 * we had some sort of error during the uiomove.
			 */
			long nmoved;

			nmoved = uio->uio_offset - offset;
			ASSERT((nmoved + pageoffset) <= PAGESIZE);

			/*
			 * Zero from the end of data in the page to the
			 * end of the page.
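			 * (If the uiomove filled the page exactly,
			 * zoffset equals PAGESIZE and there is nothing
			 * left to zero.)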
			 */
			if ((zoffset = pageoffset + nmoved) < PAGESIZE)
				(void) kzero(base + segmap_offset + zoffset,
				    (size_t)PAGESIZE - zoffset);
		}

		/*
		 * Unlock the pages which have been allocated by
		 * page_create_va() in segmap_pagecreate()
		 */
		if (!vpm_enable && newpage) {
			segmap_pageunlock(segkmap, base + segmap_offset,
			    (size_t)PAGESIZE, S_WRITE);
		}

		if (error) {
			/*
			 * If we failed on a write, we must
			 * be sure to invalidate any pages that may have
			 * been allocated.
			 */
			if (vpm_enable) {
				(void) vpm_sync_pages(vp, offset,
				    PAGESIZE, SM_INVAL);
			} else {
				(void) segmap_release(segkmap, base, SM_INVAL);
			}
		} else {
			if (vpm_enable) {
				error = vpm_sync_pages(vp, offset,
				    PAGESIZE, 0);
			} else {
				error = segmap_release(segkmap, base, 0);
			}
		}

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&tp->tn_contents, RW_WRITER);
		/*
		 * If the uiomove failed, fix up tn_size.
		 */
		if (error) {
			if (tn_size_changed) {
				/*
				 * The uiomove failed, and we
				 * allocated blocks, so get rid
				 * of them.
				 */
				(void) tmpnode_trunc(tm, tp,
				    (ulong_t)old_tn_size);
			}
		} else {
			/*
			 * XXX - Can this be out of the loop?
			 */
			if ((tp->tn_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) &&
			    (tp->tn_mode & (S_ISUID | S_ISGID)) &&
			    secpolicy_vnode_setid_retain(cr,
			    (tp->tn_mode & S_ISUID) != 0 && tp->tn_uid == 0)) {
				/*
				 * Clear Set-UID & Set-GID bits on
				 * successful write if not privileged
				 * and at least one of the execute bits
				 * is set.  If we always clear Set-GID,
				 * mandatory file and record locking is
				 * unusable.
				 */
				tp->tn_mode &= ~(S_ISUID | S_ISGID);
			}
			gethrestime(&now);
			tp->tn_mtime = now;
			tp->tn_ctime = now;
		}
	} while (error == 0 && uio->uio_resid > 0 && bytes != 0);

out:
	/*
	 * If we've already done a partial-write, terminate
	 * the write but return no error.
	 */
	if (oresid != uio->uio_resid)
		error = 0;
	TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
	    "tmp_wrtmp_end:vp %p error %d", vp, error);
	return (error);
}

/*
 * rdtmp does the real work of read requests for tmpfs.
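 * Like the write side, it drops tn_contents around each uiomove so a
 * page fault on the user buffer cannot deadlock trying to reacquire
 * the lock in tmp_getpage().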
 */
static int
rdtmp(
	struct tmount *tm,
	struct tmpnode *tp,
	struct uio *uio,
	struct caller_context *ct)
{
	ulong_t pageoffset;	/* offset in tmpfs file (uio_offset) */
	ulong_t segmap_offset;	/* pagesize byte offset into segmap */
	caddr_t base;		/* base of segmap */
	ssize_t bytes;		/* bytes to uiomove */
	struct vnode *vp;
	int error;
	long oresid = uio->uio_resid;

#if defined(lint)
	tm = tm;
#endif
	vp = TNTOV(tp);

	TRACE_1(TR_FAC_TMPFS, TR_TMPFS_RWTMP_START,
	    "tmp_rdtmp_start:vp %p", vp);

	ASSERT(RW_LOCK_HELD(&tp->tn_contents));

	if (MANDLOCK(vp, tp->tn_mode)) {
		rw_exit(&tp->tn_contents);
		/*
		 * tmp_getattr ends up being called by chklock
		 */
		error = chklock(vp, FREAD,
		    uio->uio_loffset, uio->uio_resid, uio->uio_fmode, ct);
		rw_enter(&tp->tn_contents, RW_READER);
		if (error != 0) {
			TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
			    "tmp_rdtmp_end:vp %p error %d", vp, error);
			return (error);
		}
	}
	ASSERT(tp->tn_type == VREG);

	if (uio->uio_loffset >= MAXOFF_T) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_rdtmp_end:vp %p error %d", vp, EINVAL);
		return (0);
	}
	if (uio->uio_loffset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0) {
		TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
		    "tmp_rdtmp_end:vp %p error %d", vp, 0);
		return (0);
	}

	vp = TNTOV(tp);

	do {
		long diff;
		long offset;

		offset = uio->uio_offset;
		pageoffset = offset & PAGEOFFSET;
		bytes = MIN(PAGESIZE - pageoffset, uio->uio_resid);

		diff = tp->tn_size - offset;

		if (diff <= 0) {
			error = 0;
			goto out;
		}
		if (diff < bytes)
			bytes = diff;

		/*
		 * We have to drop the contents lock to prevent the VM
		 * system from trying to reacquire it in tmp_getpage()
		 * should the uiomove cause a pagefault.
		 */
		rw_exit(&tp->tn_contents);

		if (vpm_enable) {
			/*
			 * Copy data.
			 */
			error = vpm_data_copy(vp, offset, bytes, uio,
			    1, NULL, 0, S_READ);
		} else {
			segmap_offset = (offset & PAGEMASK) & MAXBOFFSET;
			base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK,
			    bytes, 1, S_READ);

			error = uiomove(base + segmap_offset + pageoffset,
			    (long)bytes, UIO_READ, uio);
		}

		if (error) {
			if (vpm_enable) {
				(void) vpm_sync_pages(vp, offset,
				    PAGESIZE, 0);
			} else {
				(void) segmap_release(segkmap, base, 0);
			}
		} else {
			if (vpm_enable) {
				error = vpm_sync_pages(vp, offset,
				    PAGESIZE, 0);
			} else {
				error = segmap_release(segkmap, base, 0);
			}
		}

		/*
		 * Re-acquire contents lock.
		 */
		rw_enter(&tp->tn_contents, RW_READER);

	} while (error == 0 && uio->uio_resid > 0);

out:
	gethrestime(&tp->tn_atime);

	/*
	 * If we've already done a partial read, terminate
	 * the read but return no error.
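	 * The caller sees how much was transferred through the updated
	 * uio_resid, matching the write side's partial-write behavior.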
	 */
	if (oresid != uio->uio_resid)
		error = 0;

	TRACE_2(TR_FAC_TMPFS, TR_TMPFS_RWTMP_END,
	    "tmp_rdtmp_end:vp %p error %d", vp, error);
	return (error);
}

/* ARGSUSED2 */
static int
tmp_read(struct vnode *vp, struct uio *uiop, int ioflag, cred_t *cred,
	struct caller_context *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	int error;

	/*
	 * We don't currently support reading non-regular files
	 */
	if (vp->v_type == VDIR)
		return (EISDIR);
	if (vp->v_type != VREG)
		return (EINVAL);
	/*
	 * tmp_rwlock should have already been called from layers above
	 */
	ASSERT(RW_READ_HELD(&tp->tn_rwlock));

	rw_enter(&tp->tn_contents, RW_READER);

	error = rdtmp(tm, tp, uiop, ct);

	rw_exit(&tp->tn_contents);

	return (error);
}

static int
tmp_write(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *cred,
	struct caller_context *ct)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	int error;

	/*
	 * We don't currently support writing to non-regular files
	 */
	if (vp->v_type != VREG)
		return (EINVAL);	/* XXX EISDIR? */

	/*
	 * tmp_rwlock should have already been called from layers above
	 */
	ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));

	rw_enter(&tp->tn_contents, RW_WRITER);

	if (ioflag & FAPPEND) {
		/*
		 * In append mode start at end of file.
		 */
		uiop->uio_loffset = tp->tn_size;
	}

	error = wrtmp(tm, tp, uiop, cred, ct);

	rw_exit(&tp->tn_contents);

	return (error);
}

/* ARGSUSED */
static int
tmp_ioctl(struct vnode *vp, int com, intptr_t data, int flag,
    struct cred *cred, int *rvalp)
{
	return (ENOTTY);
}

/* ARGSUSED2 */
static int
tmp_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct vnode *mvp;
	struct vattr va;
	int attrs = 1;

	/*
	 * A special case to handle the root tnode on a diskless nfs
	 * client that may have had its uid and gid inherited
	 * from an nfs vnode with nobody ownership.  Likely the
	 * root filesystem.  After nfs is fully functional the uid/gid
	 * may be mappable so ask again.
	 * vfsp can't get unmounted because we hold vp.
	 */
	if (vp->v_flag & VROOT &&
	    (mvp = vp->v_vfsp->vfs_vnodecovered) != NULL) {
		mutex_enter(&tp->tn_tlock);
		if (tp->tn_uid == UID_NOBODY || tp->tn_gid == GID_NOBODY) {
			mutex_exit(&tp->tn_tlock);
			bzero(&va, sizeof (struct vattr));
			va.va_mask = AT_UID|AT_GID;
			attrs = VOP_GETATTR(mvp, &va, 0, cred);
		} else {
			mutex_exit(&tp->tn_tlock);
		}
	}
	mutex_enter(&tp->tn_tlock);
	if (attrs == 0) {
		tp->tn_uid = va.va_uid;
		tp->tn_gid = va.va_gid;
	}
	vap->va_type = vp->v_type;
	vap->va_mode = tp->tn_mode & MODEMASK;
	vap->va_uid = tp->tn_uid;
	vap->va_gid = tp->tn_gid;
	vap->va_fsid = tp->tn_fsid;
	vap->va_nodeid = (ino64_t)tp->tn_nodeid;
	vap->va_nlink = tp->tn_nlink;
	vap->va_size = (u_offset_t)tp->tn_size;
	vap->va_atime = tp->tn_atime;
	vap->va_mtime = tp->tn_mtime;
	vap->va_ctime = tp->tn_ctime;
	vap->va_blksize = PAGESIZE;
	vap->va_rdev = tp->tn_rdev;
	vap->va_seq = tp->tn_seq;

	/*
	 * XXX Holes are not taken into account.
	 * We could take the time to
	 * run through the anon array looking for allocated slots...
	 */
	vap->va_nblocks = (fsblkcnt64_t)btodb(ptob(btopr(vap->va_size)));
	mutex_exit(&tp->tn_tlock);
	return (0);
}

/*ARGSUSED4*/
static int
tmp_setattr(
	struct vnode *vp,
	struct vattr *vap,
	int flags,
	struct cred *cred,
	caller_context_t *ct)
{
	struct tmount *tm = (struct tmount *)VTOTM(vp);
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error = 0;
	struct vattr *get;
	long mask;

	/*
	 * Cannot set these attributes
	 */
	if (vap->va_mask & AT_NOSET)
		return (EINVAL);

	mutex_enter(&tp->tn_tlock);

	get = &tp->tn_attr;
	/*
	 * Change file access modes.  Must be owner or have sufficient
	 * privileges.
	 */
	error = secpolicy_vnode_setattr(cred, vp, vap, get, flags,
	    tmp_taccess, tp);

	if (error)
		goto out;

	mask = vap->va_mask;

	if (mask & AT_MODE) {
		get->va_mode &= S_IFMT;
		get->va_mode |= vap->va_mode & ~S_IFMT;
	}

	if (mask & AT_UID)
		get->va_uid = vap->va_uid;
	if (mask & AT_GID)
		get->va_gid = vap->va_gid;
	if (mask & AT_ATIME)
		get->va_atime = vap->va_atime;
	if (mask & AT_MTIME)
		get->va_mtime = vap->va_mtime;

	if (mask & (AT_UID | AT_GID | AT_MODE | AT_MTIME))
		gethrestime(&tp->tn_ctime);

	if (mask & AT_SIZE) {
		ASSERT(vp->v_type != VDIR);

		/* Don't support large files. */
		if (vap->va_size > MAXOFF_T) {
			error = EFBIG;
			goto out;
		}
		mutex_exit(&tp->tn_tlock);

		rw_enter(&tp->tn_rwlock, RW_WRITER);
		rw_enter(&tp->tn_contents, RW_WRITER);
		error = tmpnode_trunc(tm, tp, (ulong_t)vap->va_size);
		rw_exit(&tp->tn_contents);
		rw_exit(&tp->tn_rwlock);
		goto out1;
	}
out:
	mutex_exit(&tp->tn_tlock);
out1:
	return (error);
}

/* ARGSUSED2 */
static int
tmp_access(struct vnode *vp, int mode, int flags, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error;

	mutex_enter(&tp->tn_tlock);
	error = tmp_taccess(tp, mode, cred);
	mutex_exit(&tp->tn_tlock);
	return (error);
}

/* ARGSUSED3 */
static int
tmp_lookup(
	struct vnode *dvp,
	char *nm,
	struct vnode **vpp,
	struct pathname *pnp,
	int flags,
	struct vnode *rdir,
	struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *ntp = NULL;
	int error;


	/* allow cd into @ dir */
	if (flags & LOOKUP_XATTR) {
		struct tmpnode *xdp;
		struct tmount *tm;

		if (tp->tn_flags & ISXATTR)
			/* No attributes on attributes */
			return (EINVAL);

		rw_enter(&tp->tn_rwlock, RW_WRITER);
		if (tp->tn_xattrdp == NULL) {
			if (!(flags & CREATE_XATTR_DIR)) {
				rw_exit(&tp->tn_rwlock);
				return (ENOENT);
			}

			/*
			 * No attribute directory exists for this
			 * node - create the attr dir as a side effect
			 * of this lookup.
			 */

			/*
			 * Make sure we have adequate permission...
			 */

			if ((error = tmp_taccess(tp, VWRITE, cred)) != 0) {
				rw_exit(&tp->tn_rwlock);
				return (error);
			}

			xdp = tmp_memalloc(sizeof (struct tmpnode),
			    TMP_MUSTHAVE);
			tm = VTOTM(dvp);
			tmpnode_init(tm, xdp, &tp->tn_attr, NULL);
			/*
			 * Fix-up fields unique to attribute directories.
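			 * A non-directory's attr dir starts at mode 0700;
			 * the read/execute bits added below are derived
			 * from the file's group and other read bits.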
			 */
			xdp->tn_flags = ISXATTR;
			xdp->tn_type = VDIR;
			if (tp->tn_type == VDIR) {
				xdp->tn_mode = tp->tn_attr.va_mode;
			} else {
				xdp->tn_mode = 0700;
				if (tp->tn_attr.va_mode & 0040)
					xdp->tn_mode |= 0750;
				if (tp->tn_attr.va_mode & 0004)
					xdp->tn_mode |= 0705;
			}
			xdp->tn_vnode->v_type = VDIR;
			xdp->tn_vnode->v_flag |= V_XATTRDIR;
			tdirinit(tp, xdp);
			tp->tn_xattrdp = xdp;
		} else {
			VN_HOLD(tp->tn_xattrdp->tn_vnode);
		}
		*vpp = TNTOV(tp->tn_xattrdp);
		rw_exit(&tp->tn_rwlock);
		return (0);
	}

	/*
	 * Null component name is a synonym for directory being searched.
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		*vpp = dvp;
		return (0);
	}
	ASSERT(tp);

	error = tdirlookup(tp, nm, &ntp, cred);

	if (error == 0) {
		ASSERT(ntp);
		*vpp = TNTOV(ntp);
		/*
		 * If vnode is a device return special vnode instead
		 */
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;

			newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
			    cred);
			VN_RELE(*vpp);
			*vpp = newvp;
		}
	}
	TRACE_4(TR_FAC_TMPFS, TR_TMPFS_LOOKUP,
	    "tmpfs lookup:vp %p name %s vpp %p error %d",
	    dvp, nm, vpp, error);
	return (error);
}

/*ARGSUSED7*/
static int
tmp_create(
	struct vnode *dvp,
	char *nm,
	struct vattr *vap,
	enum vcexcl exclusive,
	int mode,
	struct vnode **vpp,
	struct cred *cred,
	int flag)
{
	struct tmpnode *parent;
	struct tmount *tm;
	struct tmpnode *self;
	int error;
	struct tmpnode *oldtp;

again:
	parent = (struct tmpnode *)VTOTN(dvp);
	tm = (struct tmount *)VTOTM(dvp);
	self = NULL;
	error = 0;
	oldtp = NULL;

	/* device files not allowed in ext. attr dirs */
	if ((parent->tn_flags & ISXATTR) &&
	    (vap->va_type == VBLK || vap->va_type == VCHR ||
	    vap->va_type == VFIFO || vap->va_type == VDOOR ||
	    vap->va_type == VSOCK || vap->va_type == VPORT))
		return (EINVAL);

	if (vap->va_type == VREG && (vap->va_mode & VSVTX)) {
		/* Must be privileged to set sticky bit */
		if (secpolicy_vnode_stky_modify(cred))
			vap->va_mode &= ~VSVTX;
	} else if (vap->va_type == VNON) {
		return (EINVAL);
	}

	/*
	 * Null component name is a synonym for directory being searched.
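	 * (This is the same convention tmp_lookup() honors above.)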
	 */
	if (*nm == '\0') {
		VN_HOLD(dvp);
		oldtp = parent;
	} else {
		error = tdirlookup(parent, nm, &oldtp, cred);
	}

	if (error == 0) {	/* name found */
		ASSERT(oldtp);

		rw_enter(&oldtp->tn_rwlock, RW_WRITER);

		/*
		 * if create/read-only an existing
		 * directory, allow it
		 */
		if (exclusive == EXCL)
			error = EEXIST;
		else if ((oldtp->tn_type == VDIR) && (mode & VWRITE))
			error = EISDIR;
		else {
			error = tmp_taccess(oldtp, mode, cred);
		}

		if (error) {
			rw_exit(&oldtp->tn_rwlock);
			tmpnode_rele(oldtp);
			return (error);
		}
		*vpp = TNTOV(oldtp);
		if ((*vpp)->v_type == VREG && (vap->va_mask & AT_SIZE) &&
		    vap->va_size == 0) {
			rw_enter(&oldtp->tn_contents, RW_WRITER);
			(void) tmpnode_trunc(tm, oldtp, 0);
			rw_exit(&oldtp->tn_contents);
		}
		rw_exit(&oldtp->tn_rwlock);
		if (IS_DEVVP(*vpp)) {
			struct vnode *newvp;

			newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type,
			    cred);
			VN_RELE(*vpp);
			if (newvp == NULL) {
				return (ENOSYS);
			}
			*vpp = newvp;
		}
		return (0);
	}

	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, nm, DE_CREATE,
	    (struct tmpnode *)NULL, (struct tmpnode *)NULL,
	    vap, &self, cred);
	rw_exit(&parent->tn_rwlock);

	if (error) {
		if (self)
			tmpnode_rele(self);

		if (error == EEXIST) {
			/*
			 * This means that the file was created sometime
			 * after we checked and did not find it and when
			 * we went to create it.
			 * Since creat() is supposed to truncate a file
			 * that already exists go back to the beginning
			 * of the function.  This time we will find it
			 * and go down the tmp_trunc() path.
			 */
			goto again;
		}
		return (error);
	}

	*vpp = TNTOV(self);

	if (!error && IS_DEVVP(*vpp)) {
		struct vnode *newvp;

		newvp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, cred);
		VN_RELE(*vpp);
		if (newvp == NULL)
			return (ENOSYS);
		*vpp = newvp;
	}
	TRACE_3(TR_FAC_TMPFS, TR_TMPFS_CREATE,
	    "tmpfs create:dvp %p nm %s vpp %p", dvp, nm, vpp);
	return (0);
}

static int
tmp_remove(struct vnode *dvp, char *nm, struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	int error;
	struct tmpnode *tp = NULL;

	error = tdirlookup(parent, nm, &tp, cred);
	if (error)
		return (error);

	ASSERT(tp);
	rw_enter(&parent->tn_rwlock, RW_WRITER);
	rw_enter(&tp->tn_rwlock, RW_WRITER);

	if (tp->tn_type != VDIR ||
	    (error = secpolicy_fs_linkdir(cred, dvp->v_vfsp)) == 0)
		error = tdirdelete(parent, tp, nm, DR_REMOVE, cred);

	rw_exit(&tp->tn_rwlock);
	rw_exit(&parent->tn_rwlock);
	vnevent_remove(TNTOV(tp));
	tmpnode_rele(tp);

	TRACE_3(TR_FAC_TMPFS, TR_TMPFS_REMOVE,
	    "tmpfs remove:dvp %p nm %s error %d", dvp, nm, error);
	return (error);
}

static int
tmp_link(struct vnode *dvp, struct vnode *srcvp, char *tnm, struct cred *cred)
{
	struct tmpnode *parent;
	struct tmpnode *from;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	int error;
	struct tmpnode *found = NULL;
	struct vnode *realvp;

	if (VOP_REALVP(srcvp, &realvp) == 0)
		srcvp = realvp;

	parent = (struct tmpnode *)VTOTN(dvp);
	from = (struct tmpnode *)VTOTN(srcvp);

	if ((srcvp->v_type == VDIR &&
	    secpolicy_fs_linkdir(cred, dvp->v_vfsp)) ||
	    (from->tn_uid != crgetuid(cred) && secpolicy_basic_link(cred)))
		return (EPERM);

	/*
	 * Make sure link for extended attributes is valid
	 * We only support hard linking of xattr's in xattrdir to an xattrdir
	 */
	if ((from->tn_flags & ISXATTR) != (parent->tn_flags & ISXATTR))
		return (EINVAL);

	error = tdirlookup(parent, tnm, &found, cred);
	if (error == 0) {
		ASSERT(found);
		tmpnode_rele(found);
		return (EEXIST);
	}

	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, tnm, DE_LINK, (struct tmpnode *)NULL,
	    from, NULL, (struct tmpnode **)NULL, cred);
	rw_exit(&parent->tn_rwlock);
	return (error);
}

static int
tmp_rename(
	struct vnode *odvp,	/* source parent vnode */
	char *onm,		/* source name */
	struct vnode *ndvp,	/* destination parent vnode */
	char *nnm,		/* destination name */
	struct cred *cred)
{
	struct tmpnode *fromparent;
	struct tmpnode *toparent;
	struct tmpnode *fromtp = NULL;	/* source tmpnode */
	struct tmount *tm = (struct tmount *)VTOTM(odvp);
	int error;
	int samedir = 0;	/* set if odvp == ndvp */
	struct vnode *realvp;

	if (VOP_REALVP(ndvp, &realvp) == 0)
		ndvp = realvp;

	fromparent = (struct tmpnode *)VTOTN(odvp);
	toparent = (struct tmpnode *)VTOTN(ndvp);

	if ((fromparent->tn_flags & ISXATTR) != (toparent->tn_flags & ISXATTR))
		return (EINVAL);

	mutex_enter(&tm->tm_renamelck);

	/*
	 * Look up tmpnode of file we're supposed to rename.
	 */
	error = tdirlookup(fromparent, onm, &fromtp, cred);
	if (error) {
		mutex_exit(&tm->tm_renamelck);
		return (error);
	}

	/*
	 * Make sure we can delete the old (source) entry.  This
	 * requires write permission on the containing directory.  If
	 * that directory is "sticky" it requires further checks.
	 */
	if (((error = tmp_taccess(fromparent, VWRITE, cred)) != 0) ||
	    (error = tmp_sticky_remove_access(fromparent, fromtp, cred)) != 0)
		goto done;

	/*
	 * Check for renaming to or from '.' or '..' or that
	 * fromtp == fromparent
	 */
	if ((onm[0] == '.' &&
	    (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) ||
	    (nnm[0] == '.' &&
	    (nnm[1] == '\0' || (nnm[1] == '.' && nnm[2] == '\0'))) ||
	    (fromparent == fromtp)) {
		error = EINVAL;
		goto done;
	}

	samedir = (fromparent == toparent);
	/*
	 * Make sure we can search and rename into the new
	 * (destination) directory.
	 */
	if (!samedir) {
		error = tmp_taccess(toparent, VEXEC|VWRITE, cred);
		if (error)
			goto done;
	}

	/*
	 * Link source to new target
	 */
	rw_enter(&toparent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, toparent, nnm, DE_RENAME,
	    fromparent, fromtp, (struct vattr *)NULL,
	    (struct tmpnode **)NULL, cred);
	rw_exit(&toparent->tn_rwlock);

	if (error) {
		/*
		 * ESAME isn't really an error; it indicates that the
		 * operation should not be done because the source and target
		 * are the same file, but that no error should be reported.
		 */
		if (error == ESAME)
			error = 0;
		goto done;
	}

	/*
	 * Unlink from source.
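	 * At this point the new name already references fromtp, so if
	 * the delete below fails the file remains reachable under both
	 * names; see the XXX below.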
	 */
	rw_enter(&fromparent->tn_rwlock, RW_WRITER);
	rw_enter(&fromtp->tn_rwlock, RW_WRITER);

	error = tdirdelete(fromparent, fromtp, onm, DR_RENAME, cred);

	/*
	 * The following handles the case where our source tmpnode was
	 * removed before we got to it.
	 *
	 * XXX We should also cleanup properly in the case where tdirdelete
	 * fails for some other reason.  Currently this case shouldn't happen.
	 * (see 1184991).
	 */
	if (error == ENOENT)
		error = 0;

	rw_exit(&fromtp->tn_rwlock);
	rw_exit(&fromparent->tn_rwlock);
done:
	tmpnode_rele(fromtp);
	mutex_exit(&tm->tm_renamelck);

	TRACE_5(TR_FAC_TMPFS, TR_TMPFS_RENAME,
	    "tmpfs rename:ovp %p onm %s nvp %p nnm %s error %d",
	    odvp, onm, ndvp, nnm, error);
	return (error);
}

static int
tmp_mkdir(
	struct vnode *dvp,
	char *nm,
	struct vattr *va,
	struct vnode **vpp,
	struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = NULL;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	int error;

	/* no new dirs allowed in xattr dirs */
	if (parent->tn_flags & ISXATTR)
		return (EINVAL);

	/*
	 * Might be dangling directory.  Catch it here,
	 * because a ENOENT return from tdirlookup() is
	 * an "o.k. return".
	 */
	if (parent->tn_nlink == 0)
		return (ENOENT);

	error = tdirlookup(parent, nm, &self, cred);
	if (error == 0) {
		ASSERT(self);
		tmpnode_rele(self);
		return (EEXIST);
	}
	if (error != ENOENT)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, nm, DE_MKDIR,
	    (struct tmpnode *)NULL, (struct tmpnode *)NULL, va,
	    &self, cred);
	if (error) {
		rw_exit(&parent->tn_rwlock);
		if (self)
			tmpnode_rele(self);
		return (error);
	}
	rw_exit(&parent->tn_rwlock);
	*vpp = TNTOV(self);
	return (0);
}

static int
tmp_rmdir(
	struct vnode *dvp,
	char *nm,
	struct vnode *cdir,
	struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = NULL;
	struct vnode *vp;
	int error = 0;

	/*
	 * Return error when removing . and ..
	 */
	if (strcmp(nm, ".") == 0)
		return (EINVAL);
	if (strcmp(nm, "..") == 0)
		return (EEXIST); /* Should be ENOTEMPTY */
	error = tdirlookup(parent, nm, &self, cred);
	if (error)
		return (error);

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	rw_enter(&self->tn_rwlock, RW_WRITER);

	vp = TNTOV(self);
	if (vp == dvp || vp == cdir) {
		error = EINVAL;
		goto done1;
	}
	if (self->tn_type != VDIR) {
		error = ENOTDIR;
		goto done1;
	}

	mutex_enter(&self->tn_tlock);
	if (self->tn_nlink > 2) {
		mutex_exit(&self->tn_tlock);
		error = EEXIST;
		goto done1;
	}
	mutex_exit(&self->tn_tlock);

	if (vn_vfswlock(vp)) {
		error = EBUSY;
		goto done1;
	}
	if (vn_mountedvfs(vp) != NULL) {
		error = EBUSY;
		goto done;
	}

	/*
	 * Check for an empty directory
	 * i.e. only includes entries for "." and ".."
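	 * tn_dirents counts "." and "..", so any value greater than 2
	 * means real entries remain.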
	 */
	if (self->tn_dirents > 2) {
		error = EEXIST;		/* SIGH should be ENOTEMPTY */
		/*
		 * Update atime because checking tn_dirents is logically
		 * equivalent to reading the directory
		 */
		gethrestime(&self->tn_atime);
		goto done;
	}

	error = tdirdelete(parent, self, nm, DR_RMDIR, cred);
done:
	vn_vfsunlock(vp);
done1:
	rw_exit(&self->tn_rwlock);
	rw_exit(&parent->tn_rwlock);
	vnevent_rmdir(TNTOV(self));
	tmpnode_rele(self);

	return (error);
}

/* ARGSUSED2 */

static int
tmp_readdir(struct vnode *vp, struct uio *uiop, struct cred *cred, int *eofp)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tdirent *tdp;
	int error = 0;
	size_t namelen;
	struct dirent64 *dp;
	ulong_t offset;
	ulong_t total_bytes_wanted;
	long outcount = 0;
	long bufsize;
	int reclen;
	caddr_t outbuf;

	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}
	/*
	 * assuming system call has already called tmp_rwlock
	 */
	ASSERT(RW_READ_HELD(&tp->tn_rwlock));

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * There's a window here where someone could have removed
	 * all the entries in the directory after we put a hold on the
	 * vnode but before we grabbed the rwlock.  Just return.
	 */
	if (tp->tn_dir == NULL) {
		if (tp->tn_nlink) {
			panic("empty directory 0x%p", (void *)tp);
			/*NOTREACHED*/
		}
		return (0);
	}

	/*
	 * Get space for multiple directory entries
	 */
	total_bytes_wanted = uiop->uio_iov->iov_len;
	bufsize = total_bytes_wanted + sizeof (struct dirent64);
	outbuf = kmem_alloc(bufsize, KM_SLEEP);

	dp = (struct dirent64 *)outbuf;


	offset = 0;
	tdp = tp->tn_dir;
	while (tdp) {
		namelen = strlen(tdp->td_name);	/* no +1 needed */
		offset = tdp->td_offset;
		if (offset >= uiop->uio_offset) {
			reclen = (int)DIRENT64_RECLEN(namelen);
			if (outcount + reclen > total_bytes_wanted) {
				if (!outcount)
					/*
					 * Buffer too small for any entries.
					 */
					error = EINVAL;
				break;
			}
			ASSERT(tdp->td_tmpnode != NULL);

			/* use strncpy(9f) to zero out uninitialized bytes */

			(void) strncpy(dp->d_name, tdp->td_name,
			    DIRENT64_NAMELEN(reclen));
			dp->d_reclen = (ushort_t)reclen;
			dp->d_ino = (ino64_t)tdp->td_tmpnode->tn_nodeid;
			dp->d_off = (offset_t)tdp->td_offset + 1;
			dp = (struct dirent64 *)
			    ((uintptr_t)dp + dp->d_reclen);
			outcount += reclen;
			ASSERT(outcount <= bufsize);
		}
		tdp = tdp->td_next;
	}

	if (!error)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	if (!error) {
		/*
		 * If we reached the end of the list our offset
		 * should now be just past the end.
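		 * Bumping the offset one past the final entry means a
		 * subsequent call finds no entries at or beyond
		 * uio_offset and reports EOF.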
		 */
		if (!tdp) {
			offset += 1;
			if (eofp)
				*eofp = 1;
		} else if (eofp)
			*eofp = 0;
		uiop->uio_offset = offset;
	}
	gethrestime(&tp->tn_atime);
	kmem_free(outbuf, bufsize);
	return (error);
}

static int
tmp_symlink(
	struct vnode *dvp,
	char *lnm,
	struct vattr *tva,
	char *tnm,
	struct cred *cred)
{
	struct tmpnode *parent = (struct tmpnode *)VTOTN(dvp);
	struct tmpnode *self = (struct tmpnode *)NULL;
	struct tmount *tm = (struct tmount *)VTOTM(dvp);
	char *cp = NULL;
	int error;
	size_t len;

	/* no symlinks allowed to files in xattr dirs */
	if (parent->tn_flags & ISXATTR)
		return (EINVAL);

	error = tdirlookup(parent, lnm, &self, cred);
	if (error == 0) {
		/*
		 * The entry already exists
		 */
		tmpnode_rele(self);
		return (EEXIST);	/* was 0 */
	}

	if (error != ENOENT) {
		if (self != NULL)
			tmpnode_rele(self);
		return (error);
	}

	rw_enter(&parent->tn_rwlock, RW_WRITER);
	error = tdirenter(tm, parent, lnm, DE_CREATE, (struct tmpnode *)NULL,
	    (struct tmpnode *)NULL, tva, &self, cred);
	rw_exit(&parent->tn_rwlock);

	if (error) {
		if (self)
			tmpnode_rele(self);
		return (error);
	}
	len = strlen(tnm) + 1;
	cp = tmp_memalloc(len, 0);
	if (cp == NULL) {
		tmpnode_rele(self);
		return (ENOSPC);
	}
	(void) strcpy(cp, tnm);

	self->tn_symlink = cp;
	self->tn_size = len - 1;
	tmpnode_rele(self);
	return (error);
}

/* ARGSUSED2 */
static int
tmp_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error = 0;

	if (vp->v_type != VLNK)
		return (EINVAL);

	rw_enter(&tp->tn_rwlock, RW_READER);
	rw_enter(&tp->tn_contents, RW_READER);
	error = uiomove(tp->tn_symlink, tp->tn_size, UIO_READ, uiop);
	gethrestime(&tp->tn_atime);
	rw_exit(&tp->tn_contents);
	rw_exit(&tp->tn_rwlock);
	return (error);
}

/* ARGSUSED */
static int
tmp_fsync(struct vnode *vp, int syncflag, struct cred *cred)
{
	return (0);
}

/* ARGSUSED */
static void
tmp_inactive(struct vnode *vp, struct cred *cred)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tmount *tm = (struct tmount *)VFSTOTM(vp->v_vfsp);

	rw_enter(&tp->tn_rwlock, RW_WRITER);
top:
	mutex_enter(&tp->tn_tlock);
	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);

	/*
	 * If we don't have the last hold or the link count is non-zero,
	 * there's little to do -- just drop our hold.
	 */
	if (vp->v_count > 1 || tp->tn_nlink != 0) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		mutex_exit(&tp->tn_tlock);
		rw_exit(&tp->tn_rwlock);
		return;
	}

	/*
	 * We have the last hold *and* the link count is zero, so this
	 * tmpnode is dead from the filesystem's viewpoint.  However,
	 * if the tmpnode has any pages associated with it (i.e. if it's
	 * a normal file with non-zero size), the tmpnode can still be
	 * discovered by pageout or fsflush via the page vnode pointers.
	 * In this case we must drop all our locks, truncate the tmpnode,
	 * and try the whole dance again.
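	 * Truncation frees the pages, so on the retry no new holds can
	 * show up through the page vnode pointers.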
	 */
	if (tp->tn_size != 0) {
		if (tp->tn_type == VREG) {
			mutex_exit(&vp->v_lock);
			mutex_exit(&tp->tn_tlock);
			rw_enter(&tp->tn_contents, RW_WRITER);
			(void) tmpnode_trunc(tm, tp, 0);
			rw_exit(&tp->tn_contents);
			ASSERT(tp->tn_size == 0);
			ASSERT(tp->tn_nblocks == 0);
			goto top;
		}
		if (tp->tn_type == VLNK)
			tmp_memfree(tp->tn_symlink, tp->tn_size + 1);
	}

	/*
	 * Remove normal file/dir's xattr dir and xattrs.
	 */
	if (tp->tn_xattrdp) {
		struct tmpnode *xtp = tp->tn_xattrdp;

		ASSERT(xtp->tn_flags & ISXATTR);
		tmpnode_hold(xtp);
		rw_enter(&xtp->tn_rwlock, RW_WRITER);
		tdirtrunc(xtp);
		DECR_COUNT(&xtp->tn_nlink, &xtp->tn_tlock);
		tp->tn_xattrdp = NULL;
		rw_exit(&xtp->tn_rwlock);
		tmpnode_rele(xtp);
	}

	mutex_exit(&vp->v_lock);
	mutex_exit(&tp->tn_tlock);
	/* Here's our chance to send invalid event while we're between locks */
	vn_invalid(TNTOV(tp));
	mutex_enter(&tm->tm_contents);
	if (tp->tn_forw == NULL)
		tm->tm_rootnode->tn_back = tp->tn_back;
	else
		tp->tn_forw->tn_back = tp->tn_back;
	tp->tn_back->tn_forw = tp->tn_forw;
	mutex_exit(&tm->tm_contents);
	rw_exit(&tp->tn_rwlock);
	rw_destroy(&tp->tn_rwlock);
	mutex_destroy(&tp->tn_tlock);
	vn_free(TNTOV(tp));
	tmp_memfree(tp, sizeof (struct tmpnode));
}

static int
tmp_fid(struct vnode *vp, struct fid *fidp)
{
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	struct tfid *tfid;

	if (fidp->fid_len < (sizeof (struct tfid) - sizeof (ushort_t))) {
		fidp->fid_len = sizeof (struct tfid) - sizeof (ushort_t);
		return (ENOSPC);
	}

	tfid = (struct tfid *)fidp;
	bzero(tfid, sizeof (struct tfid));
	tfid->tfid_len = (int)sizeof (struct tfid) - sizeof (ushort_t);

	tfid->tfid_ino = tp->tn_nodeid;
	tfid->tfid_gen = tp->tn_gen;

	return (0);
}


/*
 * Return all the pages from [off..off+len] in given file
 */
static int
tmp_getpage(
	struct vnode *vp,
	offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	int err = 0;
	struct tmpnode *tp = VTOTN(vp);
	anoff_t toff = (anoff_t)off;
	size_t tlen = len;
	u_offset_t tmpoff;
	timestruc_t now;

	rw_enter(&tp->tn_contents, RW_READER);

	if (off + len > tp->tn_size + PAGEOFFSET) {
		err = EFAULT;
		goto out;
	}
	/*
	 * Look for holes (no anon slot) in faulting range.  If there are
	 * holes we have to switch to a write lock and fill them in.  Swap
	 * space for holes was already reserved when the file was grown.
	 */
	tmpoff = toff;
	if (non_anon(tp->tn_anon, btop(off), &tmpoff, &tlen)) {
		if (!rw_tryupgrade(&tp->tn_contents)) {
			rw_exit(&tp->tn_contents);
			rw_enter(&tp->tn_contents, RW_WRITER);
			/* Size may have changed when lock was dropped */
			if (off + len > tp->tn_size + PAGEOFFSET) {
				err = EFAULT;
				goto out;
			}
		}
		for (toff = (anoff_t)off; toff < (anoff_t)off + len;
		    toff += PAGESIZE) {
			if (anon_get_ptr(tp->tn_anon, btop(toff)) == NULL) {
				/* XXX - may allocate mem w. write lock held */
				(void) anon_set_ptr(tp->tn_anon, btop(toff),
				    anon_alloc(vp, toff),
				    ANON_SLEEP);
				tp->tn_nblocks++;
			}
		}
		rw_downgrade(&tp->tn_contents);
	}


	if (len <= PAGESIZE)
		err = tmp_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
		    seg, addr, rw, cr);
	else
		err = pvn_getpages(tmp_getapage, vp, (u_offset_t)off, len,
		    protp, pl, plsz, seg, addr, rw, cr);

	gethrestime(&now);
	tp->tn_atime = now;
	if (rw == S_WRITE)
		tp->tn_mtime = now;

out:
	rw_exit(&tp->tn_contents);
	return (err);
}

/*
 * Called from pvn_getpages or swap_getpage to get a particular page.
 */
/*ARGSUSED*/
static int
tmp_getapage(
	struct vnode *vp,
	u_offset_t off,
	size_t len,
	uint_t *protp,
	page_t *pl[],
	size_t plsz,
	struct seg *seg,
	caddr_t addr,
	enum seg_rw rw,
	struct cred *cr)
{
	struct page *pp;
	int flags;
	int err = 0;
	struct vnode *pvp;
	u_offset_t poff;

	if (protp != NULL)
		*protp = PROT_ALL;
again:
	if (pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED)) {
		if (pl) {
			pl[0] = pp;
			pl[1] = NULL;
		} else {
			page_unlock(pp);
		}
	} else {
		pp = page_create_va(vp, off, PAGESIZE,
		    PG_WAIT | PG_EXCL, seg, addr);
		/*
		 * Someone raced in and created the page after we did the
		 * lookup but before we did the create, so go back and
		 * try to look it up again.
		 */
		if (pp == NULL)
			goto again;
		/*
		 * Fill page from backing store, if any.  If none, then
		 * either this is a newly filled hole or page must have
		 * been unmodified and freed so just zero it out.
		 */
		err = swap_getphysname(vp, off, &pvp, &poff);
		if (err) {
			panic("tmp_getapage: no anon slot vp %p "
			    "off %llx pp %p\n", (void *)vp, off, (void *)pp);
		}
		if (pvp) {
			flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
			err = VOP_PAGEIO(pvp, pp, (u_offset_t)poff, PAGESIZE,
			    flags, cr);
			if (flags & B_ASYNC)
				pp = NULL;
		} else if (rw != S_CREATE) {
			pagezero(pp, 0, PAGESIZE);
		}
		if (err && pp)
			pvn_read_done(pp, B_ERROR);
		if (err == 0) {
			if (pl)
				pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
			else
				pvn_io_done(pp);
		}
	}
	return (err);
}


/*
 * Flags are composed of {B_INVAL, B_DIRTY, B_FREE, B_DONTNEED}.
 * If len == 0, do from off to EOF.
 */
static int tmp_nopage = 0;	/* Don't do tmp_putpage's if set */

/* ARGSUSED */
int
tmp_putpage(
	register struct vnode *vp,
	offset_t off,
	size_t len,
	int flags,
	struct cred *cr)
{
	register page_t *pp;
	u_offset_t io_off;
	size_t io_len = 0;
	int err = 0;
	struct tmpnode *tp = VTOTN(vp);
	int dolock;

	if (tmp_nopage)
		return (0);

	ASSERT(vp->v_count != 0);

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	/*
	 * This being tmpfs, we don't ever do i/o unless we really
	 * have to (when we're low on memory and pageout calls us
	 * with B_ASYNC | B_FREE or the user explicitly asks for it with
	 * B_DONTNEED).
	 * XXX to approximately track the mod time like ufs we should
	 * update the times here.
	 * The problem is, once someone does a store we never clear the
	 * mod bit and do i/o, thus fsflush will keep calling us every
	 * 30 seconds to do the i/o and we'll continually update the mod
	 * time.  At least we update the mod time on the first store
	 * because this results in a call to getpage.
	 */
	if (flags != (B_ASYNC | B_FREE) && (flags & B_INVAL) == 0 &&
	    (flags & B_DONTNEED) == 0)
		return (0);
	/*
	 * If this thread owns the lock, i.e., this thread grabbed it
	 * as writer somewhere above, then we don't need to grab the
	 * lock as reader in this routine.
	 */
	dolock = (rw_owner(&tp->tn_contents) != curthread);

	/*
	 * If this is pageout don't block on the lock as you could deadlock
	 * when freemem == 0 (another thread has the read lock and is blocked
	 * creating a page, and a third thread is waiting to get the writers
	 * lock - waiting writers priority blocks us from getting the read
	 * lock).  Of course, if the only freeable pages are on this tmpnode
	 * we're hosed anyways.  A better solution might be a new lock type.
	 * Note: ufs has the same problem.
	 */
	if (curproc == proc_pageout) {
		if (!rw_tryenter(&tp->tn_contents, RW_READER))
			return (ENOMEM);
	} else if (dolock)
		rw_enter(&tp->tn_contents, RW_READER);

	if (!vn_has_cached_data(vp))
		goto out;

	if (len == 0) {
		if (curproc == proc_pageout) {
			panic("tmp: pageout can't block");
			/*NOTREACHED*/
		}

		/* Search the entire vp list for pages >= off. */
		err = pvn_vplist_dirty(vp, (u_offset_t)off, tmp_putapage,
		    flags, cr);
	} else {
		u_offset_t eoff;

		/*
		 * Loop over all offsets in the range [off...off + len]
		 * looking for pages to deal with.
		 */
		eoff = MIN(off + len, tp->tn_size);
		for (io_off = off; io_off < eoff; io_off += io_len) {
			/*
			 * If we are not invalidating, synchronously
			 * freeing or writing pages use the routine
			 * page_lookup_nowait() to prevent reclaiming
			 * them from the free list.
			 */
			if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
				pp = page_lookup(vp, io_off,
				    (flags & (B_INVAL | B_FREE)) ?
				    SE_EXCL : SE_SHARED);
			} else {
				pp = page_lookup_nowait(vp, io_off,
				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
			}

			if (pp == NULL || pvn_getdirty(pp, flags) == 0)
				io_len = PAGESIZE;
			else {
				err = tmp_putapage(vp, pp, &io_off, &io_len,
				    flags, cr);
				if (err != 0)
					break;
			}
		}
	}
	/* If invalidating, verify all pages on vnode list are gone. */
	if (err == 0 && off == 0 && len == 0 &&
	    (flags & B_INVAL) && vn_has_cached_data(vp)) {
		panic("tmp_putpage: B_INVAL, pages not gone");
		/*NOTREACHED*/
	}
out:
	if ((curproc == proc_pageout) || dolock)
		rw_exit(&tp->tn_contents);
	/*
	 * Only reason putapage is going to give us SE_NOSWAP as error
	 * is when we ask a page to be written to physical backing store
	 * and there is none.  Ignore this because we might be dealing
	 * with a swap page which does not have any backing store
	 * on disk.  In any other case we won't get this error over here.
	 */
	if (err == SE_NOSWAP)
		err = 0;
	return (err);
}

long tmp_putpagecnt, tmp_pagespushed;

/*
 * Write out a single page.
 * For tmpfs this means choose a physical swap slot and write the page
 * out using VOP_PAGEIO.
 * For performance, we attempt to kluster; i.e., we try to find a
 * bunch of other dirty pages adjacent in the file and a bunch of
 * contiguous swap slots, and then write all the pages out in a
 * single i/o.
 */
/*ARGSUSED*/
static int
tmp_putapage(
	struct vnode *vp,
	page_t *pp,
	u_offset_t *offp,
	size_t *lenp,
	int flags,
	struct cred *cr)
{
	int err;
	ulong_t klstart, kllen;
	page_t *pplist, *npplist;
	extern int klustsize;
	long tmp_klustsize;
	struct tmpnode *tp;
	size_t pp_off, pp_len;
	u_offset_t io_off;
	size_t io_len;
	struct vnode *pvp;
	u_offset_t pstart;
	u_offset_t offset;
	u_offset_t tmpoff;

	ASSERT(PAGE_LOCKED(pp));

	/* Kluster in tmp_klustsize chunks */
	tp = VTOTN(vp);
	tmp_klustsize = klustsize;
	offset = pp->p_offset;
	klstart = (offset / tmp_klustsize) * tmp_klustsize;
	kllen = MIN(tmp_klustsize, tp->tn_size - klstart);

	/* Get a kluster of pages */
	pplist =
	    pvn_write_kluster(vp, pp, &tmpoff, &pp_len, klstart, kllen, flags);

	pp_off = (size_t)tmpoff;

	/*
	 * Get a cluster of physical offsets for the pages; the amount we
	 * get may be some subrange of what we ask for (io_off, io_len).
	 */
	io_off = pp_off;
	io_len = pp_len;
	err = swap_newphysname(vp, offset, &io_off, &io_len, &pvp, &pstart);
	ASSERT(err != SE_NOANON); /* anon slot must have been filled */
	if (err) {
		pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
		/*
		 * If this routine is called as a result of segvn_sync
		 * operation and we have no physical swap then we can get an
		 * error here.  In such case we would return SE_NOSWAP as
		 * error.  At this point, we expect only SE_NOSWAP.
		 */
		ASSERT(err == SE_NOSWAP);
		if (flags & B_INVAL)
			err = ENOMEM;
		goto out;
	}
	ASSERT(pp_off <= io_off && io_off + io_len <= pp_off + pp_len);
	ASSERT(io_off <= offset && offset < io_off + io_len);

	/* Toss pages at front/rear that we couldn't get physical backing for */
	if (io_off != pp_off) {
		npplist = NULL;
		page_list_break(&pplist, &npplist, btop(io_off - pp_off));
		ASSERT(pplist->p_offset == pp_off);
		ASSERT(pplist->p_prev->p_offset == io_off - PAGESIZE);
		pvn_write_done(pplist, B_ERROR | B_WRITE | flags);
		pplist = npplist;
	}
	if (io_off + io_len < pp_off + pp_len) {
		npplist = NULL;
		page_list_break(&pplist, &npplist, btop(io_len));
		ASSERT(npplist->p_offset == io_off + io_len);
		ASSERT(npplist->p_prev->p_offset == pp_off + pp_len - PAGESIZE);
		pvn_write_done(npplist, B_ERROR | B_WRITE | flags);
	}

	ASSERT(pplist->p_offset == io_off);
	ASSERT(pplist->p_prev->p_offset == io_off + io_len - PAGESIZE);
	ASSERT(btopr(io_len) <= btopr(kllen));

	/* Do i/o on the remaining kluster */
	err = VOP_PAGEIO(pvp, pplist, (u_offset_t)pstart, io_len,
	    B_WRITE | flags, cr);

	if ((flags & B_ASYNC) == 0) {
		pvn_write_done(pplist, ((err) ? B_ERROR : 0) | B_WRITE | flags);
	}
out:
	if (!err) {
		if (offp)
			*offp = io_off;
		if (lenp)
			*lenp = io_len;
		tmp_putpagecnt++;
		tmp_pagespushed += btop(io_len);
	}
	if (err && err != ENOMEM && err != SE_NOSWAP)
		cmn_err(CE_WARN, "tmp_putapage: err %d\n", err);
	return (err);
}

static int
tmp_map(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t *addrp,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	struct segvn_crargs vn_a;
	struct tmpnode *tp = (struct tmpnode *)VTOTN(vp);
	int error;

#ifdef _ILP32
	if (len > MAXOFF_T)
		return (ENOMEM);
#endif

	if (vp->v_flag & VNOMAP)
		return (ENOSYS);

	if (off < 0 || (off + len) < 0 ||
	    off > MAXOFF_T || (off + len) > MAXOFF_T)
		return (ENXIO);

	if (vp->v_type != VREG)
		return (ENODEV);

	/*
	 * Don't allow mapping to locked file
	 */
	if (vn_has_mandatory_locks(vp, tp->tn_mode)) {
		return (EAGAIN);
	}

	as_rangelock(as);
	if ((flags & MAP_FIXED) == 0) {
		map_addr(addrp, len, (offset_t)off, 1, flags);
		if (*addrp == NULL) {
			as_rangeunlock(as);
			return (ENOMEM);
		}
	} else {
		/*
		 * User specified address - blow away any previous mappings
		 */
		(void) as_unmap(as, *addrp, len);
	}

	vn_a.vp = vp;
	vn_a.offset = (u_offset_t)off;
	vn_a.type = flags & MAP_TYPE;
	vn_a.prot = prot;
	vn_a.maxprot = maxprot;
	vn_a.flags = flags & ~MAP_TYPE;
	vn_a.cred = cred;
	vn_a.amp = NULL;
	vn_a.szc = 0;
	vn_a.lgrp_mem_policy_flags = 0;

	error = as_map(as, *addrp, len, segvn_create, &vn_a);
	as_rangeunlock(as);
	return (error);
}

/*
 * tmp_addmap and tmp_delmap can't be called since the vp
 * maintained in the segvn mapping is NULL.
 */
/* ARGSUSED */
static int
tmp_addmap(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uchar_t prot,
	uchar_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	return (0);
}

/* ARGSUSED */
static int
tmp_delmap(
	struct vnode *vp,
	offset_t off,
	struct as *as,
	caddr_t addr,
	size_t len,
	uint_t prot,
	uint_t maxprot,
	uint_t flags,
	struct cred *cred)
{
	return (0);
}

static int
tmp_freesp(struct vnode *vp, struct flock64 *lp, int flag)
{
	register int i;
	register struct tmpnode *tp = VTOTN(vp);
	int error;

	ASSERT(vp->v_type == VREG);
	ASSERT(lp->l_start >= 0);

	if (lp->l_len != 0)
		return (EINVAL);

	rw_enter(&tp->tn_rwlock, RW_WRITER);
	if (tp->tn_size == lp->l_start) {
		rw_exit(&tp->tn_rwlock);
		return (0);
	}

	/*
	 * Check for any mandatory locks on the range
	 */
	if (MANDLOCK(vp, tp->tn_mode)) {
		long save_start;

		save_start = lp->l_start;

		if (tp->tn_size < lp->l_start) {
			/*
			 * "Truncate up" case: need to make sure there
			 * is no lock beyond current end-of-file.  To
			 * do so, we need to set l_start to the size
			 * of the file temporarily.
			 */
			lp->l_start = tp->tn_size;
		}
		lp->l_type = F_WRLCK;
		lp->l_sysid = 0;
		lp->l_pid = ttoproc(curthread)->p_pid;
		i = (flag & (FNDELAY|FNONBLOCK)) ? 0 : SLPFLCK;
		if ((i = reclock(vp, lp, i, 0, lp->l_start, NULL)) != 0 ||
		    lp->l_type != F_UNLCK) {
			rw_exit(&tp->tn_rwlock);
			return (i ? i : EAGAIN);
		}

		lp->l_start = save_start;
	}
	VFSTOTM(vp->v_vfsp);

	rw_enter(&tp->tn_contents, RW_WRITER);
	error = tmpnode_trunc((struct tmount *)VFSTOTM(vp->v_vfsp),
	    tp, (ulong_t)lp->l_start);
	rw_exit(&tp->tn_contents);
	rw_exit(&tp->tn_rwlock);
	return (error);
}

/* ARGSUSED */
static int
tmp_space(
	struct vnode *vp,
	int cmd,
	struct flock64 *bfp,
	int flag,
	offset_t offset,
	cred_t *cred,
	caller_context_t *ct)
{
	int error;

	if (cmd != F_FREESP)
		return (EINVAL);
	if ((error = convoff(vp, bfp, 0, (offset_t)offset)) == 0) {
		if ((bfp->l_start > MAXOFF_T) || (bfp->l_len > MAXOFF_T))
			return (EFBIG);
		error = tmp_freesp(vp, bfp, flag);
	}
	return (error);
}

/* ARGSUSED */
static int
tmp_seek(struct vnode *vp, offset_t ooff, offset_t *noffp)
{
	return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}

/* ARGSUSED2 */
static int
tmp_rwlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	struct tmpnode *tp = VTOTN(vp);

	if (write_lock) {
		rw_enter(&tp->tn_rwlock, RW_WRITER);
	} else {
		rw_enter(&tp->tn_rwlock, RW_READER);
	}
	return (write_lock);
}

/* ARGSUSED1 */
static void
tmp_rwunlock(struct vnode *vp, int write_lock, caller_context_t *ctp)
{
	struct tmpnode *tp = VTOTN(vp);

	rw_exit(&tp->tn_rwlock);
}

static int
tmp_pathconf(struct vnode *vp, int cmd, ulong_t *valp, cred_t *cr)
{
	struct tmpnode *tp = NULL;
	int error;

	switch (cmd) {
	case _PC_XATTR_EXISTS:
		if (vp->v_vfsp->vfs_flag & VFS_XATTR) {
			*valp = 0;	/* assume no attributes */
			error = 0;	/* okay to ask */
			tp = VTOTN(vp);
			rw_enter(&tp->tn_rwlock, RW_READER);
			if (tp->tn_xattrdp) {
				rw_enter(&tp->tn_xattrdp->tn_rwlock,
				    RW_READER);
				/* do not count "." and ".." */
				if (tp->tn_xattrdp->tn_dirents > 2)
					*valp = 1;
				rw_exit(&tp->tn_xattrdp->tn_rwlock);
			}
			rw_exit(&tp->tn_rwlock);
		} else {
			error = EINVAL;
		}
		break;
	default:
		error = fs_pathconf(vp, cmd, valp, cr);
	}
	return (error);
}


struct vnodeops *tmp_vnodeops;

const fs_operation_def_t tmp_vnodeops_template[] = {
	VOPNAME_OPEN, tmp_open,
	VOPNAME_CLOSE, tmp_close,
	VOPNAME_READ, tmp_read,
	VOPNAME_WRITE, tmp_write,
	VOPNAME_IOCTL, tmp_ioctl,
	VOPNAME_GETATTR, tmp_getattr,
	VOPNAME_SETATTR, tmp_setattr,
	VOPNAME_ACCESS, tmp_access,
	VOPNAME_LOOKUP, tmp_lookup,
	VOPNAME_CREATE, tmp_create,
	VOPNAME_REMOVE, tmp_remove,
	VOPNAME_LINK, tmp_link,
	VOPNAME_RENAME, tmp_rename,
	VOPNAME_MKDIR, tmp_mkdir,
	VOPNAME_RMDIR, tmp_rmdir,
	VOPNAME_READDIR, tmp_readdir,
	VOPNAME_SYMLINK, tmp_symlink,
	VOPNAME_READLINK, tmp_readlink,
	VOPNAME_FSYNC, tmp_fsync,
	VOPNAME_INACTIVE, (fs_generic_func_p) tmp_inactive,
	VOPNAME_FID, tmp_fid,
	VOPNAME_RWLOCK, tmp_rwlock,
	VOPNAME_RWUNLOCK, (fs_generic_func_p) tmp_rwunlock,
	VOPNAME_SEEK, tmp_seek,
	VOPNAME_SPACE, tmp_space,
	VOPNAME_GETPAGE, tmp_getpage,
	VOPNAME_PUTPAGE, tmp_putpage,
	VOPNAME_MAP, (fs_generic_func_p) tmp_map,
	VOPNAME_ADDMAP, (fs_generic_func_p) tmp_addmap,
	VOPNAME_DELMAP, tmp_delmap,
	VOPNAME_PATHCONF, tmp_pathconf,
	VOPNAME_VNEVENT, fs_vnevent_support,
	NULL, NULL
};