1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/t_lock.h> 31 #include <sys/param.h> 32 #include <sys/time.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/resource.h> 36 #include <sys/signal.h> 37 #include <sys/cred.h> 38 #include <sys/user.h> 39 #include <sys/buf.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/proc.h> 43 #include <sys/disp.h> 44 #include <sys/file.h> 45 #include <sys/fcntl.h> 46 #include <sys/flock.h> 47 #include <sys/atomic.h> 48 #include <sys/kmem.h> 49 #include <sys/uio.h> 50 #include <sys/conf.h> 51 #include <sys/mman.h> 52 #include <sys/pathname.h> 53 #include <sys/debug.h> 54 #include <sys/vmmeter.h> 55 #include <sys/vmsystm.h> 56 #include <sys/cmn_err.h> 57 #include <sys/vtrace.h> 58 #include <sys/acct.h> 59 #include <sys/dnlc.h> 60 #include <sys/swap.h> 61 62 #include <sys/fs/ufs_fs.h> 63 #include <sys/fs/ufs_inode.h> 64 #include <sys/fs/ufs_fsdir.h> 65 #include <sys/fs/ufs_trans.h> 66 #include <sys/fs/ufs_panic.h> 67 #include <sys/fs/ufs_mount.h> 68 #include <sys/fs/ufs_bio.h> 69 #include <sys/fs/ufs_log.h> 70 #include <sys/fs/ufs_quota.h> 71 #include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */ 72 #include <sys/errno.h> 73 #include <sys/sysinfo.h> 74 75 #include <vm/hat.h> 76 #include <vm/pvn.h> 77 #include <vm/as.h> 78 #include <vm/seg.h> 79 #include <vm/seg_map.h> 80 #include <vm/seg_vn.h> 81 #include <vm/rm.h> 82 #include <vm/anon.h> 83 #include <sys/swap.h> 84 #include <sys/dnlc.h> 85 86 extern struct vnode *common_specvp(struct vnode *vp); 87 88 /* error lock status */ 89 #define UN_ERRLCK (-1) 90 #define SET_ERRLCK 1 91 #define RE_ERRLCK 2 92 #define NO_ERRLCK 0 93 94 /* 95 * Index to be used in TSD for storing lockfs data 96 */ 97 uint_t ufs_lockfs_key; 98 99 typedef struct _ulockfs_info { 100 struct _ulockfs_info *next; 101 struct ulockfs *ulp; 102 uint_t flags; 103 } ulockfs_info_t; 104 105 #define ULOCK_INFO_FALLOCATE 0x00000001 /* fallocate thread */ 106 107 /* 108 * Check in TSD that whether we are already doing any VOP on this filesystem 109 */ 110 #define IS_REC_VOP(found, head, ulp, free) \ 111 { \ 112 ulockfs_info_t *_curr; \ 113 \ 114 for (found = 0, free = NULL, _curr = head; \ 115 _curr != NULL; _curr = _curr->next) { \ 116 if ((free == NULL) && \ 117 (_curr->ulp == NULL)) \ 118 free = _curr; \ 119 if (_curr->ulp == ulp) { \ 120 found = 1; \ 121 break; \ 122 } \ 123 } \ 124 } 125 126 /* 127 * Get the lockfs data from TSD so that lockfs handles the recursive VOP 128 * properly 129 */ 130 #define SEARCH_ULOCKFSP(head, ulp, info) \ 131 { \ 132 ulockfs_info_t *_curr; \ 133 \ 134 for (_curr = head; _curr != NULL; \ 135 _curr = _curr->next) { \ 136 if (_curr->ulp == ulp) { \ 137 break; \ 138 } \ 139 } \ 140 \ 141 info = _curr; \ 142 } 143 144 /* 145 * Validate lockfs request 146 */ 147 static int 148 ufs_getlfd( 149 struct lockfs *lockfsp, /* new lock request */ 150 struct lockfs *ul_lockfsp) /* old lock state */ 151 { 152 int error = 0; 153 154 /* 155 * no input flags defined 156 */ 157 if (lockfsp->lf_flags != 0) { 158 error = EINVAL; 159 goto errout; 160 } 161 162 /* 163 * check key 164 */ 165 if (!LOCKFS_IS_ULOCK(ul_lockfsp)) 166 if (lockfsp->lf_key != ul_lockfsp->lf_key) { 167 error = EINVAL; 168 goto errout; 169 } 170 171 lockfsp->lf_key = ul_lockfsp->lf_key + 1; 172 173 errout: 174 return (error); 175 } 176 177 /* 178 * ufs_checkaccton 179 * check if accounting is turned on on this fs 180 */ 181 182 int 183 ufs_checkaccton(struct vnode *vp) 184 { 185 if (acct_fs_in_use(vp)) 186 return (EDEADLK); 187 return (0); 188 } 189 190 /* 191 * ufs_checkswapon 192 * check if local swapping is to file on this fs 193 */ 194 int 195 ufs_checkswapon(struct vnode *vp) 196 { 197 struct swapinfo *sip; 198 199 mutex_enter(&swapinfo_lock); 200 for (sip = swapinfo; sip; sip = sip->si_next) 201 if (sip->si_vp->v_vfsp == vp->v_vfsp) { 202 mutex_exit(&swapinfo_lock); 203 return (EDEADLK); 204 } 205 mutex_exit(&swapinfo_lock); 206 return (0); 207 } 208 209 /* 210 * ufs_freeze 211 * pend future accesses for current lock and desired lock 212 */ 213 void 214 ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp) 215 { 216 /* 217 * set to new lock type 218 */ 219 ulp->ul_lockfs.lf_lock = lockfsp->lf_lock; 220 ulp->ul_lockfs.lf_key = lockfsp->lf_key; 221 ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen; 222 ulp->ul_lockfs.lf_comment = lockfsp->lf_comment; 223 224 ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock); 225 } 226 227 /* 228 * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before 229 * starting ufs_quiesce() protocol and decrement it only when a file system no 230 * longer has to be in quiescent state. This allows ufs_pageio() to detect 231 * that another thread wants to quiesce a file system. See more comments in 232 * ufs_pageio(). 233 */ 234 ulong_t ufs_quiesce_pend = 0; 235 236 /* 237 * ufs_quiesce 238 * wait for outstanding accesses to finish 239 */ 240 int 241 ufs_quiesce(struct ulockfs *ulp) 242 { 243 int error = 0; 244 ulockfs_info_t *head; 245 ulockfs_info_t *info; 246 247 head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 248 SEARCH_ULOCKFSP(head, ulp, info); 249 250 /* 251 * Set a softlock to suspend future ufs_vnops so that 252 * this lockfs request will not be starved 253 */ 254 ULOCKFS_SET_SLOCK(ulp); 255 ASSERT(ufs_quiesce_pend); 256 257 /* check if there is any outstanding ufs vnodeops calls */ 258 while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt) { 259 /* 260 * use timed version of cv_wait_sig() to make sure we don't 261 * miss a wake up call from ufs_pageio() when it doesn't use 262 * ul_lock. 263 * 264 * when a fallocate thread comes in, the only way it returns 265 * from this function is if there are no other vnode operations 266 * going on (remember fallocate threads are tracked using 267 * ul_falloc_cnt not ul_vnops_cnt), and another fallocate thread 268 * hasn't already grabbed the fs write lock. 269 */ 270 if (info && (info->flags & ULOCK_INFO_FALLOCATE)) { 271 if (!ulp->ul_vnops_cnt && !ULOCKFS_IS_FWLOCK(ulp)) 272 goto out; 273 } 274 if (!cv_timedwait_sig(&ulp->ul_cv, &ulp->ul_lock, lbolt + hz)) { 275 error = EINTR; 276 goto out; 277 } 278 } 279 280 out: 281 /* 282 * unlock the soft lock 283 */ 284 ULOCKFS_CLR_SLOCK(ulp); 285 286 return (error); 287 } 288 289 /* 290 * ufs_flush_inode 291 */ 292 int 293 ufs_flush_inode(struct inode *ip, void *arg) 294 { 295 int error; 296 int saverror = 0; 297 298 /* 299 * wrong file system; keep looking 300 */ 301 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 302 return (0); 303 304 /* 305 * asynchronously push all the dirty pages 306 */ 307 if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) && 308 (error != EAGAIN)) 309 saverror = error; 310 /* 311 * wait for io and discard all mappings 312 */ 313 if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI)) 314 saverror = error; 315 316 if (ITOV(ip)->v_type == VDIR) { 317 dnlc_dir_purge(&ip->i_danchor); 318 } 319 320 return (saverror); 321 } 322 323 /* 324 * ufs_flush 325 * Flush everything that is currently dirty; this includes invalidating 326 * any mappings. 327 */ 328 int 329 ufs_flush(struct vfs *vfsp) 330 { 331 int error; 332 int saverror = 0; 333 struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 334 struct fs *fs = ufsvfsp->vfs_fs; 335 336 ASSERT(vfs_lock_held(vfsp)); 337 338 /* 339 * purge dnlc 340 */ 341 (void) dnlc_purge_vfsp(vfsp, 0); 342 343 /* 344 * drain the delete and idle threads 345 */ 346 ufs_delete_drain(vfsp, 0, 0); 347 ufs_idle_drain(vfsp); 348 349 /* 350 * flush and invalidate quota records 351 */ 352 (void) qsync(ufsvfsp); 353 354 /* 355 * flush w/invalidate the inodes for vfsp 356 */ 357 if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp)) 358 saverror = error; 359 360 /* 361 * synchronously flush superblock and summary info 362 */ 363 if (fs->fs_ronly == 0 && fs->fs_fmod) { 364 fs->fs_fmod = 0; 365 TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH); 366 } 367 /* 368 * flush w/invalidate block device pages and buf cache 369 */ 370 if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp), 371 (offset_t)0, 0, B_INVAL, CRED())) > 0) 372 saverror = error; 373 374 (void) bflush((dev_t)vfsp->vfs_dev); 375 (void) bfinval((dev_t)vfsp->vfs_dev, 0); 376 377 /* 378 * drain the delete and idle threads again 379 */ 380 ufs_delete_drain(vfsp, 0, 0); 381 ufs_idle_drain(vfsp); 382 383 /* 384 * play with the clean flag 385 */ 386 if (saverror == 0) 387 ufs_checkclean(vfsp); 388 389 /* 390 * Flush any outstanding transactions and roll the log 391 * only if we are supposed to do, i.e. LDL_NOROLL not set. 392 * We can not simply check for fs_ronly here since fsck also may 393 * use this code to roll the log on a read-only filesystem, e.g. 394 * root during early stages of boot, if other then a sanity check is 395 * done, it will clear LDL_NOROLL before. 396 * In addition we assert that the deltamap does not contain any deltas 397 * in case LDL_NOROLL is set since this is not supposed to happen. 398 */ 399 if (TRANS_ISTRANS(ufsvfsp)) { 400 ml_unit_t *ul = ufsvfsp->vfs_log; 401 mt_map_t *mtm = ul->un_deltamap; 402 403 if (ul->un_flags & LDL_NOROLL) { 404 ASSERT(mtm->mtm_nme == 0); 405 } else { 406 curthread->t_flag |= T_DONTBLOCK; 407 TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH, 408 TOP_COMMIT_SIZE, error); 409 if (!error) { 410 TRANS_END_SYNC(ufsvfsp, saverror, 411 TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE); 412 } 413 curthread->t_flag &= ~T_DONTBLOCK; 414 logmap_roll_dev(ufsvfsp->vfs_log); 415 } 416 } 417 418 return (saverror); 419 } 420 421 /* 422 * ufs_thaw_wlock 423 * special processing when thawing down to wlock 424 */ 425 static int 426 ufs_thaw_wlock(struct inode *ip, void *arg) 427 { 428 /* 429 * wrong file system; keep looking 430 */ 431 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 432 return (0); 433 434 /* 435 * iupdat refuses to clear flags if the fs is read only. The fs 436 * may become read/write during the lock and we wouldn't want 437 * these inodes being written to disk. So clear the flags. 438 */ 439 rw_enter(&ip->i_contents, RW_WRITER); 440 ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG); 441 rw_exit(&ip->i_contents); 442 443 /* 444 * pages are mlocked -- fail wlock 445 */ 446 if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip))) 447 return (EBUSY); 448 449 return (0); 450 } 451 452 /* 453 * ufs_thaw_hlock 454 * special processing when thawing down to hlock or elock 455 */ 456 static int 457 ufs_thaw_hlock(struct inode *ip, void *arg) 458 { 459 struct vnode *vp = ITOV(ip); 460 461 /* 462 * wrong file system; keep looking 463 */ 464 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 465 return (0); 466 467 /* 468 * blow away all pages - even if they are mlocked 469 */ 470 do { 471 (void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK); 472 } while ((vp->v_type != VCHR) && vn_has_cached_data(vp)); 473 rw_enter(&ip->i_contents, RW_WRITER); 474 ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG); 475 rw_exit(&ip->i_contents); 476 477 return (0); 478 } 479 480 /* 481 * ufs_thaw 482 * thaw file system lock down to current value 483 */ 484 int 485 ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp) 486 { 487 int error = 0; 488 int noidel = (int)(ulp->ul_flag & ULOCKFS_NOIDEL); 489 490 /* 491 * if wlock or hlock or elock 492 */ 493 if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) || 494 ULOCKFS_IS_ELOCK(ulp)) { 495 496 /* 497 * don't keep access times 498 * don't free deleted files 499 * if superblock writes are allowed, limit them to me for now 500 */ 501 ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL); 502 if (ulp->ul_sbowner != (kthread_id_t)-1) 503 ulp->ul_sbowner = curthread; 504 505 /* 506 * wait for writes for deleted files and superblock updates 507 */ 508 (void) ufs_flush(vfsp); 509 510 /* 511 * now make sure the quota file is up-to-date 512 * expensive; but effective 513 */ 514 error = ufs_flush(vfsp); 515 /* 516 * no one can write the superblock 517 */ 518 ulp->ul_sbowner = (kthread_id_t)-1; 519 520 /* 521 * special processing for wlock/hlock/elock 522 */ 523 if (ULOCKFS_IS_WLOCK(ulp)) { 524 if (error) 525 goto errout; 526 error = bfinval(ufsvfsp->vfs_dev, 0); 527 if (error) 528 goto errout; 529 error = ufs_scan_inodes(0, ufs_thaw_wlock, 530 (void *)ufsvfsp, ufsvfsp); 531 if (error) 532 goto errout; 533 } 534 if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) { 535 error = 0; 536 (void) ufs_scan_inodes(0, ufs_thaw_hlock, 537 (void *)ufsvfsp, ufsvfsp); 538 (void) bfinval(ufsvfsp->vfs_dev, 1); 539 } 540 } else { 541 542 /* 543 * okay to keep access times 544 * okay to free deleted files 545 * okay to write the superblock 546 */ 547 ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL); 548 ulp->ul_sbowner = NULL; 549 550 /* 551 * flush in case deleted files are in memory 552 */ 553 if (noidel) { 554 if (error = ufs_flush(vfsp)) 555 goto errout; 556 } 557 } 558 559 errout: 560 cv_broadcast(&ulp->ul_cv); 561 return (error); 562 } 563 564 /* 565 * ufs_reconcile_fs 566 * reconcile incore superblock with ondisk superblock 567 */ 568 int 569 ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck) 570 { 571 struct fs *mfs; /* in-memory superblock */ 572 struct fs *dfs; /* on-disk superblock */ 573 struct buf *bp; /* on-disk superblock buf */ 574 int needs_unlock; 575 char finished_fsclean; 576 577 mfs = ufsvfsp->vfs_fs; 578 579 /* 580 * get the on-disk copy of the superblock 581 */ 582 bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE); 583 bp->b_flags |= (B_STALE|B_AGE); 584 if (bp->b_flags & B_ERROR) { 585 brelse(bp); 586 return (EIO); 587 } 588 dfs = bp->b_un.b_fs; 589 590 /* error locks may only unlock after the fs has been made consistent */ 591 if (errlck == UN_ERRLCK) { 592 if (dfs->fs_clean == FSFIX) { /* being repaired */ 593 brelse(bp); 594 return (EAGAIN); 595 } 596 /* repair not yet started? */ 597 finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN; 598 if (dfs->fs_clean != finished_fsclean) { 599 brelse(bp); 600 return (EBUSY); 601 } 602 } 603 604 /* 605 * if superblock has changed too much, abort 606 */ 607 if ((mfs->fs_sblkno != dfs->fs_sblkno) || 608 (mfs->fs_cblkno != dfs->fs_cblkno) || 609 (mfs->fs_iblkno != dfs->fs_iblkno) || 610 (mfs->fs_dblkno != dfs->fs_dblkno) || 611 (mfs->fs_cgoffset != dfs->fs_cgoffset) || 612 (mfs->fs_cgmask != dfs->fs_cgmask) || 613 (mfs->fs_bsize != dfs->fs_bsize) || 614 (mfs->fs_fsize != dfs->fs_fsize) || 615 (mfs->fs_frag != dfs->fs_frag) || 616 (mfs->fs_bmask != dfs->fs_bmask) || 617 (mfs->fs_fmask != dfs->fs_fmask) || 618 (mfs->fs_bshift != dfs->fs_bshift) || 619 (mfs->fs_fshift != dfs->fs_fshift) || 620 (mfs->fs_fragshift != dfs->fs_fragshift) || 621 (mfs->fs_fsbtodb != dfs->fs_fsbtodb) || 622 (mfs->fs_sbsize != dfs->fs_sbsize) || 623 (mfs->fs_nindir != dfs->fs_nindir) || 624 (mfs->fs_nspf != dfs->fs_nspf) || 625 (mfs->fs_trackskew != dfs->fs_trackskew) || 626 (mfs->fs_cgsize != dfs->fs_cgsize) || 627 (mfs->fs_ntrak != dfs->fs_ntrak) || 628 (mfs->fs_nsect != dfs->fs_nsect) || 629 (mfs->fs_spc != dfs->fs_spc) || 630 (mfs->fs_cpg != dfs->fs_cpg) || 631 (mfs->fs_ipg != dfs->fs_ipg) || 632 (mfs->fs_fpg != dfs->fs_fpg) || 633 (mfs->fs_postblformat != dfs->fs_postblformat) || 634 (mfs->fs_magic != dfs->fs_magic)) { 635 brelse(bp); 636 return (EACCES); 637 } 638 if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time) 639 if (mfs->fs_clean == FSLOG) { 640 brelse(bp); 641 return (EACCES); 642 } 643 644 /* 645 * get new summary info 646 */ 647 if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) { 648 brelse(bp); 649 return (EIO); 650 } 651 652 /* 653 * release old summary info and update in-memory superblock 654 */ 655 kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize); 656 mfs->fs_u.fs_csp = dfs->fs_u.fs_csp; /* Only entry 0 used */ 657 658 /* 659 * update fields allowed to change 660 */ 661 mfs->fs_size = dfs->fs_size; 662 mfs->fs_dsize = dfs->fs_dsize; 663 mfs->fs_ncg = dfs->fs_ncg; 664 mfs->fs_minfree = dfs->fs_minfree; 665 mfs->fs_rotdelay = dfs->fs_rotdelay; 666 mfs->fs_rps = dfs->fs_rps; 667 mfs->fs_maxcontig = dfs->fs_maxcontig; 668 mfs->fs_maxbpg = dfs->fs_maxbpg; 669 mfs->fs_csmask = dfs->fs_csmask; 670 mfs->fs_csshift = dfs->fs_csshift; 671 mfs->fs_optim = dfs->fs_optim; 672 mfs->fs_csaddr = dfs->fs_csaddr; 673 mfs->fs_cssize = dfs->fs_cssize; 674 mfs->fs_ncyl = dfs->fs_ncyl; 675 mfs->fs_cstotal = dfs->fs_cstotal; 676 mfs->fs_reclaim = dfs->fs_reclaim; 677 678 if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) { 679 mfs->fs_reclaim &= ~FS_RECLAIM; 680 mfs->fs_reclaim |= FS_RECLAIMING; 681 ufs_thread_start(&ufsvfsp->vfs_reclaim, 682 ufs_thread_reclaim, vfsp); 683 } 684 685 /* XXX What to do about sparecon? */ 686 687 /* XXX need to copy volume label */ 688 689 /* 690 * ondisk clean flag overrides inmemory clean flag iff == FSBAD 691 * or if error-locked and ondisk is now clean 692 */ 693 needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 694 if (needs_unlock) 695 mutex_enter(&ufsvfsp->vfs_lock); 696 697 if (errlck == UN_ERRLCK) { 698 if (finished_fsclean == dfs->fs_clean) 699 mfs->fs_clean = finished_fsclean; 700 else 701 mfs->fs_clean = FSBAD; 702 mfs->fs_state = FSOKAY - dfs->fs_time; 703 } 704 705 if (FSOKAY != dfs->fs_state + dfs->fs_time || 706 (dfs->fs_clean == FSBAD)) 707 mfs->fs_clean = FSBAD; 708 709 if (needs_unlock) 710 mutex_exit(&ufsvfsp->vfs_lock); 711 712 brelse(bp); 713 714 return (0); 715 } 716 717 /* 718 * ufs_reconcile_inode 719 * reconcile ondisk inode with incore inode 720 */ 721 static int 722 ufs_reconcile_inode(struct inode *ip, void *arg) 723 { 724 int i; 725 int ndaddr; 726 int niaddr; 727 struct dinode *dp; /* ondisk inode */ 728 struct buf *bp = NULL; 729 uid_t d_uid; 730 gid_t d_gid; 731 int error = 0; 732 struct fs *fs; 733 734 /* 735 * not an inode we care about 736 */ 737 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 738 return (0); 739 740 fs = ip->i_fs; 741 742 /* 743 * Inode reconciliation fails: we made the filesystem quiescent 744 * and we did a ufs_flush() before calling ufs_reconcile_inode() 745 * and thus the inode should not have been changed inbetween. 746 * Any discrepancies indicate a logic error and a pretty 747 * significant run-state inconsistency we should complain about. 748 */ 749 if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) { 750 cmn_err(CE_WARN, "%s: Inode reconciliation failed for" 751 "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number); 752 return (EINVAL); 753 } 754 755 /* 756 * get the dinode 757 */ 758 bp = UFS_BREAD(ip->i_ufsvfs, 759 ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)), 760 (int)fs->fs_bsize); 761 if (bp->b_flags & B_ERROR) { 762 brelse(bp); 763 return (EIO); 764 } 765 dp = bp->b_un.b_dino; 766 dp += itoo(fs, ip->i_number); 767 768 /* 769 * handle Sun's implementation of EFT 770 */ 771 d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid; 772 d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid; 773 774 rw_enter(&ip->i_contents, RW_WRITER); 775 776 /* 777 * some fields are not allowed to change 778 */ 779 if ((ip->i_mode != dp->di_mode) || 780 (ip->i_gen != dp->di_gen) || 781 (ip->i_uid != d_uid) || 782 (ip->i_gid != d_gid)) { 783 error = EACCES; 784 goto out; 785 } 786 787 /* 788 * and some are allowed to change 789 */ 790 ip->i_size = dp->di_size; 791 ip->i_ic.ic_flags = dp->di_ic.ic_flags; 792 ip->i_blocks = dp->di_blocks; 793 ip->i_nlink = dp->di_nlink; 794 if (ip->i_flag & IFASTSYMLNK) { 795 ndaddr = 1; 796 niaddr = 0; 797 } else { 798 ndaddr = NDADDR; 799 niaddr = NIADDR; 800 } 801 for (i = 0; i < ndaddr; ++i) 802 ip->i_db[i] = dp->di_db[i]; 803 for (i = 0; i < niaddr; ++i) 804 ip->i_ib[i] = dp->di_ib[i]; 805 806 out: 807 rw_exit(&ip->i_contents); 808 brelse(bp); 809 return (error); 810 } 811 812 /* 813 * ufs_reconcile 814 * reconcile ondisk superblock/inodes with any incore 815 */ 816 static int 817 ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck) 818 { 819 int error = 0; 820 821 /* 822 * get rid of as much inmemory data as possible 823 */ 824 (void) ufs_flush(vfsp); 825 826 /* 827 * reconcile the superblock and inodes 828 */ 829 if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck)) 830 return (error); 831 if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp)) 832 return (error); 833 /* 834 * allocation blocks may be incorrect; get rid of them 835 */ 836 (void) ufs_flush(vfsp); 837 838 return (error); 839 } 840 841 /* 842 * File system locking 843 */ 844 int 845 ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log) 846 { 847 return (ufs__fiolfs(vp, lockfsp, /* from_user */ 1, from_log)); 848 } 849 850 /* kernel-internal interface, also used by fix-on-panic */ 851 int 852 ufs__fiolfs( 853 struct vnode *vp, 854 struct lockfs *lockfsp, 855 int from_user, 856 int from_log) 857 { 858 struct ulockfs *ulp; 859 struct lockfs lfs; 860 int error; 861 struct vfs *vfsp; 862 struct ufsvfs *ufsvfsp; 863 int errlck = NO_ERRLCK; 864 int poll_events = POLLPRI; 865 extern struct pollhead ufs_pollhd; 866 ulockfs_info_t *head; 867 ulockfs_info_t *info; 868 869 /* check valid lock type */ 870 if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK) 871 return (EINVAL); 872 873 if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data) 874 return (EIO); 875 876 vfsp = vp->v_vfsp; 877 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 878 ulp = &ufsvfsp->vfs_ulockfs; 879 880 head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 881 SEARCH_ULOCKFSP(head, ulp, info); 882 883 /* 884 * Suspend both the reclaim thread and the delete thread. 885 * This must be done outside the lockfs locking protocol. 886 */ 887 ufs_thread_suspend(&ufsvfsp->vfs_reclaim); 888 ufs_thread_suspend(&ufsvfsp->vfs_delete); 889 890 /* 891 * Acquire vfs_reflock around ul_lock to avoid deadlock with 892 * umount/remount/sync. 893 */ 894 vfs_lock_wait(vfsp); 895 mutex_enter(&ulp->ul_lock); 896 atomic_add_long(&ufs_quiesce_pend, 1); 897 898 /* 899 * Quit if there is another lockfs request in progress 900 * that is waiting for existing ufs_vnops to complete. 901 */ 902 if (ULOCKFS_IS_BUSY(ulp)) { 903 error = EBUSY; 904 goto errexit; 905 } 906 907 /* cannot ulocked or downgrade a hard-lock */ 908 if (ULOCKFS_IS_HLOCK(ulp)) { 909 error = EIO; 910 goto errexit; 911 } 912 913 /* an error lock may be unlocked or relocked, only */ 914 if (ULOCKFS_IS_ELOCK(ulp)) { 915 if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) { 916 error = EBUSY; 917 goto errexit; 918 } 919 } 920 921 /* 922 * a read-only error lock may only be upgraded to an 923 * error lock or hard lock 924 */ 925 if (ULOCKFS_IS_ROELOCK(ulp)) { 926 if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) { 927 error = EBUSY; 928 goto errexit; 929 } 930 } 931 932 /* 933 * until read-only error locks are fully implemented 934 * just return EINVAL 935 */ 936 if (LOCKFS_IS_ROELOCK(lockfsp)) { 937 error = EINVAL; 938 goto errexit; 939 } 940 941 /* 942 * an error lock may only be applied if the file system is 943 * unlocked or already error locked. 944 * (this is to prevent the case where a fs gets changed out from 945 * underneath a fs that is locked for backup, 946 * that is, name/delete/write-locked.) 947 */ 948 if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) && 949 !ULOCKFS_IS_ROELOCK(ulp)) && 950 (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) { 951 error = EBUSY; 952 goto errexit; 953 } 954 955 /* get and validate the input lockfs request */ 956 if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs)) 957 goto errexit; 958 959 /* 960 * save current ulockfs struct 961 */ 962 bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs)); 963 964 /* 965 * Freeze the file system (pend future accesses) 966 */ 967 ufs_freeze(ulp, lockfsp); 968 969 /* 970 * Set locking in progress because ufs_quiesce may free the 971 * ul_lock mutex. 972 */ 973 ULOCKFS_SET_BUSY(ulp); 974 /* update the ioctl copy */ 975 LOCKFS_SET_BUSY(&ulp->ul_lockfs); 976 977 /* 978 * We need to unset FWLOCK status before we call ufs_quiesce 979 * so that the thread doesnt get suspended. We do this only if 980 * this (fallocate) thread requested an unlock operation. 981 */ 982 if (info && (info->flags & ULOCK_INFO_FALLOCATE)) { 983 if (!ULOCKFS_IS_WLOCK(ulp)) 984 ULOCKFS_CLR_FWLOCK(ulp); 985 } 986 987 /* 988 * Quiesce (wait for outstanding accesses to finish) 989 */ 990 if (error = ufs_quiesce(ulp)) 991 goto errout; 992 993 /* 994 * If the fallocate thread requested a write fs lock operation 995 * then we set fwlock status in the ulp. 996 */ 997 if (info && (info->flags & ULOCK_INFO_FALLOCATE)) { 998 if (ULOCKFS_IS_WLOCK(ulp)) 999 ULOCKFS_SET_FWLOCK(ulp); 1000 } 1001 1002 /* 1003 * can't wlock or (ro)elock fs with accounting or local swap file 1004 */ 1005 if ((ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) || 1006 ULOCKFS_IS_ROELOCK(ulp)) && !from_log) { 1007 if (error = ufs_checkaccton(vp)) 1008 goto errout; 1009 if (error = ufs_checkswapon(vp)) 1010 goto errout; 1011 } 1012 1013 /* 1014 * save error lock status to pass down to reconcilation 1015 * routines and for later cleanup 1016 */ 1017 if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp)) 1018 errlck = UN_ERRLCK; 1019 1020 if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) { 1021 int needs_unlock; 1022 int needs_sbwrite; 1023 1024 poll_events |= POLLERR; 1025 errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs)? 1026 RE_ERRLCK: SET_ERRLCK; 1027 1028 needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 1029 if (needs_unlock) 1030 mutex_enter(&ufsvfsp->vfs_lock); 1031 1032 /* disable delayed i/o */ 1033 needs_sbwrite = 0; 1034 1035 if (errlck == SET_ERRLCK) { 1036 ufsvfsp->vfs_fs->fs_clean = FSBAD; 1037 needs_sbwrite = 1; 1038 } 1039 1040 needs_sbwrite |= ufsvfsp->vfs_dio; 1041 ufsvfsp->vfs_dio = 0; 1042 1043 if (needs_unlock) 1044 mutex_exit(&ufsvfsp->vfs_lock); 1045 1046 if (needs_sbwrite) { 1047 ulp->ul_sbowner = curthread; 1048 TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE); 1049 1050 if (needs_unlock) 1051 mutex_enter(&ufsvfsp->vfs_lock); 1052 1053 ufsvfsp->vfs_fs->fs_fmod = 0; 1054 1055 if (needs_unlock) 1056 mutex_exit(&ufsvfsp->vfs_lock); 1057 } 1058 } 1059 1060 /* 1061 * reconcile superblock and inodes if was wlocked 1062 */ 1063 if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) { 1064 if (error = ufs_reconcile(vfsp, ufsvfsp, errlck)) 1065 goto errout; 1066 /* 1067 * in case the fs grew; reset the metadata map for logging tests 1068 */ 1069 TRANS_MATA_UMOUNT(ufsvfsp); 1070 TRANS_MATA_MOUNT(ufsvfsp); 1071 TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs); 1072 } 1073 1074 /* 1075 * At least everything *currently* dirty goes out. 1076 */ 1077 1078 if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) && 1079 !ULOCKFS_IS_ELOCK(ulp)) 1080 goto errout; 1081 1082 /* 1083 * thaw file system and wakeup pended processes 1084 */ 1085 if (error = ufs_thaw(vfsp, ufsvfsp, ulp)) 1086 if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp)) 1087 goto errout; 1088 1089 /* 1090 * reset modified flag if not already write locked 1091 */ 1092 if (!LOCKFS_IS_WLOCK(&lfs)) 1093 ULOCKFS_CLR_MOD(ulp); 1094 1095 /* 1096 * idle the lock struct 1097 */ 1098 ULOCKFS_CLR_BUSY(ulp); 1099 /* update the ioctl copy */ 1100 LOCKFS_CLR_BUSY(&ulp->ul_lockfs); 1101 1102 /* 1103 * free current comment 1104 */ 1105 if (lfs.lf_comment && lfs.lf_comlen != 0) { 1106 kmem_free(lfs.lf_comment, lfs.lf_comlen); 1107 lfs.lf_comment = NULL; 1108 lfs.lf_comlen = 0; 1109 } 1110 1111 /* do error lock cleanup */ 1112 if (errlck == UN_ERRLCK) 1113 ufsfx_unlockfs(ufsvfsp); 1114 1115 else if (errlck == RE_ERRLCK) 1116 ufsfx_lockfs(ufsvfsp); 1117 1118 /* don't allow error lock from user to invoke panic */ 1119 else if (from_user && errlck == SET_ERRLCK && 1120 !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4))) 1121 (void) ufs_fault(ufsvfsp->vfs_root, 1122 ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ? 1123 ulp->ul_lockfs.lf_comment: "user-applied error lock"); 1124 1125 atomic_add_long(&ufs_quiesce_pend, -1); 1126 mutex_exit(&ulp->ul_lock); 1127 vfs_unlock(vfsp); 1128 1129 if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs)) 1130 poll_events |= POLLERR; 1131 1132 pollwakeup(&ufs_pollhd, poll_events); 1133 1134 /* 1135 * Allow both the delete thread and the reclaim thread to 1136 * continue. 1137 */ 1138 ufs_thread_continue(&ufsvfsp->vfs_delete); 1139 ufs_thread_continue(&ufsvfsp->vfs_reclaim); 1140 1141 return (0); 1142 1143 errout: 1144 /* 1145 * Lock failed. Reset the old lock in ufsvfs if not hard locked. 1146 */ 1147 if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) { 1148 bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs)); 1149 ulp->ul_fs_lock = (1 << lfs.lf_lock); 1150 } 1151 (void) ufs_thaw(vfsp, ufsvfsp, ulp); 1152 ULOCKFS_CLR_BUSY(ulp); 1153 LOCKFS_CLR_BUSY(&ulp->ul_lockfs); 1154 1155 errexit: 1156 atomic_add_long(&ufs_quiesce_pend, -1); 1157 mutex_exit(&ulp->ul_lock); 1158 vfs_unlock(vfsp); 1159 1160 /* 1161 * Allow both the delete thread and the reclaim thread to 1162 * continue. 1163 */ 1164 ufs_thread_continue(&ufsvfsp->vfs_delete); 1165 ufs_thread_continue(&ufsvfsp->vfs_reclaim); 1166 1167 return (error); 1168 } 1169 1170 /* 1171 * fiolfss 1172 * return the current file system locking state info 1173 */ 1174 int 1175 ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp) 1176 { 1177 struct ulockfs *ulp; 1178 1179 if (!vp || !vp->v_vfsp || !VTOI(vp)) 1180 return (EINVAL); 1181 1182 /* file system has been forcibly unmounted */ 1183 if (VTOI(vp)->i_ufsvfs == NULL) 1184 return (EIO); 1185 1186 ulp = VTOUL(vp); 1187 1188 if (ULOCKFS_IS_HLOCK(ulp)) { 1189 *lockfsp = ulp->ul_lockfs; /* structure assignment */ 1190 return (0); 1191 } 1192 1193 mutex_enter(&ulp->ul_lock); 1194 1195 *lockfsp = ulp->ul_lockfs; /* structure assignment */ 1196 1197 if (ULOCKFS_IS_MOD(ulp)) 1198 lockfsp->lf_flags |= LOCKFS_MOD; 1199 1200 mutex_exit(&ulp->ul_lock); 1201 1202 return (0); 1203 } 1204 1205 /* 1206 * ufs_check_lockfs 1207 * check whether a ufs_vnops conflicts with the file system lock 1208 */ 1209 int 1210 ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask) 1211 { 1212 k_sigset_t smask; 1213 int sig, slock; 1214 1215 ASSERT(MUTEX_HELD(&ulp->ul_lock)); 1216 1217 while (ulp->ul_fs_lock & mask) { 1218 slock = (int)ULOCKFS_IS_SLOCK(ulp); 1219 if ((curthread->t_flag & T_DONTPEND) && !slock) { 1220 curthread->t_flag |= T_WOULDBLOCK; 1221 return (EAGAIN); 1222 } 1223 curthread->t_flag &= ~T_WOULDBLOCK; 1224 1225 if (ULOCKFS_IS_HLOCK(ulp)) 1226 return (EIO); 1227 1228 /* 1229 * wait for lock status to change 1230 */ 1231 if (slock || ufsvfsp->vfs_nointr) { 1232 cv_wait(&ulp->ul_cv, &ulp->ul_lock); 1233 } else { 1234 sigintr(&smask, 1); 1235 sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock); 1236 sigunintr(&smask); 1237 if ((!sig && (ulp->ul_fs_lock & mask)) || 1238 ufsvfsp->vfs_dontblock) 1239 return (EINTR); 1240 } 1241 } 1242 1243 if (mask & ULOCKFS_FWLOCK) { 1244 atomic_add_long(&ulp->ul_falloc_cnt, 1); 1245 ULOCKFS_SET_FALLOC(ulp); 1246 } else { 1247 atomic_add_long(&ulp->ul_vnops_cnt, 1); 1248 } 1249 1250 return (0); 1251 } 1252 1253 /* 1254 * Check whether we came across the handcrafted lockfs protocol path. We can't 1255 * simply check for T_DONTBLOCK here as one would assume since this can also 1256 * falsely catch recursive VOP's going to a different filesystem, instead we 1257 * check if we already hold the ulockfs->ul_lock mutex. 1258 */ 1259 static int 1260 ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp) 1261 { 1262 return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1); 1263 } 1264 1265 /* 1266 * ufs_lockfs_begin - start the lockfs locking protocol 1267 */ 1268 int 1269 ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask) 1270 { 1271 int error; 1272 int rec_vop; 1273 struct ulockfs *ulp; 1274 ulockfs_info_t *ulockfs_info; 1275 ulockfs_info_t *ulockfs_info_free; 1276 ulockfs_info_t *ulockfs_info_temp; 1277 1278 /* 1279 * file system has been forcibly unmounted 1280 */ 1281 if (ufsvfsp == NULL) 1282 return (EIO); 1283 1284 *ulpp = ulp = &ufsvfsp->vfs_ulockfs; 1285 1286 /* 1287 * Do lockfs protocol 1288 */ 1289 ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 1290 IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free); 1291 1292 /* 1293 * Detect recursive VOP call or handcrafted internal lockfs protocol 1294 * path and bail out in that case. 1295 */ 1296 if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) { 1297 *ulpp = NULL; 1298 return (0); 1299 } else { 1300 if (ulockfs_info_free == NULL) { 1301 if ((ulockfs_info_temp = (ulockfs_info_t *) 1302 kmem_zalloc(sizeof (ulockfs_info_t), 1303 KM_NOSLEEP)) == NULL) { 1304 *ulpp = NULL; 1305 return (ENOMEM); 1306 } 1307 } 1308 } 1309 1310 /* 1311 * First time VOP call 1312 */ 1313 mutex_enter(&ulp->ul_lock); 1314 if (ULOCKFS_IS_JUSTULOCK(ulp)) { 1315 if (mask & ULOCKFS_FWLOCK) { 1316 atomic_add_long(&ulp->ul_falloc_cnt, 1); 1317 ULOCKFS_SET_FALLOC(ulp); 1318 } else { 1319 atomic_add_long(&ulp->ul_vnops_cnt, 1); 1320 } 1321 } else { 1322 if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) { 1323 mutex_exit(&ulp->ul_lock); 1324 if (ulockfs_info_free == NULL) 1325 kmem_free(ulockfs_info_temp, 1326 sizeof (ulockfs_info_t)); 1327 return (error); 1328 } 1329 } 1330 mutex_exit(&ulp->ul_lock); 1331 1332 if (ulockfs_info_free != NULL) { 1333 ulockfs_info_free->ulp = ulp; 1334 if (mask & ULOCKFS_FWLOCK) 1335 ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE; 1336 } else { 1337 ulockfs_info_temp->ulp = ulp; 1338 ulockfs_info_temp->next = ulockfs_info; 1339 if (mask & ULOCKFS_FWLOCK) 1340 ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE; 1341 ASSERT(ufs_lockfs_key != 0); 1342 (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp); 1343 } 1344 1345 curthread->t_flag |= T_DONTBLOCK; 1346 return (0); 1347 } 1348 1349 /* 1350 * Check whether we are returning from the top level VOP. 1351 */ 1352 static int 1353 ufs_lockfs_top_vop_return(ulockfs_info_t *head) 1354 { 1355 ulockfs_info_t *info; 1356 int result = 1; 1357 1358 for (info = head; info != NULL; info = info->next) { 1359 if (info->ulp != NULL) { 1360 result = 0; 1361 break; 1362 } 1363 } 1364 1365 return (result); 1366 } 1367 1368 /* 1369 * ufs_lockfs_end - terminate the lockfs locking protocol 1370 */ 1371 void 1372 ufs_lockfs_end(struct ulockfs *ulp) 1373 { 1374 ulockfs_info_t *info; 1375 ulockfs_info_t *head; 1376 1377 /* 1378 * end-of-VOP protocol 1379 */ 1380 if (ulp == NULL) 1381 return; 1382 1383 head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 1384 SEARCH_ULOCKFSP(head, ulp, info); 1385 1386 /* 1387 * If we're called from a first level VOP, we have to have a 1388 * valid ulockfs record in the TSD. 1389 */ 1390 ASSERT(info != NULL); 1391 1392 /* 1393 * Invalidate the ulockfs record. 1394 */ 1395 info->ulp = NULL; 1396 1397 if (ufs_lockfs_top_vop_return(head)) 1398 curthread->t_flag &= ~T_DONTBLOCK; 1399 1400 mutex_enter(&ulp->ul_lock); 1401 1402 /* fallocate thread */ 1403 if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) { 1404 if (!atomic_add_long_nv(&ulp->ul_falloc_cnt, -1)) 1405 ULOCKFS_CLR_FALLOC(ulp); 1406 } else { /* normal thread */ 1407 if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1)) 1408 cv_broadcast(&ulp->ul_cv); 1409 } 1410 1411 /* Clear the thread's fallocate state */ 1412 if (info->flags & ULOCK_INFO_FALLOCATE) 1413 info->flags &= ~ULOCK_INFO_FALLOCATE; 1414 1415 if (ulp->ul_vnops_cnt == 0 && ulp->ul_falloc_cnt) 1416 cv_broadcast(&ulp->ul_cv); 1417 1418 mutex_exit(&ulp->ul_lock); 1419 } 1420 1421 /* 1422 * specialized version of ufs_lockfs_begin() called by ufs_getpage(). 1423 */ 1424 int 1425 ufs_lockfs_begin_getpage( 1426 struct ufsvfs *ufsvfsp, 1427 struct ulockfs **ulpp, 1428 struct seg *seg, 1429 int read_access, 1430 uint_t *protp) 1431 { 1432 ulong_t mask; 1433 int error; 1434 int rec_vop; 1435 struct ulockfs *ulp; 1436 ulockfs_info_t *ulockfs_info; 1437 ulockfs_info_t *ulockfs_info_free; 1438 ulockfs_info_t *ulockfs_info_temp; 1439 1440 /* 1441 * file system has been forcibly unmounted 1442 */ 1443 if (ufsvfsp == NULL) 1444 return (EIO); 1445 1446 *ulpp = ulp = &ufsvfsp->vfs_ulockfs; 1447 1448 /* 1449 * Do lockfs protocol 1450 */ 1451 ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 1452 IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free); 1453 1454 /* 1455 * Detect recursive VOP call or handcrafted internal lockfs protocol 1456 * path and bail out in that case. 1457 */ 1458 if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) { 1459 *ulpp = NULL; 1460 return (0); 1461 } else { 1462 if (ulockfs_info_free == NULL) { 1463 if ((ulockfs_info_temp = (ulockfs_info_t *) 1464 kmem_zalloc(sizeof (ulockfs_info_t), 1465 KM_NOSLEEP)) == NULL) { 1466 *ulpp = NULL; 1467 return (ENOMEM); 1468 } 1469 } 1470 } 1471 1472 /* 1473 * First time VOP call 1474 */ 1475 mutex_enter(&ulp->ul_lock); 1476 if (ULOCKFS_IS_JUSTULOCK(ulp)) 1477 /* 1478 * fs is not locked, simply inc the active-ops counter 1479 */ 1480 atomic_add_long(&ulp->ul_vnops_cnt, 1); 1481 else { 1482 if (seg->s_ops == &segvn_ops && 1483 ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) { 1484 mask = (ulong_t)ULOCKFS_GETREAD_MASK; 1485 } else if (protp && read_access) { 1486 /* 1487 * Restrict the mapping to readonly. 1488 * Writes to this mapping will cause 1489 * another fault which will then 1490 * be suspended if fs is write locked 1491 */ 1492 *protp &= ~PROT_WRITE; 1493 mask = (ulong_t)ULOCKFS_GETREAD_MASK; 1494 } else 1495 mask = (ulong_t)ULOCKFS_GETWRITE_MASK; 1496 1497 /* 1498 * will sleep if this fs is locked against this VOP 1499 */ 1500 if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) { 1501 mutex_exit(&ulp->ul_lock); 1502 if (ulockfs_info_free == NULL) 1503 kmem_free(ulockfs_info_temp, 1504 sizeof (ulockfs_info_t)); 1505 return (error); 1506 } 1507 } 1508 mutex_exit(&ulp->ul_lock); 1509 1510 if (ulockfs_info_free != NULL) { 1511 ulockfs_info_free->ulp = ulp; 1512 } else { 1513 ulockfs_info_temp->ulp = ulp; 1514 ulockfs_info_temp->next = ulockfs_info; 1515 ASSERT(ufs_lockfs_key != 0); 1516 (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp); 1517 } 1518 1519 curthread->t_flag |= T_DONTBLOCK; 1520 return (0); 1521 } 1522 1523 void 1524 ufs_lockfs_tsd_destructor(void *head) 1525 { 1526 ulockfs_info_t *curr = (ulockfs_info_t *)head; 1527 ulockfs_info_t *temp; 1528 1529 for (; curr != NULL; ) { 1530 /* 1531 * The TSD destructor is being called when the thread exits 1532 * (via thread_exit()). At that time it must have cleaned up 1533 * all VOPs via ufs_lockfs_end() and there must not be a 1534 * valid ulockfs record exist while a thread is exiting. 1535 */ 1536 temp = curr; 1537 curr = curr->next; 1538 ASSERT(temp->ulp == NULL); 1539 kmem_free(temp, sizeof (ulockfs_info_t)); 1540 } 1541 } 1542