1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/t_lock.h> 30 #include <sys/param.h> 31 #include <sys/time.h> 32 #include <sys/systm.h> 33 #include <sys/sysmacros.h> 34 #include <sys/resource.h> 35 #include <sys/signal.h> 36 #include <sys/cred.h> 37 #include <sys/user.h> 38 #include <sys/buf.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/proc.h> 42 #include <sys/disp.h> 43 #include <sys/file.h> 44 #include <sys/fcntl.h> 45 #include <sys/flock.h> 46 #include <sys/atomic.h> 47 #include <sys/kmem.h> 48 #include <sys/uio.h> 49 #include <sys/conf.h> 50 #include <sys/mman.h> 51 #include <sys/pathname.h> 52 #include <sys/debug.h> 53 #include <sys/vmmeter.h> 54 #include <sys/vmsystm.h> 55 #include <sys/cmn_err.h> 56 #include <sys/vtrace.h> 57 #include <sys/acct.h> 58 #include <sys/dnlc.h> 59 #include <sys/swap.h> 60 61 #include <sys/fs/ufs_fs.h> 62 #include <sys/fs/ufs_inode.h> 63 #include <sys/fs/ufs_fsdir.h> 64 #include <sys/fs/ufs_trans.h> 65 #include <sys/fs/ufs_panic.h> 66 #include <sys/fs/ufs_mount.h> 67 #include <sys/fs/ufs_bio.h> 68 #include <sys/fs/ufs_log.h> 69 #include <sys/fs/ufs_quota.h> 70 #include <sys/dirent.h> /* must be AFTER <sys/fs/fsdir.h>! */ 71 #include <sys/errno.h> 72 #include <sys/sysinfo.h> 73 74 #include <vm/hat.h> 75 #include <vm/pvn.h> 76 #include <vm/as.h> 77 #include <vm/seg.h> 78 #include <vm/seg_map.h> 79 #include <vm/seg_vn.h> 80 #include <vm/rm.h> 81 #include <vm/anon.h> 82 #include <sys/swap.h> 83 #include <sys/dnlc.h> 84 85 extern struct vnode *common_specvp(struct vnode *vp); 86 87 /* error lock status */ 88 #define UN_ERRLCK (-1) 89 #define SET_ERRLCK 1 90 #define RE_ERRLCK 2 91 #define NO_ERRLCK 0 92 93 /* 94 * Index to be used in TSD for storing lockfs data 95 */ 96 uint_t ufs_lockfs_key; 97 98 typedef struct _ulockfs_info { 99 struct _ulockfs_info *next; 100 struct ulockfs *ulp; 101 uint_t flags; 102 } ulockfs_info_t; 103 104 #define ULOCK_INFO_FALLOCATE 0x00000001 /* fallocate thread */ 105 106 /* 107 * Check in TSD that whether we are already doing any VOP on this filesystem 108 */ 109 #define IS_REC_VOP(found, head, ulp, free) \ 110 { \ 111 ulockfs_info_t *_curr; \ 112 \ 113 for (found = 0, free = NULL, _curr = head; \ 114 _curr != NULL; _curr = _curr->next) { \ 115 if ((free == NULL) && \ 116 (_curr->ulp == NULL)) \ 117 free = _curr; \ 118 if (_curr->ulp == ulp) { \ 119 found = 1; \ 120 break; \ 121 } \ 122 } \ 123 } 124 125 /* 126 * Get the lockfs data from TSD so that lockfs handles the recursive VOP 127 * properly 128 */ 129 #define SEARCH_ULOCKFSP(head, ulp, info) \ 130 { \ 131 ulockfs_info_t *_curr; \ 132 \ 133 for (_curr = head; _curr != NULL; \ 134 _curr = _curr->next) { \ 135 if (_curr->ulp == ulp) { \ 136 break; \ 137 } \ 138 } \ 139 \ 140 info = _curr; \ 141 } 142 143 /* 144 * Validate lockfs request 145 */ 146 static int 147 ufs_getlfd( 148 struct lockfs *lockfsp, /* new lock request */ 149 struct lockfs *ul_lockfsp) /* old lock state */ 150 { 151 int error = 0; 152 153 /* 154 * no input flags defined 155 */ 156 if (lockfsp->lf_flags != 0) { 157 error = EINVAL; 158 goto errout; 159 } 160 161 /* 162 * check key 163 */ 164 if (!LOCKFS_IS_ULOCK(ul_lockfsp)) 165 if (lockfsp->lf_key != ul_lockfsp->lf_key) { 166 error = EINVAL; 167 goto errout; 168 } 169 170 lockfsp->lf_key = ul_lockfsp->lf_key + 1; 171 172 errout: 173 return (error); 174 } 175 176 /* 177 * ufs_checkaccton 178 * check if accounting is turned on on this fs 179 */ 180 181 int 182 ufs_checkaccton(struct vnode *vp) 183 { 184 if (acct_fs_in_use(vp)) 185 return (EDEADLK); 186 return (0); 187 } 188 189 /* 190 * ufs_checkswapon 191 * check if local swapping is to file on this fs 192 */ 193 int 194 ufs_checkswapon(struct vnode *vp) 195 { 196 struct swapinfo *sip; 197 198 mutex_enter(&swapinfo_lock); 199 for (sip = swapinfo; sip; sip = sip->si_next) 200 if (sip->si_vp->v_vfsp == vp->v_vfsp) { 201 mutex_exit(&swapinfo_lock); 202 return (EDEADLK); 203 } 204 mutex_exit(&swapinfo_lock); 205 return (0); 206 } 207 208 /* 209 * ufs_freeze 210 * pend future accesses for current lock and desired lock 211 */ 212 void 213 ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp) 214 { 215 /* 216 * set to new lock type 217 */ 218 ulp->ul_lockfs.lf_lock = lockfsp->lf_lock; 219 ulp->ul_lockfs.lf_key = lockfsp->lf_key; 220 ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen; 221 ulp->ul_lockfs.lf_comment = lockfsp->lf_comment; 222 223 ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock); 224 } 225 226 /* 227 * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before 228 * starting ufs_quiesce() protocol and decrement it only when a file system no 229 * longer has to be in quiescent state. This allows ufs_pageio() to detect 230 * that another thread wants to quiesce a file system. See more comments in 231 * ufs_pageio(). 232 */ 233 ulong_t ufs_quiesce_pend = 0; 234 235 /* 236 * ufs_quiesce 237 * wait for outstanding accesses to finish 238 */ 239 int 240 ufs_quiesce(struct ulockfs *ulp) 241 { 242 int error = 0; 243 ulockfs_info_t *head; 244 ulockfs_info_t *info; 245 246 head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 247 SEARCH_ULOCKFSP(head, ulp, info); 248 249 /* 250 * Set a softlock to suspend future ufs_vnops so that 251 * this lockfs request will not be starved 252 */ 253 ULOCKFS_SET_SLOCK(ulp); 254 ASSERT(ufs_quiesce_pend); 255 256 /* check if there is any outstanding ufs vnodeops calls */ 257 while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt) { 258 /* 259 * use timed version of cv_wait_sig() to make sure we don't 260 * miss a wake up call from ufs_pageio() when it doesn't use 261 * ul_lock. 262 * 263 * when a fallocate thread comes in, the only way it returns 264 * from this function is if there are no other vnode operations 265 * going on (remember fallocate threads are tracked using 266 * ul_falloc_cnt not ul_vnops_cnt), and another fallocate thread 267 * hasn't already grabbed the fs write lock. 268 */ 269 if (info && (info->flags & ULOCK_INFO_FALLOCATE)) { 270 if (!ulp->ul_vnops_cnt && !ULOCKFS_IS_FWLOCK(ulp)) 271 goto out; 272 } 273 if (!cv_timedwait_sig(&ulp->ul_cv, &ulp->ul_lock, lbolt + hz)) { 274 error = EINTR; 275 goto out; 276 } 277 } 278 279 out: 280 /* 281 * unlock the soft lock 282 */ 283 ULOCKFS_CLR_SLOCK(ulp); 284 285 return (error); 286 } 287 288 /* 289 * ufs_flush_inode 290 */ 291 int 292 ufs_flush_inode(struct inode *ip, void *arg) 293 { 294 int error; 295 int saverror = 0; 296 297 /* 298 * wrong file system; keep looking 299 */ 300 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 301 return (0); 302 303 /* 304 * asynchronously push all the dirty pages 305 */ 306 if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) && 307 (error != EAGAIN)) 308 saverror = error; 309 /* 310 * wait for io and discard all mappings 311 */ 312 if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI)) 313 saverror = error; 314 315 if (ITOV(ip)->v_type == VDIR) { 316 dnlc_dir_purge(&ip->i_danchor); 317 } 318 319 return (saverror); 320 } 321 322 /* 323 * ufs_flush 324 * Flush everything that is currently dirty; this includes invalidating 325 * any mappings. 326 */ 327 int 328 ufs_flush(struct vfs *vfsp) 329 { 330 int error; 331 int saverror = 0; 332 struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 333 struct fs *fs = ufsvfsp->vfs_fs; 334 int tdontblock = 0; 335 336 ASSERT(vfs_lock_held(vfsp)); 337 338 /* 339 * purge dnlc 340 */ 341 (void) dnlc_purge_vfsp(vfsp, 0); 342 343 /* 344 * drain the delete and idle threads 345 */ 346 ufs_delete_drain(vfsp, 0, 0); 347 ufs_idle_drain(vfsp); 348 349 /* 350 * flush and invalidate quota records 351 */ 352 (void) qsync(ufsvfsp); 353 354 /* 355 * flush w/invalidate the inodes for vfsp 356 */ 357 if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp)) 358 saverror = error; 359 360 /* 361 * synchronously flush superblock and summary info 362 */ 363 if (fs->fs_ronly == 0 && fs->fs_fmod) { 364 fs->fs_fmod = 0; 365 TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH); 366 } 367 /* 368 * flush w/invalidate block device pages and buf cache 369 */ 370 if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp), 371 (offset_t)0, 0, B_INVAL, CRED())) > 0) 372 saverror = error; 373 374 (void) bflush((dev_t)vfsp->vfs_dev); 375 (void) bfinval((dev_t)vfsp->vfs_dev, 0); 376 377 /* 378 * drain the delete and idle threads again 379 */ 380 ufs_delete_drain(vfsp, 0, 0); 381 ufs_idle_drain(vfsp); 382 383 /* 384 * play with the clean flag 385 */ 386 if (saverror == 0) 387 ufs_checkclean(vfsp); 388 389 /* 390 * Flush any outstanding transactions and roll the log 391 * only if we are supposed to do, i.e. LDL_NOROLL not set. 392 * We can not simply check for fs_ronly here since fsck also may 393 * use this code to roll the log on a read-only filesystem, e.g. 394 * root during early stages of boot, if other then a sanity check is 395 * done, it will clear LDL_NOROLL before. 396 * In addition we assert that the deltamap does not contain any deltas 397 * in case LDL_NOROLL is set since this is not supposed to happen. 398 */ 399 if (TRANS_ISTRANS(ufsvfsp)) { 400 ml_unit_t *ul = ufsvfsp->vfs_log; 401 mt_map_t *mtm = ul->un_deltamap; 402 403 if (ul->un_flags & LDL_NOROLL) { 404 ASSERT(mtm->mtm_nme == 0); 405 } else { 406 /* 407 * Do not set T_DONTBLOCK if there is a 408 * transaction opened by caller. 409 */ 410 if (curthread->t_flag & T_DONTBLOCK) 411 tdontblock = 1; 412 else 413 curthread->t_flag |= T_DONTBLOCK; 414 415 TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH, 416 TOP_COMMIT_SIZE, error); 417 418 if (!error) { 419 TRANS_END_SYNC(ufsvfsp, saverror, 420 TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE); 421 } 422 423 if (tdontblock == 0) 424 curthread->t_flag &= ~T_DONTBLOCK; 425 426 logmap_roll_dev(ufsvfsp->vfs_log); 427 } 428 } 429 430 return (saverror); 431 } 432 433 /* 434 * ufs_thaw_wlock 435 * special processing when thawing down to wlock 436 */ 437 static int 438 ufs_thaw_wlock(struct inode *ip, void *arg) 439 { 440 /* 441 * wrong file system; keep looking 442 */ 443 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 444 return (0); 445 446 /* 447 * iupdat refuses to clear flags if the fs is read only. The fs 448 * may become read/write during the lock and we wouldn't want 449 * these inodes being written to disk. So clear the flags. 450 */ 451 rw_enter(&ip->i_contents, RW_WRITER); 452 ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG); 453 rw_exit(&ip->i_contents); 454 455 /* 456 * pages are mlocked -- fail wlock 457 */ 458 if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip))) 459 return (EBUSY); 460 461 return (0); 462 } 463 464 /* 465 * ufs_thaw_hlock 466 * special processing when thawing down to hlock or elock 467 */ 468 static int 469 ufs_thaw_hlock(struct inode *ip, void *arg) 470 { 471 struct vnode *vp = ITOV(ip); 472 473 /* 474 * wrong file system; keep looking 475 */ 476 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 477 return (0); 478 479 /* 480 * blow away all pages - even if they are mlocked 481 */ 482 do { 483 (void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK); 484 } while ((vp->v_type != VCHR) && vn_has_cached_data(vp)); 485 rw_enter(&ip->i_contents, RW_WRITER); 486 ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG); 487 rw_exit(&ip->i_contents); 488 489 return (0); 490 } 491 492 /* 493 * ufs_thaw 494 * thaw file system lock down to current value 495 */ 496 int 497 ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp) 498 { 499 int error = 0; 500 int noidel = (int)(ulp->ul_flag & ULOCKFS_NOIDEL); 501 502 /* 503 * if wlock or hlock or elock 504 */ 505 if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) || 506 ULOCKFS_IS_ELOCK(ulp)) { 507 508 /* 509 * don't keep access times 510 * don't free deleted files 511 * if superblock writes are allowed, limit them to me for now 512 */ 513 ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL); 514 if (ulp->ul_sbowner != (kthread_id_t)-1) 515 ulp->ul_sbowner = curthread; 516 517 /* 518 * wait for writes for deleted files and superblock updates 519 */ 520 (void) ufs_flush(vfsp); 521 522 /* 523 * now make sure the quota file is up-to-date 524 * expensive; but effective 525 */ 526 error = ufs_flush(vfsp); 527 /* 528 * no one can write the superblock 529 */ 530 ulp->ul_sbowner = (kthread_id_t)-1; 531 532 /* 533 * special processing for wlock/hlock/elock 534 */ 535 if (ULOCKFS_IS_WLOCK(ulp)) { 536 if (error) 537 goto errout; 538 error = bfinval(ufsvfsp->vfs_dev, 0); 539 if (error) 540 goto errout; 541 error = ufs_scan_inodes(0, ufs_thaw_wlock, 542 (void *)ufsvfsp, ufsvfsp); 543 if (error) 544 goto errout; 545 } 546 if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) { 547 error = 0; 548 (void) ufs_scan_inodes(0, ufs_thaw_hlock, 549 (void *)ufsvfsp, ufsvfsp); 550 (void) bfinval(ufsvfsp->vfs_dev, 1); 551 } 552 } else { 553 554 /* 555 * okay to keep access times 556 * okay to free deleted files 557 * okay to write the superblock 558 */ 559 ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL); 560 ulp->ul_sbowner = NULL; 561 562 /* 563 * flush in case deleted files are in memory 564 */ 565 if (noidel) { 566 if (error = ufs_flush(vfsp)) 567 goto errout; 568 } 569 } 570 571 errout: 572 cv_broadcast(&ulp->ul_cv); 573 return (error); 574 } 575 576 /* 577 * ufs_reconcile_fs 578 * reconcile incore superblock with ondisk superblock 579 */ 580 int 581 ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck) 582 { 583 struct fs *mfs; /* in-memory superblock */ 584 struct fs *dfs; /* on-disk superblock */ 585 struct buf *bp; /* on-disk superblock buf */ 586 int needs_unlock; 587 char finished_fsclean; 588 589 mfs = ufsvfsp->vfs_fs; 590 591 /* 592 * get the on-disk copy of the superblock 593 */ 594 bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE); 595 bp->b_flags |= (B_STALE|B_AGE); 596 if (bp->b_flags & B_ERROR) { 597 brelse(bp); 598 return (EIO); 599 } 600 dfs = bp->b_un.b_fs; 601 602 /* error locks may only unlock after the fs has been made consistent */ 603 if (errlck == UN_ERRLCK) { 604 if (dfs->fs_clean == FSFIX) { /* being repaired */ 605 brelse(bp); 606 return (EAGAIN); 607 } 608 /* repair not yet started? */ 609 finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN; 610 if (dfs->fs_clean != finished_fsclean) { 611 brelse(bp); 612 return (EBUSY); 613 } 614 } 615 616 /* 617 * if superblock has changed too much, abort 618 */ 619 if ((mfs->fs_sblkno != dfs->fs_sblkno) || 620 (mfs->fs_cblkno != dfs->fs_cblkno) || 621 (mfs->fs_iblkno != dfs->fs_iblkno) || 622 (mfs->fs_dblkno != dfs->fs_dblkno) || 623 (mfs->fs_cgoffset != dfs->fs_cgoffset) || 624 (mfs->fs_cgmask != dfs->fs_cgmask) || 625 (mfs->fs_bsize != dfs->fs_bsize) || 626 (mfs->fs_fsize != dfs->fs_fsize) || 627 (mfs->fs_frag != dfs->fs_frag) || 628 (mfs->fs_bmask != dfs->fs_bmask) || 629 (mfs->fs_fmask != dfs->fs_fmask) || 630 (mfs->fs_bshift != dfs->fs_bshift) || 631 (mfs->fs_fshift != dfs->fs_fshift) || 632 (mfs->fs_fragshift != dfs->fs_fragshift) || 633 (mfs->fs_fsbtodb != dfs->fs_fsbtodb) || 634 (mfs->fs_sbsize != dfs->fs_sbsize) || 635 (mfs->fs_nindir != dfs->fs_nindir) || 636 (mfs->fs_nspf != dfs->fs_nspf) || 637 (mfs->fs_trackskew != dfs->fs_trackskew) || 638 (mfs->fs_cgsize != dfs->fs_cgsize) || 639 (mfs->fs_ntrak != dfs->fs_ntrak) || 640 (mfs->fs_nsect != dfs->fs_nsect) || 641 (mfs->fs_spc != dfs->fs_spc) || 642 (mfs->fs_cpg != dfs->fs_cpg) || 643 (mfs->fs_ipg != dfs->fs_ipg) || 644 (mfs->fs_fpg != dfs->fs_fpg) || 645 (mfs->fs_postblformat != dfs->fs_postblformat) || 646 (mfs->fs_magic != dfs->fs_magic)) { 647 brelse(bp); 648 return (EACCES); 649 } 650 if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time) 651 if (mfs->fs_clean == FSLOG) { 652 brelse(bp); 653 return (EACCES); 654 } 655 656 /* 657 * get new summary info 658 */ 659 if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) { 660 brelse(bp); 661 return (EIO); 662 } 663 664 /* 665 * release old summary info and update in-memory superblock 666 */ 667 kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize); 668 mfs->fs_u.fs_csp = dfs->fs_u.fs_csp; /* Only entry 0 used */ 669 670 /* 671 * update fields allowed to change 672 */ 673 mfs->fs_size = dfs->fs_size; 674 mfs->fs_dsize = dfs->fs_dsize; 675 mfs->fs_ncg = dfs->fs_ncg; 676 mfs->fs_minfree = dfs->fs_minfree; 677 mfs->fs_rotdelay = dfs->fs_rotdelay; 678 mfs->fs_rps = dfs->fs_rps; 679 mfs->fs_maxcontig = dfs->fs_maxcontig; 680 mfs->fs_maxbpg = dfs->fs_maxbpg; 681 mfs->fs_csmask = dfs->fs_csmask; 682 mfs->fs_csshift = dfs->fs_csshift; 683 mfs->fs_optim = dfs->fs_optim; 684 mfs->fs_csaddr = dfs->fs_csaddr; 685 mfs->fs_cssize = dfs->fs_cssize; 686 mfs->fs_ncyl = dfs->fs_ncyl; 687 mfs->fs_cstotal = dfs->fs_cstotal; 688 mfs->fs_reclaim = dfs->fs_reclaim; 689 690 if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) { 691 mfs->fs_reclaim &= ~FS_RECLAIM; 692 mfs->fs_reclaim |= FS_RECLAIMING; 693 ufs_thread_start(&ufsvfsp->vfs_reclaim, 694 ufs_thread_reclaim, vfsp); 695 } 696 697 /* XXX What to do about sparecon? */ 698 699 /* XXX need to copy volume label */ 700 701 /* 702 * ondisk clean flag overrides inmemory clean flag iff == FSBAD 703 * or if error-locked and ondisk is now clean 704 */ 705 needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 706 if (needs_unlock) 707 mutex_enter(&ufsvfsp->vfs_lock); 708 709 if (errlck == UN_ERRLCK) { 710 if (finished_fsclean == dfs->fs_clean) 711 mfs->fs_clean = finished_fsclean; 712 else 713 mfs->fs_clean = FSBAD; 714 mfs->fs_state = FSOKAY - dfs->fs_time; 715 } 716 717 if (FSOKAY != dfs->fs_state + dfs->fs_time || 718 (dfs->fs_clean == FSBAD)) 719 mfs->fs_clean = FSBAD; 720 721 if (needs_unlock) 722 mutex_exit(&ufsvfsp->vfs_lock); 723 724 brelse(bp); 725 726 return (0); 727 } 728 729 /* 730 * ufs_reconcile_inode 731 * reconcile ondisk inode with incore inode 732 */ 733 static int 734 ufs_reconcile_inode(struct inode *ip, void *arg) 735 { 736 int i; 737 int ndaddr; 738 int niaddr; 739 struct dinode *dp; /* ondisk inode */ 740 struct buf *bp = NULL; 741 uid_t d_uid; 742 gid_t d_gid; 743 int error = 0; 744 struct fs *fs; 745 746 /* 747 * not an inode we care about 748 */ 749 if (ip->i_ufsvfs != (struct ufsvfs *)arg) 750 return (0); 751 752 fs = ip->i_fs; 753 754 /* 755 * Inode reconciliation fails: we made the filesystem quiescent 756 * and we did a ufs_flush() before calling ufs_reconcile_inode() 757 * and thus the inode should not have been changed inbetween. 758 * Any discrepancies indicate a logic error and a pretty 759 * significant run-state inconsistency we should complain about. 760 */ 761 if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) { 762 cmn_err(CE_WARN, "%s: Inode reconciliation failed for" 763 "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number); 764 return (EINVAL); 765 } 766 767 /* 768 * get the dinode 769 */ 770 bp = UFS_BREAD(ip->i_ufsvfs, 771 ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)), 772 (int)fs->fs_bsize); 773 if (bp->b_flags & B_ERROR) { 774 brelse(bp); 775 return (EIO); 776 } 777 dp = bp->b_un.b_dino; 778 dp += itoo(fs, ip->i_number); 779 780 /* 781 * handle Sun's implementation of EFT 782 */ 783 d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid; 784 d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid; 785 786 rw_enter(&ip->i_contents, RW_WRITER); 787 788 /* 789 * some fields are not allowed to change 790 */ 791 if ((ip->i_mode != dp->di_mode) || 792 (ip->i_gen != dp->di_gen) || 793 (ip->i_uid != d_uid) || 794 (ip->i_gid != d_gid)) { 795 error = EACCES; 796 goto out; 797 } 798 799 /* 800 * and some are allowed to change 801 */ 802 ip->i_size = dp->di_size; 803 ip->i_ic.ic_flags = dp->di_ic.ic_flags; 804 ip->i_blocks = dp->di_blocks; 805 ip->i_nlink = dp->di_nlink; 806 if (ip->i_flag & IFASTSYMLNK) { 807 ndaddr = 1; 808 niaddr = 0; 809 } else { 810 ndaddr = NDADDR; 811 niaddr = NIADDR; 812 } 813 for (i = 0; i < ndaddr; ++i) 814 ip->i_db[i] = dp->di_db[i]; 815 for (i = 0; i < niaddr; ++i) 816 ip->i_ib[i] = dp->di_ib[i]; 817 818 out: 819 rw_exit(&ip->i_contents); 820 brelse(bp); 821 return (error); 822 } 823 824 /* 825 * ufs_reconcile 826 * reconcile ondisk superblock/inodes with any incore 827 */ 828 static int 829 ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck) 830 { 831 int error = 0; 832 833 /* 834 * get rid of as much inmemory data as possible 835 */ 836 (void) ufs_flush(vfsp); 837 838 /* 839 * reconcile the superblock and inodes 840 */ 841 if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck)) 842 return (error); 843 if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp)) 844 return (error); 845 /* 846 * allocation blocks may be incorrect; get rid of them 847 */ 848 (void) ufs_flush(vfsp); 849 850 return (error); 851 } 852 853 /* 854 * File system locking 855 */ 856 int 857 ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log) 858 { 859 return (ufs__fiolfs(vp, lockfsp, /* from_user */ 1, from_log)); 860 } 861 862 /* kernel-internal interface, also used by fix-on-panic */ 863 int 864 ufs__fiolfs( 865 struct vnode *vp, 866 struct lockfs *lockfsp, 867 int from_user, 868 int from_log) 869 { 870 struct ulockfs *ulp; 871 struct lockfs lfs; 872 int error; 873 struct vfs *vfsp; 874 struct ufsvfs *ufsvfsp; 875 int errlck = NO_ERRLCK; 876 int poll_events = POLLPRI; 877 extern struct pollhead ufs_pollhd; 878 ulockfs_info_t *head; 879 ulockfs_info_t *info; 880 int signal = 0; 881 882 /* check valid lock type */ 883 if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK) 884 return (EINVAL); 885 886 if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data) 887 return (EIO); 888 889 vfsp = vp->v_vfsp; 890 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 891 ulp = &ufsvfsp->vfs_ulockfs; 892 893 head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 894 SEARCH_ULOCKFSP(head, ulp, info); 895 896 /* 897 * Suspend both the reclaim thread and the delete thread. 898 * This must be done outside the lockfs locking protocol. 899 */ 900 ufs_thread_suspend(&ufsvfsp->vfs_reclaim); 901 ufs_thread_suspend(&ufsvfsp->vfs_delete); 902 903 /* 904 * Acquire vfs_reflock around ul_lock to avoid deadlock with 905 * umount/remount/sync. 906 */ 907 vfs_lock_wait(vfsp); 908 mutex_enter(&ulp->ul_lock); 909 atomic_add_long(&ufs_quiesce_pend, 1); 910 911 /* 912 * Quit if there is another lockfs request in progress 913 * that is waiting for existing ufs_vnops to complete. 914 */ 915 if (ULOCKFS_IS_BUSY(ulp)) { 916 error = EBUSY; 917 goto errexit; 918 } 919 920 /* cannot ulocked or downgrade a hard-lock */ 921 if (ULOCKFS_IS_HLOCK(ulp)) { 922 error = EIO; 923 goto errexit; 924 } 925 926 /* an error lock may be unlocked or relocked, only */ 927 if (ULOCKFS_IS_ELOCK(ulp)) { 928 if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) { 929 error = EBUSY; 930 goto errexit; 931 } 932 } 933 934 /* 935 * a read-only error lock may only be upgraded to an 936 * error lock or hard lock 937 */ 938 if (ULOCKFS_IS_ROELOCK(ulp)) { 939 if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) { 940 error = EBUSY; 941 goto errexit; 942 } 943 } 944 945 /* 946 * until read-only error locks are fully implemented 947 * just return EINVAL 948 */ 949 if (LOCKFS_IS_ROELOCK(lockfsp)) { 950 error = EINVAL; 951 goto errexit; 952 } 953 954 /* 955 * an error lock may only be applied if the file system is 956 * unlocked or already error locked. 957 * (this is to prevent the case where a fs gets changed out from 958 * underneath a fs that is locked for backup, 959 * that is, name/delete/write-locked.) 960 */ 961 if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) && 962 !ULOCKFS_IS_ROELOCK(ulp)) && 963 (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) { 964 error = EBUSY; 965 goto errexit; 966 } 967 968 /* get and validate the input lockfs request */ 969 if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs)) 970 goto errexit; 971 972 /* 973 * save current ulockfs struct 974 */ 975 bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs)); 976 977 /* 978 * Freeze the file system (pend future accesses) 979 */ 980 ufs_freeze(ulp, lockfsp); 981 982 /* 983 * Set locking in progress because ufs_quiesce may free the 984 * ul_lock mutex. 985 */ 986 ULOCKFS_SET_BUSY(ulp); 987 /* update the ioctl copy */ 988 LOCKFS_SET_BUSY(&ulp->ul_lockfs); 989 990 /* 991 * We need to unset FWLOCK status before we call ufs_quiesce 992 * so that the thread doesnt get suspended. We do this only if 993 * this (fallocate) thread requested an unlock operation. 994 */ 995 if (info && (info->flags & ULOCK_INFO_FALLOCATE)) { 996 if (!ULOCKFS_IS_WLOCK(ulp)) 997 ULOCKFS_CLR_FWLOCK(ulp); 998 } 999 1000 /* 1001 * Quiesce (wait for outstanding accesses to finish) 1002 */ 1003 if (error = ufs_quiesce(ulp)) { 1004 /* 1005 * Interrupted due to signal. There could still be 1006 * pending vnops. 1007 */ 1008 signal = 1; 1009 1010 /* 1011 * We do broadcast because lock-status 1012 * could be reverted to old status. 1013 */ 1014 cv_broadcast(&ulp->ul_cv); 1015 goto errout; 1016 } 1017 1018 /* 1019 * If the fallocate thread requested a write fs lock operation 1020 * then we set fwlock status in the ulp. 1021 */ 1022 if (info && (info->flags & ULOCK_INFO_FALLOCATE)) { 1023 if (ULOCKFS_IS_WLOCK(ulp)) 1024 ULOCKFS_SET_FWLOCK(ulp); 1025 } 1026 1027 /* 1028 * can't wlock or (ro)elock fs with accounting or local swap file 1029 */ 1030 if ((ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) || 1031 ULOCKFS_IS_ROELOCK(ulp)) && !from_log) { 1032 if (error = ufs_checkaccton(vp)) 1033 goto errout; 1034 if (error = ufs_checkswapon(vp)) 1035 goto errout; 1036 } 1037 1038 /* 1039 * save error lock status to pass down to reconcilation 1040 * routines and for later cleanup 1041 */ 1042 if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp)) 1043 errlck = UN_ERRLCK; 1044 1045 if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) { 1046 int needs_unlock; 1047 int needs_sbwrite; 1048 1049 poll_events |= POLLERR; 1050 errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs)? 1051 RE_ERRLCK: SET_ERRLCK; 1052 1053 needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock); 1054 if (needs_unlock) 1055 mutex_enter(&ufsvfsp->vfs_lock); 1056 1057 /* disable delayed i/o */ 1058 needs_sbwrite = 0; 1059 1060 if (errlck == SET_ERRLCK) { 1061 ufsvfsp->vfs_fs->fs_clean = FSBAD; 1062 needs_sbwrite = 1; 1063 } 1064 1065 needs_sbwrite |= ufsvfsp->vfs_dio; 1066 ufsvfsp->vfs_dio = 0; 1067 1068 if (needs_unlock) 1069 mutex_exit(&ufsvfsp->vfs_lock); 1070 1071 if (needs_sbwrite) { 1072 ulp->ul_sbowner = curthread; 1073 TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE); 1074 1075 if (needs_unlock) 1076 mutex_enter(&ufsvfsp->vfs_lock); 1077 1078 ufsvfsp->vfs_fs->fs_fmod = 0; 1079 1080 if (needs_unlock) 1081 mutex_exit(&ufsvfsp->vfs_lock); 1082 } 1083 } 1084 1085 /* 1086 * reconcile superblock and inodes if was wlocked 1087 */ 1088 if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) { 1089 if (error = ufs_reconcile(vfsp, ufsvfsp, errlck)) 1090 goto errout; 1091 /* 1092 * in case the fs grew; reset the metadata map for logging tests 1093 */ 1094 TRANS_MATA_UMOUNT(ufsvfsp); 1095 TRANS_MATA_MOUNT(ufsvfsp); 1096 TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs); 1097 } 1098 1099 /* 1100 * At least everything *currently* dirty goes out. 1101 */ 1102 1103 if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) && 1104 !ULOCKFS_IS_ELOCK(ulp)) 1105 goto errout; 1106 1107 /* 1108 * thaw file system and wakeup pended processes 1109 */ 1110 if (error = ufs_thaw(vfsp, ufsvfsp, ulp)) 1111 if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp)) 1112 goto errout; 1113 1114 /* 1115 * reset modified flag if not already write locked 1116 */ 1117 if (!LOCKFS_IS_WLOCK(&lfs)) 1118 ULOCKFS_CLR_MOD(ulp); 1119 1120 /* 1121 * idle the lock struct 1122 */ 1123 ULOCKFS_CLR_BUSY(ulp); 1124 /* update the ioctl copy */ 1125 LOCKFS_CLR_BUSY(&ulp->ul_lockfs); 1126 1127 /* 1128 * free current comment 1129 */ 1130 if (lfs.lf_comment && lfs.lf_comlen != 0) { 1131 kmem_free(lfs.lf_comment, lfs.lf_comlen); 1132 lfs.lf_comment = NULL; 1133 lfs.lf_comlen = 0; 1134 } 1135 1136 /* do error lock cleanup */ 1137 if (errlck == UN_ERRLCK) 1138 ufsfx_unlockfs(ufsvfsp); 1139 1140 else if (errlck == RE_ERRLCK) 1141 ufsfx_lockfs(ufsvfsp); 1142 1143 /* don't allow error lock from user to invoke panic */ 1144 else if (from_user && errlck == SET_ERRLCK && 1145 !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4))) 1146 (void) ufs_fault(ufsvfsp->vfs_root, 1147 ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ? 1148 ulp->ul_lockfs.lf_comment: "user-applied error lock"); 1149 1150 atomic_add_long(&ufs_quiesce_pend, -1); 1151 mutex_exit(&ulp->ul_lock); 1152 vfs_unlock(vfsp); 1153 1154 if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs)) 1155 poll_events |= POLLERR; 1156 1157 pollwakeup(&ufs_pollhd, poll_events); 1158 1159 /* 1160 * Allow both the delete thread and the reclaim thread to 1161 * continue. 1162 */ 1163 ufs_thread_continue(&ufsvfsp->vfs_delete); 1164 ufs_thread_continue(&ufsvfsp->vfs_reclaim); 1165 1166 return (0); 1167 1168 errout: 1169 /* 1170 * Lock failed. Reset the old lock in ufsvfs if not hard locked. 1171 */ 1172 if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) { 1173 bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs)); 1174 ulp->ul_fs_lock = (1 << lfs.lf_lock); 1175 } 1176 1177 /* 1178 * Don't call ufs_thaw() when there's a signal during 1179 * ufs quiesce operation as it can lead to deadlock 1180 * with getpage. 1181 */ 1182 if (signal == 0) 1183 (void) ufs_thaw(vfsp, ufsvfsp, ulp); 1184 1185 ULOCKFS_CLR_BUSY(ulp); 1186 LOCKFS_CLR_BUSY(&ulp->ul_lockfs); 1187 1188 errexit: 1189 atomic_add_long(&ufs_quiesce_pend, -1); 1190 mutex_exit(&ulp->ul_lock); 1191 vfs_unlock(vfsp); 1192 1193 /* 1194 * Allow both the delete thread and the reclaim thread to 1195 * continue. 1196 */ 1197 ufs_thread_continue(&ufsvfsp->vfs_delete); 1198 ufs_thread_continue(&ufsvfsp->vfs_reclaim); 1199 1200 return (error); 1201 } 1202 1203 /* 1204 * fiolfss 1205 * return the current file system locking state info 1206 */ 1207 int 1208 ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp) 1209 { 1210 struct ulockfs *ulp; 1211 1212 if (!vp || !vp->v_vfsp || !VTOI(vp)) 1213 return (EINVAL); 1214 1215 /* file system has been forcibly unmounted */ 1216 if (VTOI(vp)->i_ufsvfs == NULL) 1217 return (EIO); 1218 1219 ulp = VTOUL(vp); 1220 1221 if (ULOCKFS_IS_HLOCK(ulp)) { 1222 *lockfsp = ulp->ul_lockfs; /* structure assignment */ 1223 return (0); 1224 } 1225 1226 mutex_enter(&ulp->ul_lock); 1227 1228 *lockfsp = ulp->ul_lockfs; /* structure assignment */ 1229 1230 if (ULOCKFS_IS_MOD(ulp)) 1231 lockfsp->lf_flags |= LOCKFS_MOD; 1232 1233 mutex_exit(&ulp->ul_lock); 1234 1235 return (0); 1236 } 1237 1238 /* 1239 * ufs_check_lockfs 1240 * check whether a ufs_vnops conflicts with the file system lock 1241 */ 1242 int 1243 ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask) 1244 { 1245 k_sigset_t smask; 1246 int sig, slock; 1247 1248 ASSERT(MUTEX_HELD(&ulp->ul_lock)); 1249 1250 while (ulp->ul_fs_lock & mask) { 1251 slock = (int)ULOCKFS_IS_SLOCK(ulp); 1252 if ((curthread->t_flag & T_DONTPEND) && !slock) { 1253 curthread->t_flag |= T_WOULDBLOCK; 1254 return (EAGAIN); 1255 } 1256 curthread->t_flag &= ~T_WOULDBLOCK; 1257 1258 /* 1259 * In the case of an onerr umount of the fs, threads could 1260 * have blocked before coming into ufs_check_lockfs and 1261 * need to check for the special case of ELOCK and 1262 * vfs_dontblock being set which would indicate that the fs 1263 * is on its way out and will not return therefore making 1264 * EIO the appropriate response. 1265 */ 1266 if (ULOCKFS_IS_HLOCK(ulp) || 1267 (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock)) 1268 return (EIO); 1269 1270 /* 1271 * wait for lock status to change 1272 */ 1273 if (slock || ufsvfsp->vfs_nointr) { 1274 cv_wait(&ulp->ul_cv, &ulp->ul_lock); 1275 } else { 1276 sigintr(&smask, 1); 1277 sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock); 1278 sigunintr(&smask); 1279 if ((!sig && (ulp->ul_fs_lock & mask)) || 1280 ufsvfsp->vfs_dontblock) 1281 return (EINTR); 1282 } 1283 } 1284 1285 if (mask & ULOCKFS_FWLOCK) { 1286 atomic_add_long(&ulp->ul_falloc_cnt, 1); 1287 ULOCKFS_SET_FALLOC(ulp); 1288 } else { 1289 atomic_add_long(&ulp->ul_vnops_cnt, 1); 1290 } 1291 1292 return (0); 1293 } 1294 1295 /* 1296 * Check whether we came across the handcrafted lockfs protocol path. We can't 1297 * simply check for T_DONTBLOCK here as one would assume since this can also 1298 * falsely catch recursive VOP's going to a different filesystem, instead we 1299 * check if we already hold the ulockfs->ul_lock mutex. 1300 */ 1301 static int 1302 ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp) 1303 { 1304 return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1); 1305 } 1306 1307 /* 1308 * ufs_lockfs_begin - start the lockfs locking protocol 1309 */ 1310 int 1311 ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask) 1312 { 1313 int error; 1314 int rec_vop; 1315 struct ulockfs *ulp; 1316 ulockfs_info_t *ulockfs_info; 1317 ulockfs_info_t *ulockfs_info_free; 1318 ulockfs_info_t *ulockfs_info_temp; 1319 1320 /* 1321 * file system has been forcibly unmounted 1322 */ 1323 if (ufsvfsp == NULL) 1324 return (EIO); 1325 1326 *ulpp = ulp = &ufsvfsp->vfs_ulockfs; 1327 1328 /* 1329 * Do lockfs protocol 1330 */ 1331 ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 1332 IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free); 1333 1334 /* 1335 * Detect recursive VOP call or handcrafted internal lockfs protocol 1336 * path and bail out in that case. 1337 */ 1338 if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) { 1339 *ulpp = NULL; 1340 return (0); 1341 } else { 1342 if (ulockfs_info_free == NULL) { 1343 if ((ulockfs_info_temp = (ulockfs_info_t *) 1344 kmem_zalloc(sizeof (ulockfs_info_t), 1345 KM_NOSLEEP)) == NULL) { 1346 *ulpp = NULL; 1347 return (ENOMEM); 1348 } 1349 } 1350 } 1351 1352 /* 1353 * First time VOP call 1354 */ 1355 mutex_enter(&ulp->ul_lock); 1356 if (ULOCKFS_IS_JUSTULOCK(ulp)) { 1357 if (mask & ULOCKFS_FWLOCK) { 1358 atomic_add_long(&ulp->ul_falloc_cnt, 1); 1359 ULOCKFS_SET_FALLOC(ulp); 1360 } else { 1361 atomic_add_long(&ulp->ul_vnops_cnt, 1); 1362 } 1363 } else { 1364 if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) { 1365 mutex_exit(&ulp->ul_lock); 1366 if (ulockfs_info_free == NULL) 1367 kmem_free(ulockfs_info_temp, 1368 sizeof (ulockfs_info_t)); 1369 return (error); 1370 } 1371 } 1372 mutex_exit(&ulp->ul_lock); 1373 1374 if (ulockfs_info_free != NULL) { 1375 ulockfs_info_free->ulp = ulp; 1376 if (mask & ULOCKFS_FWLOCK) 1377 ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE; 1378 } else { 1379 ulockfs_info_temp->ulp = ulp; 1380 ulockfs_info_temp->next = ulockfs_info; 1381 if (mask & ULOCKFS_FWLOCK) 1382 ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE; 1383 ASSERT(ufs_lockfs_key != 0); 1384 (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp); 1385 } 1386 1387 curthread->t_flag |= T_DONTBLOCK; 1388 return (0); 1389 } 1390 1391 /* 1392 * Check whether we are returning from the top level VOP. 1393 */ 1394 static int 1395 ufs_lockfs_top_vop_return(ulockfs_info_t *head) 1396 { 1397 ulockfs_info_t *info; 1398 int result = 1; 1399 1400 for (info = head; info != NULL; info = info->next) { 1401 if (info->ulp != NULL) { 1402 result = 0; 1403 break; 1404 } 1405 } 1406 1407 return (result); 1408 } 1409 1410 /* 1411 * ufs_lockfs_end - terminate the lockfs locking protocol 1412 */ 1413 void 1414 ufs_lockfs_end(struct ulockfs *ulp) 1415 { 1416 ulockfs_info_t *info; 1417 ulockfs_info_t *head; 1418 1419 /* 1420 * end-of-VOP protocol 1421 */ 1422 if (ulp == NULL) 1423 return; 1424 1425 head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 1426 SEARCH_ULOCKFSP(head, ulp, info); 1427 1428 /* 1429 * If we're called from a first level VOP, we have to have a 1430 * valid ulockfs record in the TSD. 1431 */ 1432 ASSERT(info != NULL); 1433 1434 /* 1435 * Invalidate the ulockfs record. 1436 */ 1437 info->ulp = NULL; 1438 1439 if (ufs_lockfs_top_vop_return(head)) 1440 curthread->t_flag &= ~T_DONTBLOCK; 1441 1442 mutex_enter(&ulp->ul_lock); 1443 1444 /* fallocate thread */ 1445 if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) { 1446 if (!atomic_add_long_nv(&ulp->ul_falloc_cnt, -1)) 1447 ULOCKFS_CLR_FALLOC(ulp); 1448 } else { /* normal thread */ 1449 if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1)) 1450 cv_broadcast(&ulp->ul_cv); 1451 } 1452 1453 /* Clear the thread's fallocate state */ 1454 if (info->flags & ULOCK_INFO_FALLOCATE) 1455 info->flags &= ~ULOCK_INFO_FALLOCATE; 1456 1457 if (ulp->ul_vnops_cnt == 0 && ulp->ul_falloc_cnt) 1458 cv_broadcast(&ulp->ul_cv); 1459 1460 mutex_exit(&ulp->ul_lock); 1461 } 1462 1463 /* 1464 * specialized version of ufs_lockfs_begin() called by ufs_getpage(). 1465 */ 1466 int 1467 ufs_lockfs_begin_getpage( 1468 struct ufsvfs *ufsvfsp, 1469 struct ulockfs **ulpp, 1470 struct seg *seg, 1471 int read_access, 1472 uint_t *protp) 1473 { 1474 ulong_t mask; 1475 int error; 1476 int rec_vop; 1477 struct ulockfs *ulp; 1478 ulockfs_info_t *ulockfs_info; 1479 ulockfs_info_t *ulockfs_info_free; 1480 ulockfs_info_t *ulockfs_info_temp; 1481 1482 /* 1483 * file system has been forcibly unmounted 1484 */ 1485 if (ufsvfsp == NULL) 1486 return (EIO); 1487 1488 *ulpp = ulp = &ufsvfsp->vfs_ulockfs; 1489 1490 /* 1491 * Do lockfs protocol 1492 */ 1493 ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key); 1494 IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free); 1495 1496 /* 1497 * Detect recursive VOP call or handcrafted internal lockfs protocol 1498 * path and bail out in that case. 1499 */ 1500 if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) { 1501 *ulpp = NULL; 1502 return (0); 1503 } else { 1504 if (ulockfs_info_free == NULL) { 1505 if ((ulockfs_info_temp = (ulockfs_info_t *) 1506 kmem_zalloc(sizeof (ulockfs_info_t), 1507 KM_NOSLEEP)) == NULL) { 1508 *ulpp = NULL; 1509 return (ENOMEM); 1510 } 1511 } 1512 } 1513 1514 /* 1515 * First time VOP call 1516 */ 1517 mutex_enter(&ulp->ul_lock); 1518 if (ULOCKFS_IS_JUSTULOCK(ulp)) 1519 /* 1520 * fs is not locked, simply inc the active-ops counter 1521 */ 1522 atomic_add_long(&ulp->ul_vnops_cnt, 1); 1523 else { 1524 if (seg->s_ops == &segvn_ops && 1525 ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) { 1526 mask = (ulong_t)ULOCKFS_GETREAD_MASK; 1527 } else if (protp && read_access) { 1528 /* 1529 * Restrict the mapping to readonly. 1530 * Writes to this mapping will cause 1531 * another fault which will then 1532 * be suspended if fs is write locked 1533 */ 1534 *protp &= ~PROT_WRITE; 1535 mask = (ulong_t)ULOCKFS_GETREAD_MASK; 1536 } else 1537 mask = (ulong_t)ULOCKFS_GETWRITE_MASK; 1538 1539 /* 1540 * will sleep if this fs is locked against this VOP 1541 */ 1542 if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) { 1543 mutex_exit(&ulp->ul_lock); 1544 if (ulockfs_info_free == NULL) 1545 kmem_free(ulockfs_info_temp, 1546 sizeof (ulockfs_info_t)); 1547 return (error); 1548 } 1549 } 1550 mutex_exit(&ulp->ul_lock); 1551 1552 if (ulockfs_info_free != NULL) { 1553 ulockfs_info_free->ulp = ulp; 1554 } else { 1555 ulockfs_info_temp->ulp = ulp; 1556 ulockfs_info_temp->next = ulockfs_info; 1557 ASSERT(ufs_lockfs_key != 0); 1558 (void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp); 1559 } 1560 1561 curthread->t_flag |= T_DONTBLOCK; 1562 return (0); 1563 } 1564 1565 void 1566 ufs_lockfs_tsd_destructor(void *head) 1567 { 1568 ulockfs_info_t *curr = (ulockfs_info_t *)head; 1569 ulockfs_info_t *temp; 1570 1571 for (; curr != NULL; ) { 1572 /* 1573 * The TSD destructor is being called when the thread exits 1574 * (via thread_exit()). At that time it must have cleaned up 1575 * all VOPs via ufs_lockfs_end() and there must not be a 1576 * valid ulockfs record exist while a thread is exiting. 1577 */ 1578 temp = curr; 1579 curr = curr->next; 1580 ASSERT(temp->ulp == NULL); 1581 kmem_free(temp, sizeof (ulockfs_info_t)); 1582 } 1583 } 1584