xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_lockfs.c (revision 032624d56c174c5c55126582b32e314a6af15522)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/param.h>
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/resource.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/buf.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/proc.h>
43 #include <sys/disp.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/atomic.h>
48 #include <sys/kmem.h>
49 #include <sys/uio.h>
50 #include <sys/conf.h>
51 #include <sys/mman.h>
52 #include <sys/pathname.h>
53 #include <sys/debug.h>
54 #include <sys/vmmeter.h>
55 #include <sys/vmsystm.h>
56 #include <sys/cmn_err.h>
57 #include <sys/vtrace.h>
58 #include <sys/acct.h>
59 #include <sys/dnlc.h>
60 #include <sys/swap.h>
61 
62 #include <sys/fs/ufs_fs.h>
63 #include <sys/fs/ufs_inode.h>
64 #include <sys/fs/ufs_fsdir.h>
65 #include <sys/fs/ufs_trans.h>
66 #include <sys/fs/ufs_panic.h>
67 #include <sys/fs/ufs_mount.h>
68 #include <sys/fs/ufs_bio.h>
69 #include <sys/fs/ufs_log.h>
70 #include <sys/fs/ufs_quota.h>
71 #include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
72 #include <sys/errno.h>
73 #include <sys/sysinfo.h>
74 
75 #include <vm/hat.h>
76 #include <vm/pvn.h>
77 #include <vm/as.h>
78 #include <vm/seg.h>
79 #include <vm/seg_map.h>
80 #include <vm/seg_vn.h>
81 #include <vm/rm.h>
82 #include <vm/anon.h>
83 #include <sys/swap.h>
84 #include <sys/dnlc.h>
85 
86 extern struct vnode *common_specvp(struct vnode *vp);
87 
88 /*
 * error lock status
 *
 * Computed in ufs__fiolfs() from the previous and the requested lock and
 * handed down to the reconcile routines and the fix-on-panic cleanup:
 *	UN_ERRLCK	previous lock was an error lock, new state is unlocked
 *	SET_ERRLCK	new lock is a (read-only) error lock, previous was not
 *	RE_ERRLCK	new lock is a (read-only) error lock, and so was the
 *			previous one
 *	NO_ERRLCK	none of the above (initial value)
 */
89 #define	UN_ERRLCK	(-1)
90 #define	SET_ERRLCK	1
91 #define	RE_ERRLCK	2
92 #define	NO_ERRLCK	0
93 
94 /*
95  * Index to be used in TSD for storing lockfs data
96  */
97 uint_t ufs_lockfs_key;
98 
/*
 * One record of the per-thread list kept in TSD under ufs_lockfs_key.
 * A record whose ulp is NULL is treated as unused and may be handed out
 * again (see IS_REC_VOP).
 */
typedef struct _ulockfs_info {
	struct _ulockfs_info *next;
	struct ulockfs *ulp;
} ulockfs_info_t;

/*
 * Check in TSD whether we are already doing any VOP on this filesystem.
 *
 *	found	(out) set to 1 if a record for ulockp is on the list,
 *		else 0
 *	head	first record of the list (may be NULL)
 *	ulockp	the struct ulockfs * to look for
 *	freep	(out) first unused record met before the search stopped,
 *		or NULL if there was none
 *
 * The macro parameters deliberately do not use the names of the structure
 * members (next, ulp): a parameter called "ulp" would also be substituted
 * into "_curr->ulp", so the macro would only compile when the caller's
 * argument happened to be spelled "ulp" as well.
 */
#define	IS_REC_VOP(found, head, ulockp, freep)			\
{								\
	ulockfs_info_t *_curr;					\
								\
	for ((found) = 0, (freep) = NULL, _curr = (head);	\
	    _curr != NULL; _curr = _curr->next) {		\
		if (((freep) == NULL) &&			\
		    (_curr->ulp == NULL))			\
			(freep) = _curr;			\
		if (_curr->ulp == (ulockp)) {			\
			(found) = 1;				\
			break;					\
		}						\
	}							\
}

/*
 * Get the lockfs data from TSD so that lockfs handles the recursive VOP
 * properly.
 *
 *	head	first record of the list (may be NULL)
 *	ulockp	the struct ulockfs * to look for
 *	info	(out) the record whose ulp equals ulockp, or NULL
 *
 * See IS_REC_VOP for why the parameter is not called "ulp".
 */
#define	SEARCH_ULOCKFSP(head, ulockp, info)			\
{								\
	ulockfs_info_t *_curr;					\
								\
	for (_curr = (head); _curr != NULL;			\
	    _curr = _curr->next) {				\
		if (_curr->ulp == (ulockp)) {			\
			break;					\
		}						\
	}							\
								\
	(info) = _curr;						\
}
140 
141 /*
142  * Validate lockfs request
143  */
144 static int
145 ufs_getlfd(
146 	struct lockfs *lockfsp,		/* new lock request */
147 	struct lockfs *ul_lockfsp)	/* old lock state */
148 {
149 	int	error = 0;
150 
151 	/*
152 	 * no input flags defined
153 	 */
154 	if (lockfsp->lf_flags != 0) {
155 		error = EINVAL;
156 		goto errout;
157 	}
158 
159 	/*
160 	 * check key
161 	 */
162 	if (!LOCKFS_IS_ULOCK(ul_lockfsp))
163 		if (lockfsp->lf_key != ul_lockfsp->lf_key) {
164 			error = EINVAL;
165 			goto errout;
166 	}
167 
168 	lockfsp->lf_key = ul_lockfsp->lf_key + 1;
169 
170 errout:
171 	return (error);
172 }
173 
174 /*
175  * ufs_checkaccton
176  *	check if accounting is turned on on this fs
177  */
178 
179 int
180 ufs_checkaccton(struct vnode *vp)
181 {
182 	if (acct_fs_in_use(vp))
183 		return (EDEADLK);
184 	return (0);
185 }
186 
187 /*
188  * ufs_checkswapon
189  *	check if local swapping is to file on this fs
190  */
191 int
192 ufs_checkswapon(struct vnode *vp)
193 {
194 	struct swapinfo	*sip;
195 
196 	mutex_enter(&swapinfo_lock);
197 	for (sip = swapinfo; sip; sip = sip->si_next)
198 		if (sip->si_vp->v_vfsp == vp->v_vfsp) {
199 			mutex_exit(&swapinfo_lock);
200 			return (EDEADLK);
201 		}
202 	mutex_exit(&swapinfo_lock);
203 	return (0);
204 }
205 
206 /*
207  * ufs_freeze
208  *	pend future accesses for current lock and desired lock
209  */
210 void
211 ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
212 {
213 	/*
214 	 * set to new lock type
215 	 */
216 	ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
217 	ulp->ul_lockfs.lf_key = lockfsp->lf_key;
218 	ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
219 	ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;
220 
221 	ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
222 }
223 
224 /*
225  * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before
226  * starting ufs_quiesce() protocol and decrement it only when a file system no
227  * longer has to be in quiescent state. This allows ufs_pageio() to detect
228  * that another thread wants to quiesce a file system. See more comments in
229  * ufs_pageio().
230  */
231 ulong_t ufs_quiesce_pend = 0;
232 
233 /*
234  * ufs_quiesce
235  *	wait for outstanding accesses to finish
236  */
237 int
238 ufs_quiesce(struct ulockfs *ulp)
239 {
240 	int error = 0;
241 
242 	/*
243 	 * Set a softlock to suspend future ufs_vnops so that
244 	 * this lockfs request will not be starved
245 	 */
246 	ULOCKFS_SET_SLOCK(ulp);
247 	ASSERT(ufs_quiesce_pend);
248 
249 	/* check if there is any outstanding ufs vnodeops calls */
250 	while (ulp->ul_vnops_cnt)
251 		/*
252 		 * use timed version of cv_wait_sig() to make sure we don't
253 		 * miss a wake up call from ufs_pageio() when it doesn't use
254 		 * ul_lock.
255 		 */
256 		if (!cv_timedwait_sig(&ulp->ul_cv, &ulp->ul_lock, lbolt + hz)) {
257 			error = EINTR;
258 			goto out;
259 		}
260 
261 out:
262 	/*
263 	 * unlock the soft lock
264 	 */
265 	ULOCKFS_CLR_SLOCK(ulp);
266 
267 	return (error);
268 }
269 /*
270  * ufs_flush_inode
271  */
272 int
273 ufs_flush_inode(struct inode *ip, void *arg)
274 {
275 	int	error;
276 	int	saverror	= 0;
277 
278 	/*
279 	 * wrong file system; keep looking
280 	 */
281 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
282 		return (0);
283 
284 	/*
285 	 * asynchronously push all the dirty pages
286 	 */
287 	if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
288 	    (error != EAGAIN))
289 		saverror = error;
290 	/*
291 	 * wait for io and discard all mappings
292 	 */
293 	if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
294 		saverror = error;
295 
296 	if (ITOV(ip)->v_type == VDIR) {
297 		dnlc_dir_purge(&ip->i_danchor);
298 	}
299 
300 	return (saverror);
301 }
302 
303 /*
304  * ufs_flush
305  *	Flush everything that is currently dirty; this includes invalidating
306  *	any mappings.
307  */
308 int
309 ufs_flush(struct vfs *vfsp)
310 {
311 	int		error;
312 	int		saverror = 0;
313 	struct ufsvfs	*ufsvfsp	= (struct ufsvfs *)vfsp->vfs_data;
314 	struct fs	*fs		= ufsvfsp->vfs_fs;
315 
316 	ASSERT(vfs_lock_held(vfsp));
317 
318 	/*
319 	 * purge dnlc
320 	 */
321 	(void) dnlc_purge_vfsp(vfsp, 0);
322 
323 	/*
324 	 * drain the delete and idle threads
325 	 */
326 	ufs_delete_drain(vfsp, 0, 0);
327 	ufs_idle_drain(vfsp);
328 
329 	/*
330 	 * flush and invalidate quota records
331 	 */
332 	(void) qsync(ufsvfsp);
333 
334 	/*
335 	 * flush w/invalidate the inodes for vfsp
336 	 */
337 	if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
338 		saverror = error;
339 
340 	/*
341 	 * synchronously flush superblock and summary info
342 	 */
343 	if (fs->fs_ronly == 0 && fs->fs_fmod) {
344 		fs->fs_fmod = 0;
345 		TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
346 	}
347 	/*
348 	 * flush w/invalidate block device pages and buf cache
349 	 */
350 	if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp),
351 	    (offset_t)0, 0, B_INVAL, CRED())) > 0)
352 		saverror = error;
353 
354 	(void) bflush((dev_t)vfsp->vfs_dev);
355 	(void) bfinval((dev_t)vfsp->vfs_dev, 0);
356 
357 	/*
358 	 * drain the delete and idle threads again
359 	 */
360 	ufs_delete_drain(vfsp, 0, 0);
361 	ufs_idle_drain(vfsp);
362 
363 	/*
364 	 * play with the clean flag
365 	 */
366 	if (saverror == 0)
367 		ufs_checkclean(vfsp);
368 
369 	/*
370 	 * flush any outstanding transactions and roll the log
371 	 */
372 	if (TRANS_ISTRANS(ufsvfsp)) {
373 		curthread->t_flag |= T_DONTBLOCK;
374 		TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE,
375 		    error);
376 		if (!error) {
377 			TRANS_END_SYNC(ufsvfsp, saverror, TOP_COMMIT_FLUSH,
378 			    TOP_COMMIT_SIZE);
379 		}
380 		curthread->t_flag &= ~T_DONTBLOCK;
381 
382 		logmap_roll_dev(ufsvfsp->vfs_log); /* fully roll the log */
383 	}
384 
385 	return (saverror);
386 }
387 
388 /*
389  * ufs_thaw_wlock
390  *	special processing when thawing down to wlock
391  */
392 static int
393 ufs_thaw_wlock(struct inode *ip, void *arg)
394 {
395 	/*
396 	 * wrong file system; keep looking
397 	 */
398 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
399 		return (0);
400 
401 	/*
402 	 * iupdat refuses to clear flags if the fs is read only.  The fs
403 	 * may become read/write during the lock and we wouldn't want
404 	 * these inodes being written to disk.  So clear the flags.
405 	 */
406 	rw_enter(&ip->i_contents, RW_WRITER);
407 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
408 	rw_exit(&ip->i_contents);
409 
410 	/*
411 	 * pages are mlocked -- fail wlock
412 	 */
413 	if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
414 		return (EBUSY);
415 
416 	return (0);
417 }
418 
419 /*
420  * ufs_thaw_hlock
421  *	special processing when thawing down to hlock or elock
422  */
423 static int
424 ufs_thaw_hlock(struct inode *ip, void *arg)
425 {
426 	struct vnode	*vp	= ITOV(ip);
427 
428 	/*
429 	 * wrong file system; keep looking
430 	 */
431 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
432 		return (0);
433 
434 	/*
435 	 * blow away all pages - even if they are mlocked
436 	 */
437 	do {
438 		(void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
439 	} while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
440 	rw_enter(&ip->i_contents, RW_WRITER);
441 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
442 	rw_exit(&ip->i_contents);
443 
444 	return (0);
445 }
446 
447 /*
448  * ufs_thaw
449  *	thaw file system lock down to current value
450  */
451 int
452 ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
453 {
454 	int		error	= 0;
455 	int		noidel	= (int)(ulp->ul_flag & ULOCKFS_NOIDEL);
456 
457 	/*
458 	 * if wlock or hlock or elock
459 	 */
460 	if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
461 	    ULOCKFS_IS_ELOCK(ulp)) {
462 
463 		/*
464 		 * don't keep access times
465 		 * don't free deleted files
466 		 * if superblock writes are allowed, limit them to me for now
467 		 */
468 		ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
469 		if (ulp->ul_sbowner != (kthread_id_t)-1)
470 			ulp->ul_sbowner = curthread;
471 
472 		/*
473 		 * wait for writes for deleted files and superblock updates
474 		 */
475 		(void) ufs_flush(vfsp);
476 
477 		/*
478 		 * now make sure the quota file is up-to-date
479 		 *	expensive; but effective
480 		 */
481 		error = ufs_flush(vfsp);
482 		/*
483 		 * no one can write the superblock
484 		 */
485 		ulp->ul_sbowner = (kthread_id_t)-1;
486 
487 		/*
488 		 * special processing for wlock/hlock/elock
489 		 */
490 		if (ULOCKFS_IS_WLOCK(ulp)) {
491 			if (error)
492 				goto errout;
493 			error = bfinval(ufsvfsp->vfs_dev, 0);
494 			if (error)
495 				goto errout;
496 			error = ufs_scan_inodes(0, ufs_thaw_wlock,
497 					(void *)ufsvfsp, ufsvfsp);
498 			if (error)
499 				goto errout;
500 		}
501 		if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
502 			error = 0;
503 			(void) ufs_scan_inodes(0, ufs_thaw_hlock,
504 					(void *)ufsvfsp, ufsvfsp);
505 			(void) bfinval(ufsvfsp->vfs_dev, 1);
506 		}
507 	} else {
508 
509 		/*
510 		 * okay to keep access times
511 		 * okay to free deleted files
512 		 * okay to write the superblock
513 		 */
514 		ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
515 		ulp->ul_sbowner = NULL;
516 
517 		/*
518 		 * flush in case deleted files are in memory
519 		 */
520 		if (noidel) {
521 			if (error = ufs_flush(vfsp))
522 				goto errout;
523 		}
524 	}
525 
526 errout:
527 	cv_broadcast(&ulp->ul_cv);
528 	return (error);
529 }
530 
531 /*
532  * ufs_reconcile_fs
533  *	reconcile incore superblock with ondisk superblock
534  */
535 int
536 ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
537 {
538 	struct fs	*mfs; 	/* in-memory superblock */
539 	struct fs	*dfs;	/* on-disk   superblock */
540 	struct buf	*bp;	/* on-disk   superblock buf */
541 	int		 needs_unlock;
542 	char		 finished_fsclean;
543 
544 	mfs = ufsvfsp->vfs_fs;
545 
546 	/*
547 	 * get the on-disk copy of the superblock
548 	 */
549 	bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
550 	bp->b_flags |= (B_STALE|B_AGE);
551 	if (bp->b_flags & B_ERROR) {
552 		brelse(bp);
553 		return (EIO);
554 	}
555 	dfs = bp->b_un.b_fs;
556 
557 	/* error locks may only unlock after the fs has been made consistent */
558 	if (errlck == UN_ERRLCK) {
559 		if (dfs->fs_clean == FSFIX) {	/* being repaired */
560 			brelse(bp);
561 			return (EAGAIN);
562 		}
563 		/* repair not yet started? */
564 		finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN;
565 		if (dfs->fs_clean != finished_fsclean) {
566 			brelse(bp);
567 			return (EBUSY);
568 		}
569 	}
570 
571 	/*
572 	 * if superblock has changed too much, abort
573 	 */
574 	if ((mfs->fs_sblkno		!= dfs->fs_sblkno) ||
575 	    (mfs->fs_cblkno		!= dfs->fs_cblkno) ||
576 	    (mfs->fs_iblkno		!= dfs->fs_iblkno) ||
577 	    (mfs->fs_dblkno		!= dfs->fs_dblkno) ||
578 	    (mfs->fs_cgoffset		!= dfs->fs_cgoffset) ||
579 	    (mfs->fs_cgmask		!= dfs->fs_cgmask) ||
580 	    (mfs->fs_bsize		!= dfs->fs_bsize) ||
581 	    (mfs->fs_fsize		!= dfs->fs_fsize) ||
582 	    (mfs->fs_frag		!= dfs->fs_frag) ||
583 	    (mfs->fs_bmask		!= dfs->fs_bmask) ||
584 	    (mfs->fs_fmask		!= dfs->fs_fmask) ||
585 	    (mfs->fs_bshift		!= dfs->fs_bshift) ||
586 	    (mfs->fs_fshift		!= dfs->fs_fshift) ||
587 	    (mfs->fs_fragshift		!= dfs->fs_fragshift) ||
588 	    (mfs->fs_fsbtodb		!= dfs->fs_fsbtodb) ||
589 	    (mfs->fs_sbsize		!= dfs->fs_sbsize) ||
590 	    (mfs->fs_nindir		!= dfs->fs_nindir) ||
591 	    (mfs->fs_nspf		!= dfs->fs_nspf) ||
592 	    (mfs->fs_trackskew		!= dfs->fs_trackskew) ||
593 	    (mfs->fs_cgsize		!= dfs->fs_cgsize) ||
594 	    (mfs->fs_ntrak		!= dfs->fs_ntrak) ||
595 	    (mfs->fs_nsect		!= dfs->fs_nsect) ||
596 	    (mfs->fs_spc		!= dfs->fs_spc) ||
597 	    (mfs->fs_cpg		!= dfs->fs_cpg) ||
598 	    (mfs->fs_ipg		!= dfs->fs_ipg) ||
599 	    (mfs->fs_fpg		!= dfs->fs_fpg) ||
600 	    (mfs->fs_postblformat	!= dfs->fs_postblformat) ||
601 	    (mfs->fs_magic		!= dfs->fs_magic)) {
602 		brelse(bp);
603 		return (EACCES);
604 	}
605 	if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
606 		if (mfs->fs_clean == FSLOG) {
607 			brelse(bp);
608 			return (EACCES);
609 		}
610 
611 	/*
612 	 * get new summary info
613 	 */
614 	if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
615 		brelse(bp);
616 		return (EIO);
617 	}
618 
619 	/*
620 	 * release old summary info and update in-memory superblock
621 	 */
622 	kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
623 	mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */
624 
625 	/*
626 	 * update fields allowed to change
627 	 */
628 	mfs->fs_size		= dfs->fs_size;
629 	mfs->fs_dsize		= dfs->fs_dsize;
630 	mfs->fs_ncg		= dfs->fs_ncg;
631 	mfs->fs_minfree		= dfs->fs_minfree;
632 	mfs->fs_rotdelay	= dfs->fs_rotdelay;
633 	mfs->fs_rps		= dfs->fs_rps;
634 	mfs->fs_maxcontig	= dfs->fs_maxcontig;
635 	mfs->fs_maxbpg		= dfs->fs_maxbpg;
636 	mfs->fs_csmask		= dfs->fs_csmask;
637 	mfs->fs_csshift		= dfs->fs_csshift;
638 	mfs->fs_optim		= dfs->fs_optim;
639 	mfs->fs_csaddr		= dfs->fs_csaddr;
640 	mfs->fs_cssize		= dfs->fs_cssize;
641 	mfs->fs_ncyl		= dfs->fs_ncyl;
642 	mfs->fs_cstotal		= dfs->fs_cstotal;
643 	mfs->fs_reclaim		= dfs->fs_reclaim;
644 
645 	if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
646 		mfs->fs_reclaim &= ~FS_RECLAIM;
647 		mfs->fs_reclaim |=  FS_RECLAIMING;
648 		ufs_thread_start(&ufsvfsp->vfs_reclaim,
649 			ufs_thread_reclaim, vfsp);
650 	}
651 
652 	/* XXX What to do about sparecon? */
653 
654 	/* XXX need to copy volume label */
655 
656 	/*
657 	 * ondisk clean flag overrides inmemory clean flag iff == FSBAD
658 	 * or if error-locked and ondisk is now clean
659 	 */
660 	needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
661 	if (needs_unlock)
662 		mutex_enter(&ufsvfsp->vfs_lock);
663 
664 	if (errlck == UN_ERRLCK) {
665 		if (finished_fsclean == dfs->fs_clean)
666 			mfs->fs_clean = finished_fsclean;
667 		else
668 			mfs->fs_clean = FSBAD;
669 		mfs->fs_state = FSOKAY - dfs->fs_time;
670 	}
671 
672 	if (FSOKAY != dfs->fs_state + dfs->fs_time ||
673 	    (dfs->fs_clean == FSBAD))
674 		mfs->fs_clean = FSBAD;
675 
676 	if (needs_unlock)
677 		mutex_exit(&ufsvfsp->vfs_lock);
678 
679 	brelse(bp);
680 
681 	return (0);
682 }
683 
684 /*
685  * ufs_reconcile_inode
686  *	reconcile ondisk inode with incore inode
687  */
688 static int
689 ufs_reconcile_inode(struct inode *ip, void *arg)
690 {
691 	int		i;
692 	int		ndaddr;
693 	int		niaddr;
694 	struct dinode	*dp;		/* ondisk inode */
695 	struct buf	*bp	= NULL;
696 	uid_t		d_uid;
697 	gid_t		d_gid;
698 	int		error = 0;
699 	struct fs	*fs;
700 
701 	/*
702 	 * not an inode we care about
703 	 */
704 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
705 		return (0);
706 
707 	fs = ip->i_fs;
708 
709 	/*
710 	 * Inode reconciliation fails: we made the filesystem quiescent
711 	 * and we did a ufs_flush() before calling ufs_reconcile_inode()
712 	 * and thus the inode should not have been changed inbetween.
713 	 * Any discrepancies indicate a logic error and a pretty
714 	 * significant run-state inconsistency we should complain about.
715 	 */
716 	if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
717 		cmn_err(CE_WARN, "%s: Inode reconciliation failed for"
718 		    "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
719 		return (EINVAL);
720 	}
721 
722 	/*
723 	 * get the dinode
724 	 */
725 	bp = UFS_BREAD(ip->i_ufsvfs,
726 			ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
727 	    (int)fs->fs_bsize);
728 	if (bp->b_flags & B_ERROR) {
729 		brelse(bp);
730 		return (EIO);
731 	}
732 	dp  = bp->b_un.b_dino;
733 	dp += itoo(fs, ip->i_number);
734 
735 	/*
736 	 * handle Sun's implementation of EFT
737 	 */
738 	d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
739 	d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid;
740 
741 	rw_enter(&ip->i_contents, RW_WRITER);
742 
743 	/*
744 	 * some fields are not allowed to change
745 	 */
746 	if ((ip->i_mode  != dp->di_mode) ||
747 	    (ip->i_gen   != dp->di_gen) ||
748 	    (ip->i_uid   != d_uid) ||
749 	    (ip->i_gid   != d_gid)) {
750 		error = EACCES;
751 		goto out;
752 	}
753 
754 	/*
755 	 * and some are allowed to change
756 	 */
757 	ip->i_size		= dp->di_size;
758 	ip->i_ic.ic_flags	= dp->di_ic.ic_flags;
759 	ip->i_blocks		= dp->di_blocks;
760 	ip->i_nlink		= dp->di_nlink;
761 	if (ip->i_flag & IFASTSYMLNK) {
762 		ndaddr = 1;
763 		niaddr = 0;
764 	} else {
765 		ndaddr = NDADDR;
766 		niaddr = NIADDR;
767 	}
768 	for (i = 0; i < ndaddr; ++i)
769 		ip->i_db[i] = dp->di_db[i];
770 	for (i = 0; i < niaddr; ++i)
771 		ip->i_ib[i] = dp->di_ib[i];
772 
773 out:
774 	rw_exit(&ip->i_contents);
775 	brelse(bp);
776 	return (error);
777 }
778 
779 /*
780  * ufs_reconcile
781  *	reconcile ondisk superblock/inodes with any incore
782  */
783 static int
784 ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
785 {
786 	int	error = 0;
787 
788 	/*
789 	 * get rid of as much inmemory data as possible
790 	 */
791 	(void) ufs_flush(vfsp);
792 
793 	/*
794 	 * reconcile the superblock and inodes
795 	 */
796 	if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
797 		return (error);
798 	if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
799 		return (error);
800 	/*
801 	 * allocation blocks may be incorrect; get rid of them
802 	 */
803 	(void) ufs_flush(vfsp);
804 
805 	return (error);
806 }
807 
/*
 * ufs_fiolfs
 *	File system locking: forwards the request to ufs__fiolfs() with
 *	from_user set, and returns its result.
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
	const int	from_user = 1;

	return (ufs__fiolfs(vp, lockfsp, from_user, from_log));
}
816 
817 /*
 * ufs__fiolfs
 *	kernel-internal interface, also used by fix-on-panic
 *
 *	Move the file system that vp belongs to from its current lock to
 *	the lock requested in lockfsp.
 *
 *	from_user is 1 when called through ufs_fiolfs(); here it only
 *	gates the ufs_fault() call made after a new error lock was set.
 *	A non-zero from_log skips the accounting and swap file checks.
 *
 *	Returns 0 on success.  On failure the error of the rejected check
 *	or failed step is returned; if the lock state had already been
 *	changed, the previous lock is put back unless the new lock is a
 *	hard lock.  The reclaim and delete threads are suspended for the
 *	duration of the call and continued on every exit path.
 */
818 int
819 ufs__fiolfs(
820 	struct vnode *vp,
821 	struct lockfs *lockfsp,
822 	int from_user,
823 	int from_log)
824 {
825 	struct ulockfs	*ulp;
826 	struct lockfs	lfs;
827 	int		error;
828 	struct vfs	*vfsp;
829 	struct ufsvfs	*ufsvfsp;
830 	int		 errlck		= NO_ERRLCK;
831 	int		 poll_events	= POLLPRI;
832 	extern struct pollhead ufs_pollhd;
833 
834 	/* check valid lock type */
835 	if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
836 		return (EINVAL);
837 
838 	if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
839 		return (EIO);
840 
841 	vfsp = vp->v_vfsp;
842 	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
843 	ulp = &ufsvfsp->vfs_ulockfs;
844 
845 	/*
846 	 * Suspend both the reclaim thread and the delete thread.
847 	 * This must be done outside the lockfs locking protocol.
848 	 */
849 	ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
850 	ufs_thread_suspend(&ufsvfsp->vfs_delete);
851 
852 	/*
853 	 * Acquire vfs_reflock around ul_lock to avoid deadlock with
854 	 * umount/remount/sync.
855 	 */
856 	vfs_lock_wait(vfsp);
857 	mutex_enter(&ulp->ul_lock);
858 	atomic_add_long(&ufs_quiesce_pend, 1);
859 
860 	/*
861 	 * Quit if there is another lockfs request in progress
862 	 * that is waiting for existing ufs_vnops to complete.
863 	 */
864 	if (ULOCKFS_IS_BUSY(ulp)) {
865 		error = EBUSY;
866 		goto errexit;
867 	}
868 
869 	/* cannot unlock or downgrade a hard-lock */
870 	if (ULOCKFS_IS_HLOCK(ulp)) {
871 		error = EIO;
872 		goto errexit;
873 	}
874 
875 	/* an error lock may be unlocked or relocked, only */
876 	if (ULOCKFS_IS_ELOCK(ulp)) {
877 		if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
878 			error = EBUSY;
879 			goto errexit;
880 		}
881 	}
882 
883 	/*
884 	 * a read-only error lock may only be upgraded to an
885 	 * error lock or hard lock
886 	 */
887 	if (ULOCKFS_IS_ROELOCK(ulp)) {
888 		if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
889 			error = EBUSY;
890 			goto errexit;
891 		}
892 	}
893 
894 	/*
895 	 * until read-only error locks are fully implemented
896 	 * just return EINVAL
897 	 */
898 	if (LOCKFS_IS_ROELOCK(lockfsp)) {
899 		error = EINVAL;
900 		goto errexit;
901 	}
902 
903 	/*
904 	 * an error lock may only be applied if the file system is
905 	 * unlocked or already error locked.
906 	 * (this is to prevent the case where a fs gets changed out from
907 	 * underneath a fs that is locked for backup,
908 	 * that is, name/delete/write-locked.)
909 	 */
910 	if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
911 	    !ULOCKFS_IS_ROELOCK(ulp)) &&
912 	    (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
913 		error = EBUSY;
914 		goto errexit;
915 	}
916 
917 	/* get and validate the input lockfs request */
918 	if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
919 		goto errexit;
920 
921 	/*
922 	 * save current ulockfs struct
	 * (from here on lfs describes the previous lock and ulp the
	 * requested one; errout uses lfs to put the previous lock back)
923 	 */
924 	bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));
925 
926 	/*
927 	 * Freeze the file system (pend future accesses)
928 	 */
929 	ufs_freeze(ulp, lockfsp);
930 
931 	/*
932 	 * Set locking in progress because ufs_quiesce may free the
933 	 * ul_lock mutex.
934 	 */
935 	ULOCKFS_SET_BUSY(ulp);
936 	/* update the ioctl copy */
937 	LOCKFS_SET_BUSY(&ulp->ul_lockfs);
938 
939 	/*
940 	 * Quiesce (wait for outstanding accesses to finish)
941 	 */
942 	if (error = ufs_quiesce(ulp))
943 		goto errout;
944 
945 	/*
946 	 * can't wlock or (ro)elock fs with accounting or local swap file
947 	 */
948 	if ((ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) ||
949 	    ULOCKFS_IS_ROELOCK(ulp)) && !from_log) {
950 		if (error = ufs_checkaccton(vp))
951 			goto errout;
952 		if (error = ufs_checkswapon(vp))
953 			goto errout;
954 	}
955 
956 	/*
957 	 * save error lock status to pass down to reconciliation
958 	 * routines and for later cleanup
959 	 */
960 	if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
961 		errlck = UN_ERRLCK;
962 
963 	if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
964 		int needs_unlock;
965 		int needs_sbwrite;
966 
967 		poll_events |= POLLERR;
968 		errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs)?
969 							RE_ERRLCK: SET_ERRLCK;
970 
		/* take vfs_lock only if this thread does not hold it yet */
971 		needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
972 		if (needs_unlock)
973 			mutex_enter(&ufsvfsp->vfs_lock);
974 
975 		/* disable delayed i/o */
976 		needs_sbwrite = 0;
977 
978 		if (errlck == SET_ERRLCK) {
979 			ufsvfsp->vfs_fs->fs_clean = FSBAD;
980 			needs_sbwrite = 1;
981 		}
982 
983 		needs_sbwrite |= ufsvfsp->vfs_dio;
984 		ufsvfsp->vfs_dio = 0;
985 
986 		if (needs_unlock)
987 			mutex_exit(&ufsvfsp->vfs_lock);
988 
989 		if (needs_sbwrite) {
990 			ulp->ul_sbowner = curthread;
991 			TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);
992 
993 			if (needs_unlock)
994 				mutex_enter(&ufsvfsp->vfs_lock);
995 
996 			ufsvfsp->vfs_fs->fs_fmod = 0;
997 
998 			if (needs_unlock)
999 				mutex_exit(&ufsvfsp->vfs_lock);
1000 		}
1001 	}
1002 
1003 	/*
1004 	 * reconcile superblock and inodes if was wlocked or elocked
1005 	 */
1006 	if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
1007 		if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
1008 			goto errout;
1009 		/*
1010 		 * in case the fs grew; reset the metadata map for logging tests
1011 		 */
1012 		TRANS_MATA_UMOUNT(ufsvfsp);
1013 		TRANS_MATA_MOUNT(ufsvfsp);
1014 		TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
1015 	}
1016 
1017 	/*
1018 	 * At least everything *currently* dirty goes out.
	 * (a flush error does not stop a hard lock or an error lock)
1019 	 */
1020 
1021 	if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
1022 	    !ULOCKFS_IS_ELOCK(ulp))
1023 		goto errout;
1024 
1025 	/*
1026 	 * thaw file system and wakeup pended processes
1027 	 */
1028 	if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
1029 		if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
1030 			goto errout;
1031 
1032 	/*
1033 	 * reset modified flag if not already write locked
1034 	 */
1035 	if (!LOCKFS_IS_WLOCK(&lfs))
1036 		ULOCKFS_CLR_MOD(ulp);
1037 
1038 	/*
1039 	 * idle the lock struct
1040 	 */
1041 	ULOCKFS_CLR_BUSY(ulp);
1042 	/* update the ioctl copy */
1043 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
1044 
1045 	/*
1046 	 * free current comment
	 * (the comment of the previous lock, saved in lfs)
1047 	 */
1048 	if (lfs.lf_comment && lfs.lf_comlen != 0) {
1049 		kmem_free(lfs.lf_comment, lfs.lf_comlen);
1050 		lfs.lf_comment = NULL;
1051 		lfs.lf_comlen = 0;
1052 	}
1053 
1054 	/* do error lock cleanup */
1055 	if (errlck == UN_ERRLCK)
1056 		ufsfx_unlockfs(ufsvfsp);
1057 
1058 	else if (errlck == RE_ERRLCK)
1059 		ufsfx_lockfs(ufsvfsp);
1060 
1061 	/* don't allow error lock from user to invoke panic */
1062 	else if (from_user && errlck == SET_ERRLCK &&
1063 		!(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
1064 		(void) ufs_fault(ufsvfsp->vfs_root,
1065 		    ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
1066 		    ulp->ul_lockfs.lf_comment: "user-applied error lock");
1067 
1068 	atomic_add_long(&ufs_quiesce_pend, -1);
1069 	mutex_exit(&ulp->ul_lock);
1070 	vfs_unlock(vfsp);
1071 
1072 	if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
1073 		poll_events |= POLLERR;
1074 
1075 	pollwakeup(&ufs_pollhd, poll_events);
1076 
1077 	/*
1078 	 * Allow both the delete thread and the reclaim thread to
1079 	 * continue.
1080 	 */
1081 	ufs_thread_continue(&ufsvfsp->vfs_delete);
1082 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
1083 
1084 	return (0);
1085 
1086 errout:
1087 	/*
1088 	 * Lock failed. Reset the old lock in ufsvfs if not hard locked.
1089 	 */
1090 	if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
1091 		bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
1092 		ulp->ul_fs_lock = (1 << lfs.lf_lock);
1093 	}
1094 	(void) ufs_thaw(vfsp, ufsvfsp, ulp);
1095 	ULOCKFS_CLR_BUSY(ulp);
1096 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
1097 
	/* errexit alone is used when the lock state was never touched */
1098 errexit:
1099 	atomic_add_long(&ufs_quiesce_pend, -1);
1100 	mutex_exit(&ulp->ul_lock);
1101 	vfs_unlock(vfsp);
1102 
1103 	/*
1104 	 * Allow both the delete thread and the reclaim thread to
1105 	 * continue.
1106 	 */
1107 	ufs_thread_continue(&ufsvfsp->vfs_delete);
1108 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
1109 
1110 	return (error);
1111 }
1112 
1113 /*
1114  * fiolfss
1115  * 	return the current file system locking state info
1116  */
1117 int
1118 ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
1119 {
1120 	struct ulockfs	*ulp;
1121 
1122 	if (!vp || !vp->v_vfsp || !VTOI(vp))
1123 		return (EINVAL);
1124 
1125 	/* file system has been forcibly unmounted */
1126 	if (VTOI(vp)->i_ufsvfs == NULL)
1127 		return (EIO);
1128 
1129 	ulp = VTOUL(vp);
1130 
1131 	if (ULOCKFS_IS_HLOCK(ulp)) {
1132 		*lockfsp = ulp->ul_lockfs;	/* structure assignment */
1133 		return (0);
1134 	}
1135 
1136 	mutex_enter(&ulp->ul_lock);
1137 
1138 	*lockfsp = ulp->ul_lockfs;	/* structure assignment */
1139 
1140 	if (ULOCKFS_IS_MOD(ulp))
1141 		lockfsp->lf_flags |= LOCKFS_MOD;
1142 
1143 	mutex_exit(&ulp->ul_lock);
1144 
1145 	return (0);
1146 }
1147 
1148 /*
1149  * ufs_check_lockfs
1150  *	check whether a ufs_vnops conflicts with the file system lock
1151  */
1152 int
1153 ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
1154 {
1155 	k_sigset_t	smask;
1156 	int		sig, slock;
1157 
1158 	ASSERT(MUTEX_HELD(&ulp->ul_lock));
1159 
1160 	while (ulp->ul_fs_lock & mask) {
1161 		slock = (int)ULOCKFS_IS_SLOCK(ulp);
1162 		if ((curthread->t_flag & T_DONTPEND) && !slock) {
1163 			curthread->t_flag |= T_WOULDBLOCK;
1164 			return (EAGAIN);
1165 		}
1166 		curthread->t_flag &= ~T_WOULDBLOCK;
1167 
1168 		if (ULOCKFS_IS_HLOCK(ulp))
1169 			return (EIO);
1170 
1171 		/*
1172 		 * wait for lock status to change
1173 		 */
1174 		if (slock || ufsvfsp->vfs_nointr) {
1175 			cv_wait(&ulp->ul_cv, &ulp->ul_lock);
1176 		} else {
1177 			sigintr(&smask, 1);
1178 			sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
1179 			sigunintr(&smask);
1180 			if ((!sig && (ulp->ul_fs_lock & mask)) ||
1181 				ufsvfsp->vfs_dontblock)
1182 				return (EINTR);
1183 		}
1184 	}
1185 	atomic_add_long(&ulp->ul_vnops_cnt, 1);
1186 	return (0);
1187 }
1188 
1189 /*
1190  * Check whether we came across the handcrafted lockfs protocol path. We can't
1191  * simply check for T_DONTBLOCK here as one would assume since this can also
1192  * falsely catch recursive VOP's going to a different filesystem, instead we
1193  * check if we already hold the ulockfs->ul_lock mutex.
1194  */
1195 static int
1196 ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
1197 {
1198 	return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
1199 }
1200 
1201 /*
1202  * ufs_lockfs_begin - start the lockfs locking protocol
1203  */
1204 int
1205 ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
1206 {
1207 	int 		error;
1208 	int		rec_vop;
1209 	struct ulockfs *ulp;
1210 	ulockfs_info_t	*ulockfs_info;
1211 	ulockfs_info_t	*ulockfs_info_free;
1212 	ulockfs_info_t	*ulockfs_info_temp;
1213 
1214 	/*
1215 	 * file system has been forcibly unmounted
1216 	 */
1217 	if (ufsvfsp == NULL)
1218 		return (EIO);
1219 
1220 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
1221 
1222 	/*
1223 	 * Do lockfs protocol
1224 	 */
1225 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
1226 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
1227 
1228 	/*
1229 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
1230 	 * path and bail out in that case.
1231 	 */
1232 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
1233 		*ulpp = NULL;
1234 		return (0);
1235 	} else {
1236 		if (ulockfs_info_free == NULL) {
1237 			if ((ulockfs_info_temp = (ulockfs_info_t *)
1238 			    kmem_zalloc(sizeof (ulockfs_info_t),
1239 			    KM_NOSLEEP)) == NULL) {
1240 				*ulpp = NULL;
1241 				return (ENOMEM);
1242 			}
1243 		}
1244 	}
1245 
1246 	/*
1247 	 * First time VOP call
1248 	 */
1249 	mutex_enter(&ulp->ul_lock);
1250 	if (ULOCKFS_IS_JUSTULOCK(ulp))
1251 		atomic_add_long(&ulp->ul_vnops_cnt, 1);
1252 	else {
1253 		if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) {
1254 			mutex_exit(&ulp->ul_lock);
1255 			if (ulockfs_info_free == NULL)
1256 				kmem_free(ulockfs_info_temp,
1257 				    sizeof (ulockfs_info_t));
1258 			return (error);
1259 		}
1260 	}
1261 	mutex_exit(&ulp->ul_lock);
1262 
1263 	if (ulockfs_info_free != NULL) {
1264 		ulockfs_info_free->ulp = ulp;
1265 	} else {
1266 		ulockfs_info_temp->ulp = ulp;
1267 		ulockfs_info_temp->next = ulockfs_info;
1268 		ASSERT(ufs_lockfs_key != 0);
1269 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
1270 	}
1271 
1272 	curthread->t_flag |= T_DONTBLOCK;
1273 	return (0);
1274 }
1275 
1276 /*
1277  * Check whether we are returning from the top level VOP.
1278  */
1279 static int
1280 ufs_lockfs_top_vop_return(ulockfs_info_t *head)
1281 {
1282 	ulockfs_info_t *info;
1283 	int result = 1;
1284 
1285 	for (info = head; info != NULL; info = info->next) {
1286 		if (info->ulp != NULL) {
1287 			result = 0;
1288 			break;
1289 		}
1290 	}
1291 
1292 	return (result);
1293 }
1294 
1295 /*
1296  * ufs_lockfs_end - terminate the lockfs locking protocol
1297  */
1298 void
1299 ufs_lockfs_end(struct ulockfs *ulp)
1300 {
1301 	ulockfs_info_t *info;
1302 	ulockfs_info_t *head;
1303 
1304 	/*
1305 	 * end-of-VOP protocol
1306 	 */
1307 	if (ulp == NULL)
1308 		return;
1309 
1310 	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
1311 	SEARCH_ULOCKFSP(head, ulp, info);
1312 
1313 	/*
1314 	 * If we're called from a first level VOP, we have to have a
1315 	 * valid ulockfs record in the TSD.
1316 	 */
1317 	ASSERT(info != NULL);
1318 
1319 	/*
1320 	 * Invalidate the ulockfs record.
1321 	 */
1322 	info->ulp = NULL;
1323 
1324 	if (ufs_lockfs_top_vop_return(head))
1325 		curthread->t_flag &= ~T_DONTBLOCK;
1326 
1327 	mutex_enter(&ulp->ul_lock);
1328 
1329 	if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
1330 		cv_broadcast(&ulp->ul_cv);
1331 
1332 	mutex_exit(&ulp->ul_lock);
1333 }
1334 
1335 /*
1336  * specialized version of ufs_lockfs_begin() called by ufs_getpage().
1337  */
1338 int
1339 ufs_lockfs_begin_getpage(
1340 	struct ufsvfs	*ufsvfsp,
1341 	struct ulockfs	**ulpp,
1342 	struct seg	*seg,
1343 	int		read_access,
1344 	uint_t		*protp)
1345 {
1346 	ulong_t			mask;
1347 	int 			error;
1348 	int			rec_vop;
1349 	struct ulockfs		*ulp;
1350 	ulockfs_info_t		*ulockfs_info;
1351 	ulockfs_info_t		*ulockfs_info_free;
1352 	ulockfs_info_t		*ulockfs_info_temp;
1353 
1354 	/*
1355 	 * file system has been forcibly unmounted
1356 	 */
1357 	if (ufsvfsp == NULL)
1358 		return (EIO);
1359 
1360 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
1361 
1362 	/*
1363 	 * Do lockfs protocol
1364 	 */
1365 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
1366 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
1367 
1368 	/*
1369 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
1370 	 * path and bail out in that case.
1371 	 */
1372 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
1373 		*ulpp = NULL;
1374 		return (0);
1375 	} else {
1376 		if (ulockfs_info_free == NULL) {
1377 			if ((ulockfs_info_temp = (ulockfs_info_t *)
1378 			    kmem_zalloc(sizeof (ulockfs_info_t),
1379 			    KM_NOSLEEP)) == NULL) {
1380 				*ulpp = NULL;
1381 				return (ENOMEM);
1382 			}
1383 		}
1384 	}
1385 
1386 	/*
1387 	 * First time VOP call
1388 	 */
1389 	mutex_enter(&ulp->ul_lock);
1390 	if (ULOCKFS_IS_JUSTULOCK(ulp))
1391 		/*
1392 		 * fs is not locked, simply inc the active-ops counter
1393 		 */
1394 		atomic_add_long(&ulp->ul_vnops_cnt, 1);
1395 	else {
1396 		if (seg->s_ops == &segvn_ops &&
1397 		    ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
1398 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
1399 		} else if (protp && read_access) {
1400 			/*
1401 			 * Restrict the mapping to readonly.
1402 			 * Writes to this mapping will cause
1403 			 * another fault which will then
1404 			 * be suspended if fs is write locked
1405 			 */
1406 			*protp &= ~PROT_WRITE;
1407 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
1408 		} else
1409 			mask = (ulong_t)ULOCKFS_GETWRITE_MASK;
1410 
1411 		/*
1412 		 * will sleep if this fs is locked against this VOP
1413 		 */
1414 		if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) {
1415 			mutex_exit(&ulp->ul_lock);
1416 			if (ulockfs_info_free == NULL)
1417 				kmem_free(ulockfs_info_temp,
1418 				    sizeof (ulockfs_info_t));
1419 			return (error);
1420 		}
1421 	}
1422 	mutex_exit(&ulp->ul_lock);
1423 
1424 	if (ulockfs_info_free != NULL) {
1425 		ulockfs_info_free->ulp = ulp;
1426 	} else {
1427 		ulockfs_info_temp->ulp = ulp;
1428 		ulockfs_info_temp->next = ulockfs_info;
1429 		ASSERT(ufs_lockfs_key != 0);
1430 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
1431 	}
1432 
1433 	curthread->t_flag |= T_DONTBLOCK;
1434 	return (0);
1435 }
1436 
1437 void
1438 ufs_lockfs_tsd_destructor(void *head)
1439 {
1440 	ulockfs_info_t *curr = (ulockfs_info_t *)head;
1441 	ulockfs_info_t *temp;
1442 
1443 	for (; curr != NULL; ) {
1444 		/*
1445 		 * The TSD destructor is being called when the thread exits
1446 		 * (via thread_exit()). At that time it must have cleaned up
1447 		 * all VOPs via ufs_lockfs_end() and there must not be a
1448 		 * valid ulockfs record exist while a thread is exiting.
1449 		 */
1450 		temp = curr;
1451 		curr = curr->next;
1452 		ASSERT(temp->ulp == NULL);
1453 		kmem_free(temp, sizeof (ulockfs_info_t));
1454 	}
1455 }
1456