xref: /titanic_50/usr/src/uts/common/fs/lofs/lofs_subr.c (revision ba3594ba9b5dd4c846c472a8d657edcb7c8109ac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * The idea behind composition-based stacked filesystems is to add a
28  * vnode to the stack of vnodes for each mount. These vnodes have their
29  * own set of mount options and filesystem-specific functions, so they
30  * can modify data or operations before they are passed along. Such a
31  * filesystem must maintain a mapping from the underlying vnodes to its
32  * interposing vnodes.
33  *
34  * In lofs, this mapping is implemented by a hashtable. Each bucket
35  * contains a count of the number of nodes currently contained, the
36  * chain of vnodes, and a lock to protect the list of vnodes. The
37  * hashtable dynamically grows if the number of vnodes in the table as a
38  * whole exceeds the size of the table left-shifted by
39  * lo_resize_threshold. In order to minimize lock contention, there is
40  * no global lock protecting the hashtable, hence obtaining the
41  * per-bucket locks consists of a dance to make sure we've actually
42  * locked the correct bucket. Acquiring a bucket lock doesn't involve
43  * locking the hashtable itself, so we refrain from freeing old
44  * hashtables, and store them in a linked list of retired hashtables;
45  * the list is freed when the filesystem is unmounted.
46  */
47 
48 #include <sys/param.h>
49 #include <sys/kmem.h>
50 #include <sys/vfs.h>
51 #include <sys/vnode.h>
52 #include <sys/cmn_err.h>
53 #include <sys/systm.h>
54 #include <sys/t_lock.h>
55 #include <sys/debug.h>
56 #include <sys/atomic.h>
57 
58 #include <sys/fs/lofs_node.h>
59 #include <sys/fs/lofs_info.h>
/*
 * ltablehash() masks with (tblsz - 1), so the number of buckets in a
 * hashtable must always be a power of 2.
 */
#define	LOFS_DEFAULT_HTSIZE	(1 << 6)

/*
 * Map a vnode pointer to a bucket index in a table of tblsz buckets: the
 * low 10 bits of the address are discarded and the result is masked down
 * to the table size.
 */
#define	ltablehash(vp, tblsz)	((((intptr_t)(vp))>>10) & ((tblsz)-1))

/*
 * The accessors below read li->li_hashtable and li->li_htsize without any
 * validation, so they can only be safely used when the desired bucket is
 * already locked.
 */

/*
 * The bucket structure that the given vnode hashes to.
 */
#define	LOFS_TABLE_SLOT(vp, li)	\
	((li)->li_hashtable[ltablehash((vp), (li)->li_htsize)])

/*
 * The lock in the hashtable associated with the given vnode.
 */
#define	TABLE_LOCK(vp, li)	(&LOFS_TABLE_SLOT((vp), (li)).lh_lock)

/*
 * The chain of lnodes in the bucket that the given vnode hashes to.
 */
#define	TABLE_BUCKET(vp, li)	(LOFS_TABLE_SLOT((vp), (li)).lh_chain)

/*
 * Number of elements currently in the bucket that the vnode hashes to.
 */
#define	TABLE_COUNT(vp, li)	(LOFS_TABLE_SLOT((vp), (li)).lh_count)

/*
 * Grab/Drop the lock for the bucket this vnode hashes to.  Taking the
 * lock goes through table_lock_enter(); dropping it is a plain
 * mutex_exit() on the bucket's lock.
 */
#define	TABLE_LOCK_ENTER(vp, li)	table_lock_enter(vp, li)
#define	TABLE_LOCK_EXIT(vp, li)		mutex_exit(TABLE_LOCK((vp), (li)))
97 
98 static lnode_t *lfind(struct vnode *, struct loinfo *);
99 static void lsave(lnode_t *, struct loinfo *);
100 static struct vfs *makelfsnode(struct vfs *, struct loinfo *);
101 static struct lfsnode *lfsfind(struct vfs *, struct loinfo *);
102 
103 uint_t lo_resize_threshold = 1;
104 uint_t lo_resize_factor = 2;
105 
106 static kmem_cache_t *lnode_cache;
107 
108 /*
109  * Since the hashtable itself isn't protected by a lock, obtaining a
110  * per-bucket lock proceeds as follows:
111  *
112  * (a) li->li_htlock protects li->li_hashtable, li->li_htsize, and
113  * li->li_retired.
114  *
115  * (b) Per-bucket locks (lh_lock) protect the contents of the bucket.
116  *
117  * (c) Locking order for resizing the hashtable is li_htlock then
118  * lh_lock.
119  *
120  * To grab the bucket lock we:
121  *
122  * (1) Stash away the htsize and the pointer to the hashtable to make
123  * sure neither change while we're using them.
124  *
125  * (2) lgrow() updates the pointer to the hashtable before it updates
126  * the size: the worst case scenario is that we have the wrong size (but
127  * the correct table), so we hash to the wrong bucket, grab the wrong
128  * lock, and then realize that things have changed, rewind and start
129  * again. If both the size and the table changed since we loaded them,
130  * we'll realize that too and restart.
131  *
132  * (3) The protocol for growing the hashtable involves holding *all* the
133  * locks in the table, hence the unlocking code (TABLE_LOCK_EXIT())
134  * doesn't need to do any dances, since neither the table nor the size
135  * can change while any bucket lock is held.
136  *
137  * (4) If the hashtable is growing (by thread t1) while another thread
138  * (t2) is trying to grab a bucket lock, t2 might have a stale reference
139  * to li->li_htsize:
140  *
141  * - t1 grabs all locks in lgrow()
142  * 	- t2 loads li->li_htsize and li->li_hashtable
143  * - t1 changes li->hashtable
144  * 	- t2 loads from an offset in the "stale" hashtable and tries to grab
145  * 	the relevant mutex.
146  *
147  * If t1 had free'd the stale hashtable, t2 would be in trouble. Hence,
148  * stale hashtables are not freed but stored in a list of "retired"
149  * hashtables, which is emptied when the filesystem is unmounted.
150  */
151 static void
152 table_lock_enter(vnode_t *vp, struct loinfo *li)
153 {
154 	struct lobucket *chain;
155 	uint_t htsize;
156 	uint_t hash;
157 
158 	for (;;) {
159 		htsize = li->li_htsize;
160 		membar_consumer();
161 		chain = (struct lobucket *)li->li_hashtable;
162 		hash = ltablehash(vp, htsize);
163 		mutex_enter(&chain[hash].lh_lock);
164 		if (li->li_hashtable == chain && li->li_htsize == htsize)
165 			break;
166 		mutex_exit(&chain[hash].lh_lock);
167 	}
168 }
169 
170 void
171 lofs_subrinit(void)
172 {
173 	/*
174 	 * Initialize the cache.
175 	 */
176 	lnode_cache = kmem_cache_create("lnode_cache", sizeof (lnode_t),
177 	    0, NULL, NULL, NULL, NULL, NULL, 0);
178 }
179 
180 void
181 lofs_subrfini(void)
182 {
183 	kmem_cache_destroy(lnode_cache);
184 }
185 
186 /*
187  * Initialize a (struct loinfo), and initialize the hashtable to have
188  * htsize buckets.
189  */
190 void
191 lsetup(struct loinfo *li, uint_t htsize)
192 {
193 	li->li_refct = 0;
194 	li->li_lfs = NULL;
195 	if (htsize == 0)
196 		htsize = LOFS_DEFAULT_HTSIZE;
197 	li->li_htsize = htsize;
198 	li->li_hashtable = kmem_zalloc(htsize * sizeof (*li->li_hashtable),
199 	    KM_SLEEP);
200 	mutex_init(&li->li_lfslock, NULL, MUTEX_DEFAULT, NULL);
201 	mutex_init(&li->li_htlock, NULL, MUTEX_DEFAULT, NULL);
202 	li->li_retired = NULL;
203 }
204 
205 /*
206  * Destroy a (struct loinfo)
207  */
208 void
209 ldestroy(struct loinfo *li)
210 {
211 	uint_t i, htsize;
212 	struct lobucket *table;
213 	struct lo_retired_ht *lrhp, *trhp;
214 
215 	mutex_destroy(&li->li_htlock);
216 	mutex_destroy(&li->li_lfslock);
217 	htsize = li->li_htsize;
218 	table = li->li_hashtable;
219 	for (i = 0; i < htsize; i++)
220 		mutex_destroy(&table[i].lh_lock);
221 	kmem_free(table, htsize * sizeof (*li->li_hashtable));
222 
223 	/*
224 	 * Free the retired hashtables.
225 	 */
226 	lrhp = li->li_retired;
227 	while (lrhp != NULL) {
228 		trhp = lrhp;
229 		lrhp = lrhp->lrh_next;
230 		kmem_free(trhp->lrh_table,
231 		    trhp->lrh_size * sizeof (*li->li_hashtable));
232 		kmem_free(trhp, sizeof (*trhp));
233 	}
234 	li->li_retired = NULL;
235 }
236 
237 /*
238  * Return a looped back vnode for the given vnode.
239  * If no lnode exists for this vnode create one and put it
240  * in a table hashed by vnode.  If the lnode for
241  * this vnode is already in the table return it (ref count is
242  * incremented by lfind).  The lnode will be flushed from the
243  * table when lo_inactive calls freelonode.  The creation of
244  * a new lnode can be forced via the LOF_FORCE flag even if
245  * the vnode exists in the table.  This is used in the creation
246  * of a terminating lnode when looping is detected.  A unique
247  * lnode is required for the correct evaluation of the current
248  * working directory.
249  * NOTE: vp is assumed to be a held vnode.
250  */
251 struct vnode *
252 makelonode(struct vnode *vp, struct loinfo *li, int flag)
253 {
254 	lnode_t *lp, *tlp;
255 	struct vfs *vfsp;
256 	vnode_t *nvp;
257 
258 	lp = NULL;
259 	TABLE_LOCK_ENTER(vp, li);
260 	if (flag != LOF_FORCE)
261 		lp = lfind(vp, li);
262 	if ((flag == LOF_FORCE) || (lp == NULL)) {
263 		/*
264 		 * Optimistically assume that we won't need to sleep.
265 		 */
266 		lp = kmem_cache_alloc(lnode_cache, KM_NOSLEEP);
267 		nvp = vn_alloc(KM_NOSLEEP);
268 		if (lp == NULL || nvp == NULL) {
269 			TABLE_LOCK_EXIT(vp, li);
270 			/* The lnode allocation may have succeeded, save it */
271 			tlp = lp;
272 			if (tlp == NULL) {
273 				tlp = kmem_cache_alloc(lnode_cache, KM_SLEEP);
274 			}
275 			if (nvp == NULL) {
276 				nvp = vn_alloc(KM_SLEEP);
277 			}
278 			lp = NULL;
279 			TABLE_LOCK_ENTER(vp, li);
280 			if (flag != LOF_FORCE)
281 				lp = lfind(vp, li);
282 			if (lp != NULL) {
283 				kmem_cache_free(lnode_cache, tlp);
284 				vn_free(nvp);
285 				VN_RELE(vp);
286 				goto found_lnode;
287 			}
288 			lp = tlp;
289 		}
290 		atomic_inc_32(&li->li_refct);
291 		vfsp = makelfsnode(vp->v_vfsp, li);
292 		lp->lo_vnode = nvp;
293 		VN_SET_VFS_TYPE_DEV(nvp, vfsp, vp->v_type, vp->v_rdev);
294 		nvp->v_flag |= (vp->v_flag & (VNOMOUNT|VNOMAP|VDIROPEN));
295 		vn_setops(nvp, lo_vnodeops);
296 		nvp->v_data = (caddr_t)lp;
297 		lp->lo_vp = vp;
298 		lp->lo_looping = 0;
299 		lsave(lp, li);
300 		vn_exists(vp);
301 	} else {
302 		VN_RELE(vp);
303 	}
304 
305 found_lnode:
306 	TABLE_LOCK_EXIT(vp, li);
307 	return (ltov(lp));
308 }
309 
310 /*
311  * Get/Make vfs structure for given real vfs
312  */
313 static struct vfs *
314 makelfsnode(struct vfs *vfsp, struct loinfo *li)
315 {
316 	struct lfsnode *lfs;
317 	struct lfsnode *tlfs;
318 
319 	/*
320 	 * Don't grab any locks for the fast (common) case.
321 	 */
322 	if (vfsp == li->li_realvfs)
323 		return (li->li_mountvfs);
324 	ASSERT(li->li_refct > 0);
325 	mutex_enter(&li->li_lfslock);
326 	if ((lfs = lfsfind(vfsp, li)) == NULL) {
327 		mutex_exit(&li->li_lfslock);
328 		lfs = kmem_zalloc(sizeof (*lfs), KM_SLEEP);
329 		mutex_enter(&li->li_lfslock);
330 		if ((tlfs = lfsfind(vfsp, li)) != NULL) {
331 			kmem_free(lfs, sizeof (*lfs));
332 			lfs = tlfs;
333 			goto found_lfs;
334 		}
335 		lfs->lfs_realvfs = vfsp;
336 
337 		/*
338 		 * Even though the lfsnode is strictly speaking a private
339 		 * implementation detail of lofs, it should behave as a regular
340 		 * vfs_t for the benefit of the rest of the kernel.
341 		 */
342 		VFS_INIT(&lfs->lfs_vfs, lo_vfsops, (caddr_t)li);
343 		lfs->lfs_vfs.vfs_fstype = li->li_mountvfs->vfs_fstype;
344 		lfs->lfs_vfs.vfs_flag =
345 		    ((vfsp->vfs_flag | li->li_mflag) & ~li->li_dflag) &
346 		    INHERIT_VFS_FLAG;
347 		lfs->lfs_vfs.vfs_bsize = vfsp->vfs_bsize;
348 		lfs->lfs_vfs.vfs_dev = vfsp->vfs_dev;
349 		lfs->lfs_vfs.vfs_fsid = vfsp->vfs_fsid;
350 
351 		if (vfsp->vfs_mntpt != NULL) {
352 			lfs->lfs_vfs.vfs_mntpt = vfs_getmntpoint(vfsp);
353 			/* Leave a reference to the mountpoint */
354 		}
355 
356 		(void) VFS_ROOT(vfsp, &lfs->lfs_realrootvp);
357 
358 		/*
359 		 * We use 1 instead of 0 as the value to associate with
360 		 * an idle lfs_vfs.  This is to prevent VFS_RELE()
361 		 * trying to kmem_free() our lfs_t (which is the wrong
362 		 * size).
363 		 */
364 		VFS_HOLD(&lfs->lfs_vfs);
365 		lfs->lfs_next = li->li_lfs;
366 		li->li_lfs = lfs;
367 		vfs_propagate_features(vfsp, &lfs->lfs_vfs);
368 	}
369 
370 found_lfs:
371 	VFS_HOLD(&lfs->lfs_vfs);
372 	mutex_exit(&li->li_lfslock);
373 	return (&lfs->lfs_vfs);
374 }
375 
376 /*
377  * Free lfs node since no longer in use
378  */
379 static void
380 freelfsnode(struct lfsnode *lfs, struct loinfo *li)
381 {
382 	struct lfsnode *prev = NULL;
383 	struct lfsnode *this;
384 
385 	ASSERT(MUTEX_HELD(&li->li_lfslock));
386 	ASSERT(li->li_refct > 0);
387 	for (this = li->li_lfs; this != NULL; this = this->lfs_next) {
388 		if (this == lfs) {
389 			ASSERT(lfs->lfs_vfs.vfs_count == 1);
390 			if (prev == NULL)
391 				li->li_lfs = lfs->lfs_next;
392 			else
393 				prev->lfs_next = lfs->lfs_next;
394 			if (lfs->lfs_realrootvp != NULL) {
395 				VN_RELE(lfs->lfs_realrootvp);
396 			}
397 			if (lfs->lfs_vfs.vfs_mntpt != NULL)
398 				refstr_rele(lfs->lfs_vfs.vfs_mntpt);
399 			if (lfs->lfs_vfs.vfs_implp != NULL) {
400 				ASSERT(lfs->lfs_vfs.vfs_femhead == NULL);
401 				ASSERT(lfs->lfs_vfs.vfs_vskap == NULL);
402 				ASSERT(lfs->lfs_vfs.vfs_fstypevsp == NULL);
403 				kmem_free(lfs->lfs_vfs.vfs_implp,
404 				    sizeof (vfs_impl_t));
405 			}
406 			sema_destroy(&lfs->lfs_vfs.vfs_reflock);
407 			kmem_free(lfs, sizeof (struct lfsnode));
408 			return;
409 		}
410 		prev = this;
411 	}
412 	panic("freelfsnode");
413 	/*NOTREACHED*/
414 }
415 
416 /*
417  * Find lfs given real vfs and mount instance(li)
418  */
419 static struct lfsnode *
420 lfsfind(struct vfs *vfsp, struct loinfo *li)
421 {
422 	struct lfsnode *lfs;
423 
424 	ASSERT(MUTEX_HELD(&li->li_lfslock));
425 
426 	/*
427 	 * We need to handle the case where a UFS filesystem was forced
428 	 * unmounted and then a subsequent mount got the same vfs
429 	 * structure.  If the new mount lies in the lofs hierarchy, then
430 	 * this will confuse lofs, because the original vfsp (of the
431 	 * forced unmounted filesystem) is still around. We check for
432 	 * this condition here.
433 	 *
434 	 * If we find a cache vfsp hit, then we check to see if the
435 	 * cached filesystem was forced unmounted. Skip all such
436 	 * entries. This should be safe to do since no
437 	 * makelonode()->makelfsnode()->lfsfind() calls should be
438 	 * generated for such force-unmounted filesystems (because (ufs)
439 	 * lookup would've returned an error).
440 	 */
441 	for (lfs = li->li_lfs; lfs != NULL; lfs = lfs->lfs_next) {
442 		if (lfs->lfs_realvfs == vfsp) {
443 			struct vnode *realvp;
444 
445 			realvp = lfs->lfs_realrootvp;
446 			if (realvp == NULL)
447 				continue;
448 			if (realvp->v_vfsp == NULL || realvp->v_type == VBAD)
449 				continue;
450 			return (lfs);
451 		}
452 	}
453 	return (NULL);
454 }
455 
456 /*
457  * Find real vfs given loopback vfs
458  */
459 struct vfs *
460 lo_realvfs(struct vfs *vfsp, struct vnode **realrootvpp)
461 {
462 	struct loinfo *li = vtoli(vfsp);
463 	struct lfsnode *lfs;
464 
465 	ASSERT(li->li_refct > 0);
466 	if (vfsp == li->li_mountvfs) {
467 		if (realrootvpp != NULL)
468 			*realrootvpp = vtol(li->li_rootvp)->lo_vp;
469 		return (li->li_realvfs);
470 	}
471 	mutex_enter(&li->li_lfslock);
472 	for (lfs = li->li_lfs; lfs != NULL; lfs = lfs->lfs_next) {
473 		if (vfsp == &lfs->lfs_vfs) {
474 			if (realrootvpp != NULL)
475 				*realrootvpp = lfs->lfs_realrootvp;
476 			mutex_exit(&li->li_lfslock);
477 			return (lfs->lfs_realvfs);
478 		}
479 	}
480 	panic("lo_realvfs");
481 	/*NOTREACHED*/
482 }
483 
484 /*
485  * Lnode lookup stuff.
486  * These routines maintain a table of lnodes hashed by vp so
487  * that the lnode for a vp can be found if it already exists.
488  *
489  * NB: A lofs shadow vnode causes exactly one VN_HOLD() on the
490  * underlying vnode.
491  */
492 
493 /*
494  * Retire old hashtables.
495  */
496 static void
497 lretire(struct loinfo *li, struct lobucket *table, uint_t size)
498 {
499 	struct lo_retired_ht *lrhp;
500 
501 	lrhp = kmem_alloc(sizeof (*lrhp), KM_SLEEP);
502 	lrhp->lrh_table = table;
503 	lrhp->lrh_size = size;
504 
505 	mutex_enter(&li->li_htlock);
506 	lrhp->lrh_next = li->li_retired;
507 	li->li_retired = lrhp;
508 	mutex_exit(&li->li_htlock);
509 }
510 
511 /*
512  * Grow the hashtable.
513  */
514 static void
515 lgrow(struct loinfo *li, uint_t newsize)
516 {
517 	uint_t oldsize;
518 	uint_t i;
519 	struct lobucket *oldtable, *newtable;
520 
521 	/*
522 	 * It's OK to not have enough memory to resize the hashtable.
523 	 * We'll go down this path the next time we add something to the
524 	 * table, and retry the allocation then.
525 	 */
526 	if ((newtable = kmem_zalloc(newsize * sizeof (*li->li_hashtable),
527 	    KM_NOSLEEP)) == NULL)
528 		return;
529 
530 	mutex_enter(&li->li_htlock);
531 	if (newsize <= li->li_htsize) {
532 		mutex_exit(&li->li_htlock);
533 		kmem_free(newtable, newsize * sizeof (*li->li_hashtable));
534 		return;
535 	}
536 	oldsize = li->li_htsize;
537 	oldtable = li->li_hashtable;
538 
539 	/*
540 	 * Grab all locks so TABLE_LOCK_ENTER() calls block until the
541 	 * resize is complete.
542 	 */
543 	for (i = 0; i < oldsize; i++)
544 		mutex_enter(&oldtable[i].lh_lock);
545 	/*
546 	 * li->li_hashtable gets set before li->li_htsize, so in the
547 	 * time between the two assignments, callers of
548 	 * TABLE_LOCK_ENTER() cannot hash to a bucket beyond oldsize,
549 	 * hence we only need to grab the locks up to oldsize.
550 	 */
551 	for (i = 0; i < oldsize; i++)
552 		mutex_enter(&newtable[i].lh_lock);
553 	/*
554 	 * Rehash.
555 	 */
556 	for (i = 0; i < oldsize; i++) {
557 		lnode_t *tlp, *nlp;
558 
559 		for (tlp = oldtable[i].lh_chain; tlp != NULL; tlp = nlp) {
560 			uint_t hash = ltablehash(tlp->lo_vp, newsize);
561 
562 			nlp = tlp->lo_next;
563 			tlp->lo_next = newtable[hash].lh_chain;
564 			newtable[hash].lh_chain = tlp;
565 			newtable[hash].lh_count++;
566 		}
567 	}
568 
569 	/*
570 	 * As soon as we store the new hashtable, future locking operations
571 	 * will use it.  Therefore, we must ensure that all the state we've
572 	 * just established reaches global visibility before the new hashtable
573 	 * does.
574 	 */
575 	membar_producer();
576 	li->li_hashtable = newtable;
577 
578 	/*
579 	 * table_lock_enter() relies on the fact that li->li_hashtable
580 	 * is set to its new value before li->li_htsize.
581 	 */
582 	membar_producer();
583 	li->li_htsize = newsize;
584 
585 	/*
586 	 * The new state is consistent now, so we can drop all the locks.
587 	 */
588 	for (i = 0; i < oldsize; i++) {
589 		mutex_exit(&newtable[i].lh_lock);
590 		mutex_exit(&oldtable[i].lh_lock);
591 	}
592 	mutex_exit(&li->li_htlock);
593 
594 	lretire(li, oldtable, oldsize);
595 }
596 
597 /*
598  * Put a lnode in the table
599  */
600 static void
601 lsave(lnode_t *lp, struct loinfo *li)
602 {
603 	ASSERT(lp->lo_vp);
604 	ASSERT(MUTEX_HELD(TABLE_LOCK(lp->lo_vp, li)));
605 
606 #ifdef LODEBUG
607 	lo_dprint(4, "lsave lp %p hash %d\n",
608 	    lp, ltablehash(lp->lo_vp, li));
609 #endif
610 
611 	TABLE_COUNT(lp->lo_vp, li)++;
612 	lp->lo_next = TABLE_BUCKET(lp->lo_vp, li);
613 	TABLE_BUCKET(lp->lo_vp, li) = lp;
614 
615 	if (li->li_refct > (li->li_htsize << lo_resize_threshold)) {
616 		TABLE_LOCK_EXIT(lp->lo_vp, li);
617 		lgrow(li, li->li_htsize << lo_resize_factor);
618 		TABLE_LOCK_ENTER(lp->lo_vp, li);
619 	}
620 }
621 
622 /*
623  * Our version of vfs_rele() that stops at 1 instead of 0, and calls
624  * freelfsnode() instead of kmem_free().
625  */
626 static void
627 lfs_rele(struct lfsnode *lfs, struct loinfo *li)
628 {
629 	vfs_t *vfsp = &lfs->lfs_vfs;
630 
631 	ASSERT(MUTEX_HELD(&li->li_lfslock));
632 	ASSERT(vfsp->vfs_count > 1);
633 	if (atomic_dec_32_nv(&vfsp->vfs_count) == 1)
634 		freelfsnode(lfs, li);
635 }
636 
637 /*
638  * Remove a lnode from the table
639  */
640 void
641 freelonode(lnode_t *lp)
642 {
643 	lnode_t *lt;
644 	lnode_t *ltprev = NULL;
645 	struct lfsnode *lfs, *nextlfs;
646 	struct vfs *vfsp;
647 	struct vnode *vp = ltov(lp);
648 	struct vnode *realvp = realvp(vp);
649 	struct loinfo *li = vtoli(vp->v_vfsp);
650 
651 #ifdef LODEBUG
652 	lo_dprint(4, "freelonode lp %p hash %d\n",
653 	    lp, ltablehash(lp->lo_vp, li));
654 #endif
655 	TABLE_LOCK_ENTER(lp->lo_vp, li);
656 
657 	mutex_enter(&vp->v_lock);
658 	if (vp->v_count > 1) {
659 		vp->v_count--;	/* release our hold from vn_rele */
660 		mutex_exit(&vp->v_lock);
661 		TABLE_LOCK_EXIT(lp->lo_vp, li);
662 		return;
663 	}
664 	mutex_exit(&vp->v_lock);
665 
666 	for (lt = TABLE_BUCKET(lp->lo_vp, li); lt != NULL;
667 	    ltprev = lt, lt = lt->lo_next) {
668 		if (lt == lp) {
669 #ifdef LODEBUG
670 			lo_dprint(4, "freeing %p, vfsp %p\n",
671 			    vp, vp->v_vfsp);
672 #endif
673 			atomic_dec_32(&li->li_refct);
674 			vfsp = vp->v_vfsp;
675 			vn_invalid(vp);
676 			if (vfsp != li->li_mountvfs) {
677 				mutex_enter(&li->li_lfslock);
678 				/*
679 				 * Check for unused lfs
680 				 */
681 				lfs = li->li_lfs;
682 				while (lfs != NULL) {
683 					nextlfs = lfs->lfs_next;
684 					if (vfsp == &lfs->lfs_vfs) {
685 						lfs_rele(lfs, li);
686 						break;
687 					}
688 					if (lfs->lfs_vfs.vfs_count == 1) {
689 						/*
690 						 * Lfs is idle
691 						 */
692 						freelfsnode(lfs, li);
693 					}
694 					lfs = nextlfs;
695 				}
696 				mutex_exit(&li->li_lfslock);
697 			}
698 			if (ltprev == NULL) {
699 				TABLE_BUCKET(lt->lo_vp, li) = lt->lo_next;
700 			} else {
701 				ltprev->lo_next = lt->lo_next;
702 			}
703 			TABLE_COUNT(lt->lo_vp, li)--;
704 			TABLE_LOCK_EXIT(lt->lo_vp, li);
705 			kmem_cache_free(lnode_cache, lt);
706 			vn_free(vp);
707 			VN_RELE(realvp);
708 			return;
709 		}
710 	}
711 	panic("freelonode");
712 	/*NOTREACHED*/
713 }
714 
715 /*
716  * Lookup a lnode by vp
717  */
718 static lnode_t *
719 lfind(struct vnode *vp, struct loinfo *li)
720 {
721 	lnode_t *lt;
722 
723 	ASSERT(MUTEX_HELD(TABLE_LOCK(vp, li)));
724 
725 	lt = TABLE_BUCKET(vp, li);
726 	while (lt != NULL) {
727 		if (lt->lo_vp == vp) {
728 			VN_HOLD(ltov(lt));
729 			return (lt);
730 		}
731 		lt = lt->lo_next;
732 	}
733 	return (NULL);
734 }
735 
#ifdef	LODEBUG
/* Current debug level; see the table of levels below. */
static int lofsdebug;
#endif	/* LODEBUG */

/*
 * Utilities used by both client and server
 * Standard levels:
 * 0) no debugging
 * 1) hard failures
 * 2) soft failures
 * 3) current test software
 * 4) main procedure entry points
 * 5) main procedure exit points
 * 6) utility procedure entry points
 * 7) utility procedure exit points
 * 8) obscure procedure entry points
 * 9) obscure procedure exit points
 * 10) random stuff
 * 11) all <= 1
 * 12) all <= 2
 * 13) all <= 3
 * ...
 */

#ifdef LODEBUG
/*
 * Print a debug message if lofsdebug selects the given level: either
 * lofsdebug equals level exactly, or lofsdebug is above 10 and
 * (lofsdebug - 10) is at least level.
 *
 * This stays an old-style definition on purpose: callers pass pointers
 * in the a1..a9 slots.
 */
/*VARARGS2*/
lo_dprint(level, str, a1, a2, a3, a4, a5, a6, a7, a8, a9)
	int level;
	char *str;
	int a1, a2, a3, a4, a5, a6, a7, a8, a9;
{
	int exact = (lofsdebug == level);
	int cumulative = (lofsdebug > 10 && (lofsdebug - 10) >= level);

	if (exact || cumulative)
		printf(str, a1, a2, a3, a4, a5, a6, a7, a8, a9);
}
#endif
771 #endif
772