xref: /titanic_52/usr/src/uts/common/fs/lofs/lofs_subr.c (revision 2eeaed14a5e2ed9bd811643ad5bffc3510ca0310)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * The idea behind composition-based stacked filesystems is to add a
30  * vnode to the stack of vnodes for each mount. These vnodes have their
31  * own set of mount options and filesystem-specific functions, so they
32  * can modify data or operations before they are passed along. Such a
33  * filesystem must maintain a mapping from the underlying vnodes to its
34  * interposing vnodes.
35  *
36  * In lofs, this mapping is implemented by a hashtable. Each bucket
37  * contains a count of the number of nodes currently contained, the
38  * chain of vnodes, and a lock to protect the list of vnodes. The
39  * hashtable dynamically grows if the number of vnodes in the table as a
40  * whole exceeds the size of the table left-shifted by
41  * lo_resize_threshold. In order to minimize lock contention, there is
42  * no global lock protecting the hashtable, hence obtaining the
43  * per-bucket locks consists of a dance to make sure we've actually
44  * locked the correct bucket. Acquiring a bucket lock doesn't involve
45  * locking the hashtable itself, so we refrain from freeing old
46  * hashtables, and store them in a linked list of retired hashtables;
47  * the list is freed when the filesystem is unmounted.
48  */
49 
50 #include <sys/param.h>
51 #include <sys/kmem.h>
52 #include <sys/vfs.h>
53 #include <sys/vnode.h>
54 #include <sys/cmn_err.h>
55 #include <sys/systm.h>
56 #include <sys/t_lock.h>
57 #include <sys/debug.h>
58 #include <sys/atomic.h>
59 
60 #include <sys/fs/lofs_node.h>
61 #include <sys/fs/lofs_info.h>
62 /*
63  * Due to the hashing algorithm, the size of the hash table needs to be a
64  * power of 2.  LOFS_DEFAULT_HTSIZE (64 buckets) is the size lsetup() falls
     * back to when it is asked for a table of 0 buckets.
65  */
66 #define	LOFS_DEFAULT_HTSIZE	(1 << 6)
67 
/*
 * Bucket index for a vnode pointer: the address shifted right by 10 bits,
 * masked with (tblsz - 1).  The mask only yields every index in
 * [0, tblsz) when tblsz is a power of 2.
 */
68 #define	ltablehash(vp, tblsz)	((((intptr_t)(vp))>>10) & ((tblsz)-1))
69 
70 /*
71  * The following macros can only be safely used when the desired bucket
72  * is already locked.
73  */
74 /*
75  * The lock in the hashtable associated with the given vnode.
76  */
77 #define	TABLE_LOCK(vp, li)      \
78 	(&(li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_lock)
79 
80 /*
81  * The bucket in the hashtable that the given vnode hashes to.
82  */
83 #define	TABLE_BUCKET(vp, li)    \
84 	((li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_chain)
85 
86 /*
87  * Number of elements currently in the bucket that the vnode hashes to.
88  */
89 #define	TABLE_COUNT(vp, li)	\
90 	((li)->li_hashtable[ltablehash((vp), (li)->li_htsize)].lh_count)
91 
92 /*
93  * Grab/Drop the lock for the bucket this vnode hashes to.
94  */
95 #define	TABLE_LOCK_ENTER(vp, li)	table_lock_enter(vp, li)
96 #define	TABLE_LOCK_EXIT(vp, li)		\
97 	mutex_exit(&(li)->li_hashtable[ltablehash((vp),	\
98 	    (li)->li_htsize)].lh_lock)
99 
100 static lnode_t *lfind(struct vnode *, struct loinfo *);
101 static void lsave(lnode_t *, struct loinfo *);
102 static struct vfs *makelfsnode(struct vfs *, struct loinfo *);
103 static struct lfsnode *lfsfind(struct vfs *, struct loinfo *);
104 
/*
 * Hashtable growth tunables, both used as shift counts by lsave(): the
 * table is grown once li_refct exceeds (li_htsize << lo_resize_threshold),
 * and the size requested from lgrow() is (li_htsize << lo_resize_factor).
 */
105 uint_t lo_resize_threshold = 1;
106 uint_t lo_resize_factor = 2;
107 
/* Cache that lnode_t structures are allocated from; see lofs_subrinit(). */
108 static kmem_cache_t *lnode_cache;
109 
110 /*
111  * Since the hashtable itself isn't protected by a lock, obtaining a
112  * per-bucket lock proceeds as follows:
113  *
114  * (a) li->li_htlock serializes updates to li->li_hashtable, li->li_htsize,
115  * and li->li_retired; table_lock_enter() reads the first two without it.
116  *
117  * (b) Per-bucket locks (lh_lock) protect the contents of the bucket.
118  *
119  * (c) Locking order for resizing the hashtable is li_htlock then
120  * lh_lock.
121  *
122  * To grab the bucket lock we:
123  *
124  * (1) Stash away the htsize and the pointer to the hashtable to make
125  * sure neither change while we're using them.
126  *
127  * (2) lgrow() updates the pointer to the hashtable before it updates
128  * the size: the worst case scenario is that we have the wrong size (but
129  * the correct table), so we hash to the wrong bucket, grab the wrong
130  * lock, and then realize that things have changed, rewind and start
131  * again. If both the size and the table changed since we loaded them,
132  * we'll realize that too and restart.
133  *
134  * (3) The protocol for growing the hashtable involves holding *all* the
135  * locks in the table, hence the unlocking code (TABLE_LOCK_EXIT())
136  * doesn't need to do any dances, since neither the table nor the size
137  * can change while any bucket lock is held.
138  *
139  * (4) If the hashtable is growing (by thread t1) while another thread
140  * (t2) is trying to grab a bucket lock, t2 might have a stale reference
141  * to li->li_htsize:
142  *
143  * - t1 grabs all locks in lgrow()
144  * 	- t2 loads li->li_htsize and li->li_hashtable
145  * - t1 changes li->li_hashtable
146  * 	- t2 loads from an offset in the "stale" hashtable and tries to grab
147  * 	the relevant mutex.
148  *
149  * If t1 had free'd the stale hashtable, t2 would be in trouble. Hence,
150  * stale hashtables are not freed but stored in a list of "retired"
151  * hashtables, which is emptied when the filesystem is unmounted.
152  */
153 static void
154 table_lock_enter(vnode_t *vp, struct loinfo *li)
155 {
156 	struct lobucket *chain;
157 	uint_t htsize;
158 	uint_t hash;
159 
	/*
	 * Snapshot the size and the table, lock the bucket the snapshot
	 * selects, then confirm that both are still current.  If either
	 * changed, the lock taken was not the right one: drop it and retry.
	 * The loop is left only with the bucket lock held.
	 */
160 	for (;;) {
161 		htsize = li->li_htsize;
		/*
		 * Load the size before the table, mirroring lgrow(), which
		 * stores the table before the size.
		 */
162 		membar_consumer();
163 		chain = (struct lobucket *)li->li_hashtable;
164 		hash = ltablehash(vp, htsize);
165 		mutex_enter(&chain[hash].lh_lock);
166 		if (li->li_hashtable == chain && li->li_htsize == htsize)
167 			break;
168 		mutex_exit(&chain[hash].lh_lock);
169 	}
170 }
171 
/*
 * Create the kmem cache that lnode_t structures are allocated from.
 */
172 void
173 lofs_subrinit(void)
174 {
175 	/*
176 	 * Initialize the cache.  Objects are sizeof (lnode_t) bytes; every
	 * other argument after the name is left at 0 or NULL.
177 	 */
178 	lnode_cache = kmem_cache_create("lnode_cache", sizeof (lnode_t),
179 	    0, NULL, NULL, NULL, NULL, NULL, 0);
180 }
181 
/*
 * Counterpart of lofs_subrinit(): destroy the lnode cache.
 */
182 void
183 lofs_subrfini(void)
184 {
185 	kmem_cache_destroy(lnode_cache);
186 }
187 
188 /*
189  * Ready a (struct loinfo) for use: clear its lists and count, set up its
190  * locks, and allocate a zeroed table of htsize buckets (0 means default).
191  */
192 void
193 lsetup(struct loinfo *li, uint_t htsize)
194 {
195 	uint_t nbuckets = (htsize != 0) ? htsize : LOFS_DEFAULT_HTSIZE;
196 
197 	mutex_init(&li->li_htlock, NULL, MUTEX_DEFAULT, NULL);
198 	mutex_init(&li->li_lfslock, NULL, MUTEX_DEFAULT, NULL);
199 	li->li_refct = 0;
200 	li->li_lfs = NULL;
201 	li->li_retired = NULL;
202 	li->li_hashtable =
203 	    kmem_zalloc(nbuckets * sizeof (*li->li_hashtable), KM_SLEEP);
204 	li->li_htsize = nbuckets;
205 }
206 
207 /*
208  * Tear down a (struct loinfo): destroy its locks, free every hashtable.
209  */
210 void
211 ldestroy(struct loinfo *li)
212 {
213 	struct lobucket *buckets = li->li_hashtable;
214 	uint_t nbuckets = li->li_htsize;
215 	struct lo_retired_ht *cur, *next;
216 	uint_t idx = 0;
217 
218 	mutex_destroy(&li->li_htlock);
219 	mutex_destroy(&li->li_lfslock);
220 	/*
221 	 * Live table: destroy each bucket lock, then free the array.
222 	 */
223 	while (idx < nbuckets)
224 		mutex_destroy(&buckets[idx++].lh_lock);
225 	kmem_free(buckets, nbuckets * sizeof (*li->li_hashtable));
226 
227 	/*
228 	 * Retired tables: free each array and the list node describing it.
229 	 */
230 	for (cur = li->li_retired; cur != NULL; cur = next) {
231 		next = cur->lrh_next;
232 		kmem_free(cur->lrh_table,
233 		    cur->lrh_size * sizeof (*li->li_hashtable));
234 		kmem_free(cur, sizeof (*cur));
235 	}
236 	li->li_retired = NULL;
237 }
238 
239 /*
240  * Return a looped back vnode for the given vnode.
241  * If no lnode exists for this vnode create one and put it
242  * in a table hashed by vnode.  If the lnode for
243  * this vnode is already in the table return it (ref count is
244  * incremented by lfind).  The lnode will be flushed from the
245  * table when lo_inactive calls freelonode.  The creation of
246  * a new lnode can be forced via the LOF_FORCE flag even if
247  * the vnode exists in the table.  This is used in the creation
248  * of a terminating lnode when looping is detected.  A unique
249  * lnode is required for the correct evaluation of the current
250  * working directory.
251  * NOTE: vp is assumed to be a held vnode.
     *
     * The caller's hold on vp is consumed either way: when an existing
     * lnode is returned the hold is dropped here with VN_RELE(); when a new
     * lnode is created the hold is kept on behalf of that lnode.
     *
     * Allocation is first attempted with KM_NOSLEEP while the bucket lock
     * is held.  If that fails, the lock is dropped, the allocation is
     * retried with KM_SLEEP, and the table is searched again after
     * re-locking, since the bucket was unprotected in between.
252  */
253 struct vnode *
254 makelonode(struct vnode *vp, struct loinfo *li, int flag)
255 {
256 	lnode_t *lp, *tlp;
257 	struct vfs *vfsp;
258 	vnode_t *nvp;
259 
260 	lp = NULL;
261 	TABLE_LOCK_ENTER(vp, li);
262 	if (flag != LOF_FORCE)
263 		lp = lfind(vp, li);
264 	if ((flag == LOF_FORCE) || (lp == NULL)) {
265 		/*
266 		 * Optimistically assume that we won't need to sleep.
267 		 */
268 		lp = kmem_cache_alloc(lnode_cache, KM_NOSLEEP);
269 		nvp = vn_alloc(KM_NOSLEEP);
270 		if (lp == NULL || nvp == NULL) {
			/*
			 * Release the bucket lock before retrying the
			 * allocations with KM_SLEEP.
			 */
271 			TABLE_LOCK_EXIT(vp, li);
272 			/* The lnode allocation may have succeeded, save it */
273 			tlp = lp;
274 			if (tlp == NULL) {
275 				tlp = kmem_cache_alloc(lnode_cache, KM_SLEEP);
276 			}
277 			if (nvp == NULL) {
278 				nvp = vn_alloc(KM_SLEEP);
279 			}
			/*
			 * The bucket was unlocked while we allocated, so
			 * look again (unless forced) before inserting.
			 */
280 			lp = NULL;
281 			TABLE_LOCK_ENTER(vp, li);
282 			if (flag != LOF_FORCE)
283 				lp = lfind(vp, li);
284 			if (lp != NULL) {
				/*
				 * An lnode for vp is in the table now: use
				 * it, and give back our allocations and the
				 * caller's hold on vp.
				 */
285 				kmem_cache_free(lnode_cache, tlp);
286 				vn_free(nvp);
287 				VN_RELE(vp);
288 				goto found_lnode;
289 			}
290 			lp = tlp;
291 		}
		/*
		 * Fill in the new lnode and its vnode, then insert the
		 * lnode in the hashtable.  li_refct is bumped first; lsave()
		 * compares it against the table size to decide on growing,
		 * and may drop and re-take the bucket lock while doing so.
		 */
292 		atomic_add_32(&li->li_refct, 1);
293 		vfsp = makelfsnode(vp->v_vfsp, li);
294 		lp->lo_vnode = nvp;
295 		VN_SET_VFS_TYPE_DEV(nvp, vfsp, vp->v_type, vp->v_rdev);
		/* Carry these three flags over from the underlying vnode. */
296 		nvp->v_flag |= (vp->v_flag & (VNOMOUNT|VNOMAP|VDIROPEN));
297 		vn_setops(nvp, lo_vnodeops);
298 		nvp->v_data = (caddr_t)lp;
299 		lp->lo_vp = vp;
300 		lp->lo_looping = 0;
301 		lsave(lp, li);
302 		vn_exists(vp);
303 	} else {
		/* Existing lnode found; lfind() already took a hold on it. */
304 		VN_RELE(vp);
305 	}
306 
307 found_lnode:
308 	TABLE_LOCK_EXIT(vp, li);
309 	return (ltov(lp));
310 }
311 
312 /*
313  * Get/Make vfs structure for given real vfs
     *
     * Returns the loopback vfs that stands in for vfsp on this mount.  If
     * vfsp is the mount's own real vfs, li->li_mountvfs is returned
     * directly, without taking li_lfslock and without an extra hold.
     * Otherwise the matching lfsnode is looked up (or created) under
     * li_lfslock and its embedded vfs is returned with one additional
     * VFS_HOLD().
314  */
315 static struct vfs *
316 makelfsnode(struct vfs *vfsp, struct loinfo *li)
317 {
318 	struct lfsnode *lfs;
319 	struct lfsnode *tlfs;
320 
321 	/*
322 	 * Don't grab any locks for the fast (common) case.
323 	 */
324 	if (vfsp == li->li_realvfs)
325 		return (li->li_mountvfs);
326 	ASSERT(li->li_refct > 0);
327 	mutex_enter(&li->li_lfslock);
328 	if ((lfs = lfsfind(vfsp, li)) == NULL) {
		/*
		 * Allocate with li_lfslock dropped, then search again: if
		 * an lfsnode for vfsp is on the list by the time the lock
		 * is re-taken, ours is discarded in favour of it.
		 */
329 		mutex_exit(&li->li_lfslock);
330 		lfs = kmem_zalloc(sizeof (*lfs), KM_SLEEP);
331 		mutex_enter(&li->li_lfslock);
332 		if ((tlfs = lfsfind(vfsp, li)) != NULL) {
333 			kmem_free(lfs, sizeof (*lfs));
334 			lfs = tlfs;
335 			goto found_lfs;
336 		}
337 		lfs->lfs_realvfs = vfsp;
338 
339 		/*
340 		 * Even though the lfsnode is strictly speaking a private
341 		 * implementation detail of lofs, it should behave as a regular
342 		 * vfs_t for the benefit of the rest of the kernel.
343 		 */
344 		VFS_INIT(&lfs->lfs_vfs, lo_vfsops, (caddr_t)li);
345 		lfs->lfs_vfs.vfs_fstype = li->li_mountvfs->vfs_fstype;
346 		lfs->lfs_vfs.vfs_flag =
347 			((vfsp->vfs_flag | li->li_mflag) & ~li->li_dflag) &
348 			INHERIT_VFS_FLAG;
349 		lfs->lfs_vfs.vfs_bsize = vfsp->vfs_bsize;
350 		lfs->lfs_vfs.vfs_dev = vfsp->vfs_dev;
351 		lfs->lfs_vfs.vfs_fsid = vfsp->vfs_fsid;
352 
353 		if (vfsp->vfs_mntpt != NULL) {
354 			lfs->lfs_vfs.vfs_mntpt = vfs_getmntpoint(vfsp);
355 			/* Leave a reference to the mountpoint */
356 		}
357 
		/*
		 * The result is deliberately ignored.  lfs was zeroed, so
		 * unless VFS_ROOT() stores a vnode, lfs_realrootvp stays
		 * NULL, and lfsfind() skips entries without a root vnode.
		 */
358 		(void) VFS_ROOT(vfsp, &lfs->lfs_realrootvp);
359 
360 		/*
361 		 * We use 1 instead of 0 as the value to associate with
362 		 * an idle lfs_vfs.  This is to prevent VFS_RELE()
363 		 * trying to kmem_free() our lfs_t (which is the wrong
364 		 * size).
365 		 */
366 		VFS_HOLD(&lfs->lfs_vfs);
367 		lfs->lfs_next = li->li_lfs;
368 		li->li_lfs = lfs;
369 	}
370 
371 found_lfs:
	/* Hold taken on behalf of the caller. */
372 	VFS_HOLD(&lfs->lfs_vfs);
373 	mutex_exit(&li->li_lfslock);
374 	return (&lfs->lfs_vfs);
375 }
376 
377 /*
378  * Unlink an idle lfsnode from li's list and release what it holds.
379  */
380 static void
381 freelfsnode(struct lfsnode *lfs, struct loinfo *li)
382 {
383 	struct lfsnode **linkp;
384 
385 	ASSERT(MUTEX_HELD(&li->li_lfslock));
386 	ASSERT(li->li_refct > 0);
387 
388 	/*
389 	 * Walk the links (not the nodes) so unlinking is one store.
390 	 */
391 	linkp = &li->li_lfs;
392 	while (*linkp != NULL && *linkp != lfs)
393 		linkp = &(*linkp)->lfs_next;
394 	if (*linkp == NULL) {
395 		panic("freelfsnode");
396 		/*NOTREACHED*/
397 	}
398 
399 	ASSERT(lfs->lfs_vfs.vfs_count == 1);
400 	*linkp = lfs->lfs_next;
401 
402 	if (lfs->lfs_realrootvp != NULL) {
403 		VN_RELE(lfs->lfs_realrootvp);
404 	}
405 	if (lfs->lfs_vfs.vfs_mntpt != NULL)
406 		refstr_rele(lfs->lfs_vfs.vfs_mntpt);
407 	if (lfs->lfs_vfs.vfs_implp != NULL) {
408 		ASSERT(lfs->lfs_vfs.vfs_femhead == NULL);
409 		ASSERT(lfs->lfs_vfs.vfs_vskap == NULL);
410 		ASSERT(lfs->lfs_vfs.vfs_fstypevsp == NULL);
411 		kmem_free(lfs->lfs_vfs.vfs_implp, sizeof (vfs_impl_t));
412 	}
413 	sema_destroy(&lfs->lfs_vfs.vfs_reflock);
414 	kmem_free(lfs, sizeof (struct lfsnode));
415 }
416 
417 /*
418  * Look up the lfsnode caching real vfs vfsp for mount instance li.
419  */
420 static struct lfsnode *
421 lfsfind(struct vfs *vfsp, struct loinfo *li)
422 {
423 	struct lfsnode *cand;
424 	struct vnode *rootvp;
425 
426 	ASSERT(MUTEX_HELD(&li->li_lfslock));
427 
428 	/*
429 	 * A hit on the real vfs pointer alone is not enough.  A UFS
430 	 * filesystem may have been forcibly unmounted and its vfs
431 	 * structure handed to a later mount; if that new mount sits
432 	 * inside the lofs hierarchy, the entry cached for the old
433 	 * filesystem is still on our list and would be mistaken for
434 	 * the new one.
435 	 *
436 	 * So an entry only counts when its cached root vnode is still
437 	 * usable: present, attached to a vfs, and not of type VBAD.
438 	 * Anything else is treated as a leftover of a forced unmount
439 	 * and skipped.  Skipping should be safe because no
440 	 * makelonode()->makelfsnode()->lfsfind() call should be
441 	 * generated for a force-unmounted filesystem in the first
442 	 * place (the (ufs) lookup would have returned an error).
443 	 */
444 	for (cand = li->li_lfs; cand != NULL; cand = cand->lfs_next) {
445 		if (cand->lfs_realvfs != vfsp)
446 			continue;
447 
448 		rootvp = cand->lfs_realrootvp;
449 		if (rootvp == NULL || rootvp->v_vfsp == NULL)
450 			continue;
451 		if (rootvp->v_type != VBAD)
452 			return (cand);
453 	}
454 	return (NULL);
455 }
456 
457 /*
458  * Find the real vfs behind a loopback vfs; when realrootvpp is non-NULL
459  * the real root vnode is stored through it as well.
460  */
461 struct vfs *
462 lo_realvfs(struct vfs *vfsp, struct vnode **realrootvpp)
463 {
464 	struct loinfo *li = vtoli(vfsp);
465 	struct lfsnode *node;
466 
467 	ASSERT(li->li_refct > 0);
468 	if (vfsp != li->li_mountvfs) {
469 		mutex_enter(&li->li_lfslock);
470 		node = li->li_lfs;
471 		while (node != NULL && vfsp != &node->lfs_vfs)
472 			node = node->lfs_next;
473 		if (node == NULL)
474 			panic("lo_realvfs");
475 		if (realrootvpp != NULL)
476 			*realrootvpp = node->lfs_realrootvp;
477 		mutex_exit(&li->li_lfslock);
478 		return (node->lfs_realvfs);
479 	}
480 	if (realrootvpp != NULL)
481 		*realrootvpp = vtol(li->li_rootvp)->lo_vp;
482 	return (li->li_realvfs);
483 }
484 
485 /*
486  * Lnode lookup stuff.
487  * These routines maintain a table of lnodes hashed by vp so
488  * that the lnode for a vp can be found if it already exists.
489  *
490  * NB: A lofs shadow vnode causes exactly one VN_HOLD() on the
491  * underlying vnode.
492  */
493 
494 /*
495  * Park a superseded hashtable on li's retired list (freed by ldestroy()).
496  */
497 static void
498 lretire(struct loinfo *li, struct lobucket *table, uint_t size)
499 {
500 	struct lo_retired_ht *entry = kmem_alloc(sizeof (*entry), KM_SLEEP);
501 
502 	entry->lrh_size = size;
503 	entry->lrh_table = table;
504 
505 	/* The list is updated under li_htlock; push at its head. */
506 	mutex_enter(&li->li_htlock);
507 	entry->lrh_next = li->li_retired;
508 	li->li_retired = entry;
509 	mutex_exit(&li->li_htlock);
510 }
511 
512 /*
513  * Grow the hashtable.
     *
     * Replaces li's hashtable with a zeroed table of newsize buckets and
     * moves every lnode into it.  Returns without doing anything if the
     * KM_NOSLEEP allocation fails, or if the table already has at least
     * newsize buckets by the time li_htlock is held.  newsize is expected
     * to be a power of 2, as ltablehash() requires.
     *
     * The caller must not hold any bucket lock: every bucket lock of the
     * old table is taken here.  The old table is handed to lretire()
     * rather than freed.
514  */
515 static void
516 lgrow(struct loinfo *li, uint_t newsize)
517 {
518 	uint_t oldsize;
519 	uint_t i;
520 	struct lobucket *oldtable, *newtable;
521 
522 	/*
523 	 * It's OK to not have enough memory to resize the hashtable.
524 	 * We'll go down this path the next time we add something to the
525 	 * table, and retry the allocation then.
526 	 */
527 	if ((newtable = kmem_zalloc(newsize * sizeof (*li->li_hashtable),
528 	    KM_NOSLEEP)) == NULL)
529 		return;
530 
531 	mutex_enter(&li->li_htlock);
	/*
	 * The size is only re-examined here, under li_htlock.  If the table
	 * is already at least this large, give the new table back.
	 */
532 	if (newsize <= li->li_htsize) {
533 		mutex_exit(&li->li_htlock);
534 		kmem_free(newtable, newsize * sizeof (*li->li_hashtable));
535 		return;
536 	}
537 	oldsize = li->li_htsize;
538 	oldtable = li->li_hashtable;
539 
540 	/*
541 	 * Grab all locks so TABLE_LOCK_ENTER() calls block until the
542 	 * resize is complete.
543 	 */
544 	for (i = 0; i < oldsize; i++)
545 		mutex_enter(&oldtable[i].lh_lock);
546 	/*
547 	 * li->li_hashtable gets set before li->li_htsize, so in the
548 	 * time between the two assignments, callers of
549 	 * TABLE_LOCK_ENTER() cannot hash to a bucket beyond oldsize,
550 	 * hence we only need to grab the locks up to oldsize.
551 	 */
552 	for (i = 0; i < oldsize; i++)
553 		mutex_enter(&newtable[i].lh_lock);
554 	/*
555 	 * Rehash.
	 *
	 * Each lnode is pushed onto the head of its new chain; nlp saves
	 * the old successor before lo_next is overwritten.  The chain
	 * heads and counts stored in the old table are left as they were.
556 	 */
557 	for (i = 0; i < oldsize; i++) {
558 		lnode_t *tlp, *nlp;
559 
560 		for (tlp = oldtable[i].lh_chain; tlp != NULL; tlp = nlp) {
561 			uint_t hash = ltablehash(tlp->lo_vp, newsize);
562 
563 			nlp = tlp->lo_next;
564 			tlp->lo_next = newtable[hash].lh_chain;
565 			newtable[hash].lh_chain = tlp;
566 			newtable[hash].lh_count++;
567 		}
568 	}
569 
570 	/*
571 	 * As soon as we store the new hashtable, future locking operations
572 	 * will use it.  Therefore, we must ensure that all the state we've
573 	 * just established reaches global visibility before the new hashtable
574 	 * does.
575 	 */
576 	membar_producer();
577 	li->li_hashtable = newtable;
578 
579 	/*
580 	 * table_lock_enter() relies on the fact that li->li_hashtable
581 	 * is set to its new value before li->li_htsize.
582 	 */
583 	membar_producer();
584 	li->li_htsize = newsize;
585 
586 	/*
587 	 * The new state is consistent now, so we can drop all the locks.
588 	 */
589 	for (i = 0; i < oldsize; i++) {
590 		mutex_exit(&newtable[i].lh_lock);
591 		mutex_exit(&oldtable[i].lh_lock);
592 	}
593 	mutex_exit(&li->li_htlock);
594 
	/*
	 * lretire() takes li_htlock itself, so it is called only after
	 * the lock has been dropped.
	 */
595 	lretire(li, oldtable, oldsize);
596 }
597 
598 /*
599  * Put a lnode in the table
     *
     * lp is pushed onto the head of the chain that lp->lo_vp hashes to and
     * that bucket's count is incremented.  The bucket lock for lp->lo_vp
     * must be held on entry and is held again on return, but it is dropped
     * and re-acquired in between when the insertion triggers a resize.
     * li_refct is not modified here; it is only compared against the table
     * size to decide whether to grow.
600  */
601 static void
602 lsave(lnode_t *lp, struct loinfo *li)
603 {
604 	ASSERT(lp->lo_vp);
605 	ASSERT(MUTEX_HELD(TABLE_LOCK(lp->lo_vp, li)));
606 
607 #ifdef LODEBUG
	/* ltablehash() takes the table size, not the loinfo itself. */
608 	lo_dprint(4, "lsave lp %p hash %d\n",
609 			lp, ltablehash(lp->lo_vp, li->li_htsize));
610 #endif
611 
612 	TABLE_COUNT(lp->lo_vp, li)++;
613 	lp->lo_next = TABLE_BUCKET(lp->lo_vp, li);
614 	TABLE_BUCKET(lp->lo_vp, li) = lp;
615 
616 	if (li->li_refct > (li->li_htsize << lo_resize_threshold)) {
		/*
		 * lgrow() takes every bucket lock itself, so ours has to
		 * be released around the call.
		 */
617 		TABLE_LOCK_EXIT(lp->lo_vp, li);
618 		lgrow(li, li->li_htsize << lo_resize_factor);
619 		TABLE_LOCK_ENTER(lp->lo_vp, li);
620 	}
621 }
622 
623 /*
624  * Drop a hold on lfs's vfs; at the idle count of 1, freelfsnode() it.
625  */
626 static void
627 lfs_rele(struct lfsnode *lfs, struct loinfo *li)
628 {
629 	uint_t left;
630 
631 	ASSERT(MUTEX_HELD(&li->li_lfslock));
632 	ASSERT(lfs->lfs_vfs.vfs_count > 1);
633 	left = atomic_add_32_nv(&lfs->lfs_vfs.vfs_count, -1);
634 	if (left == 1)
635 		freelfsnode(lfs, li);
636 }
637 
638 /*
639  * Remove a lnode from the table
     *
     * If the lnode's vnode has more than one hold (v_count > 1), this only
     * drops one count and returns.  Otherwise the lnode is unlinked from
     * its bucket, li_refct is decremented, the lnode and its vnode are
     * freed, and the hold on the real vnode is released.  When the vnode
     * belongs to an lfsnode's vfs rather than to the mount's own vfs, the
     * hold on that lfsnode is dropped too, and idle lfsnodes encountered
     * before it on the list are freed.  Panics if lp is not in its bucket.
640  */
641 void
642 freelonode(lnode_t *lp)
643 {
644 	lnode_t *lt;
645 	lnode_t *ltprev = NULL;
646 	struct lfsnode *lfs, *nextlfs;
647 	struct vfs *vfsp;
648 	struct vnode *vp = ltov(lp);
649 	struct vnode *realvp = realvp(vp);
650 	struct loinfo *li = vtoli(vp->v_vfsp);
651 
652 #ifdef LODEBUG
	/*
	 * ltablehash() takes the table size, not the loinfo itself.  The
	 * size is read without the bucket lock here, which only affects
	 * the value shown in the debug message.
	 */
653 	lo_dprint(4, "freelonode lp %p hash %d\n",
654 			lp, ltablehash(lp->lo_vp, li->li_htsize));
655 #endif
656 	TABLE_LOCK_ENTER(lp->lo_vp, li);
657 
658 	mutex_enter(&vp->v_lock);
659 	if (vp->v_count > 1) {
660 		vp->v_count--;	/* release our hold from vn_rele */
661 		mutex_exit(&vp->v_lock);
662 		TABLE_LOCK_EXIT(lp->lo_vp, li);
663 		return;
664 	}
665 	mutex_exit(&vp->v_lock);
666 
667 	for (lt = TABLE_BUCKET(lp->lo_vp, li); lt != NULL;
668 	    ltprev = lt, lt = lt->lo_next) {
669 		if (lt == lp) {
670 #ifdef LODEBUG
671 			lo_dprint(4, "freeing %p, vfsp %p\n",
672 					vp, vp->v_vfsp);
673 #endif
674 			atomic_add_32(&li->li_refct, -1);
675 			vfsp = vp->v_vfsp;
676 			vn_invalid(vp);
677 			if (vfsp != li->li_mountvfs) {
678 				mutex_enter(&li->li_lfslock);
679 				/*
680 				 * Check for unused lfs
681 				 */
682 				lfs = li->li_lfs;
683 				while (lfs != NULL) {
					/*
					 * Save the successor first: lfs may
					 * be freed below.
					 */
684 					nextlfs = lfs->lfs_next;
685 					if (vfsp == &lfs->lfs_vfs) {
686 						lfs_rele(lfs, li);
687 						break;
688 					}
689 					if (lfs->lfs_vfs.vfs_count == 1) {
690 						/*
691 						 * Lfs is idle
692 						 */
693 						freelfsnode(lfs, li);
694 					}
695 					lfs = nextlfs;
696 				}
697 				mutex_exit(&li->li_lfslock);
698 			}
			/* Unlink lt from its chain. */
699 			if (ltprev == NULL) {
700 				TABLE_BUCKET(lt->lo_vp, li) = lt->lo_next;
701 			} else {
702 				ltprev->lo_next = lt->lo_next;
703 			}
704 			TABLE_COUNT(lt->lo_vp, li)--;
705 			TABLE_LOCK_EXIT(lt->lo_vp, li);
			/* Freeing happens after the bucket lock is dropped. */
706 			kmem_cache_free(lnode_cache, lt);
707 			vn_free(vp);
708 			VN_RELE(realvp);
709 			return;
710 		}
711 	}
712 	panic("freelonode");
713 	/*NOTREACHED*/
714 }
715 
716 /*
717  * Find vp's lnode in its bucket (bucket lock held); a hit is held.
718  */
719 static lnode_t *
720 lfind(struct vnode *vp, struct loinfo *li)
721 {
722 	lnode_t *node;
723 
724 	ASSERT(MUTEX_HELD(TABLE_LOCK(vp, li)));
725 
726 	/* Walk vp's bucket; a hit leaves the loop with node still set. */
727 	for (node = TABLE_BUCKET(vp, li); node != NULL; node = node->lo_next) {
728 		if (node->lo_vp != vp)
729 			continue;
730 		VN_HOLD(ltov(node));
731 		break;
732 	}
733 	/* NULL when the chain ran out without a match. */
734 	return (node);
735 }
736 
737 #ifdef	LODEBUG
738 static int lofsdebug;
739 #endif	/* LODEBUG */
740 
741 /*
742  * Utilities used by both client and server
743  * Standard levels:
744  * 0) no debugging
745  * 1) hard failures
746  * 2) soft failures
747  * 3) current test software
748  * 4) main procedure entry points
749  * 5) main procedure exit points
750  * 6) utility procedure entry points
751  * 7) utility procedure exit points
752  * 8) obscure procedure entry points
753  * 9) obscure procedure exit points
754  * 10) random stuff
755  * 11) all <= 1
756  * 12) all <= 2
757  * 13) all <= 3
758  * ...
759  */
760 
761 #ifdef LODEBUG
/*
 * Debug print, built only under LODEBUG.  The message is printed when
 * lofsdebug equals level, or when lofsdebug is above 10 and
 * (lofsdebug - 10) >= level; str and the nine int arguments are passed
 * straight to printf().  This is an old-style (K&R) definition with no
 * declared return type and no return statement.
 */
762 /*VARARGS2*/
763 lo_dprint(level, str, a1, a2, a3, a4, a5, a6, a7, a8, a9)
764 	int level;
765 	char *str;
766 	int a1, a2, a3, a4, a5, a6, a7, a8, a9;
767 {
768 
769 	if (lofsdebug == level || (lofsdebug > 10 && (lofsdebug - 10) >= level))
770 		printf(str, a1, a2, a3, a4, a5, a6, a7, a8, a9);
771 }
772 #endif
773