xref: /illumos-gate/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c (revision d8a7fe16f62711cdc5c4267da8b34ff24a6b668c)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
 *	All rights reserved.
 */

/*
 * Node hash implementation initially borrowed from NFS (nfs_subr.c)
 * but then heavily modified. It's no longer an array of hash lists,
 * but an AVL tree per mount point.  More on this below.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/dnlc.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>

#include <netsmb/smb_osdep.h>

#include <netsmb/smb.h>
#include <netsmb/smb_conn.h>
#include <netsmb/smb_subr.h>
#include <netsmb/smb_rq.h>

#include <smbfs/smbfs.h>
#include <smbfs/smbfs_node.h>
#include <smbfs/smbfs_subr.h>
/*
 * The AVL trees (now per-mount) allow finding an smbfs node by its
 * full remote path name.  It also allows easy traversal of all nodes
 * below (path wise) any given node.  A reader/writer lock for each
 * (per mount) AVL tree is used to control access and to synchronize
 * lookups, additions, and deletions from that AVL tree.
 *
 * Previously, this code used a global array of hash chains, each with
 * its own rwlock.  A few struct members, functions, and comments may
 * still refer to a "hash", and those should all now be considered to
 * refer to the per-mount AVL tree that replaced the old hash chains.
 * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
 *
 * The smbnode freelist is organized as a doubly linked list with
 * a head pointer.  Additions and deletions are synchronized via
 * a single mutex.
 *
 * In order to add an smbnode to the free list, it must be linked into
 * the mount's AVL tree and the exclusive lock for the AVL must be held.
 * If an smbnode is not linked into the AVL tree, then it is destroyed
 * because it represents no valuable information that can be reused
 * about the file.  The exclusive lock for the AVL tree must be held
 * in order to prevent a lookup in the AVL tree from finding the
 * smbnode and using it and assuming that the smbnode is not on the
 * freelist.  The lookup in the AVL tree will have the AVL tree lock
 * held, either exclusive or shared.
 *
 * The vnode reference count for each smbnode is not allowed to drop
 * below 1.  This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * smbnode is looked up in the AVL tree, either the smbnode
 * is removed from the freelist and that reference is transferred to
 * the new reference, or the vnode reference count must be incremented
 * accordingly.  The mutex for the freelist must be held in order to
 * accurately test to see if the smbnode is on the freelist or not.
 * The AVL tree lock might be held shared and it is possible that
 * two different threads may race to remove the smbnode from the
 * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist.  It cannot
 * be placed on the freelist due to the requirement that the thread
 * putting the smbnode on the freelist must hold the exclusive lock
 * for the AVL tree and the thread doing the lookup in the AVL tree
 * is holding either a shared or exclusive lock for the AVL tree.
 *
 * The lock ordering is:
 *
 *	AVL tree lock -> vnode lock
 *	AVL tree lock -> freelist lock
 */
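
/*
 * To make the ordering above concrete, here is an illustrative
 * sketch (a summary, not compiled code) of the lock sequence used
 * when putting an smbnode on the freelist; see smbfs_addfree()
 * below for the real implementation:
 *
 *	rw_enter(&mi->smi_hash_lk, RW_WRITER);	take AVL tree lock
 *	mutex_enter(&vp->v_lock);		then the vnode lock
 *	(recheck vp->v_count; bail out if another hold appeared)
 *	mutex_exit(&vp->v_lock);
 *	mutex_enter(&smbfreelist_lock);		then the freelist lock
 *	(link np into smbfreelist via r_freef/r_freeb)
 *	mutex_exit(&smbfreelist_lock);
 *	rw_exit(&mi->smi_hash_lk);
 */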

static kmutex_t smbfreelist_lock;
static smbnode_t *smbfreelist = NULL;
static ulong_t	smbnodenew = 0;
long	nsmbnode = 0;

static struct kmem_cache *smbnode_cache;

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/* See smbfs_node_findcreate() */
struct smbfattr smbfs_fattr0;

/*
 * Local functions.
 * SN for Smb Node
 */
static void sn_rmfree(smbnode_t *);
static void sn_inactive(smbnode_t *);
static void sn_addhash_locked(smbnode_t *, avl_index_t);
static void sn_rmhash_locked(smbnode_t *);
static void sn_destroy_node(smbnode_t *);
void smbfs_kmem_reclaim(void *cdrarg);

static smbnode_t *
sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);

static smbnode_t *
make_smbnode(smbmntinfo_t *, const char *, int, int *);

/*
 * Free the resources associated with an smbnode.
 * Note: This is different from smbfs_inactive
 *
 * NFS: nfs_subr.c:rinactive
 */
static void
sn_inactive(smbnode_t *np)
{
	cred_t		*oldcr;
	char		*orpath;
	int		orplen;

	/*
	 * Flush and invalidate all pages (todo)
	 * Free any held credentials and caches...
	 * etc.  (See NFS code)
	 */
	mutex_enter(&np->r_statelock);

	oldcr = np->r_cred;
	np->r_cred = NULL;

	orpath = np->n_rpath;
	orplen = np->n_rplen;
	np->n_rpath = NULL;
	np->n_rplen = 0;

	mutex_exit(&np->r_statelock);

	if (oldcr != NULL)
		crfree(oldcr);

	if (orpath != NULL)
		kmem_free(orpath, orplen + 1);
}

/*
 * Find and optionally create an smbnode for the passed
 * mountinfo, directory, separator, and name.  If the
 * desired smbnode already exists, return a reference.
 * If the file attributes pointer is non-null, the node
 * is created if necessary and linked into the AVL tree.
 *
 * Callers that need a node created but don't have the
 * real attributes pass smbfs_fattr0 to force creation.
 *
 * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
 *
 * NFS: nfs_subr.c:makenfsnode
 */
smbnode_t *
smbfs_node_findcreate(
	smbmntinfo_t *mi,
	const char *dirnm,
	int dirlen,
	const char *name,
	int nmlen,
	char sep,
	struct smbfattr *fap)
{
	char tmpbuf[256];
	size_t rpalloc;
	char *p, *rpath;
	int rplen;
	smbnode_t *np;
	vnode_t *vp;
	int newnode;

	/*
	 * Build the search string, either in tmpbuf or
	 * in allocated memory if larger than tmpbuf.
	 */
	rplen = dirlen;
	if (sep != '\0')
		rplen++;
	rplen += nmlen;
	if (rplen < sizeof (tmpbuf)) {
		/* use tmpbuf */
		rpalloc = 0;
		rpath = tmpbuf;
	} else {
		rpalloc = rplen + 1;
		rpath = kmem_alloc(rpalloc, KM_SLEEP);
	}
	p = rpath;
	bcopy(dirnm, p, dirlen);
	p += dirlen;
	if (sep != '\0')
		*p++ = sep;
	if (name != NULL) {
		bcopy(name, p, nmlen);
		p += nmlen;
	}
	ASSERT(p == rpath + rplen);

	/*
	 * Find or create a node with this path.
	 */
	rw_enter(&mi->smi_hash_lk, RW_READER);
	if (fap == NULL)
		np = sn_hashfind(mi, rpath, rplen, NULL);
	else
		np = make_smbnode(mi, rpath, rplen, &newnode);
	rw_exit(&mi->smi_hash_lk);

	if (rpalloc)
		kmem_free(rpath, rpalloc);

	if (fap == NULL) {
		/*
		 * Caller is "just looking" (no create)
		 * so np may or may not be NULL here.
		 * Either way, we're done.
		 */
		return (np);
	}

	/*
	 * We should have a node, possibly created.
	 * Do we have (real) attributes to apply?
	 */
	ASSERT(np != NULL);
	if (fap == &smbfs_fattr0)
		return (np);

	/*
	 * Apply the given attributes to this node,
	 * dealing with any cache impact, etc.
	 */
	vp = SMBTOV(np);
	if (!newnode) {
		/*
		 * Found an existing node.
		 * Maybe purge caches...
		 */
		smbfs_cache_check(vp, fap);
	}
	smbfs_attrcache_fa(vp, fap);

	/*
	 * Note NFS sets vp->v_type here, assuming it
	 * can never change for the life of a node.
	 * We allow v_type to change, and set it in
	 * smbfs_attrcache().  Also: mode, uid, gid
	 */
	return (np);
}
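
/*
 * Illustrative (hypothetical) caller of the above: find or create
 * the node for "\dir\file", forcing creation without real
 * attributes by passing &smbfs_fattr0.  The returned node carries
 * a vnode hold that the caller must release:
 *
 *	np = smbfs_node_findcreate(smi, "\\dir", 4, "file", 4,
 *	    '\\', &smbfs_fattr0);
 *	ASSERT(np != NULL);
 *	vp = SMBTOV(np);
 *	... use vp ...
 *	VN_RELE(vp);
 */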

/*
 * NFS: nfs_subr.c:rtablehash
 * We use smbfs_hash().
 */

/*
 * Find or create an smbnode.
 * NFS: nfs_subr.c:make_rnode
 */
static smbnode_t *
make_smbnode(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	int *newnode)
{
	smbnode_t *np;
	smbnode_t *tnp;
	vnode_t *vp;
	vfs_t *vfsp;
	avl_index_t where;
	char *new_rpath = NULL;

	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
	vfsp = mi->smi_vfsp;

start:
	np = sn_hashfind(mi, rpath, rplen, NULL);
	if (np != NULL) {
		*newnode = 0;
		return (np);
	}

	/* Note: will retake this lock below. */
	rw_exit(&mi->smi_hash_lk);

	/*
	 * see if we can find something on the freelist
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
		np = smbfreelist;
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);

		vp = SMBTOV(np);

		if (np->r_flags & RHASHED) {
			smbmntinfo_t *tmp_mi = np->n_mount;
			ASSERT(tmp_mi != NULL);
			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&tmp_mi->smi_hash_lk);
				/* start over */
				rw_enter(&mi->smi_hash_lk, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&tmp_mi->smi_hash_lk);
		}

		sn_inactive(np);

		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			rw_enter(&mi->smi_hash_lk, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		smbfs_rw_destroy(&np->r_rwlock);
		smbfs_rw_destroy(&np->r_lkserlock);
		mutex_destroy(&np->r_statelock);
		cv_destroy(&np->r_cv);
		/*
		 * Make sure that if smbnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/*
		 * allocate and initialize a new smbnode
		 */
		vnode_t *new_vp;

		mutex_exit(&smbfreelist_lock);

		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_add_long((ulong_t *)&smbnodenew, 1);
		vp = new_vp;
	}

	/*
	 * Allocate and copy the rpath we'll need below.
	 */
	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
	bcopy(rpath, new_rpath, rplen);
	new_rpath[rplen] = '\0';

	/* Initialize smbnode_t */
	bzero(np, sizeof (*np));

	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */

	np->r_vnode = vp;
	np->n_mount = mi;

	np->n_fid = SMB_FID_UNUSED;
	np->n_uid = mi->smi_uid;
	np->n_gid = mi->smi_gid;
	/* Leave attributes "stale." */

#if 0 /* XXX dircache */
	/*
	 * We don't know if it's a directory yet.
	 * Let the caller do this?  XXX
	 */
	avl_create(&np->r_dir, compar, sizeof (rddir_cache),
	    offsetof(rddir_cache, tree));
#endif

	/* Now fill in the vnode. */
	vn_setops(vp, smbfs_vnodeops);
	vp->v_data = (caddr_t)np;
	VFS_HOLD(vfsp);
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;

	/*
	 * We entered with mi->smi_hash_lk held (reader).
	 * Retake it now, (as the writer).
	 * Will return with it held.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	/*
	 * There is a race condition where someone else
	 * may alloc the smbnode while no locks are held,
	 * so check again and recover if found.
	 */
	tnp = sn_hashfind(mi, rpath, rplen, &where);
	if (tnp != NULL) {
		/*
		 * Lost the race.  Put the node we were building
		 * on the free list and return the one we found.
		 */
		rw_exit(&mi->smi_hash_lk);
		kmem_free(new_rpath, rplen + 1);
		smbfs_addfree(np);
		rw_enter(&mi->smi_hash_lk, RW_READER);
		*newnode = 0;
		return (tnp);
	}

	/*
	 * Hash search identifies nodes by the remote path
	 * (n_rpath) so fill that in now, before linking
	 * this node into the node cache (AVL tree).
	 */
	np->n_rpath = new_rpath;
	np->n_rplen = rplen;
	np->n_ino = smbfs_gethash(new_rpath, rplen);

	sn_addhash_locked(np, where);
	*newnode = 1;
	return (np);
}
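
/*
 * A note on the locking contract above: make_smbnode() is entered
 * with smi_hash_lk held as reader, may drop and retake that lock
 * internally, and always returns with it held -- as reader when an
 * existing node was found (including the lost-race case), or as
 * writer when a new node was just inserted.  Either way, the
 * caller's rw_exit() in smbfs_node_findcreate() unwinds it.
 */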

/*
 * smbfs_addfree
 * Put an smbnode on the free list, or destroy it immediately
 * if it offers no value were it to be reclaimed later.  Also
 * destroy immediately when we have too many smbnodes, etc.
 *
 * Normally called by smbfs_inactive, but also
 * called in here during cleanup operations.
 *
 * NFS: nfs_subr.c:rp_addfree
 */
void
smbfs_addfree(smbnode_t *np)
{
	vnode_t *vp;
	struct vfs *vfsp;
	smbmntinfo_t *mi;

	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);

	vp = SMBTOV(np);
	ASSERT(vp->v_count >= 1);

	vfsp = vp->v_vfsp;
	mi = VFTOSMI(vfsp);

	/*
	 * If there are no more references to this smbnode and:
	 * we have too many smbnodes allocated, or if the node
	 * is no longer accessible via the AVL tree (!RHASHED),
	 * or an i/o error occurred while writing to the file,
	 * or it's part of an unmounted FS, then try to destroy
	 * it instead of putting it on the smbnode freelist.
	 */
	if (np->r_count == 0 && (
	    (np->r_flags & RHASHED) == 0 ||
	    (np->r_error != 0) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
	    (smbnodenew > nsmbnode))) {

		/* Try to destroy this node. */

		if (np->r_flags & RHASHED) {
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				return;
				/*
				 * Will get another call later,
				 * via smbfs_inactive.
				 */
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}

		sn_inactive(np);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * smbnode is not in the smbnode "hash" AVL tree, so
		 * the only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the smbnode was marked
		 * with RDIRTY or for a modified page.  This vnode
		 * reference may have been acquired before our call
		 * to sn_inactive.  The i/o may have been completed,
		 * thus allowing sn_inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the smbnode cannot be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the smbnode or placing it on the
		 * smbnode freelist.  If there are no other references,
		 * then the smbnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			vp->v_count--;
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		sn_destroy_node(np);
		return;
	}

	/*
	 * Lock the AVL tree and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the smbnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this smbnode to the freelist.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		vp->v_count--;
		mutex_exit(&vp->v_lock);
		rw_exit(&mi->smi_hash_lk);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Put this node on the free list.
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist == NULL) {
		np->r_freef = np;
		np->r_freeb = np;
		smbfreelist = np;
	} else {
		np->r_freef = smbfreelist;
		np->r_freeb = smbfreelist->r_freeb;
		smbfreelist->r_freeb->r_freef = np;
		smbfreelist->r_freeb = np;
	}
	mutex_exit(&smbfreelist_lock);

	rw_exit(&mi->smi_hash_lk);
}

/*
 * Remove an smbnode from the free list.
 *
 * The caller must be holding smbfreelist_lock and the smbnode
 * must be on the freelist.
 *
 * NFS: nfs_subr.c:rp_rmfree
 */
static void
sn_rmfree(smbnode_t *np)
{

	ASSERT(MUTEX_HELD(&smbfreelist_lock));
	ASSERT(np->r_freef != NULL && np->r_freeb != NULL);

	if (np == smbfreelist) {
		smbfreelist = np->r_freef;
		if (np == smbfreelist)
			smbfreelist = NULL;
	}

	np->r_freeb->r_freef = np->r_freef;
	np->r_freef->r_freeb = np->r_freeb;

	np->r_freef = np->r_freeb = NULL;
}
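
/*
 * Freelist shape, for reference (illustrative): the list is circular
 * and doubly linked through r_freef/r_freeb, with smbfreelist as the
 * head pointer.  With three nodes A, B, C added in that order:
 *
 *	smbfreelist -> A
 *	A->r_freef == B, B->r_freef == C, C->r_freef == A
 *	A->r_freeb == C, C->r_freeb == B, B->r_freeb == A
 *
 * A one-node list points at itself, which is why sn_rmfree() above
 * must notice when removing the head leaves the list empty.
 */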

/*
 * Put an smbnode in the "hash" AVL tree.
 *
 * The caller must hold the rwlock as writer.
 *
 * NFS: nfs_subr.c:rp_addhash
 */
static void
sn_addhash_locked(smbnode_t *np, avl_index_t where)
{
	smbmntinfo_t *mi = np->n_mount;

	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
	ASSERT(!(np->r_flags & RHASHED));

	avl_insert(&mi->smi_hash_avl, np, where);

	mutex_enter(&np->r_statelock);
	np->r_flags |= RHASHED;
	mutex_exit(&np->r_statelock);
}

/*
 * Remove an smbnode from the "hash" AVL tree.
 *
 * The caller must hold the rwlock as writer.
 *
 * NFS: nfs_subr.c:rp_rmhash_locked
 */
static void
sn_rmhash_locked(smbnode_t *np)
{
	smbmntinfo_t *mi = np->n_mount;

	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
	ASSERT(np->r_flags & RHASHED);

	avl_remove(&mi->smi_hash_avl, np);

	mutex_enter(&np->r_statelock);
	np->r_flags &= ~RHASHED;
	mutex_exit(&np->r_statelock);
}

/*
 * Remove an smbnode from the "hash" AVL tree.
 *
 * The caller must not be holding the rwlock.
 */
void
smbfs_rmhash(smbnode_t *np)
{
	smbmntinfo_t *mi = np->n_mount;

	rw_enter(&mi->smi_hash_lk, RW_WRITER);
	sn_rmhash_locked(np);
	rw_exit(&mi->smi_hash_lk);
}

/*
 * Lookup an smbnode by remote pathname
 *
 * The caller must be holding the AVL rwlock, either shared or exclusive.
 *
 * NFS: nfs_subr.c:rfind
 */
static smbnode_t *
sn_hashfind(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	avl_index_t *pwhere) /* optional */
{
	smbfs_node_hdr_t nhdr;
	smbnode_t *np;
	vnode_t *vp;

	ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));

	bzero(&nhdr, sizeof (nhdr));
	nhdr.hdr_n_rpath = (char *)rpath;
	nhdr.hdr_n_rplen = rplen;

	/* See smbfs_node_cmp below. */
	np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);

	if (np == NULL)
		return (NULL);

	/*
	 * Found it in the "hash" AVL tree.
	 * Remove from free list, if necessary.
	 */
	vp = SMBTOV(np);
	if (np->r_freef != NULL) {
		mutex_enter(&smbfreelist_lock);
		/*
		 * If the smbnode is on the freelist,
		 * then remove it and use that reference
		 * as the new reference.  Otherwise,
		 * need to increment the reference count.
		 */
		if (np->r_freef != NULL) {
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);
		} else {
			mutex_exit(&smbfreelist_lock);
			VN_HOLD(vp);
		}
	} else
		VN_HOLD(vp);

	return (np);
}
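
/*
 * Note the double test of r_freef above: the first (unlocked) test
 * is only a hint, since another thread may take the node off the
 * freelist between that test and the mutex_enter, so r_freef is
 * tested again under smbfreelist_lock before calling sn_rmfree().
 */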

static int
smbfs_node_cmp(const void *va, const void *vb)
{
	const smbfs_node_hdr_t *a = va;
	const smbfs_node_hdr_t *b = vb;
	int clen, diff;

	/*
	 * Same semantics as strcmp, but does not
	 * assume the strings are null terminated.
	 */
	clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
	    a->hdr_n_rplen : b->hdr_n_rplen;
	diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
	if (diff < 0)
		return (-1);
	if (diff > 0)
		return (1);
	/* they match through clen */
	if (b->hdr_n_rplen > clen)
		return (-1);
	if (a->hdr_n_rplen > clen)
		return (1);
	return (0);
}
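
/*
 * Example orderings under smbfs_node_cmp (illustrative):
 *
 *	"foo" vs "foo bar"	-> -1 (a shorter prefix sorts first)
 *	"foo bar" vs "foo/bar"	-> -1 (' ' is 0x20, '/' is 0x2f)
 *
 * This is exactly the interleaving that smbfs_attrcache_prune()
 * below must step over when walking a directory's subtree.
 */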

/*
 * Setup the "hash" AVL tree used for our node cache.
 * See: smbfs_mount, smbfs_destroy_table.
 */
void
smbfs_init_hash_avl(avl_tree_t *avl)
{
	avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
	    offsetof(smbnode_t, r_avl_node));
}

/*
 * Invalidate the cached attributes for all nodes "under" the
 * passed-in node.  Note: the passed-in node is NOT affected by
 * this call.  This is used both for files under some directory
 * after the directory is deleted or renamed, and for extended
 * attribute files (named streams) under a plain file after that
 * file is renamed or deleted.
 *
 * Do this by walking the AVL tree starting at the passed in node,
 * and continuing while the visited nodes have a path prefix matching
 * the entire path of the passed-in node, and a separator just after
 * that matching path prefix.  Watch out for cases where the AVL tree
 * order may not exactly match the order of an FS walk, i.e.
 * consider this sequence:
 *	"foo"		(directory)
 *	"foo bar"	(name containing a space)
 *	"foo/bar"
 * The walk needs to skip "foo bar" and keep going until it finds
 * something that doesn't match the "foo" name prefix.
 */
void
smbfs_attrcache_prune(smbnode_t *top_np)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	char *rpath;
	int rplen;

	mi = top_np->n_mount;
	rw_enter(&mi->smi_hash_lk, RW_READER);

	np = top_np;
	rpath = top_np->n_rpath;
	rplen = top_np->n_rplen;
	for (;;) {
		np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
		if (np == NULL)
			break;
		if (np->n_rplen < rplen)
			break;
		if (0 != strncmp(np->n_rpath, rpath, rplen))
			break;
		if (np->n_rplen > rplen && (
		    np->n_rpath[rplen] == ':' ||
		    np->n_rpath[rplen] == '\\'))
			smbfs_attrcache_remove(np);
	}

	rw_exit(&mi->smi_hash_lk);
}

#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */


/*
 * Return 1 if there is an active vnode belonging to this vfs in the
 * smbnode cache.
 *
 * Several of these checks are done without holding the usual
 * locks.  This is safe because smbfs_destroy_table(), smbfs_addfree(),
 * etc. will redo the necessary checks before actually destroying
 * any smbnodes.
 *
 * NFS: nfs_subr.c:check_rtable
 *
 * Debugging changes here relative to NFS.
 * Relatively harmless, so left 'em in.
 */
int
smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp;
	int busycnt = 0;

	mi = VFTOSMI(vfsp);
	rw_enter(&mi->smi_hash_lk, RW_READER);
	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {

		if (np == rtnp)
			continue; /* skip the root */
		vp = SMBTOV(np);

		/* Now the 'busy' checks: */
		/* Not on the free list? */
		if (np->r_freef == NULL) {
			SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		/* Has dirty pages? */
		if (vn_has_cached_data(vp) &&
		    (np->r_flags & RDIRTY)) {
			SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		/* Other refs? (not reflected in v_count) */
		if (np->r_count > 0) {
			SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		if (busycnt && !smbfs_check_table_debug)
			break;

	}
	rw_exit(&mi->smi_hash_lk);

	return (busycnt);
}

/*
 * Destroy inactive vnodes from the AVL tree which belong to this
 * vfs.  It is essential that we destroy all inactive vnodes during a
 * forced unmount as well as during a normal unmount.
 *
 * NFS: nfs_subr.c:destroy_rtable
 *
 * In here, we're normally destroying all or most of the AVL tree,
 * so the natural choice is to use avl_destroy_nodes.  However,
 * there may be a few busy nodes that should remain in the AVL
 * tree when we're done.  The solution: use a temporary tree to
 * hold the busy nodes until we're done destroying the old tree,
 * then copy the temporary tree over the (now empty) real tree.
 */
void
smbfs_destroy_table(struct vfs *vfsp)
{
	avl_tree_t tmp_avl;
	smbmntinfo_t *mi;
	smbnode_t *np;
	smbnode_t *rlist;
	void *v;

	mi = VFTOSMI(vfsp);
	rlist = NULL;
	smbfs_init_hash_avl(&tmp_avl);

	rw_enter(&mi->smi_hash_lk, RW_WRITER);
	v = NULL;
	while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {

		mutex_enter(&smbfreelist_lock);
		if (np->r_freef == NULL) {
			/*
			 * Busy node (not on the free list).
			 * Will keep in the final AVL tree.
			 */
			mutex_exit(&smbfreelist_lock);
			avl_add(&tmp_avl, np);
		} else {
			/*
			 * It's on the free list.  Remove and
			 * arrange for it to be destroyed.
			 */
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);

			/*
			 * Last part of sn_rmhash_locked().
			 * NB: avl_destroy_nodes has already
			 * removed this from the "hash" AVL.
			 */
			mutex_enter(&np->r_statelock);
			np->r_flags &= ~RHASHED;
			mutex_exit(&np->r_statelock);

			/*
			 * Add to the list of nodes to destroy.
			 * Borrowing avl_child[0] for this list.
			 */
			np->r_avl_node.avl_child[0] =
			    (struct avl_node *)rlist;
			rlist = np;
		}
	}
	avl_destroy(&mi->smi_hash_avl);

	/*
	 * Replace the (now destroyed) "hash" AVL with the
	 * temporary AVL, which restores the busy nodes.
	 */
	mi->smi_hash_avl = tmp_avl;
	rw_exit(&mi->smi_hash_lk);

	/*
	 * Now destroy the nodes on our temporary list (rlist).
	 * This call to smbfs_addfree will end up destroying the
	 * smbnode, but in a safe way with the appropriate set
	 * of checks done.
	 */
	while ((np = rlist) != NULL) {
		rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
		smbfs_addfree(np);
	}
}
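
/*
 * Note: borrowing r_avl_node.avl_child[0] as a list link above is
 * safe because avl_destroy_nodes() has already detached each node
 * from the tree, so its AVL linkage fields are otherwise unused
 * until the node is destroyed.
 */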

/*
 * This routine destroys all the resources associated with the smbnode
 * and then the smbnode itself.  Note: sn_inactive has been called.
 *
 * NFS: nfs_subr.c:destroy_rnode
 */
static void
sn_destroy_node(smbnode_t *np)
{
	vnode_t *vp;
	vfs_t *vfsp;

	vp = SMBTOV(np);
	vfsp = vp->v_vfsp;

	ASSERT(vp->v_count == 1);
	ASSERT(np->r_count == 0);
	ASSERT(np->r_mapcnt == 0);
	ASSERT(np->r_cred == NULL);
	ASSERT(np->n_rpath == NULL);
	ASSERT(!(np->r_flags & RHASHED));
	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
	atomic_add_long((ulong_t *)&smbnodenew, -1);
	vn_invalid(vp);
	vn_free(vp);
	kmem_cache_free(smbnode_cache, np);
	VFS_RELE(vfsp);
}

/*
 * Flush all vnodes in this (or every) vfs.
 * Used by smbfs_sync and by smbfs_unmount.
 */
/*ARGSUSED*/
void
smbfs_rflush(struct vfs *vfsp, cred_t *cr)
{
	/* Todo: mmap support. */
}

/* access cache */
/* client handles */

/*
 * Initialize resources that are used by smbfs_subr.c.
 * This is called from the _init() routine (by way of smbfs_clntinit()).
 *
 * NFS: nfs_subr.c:nfs_subrinit
 */
int
smbfs_subrinit(void)
{
	ulong_t nsmbnode_max;

	/*
	 * Allocate and initialize the smbnode cache
	 */
	if (nsmbnode <= 0)
		nsmbnode = ncsize; /* dnlc.h */
	nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
	    sizeof (struct smbnode));
	if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
		zcmn_err(GLOBAL_ZONEID, CE_NOTE,
		    "setting nsmbnode to max value of %ld", nsmbnode_max);
		nsmbnode = nsmbnode_max;
	}

	smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
	    0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);

	/*
	 * Initialize the various mutexes and reader/writer locks
	 */
	mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Assign unique major number for all smbfs mounts
	 */
	if ((smbfs_major = getudev()) == -1) {
		zcmn_err(GLOBAL_ZONEID, CE_WARN,
		    "smbfs: init: can't get unique device number");
		smbfs_major = 0;
	}
	smbfs_minor = 0;

	return (0);
}
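
/*
 * Sizing note (illustrative): nsmbnode_max caps the node cache at a
 * quarter of available kernel memory divided by the node size.  For
 * example, assuming (hypothetically) 1 GB available and a 512-byte
 * smbnode, the cap would be (1 GB / 4) / 512 = 524288 nodes.
 */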

/*
 * free smbfs hash table, etc.
 * NFS: nfs_subr.c:nfs_subrfini
 */
void
smbfs_subrfini(void)
{

	/*
	 * Destroy the smbnode cache
	 */
	kmem_cache_destroy(smbnode_cache);

	/*
	 * Destroy the various mutexes and reader/writer locks
	 */
	mutex_destroy(&smbfreelist_lock);
	mutex_destroy(&smbfs_minor_lock);
}

/* rddir_cache ? */

/*
 * Support functions for smbfs_kmem_reclaim
 */

static void
smbfs_node_reclaim(void)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp;

	mutex_enter(&smbfreelist_lock);
	while ((np = smbfreelist) != NULL) {
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);
		if (np->r_flags & RHASHED) {
			vp = SMBTOV(np);
			mi = np->n_mount;
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				vp->v_count--;
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				mutex_enter(&smbfreelist_lock);
				continue;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}
		/*
		 * This call to smbfs_addfree will end up destroying the
		 * smbnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		smbfs_addfree(np);
		mutex_enter(&smbfreelist_lock);
	}
	mutex_exit(&smbfreelist_lock);
}

/*
 * Called via the kmem cache reclaim callback, asking us to
 * "Please give back some memory!"
 *
 * Todo: dump nodes from the free list?
 */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
	smbfs_node_reclaim();
}

/* nfs failover stuff */
/* nfs_rw_xxx - see smbfs_rwlock.c */