xref: /illumos-gate/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c (revision 5f4fc069910e2cd7c469b9aaaa5bec453a954029)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
26  *	All rights reserved.
27  */
28 /*
29  * Copyright (c) 2017 by Delphix. All rights reserved.
30  */
31 
32 /*
33  * Node hash implementation initially borrowed from NFS (nfs_subr.c)
34  * but then heavily modified. It's no longer an array of hash lists,
35  * but an AVL tree per mount point.  More on this below.
36  */
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/time.h>
41 #include <sys/vnode.h>
42 #include <sys/bitmap.h>
43 #include <sys/dnlc.h>
44 #include <sys/kmem.h>
45 #include <sys/sunddi.h>
46 #include <sys/sysmacros.h>
47 #include <sys/fcntl.h>
48 
49 #include <netsmb/smb_osdep.h>
50 
51 #include <netsmb/smb.h>
52 #include <netsmb/smb_conn.h>
53 #include <netsmb/smb_subr.h>
54 #include <netsmb/smb_rq.h>
55 
56 #include <smbfs/smbfs.h>
57 #include <smbfs/smbfs_node.h>
58 #include <smbfs/smbfs_subr.h>
59 
60 /*
61  * The AVL trees (now per-mount) allow finding an smbfs node by its
62  * full remote path name.  It also allows easy traversal of all nodes
63  * below (path wise) any given node.  A reader/writer lock for each
64  * (per mount) AVL tree is used to control access and to synchronize
65  * lookups, additions, and deletions from that AVL tree.
66  *
 * Previously, this code used a global array of hash chains, each with
68  * its own rwlock.  A few struct members, functions, and comments may
69  * still refer to a "hash", and those should all now be considered to
70  * refer to the per-mount AVL tree that replaced the old hash chains.
71  * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
72  *
73  * The smbnode freelist is organized as a doubly linked list with
74  * a head pointer.  Additions and deletions are synchronized via
75  * a single mutex.
76  *
77  * In order to add an smbnode to the free list, it must be linked into
78  * the mount's AVL tree and the exclusive lock for the AVL must be held.
79  * If an smbnode is not linked into the AVL tree, then it is destroyed
80  * because it represents no valuable information that can be reused
81  * about the file.  The exclusive lock for the AVL tree must be held
82  * in order to prevent a lookup in the AVL tree from finding the
83  * smbnode and using it and assuming that the smbnode is not on the
84  * freelist.  The lookup in the AVL tree will have the AVL tree lock
85  * held, either exclusive or shared.
86  *
87  * The vnode reference count for each smbnode is not allowed to drop
88  * below 1.  This prevents external entities, such as the VM
89  * subsystem, from acquiring references to vnodes already on the
90  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * smbnode is looked up in the AVL tree, then either the smbnode
 * is removed from the freelist and that reference is transferred to
94  * the new reference or the vnode reference count must be incremented
95  * accordingly.  The mutex for the freelist must be held in order to
96  * accurately test to see if the smbnode is on the freelist or not.
97  * The AVL tree lock might be held shared and it is possible that
98  * two different threads may race to remove the smbnode from the
99  * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist.  It can not be
102  * placed on the freelist due to the requirement that the thread
103  * putting the smbnode on the freelist must hold the exclusive lock
104  * for the AVL tree and the thread doing the lookup in the AVL tree
105  * is holding either a shared or exclusive lock for the AVL tree.
106  *
107  * The lock ordering is:
108  *
109  *	AVL tree lock -> vnode lock
110  *	AVL tree lock -> freelist lock
111  */
112 
/* Serializes additions to and removals from the smbnode free list. */
static kmutex_t smbfreelist_lock;
/* Head of the circular, doubly linked free list; NULL when empty. */
static smbnode_t *smbfreelist = NULL;
/*
 * Number of smbnodes currently allocated: incremented in
 * make_smbnode() and decremented in sn_destroy_node().
 */
static ulong_t	smbnodenew = 0;
/*
 * Limit that smbnodenew is compared against when deciding whether to
 * recycle a free-list node (make_smbnode) or destroy a node rather
 * than caching it (smbfs_addfree).  It is never assigned in this part
 * of the file -- presumably set during initialization; confirm.
 */
long	nsmbnode = 0;

/* kmem cache from which smbnode_t structures are allocated. */
static struct kmem_cache *smbnode_cache;

/* All-zero vsecattr_t used by sn_inactive() to reset r_secattr. */
static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/*
 * Placeholder attributes.  smbfs_node_findcreate() compares its "fap"
 * argument against the address of this object: passing it forces node
 * creation without applying any attributes.
 */
struct smbfattr smbfs_fattr0;
133 
/*
 * Local functions.
 * SN for Smb Node
 *
 * sn_rmfree		unlink a node from the free list
 * sn_inactive		release a node's cached credentials, ACL and path
 * sn_addhash_locked	insert a node into the per-mount "hash" AVL tree
 * sn_rmhash_locked	remove a node from the per-mount "hash" AVL tree
 * sn_destroy_node	free a node and its vnode
 * sn_hashfind		look a node up by remote path
 * make_smbnode		find a node or build and insert a new one
 *
 * smbfs_kmem_reclaim is declared here without "static"; its
 * definition is not in this part of the file.
 */
static void sn_rmfree(smbnode_t *);
static void sn_inactive(smbnode_t *);
static void sn_addhash_locked(smbnode_t *, avl_index_t);
static void sn_rmhash_locked(smbnode_t *);
static void sn_destroy_node(smbnode_t *);
void smbfs_kmem_reclaim(void *cdrarg);

static smbnode_t *
sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);

static smbnode_t *
make_smbnode(smbmntinfo_t *, const char *, int, int *);
150 
151 /*
152  * Free the resources associated with an smbnode.
153  * Note: This is different from smbfs_inactive
154  *
155  * From NFS: nfs_subr.c:rinactive
156  */
157 static void
158 sn_inactive(smbnode_t *np)
159 {
160 	vsecattr_t	ovsa;
161 	cred_t		*oldcr;
162 	char		*orpath;
163 	int		orplen;
164 	vnode_t		*vp;
165 
166 	/*
167 	 * Here NFS has:
168 	 * Flush and invalidate all pages (done by caller)
169 	 * Free any held credentials and caches...
170 	 * etc.  (See NFS code)
171 	 */
172 	mutex_enter(&np->r_statelock);
173 
174 	ovsa = np->r_secattr;
175 	np->r_secattr = smbfs_vsa0;
176 	np->r_sectime = 0;
177 
178 	oldcr = np->r_cred;
179 	np->r_cred = NULL;
180 
181 	orpath = np->n_rpath;
182 	orplen = np->n_rplen;
183 	np->n_rpath = NULL;
184 	np->n_rplen = 0;
185 
186 	mutex_exit(&np->r_statelock);
187 
188 	vp = SMBTOV(np);
189 	if (vn_has_cached_data(vp)) {
190 		ASSERT3P(vp,==,NULL);
191 	}
192 
193 	if (ovsa.vsa_aclentp != NULL)
194 		kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);
195 
196 	if (oldcr != NULL)
197 		crfree(oldcr);
198 
199 	if (orpath != NULL)
200 		kmem_free(orpath, orplen + 1);
201 }
202 
203 /*
204  * Find and optionally create an smbnode for the passed
205  * mountinfo, directory, separator, and name.  If the
206  * desired smbnode already exists, return a reference.
207  * If the file attributes pointer is non-null, the node
208  * is created if necessary and linked into the AVL tree.
209  *
210  * Callers that need a node created but don't have the
211  * real attributes pass smbfs_fattr0 to force creation.
212  *
213  * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
214  *
215  * Based on NFS: nfs_subr.c:makenfsnode
216  */
217 smbnode_t *
218 smbfs_node_findcreate(
219 	smbmntinfo_t *mi,
220 	const char *dirnm,
221 	int dirlen,
222 	const char *name,
223 	int nmlen,
224 	char sep,
225 	struct smbfattr *fap)
226 {
227 	char tmpbuf[256];
228 	size_t rpalloc;
229 	char *p, *rpath;
230 	int rplen;
231 	smbnode_t *np;
232 	vnode_t *vp;
233 	int newnode;
234 
235 	/*
236 	 * Build the search string, either in tmpbuf or
237 	 * in allocated memory if larger than tmpbuf.
238 	 */
239 	rplen = dirlen;
240 	if (sep != '\0')
241 		rplen++;
242 	rplen += nmlen;
243 	if (rplen < sizeof (tmpbuf)) {
244 		/* use tmpbuf */
245 		rpalloc = 0;
246 		rpath = tmpbuf;
247 	} else {
248 		rpalloc = rplen + 1;
249 		rpath = kmem_alloc(rpalloc, KM_SLEEP);
250 	}
251 	p = rpath;
252 	bcopy(dirnm, p, dirlen);
253 	p += dirlen;
254 	if (sep != '\0')
255 		*p++ = sep;
256 	if (name != NULL) {
257 		bcopy(name, p, nmlen);
258 		p += nmlen;
259 	}
260 	ASSERT(p == rpath + rplen);
261 
262 	/*
263 	 * Find or create a node with this path.
264 	 */
265 	rw_enter(&mi->smi_hash_lk, RW_READER);
266 	if (fap == NULL)
267 		np = sn_hashfind(mi, rpath, rplen, NULL);
268 	else
269 		np = make_smbnode(mi, rpath, rplen, &newnode);
270 	rw_exit(&mi->smi_hash_lk);
271 
272 	if (rpalloc)
273 		kmem_free(rpath, rpalloc);
274 
275 	if (fap == NULL) {
276 		/*
277 		 * Caller is "just looking" (no create)
278 		 * so np may or may not be NULL here.
279 		 * Either way, we're done.
280 		 */
281 		return (np);
282 	}
283 
284 	/*
285 	 * We should have a node, possibly created.
286 	 * Do we have (real) attributes to apply?
287 	 */
288 	ASSERT(np != NULL);
289 	if (fap == &smbfs_fattr0)
290 		return (np);
291 
292 	/*
293 	 * Apply the given attributes to this node,
294 	 * dealing with any cache impact, etc.
295 	 */
296 	vp = SMBTOV(np);
297 	smbfs_attrcache_fa(vp, fap);
298 
299 	/*
300 	 * Note NFS sets vp->v_type here, assuming it
301 	 * can never change for the life of a node.
302 	 * We allow v_type to change, and set it in
303 	 * smbfs_attrcache().  Also: mode, uid, gid
304 	 */
305 	return (np);
306 }
307 
308 /*
309  * Here NFS has: nfs_subr.c:rtablehash
310  * We use smbfs_hash().
311  */
312 
/*
 * Find or create an smbnode.
 * From NFS: nfs_subr.c:make_rnode
 *
 * mi		mount whose "hash" AVL tree is searched / extended
 * rpath	remote path (need not be NUL terminated)
 * rplen	length of rpath in bytes
 * newnode	out: set to 1 when this call inserted a new node,
 *		0 when an existing node is returned
 *
 * Locking: entered with mi->smi_hash_lk held as reader (asserted).
 * The lock is dropped while a node is recycled or allocated, and is
 * held again on every return: as reader when an existing node is
 * returned after the lock was dropped, as writer when a new node was
 * inserted.  The caller releases it.
 */
static smbnode_t *
make_smbnode(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	int *newnode)
{
	smbnode_t *np;
	smbnode_t *tnp;
	vnode_t *vp;
	vfs_t *vfsp;
	avl_index_t where;
	char *new_rpath = NULL;

	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
	vfsp = mi->smi_vfsp;

start:
	/* Fast path: the node already exists (sn_hashfind holds it). */
	np = sn_hashfind(mi, rpath, rplen, NULL);
	if (np != NULL) {
		*newnode = 0;
		return (np);
	}

	/* Note: will retake this lock below. */
	rw_exit(&mi->smi_hash_lk);

	/*
	 * see if we can find something on the freelist
	 * (only recycle once the allocation count has reached nsmbnode)
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
		np = smbfreelist;
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);

		vp = SMBTOV(np);

		if (np->r_flags & RHASHED) {
			/*
			 * The recycled node may belong to a different
			 * mount, so lock the tree it is actually in.
			 */
			smbmntinfo_t *tmp_mi = np->n_mount;
			ASSERT(tmp_mi != NULL);
			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Someone else picked up a reference;
				 * drop ours and leave the node alone.
				 */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&tmp_mi->smi_hash_lk);
				/* start over */
				rw_enter(&mi->smi_hash_lk, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&tmp_mi->smi_hash_lk);
		}

		sn_inactive(np);

		/* Recheck the reference count now that v_lock was dropped. */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			rw_enter(&mi->smi_hash_lk, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		smbfs_rw_destroy(&np->r_rwlock);
		smbfs_rw_destroy(&np->r_lkserlock);
		mutex_destroy(&np->r_statelock);
		cv_destroy(&np->r_cv);
		/*
		 * Make sure that if smbnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/*
		 * allocate and initialize a new smbnode
		 */
		vnode_t *new_vp;

		mutex_exit(&smbfreelist_lock);

		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_inc_ulong((ulong_t *)&smbnodenew);
		vp = new_vp;
	}

	/*
	 * Allocate and copy the rpath we'll need below.
	 * The copy is NUL terminated, hence rplen + 1.
	 */
	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
	bcopy(rpath, new_rpath, rplen);
	new_rpath[rplen] = '\0';

	/* Initialize smbnode_t */
	bzero(np, sizeof (*np));

	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */

	np->r_vnode = vp;
	np->n_mount = mi;

	np->n_fid = SMB_FID_UNUSED;
	np->n_uid = mi->smi_uid;
	np->n_gid = mi->smi_gid;
	/* Leave attributes "stale." */

	/*
	 * Here NFS has avl_create(&np->r_dir, ...)
	 * for the readdir cache (not used here).
	 */

	/* Now fill in the vnode. */
	vn_setops(vp, smbfs_vnodeops);
	vp->v_data = (caddr_t)np;
	VFS_HOLD(vfsp);
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;

	/*
	 * We entered with mi->smi_hash_lk held (reader).
	 * Retake it now, (as the writer).
	 * Will return with it held.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	/*
	 * There is a race condition where someone else
	 * may alloc the smbnode while no locks are held,
	 * so check again and recover if found.
	 * This lookup also records the insertion point in "where".
	 */
	tnp = sn_hashfind(mi, rpath, rplen, &where);
	if (tnp != NULL) {
		/*
		 * Lost the race.  Hand the node we were building to
		 * smbfs_addfree (n_rpath has not been set on it yet,
		 * so the copied path is freed here) and return the
		 * one we found.  The tree lock is dropped around
		 * smbfs_addfree, which may take it itself, and is
		 * retaken as reader for the return.
		 */
		rw_exit(&mi->smi_hash_lk);
		kmem_free(new_rpath, rplen + 1);
		smbfs_addfree(np);
		rw_enter(&mi->smi_hash_lk, RW_READER);
		*newnode = 0;
		return (tnp);
	}

	/*
	 * Hash search identifies nodes by the remote path
	 * (n_rpath) so fill that in now, before linking
	 * this node into the node cache (AVL tree).
	 */
	np->n_rpath = new_rpath;
	np->n_rplen = rplen;
	np->n_ino = smbfs_gethash(new_rpath, rplen);

	sn_addhash_locked(np, where);
	*newnode = 1;
	return (np);
}
489 
/*
 * smbfs_addfree
 * Put an smbnode on the free list, or destroy it immediately
 * if it offers no value were it to be reclaimed later.  Also
 * destroy immediately when we have too many smbnodes, etc.
 *
 * The caller's vnode reference is consumed: it is either released
 * (when another reference exists), kept as the free list's reference,
 * or goes away with the node when the node is destroyed.
 *
 * The node must not already be on the free list (asserted), and the
 * caller must not hold the mount's smi_hash_lk, which is taken here
 * as writer.
 *
 * Normally called by smbfs_inactive, but also
 * called in here during cleanup operations.
 *
 * From NFS: nfs_subr.c:rp_addfree
 */
void
smbfs_addfree(smbnode_t *np)
{
	vnode_t *vp;
	struct vfs *vfsp;
	smbmntinfo_t *mi;

	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);

	vp = SMBTOV(np);
	ASSERT(vp->v_count >= 1);

	vfsp = vp->v_vfsp;
	mi = VFTOSMI(vfsp);

	/*
	 * If there are no more references to this smbnode and:
	 * we have too many smbnodes allocated, or if the node
	 * is no longer accessible via the AVL tree (!RHASHED),
	 * or an i/o error occurred while writing to the file,
	 * or it's part of an unmounted FS, then try to destroy
	 * it instead of putting it on the smbnode freelist.
	 */
	if (np->r_count == 0 && (
	    (np->r_flags & RHASHED) == 0 ||
	    (np->r_error != 0) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
	    (smbnodenew > nsmbnode))) {

		/* Try to destroy this node. */

		if (np->r_flags & RHASHED) {
			/*
			 * Still findable by lookups: take the tree lock
			 * as writer and make sure ours is the only
			 * reference before unlinking the node.
			 */
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				return;
				/*
				 * Will get another call later,
				 * via smbfs_inactive.
				 */
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}

		sn_inactive(np);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * smbnode is not in the smbnode "hash" AVL tree, so
		 * the only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the smbnode was marked
		 * with RDIRTY or for a modified page.  This vnode
		 * reference may have been acquired before our call
		 * to sn_inactive.  The i/o may have been completed,
		 * thus allowing sn_inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the smbnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the smbnode or placing it on the
		 * smbnode freelist.  If there are no other references,
		 * then the smbnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		sn_destroy_node(np);
		return;
	}

	/*
	 * Lock the AVL tree and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the smbnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this smbnode to the freelist.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		rw_exit(&mi->smi_hash_lk);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Put this node on the free list.  The list is circular;
	 * the node is linked in just before the head, i.e. at the
	 * tail, while make_smbnode() recycles from the head.
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist == NULL) {
		np->r_freef = np;
		np->r_freeb = np;
		smbfreelist = np;
	} else {
		np->r_freef = smbfreelist;
		np->r_freeb = smbfreelist->r_freeb;
		smbfreelist->r_freeb->r_freef = np;
		smbfreelist->r_freeb = np;
	}
	mutex_exit(&smbfreelist_lock);

	rw_exit(&mi->smi_hash_lk);
}
620 
621 /*
622  * Remove an smbnode from the free list.
623  *
624  * The caller must be holding smbfreelist_lock and the smbnode
625  * must be on the freelist.
626  *
627  * From NFS: nfs_subr.c:rp_rmfree
628  */
629 static void
630 sn_rmfree(smbnode_t *np)
631 {
632 
633 	ASSERT(MUTEX_HELD(&smbfreelist_lock));
634 	ASSERT(np->r_freef != NULL && np->r_freeb != NULL);
635 
636 	if (np == smbfreelist) {
637 		smbfreelist = np->r_freef;
638 		if (np == smbfreelist)
639 			smbfreelist = NULL;
640 	}
641 
642 	np->r_freeb->r_freef = np->r_freef;
643 	np->r_freef->r_freeb = np->r_freeb;
644 
645 	np->r_freef = np->r_freeb = NULL;
646 }
647 
648 /*
649  * Put an smbnode in the "hash" AVL tree.
650  *
651  * The caller must be hold the rwlock as writer.
652  *
653  * From NFS: nfs_subr.c:rp_addhash
654  */
655 static void
656 sn_addhash_locked(smbnode_t *np, avl_index_t where)
657 {
658 	smbmntinfo_t *mi = np->n_mount;
659 
660 	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
661 
662 	mutex_enter(&np->r_statelock);
663 	if ((np->r_flags & RHASHED) == 0) {
664 		avl_insert(&mi->smi_hash_avl, np, where);
665 		np->r_flags |= RHASHED;
666 	}
667 	mutex_exit(&np->r_statelock);
668 }
669 
670 /*
671  * Remove an smbnode from the "hash" AVL tree.
672  *
673  * The caller must hold the rwlock as writer.
674  *
675  * From NFS: nfs_subr.c:rp_rmhash_locked
676  */
677 static void
678 sn_rmhash_locked(smbnode_t *np)
679 {
680 	smbmntinfo_t *mi = np->n_mount;
681 
682 	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
683 
684 	mutex_enter(&np->r_statelock);
685 	if ((np->r_flags & RHASHED) != 0) {
686 		np->r_flags &= ~RHASHED;
687 		avl_remove(&mi->smi_hash_avl, np);
688 	}
689 	mutex_exit(&np->r_statelock);
690 }
691 
692 /*
693  * Remove an smbnode from the "hash" AVL tree.
694  *
695  * The caller must not be holding the rwlock.
696  */
697 void
698 smbfs_rmhash(smbnode_t *np)
699 {
700 	smbmntinfo_t *mi = np->n_mount;
701 
702 	rw_enter(&mi->smi_hash_lk, RW_WRITER);
703 	sn_rmhash_locked(np);
704 	rw_exit(&mi->smi_hash_lk);
705 }
706 
707 /*
708  * Lookup an smbnode by remote pathname
709  *
710  * The caller must be holding the AVL rwlock, either shared or exclusive.
711  *
712  * From NFS: nfs_subr.c:rfind
713  */
714 static smbnode_t *
715 sn_hashfind(
716 	smbmntinfo_t *mi,
717 	const char *rpath,
718 	int rplen,
719 	avl_index_t *pwhere) /* optional */
720 {
721 	smbfs_node_hdr_t nhdr;
722 	smbnode_t *np;
723 	vnode_t *vp;
724 
725 	ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));
726 
727 	bzero(&nhdr, sizeof (nhdr));
728 	nhdr.hdr_n_rpath = (char *)rpath;
729 	nhdr.hdr_n_rplen = rplen;
730 
731 	/* See smbfs_node_cmp below. */
732 	np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);
733 
734 	if (np == NULL)
735 		return (NULL);
736 
737 	/*
738 	 * Found it in the "hash" AVL tree.
739 	 * Remove from free list, if necessary.
740 	 */
741 	vp = SMBTOV(np);
742 	if (np->r_freef != NULL) {
743 		mutex_enter(&smbfreelist_lock);
744 		/*
745 		 * If the smbnode is on the freelist,
746 		 * then remove it and use that reference
747 		 * as the new reference.  Otherwise,
748 		 * need to increment the reference count.
749 		 */
750 		if (np->r_freef != NULL) {
751 			sn_rmfree(np);
752 			mutex_exit(&smbfreelist_lock);
753 		} else {
754 			mutex_exit(&smbfreelist_lock);
755 			VN_HOLD(vp);
756 		}
757 	} else
758 		VN_HOLD(vp);
759 
760 	return (np);
761 }
762 
763 static int
764 smbfs_node_cmp(const void *va, const void *vb)
765 {
766 	const smbfs_node_hdr_t *a = va;
767 	const smbfs_node_hdr_t *b = vb;
768 	int clen, diff;
769 
770 	/*
771 	 * Same semantics as strcmp, but does not
772 	 * assume the strings are null terminated.
773 	 */
774 	clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
775 	    a->hdr_n_rplen : b->hdr_n_rplen;
776 	diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
777 	if (diff < 0)
778 		return (-1);
779 	if (diff > 0)
780 		return (1);
781 	/* they match through clen */
782 	if (b->hdr_n_rplen > clen)
783 		return (-1);
784 	if (a->hdr_n_rplen > clen)
785 		return (1);
786 	return (0);
787 }
788 
789 /*
790  * Setup the "hash" AVL tree used for our node cache.
791  * See: smbfs_mount, smbfs_destroy_table.
792  */
793 void
794 smbfs_init_hash_avl(avl_tree_t *avl)
795 {
796 	avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
797 	    offsetof(smbnode_t, r_avl_node));
798 }
799 
800 /*
801  * Invalidate the cached attributes for all nodes "under" the
802  * passed-in node.  Note: the passed-in node is NOT affected by
803  * this call.  This is used both for files under some directory
804  * after the directory is deleted or renamed, and for extended
805  * attribute files (named streams) under a plain file after that
806  * file is renamed or deleted.
807  *
808  * Do this by walking the AVL tree starting at the passed in node,
809  * and continuing while the visited nodes have a path prefix matching
810  * the entire path of the passed-in node, and a separator just after
811  * that matching path prefix.  Watch out for cases where the AVL tree
812  * order may not exactly match the order of an FS walk, i.e.
813  * consider this sequence:
814  *	"foo"		(directory)
815  *	"foo bar"	(name containing a space)
816  *	"foo/bar"
817  * The walk needs to skip "foo bar" and keep going until it finds
818  * something that doesn't match the "foo" name prefix.
819  */
820 void
821 smbfs_attrcache_prune(smbnode_t *top_np)
822 {
823 	smbmntinfo_t *mi;
824 	smbnode_t *np;
825 	char *rpath;
826 	int rplen;
827 
828 	mi = top_np->n_mount;
829 	rw_enter(&mi->smi_hash_lk, RW_READER);
830 
831 	np = top_np;
832 	rpath = top_np->n_rpath;
833 	rplen = top_np->n_rplen;
834 	for (;;) {
835 		np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
836 		if (np == NULL)
837 			break;
838 		if (np->n_rplen < rplen)
839 			break;
840 		if (0 != strncmp(np->n_rpath, rpath, rplen))
841 			break;
842 		if (np->n_rplen > rplen && (
843 		    np->n_rpath[rplen] == ':' ||
844 		    np->n_rpath[rplen] == '\\'))
845 			smbfs_attrcache_remove(np);
846 	}
847 
848 	rw_exit(&mi->smi_hash_lk);
849 }
850 
/*
 * When nonzero, smbfs_check_table() keeps scanning after it has found
 * a busy node (reporting each one via SMBVDEBUG) instead of stopping
 * at the first.  Defaults to on only when SMB_VNODE_DEBUG is defined.
 */
#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */
856 
857 
858 /*
859  * Return 1 if there is a active vnode belonging to this vfs in the
860  * smbnode cache.
861  *
862  * Several of these checks are done without holding the usual
863  * locks.  This is safe because destroy_smbtable(), smbfs_addfree(),
864  * etc. will redo the necessary checks before actually destroying
865  * any smbnodes.
866  *
867  * From NFS: nfs_subr.c:check_rtable
868  *
869  * Debugging changes here relative to NFS.
870  * Relatively harmless, so left 'em in.
871  */
872 int
873 smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
874 {
875 	smbmntinfo_t *mi;
876 	smbnode_t *np;
877 	vnode_t *vp;
878 	int busycnt = 0;
879 
880 	mi = VFTOSMI(vfsp);
881 	rw_enter(&mi->smi_hash_lk, RW_READER);
882 	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
883 	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
884 
885 		if (np == rtnp)
886 			continue; /* skip the root */
887 		vp = SMBTOV(np);
888 
889 		/* Now the 'busy' checks: */
890 		/* Not on the free list? */
891 		if (np->r_freef == NULL) {
892 			SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
893 			    (void *)np, np->n_rpath);
894 			busycnt++;
895 		}
896 
897 		/* Has dirty pages? */
898 		if (vn_has_cached_data(vp) &&
899 		    (np->r_flags & RDIRTY)) {
900 			SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
901 			    (void *)np, np->n_rpath);
902 			busycnt++;
903 		}
904 
905 		/* Other refs? (not reflected in v_count) */
906 		if (np->r_count > 0) {
907 			SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
908 			    (void *)np, np->n_rpath);
909 			busycnt++;
910 		}
911 
912 		if (busycnt && !smbfs_check_table_debug)
913 			break;
914 
915 	}
916 	rw_exit(&mi->smi_hash_lk);
917 
918 	return (busycnt);
919 }
920 
921 /*
922  * Destroy inactive vnodes from the AVL tree which belong to this
923  * vfs.  It is essential that we destroy all inactive vnodes during a
924  * forced unmount as well as during a normal unmount.
925  *
926  * Based on NFS: nfs_subr.c:destroy_rtable
927  *
928  * In here, we're normally destrying all or most of the AVL tree,
929  * so the natural choice is to use avl_destroy_nodes.  However,
930  * there may be a few busy nodes that should remain in the AVL
931  * tree when we're done.  The solution: use a temporary tree to
932  * hold the busy nodes until we're done destroying the old tree,
933  * then copy the temporary tree over the (now emtpy) real tree.
934  */
935 void
936 smbfs_destroy_table(struct vfs *vfsp)
937 {
938 	avl_tree_t tmp_avl;
939 	smbmntinfo_t *mi;
940 	smbnode_t *np;
941 	smbnode_t *rlist;
942 	void *v;
943 
944 	mi = VFTOSMI(vfsp);
945 	rlist = NULL;
946 	smbfs_init_hash_avl(&tmp_avl);
947 
948 	rw_enter(&mi->smi_hash_lk, RW_WRITER);
949 	v = NULL;
950 	while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {
951 
952 		mutex_enter(&smbfreelist_lock);
953 		if (np->r_freef == NULL) {
954 			/*
955 			 * Busy node (not on the free list).
956 			 * Will keep in the final AVL tree.
957 			 */
958 			mutex_exit(&smbfreelist_lock);
959 			avl_add(&tmp_avl, np);
960 		} else {
961 			/*
962 			 * It's on the free list.  Remove and
963 			 * arrange for it to be destroyed.
964 			 */
965 			sn_rmfree(np);
966 			mutex_exit(&smbfreelist_lock);
967 
968 			/*
969 			 * Last part of sn_rmhash_locked().
970 			 * NB: avl_destroy_nodes has already
971 			 * removed this from the "hash" AVL.
972 			 */
973 			mutex_enter(&np->r_statelock);
974 			np->r_flags &= ~RHASHED;
975 			mutex_exit(&np->r_statelock);
976 
977 			/*
978 			 * Add to the list of nodes to destroy.
979 			 * Borrowing avl_child[0] for this list.
980 			 */
981 			np->r_avl_node.avl_child[0] =
982 			    (struct avl_node *)rlist;
983 			rlist = np;
984 		}
985 	}
986 	avl_destroy(&mi->smi_hash_avl);
987 
988 	/*
989 	 * Replace the (now destroyed) "hash" AVL with the
990 	 * temporary AVL, which restores the busy nodes.
991 	 */
992 	mi->smi_hash_avl = tmp_avl;
993 	rw_exit(&mi->smi_hash_lk);
994 
995 	/*
996 	 * Now destroy the nodes on our temporary list (rlist).
997 	 * This call to smbfs_addfree will end up destroying the
998 	 * smbnode, but in a safe way with the appropriate set
999 	 * of checks done.
1000 	 */
1001 	while ((np = rlist) != NULL) {
1002 		rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
1003 		smbfs_addfree(np);
1004 	}
1005 }
1006 
1007 /*
1008  * This routine destroys all the resources associated with the smbnode
1009  * and then the smbnode itself.  Note: sn_inactive has been called.
1010  *
1011  * From NFS: nfs_subr.c:destroy_rnode
1012  */
1013 static void
1014 sn_destroy_node(smbnode_t *np)
1015 {
1016 	vnode_t *vp;
1017 	vfs_t *vfsp;
1018 
1019 	vp = SMBTOV(np);
1020 	vfsp = vp->v_vfsp;
1021 
1022 	ASSERT(vp->v_count == 1);
1023 	ASSERT(np->r_count == 0);
1024 	ASSERT(np->r_mapcnt == 0);
1025 	ASSERT(np->r_secattr.vsa_aclentp == NULL);
1026 	ASSERT(np->r_cred == NULL);
1027 	ASSERT(np->n_rpath == NULL);
1028 	ASSERT(!(np->r_flags & RHASHED));
1029 	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
1030 	atomic_dec_ulong((ulong_t *)&smbnodenew);
1031 	vn_invalid(vp);
1032 	vn_free(vp);
1033 	kmem_cache_free(smbnode_cache, np);
1034 	VFS_RELE(vfsp);
1035 }
1036 
1037 /*
1038  * From NFS rflush()
1039  * Flush all vnodes in this (or every) vfs.
1040  * Used by smbfs_sync and by smbfs_unmount.
1041  */
1042 /*ARGSUSED*/
1043 void
1044 smbfs_rflush(struct vfs *vfsp, cred_t *cr)
1045 {
1046 	smbmntinfo_t *mi;
1047 	smbnode_t *np;
1048 	vnode_t *vp, **vplist;
1049 	long num, cnt;
1050 
1051 	mi = VFTOSMI(vfsp);
1052 
1053 	/*
1054 	 * Check to see whether there is anything to do.
1055 	 */
1056 	num = avl_numnodes(&mi->smi_hash_avl);
1057 	if (num == 0)
1058 		return;
1059 
1060 	/*
1061 	 * Allocate a slot for all currently active rnodes on the
1062 	 * supposition that they all may need flushing.
1063 	 */
1064 	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1065 	cnt = 0;
1066 
1067 	/*
1068 	 * Walk the AVL tree looking for rnodes with page
1069 	 * lists associated with them.  Make a list of these
1070 	 * files.
1071 	 */
1072 	rw_enter(&mi->smi_hash_lk, RW_READER);
1073 	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
1074 	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
1075 		vp = SMBTOV(np);
1076 		/*
1077 		 * Don't bother sync'ing a vp if it
1078 		 * is part of virtual swap device or
1079 		 * if VFS is read-only
1080 		 */
1081 		if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1082 			continue;
1083 		/*
1084 		 * If the vnode has pages and is marked as either
1085 		 * dirty or mmap'd, hold and add this vnode to the
1086 		 * list of vnodes to flush.
1087 		 */
1088 		if (vn_has_cached_data(vp) &&
1089 		    ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
1090 			VN_HOLD(vp);
1091 			vplist[cnt++] = vp;
1092 			if (cnt == num)
1093 				break;
1094 		}
1095 	}
1096 	rw_exit(&mi->smi_hash_lk);
1097 
1098 	/*
1099 	 * Flush and release all of the files on the list.
1100 	 */
1101 	while (cnt-- > 0) {
1102 		vp = vplist[cnt];
1103 		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1104 		VN_RELE(vp);
1105 	}
1106 
1107 	kmem_free(vplist, num * sizeof (vnode_t *));
1108 }
1109 
1110 /* Here NFS has access cache stuff (nfs_subr.c) not used here */
1111 
1112 /*
1113  * Set or Clear direct I/O flag
1114  * VOP_RWLOCK() is held for write access to prevent a race condition
1115  * which would occur if a process is in the middle of a write when
1116  * directio flag gets set. It is possible that all pages may not get flushed.
1117  * From nfs_common.c
1118  */
1119 
1120 /* ARGSUSED */
1121 int
1122 smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
1123 {
1124 	int	error = 0;
1125 	smbnode_t	*np;
1126 
1127 	np = VTOSMB(vp);
1128 
1129 	if (cmd == DIRECTIO_ON) {
1130 
1131 		if (np->r_flags & RDIRECTIO)
1132 			return (0);
1133 
1134 		/*
1135 		 * Flush the page cache.
1136 		 */
1137 
1138 		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1139 
1140 		if (np->r_flags & RDIRECTIO) {
1141 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1142 			return (0);
1143 		}
1144 
1145 		/* Here NFS also checks ->r_awcount */
1146 		if (vn_has_cached_data(vp) &&
1147 		    (np->r_flags & RDIRTY) != 0) {
1148 			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
1149 			    B_INVAL, cr, NULL);
1150 			if (error) {
1151 				if (error == ENOSPC || error == EDQUOT) {
1152 					mutex_enter(&np->r_statelock);
1153 					if (!np->r_error)
1154 						np->r_error = error;
1155 					mutex_exit(&np->r_statelock);
1156 				}
1157 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1158 				return (error);
1159 			}
1160 		}
1161 
1162 		mutex_enter(&np->r_statelock);
1163 		np->r_flags |= RDIRECTIO;
1164 		mutex_exit(&np->r_statelock);
1165 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1166 		return (0);
1167 	}
1168 
1169 	if (cmd == DIRECTIO_OFF) {
1170 		mutex_enter(&np->r_statelock);
1171 		np->r_flags &= ~RDIRECTIO;	/* disable direct mode */
1172 		mutex_exit(&np->r_statelock);
1173 		return (0);
1174 	}
1175 
1176 	return (EINVAL);
1177 }
1178 
1179 static kmutex_t smbfs_newnum_lock;
1180 static uint32_t smbfs_newnum_val = 0;
1181 
1182 /*
1183  * Return a number 0..0xffffffff that's different from the last
1184  * 0xffffffff numbers this returned.  Used for unlinked files.
1185  * From NFS nfs_subr.c newnum
1186  */
1187 uint32_t
1188 smbfs_newnum(void)
1189 {
1190 	uint32_t id;
1191 
1192 	mutex_enter(&smbfs_newnum_lock);
1193 	if (smbfs_newnum_val == 0)
1194 		smbfs_newnum_val = (uint32_t)gethrestime_sec();
1195 	id = smbfs_newnum_val++;
1196 	mutex_exit(&smbfs_newnum_lock);
1197 	return (id);
1198 }
1199 
1200 /*
1201  * Fill in a temporary name at buf
1202  */
1203 int
1204 smbfs_newname(char *buf, size_t buflen)
1205 {
1206 	uint_t id;
1207 	int n;
1208 
1209 	id = smbfs_newnum();
1210 	n = snprintf(buf, buflen, "~$smbfs%08X", id);
1211 	return (n);
1212 }
1213 
1214 
1215 /*
1216  * initialize resources that are used by smbfs_subr.c
1217  * this is called from the _init() routine (by the way of smbfs_clntinit())
1218  *
1219  * From NFS: nfs_subr.c:nfs_subrinit
1220  */
1221 int
1222 smbfs_subrinit(void)
1223 {
1224 	ulong_t nsmbnode_max;
1225 
1226 	/*
1227 	 * Allocate and initialize the smbnode cache
1228 	 */
1229 	if (nsmbnode <= 0)
1230 		nsmbnode = ncsize; /* dnlc.h */
1231 	nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
1232 	    sizeof (struct smbnode));
1233 	if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
1234 		zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1235 		    "setting nsmbnode to max value of %ld", nsmbnode_max);
1236 		nsmbnode = nsmbnode_max;
1237 	}
1238 
1239 	smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
1240 	    0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);
1241 
1242 	/*
1243 	 * Initialize the various mutexes and reader/writer locks
1244 	 */
1245 	mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
1246 	mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
1247 
1248 	/*
1249 	 * Assign unique major number for all smbfs mounts
1250 	 */
1251 	if ((smbfs_major = getudev()) == -1) {
1252 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
1253 		    "smbfs: init: can't get unique device number");
1254 		smbfs_major = 0;
1255 	}
1256 	smbfs_minor = 0;
1257 
1258 	return (0);
1259 }
1260 
1261 /*
1262  * free smbfs hash table, etc.
1263  * From NFS: nfs_subr.c:nfs_subrfini
1264  */
1265 void
1266 smbfs_subrfini(void)
1267 {
1268 
1269 	/*
1270 	 * Destroy the smbnode cache
1271 	 */
1272 	kmem_cache_destroy(smbnode_cache);
1273 
1274 	/*
1275 	 * Destroy the various mutexes and reader/writer locks
1276 	 */
1277 	mutex_destroy(&smbfreelist_lock);
1278 	mutex_destroy(&smbfs_minor_lock);
1279 }
1280 
1281 /* rddir_cache ? */
1282 
1283 /*
1284  * Support functions for smbfs_kmem_reclaim
1285  */
1286 
1287 static void
1288 smbfs_node_reclaim(void)
1289 {
1290 	smbmntinfo_t *mi;
1291 	smbnode_t *np;
1292 	vnode_t *vp;
1293 
1294 	mutex_enter(&smbfreelist_lock);
1295 	while ((np = smbfreelist) != NULL) {
1296 		sn_rmfree(np);
1297 		mutex_exit(&smbfreelist_lock);
1298 		if (np->r_flags & RHASHED) {
1299 			vp = SMBTOV(np);
1300 			mi = np->n_mount;
1301 			rw_enter(&mi->smi_hash_lk, RW_WRITER);
1302 			mutex_enter(&vp->v_lock);
1303 			if (vp->v_count > 1) {
1304 				VN_RELE_LOCKED(vp);
1305 				mutex_exit(&vp->v_lock);
1306 				rw_exit(&mi->smi_hash_lk);
1307 				mutex_enter(&smbfreelist_lock);
1308 				continue;
1309 			}
1310 			mutex_exit(&vp->v_lock);
1311 			sn_rmhash_locked(np);
1312 			rw_exit(&mi->smi_hash_lk);
1313 		}
1314 		/*
1315 		 * This call to smbfs_addfree will end up destroying the
1316 		 * smbnode, but in a safe way with the appropriate set
1317 		 * of checks done.
1318 		 */
1319 		smbfs_addfree(np);
1320 		mutex_enter(&smbfreelist_lock);
1321 	}
1322 	mutex_exit(&smbfreelist_lock);
1323 }
1324 
/*
 * Reclaim callback registered with the smbnode kmem cache:
 * the allocator calls this to ask us
 * "Please give back some memory!"
 * The argument is unused; the work is done by smbfs_node_reclaim().
 *
 * Todo: dump nodes from the free list?
 */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
	smbfs_node_reclaim();
}
1337 
1338 /*
1339  * Here NFS has failover stuff and
1340  * nfs_rw_xxx - see smbfs_rwlock.c
1341  */
1342