xref: /illumos-gate/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c (revision 67d74cc3e7c9d9461311136a0b2069813a3fd927)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
26  *	All rights reserved.
27  */
28 /*
29  * Copyright (c) 2017 by Delphix. All rights reserved.
30  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
31  */
32 
33 /*
34  * Node hash implementation initially borrowed from NFS (nfs_subr.c)
35  * but then heavily modified. It's no longer an array of hash lists,
36  * but an AVL tree per mount point.  More on this below.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/time.h>
42 #include <sys/vnode.h>
43 #include <sys/atomic.h>
44 #include <sys/bitmap.h>
45 #include <sys/buf.h>
46 #include <sys/dnlc.h>
47 #include <sys/kmem.h>
48 #include <sys/sunddi.h>
49 #include <sys/sysmacros.h>
50 #include <sys/fcntl.h>
51 
52 #include <netsmb/smb_osdep.h>
53 
54 #include <netsmb/smb.h>
55 #include <netsmb/smb_conn.h>
56 #include <netsmb/smb_subr.h>
57 #include <netsmb/smb_rq.h>
58 
59 #include <smbfs/smbfs.h>
60 #include <smbfs/smbfs_node.h>
61 #include <smbfs/smbfs_subr.h>
62 
63 /*
64  * The AVL trees (now per-mount) allow finding an smbfs node by its
65  * full remote path name.  It also allows easy traversal of all nodes
66  * below (path wise) any given node.  A reader/writer lock for each
67  * (per mount) AVL tree is used to control access and to synchronize
68  * lookups, additions, and deletions from that AVL tree.
69  *
70  * Previously, this code used a global array of hash chains, each with
71  * its own rwlock.  A few struct members, functions, and comments may
72  * still refer to a "hash", and those should all now be considered to
73  * refer to the per-mount AVL tree that replaced the old hash chains.
74  * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
75  *
76  * The smbnode freelist is organized as a doubly linked list with
77  * a head pointer.  Additions and deletions are synchronized via
78  * a single mutex.
79  *
80  * In order to add an smbnode to the free list, it must be linked into
81  * the mount's AVL tree and the exclusive lock for the AVL must be held.
82  * If an smbnode is not linked into the AVL tree, then it is destroyed
83  * because it represents no valuable information that can be reused
84  * about the file.  The exclusive lock for the AVL tree must be held
85  * in order to prevent a lookup in the AVL tree from finding the
86  * smbnode and using it and assuming that the smbnode is not on the
87  * freelist.  The lookup in the AVL tree will have the AVL tree lock
88  * held, either exclusive or shared.
89  *
90  * The vnode reference count for each smbnode is not allowed to drop
91  * below 1.  This prevents external entities, such as the VM
92  * subsystem, from acquiring references to vnodes already on the
93  * freelist and then trying to place them back on the freelist
94  * when their reference is released.  This means that the when an
95  * smbnode is looked up in the AVL tree, then either the smbnode
96  * is removed from the freelist and that reference is tranfered to
97  * the new reference or the vnode reference count must be incremented
98  * accordingly.  The mutex for the freelist must be held in order to
99  * accurately test to see if the smbnode is on the freelist or not.
100  * The AVL tree lock might be held shared and it is possible that
101  * two different threads may race to remove the smbnode from the
102  * freelist.  This race can be resolved by holding the mutex for the
103  * freelist.  Please note that the mutex for the freelist does not
104  * need to be held if the smbnode is not on the freelist.  It can not be
105  * placed on the freelist due to the requirement that the thread
106  * putting the smbnode on the freelist must hold the exclusive lock
107  * for the AVL tree and the thread doing the lookup in the AVL tree
108  * is holding either a shared or exclusive lock for the AVL tree.
109  *
110  * The lock ordering is:
111  *
112  *	AVL tree lock -> vnode lock
113  *	AVL tree lock -> freelist lock
114  */
115 
116 static kmutex_t smbfreelist_lock;
117 static smbnode_t *smbfreelist = NULL;
118 static ulong_t	smbnodenew = 0;
119 long	nsmbnode = 0;
120 
121 static struct kmem_cache *smbnode_cache;
122 
123 static const vsecattr_t smbfs_vsa0 = { 0 };
124 
125 /*
126  * Mutex to protect the following variables:
127  *	smbfs_major
128  *	smbfs_minor
129  */
130 kmutex_t smbfs_minor_lock;
131 int smbfs_major;
132 int smbfs_minor;
133 
134 /* See smbfs_node_findcreate() */
135 struct smbfattr smbfs_fattr0;
136 
137 /*
138  * Local functions.
139  * SN for Smb Node
140  */
141 static void sn_rmfree(smbnode_t *);
142 static void sn_inactive(smbnode_t *);
143 static void sn_addhash_locked(smbnode_t *, avl_index_t);
144 static void sn_rmhash_locked(smbnode_t *);
145 static void sn_destroy_node(smbnode_t *);
146 void smbfs_kmem_reclaim(void *cdrarg);
147 
148 static smbnode_t *
149 sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);
150 
151 static smbnode_t *
152 make_smbnode(smbmntinfo_t *, const char *, int, int *);
153 
154 /*
155  * Free the resources associated with an smbnode.
156  * Note: This is different from smbfs_inactive
157  *
158  * From NFS: nfs_subr.c:rinactive
159  */
160 static void
161 sn_inactive(smbnode_t *np)
162 {
163 	vsecattr_t	ovsa;
164 	cred_t		*oldcr;
165 	char		*orpath;
166 	int		orplen;
167 	vnode_t		*vp;
168 
169 	/*
170 	 * Here NFS has:
171 	 * Flush and invalidate all pages (done by caller)
172 	 * Free any held credentials and caches...
173 	 * etc.  (See NFS code)
174 	 */
175 	mutex_enter(&np->r_statelock);
176 
177 	ovsa = np->r_secattr;
178 	np->r_secattr = smbfs_vsa0;
179 	np->r_sectime = 0;
180 
181 	oldcr = np->r_cred;
182 	np->r_cred = NULL;
183 
184 	orpath = np->n_rpath;
185 	orplen = np->n_rplen;
186 	np->n_rpath = NULL;
187 	np->n_rplen = 0;
188 
189 	mutex_exit(&np->r_statelock);
190 
191 	vp = SMBTOV(np);
192 	if (vn_has_cached_data(vp)) {
193 		ASSERT3P(vp,==,NULL);
194 	}
195 
196 	if (ovsa.vsa_aclentp != NULL)
197 		kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);
198 
199 	if (oldcr != NULL)
200 		crfree(oldcr);
201 
202 	if (orpath != NULL)
203 		kmem_free(orpath, orplen + 1);
204 }
205 
206 /*
207  * Find and optionally create an smbnode for the passed
208  * mountinfo, directory, separator, and name.  If the
209  * desired smbnode already exists, return a reference.
210  * If the file attributes pointer is non-null, the node
211  * is created if necessary and linked into the AVL tree.
212  *
213  * Callers that need a node created but don't have the
214  * real attributes pass smbfs_fattr0 to force creation.
215  *
216  * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
217  *
218  * Based on NFS: nfs_subr.c:makenfsnode
219  */
220 smbnode_t *
221 smbfs_node_findcreate(
222 	smbmntinfo_t *mi,
223 	const char *dirnm,
224 	int dirlen,
225 	const char *name,
226 	int nmlen,
227 	char sep,
228 	struct smbfattr *fap)
229 {
230 	char tmpbuf[256];
231 	size_t rpalloc;
232 	char *p, *rpath;
233 	int rplen;
234 	smbnode_t *np;
235 	vnode_t *vp;
236 	int newnode;
237 
238 	/*
239 	 * Build the search string, either in tmpbuf or
240 	 * in allocated memory if larger than tmpbuf.
241 	 */
242 	rplen = dirlen;
243 	if (sep != '\0')
244 		rplen++;
245 	rplen += nmlen;
246 	if (rplen < sizeof (tmpbuf)) {
247 		/* use tmpbuf */
248 		rpalloc = 0;
249 		rpath = tmpbuf;
250 	} else {
251 		rpalloc = rplen + 1;
252 		rpath = kmem_alloc(rpalloc, KM_SLEEP);
253 	}
254 	p = rpath;
255 	bcopy(dirnm, p, dirlen);
256 	p += dirlen;
257 	if (sep != '\0')
258 		*p++ = sep;
259 	if (name != NULL) {
260 		bcopy(name, p, nmlen);
261 		p += nmlen;
262 	}
263 	ASSERT(p == rpath + rplen);
264 
265 	/*
266 	 * Find or create a node with this path.
267 	 */
268 	rw_enter(&mi->smi_hash_lk, RW_READER);
269 	if (fap == NULL)
270 		np = sn_hashfind(mi, rpath, rplen, NULL);
271 	else
272 		np = make_smbnode(mi, rpath, rplen, &newnode);
273 	rw_exit(&mi->smi_hash_lk);
274 
275 	if (rpalloc)
276 		kmem_free(rpath, rpalloc);
277 
278 	if (fap == NULL) {
279 		/*
280 		 * Caller is "just looking" (no create)
281 		 * so np may or may not be NULL here.
282 		 * Either way, we're done.
283 		 */
284 		return (np);
285 	}
286 
287 	/*
288 	 * We should have a node, possibly created.
289 	 * Do we have (real) attributes to apply?
290 	 */
291 	ASSERT(np != NULL);
292 	if (fap == &smbfs_fattr0)
293 		return (np);
294 
295 	/*
296 	 * Apply the given attributes to this node,
297 	 * dealing with any cache impact, etc.
298 	 */
299 	vp = SMBTOV(np);
300 	smbfs_attrcache_fa(vp, fap);
301 
302 	/*
303 	 * Note NFS sets vp->v_type here, assuming it
304 	 * can never change for the life of a node.
305 	 * We allow v_type to change, and set it in
306 	 * smbfs_attrcache().  Also: mode, uid, gid
307 	 */
308 	return (np);
309 }
310 
311 /*
312  * Here NFS has: nfs_subr.c:rtablehash
313  * We use smbfs_hash().
314  */
315 
316 /*
317  * Find or create an smbnode.
318  * From NFS: nfs_subr.c:make_rnode
319  */
320 static smbnode_t *
321 make_smbnode(
322 	smbmntinfo_t *mi,
323 	const char *rpath,
324 	int rplen,
325 	int *newnode)
326 {
327 	smbnode_t *np;
328 	smbnode_t *tnp;
329 	vnode_t *vp;
330 	vfs_t *vfsp;
331 	avl_index_t where;
332 	char *new_rpath = NULL;
333 
334 	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
335 	vfsp = mi->smi_vfsp;
336 
337 start:
338 	np = sn_hashfind(mi, rpath, rplen, NULL);
339 	if (np != NULL) {
340 		*newnode = 0;
341 		return (np);
342 	}
343 
344 	/* Note: will retake this lock below. */
345 	rw_exit(&mi->smi_hash_lk);
346 
347 	/*
348 	 * see if we can find something on the freelist
349 	 */
350 	mutex_enter(&smbfreelist_lock);
351 	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
352 		np = smbfreelist;
353 		sn_rmfree(np);
354 		mutex_exit(&smbfreelist_lock);
355 
356 		vp = SMBTOV(np);
357 
358 		if (np->r_flags & RHASHED) {
359 			smbmntinfo_t *tmp_mi = np->n_mount;
360 			ASSERT(tmp_mi != NULL);
361 			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
362 			mutex_enter(&vp->v_lock);
363 			if (vp->v_count > 1) {
364 				VN_RELE_LOCKED(vp);
365 				mutex_exit(&vp->v_lock);
366 				rw_exit(&tmp_mi->smi_hash_lk);
367 				/* start over */
368 				rw_enter(&mi->smi_hash_lk, RW_READER);
369 				goto start;
370 			}
371 			mutex_exit(&vp->v_lock);
372 			sn_rmhash_locked(np);
373 			rw_exit(&tmp_mi->smi_hash_lk);
374 		}
375 
376 		sn_inactive(np);
377 
378 		mutex_enter(&vp->v_lock);
379 		if (vp->v_count > 1) {
380 			VN_RELE_LOCKED(vp);
381 			mutex_exit(&vp->v_lock);
382 			rw_enter(&mi->smi_hash_lk, RW_READER);
383 			goto start;
384 		}
385 		mutex_exit(&vp->v_lock);
386 		vn_invalid(vp);
387 		/*
388 		 * destroy old locks before bzero'ing and
389 		 * recreating the locks below.
390 		 */
391 		smbfs_rw_destroy(&np->r_rwlock);
392 		smbfs_rw_destroy(&np->r_lkserlock);
393 		mutex_destroy(&np->r_statelock);
394 		cv_destroy(&np->r_cv);
395 		/*
396 		 * Make sure that if smbnode is recycled then
397 		 * VFS count is decremented properly before
398 		 * reuse.
399 		 */
400 		VFS_RELE(vp->v_vfsp);
401 		vn_reinit(vp);
402 	} else {
403 		/*
404 		 * allocate and initialize a new smbnode
405 		 */
406 		vnode_t *new_vp;
407 
408 		mutex_exit(&smbfreelist_lock);
409 
410 		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
411 		new_vp = vn_alloc(KM_SLEEP);
412 
413 		atomic_inc_ulong((ulong_t *)&smbnodenew);
414 		vp = new_vp;
415 	}
416 
417 	/*
418 	 * Allocate and copy the rpath we'll need below.
419 	 */
420 	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
421 	bcopy(rpath, new_rpath, rplen);
422 	new_rpath[rplen] = '\0';
423 
424 	/* Initialize smbnode_t */
425 	bzero(np, sizeof (*np));
426 
427 	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
428 	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
429 	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
430 	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
431 	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */
432 
433 	np->r_vnode = vp;
434 	np->n_mount = mi;
435 
436 	np->n_fid = NULL;
437 	np->n_uid = mi->smi_uid;
438 	np->n_gid = mi->smi_gid;
439 	/* Leave attributes "stale." */
440 
441 	/*
442 	 * Here NFS has avl_create(&np->r_dir, ...)
443 	 * for the readdir cache (not used here).
444 	 */
445 
446 	/* Now fill in the vnode. */
447 	vn_setops(vp, smbfs_vnodeops);
448 	vp->v_data = (caddr_t)np;
449 	VFS_HOLD(vfsp);
450 	vp->v_vfsp = vfsp;
451 	vp->v_type = VNON;
452 
453 	/*
454 	 * We entered with mi->smi_hash_lk held (reader).
455 	 * Retake it now, (as the writer).
456 	 * Will return with it held.
457 	 */
458 	rw_enter(&mi->smi_hash_lk, RW_WRITER);
459 
460 	/*
461 	 * There is a race condition where someone else
462 	 * may alloc the smbnode while no locks are held,
463 	 * so check again and recover if found.
464 	 */
465 	tnp = sn_hashfind(mi, rpath, rplen, &where);
466 	if (tnp != NULL) {
467 		/*
468 		 * Lost the race.  Put the node we were building
469 		 * on the free list and return the one we found.
470 		 */
471 		rw_exit(&mi->smi_hash_lk);
472 		kmem_free(new_rpath, rplen + 1);
473 		smbfs_addfree(np);
474 		rw_enter(&mi->smi_hash_lk, RW_READER);
475 		*newnode = 0;
476 		return (tnp);
477 	}
478 
479 	/*
480 	 * Hash search identifies nodes by the remote path
481 	 * (n_rpath) so fill that in now, before linking
482 	 * this node into the node cache (AVL tree).
483 	 */
484 	np->n_rpath = new_rpath;
485 	np->n_rplen = rplen;
486 	np->n_ino = smbfs_gethash(new_rpath, rplen);
487 
488 	sn_addhash_locked(np, where);
489 	*newnode = 1;
490 	return (np);
491 }
492 
493 /*
494  * smbfs_addfree
495  * Put an smbnode on the free list, or destroy it immediately
496  * if it offers no value were it to be reclaimed later.  Also
497  * destroy immediately when we have too many smbnodes, etc.
498  *
499  * Normally called by smbfs_inactive, but also
500  * called in here during cleanup operations.
501  *
502  * From NFS: nfs_subr.c:rp_addfree
503  */
504 void
505 smbfs_addfree(smbnode_t *np)
506 {
507 	vnode_t *vp;
508 	struct vfs *vfsp;
509 	smbmntinfo_t *mi;
510 
511 	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
512 
513 	vp = SMBTOV(np);
514 	ASSERT(vp->v_count >= 1);
515 
516 	vfsp = vp->v_vfsp;
517 	mi = VFTOSMI(vfsp);
518 
519 	/*
520 	 * If there are no more references to this smbnode and:
521 	 * we have too many smbnodes allocated, or if the node
522 	 * is no longer accessible via the AVL tree (!RHASHED),
523 	 * or an i/o error occurred while writing to the file,
524 	 * or it's part of an unmounted FS, then try to destroy
525 	 * it instead of putting it on the smbnode freelist.
526 	 */
527 	if (np->r_count == 0 && (
528 	    (np->r_flags & RHASHED) == 0 ||
529 	    (np->r_error != 0) ||
530 	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
531 	    (smbnodenew > nsmbnode))) {
532 
533 		/* Try to destroy this node. */
534 
535 		if (np->r_flags & RHASHED) {
536 			rw_enter(&mi->smi_hash_lk, RW_WRITER);
537 			mutex_enter(&vp->v_lock);
538 			if (vp->v_count > 1) {
539 				VN_RELE_LOCKED(vp);
540 				mutex_exit(&vp->v_lock);
541 				rw_exit(&mi->smi_hash_lk);
542 				return;
543 				/*
544 				 * Will get another call later,
545 				 * via smbfs_inactive.
546 				 */
547 			}
548 			mutex_exit(&vp->v_lock);
549 			sn_rmhash_locked(np);
550 			rw_exit(&mi->smi_hash_lk);
551 		}
552 
553 		sn_inactive(np);
554 
555 		/*
556 		 * Recheck the vnode reference count.  We need to
557 		 * make sure that another reference has not been
558 		 * acquired while we were not holding v_lock.  The
559 		 * smbnode is not in the smbnode "hash" AVL tree, so
560 		 * the only way for a reference to have been acquired
561 		 * is for a VOP_PUTPAGE because the smbnode was marked
562 		 * with RDIRTY or for a modified page.  This vnode
563 		 * reference may have been acquired before our call
564 		 * to sn_inactive.  The i/o may have been completed,
565 		 * thus allowing sn_inactive to complete, but the
566 		 * reference to the vnode may not have been released
567 		 * yet.  In any case, the smbnode can not be destroyed
568 		 * until the other references to this vnode have been
569 		 * released.  The other references will take care of
570 		 * either destroying the smbnode or placing it on the
571 		 * smbnode freelist.  If there are no other references,
572 		 * then the smbnode may be safely destroyed.
573 		 */
574 		mutex_enter(&vp->v_lock);
575 		if (vp->v_count > 1) {
576 			VN_RELE_LOCKED(vp);
577 			mutex_exit(&vp->v_lock);
578 			return;
579 		}
580 		mutex_exit(&vp->v_lock);
581 
582 		sn_destroy_node(np);
583 		return;
584 	}
585 
586 	/*
587 	 * Lock the AVL tree and then recheck the reference count
588 	 * to ensure that no other threads have acquired a reference
589 	 * to indicate that the smbnode should not be placed on the
590 	 * freelist.  If another reference has been acquired, then
591 	 * just release this one and let the other thread complete
592 	 * the processing of adding this smbnode to the freelist.
593 	 */
594 	rw_enter(&mi->smi_hash_lk, RW_WRITER);
595 
596 	mutex_enter(&vp->v_lock);
597 	if (vp->v_count > 1) {
598 		VN_RELE_LOCKED(vp);
599 		mutex_exit(&vp->v_lock);
600 		rw_exit(&mi->smi_hash_lk);
601 		return;
602 	}
603 	mutex_exit(&vp->v_lock);
604 
605 	/*
606 	 * Put this node on the free list.
607 	 */
608 	mutex_enter(&smbfreelist_lock);
609 	if (smbfreelist == NULL) {
610 		np->r_freef = np;
611 		np->r_freeb = np;
612 		smbfreelist = np;
613 	} else {
614 		np->r_freef = smbfreelist;
615 		np->r_freeb = smbfreelist->r_freeb;
616 		smbfreelist->r_freeb->r_freef = np;
617 		smbfreelist->r_freeb = np;
618 	}
619 	mutex_exit(&smbfreelist_lock);
620 
621 	rw_exit(&mi->smi_hash_lk);
622 }
623 
624 /*
625  * Remove an smbnode from the free list.
626  *
627  * The caller must be holding smbfreelist_lock and the smbnode
628  * must be on the freelist.
629  *
630  * From NFS: nfs_subr.c:rp_rmfree
631  */
632 static void
633 sn_rmfree(smbnode_t *np)
634 {
635 
636 	ASSERT(MUTEX_HELD(&smbfreelist_lock));
637 	ASSERT(np->r_freef != NULL && np->r_freeb != NULL);
638 
639 	if (np == smbfreelist) {
640 		smbfreelist = np->r_freef;
641 		if (np == smbfreelist)
642 			smbfreelist = NULL;
643 	}
644 
645 	np->r_freeb->r_freef = np->r_freef;
646 	np->r_freef->r_freeb = np->r_freeb;
647 
648 	np->r_freef = np->r_freeb = NULL;
649 }
650 
651 /*
652  * Put an smbnode in the "hash" AVL tree.
653  *
654  * The caller must be hold the rwlock as writer.
655  *
656  * From NFS: nfs_subr.c:rp_addhash
657  */
658 static void
659 sn_addhash_locked(smbnode_t *np, avl_index_t where)
660 {
661 	smbmntinfo_t *mi = np->n_mount;
662 
663 	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
664 
665 	mutex_enter(&np->r_statelock);
666 	if ((np->r_flags & RHASHED) == 0) {
667 		avl_insert(&mi->smi_hash_avl, np, where);
668 		np->r_flags |= RHASHED;
669 	}
670 	mutex_exit(&np->r_statelock);
671 }
672 
673 /*
674  * Remove an smbnode from the "hash" AVL tree.
675  *
676  * The caller must hold the rwlock as writer.
677  *
678  * From NFS: nfs_subr.c:rp_rmhash_locked
679  */
680 static void
681 sn_rmhash_locked(smbnode_t *np)
682 {
683 	smbmntinfo_t *mi = np->n_mount;
684 
685 	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
686 
687 	mutex_enter(&np->r_statelock);
688 	if ((np->r_flags & RHASHED) != 0) {
689 		np->r_flags &= ~RHASHED;
690 		avl_remove(&mi->smi_hash_avl, np);
691 	}
692 	mutex_exit(&np->r_statelock);
693 }
694 
695 /*
696  * Remove an smbnode from the "hash" AVL tree.
697  *
698  * The caller must not be holding the rwlock.
699  */
700 void
701 smbfs_rmhash(smbnode_t *np)
702 {
703 	smbmntinfo_t *mi = np->n_mount;
704 
705 	rw_enter(&mi->smi_hash_lk, RW_WRITER);
706 	sn_rmhash_locked(np);
707 	rw_exit(&mi->smi_hash_lk);
708 }
709 
710 /*
711  * Lookup an smbnode by remote pathname
712  *
713  * The caller must be holding the AVL rwlock, either shared or exclusive.
714  *
715  * From NFS: nfs_subr.c:rfind
716  */
717 static smbnode_t *
718 sn_hashfind(
719 	smbmntinfo_t *mi,
720 	const char *rpath,
721 	int rplen,
722 	avl_index_t *pwhere) /* optional */
723 {
724 	smbfs_node_hdr_t nhdr;
725 	smbnode_t *np;
726 	vnode_t *vp;
727 
728 	ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));
729 
730 	bzero(&nhdr, sizeof (nhdr));
731 	nhdr.hdr_n_rpath = (char *)rpath;
732 	nhdr.hdr_n_rplen = rplen;
733 
734 	/* See smbfs_node_cmp below. */
735 	np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);
736 
737 	if (np == NULL)
738 		return (NULL);
739 
740 	/*
741 	 * Found it in the "hash" AVL tree.
742 	 * Remove from free list, if necessary.
743 	 */
744 	vp = SMBTOV(np);
745 	if (np->r_freef != NULL) {
746 		mutex_enter(&smbfreelist_lock);
747 		/*
748 		 * If the smbnode is on the freelist,
749 		 * then remove it and use that reference
750 		 * as the new reference.  Otherwise,
751 		 * need to increment the reference count.
752 		 */
753 		if (np->r_freef != NULL) {
754 			sn_rmfree(np);
755 			mutex_exit(&smbfreelist_lock);
756 		} else {
757 			mutex_exit(&smbfreelist_lock);
758 			VN_HOLD(vp);
759 		}
760 	} else
761 		VN_HOLD(vp);
762 
763 	return (np);
764 }
765 
766 static int
767 smbfs_node_cmp(const void *va, const void *vb)
768 {
769 	const smbfs_node_hdr_t *a = va;
770 	const smbfs_node_hdr_t *b = vb;
771 	int clen, diff;
772 
773 	/*
774 	 * Same semantics as strcmp, but does not
775 	 * assume the strings are null terminated.
776 	 */
777 	clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
778 	    a->hdr_n_rplen : b->hdr_n_rplen;
779 	diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
780 	if (diff < 0)
781 		return (-1);
782 	if (diff > 0)
783 		return (1);
784 	/* they match through clen */
785 	if (b->hdr_n_rplen > clen)
786 		return (-1);
787 	if (a->hdr_n_rplen > clen)
788 		return (1);
789 	return (0);
790 }
791 
792 /*
793  * Setup the "hash" AVL tree used for our node cache.
794  * See: smbfs_mount, smbfs_destroy_table.
795  */
796 void
797 smbfs_init_hash_avl(avl_tree_t *avl)
798 {
799 	avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
800 	    offsetof(smbnode_t, r_avl_node));
801 }
802 
803 /*
804  * Invalidate the cached attributes for all nodes "under" the
805  * passed-in node.  Note: the passed-in node is NOT affected by
806  * this call.  This is used both for files under some directory
807  * after the directory is deleted or renamed, and for extended
808  * attribute files (named streams) under a plain file after that
809  * file is renamed or deleted.
810  *
811  * Do this by walking the AVL tree starting at the passed in node,
812  * and continuing while the visited nodes have a path prefix matching
813  * the entire path of the passed-in node, and a separator just after
814  * that matching path prefix.  Watch out for cases where the AVL tree
815  * order may not exactly match the order of an FS walk, i.e.
816  * consider this sequence:
817  *	"foo"		(directory)
818  *	"foo bar"	(name containing a space)
819  *	"foo/bar"
820  * The walk needs to skip "foo bar" and keep going until it finds
821  * something that doesn't match the "foo" name prefix.
822  */
823 void
824 smbfs_attrcache_prune(smbnode_t *top_np)
825 {
826 	smbmntinfo_t *mi;
827 	smbnode_t *np;
828 	char *rpath;
829 	int rplen;
830 
831 	mi = top_np->n_mount;
832 	rw_enter(&mi->smi_hash_lk, RW_READER);
833 
834 	np = top_np;
835 	rpath = top_np->n_rpath;
836 	rplen = top_np->n_rplen;
837 	for (;;) {
838 		np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
839 		if (np == NULL)
840 			break;
841 		if (np->n_rplen < rplen)
842 			break;
843 		if (0 != strncmp(np->n_rpath, rpath, rplen))
844 			break;
845 		if (np->n_rplen > rplen && (
846 		    np->n_rpath[rplen] == ':' ||
847 		    np->n_rpath[rplen] == '\\'))
848 			smbfs_attrcache_remove(np);
849 	}
850 
851 	rw_exit(&mi->smi_hash_lk);
852 }
853 
854 #ifdef SMB_VNODE_DEBUG
855 int smbfs_check_table_debug = 1;
856 #else /* SMB_VNODE_DEBUG */
857 int smbfs_check_table_debug = 0;
858 #endif /* SMB_VNODE_DEBUG */
859 
860 
861 /*
862  * Return 1 if there is a active vnode belonging to this vfs in the
863  * smbnode cache.
864  *
865  * Several of these checks are done without holding the usual
866  * locks.  This is safe because destroy_smbtable(), smbfs_addfree(),
867  * etc. will redo the necessary checks before actually destroying
868  * any smbnodes.
869  *
870  * From NFS: nfs_subr.c:check_rtable
871  *
872  * Debugging changes here relative to NFS.
873  * Relatively harmless, so left 'em in.
874  */
875 int
876 smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
877 {
878 	smbmntinfo_t *mi;
879 	smbnode_t *np;
880 	vnode_t *vp;
881 	int busycnt = 0;
882 
883 	mi = VFTOSMI(vfsp);
884 	rw_enter(&mi->smi_hash_lk, RW_READER);
885 	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
886 	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
887 
888 		if (np == rtnp)
889 			continue; /* skip the root */
890 		vp = SMBTOV(np);
891 
892 		/* Now the 'busy' checks: */
893 		/* Not on the free list? */
894 		if (np->r_freef == NULL) {
895 			SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
896 			    (void *)np, np->n_rpath);
897 			busycnt++;
898 		}
899 
900 		/* Has dirty pages? */
901 		if (vn_has_cached_data(vp) &&
902 		    (np->r_flags & RDIRTY)) {
903 			SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
904 			    (void *)np, np->n_rpath);
905 			busycnt++;
906 		}
907 
908 		/* Other refs? (not reflected in v_count) */
909 		if (np->r_count > 0) {
910 			SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
911 			    (void *)np, np->n_rpath);
912 			busycnt++;
913 		}
914 
915 		if (busycnt && !smbfs_check_table_debug)
916 			break;
917 
918 	}
919 	rw_exit(&mi->smi_hash_lk);
920 
921 	return (busycnt);
922 }
923 
924 /*
925  * Destroy inactive vnodes from the AVL tree which belong to this
926  * vfs.  It is essential that we destroy all inactive vnodes during a
927  * forced unmount as well as during a normal unmount.
928  *
929  * Based on NFS: nfs_subr.c:destroy_rtable
930  *
931  * In here, we're normally destrying all or most of the AVL tree,
932  * so the natural choice is to use avl_destroy_nodes.  However,
933  * there may be a few busy nodes that should remain in the AVL
934  * tree when we're done.  The solution: use a temporary tree to
935  * hold the busy nodes until we're done destroying the old tree,
936  * then copy the temporary tree over the (now emtpy) real tree.
937  */
938 void
939 smbfs_destroy_table(struct vfs *vfsp)
940 {
941 	avl_tree_t tmp_avl;
942 	smbmntinfo_t *mi;
943 	smbnode_t *np;
944 	smbnode_t *rlist;
945 	void *v;
946 
947 	mi = VFTOSMI(vfsp);
948 	rlist = NULL;
949 	smbfs_init_hash_avl(&tmp_avl);
950 
951 	rw_enter(&mi->smi_hash_lk, RW_WRITER);
952 	v = NULL;
953 	while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {
954 
955 		mutex_enter(&smbfreelist_lock);
956 		if (np->r_freef == NULL) {
957 			/*
958 			 * Busy node (not on the free list).
959 			 * Will keep in the final AVL tree.
960 			 */
961 			mutex_exit(&smbfreelist_lock);
962 			avl_add(&tmp_avl, np);
963 		} else {
964 			/*
965 			 * It's on the free list.  Remove and
966 			 * arrange for it to be destroyed.
967 			 */
968 			sn_rmfree(np);
969 			mutex_exit(&smbfreelist_lock);
970 
971 			/*
972 			 * Last part of sn_rmhash_locked().
973 			 * NB: avl_destroy_nodes has already
974 			 * removed this from the "hash" AVL.
975 			 */
976 			mutex_enter(&np->r_statelock);
977 			np->r_flags &= ~RHASHED;
978 			mutex_exit(&np->r_statelock);
979 
980 			/*
981 			 * Add to the list of nodes to destroy.
982 			 * Borrowing avl_child[0] for this list.
983 			 */
984 			np->r_avl_node.avl_child[0] =
985 			    (struct avl_node *)rlist;
986 			rlist = np;
987 		}
988 	}
989 	avl_destroy(&mi->smi_hash_avl);
990 
991 	/*
992 	 * Replace the (now destroyed) "hash" AVL with the
993 	 * temporary AVL, which restores the busy nodes.
994 	 */
995 	mi->smi_hash_avl = tmp_avl;
996 	rw_exit(&mi->smi_hash_lk);
997 
998 	/*
999 	 * Now destroy the nodes on our temporary list (rlist).
1000 	 * This call to smbfs_addfree will end up destroying the
1001 	 * smbnode, but in a safe way with the appropriate set
1002 	 * of checks done.
1003 	 */
1004 	while ((np = rlist) != NULL) {
1005 		rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
1006 		smbfs_addfree(np);
1007 	}
1008 }
1009 
1010 /*
1011  * This routine destroys all the resources associated with the smbnode
1012  * and then the smbnode itself.  Note: sn_inactive has been called.
1013  *
1014  * From NFS: nfs_subr.c:destroy_rnode
1015  */
1016 static void
1017 sn_destroy_node(smbnode_t *np)
1018 {
1019 	vnode_t *vp;
1020 	vfs_t *vfsp;
1021 
1022 	vp = SMBTOV(np);
1023 	vfsp = vp->v_vfsp;
1024 
1025 	ASSERT(vp->v_count == 1);
1026 	ASSERT(np->r_count == 0);
1027 	ASSERT(np->r_mapcnt == 0);
1028 	ASSERT(np->r_secattr.vsa_aclentp == NULL);
1029 	ASSERT(np->r_cred == NULL);
1030 	ASSERT(np->n_rpath == NULL);
1031 	ASSERT(!(np->r_flags & RHASHED));
1032 	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
1033 	atomic_dec_ulong((ulong_t *)&smbnodenew);
1034 	vn_invalid(vp);
1035 	vn_free(vp);
1036 	kmem_cache_free(smbnode_cache, np);
1037 	VFS_RELE(vfsp);
1038 }
1039 
1040 /*
1041  * From NFS rflush()
1042  * Flush all vnodes in this (or every) vfs.
1043  * Used by smbfs_sync and by smbfs_unmount.
1044  */
1045 /*ARGSUSED*/
1046 void
1047 smbfs_rflush(struct vfs *vfsp, cred_t *cr)
1048 {
1049 	smbmntinfo_t *mi;
1050 	smbnode_t *np;
1051 	vnode_t *vp, **vplist;
1052 	long num, cnt;
1053 
1054 	mi = VFTOSMI(vfsp);
1055 
1056 	/*
1057 	 * Check to see whether there is anything to do.
1058 	 */
1059 	num = avl_numnodes(&mi->smi_hash_avl);
1060 	if (num == 0)
1061 		return;
1062 
1063 	/*
1064 	 * Allocate a slot for all currently active rnodes on the
1065 	 * supposition that they all may need flushing.
1066 	 */
1067 	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1068 	cnt = 0;
1069 
1070 	/*
1071 	 * Walk the AVL tree looking for rnodes with page
1072 	 * lists associated with them.  Make a list of these
1073 	 * files.
1074 	 */
1075 	rw_enter(&mi->smi_hash_lk, RW_READER);
1076 	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
1077 	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
1078 		vp = SMBTOV(np);
1079 		/*
1080 		 * Don't bother sync'ing a vp if it
1081 		 * is part of virtual swap device or
1082 		 * if VFS is read-only
1083 		 */
1084 		if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1085 			continue;
1086 		/*
1087 		 * If the vnode has pages and is marked as either
1088 		 * dirty or mmap'd, hold and add this vnode to the
1089 		 * list of vnodes to flush.
1090 		 */
1091 		if (vn_has_cached_data(vp) &&
1092 		    ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
1093 			VN_HOLD(vp);
1094 			vplist[cnt++] = vp;
1095 			if (cnt == num)
1096 				break;
1097 		}
1098 	}
1099 	rw_exit(&mi->smi_hash_lk);
1100 
1101 	/*
1102 	 * Flush and release all of the files on the list.
1103 	 */
1104 	while (cnt-- > 0) {
1105 		vp = vplist[cnt];
1106 		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1107 		VN_RELE(vp);
1108 	}
1109 
1110 	kmem_free(vplist, num * sizeof (vnode_t *));
1111 }
1112 
1113 /* Here NFS has access cache stuff (nfs_subr.c) not used here */
1114 
1115 /*
1116  * Set or Clear direct I/O flag
1117  * VOP_RWLOCK() is held for write access to prevent a race condition
1118  * which would occur if a process is in the middle of a write when
1119  * directio flag gets set. It is possible that all pages may not get flushed.
1120  * From nfs_common.c
1121  */
1122 
1123 /* ARGSUSED */
1124 int
1125 smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
1126 {
1127 	int	error = 0;
1128 	smbnode_t	*np;
1129 
1130 	np = VTOSMB(vp);
1131 
1132 	if (cmd == DIRECTIO_ON) {
1133 
1134 		if (np->r_flags & RDIRECTIO)
1135 			return (0);
1136 
1137 		/*
1138 		 * Flush the page cache.
1139 		 */
1140 
1141 		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1142 
1143 		if (np->r_flags & RDIRECTIO) {
1144 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1145 			return (0);
1146 		}
1147 
1148 		/* Here NFS also checks ->r_awcount */
1149 		if (vn_has_cached_data(vp) &&
1150 		    (np->r_flags & RDIRTY) != 0) {
1151 			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
1152 			    B_INVAL, cr, NULL);
1153 			if (error) {
1154 				if (error == ENOSPC || error == EDQUOT) {
1155 					mutex_enter(&np->r_statelock);
1156 					if (!np->r_error)
1157 						np->r_error = error;
1158 					mutex_exit(&np->r_statelock);
1159 				}
1160 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1161 				return (error);
1162 			}
1163 		}
1164 
1165 		mutex_enter(&np->r_statelock);
1166 		np->r_flags |= RDIRECTIO;
1167 		mutex_exit(&np->r_statelock);
1168 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1169 		return (0);
1170 	}
1171 
1172 	if (cmd == DIRECTIO_OFF) {
1173 		mutex_enter(&np->r_statelock);
1174 		np->r_flags &= ~RDIRECTIO;	/* disable direct mode */
1175 		mutex_exit(&np->r_statelock);
1176 		return (0);
1177 	}
1178 
1179 	return (EINVAL);
1180 }
1181 
1182 static kmutex_t smbfs_newnum_lock;
1183 static uint32_t smbfs_newnum_val = 0;
1184 
1185 /*
1186  * Return a number 0..0xffffffff that's different from the last
1187  * 0xffffffff numbers this returned.  Used for unlinked files.
1188  * From NFS nfs_subr.c newnum
1189  */
1190 uint32_t
1191 smbfs_newnum(void)
1192 {
1193 	uint32_t id;
1194 
1195 	mutex_enter(&smbfs_newnum_lock);
1196 	if (smbfs_newnum_val == 0)
1197 		smbfs_newnum_val = (uint32_t)gethrestime_sec();
1198 	id = smbfs_newnum_val++;
1199 	mutex_exit(&smbfs_newnum_lock);
1200 	return (id);
1201 }
1202 
1203 /*
1204  * Fill in a temporary name at buf
1205  */
1206 int
1207 smbfs_newname(char *buf, size_t buflen)
1208 {
1209 	uint_t id;
1210 	int n;
1211 
1212 	id = smbfs_newnum();
1213 	n = snprintf(buf, buflen, "~$smbfs%08X", id);
1214 	return (n);
1215 }
1216 
1217 
1218 /*
1219  * initialize resources that are used by smbfs_subr.c
1220  * this is called from the _init() routine (by the way of smbfs_clntinit())
1221  *
1222  * From NFS: nfs_subr.c:nfs_subrinit
1223  */
1224 int
1225 smbfs_subrinit(void)
1226 {
1227 	ulong_t nsmbnode_max;
1228 
1229 	/*
1230 	 * Allocate and initialize the smbnode cache
1231 	 */
1232 	if (nsmbnode <= 0)
1233 		nsmbnode = ncsize; /* dnlc.h */
1234 	nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
1235 	    sizeof (struct smbnode));
1236 	if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
1237 		cmn_err(CE_NOTE,
1238 		    "setting nsmbnode to max value of %ld", nsmbnode_max);
1239 		nsmbnode = nsmbnode_max;
1240 	}
1241 
1242 	smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
1243 	    0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);
1244 
1245 	/*
1246 	 * Initialize the various mutexes and reader/writer locks
1247 	 */
1248 	mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
1249 	mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
1250 
1251 	/*
1252 	 * Assign unique major number for all smbfs mounts
1253 	 */
1254 	if ((smbfs_major = getudev()) == -1) {
1255 		cmn_err(CE_WARN,
1256 		    "smbfs: init: can't get unique device number");
1257 		smbfs_major = 0;
1258 	}
1259 	smbfs_minor = 0;
1260 
1261 	return (0);
1262 }
1263 
1264 /*
1265  * free smbfs hash table, etc.
1266  * From NFS: nfs_subr.c:nfs_subrfini
1267  */
1268 void
1269 smbfs_subrfini(void)
1270 {
1271 
1272 	/*
1273 	 * Destroy the smbnode cache
1274 	 */
1275 	kmem_cache_destroy(smbnode_cache);
1276 
1277 	/*
1278 	 * Destroy the various mutexes and reader/writer locks
1279 	 */
1280 	mutex_destroy(&smbfreelist_lock);
1281 	mutex_destroy(&smbfs_minor_lock);
1282 }
1283 
1284 /* rddir_cache ? */
1285 
1286 /*
1287  * Support functions for smbfs_kmem_reclaim
1288  */
1289 
1290 static void
1291 smbfs_node_reclaim(void)
1292 {
1293 	smbmntinfo_t *mi;
1294 	smbnode_t *np;
1295 	vnode_t *vp;
1296 
1297 	mutex_enter(&smbfreelist_lock);
1298 	while ((np = smbfreelist) != NULL) {
1299 		sn_rmfree(np);
1300 		mutex_exit(&smbfreelist_lock);
1301 		if (np->r_flags & RHASHED) {
1302 			vp = SMBTOV(np);
1303 			mi = np->n_mount;
1304 			rw_enter(&mi->smi_hash_lk, RW_WRITER);
1305 			mutex_enter(&vp->v_lock);
1306 			if (vp->v_count > 1) {
1307 				VN_RELE_LOCKED(vp);
1308 				mutex_exit(&vp->v_lock);
1309 				rw_exit(&mi->smi_hash_lk);
1310 				mutex_enter(&smbfreelist_lock);
1311 				continue;
1312 			}
1313 			mutex_exit(&vp->v_lock);
1314 			sn_rmhash_locked(np);
1315 			rw_exit(&mi->smi_hash_lk);
1316 		}
1317 		/*
1318 		 * This call to smbfs_addfree will end up destroying the
1319 		 * smbnode, but in a safe way with the appropriate set
1320 		 * of checks done.
1321 		 */
1322 		smbfs_addfree(np);
1323 		mutex_enter(&smbfreelist_lock);
1324 	}
1325 	mutex_exit(&smbfreelist_lock);
1326 }
1327 
/*
 * Reclaim callback for the smbnode cache (it is the function handed
 * to kmem_cache_create() in smbfs_subrinit()).  This is how the
 * kernel memory allocator asks us to
 * "Please give back some memory!"
 *
 * We respond by draining the smbnode free list.  The callback
 * argument is not used.
 */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
	smbfs_node_reclaim();
}
1340 
1341 /*
1342  * Here NFS has failover stuff and
1343  * nfs_rw_xxx - see smbfs_rwlock.c
1344  */
1345