xref: /illumos-gate/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_subr2.c (revision 7f3d7c9289dee6488b3cd2848a68c0b8580d750c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
26  *	All rights reserved.
27  */
28 /*
29  * Copyright (c) 2017 by Delphix. All rights reserved.
30  * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
31  * Copyright 2025 RackTop Systems, Inc.
32  */
33 
34 /*
35  * Node hash implementation initially borrowed from NFS (nfs_subr.c)
36  * but then heavily modified. It's no longer an array of hash lists,
37  * but an AVL tree per mount point.  More on this below.
38  */
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/time.h>
43 #include <sys/vnode.h>
44 #include <sys/atomic.h>
45 #include <sys/bitmap.h>
46 #include <sys/buf.h>
47 #include <sys/dnlc.h>
48 #include <sys/kmem.h>
49 #include <sys/sunddi.h>
50 #include <sys/sysmacros.h>
51 #include <sys/fcntl.h>
52 
53 #include <netsmb/smb_osdep.h>
54 
55 #include <netsmb/smb.h>
56 #include <netsmb/smb_conn.h>
57 #include <netsmb/smb_subr.h>
58 #include <netsmb/smb_rq.h>
59 
60 #include <smbfs/smbfs.h>
61 #include <smbfs/smbfs_node.h>
62 #include <smbfs/smbfs_subr.h>
63 
64 /*
65  * The AVL trees (now per-mount) allow finding an smbfs node by its
66  * full remote path name.  It also allows easy traversal of all nodes
67  * below (path wise) any given node.  A reader/writer lock for each
68  * (per mount) AVL tree is used to control access and to synchronize
69  * lookups, additions, and deletions from that AVL tree.
70  *
 * Previously, this code used a global array of hash chains, each with
72  * its own rwlock.  A few struct members, functions, and comments may
73  * still refer to a "hash", and those should all now be considered to
74  * refer to the per-mount AVL tree that replaced the old hash chains.
75  * (i.e. member smi_hash_lk, function sn_hashfind, etc.)
76  *
77  * The smbnode freelist is organized as a doubly linked list with
78  * a head pointer.  Additions and deletions are synchronized via
79  * a single mutex.
80  *
81  * In order to add an smbnode to the free list, it must be linked into
82  * the mount's AVL tree and the exclusive lock for the AVL must be held.
83  * If an smbnode is not linked into the AVL tree, then it is destroyed
84  * because it represents no valuable information that can be reused
85  * about the file.  The exclusive lock for the AVL tree must be held
86  * in order to prevent a lookup in the AVL tree from finding the
87  * smbnode and using it and assuming that the smbnode is not on the
88  * freelist.  The lookup in the AVL tree will have the AVL tree lock
89  * held, either exclusive or shared.
90  *
91  * The vnode reference count for each smbnode is not allowed to drop
92  * below 1.  This prevents external entities, such as the VM
93  * subsystem, from acquiring references to vnodes already on the
94  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * smbnode is looked up in the AVL tree, then either the smbnode
 * is removed from the freelist and that reference is transferred to
98  * the new reference or the vnode reference count must be incremented
99  * accordingly.  The mutex for the freelist must be held in order to
100  * accurately test to see if the smbnode is on the freelist or not.
101  * The AVL tree lock might be held shared and it is possible that
102  * two different threads may race to remove the smbnode from the
103  * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist.  It can not be
106  * placed on the freelist due to the requirement that the thread
107  * putting the smbnode on the freelist must hold the exclusive lock
108  * for the AVL tree and the thread doing the lookup in the AVL tree
109  * is holding either a shared or exclusive lock for the AVL tree.
110  *
111  * The lock ordering is:
112  *
113  *	AVL tree lock -> vnode lock
114  *	AVL tree lock -> freelist lock
115  */
116 
/* Protects smbfreelist and the r_freef/r_freeb links of nodes on it. */
static kmutex_t smbfreelist_lock;
/*
 * Head of the smbnode free list (NULL when empty).  The list is
 * circular: see smbfs_addfree(), which links a lone node to itself.
 */
static smbnode_t *smbfreelist = NULL;
/*
 * Number of smbnodes currently allocated: incremented when
 * make_smbnode() allocates one, decremented in sn_destroy_node().
 */
static ulong_t	smbnodenew = 0;
/*
 * Threshold compared against smbnodenew: make_smbnode() recycles
 * from the free list once smbnodenew >= nsmbnode, and smbfs_addfree()
 * destroys nodes while smbnodenew > nsmbnode.
 * NOTE(review): presumably set to its real value during module
 * init elsewhere -- confirm.
 */
long	nsmbnode = 0;

/* kmem cache from which smbnode_t structures are allocated. */
static struct kmem_cache *smbnode_cache;

/* All-zero vsecattr, used by sn_inactive() to reset r_secattr. */
static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/*
 * See smbfs_node_findcreate().  Callers pass the address of this
 * object to request node creation without applying any attributes.
 */
struct smbfattr smbfs_fattr0;
137 
/*
 * Local functions.
 * SN for Smb Node
 */
/* Free list removal; caller holds smbfreelist_lock. */
static void sn_rmfree(smbnode_t *);
/* Release ACL, credential and remote path held by a node. */
static void sn_inactive(smbnode_t *);
/* Insert into / remove from the per-mount AVL tree (writer lock held). */
static void sn_addhash_locked(smbnode_t *, avl_index_t);
static void sn_rmhash_locked(smbnode_t *);
/* Final destruction of a node and its vnode. */
static void sn_destroy_node(smbnode_t *);
/* NOTE(review): presumably the kmem reclaim callback -- confirm. */
void smbfs_kmem_reclaim(void *cdrarg);

/* Look up a node by remote path; AVL tree lock must be held. */
static smbnode_t *
sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);

/* Find or create a node; entered with the AVL tree lock held as reader. */
static smbnode_t *
make_smbnode(smbmntinfo_t *, const char *, int, int *);
154 
155 /*
156  * Free the resources associated with an smbnode.
157  * Note: This is different from smbfs_inactive
158  *
159  * From NFS: nfs_subr.c:rinactive
160  */
161 static void
162 sn_inactive(smbnode_t *np)
163 {
164 	vsecattr_t	ovsa;
165 	cred_t		*oldcr;
166 	char		*orpath;
167 	int		orplen;
168 	vnode_t		*vp;
169 
170 	/*
171 	 * smbfs_close should already have cleaned out any FIDs.
172 	 */
173 	ASSERT3P(np->n_fid, ==, NULL);
174 	ASSERT3P(np->n_dirseq, ==, NULL);
175 
176 	/*
177 	 * Here NFS has:
178 	 * Flush and invalidate all pages (done by caller)
179 	 * Free any held credentials and caches...
180 	 * etc.  (See NFS code)
181 	 */
182 	mutex_enter(&np->r_statelock);
183 
184 	ovsa = np->r_secattr;
185 	np->r_secattr = smbfs_vsa0;
186 	np->r_sectime = 0;
187 
188 	oldcr = np->r_cred;
189 	np->r_cred = NULL;
190 
191 	orpath = np->n_rpath;
192 	orplen = np->n_rplen;
193 	np->n_rpath = NULL;
194 	np->n_rplen = 0;
195 
196 	mutex_exit(&np->r_statelock);
197 
198 	vp = SMBTOV(np);
199 	if (vn_has_cached_data(vp)) {
200 		ASSERT3P(vp, ==, NULL);
201 	}
202 
203 	if (ovsa.vsa_aclentp != NULL)
204 		kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);
205 
206 	if (oldcr != NULL)
207 		crfree(oldcr);
208 
209 	if (orpath != NULL)
210 		kmem_free(orpath, orplen + 1);
211 }
212 
213 /*
214  * Find and optionally create an smbnode for the passed
215  * mountinfo, directory, separator, and name.  If the
216  * desired smbnode already exists, return a reference.
217  * If the file attributes pointer is non-null, the node
218  * is created if necessary and linked into the AVL tree.
219  *
220  * Callers that need a node created but don't have the
221  * real attributes pass smbfs_fattr0 to force creation.
222  *
223  * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
224  *
225  * Based on NFS: nfs_subr.c:makenfsnode
226  */
227 smbnode_t *
228 smbfs_node_findcreate(
229 	smbmntinfo_t *mi,
230 	const char *dirnm,
231 	int dirlen,
232 	const char *name,
233 	int nmlen,
234 	char sep,
235 	struct smbfattr *fap)
236 {
237 	char tmpbuf[256];
238 	size_t rpalloc;
239 	char *p, *rpath;
240 	int rplen;
241 	smbnode_t *np;
242 	vnode_t *vp;
243 	int newnode;
244 
245 	/*
246 	 * Build the search string, either in tmpbuf or
247 	 * in allocated memory if larger than tmpbuf.
248 	 */
249 	rplen = dirlen;
250 	if (sep != '\0')
251 		rplen++;
252 	rplen += nmlen;
253 	if (rplen < sizeof (tmpbuf)) {
254 		/* use tmpbuf */
255 		rpalloc = 0;
256 		rpath = tmpbuf;
257 	} else {
258 		rpalloc = rplen + 1;
259 		rpath = kmem_alloc(rpalloc, KM_SLEEP);
260 	}
261 	p = rpath;
262 	bcopy(dirnm, p, dirlen);
263 	p += dirlen;
264 	if (sep != '\0')
265 		*p++ = sep;
266 	if (name != NULL) {
267 		bcopy(name, p, nmlen);
268 		p += nmlen;
269 	}
270 	ASSERT(p == rpath + rplen);
271 
272 	/*
273 	 * Find or create a node with this path.
274 	 */
275 	rw_enter(&mi->smi_hash_lk, RW_READER);
276 	if (fap == NULL)
277 		np = sn_hashfind(mi, rpath, rplen, NULL);
278 	else
279 		np = make_smbnode(mi, rpath, rplen, &newnode);
280 	rw_exit(&mi->smi_hash_lk);
281 
282 	if (rpalloc)
283 		kmem_free(rpath, rpalloc);
284 
285 	if (fap == NULL) {
286 		/*
287 		 * Caller is "just looking" (no create)
288 		 * so np may or may not be NULL here.
289 		 * Either way, we're done.
290 		 */
291 		return (np);
292 	}
293 
294 	/*
295 	 * We should have a node, possibly created.
296 	 * Do we have (real) attributes to apply?
297 	 */
298 	ASSERT(np != NULL);
299 	if (fap == &smbfs_fattr0)
300 		return (np);
301 
302 	/*
303 	 * Apply the given attributes to this node,
304 	 * dealing with any cache impact, etc.
305 	 */
306 	vp = SMBTOV(np);
307 	smbfs_attrcache_fa(vp, fap);
308 
309 	/*
310 	 * Note NFS sets vp->v_type here, assuming it
311 	 * can never change for the life of a node.
312 	 * We allow v_type to change, and set it in
313 	 * smbfs_attrcache().  Also: mode, uid, gid
314 	 */
315 	return (np);
316 }
317 
318 /*
319  * Here NFS has: nfs_subr.c:rtablehash
320  * We use smbfs_hash().
321  */
322 
/*
 * Find or create an smbnode.
 * From NFS: nfs_subr.c:make_rnode
 *
 * Locking: the caller holds mi->smi_hash_lk as reader (asserted).
 * The lock is dropped and re-taken in here, and is held again on
 * return: as reader when an existing node is returned, or as
 * writer when a new node was inserted.
 *
 * On return, *newnode is 1 if this call created a node and linked
 * it into the AVL tree, or 0 if an existing node was found.
 *
 * On a miss, the node comes either from the head of the global
 * free list (recycled, once smbnodenew >= nsmbnode) or from
 * smbnode_cache together with a newly allocated vnode.
 */
static smbnode_t *
make_smbnode(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	int *newnode)
{
	smbnode_t *np;
	smbnode_t *tnp;
	vnode_t *vp;
	vfs_t *vfsp;
	avl_index_t where;
	char *new_rpath = NULL;

	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
	vfsp = mi->smi_vfsp;

start:
	/* Every retry below re-enters here with the reader lock held. */
	np = sn_hashfind(mi, rpath, rplen, NULL);
	if (np != NULL) {
		*newnode = 0;
		return (np);
	}

	/* Note: will retake this lock below. */
	rw_exit(&mi->smi_hash_lk);

	/*
	 * see if we can find something on the freelist
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
		/* Recycle the node at the head of the free list. */
		np = smbfreelist;
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);

		vp = SMBTOV(np);

		if (np->r_flags & RHASHED) {
			/*
			 * The recycled node may belong to a different
			 * mount, so lock the tree of its own mount.
			 */
			smbmntinfo_t *tmp_mi = np->n_mount;
			ASSERT(tmp_mi != NULL);
			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Someone else holds this vnode, so it
				 * can not be recycled.  Drop the reference
				 * taken over from the free list and retry.
				 */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&tmp_mi->smi_hash_lk);
				/* start over */
				rw_enter(&mi->smi_hash_lk, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&tmp_mi->smi_hash_lk);
		}

		sn_inactive(np);

		/*
		 * Recheck the reference count, which may have
		 * changed while v_lock was not held.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			rw_enter(&mi->smi_hash_lk, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		smbfs_rw_destroy(&np->r_rwlock);
		smbfs_rw_destroy(&np->r_lkserlock);
		mutex_destroy(&np->r_statelock);
		cv_destroy(&np->r_cv);
		/*
		 * Make sure that if smbnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/*
		 * allocate and initialize a new smbnode
		 */
		vnode_t *new_vp;

		mutex_exit(&smbfreelist_lock);

		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		/* Count of allocated smbnodes; see sn_destroy_node. */
		atomic_inc_ulong((ulong_t *)&smbnodenew);
		vp = new_vp;
	}

	/*
	 * Allocate and copy the rpath we'll need below.
	 * (The caller's rpath is not NUL terminated.)
	 */
	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
	bcopy(rpath, new_rpath, rplen);
	new_rpath[rplen] = '\0';

	/*
	 * Initialize smbnode_t
	 * Zeroing also clears r_flags (so RHASHED is off)
	 * and the free list links.
	 */
	bzero(np, sizeof (*np));

	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */

	np->r_vnode = vp;
	np->n_mount = mi;

	np->n_fid = NULL;
	np->n_uid = mi->smi_uid;
	np->n_gid = mi->smi_gid;
	/* Leave attributes "stale." */

	/*
	 * Here NFS has avl_create(&np->r_dir, ...)
	 * for the readdir cache (not used here).
	 */

	/* Now fill in the vnode. */
	vn_setops(vp, smbfs_vnodeops);
	vp->v_data = (caddr_t)np;
	/* This hold is released by VFS_RELE on recycle or destroy. */
	VFS_HOLD(vfsp);
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;

	/*
	 * We entered with mi->smi_hash_lk held (reader).
	 * Retake it now, (as the writer).
	 * Will return with it held.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	/*
	 * There is a race condition where someone else
	 * may alloc the smbnode while no locks are held,
	 * so check again and recover if found.
	 * This lookup also yields the AVL insertion point.
	 */
	tnp = sn_hashfind(mi, rpath, rplen, &where);
	if (tnp != NULL) {
		/*
		 * Lost the race.  Give up the node we were building
		 * and return the one we found.  The lock is dropped
		 * first because smbfs_addfree() may take it itself.
		 * Note that np is not RHASHED here, so smbfs_addfree()
		 * takes its destroy path rather than actually
		 * putting np on the free list.
		 */
		rw_exit(&mi->smi_hash_lk);
		kmem_free(new_rpath, rplen + 1);
		smbfs_addfree(np);
		rw_enter(&mi->smi_hash_lk, RW_READER);
		*newnode = 0;
		return (tnp);
	}

	/*
	 * Hash search identifies nodes by the remote path
	 * (n_rpath) so fill that in now, before linking
	 * this node into the node cache (AVL tree).
	 */
	np->n_rpath = new_rpath;
	np->n_rplen = rplen;
	np->n_ino = smbfs_gethash(new_rpath, rplen);

	sn_addhash_locked(np, where);
	*newnode = 1;
	return (np);
}
499 
/*
 * smbfs_addfree
 * Put an smbnode on the free list, or destroy it immediately
 * if it offers no value were it to be reclaimed later.  Also
 * destroy immediately when we have too many smbnodes, etc.
 *
 * Normally called by smbfs_inactive, but also
 * called in here during cleanup operations.
 *
 * The node must not already be on the free list (asserted), and
 * the caller passes in one vnode reference (v_count >= 1 asserted).
 * That reference is released here if others exist, kept by the
 * free list if the node is placed there, or consumed when the
 * node is destroyed.
 *
 * This function takes mi->smi_hash_lk as writer on some paths,
 * so the caller should not already hold that lock.
 *
 * From NFS: nfs_subr.c:rp_addfree
 */
void
smbfs_addfree(smbnode_t *np)
{
	vnode_t *vp;
	struct vfs *vfsp;
	smbmntinfo_t *mi;

	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);

	vp = SMBTOV(np);
	ASSERT(vp->v_count >= 1);

	vfsp = vp->v_vfsp;
	mi = VFTOSMI(vfsp);

	/*
	 * If there are no more references to this smbnode and:
	 * we have too many smbnodes allocated, or if the node
	 * is no longer accessible via the AVL tree (!RHASHED),
	 * or an i/o error occurred while writing to the file,
	 * or it's part of an unmounted FS, then try to destroy
	 * it instead of putting it on the smbnode freelist.
	 */
	if (np->r_count == 0 && (
	    (np->r_flags & RHASHED) == 0 ||
	    (np->r_error != 0) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
	    (smbnodenew > nsmbnode))) {

		/* Try to destroy this node. */

		if (np->r_flags & RHASHED) {
			/*
			 * Still findable via the AVL tree.  Holding
			 * the tree lock as writer keeps lookups out
			 * while the reference count is checked and
			 * the node is unlinked.
			 */
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				return;
				/*
				 * Will get another call later,
				 * via smbfs_inactive.
				 */
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}

		sn_inactive(np);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * smbnode is not in the smbnode "hash" AVL tree, so
		 * the only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the smbnode was marked
		 * with RDIRTY or for a modified page.  This vnode
		 * reference may have been acquired before our call
		 * to sn_inactive.  The i/o may have been completed,
		 * thus allowing sn_inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the smbnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the smbnode or placing it on the
		 * smbnode freelist.  If there are no other references,
		 * then the smbnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		sn_destroy_node(np);
		return;
	}

	/*
	 * Lock the AVL tree and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the smbnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this smbnode to the freelist.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		rw_exit(&mi->smi_hash_lk);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Put this node on the free list.
	 * The list is circular; the new node goes in just before
	 * the head, i.e. at the tail.  make_smbnode() recycles
	 * from the head, so the oldest entry is reused first.
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist == NULL) {
		/* Empty list: the node links to itself. */
		np->r_freef = np;
		np->r_freeb = np;
		smbfreelist = np;
	} else {
		np->r_freef = smbfreelist;
		np->r_freeb = smbfreelist->r_freeb;
		smbfreelist->r_freeb->r_freef = np;
		smbfreelist->r_freeb = np;
	}
	mutex_exit(&smbfreelist_lock);

	rw_exit(&mi->smi_hash_lk);
}
630 
631 /*
632  * Remove an smbnode from the free list.
633  *
634  * The caller must be holding smbfreelist_lock and the smbnode
635  * must be on the freelist.
636  *
637  * From NFS: nfs_subr.c:rp_rmfree
638  */
639 static void
640 sn_rmfree(smbnode_t *np)
641 {
642 
643 	ASSERT(MUTEX_HELD(&smbfreelist_lock));
644 	ASSERT(np->r_freef != NULL && np->r_freeb != NULL);
645 
646 	if (np == smbfreelist) {
647 		smbfreelist = np->r_freef;
648 		if (np == smbfreelist)
649 			smbfreelist = NULL;
650 	}
651 
652 	np->r_freeb->r_freef = np->r_freef;
653 	np->r_freef->r_freeb = np->r_freeb;
654 
655 	np->r_freef = np->r_freeb = NULL;
656 }
657 
658 /*
659  * Put an smbnode in the "hash" AVL tree.
660  *
661  * The caller must be hold the rwlock as writer.
662  *
663  * From NFS: nfs_subr.c:rp_addhash
664  */
665 static void
666 sn_addhash_locked(smbnode_t *np, avl_index_t where)
667 {
668 	smbmntinfo_t *mi = np->n_mount;
669 
670 	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
671 
672 	mutex_enter(&np->r_statelock);
673 	if ((np->r_flags & RHASHED) == 0) {
674 		avl_insert(&mi->smi_hash_avl, np, where);
675 		np->r_flags |= RHASHED;
676 	}
677 	mutex_exit(&np->r_statelock);
678 }
679 
680 /*
681  * Remove an smbnode from the "hash" AVL tree.
682  *
683  * The caller must hold the rwlock as writer.
684  *
685  * From NFS: nfs_subr.c:rp_rmhash_locked
686  */
687 static void
688 sn_rmhash_locked(smbnode_t *np)
689 {
690 	smbmntinfo_t *mi = np->n_mount;
691 
692 	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));
693 
694 	mutex_enter(&np->r_statelock);
695 	if ((np->r_flags & RHASHED) != 0) {
696 		np->r_flags &= ~RHASHED;
697 		avl_remove(&mi->smi_hash_avl, np);
698 	}
699 	mutex_exit(&np->r_statelock);
700 }
701 
702 /*
703  * Remove an smbnode from the "hash" AVL tree.
704  *
705  * The caller must not be holding the rwlock.
706  */
707 void
708 smbfs_rmhash(smbnode_t *np)
709 {
710 	smbmntinfo_t *mi = np->n_mount;
711 
712 	rw_enter(&mi->smi_hash_lk, RW_WRITER);
713 	sn_rmhash_locked(np);
714 	rw_exit(&mi->smi_hash_lk);
715 }
716 
717 /*
718  * Lookup an smbnode by remote pathname
719  *
720  * The caller must be holding the AVL rwlock, either shared or exclusive.
721  *
722  * From NFS: nfs_subr.c:rfind
723  */
724 static smbnode_t *
725 sn_hashfind(
726 	smbmntinfo_t *mi,
727 	const char *rpath,
728 	int rplen,
729 	avl_index_t *pwhere) /* optional */
730 {
731 	smbfs_node_hdr_t nhdr;
732 	smbnode_t *np;
733 	vnode_t *vp;
734 
735 	ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));
736 
737 	bzero(&nhdr, sizeof (nhdr));
738 	nhdr.hdr_n_rpath = (char *)rpath;
739 	nhdr.hdr_n_rplen = rplen;
740 
741 	/* See smbfs_node_cmp below. */
742 	np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);
743 
744 	if (np == NULL)
745 		return (NULL);
746 
747 	/*
748 	 * Found it in the "hash" AVL tree.
749 	 * Remove from free list, if necessary.
750 	 */
751 	vp = SMBTOV(np);
752 	if (np->r_freef != NULL) {
753 		mutex_enter(&smbfreelist_lock);
754 		/*
755 		 * If the smbnode is on the freelist,
756 		 * then remove it and use that reference
757 		 * as the new reference.  Otherwise,
758 		 * need to increment the reference count.
759 		 */
760 		if (np->r_freef != NULL) {
761 			sn_rmfree(np);
762 			mutex_exit(&smbfreelist_lock);
763 		} else {
764 			mutex_exit(&smbfreelist_lock);
765 			VN_HOLD(vp);
766 		}
767 	} else
768 		VN_HOLD(vp);
769 
770 	return (np);
771 }
772 
773 static int
774 smbfs_node_cmp(const void *va, const void *vb)
775 {
776 	const smbfs_node_hdr_t *a = va;
777 	const smbfs_node_hdr_t *b = vb;
778 	int clen, diff;
779 
780 	/*
781 	 * Same semantics as strcmp, but does not
782 	 * assume the strings are null terminated.
783 	 */
784 	clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
785 	    a->hdr_n_rplen : b->hdr_n_rplen;
786 	diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
787 	if (diff < 0)
788 		return (-1);
789 	if (diff > 0)
790 		return (1);
791 	/* they match through clen */
792 	if (b->hdr_n_rplen > clen)
793 		return (-1);
794 	if (a->hdr_n_rplen > clen)
795 		return (1);
796 	return (0);
797 }
798 
799 /*
800  * Setup the "hash" AVL tree used for our node cache.
801  * See: smbfs_mount, smbfs_destroy_table.
802  */
803 void
804 smbfs_init_hash_avl(avl_tree_t *avl)
805 {
806 	avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
807 	    offsetof(smbnode_t, r_avl_node));
808 }
809 
810 /*
811  * Invalidate the cached attributes for all nodes "under" the
812  * passed-in node.  Note: the passed-in node is NOT affected by
813  * this call.  This is used both for files under some directory
814  * after the directory is deleted or renamed, and for extended
815  * attribute files (named streams) under a plain file after that
816  * file is renamed or deleted.
817  *
818  * Do this by walking the AVL tree starting at the passed in node,
819  * and continuing while the visited nodes have a path prefix matching
820  * the entire path of the passed-in node, and a separator just after
821  * that matching path prefix.  Watch out for cases where the AVL tree
822  * order may not exactly match the order of an FS walk, i.e.
823  * consider this sequence:
824  *	"foo"		(directory)
825  *	"foo bar"	(name containing a space)
826  *	"foo/bar"
827  * The walk needs to skip "foo bar" and keep going until it finds
828  * something that doesn't match the "foo" name prefix.
829  */
830 void
831 smbfs_attrcache_prune(smbnode_t *top_np)
832 {
833 	smbmntinfo_t *mi;
834 	smbnode_t *np;
835 	char *rpath;
836 	int rplen;
837 
838 	mi = top_np->n_mount;
839 	rw_enter(&mi->smi_hash_lk, RW_READER);
840 
841 	np = top_np;
842 	rpath = top_np->n_rpath;
843 	rplen = top_np->n_rplen;
844 	for (;;) {
845 		np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
846 		if (np == NULL)
847 			break;
848 		if (np->n_rplen < rplen)
849 			break;
850 		if (0 != strncmp(np->n_rpath, rpath, rplen))
851 			break;
852 		if (np->n_rplen > rplen && (
853 		    np->n_rpath[rplen] == ':' ||
854 		    np->n_rpath[rplen] == '\\'))
855 			smbfs_attrcache_remove(np);
856 	}
857 
858 	rw_exit(&mi->smi_hash_lk);
859 }
860 
/*
 * When nonzero, smbfs_check_table() keeps walking after finding
 * a busy node, so that all busy nodes get reported, instead of
 * stopping at the first one.  On by default only when built with
 * SMB_VNODE_DEBUG.
 */
#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */
866 
867 
868 /*
869  * Return 1 if there is a active vnode belonging to this vfs in the
870  * smbnode cache.
871  *
872  * Several of these checks are done without holding the usual
873  * locks.  This is safe because destroy_smbtable(), smbfs_addfree(),
874  * etc. will redo the necessary checks before actually destroying
875  * any smbnodes.
876  *
877  * From NFS: nfs_subr.c:check_rtable
878  *
879  * Debugging changes here relative to NFS.
880  * Relatively harmless, so left 'em in.
881  */
882 int
883 smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
884 {
885 	smbmntinfo_t *mi;
886 	smbnode_t *np;
887 	vnode_t *vp;
888 	int busycnt = 0;
889 
890 	mi = VFTOSMI(vfsp);
891 	rw_enter(&mi->smi_hash_lk, RW_READER);
892 	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
893 	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
894 
895 		if (np == rtnp)
896 			continue; /* skip the root */
897 		vp = SMBTOV(np);
898 
899 		/* Now the 'busy' checks: */
900 		/* Not on the free list? */
901 		if (np->r_freef == NULL) {
902 			SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
903 			    (void *)np, np->n_rpath);
904 			busycnt++;
905 		}
906 
907 		/* Has dirty pages? */
908 		if (vn_has_cached_data(vp) &&
909 		    (np->r_flags & RDIRTY)) {
910 			SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
911 			    (void *)np, np->n_rpath);
912 			busycnt++;
913 		}
914 
915 		/* Other refs? (not reflected in v_count) */
916 		if (np->r_count > 0) {
917 			SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
918 			    (void *)np, np->n_rpath);
919 			busycnt++;
920 		}
921 
922 		if (busycnt && !smbfs_check_table_debug)
923 			break;
924 
925 	}
926 	rw_exit(&mi->smi_hash_lk);
927 
928 	return (busycnt);
929 }
930 
/*
 * Destroy inactive vnodes from the AVL tree which belong to this
 * vfs.  It is essential that we destroy all inactive vnodes during a
 * forced unmount as well as during a normal unmount.
 *
 * Based on NFS: nfs_subr.c:destroy_rtable
 *
 * In here, we're normally destroying all or most of the AVL tree,
 * so the natural choice is to use avl_destroy_nodes.  However,
 * there may be a few busy nodes that should remain in the AVL
 * tree when we're done.  The solution: use a temporary tree to
 * hold the busy nodes until we're done destroying the old tree,
 * then copy the temporary tree over the (now empty) real tree.
 *
 * "Busy" here means only that the node is not on the free list.
 */
void
smbfs_destroy_table(struct vfs *vfsp)
{
	avl_tree_t tmp_avl;
	smbmntinfo_t *mi;
	smbnode_t *np;
	smbnode_t *rlist;
	void *v;

	mi = VFTOSMI(vfsp);
	rlist = NULL;
	smbfs_init_hash_avl(&tmp_avl);

	rw_enter(&mi->smi_hash_lk, RW_WRITER);
	/* Cookie for avl_destroy_nodes; must start out NULL. */
	v = NULL;
	while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {

		mutex_enter(&smbfreelist_lock);
		if (np->r_freef == NULL) {
			/*
			 * Busy node (not on the free list).
			 * Will keep in the final AVL tree.
			 * It stays RHASHED.
			 */
			mutex_exit(&smbfreelist_lock);
			avl_add(&tmp_avl, np);
		} else {
			/*
			 * It's on the free list.  Remove and
			 * arrange for it to be destroyed.
			 */
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);

			/*
			 * Last part of sn_rmhash_locked().
			 * NB: avl_destroy_nodes has already
			 * removed this from the "hash" AVL.
			 */
			mutex_enter(&np->r_statelock);
			np->r_flags &= ~RHASHED;
			mutex_exit(&np->r_statelock);

			/*
			 * Add to the list of nodes to destroy.
			 * Borrowing avl_child[0] for this list.
			 */
			np->r_avl_node.avl_child[0] =
			    (struct avl_node *)rlist;
			rlist = np;
		}
	}
	avl_destroy(&mi->smi_hash_avl);

	/*
	 * Replace the (now destroyed) "hash" AVL with the
	 * temporary AVL, which restores the busy nodes.
	 * (This is a structure copy of the tree header.)
	 */
	mi->smi_hash_avl = tmp_avl;
	rw_exit(&mi->smi_hash_lk);

	/*
	 * Now destroy the nodes on our temporary list (rlist).
	 * This call to smbfs_addfree will end up destroying the
	 * smbnode, but in a safe way with the appropriate set
	 * of checks done.  It is made after dropping the lock
	 * above; the nodes are no longer RHASHED.
	 */
	while ((np = rlist) != NULL) {
		/* Fetch the next link before np can be freed. */
		rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
		smbfs_addfree(np);
	}
}
1016 
1017 /*
1018  * This routine destroys all the resources associated with the smbnode
1019  * and then the smbnode itself.  Note: sn_inactive has been called.
1020  *
1021  * From NFS: nfs_subr.c:destroy_rnode
1022  */
1023 static void
1024 sn_destroy_node(smbnode_t *np)
1025 {
1026 	vnode_t *vp;
1027 	vfs_t *vfsp;
1028 
1029 	vp = SMBTOV(np);
1030 	vfsp = vp->v_vfsp;
1031 
1032 	ASSERT(vp->v_count == 1);
1033 	ASSERT(np->r_count == 0);
1034 	ASSERT(np->r_mapcnt == 0);
1035 	ASSERT(np->r_secattr.vsa_aclentp == NULL);
1036 	ASSERT(np->r_cred == NULL);
1037 	ASSERT(np->n_rpath == NULL);
1038 	ASSERT(!(np->r_flags & RHASHED));
1039 	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
1040 	atomic_dec_ulong((ulong_t *)&smbnodenew);
1041 	vn_invalid(vp);
1042 	vn_free(vp);
1043 	kmem_cache_free(smbnode_cache, np);
1044 	VFS_RELE(vfsp);
1045 }
1046 
1047 /*
1048  * From NFS rflush()
1049  * Flush all vnodes in this (or every) vfs.
1050  * Used by smbfs_sync and by smbfs_unmount.
1051  */
1052 /*ARGSUSED*/
1053 void
1054 smbfs_rflush(struct vfs *vfsp, cred_t *cr)
1055 {
1056 	smbmntinfo_t *mi;
1057 	smbnode_t *np;
1058 	vnode_t *vp, **vplist;
1059 	long num, cnt;
1060 
1061 	mi = VFTOSMI(vfsp);
1062 
1063 	/*
1064 	 * Check to see whether there is anything to do.
1065 	 */
1066 	num = avl_numnodes(&mi->smi_hash_avl);
1067 	if (num == 0)
1068 		return;
1069 
1070 	/*
1071 	 * Allocate a slot for all currently active rnodes on the
1072 	 * supposition that they all may need flushing.
1073 	 */
1074 	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1075 	cnt = 0;
1076 
1077 	/*
1078 	 * Walk the AVL tree looking for rnodes with page
1079 	 * lists associated with them.  Make a list of these
1080 	 * files.
1081 	 */
1082 	rw_enter(&mi->smi_hash_lk, RW_READER);
1083 	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
1084 	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
1085 		vp = SMBTOV(np);
1086 		/*
1087 		 * Don't bother sync'ing a vp if it
1088 		 * is part of virtual swap device or
1089 		 * if VFS is read-only
1090 		 */
1091 		if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1092 			continue;
1093 		/*
1094 		 * If the vnode has pages and is marked as either
1095 		 * dirty or mmap'd, hold and add this vnode to the
1096 		 * list of vnodes to flush.
1097 		 */
1098 		if (vn_has_cached_data(vp) &&
1099 		    ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
1100 			VN_HOLD(vp);
1101 			vplist[cnt++] = vp;
1102 			if (cnt == num)
1103 				break;
1104 		}
1105 	}
1106 	rw_exit(&mi->smi_hash_lk);
1107 
1108 	/*
1109 	 * Flush and release all of the files on the list.
1110 	 */
1111 	while (cnt-- > 0) {
1112 		vp = vplist[cnt];
1113 		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1114 		VN_RELE(vp);
1115 	}
1116 
1117 	kmem_free(vplist, num * sizeof (vnode_t *));
1118 }
1119 
1120 /* Here NFS has access cache stuff (nfs_subr.c) not used here */
1121 
1122 /*
1123  * Set or Clear direct I/O flag
1124  * VOP_RWLOCK() is held for write access to prevent a race condition
1125  * which would occur if a process is in the middle of a write when
1126  * directio flag gets set. It is possible that all pages may not get flushed.
1127  * From nfs_common.c
1128  */
1129 
1130 /* ARGSUSED */
1131 int
1132 smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
1133 {
1134 	int	error = 0;
1135 	smbnode_t	*np;
1136 
1137 	np = VTOSMB(vp);
1138 
1139 	if (cmd == DIRECTIO_ON) {
1140 
1141 		if (np->r_flags & RDIRECTIO)
1142 			return (0);
1143 
1144 		/*
1145 		 * Flush the page cache.
1146 		 */
1147 
1148 		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1149 
1150 		if (np->r_flags & RDIRECTIO) {
1151 			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1152 			return (0);
1153 		}
1154 
1155 		/* Here NFS also checks ->r_awcount */
1156 		if (vn_has_cached_data(vp) &&
1157 		    (np->r_flags & RDIRTY) != 0) {
1158 			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
1159 			    B_INVAL, cr, NULL);
1160 			if (error) {
1161 				if (error == ENOSPC || error == EDQUOT) {
1162 					mutex_enter(&np->r_statelock);
1163 					if (!np->r_error)
1164 						np->r_error = error;
1165 					mutex_exit(&np->r_statelock);
1166 				}
1167 				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1168 				return (error);
1169 			}
1170 		}
1171 
1172 		mutex_enter(&np->r_statelock);
1173 		np->r_flags |= RDIRECTIO;
1174 		mutex_exit(&np->r_statelock);
1175 		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1176 		return (0);
1177 	}
1178 
1179 	if (cmd == DIRECTIO_OFF) {
1180 		mutex_enter(&np->r_statelock);
1181 		np->r_flags &= ~RDIRECTIO;	/* disable direct mode */
1182 		mutex_exit(&np->r_statelock);
1183 		return (0);
1184 	}
1185 
1186 	return (EINVAL);
1187 }
1188 
1189 static kmutex_t smbfs_newnum_lock;
1190 static uint32_t smbfs_newnum_val = 0;
1191 
1192 /*
1193  * Return a number 0..0xffffffff that's different from the last
1194  * 0xffffffff numbers this returned.  Used for unlinked files.
1195  * From NFS nfs_subr.c newnum
1196  */
1197 uint32_t
1198 smbfs_newnum(void)
1199 {
1200 	uint32_t id;
1201 
1202 	mutex_enter(&smbfs_newnum_lock);
1203 	if (smbfs_newnum_val == 0)
1204 		smbfs_newnum_val = (uint32_t)gethrestime_sec();
1205 	id = smbfs_newnum_val++;
1206 	mutex_exit(&smbfs_newnum_lock);
1207 	return (id);
1208 }
1209 
1210 /*
1211  * Fill in a temporary name at buf
1212  */
1213 int
1214 smbfs_newname(char *buf, size_t buflen)
1215 {
1216 	uint_t id;
1217 	int n;
1218 
1219 	id = smbfs_newnum();
1220 	n = snprintf(buf, buflen, "~$smbfs%08X", id);
1221 	return (n);
1222 }
1223 
1224 
1225 /*
1226  * initialize resources that are used by smbfs_subr.c
1227  * this is called from the _init() routine (by the way of smbfs_clntinit())
1228  *
1229  * From NFS: nfs_subr.c:nfs_subrinit
1230  */
1231 int
1232 smbfs_subrinit(void)
1233 {
1234 	ulong_t nsmbnode_max;
1235 
1236 	/*
1237 	 * Allocate and initialize the smbnode cache
1238 	 */
1239 	if (nsmbnode <= 0)
1240 		nsmbnode = ncsize; /* dnlc.h */
1241 	nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
1242 	    sizeof (struct smbnode));
1243 	if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
1244 		cmn_err(CE_NOTE,
1245 		    "setting nsmbnode to max value of %ld", nsmbnode_max);
1246 		nsmbnode = nsmbnode_max;
1247 	}
1248 
1249 	smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
1250 	    0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);
1251 
1252 	/*
1253 	 * Initialize the various mutexes and reader/writer locks
1254 	 */
1255 	mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
1256 	mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
1257 
1258 	/*
1259 	 * Assign unique major number for all smbfs mounts
1260 	 */
1261 	if ((smbfs_major = getudev()) == -1) {
1262 		cmn_err(CE_WARN,
1263 		    "smbfs: init: can't get unique device number");
1264 		smbfs_major = 0;
1265 	}
1266 	smbfs_minor = 0;
1267 
1268 	return (0);
1269 }
1270 
1271 /*
1272  * free smbfs hash table, etc.
1273  * From NFS: nfs_subr.c:nfs_subrfini
1274  */
1275 void
1276 smbfs_subrfini(void)
1277 {
1278 
1279 	/*
1280 	 * Destroy the smbnode cache
1281 	 */
1282 	kmem_cache_destroy(smbnode_cache);
1283 
1284 	/*
1285 	 * Destroy the various mutexes and reader/writer locks
1286 	 */
1287 	mutex_destroy(&smbfreelist_lock);
1288 	mutex_destroy(&smbfs_minor_lock);
1289 }
1290 
1291 /* rddir_cache ? */
1292 
1293 /*
1294  * Support functions for smbfs_kmem_reclaim
1295  */
1296 
/*
 * Work through the global smbnode free list (smbfreelist) until it
 * is empty, handing each node to smbfs_addfree().
 *
 * For a node that is still hashed (RHASHED), the vnode reference
 * count is checked under the mount's hash lock (held as writer) and
 * v_lock:
 *  - v_count > 1: one hold is dropped and the node is otherwise
 *    left alone (it stays in the hash);
 *  - otherwise the node is removed from the hash before being
 *    passed on to smbfs_addfree().
 *
 * smbfreelist_lock is never held across the hash lock, v_lock or the
 * smbfs_addfree() call; it is dropped right after the node has been
 * taken and re-acquired before the list head is examined again.
 */
static void
smbfs_node_reclaim(void)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp;

	mutex_enter(&smbfreelist_lock);
	while ((np = smbfreelist) != NULL) {
		/*
		 * Presumably unlinks np from smbfreelist (the loop
		 * relies on the list head changing) -- see sn_rmfree.
		 */
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);
		if (np->r_flags & RHASHED) {
			vp = SMBTOV(np);
			mi = np->n_mount;
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * More than one hold on the vnode:
				 * give up one hold, leave the node
				 * hashed, and go on to the next one.
				 */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				mutex_enter(&smbfreelist_lock);
				continue;
			}
			mutex_exit(&vp->v_lock);
			/* Unhash while still holding the hash lock. */
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}
		/*
		 * This call to smbfs_addfree will end up destroying the
		 * smbnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		smbfs_addfree(np);
		/* Re-take the list lock before looking at the head again. */
		mutex_enter(&smbfreelist_lock);
	}
	mutex_exit(&smbfreelist_lock);
}
1334 
/*
 * Reclaim callback for smbnode_cache; it is the function passed to
 * kmem_cache_create() in smbfs_subrinit().  This is how the kmem
 * allocator asks us: "Please give back some memory!"
 *
 * All of the work is done by smbfs_node_reclaim(), which goes
 * through the nodes on the smbnode free list.  The callback
 * argument is not used.
 */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
	smbfs_node_reclaim();
}
1347 
1348 /*
1349  * Here NFS has failover stuff and
1350  * nfs_rw_xxx - see smbfs_rwlock.c
1351  */
1352