/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All rights reserved.
 */
/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 * Copyright 2018 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2025 RackTop Systems, Inc.
 */

/*
 * Node hash implementation initially borrowed from NFS (nfs_subr.c)
 * but then heavily modified. It's no longer an array of hash lists,
 * but an AVL tree per mount point. More on this below.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/buf.h>
#include <sys/dnlc.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>

#include <netsmb/smb_osdep.h>

#include <netsmb/smb.h>
#include <netsmb/smb_conn.h>
#include <netsmb/smb_subr.h>
#include <netsmb/smb_rq.h>

#include <smbfs/smbfs.h>
#include <smbfs/smbfs_node.h>
#include <smbfs/smbfs_subr.h>

/*
 * The AVL trees (now per-mount) allow finding an smbfs node by its
 * full remote path name. It also allows easy traversal of all nodes
 * below (path wise) any given node. A reader/writer lock for each
 * (per mount) AVL tree is used to control access and to synchronize
 * lookups, additions, and deletions from that AVL tree.
 *
 * Previously, this code used a global array of hash chains, each with
 * its own rwlock. A few struct members, functions, and comments may
 * still refer to a "hash", and those should all now be considered to
 * refer to the per-mount AVL tree that replaced the old hash chains.
 * (e.g. member smi_hash_lk, function sn_hashfind, etc.)
 *
 * The smbnode freelist is organized as a doubly linked list with
 * a head pointer. Additions and deletions are synchronized via
 * a single mutex.
 *
 * In order to add an smbnode to the free list, it must be linked into
 * the mount's AVL tree and the exclusive lock for the AVL must be held.
 * If an smbnode is not linked into the AVL tree, then it is destroyed
 * because it represents no valuable information that can be reused
 * about the file. The exclusive lock for the AVL tree must be held
 * in order to prevent a lookup in the AVL tree from finding the
 * smbnode, using it, and assuming that the smbnode is not on the
 * freelist. The lookup in the AVL tree will have the AVL tree lock
 * held, either exclusive or shared.
 *
 * The vnode reference count for each smbnode is not allowed to drop
 * below 1. This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released. This means that when an
 * smbnode is looked up in the AVL tree, then either the smbnode
 * is removed from the freelist and that reference is transferred to
 * the new reference or the vnode reference count must be incremented
 * accordingly. The mutex for the freelist must be held in order to
 * accurately test to see if the smbnode is on the freelist or not.
 * The AVL tree lock might be held shared and it is possible that
 * two different threads may race to remove the smbnode from the
 * freelist. This race can be resolved by holding the mutex for the
 * freelist. Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist. It cannot be
 * placed on the freelist due to the requirement that the thread
 * putting the smbnode on the freelist must hold the exclusive lock
 * for the AVL tree and the thread doing the lookup in the AVL tree
 * is holding either a shared or exclusive lock for the AVL tree.
 *
 * The lock ordering is:
 *
 *	AVL tree lock -> vnode lock
 *	AVL tree lock -> freelist lock
 */
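
/*
 * Illustrative sketch only (not compiled): given the ordering above,
 * a thread that needs both the AVL tree lock and the vnode lock
 * always takes the AVL tree lock first and releases it last:
 *
 *	rw_enter(&mi->smi_hash_lk, RW_WRITER);	(AVL tree lock first)
 *	mutex_enter(&vp->v_lock);		(then the vnode lock)
 *	... check or adjust vp->v_count ...
 *	mutex_exit(&vp->v_lock);
 *	rw_exit(&mi->smi_hash_lk);
 *
 * Taking these locks in the opposite order anywhere could deadlock
 * against smbfs_addfree() and make_smbnode() below, which follow
 * this pattern.
 */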

static kmutex_t smbfreelist_lock;
static smbnode_t *smbfreelist = NULL;
static ulong_t smbnodenew = 0;
long nsmbnode = 0;

static struct kmem_cache *smbnode_cache;

static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/* See smbfs_node_findcreate() */
struct smbfattr smbfs_fattr0;

/*
 * Local functions.
 * SN for Smb Node
 */
static void sn_rmfree(smbnode_t *);
static void sn_inactive(smbnode_t *);
static void sn_addhash_locked(smbnode_t *, avl_index_t);
static void sn_rmhash_locked(smbnode_t *);
static void sn_destroy_node(smbnode_t *);
void smbfs_kmem_reclaim(void *cdrarg);

static smbnode_t *
sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);

static smbnode_t *
make_smbnode(smbmntinfo_t *, const char *, int, int *);

/*
 * Free the resources associated with an smbnode.
 * Note: This is different from smbfs_inactive
 *
 * From NFS: nfs_subr.c:rinactive
 */
static void
sn_inactive(smbnode_t *np)
{
	vsecattr_t ovsa;
	cred_t *oldcr;
	char *orpath;
	int orplen;
	vnode_t *vp;

	/*
	 * smbfs_close should already have cleaned out any FIDs.
	 */
	ASSERT3P(np->n_fid, ==, NULL);
	ASSERT3P(np->n_dirseq, ==, NULL);

	/*
	 * Here NFS has:
	 * Flush and invalidate all pages (done by caller)
	 * Free any held credentials and caches...
	 * etc. (See NFS code)
	 */
	mutex_enter(&np->r_statelock);

	ovsa = np->r_secattr;
	np->r_secattr = smbfs_vsa0;
	np->r_sectime = 0;

	oldcr = np->r_cred;
	np->r_cred = NULL;

	orpath = np->n_rpath;
	orplen = np->n_rplen;
	np->n_rpath = NULL;
	np->n_rplen = 0;

	mutex_exit(&np->r_statelock);

	/*
	 * A node being cleaned up should never still have cached
	 * pages here; the (always-false) ASSERT below fires in
	 * DEBUG builds if it does.
	 */
	vp = SMBTOV(np);
	if (vn_has_cached_data(vp)) {
		ASSERT3P(vp, ==, NULL);
	}

	if (ovsa.vsa_aclentp != NULL)
		kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);

	if (oldcr != NULL)
		crfree(oldcr);

	if (orpath != NULL)
		kmem_free(orpath, orplen + 1);
}

/*
 * Find and optionally create an smbnode for the passed
 * mountinfo, directory, separator, and name. If the
 * desired smbnode already exists, return a reference.
 * If the file attributes pointer is non-null, the node
 * is created if necessary and linked into the AVL tree.
 *
 * Callers that need a node created but don't have the
 * real attributes pass smbfs_fattr0 to force creation.
 *
 * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
 *
 * Based on NFS: nfs_subr.c:makenfsnode
 */
smbnode_t *
smbfs_node_findcreate(
	smbmntinfo_t *mi,
	const char *dirnm,
	int dirlen,
	const char *name,
	int nmlen,
	char sep,
	struct smbfattr *fap)
{
	char tmpbuf[256];
	size_t rpalloc;
	char *p, *rpath;
	int rplen;
	smbnode_t *np;
	vnode_t *vp;
	int newnode;

	/*
	 * Build the search string, either in tmpbuf or
	 * in allocated memory if larger than tmpbuf.
	 */
	rplen = dirlen;
	if (sep != '\0')
		rplen++;
	rplen += nmlen;
	if (rplen < sizeof (tmpbuf)) {
		/* use tmpbuf */
		rpalloc = 0;
		rpath = tmpbuf;
	} else {
		rpalloc = rplen + 1;
		rpath = kmem_alloc(rpalloc, KM_SLEEP);
	}
	p = rpath;
	bcopy(dirnm, p, dirlen);
	p += dirlen;
	if (sep != '\0')
		*p++ = sep;
	if (name != NULL) {
		bcopy(name, p, nmlen);
		p += nmlen;
	}
	ASSERT(p == rpath + rplen);

	/*
	 * Find or create a node with this path.
	 */
	rw_enter(&mi->smi_hash_lk, RW_READER);
	if (fap == NULL)
		np = sn_hashfind(mi, rpath, rplen, NULL);
	else
		np = make_smbnode(mi, rpath, rplen, &newnode);
	rw_exit(&mi->smi_hash_lk);

	if (rpalloc)
		kmem_free(rpath, rpalloc);

	if (fap == NULL) {
		/*
		 * Caller is "just looking" (no create)
		 * so np may or may not be NULL here.
		 * Either way, we're done.
		 */
		return (np);
	}

	/*
	 * We should have a node, possibly created.
	 * Do we have (real) attributes to apply?
	 */
	ASSERT(np != NULL);
	if (fap == &smbfs_fattr0)
		return (np);

	/*
	 * Apply the given attributes to this node,
	 * dealing with any cache impact, etc.
	 */
	vp = SMBTOV(np);
	smbfs_attrcache_fa(vp, fap);

	/*
	 * Note NFS sets vp->v_type here, assuming it
	 * can never change for the life of a node.
	 * We allow v_type to change, and set it in
	 * smbfs_attrcache(). Also: mode, uid, gid
	 */
	return (np);
}
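
/*
 * Illustrative sketch only (not compiled): the three ways callers use
 * smbfs_node_findcreate(), per the contract described above. The
 * names dnp, "foo", and fa here are hypothetical.
 *
 *	(1) Lookup only; may return NULL:
 *	np = smbfs_node_findcreate(mi, dnp->n_rpath, dnp->n_rplen,
 *	    "foo", 3, '\\', NULL);
 *
 *	(2) Create if needed, no real attributes available yet:
 *	np = smbfs_node_findcreate(mi, dnp->n_rpath, dnp->n_rplen,
 *	    "foo", 3, '\\', &smbfs_fattr0);
 *
 *	(3) Create if needed and apply the real attributes in fa:
 *	np = smbfs_node_findcreate(mi, dnp->n_rpath, dnp->n_rplen,
 *	    "foo", 3, '\\', &fa);
 *
 * In every case, a non-NULL return carries a vnode hold that the
 * caller must eventually release, e.g. VN_RELE(SMBTOV(np)).
 */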

/*
 * Here NFS has: nfs_subr.c:rtablehash
 * We use smbfs_hash().
 */

/*
 * Find or create an smbnode.
 * From NFS: nfs_subr.c:make_rnode
 */
static smbnode_t *
make_smbnode(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	int *newnode)
{
	smbnode_t *np;
	smbnode_t *tnp;
	vnode_t *vp;
	vfs_t *vfsp;
	avl_index_t where;
	char *new_rpath = NULL;

	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
	vfsp = mi->smi_vfsp;

start:
	np = sn_hashfind(mi, rpath, rplen, NULL);
	if (np != NULL) {
		*newnode = 0;
		return (np);
	}

	/* Note: will retake this lock below. */
	rw_exit(&mi->smi_hash_lk);

	/*
	 * see if we can find something on the freelist
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
		np = smbfreelist;
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);

		vp = SMBTOV(np);

		if (np->r_flags & RHASHED) {
			smbmntinfo_t *tmp_mi = np->n_mount;
			ASSERT(tmp_mi != NULL);
			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&tmp_mi->smi_hash_lk);
				/* start over */
				rw_enter(&mi->smi_hash_lk, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&tmp_mi->smi_hash_lk);
		}

		sn_inactive(np);

		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			rw_enter(&mi->smi_hash_lk, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		smbfs_rw_destroy(&np->r_rwlock);
		smbfs_rw_destroy(&np->r_lkserlock);
		mutex_destroy(&np->r_statelock);
		cv_destroy(&np->r_cv);
		/*
		 * Make sure that if smbnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/*
		 * allocate and initialize a new smbnode
		 */
		vnode_t *new_vp;

		mutex_exit(&smbfreelist_lock);

		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_inc_ulong((ulong_t *)&smbnodenew);
		vp = new_vp;
	}

	/*
	 * Allocate and copy the rpath we'll need below.
	 */
	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
	bcopy(rpath, new_rpath, rplen);
	new_rpath[rplen] = '\0';

	/* Initialize smbnode_t */
	bzero(np, sizeof (*np));

	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */

	np->r_vnode = vp;
	np->n_mount = mi;

	np->n_fid = NULL;
	np->n_uid = mi->smi_uid;
	np->n_gid = mi->smi_gid;
	/* Leave attributes "stale." */

	/*
	 * Here NFS has avl_create(&np->r_dir, ...)
	 * for the readdir cache (not used here).
	 */

	/* Now fill in the vnode. */
	vn_setops(vp, smbfs_vnodeops);
	vp->v_data = (caddr_t)np;
	VFS_HOLD(vfsp);
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;

	/*
	 * We entered with mi->smi_hash_lk held (reader).
	 * Retake it now (as the writer).
	 * Will return with it held.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	/*
	 * There is a race condition where someone else
	 * may alloc the smbnode while no locks are held,
	 * so check again and recover if found.
	 */
	tnp = sn_hashfind(mi, rpath, rplen, &where);
	if (tnp != NULL) {
		/*
		 * Lost the race. Put the node we were building
		 * on the free list and return the one we found.
		 */
		rw_exit(&mi->smi_hash_lk);
		kmem_free(new_rpath, rplen + 1);
		smbfs_addfree(np);
		rw_enter(&mi->smi_hash_lk, RW_READER);
		*newnode = 0;
		return (tnp);
	}

	/*
	 * Hash search identifies nodes by the remote path
	 * (n_rpath) so fill that in now, before linking
	 * this node into the node cache (AVL tree).
	 */
	np->n_rpath = new_rpath;
	np->n_rplen = rplen;
	np->n_ino = smbfs_gethash(new_rpath, rplen);

	sn_addhash_locked(np, where);
	*newnode = 1;
	return (np);
}

/*
 * smbfs_addfree
 * Put an smbnode on the free list, or destroy it immediately
 * if it offers no value were it to be reclaimed later. Also
 * destroy immediately when we have too many smbnodes, etc.
 *
 * Normally called by smbfs_inactive, but also
 * called in here during cleanup operations.
 *
 * From NFS: nfs_subr.c:rp_addfree
 */
void
smbfs_addfree(smbnode_t *np)
{
	vnode_t *vp;
	struct vfs *vfsp;
	smbmntinfo_t *mi;

	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);

	vp = SMBTOV(np);
	ASSERT(vp->v_count >= 1);

	vfsp = vp->v_vfsp;
	mi = VFTOSMI(vfsp);

	/*
	 * If there are no more references to this smbnode and:
	 * we have too many smbnodes allocated, or if the node
	 * is no longer accessible via the AVL tree (!RHASHED),
	 * or an i/o error occurred while writing to the file,
	 * or it's part of an unmounted FS, then try to destroy
	 * it instead of putting it on the smbnode freelist.
	 */
	if (np->r_count == 0 && (
	    (np->r_flags & RHASHED) == 0 ||
	    (np->r_error != 0) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
	    (smbnodenew > nsmbnode))) {

		/* Try to destroy this node. */

		if (np->r_flags & RHASHED) {
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				return;
				/*
				 * Will get another call later,
				 * via smbfs_inactive.
				 */
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}

		sn_inactive(np);

		/*
		 * Recheck the vnode reference count. We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock. The
		 * smbnode is not in the smbnode "hash" AVL tree, so
		 * the only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the smbnode was marked
		 * with RDIRTY or for a modified page. This vnode
		 * reference may have been acquired before our call
		 * to sn_inactive. The i/o may have been completed,
		 * thus allowing sn_inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet. In any case, the smbnode can not be destroyed
		 * until the other references to this vnode have been
		 * released. The other references will take care of
		 * either destroying the smbnode or placing it on the
		 * smbnode freelist. If there are no other references,
		 * then the smbnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		sn_destroy_node(np);
		return;
	}

	/*
	 * Lock the AVL tree and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the smbnode should not be placed on the
	 * freelist. If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this smbnode to the freelist.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		rw_exit(&mi->smi_hash_lk);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Put this node on the free list.
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist == NULL) {
		np->r_freef = np;
		np->r_freeb = np;
		smbfreelist = np;
	} else {
		np->r_freef = smbfreelist;
		np->r_freeb = smbfreelist->r_freeb;
		smbfreelist->r_freeb->r_freef = np;
		smbfreelist->r_freeb = np;
	}
	mutex_exit(&smbfreelist_lock);

	rw_exit(&mi->smi_hash_lk);
}
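
/*
 * Illustrative note: the freelist built above is a circular, doubly
 * linked list threaded through r_freef (forward) and r_freeb (back),
 * with smbfreelist pointing at the oldest entry. smbfs_addfree()
 * inserts at the tail (smbfreelist->r_freeb) and the reclaim paths
 * (make_smbnode, smbfs_node_reclaim) take from the head, so the list
 * behaves as an LRU queue:
 *
 *	smbfreelist --> [oldest] <-> [...] <-> [newest]
 *	                   ^                       |
 *	                   +------ (circular) -----+
 */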

/*
 * Remove an smbnode from the free list.
 *
 * The caller must be holding smbfreelist_lock and the smbnode
 * must be on the freelist.
 *
 * From NFS: nfs_subr.c:rp_rmfree
 */
static void
sn_rmfree(smbnode_t *np)
{

	ASSERT(MUTEX_HELD(&smbfreelist_lock));
	ASSERT(np->r_freef != NULL && np->r_freeb != NULL);

	if (np == smbfreelist) {
		smbfreelist = np->r_freef;
		if (np == smbfreelist)
			smbfreelist = NULL;
	}

	np->r_freeb->r_freef = np->r_freef;
	np->r_freef->r_freeb = np->r_freeb;

	np->r_freef = np->r_freeb = NULL;
}

/*
 * Put an smbnode in the "hash" AVL tree.
 *
 * The caller must hold the rwlock as writer.
 *
 * From NFS: nfs_subr.c:rp_addhash
 */
static void
sn_addhash_locked(smbnode_t *np, avl_index_t where)
{
	smbmntinfo_t *mi = np->n_mount;

	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));

	mutex_enter(&np->r_statelock);
	if ((np->r_flags & RHASHED) == 0) {
		avl_insert(&mi->smi_hash_avl, np, where);
		np->r_flags |= RHASHED;
	}
	mutex_exit(&np->r_statelock);
}

/*
 * Remove an smbnode from the "hash" AVL tree.
 *
 * The caller must hold the rwlock as writer.
 *
 * From NFS: nfs_subr.c:rp_rmhash_locked
 */
static void
sn_rmhash_locked(smbnode_t *np)
{
	smbmntinfo_t *mi = np->n_mount;

	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));

	mutex_enter(&np->r_statelock);
	if ((np->r_flags & RHASHED) != 0) {
		np->r_flags &= ~RHASHED;
		avl_remove(&mi->smi_hash_avl, np);
	}
	mutex_exit(&np->r_statelock);
}

/*
 * Remove an smbnode from the "hash" AVL tree.
 *
 * The caller must not be holding the rwlock.
 */
void
smbfs_rmhash(smbnode_t *np)
{
	smbmntinfo_t *mi = np->n_mount;

	rw_enter(&mi->smi_hash_lk, RW_WRITER);
	sn_rmhash_locked(np);
	rw_exit(&mi->smi_hash_lk);
}

/*
 * Lookup an smbnode by remote pathname
 *
 * The caller must be holding the AVL rwlock, either shared or exclusive.
 *
 * From NFS: nfs_subr.c:rfind
 */
static smbnode_t *
sn_hashfind(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	avl_index_t *pwhere) /* optional */
{
	smbfs_node_hdr_t nhdr;
	smbnode_t *np;
	vnode_t *vp;

	ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));

	bzero(&nhdr, sizeof (nhdr));
	nhdr.hdr_n_rpath = (char *)rpath;
	nhdr.hdr_n_rplen = rplen;

	/* See smbfs_node_cmp below. */
	np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);

	if (np == NULL)
		return (NULL);

	/*
	 * Found it in the "hash" AVL tree.
	 * Remove from free list, if necessary.
	 */
	vp = SMBTOV(np);
	if (np->r_freef != NULL) {
		mutex_enter(&smbfreelist_lock);
		/*
		 * If the smbnode is on the freelist,
		 * then remove it and use that reference
		 * as the new reference. Otherwise,
		 * need to increment the reference count.
		 */
		if (np->r_freef != NULL) {
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);
		} else {
			mutex_exit(&smbfreelist_lock);
			VN_HOLD(vp);
		}
	} else
		VN_HOLD(vp);

	return (np);
}

static int
smbfs_node_cmp(const void *va, const void *vb)
{
	const smbfs_node_hdr_t *a = va;
	const smbfs_node_hdr_t *b = vb;
	int clen, diff;

	/*
	 * Same semantics as strcmp, but does not
	 * assume the strings are null terminated.
	 */
	clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
	    a->hdr_n_rplen : b->hdr_n_rplen;
	diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
	if (diff < 0)
		return (-1);
	if (diff > 0)
		return (1);
	/* they match through clen */
	if (b->hdr_n_rplen > clen)
		return (-1);
	if (a->hdr_n_rplen > clen)
		return (1);
	return (0);
}
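
/*
 * Illustrative examples (hypothetical names): because the comparison
 * runs only through the shorter length and then ranks the shorter
 * path first, the tree orders paths the way strcmp() would order
 * their null-terminated forms:
 *
 *	cmp("foo" len 3,     "foo" len 3)      ->  0
 *	cmp("foo" len 3,     "foo bar" len 7)  -> -1  (prefix sorts first)
 *	cmp("foo bar" len 7, "foo\bar" len 7)  -> -1  (' ' < '\')
 *
 * This ordering is why smbfs_attrcache_prune() below may encounter a
 * node like "foo bar" between "foo" and its children "foo\..." and
 * must skip it rather than stop the walk.
 */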

/*
 * Setup the "hash" AVL tree used for our node cache.
 * See: smbfs_mount, smbfs_destroy_table.
 */
void
smbfs_init_hash_avl(avl_tree_t *avl)
{
	avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
	    offsetof(smbnode_t, r_avl_node));
}

/*
 * Invalidate the cached attributes for all nodes "under" the
 * passed-in node. Note: the passed-in node is NOT affected by
 * this call. This is used both for files under some directory
 * after the directory is deleted or renamed, and for extended
 * attribute files (named streams) under a plain file after that
 * file is renamed or deleted.
 *
 * Do this by walking the AVL tree starting at the passed in node,
 * and continuing while the visited nodes have a path prefix matching
 * the entire path of the passed-in node, and a separator just after
 * that matching path prefix. Watch out for cases where the AVL tree
 * order may not exactly match the order of an FS walk, i.e.
 * consider this sequence:
 *	"foo"		(directory)
 *	"foo bar"	(name containing a space)
 *	"foo/bar"
 * The walk needs to skip "foo bar" and keep going until it finds
 * something that doesn't match the "foo" name prefix.
 */
void
smbfs_attrcache_prune(smbnode_t *top_np)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	char *rpath;
	int rplen;

	mi = top_np->n_mount;
	rw_enter(&mi->smi_hash_lk, RW_READER);

	np = top_np;
	rpath = top_np->n_rpath;
	rplen = top_np->n_rplen;
	for (;;) {
		np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
		if (np == NULL)
			break;
		if (np->n_rplen < rplen)
			break;
		if (0 != strncmp(np->n_rpath, rpath, rplen))
			break;
		if (np->n_rplen > rplen && (
		    np->n_rpath[rplen] == ':' ||
		    np->n_rpath[rplen] == '\\'))
			smbfs_attrcache_remove(np);
	}

	rw_exit(&mi->smi_hash_lk);
}

#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */


/*
 * Return nonzero if there is an active vnode belonging to this vfs
 * in the smbnode cache.
 *
 * Several of these checks are done without holding the usual
 * locks. This is safe because destroy_smbtable(), smbfs_addfree(),
 * etc. will redo the necessary checks before actually destroying
 * any smbnodes.
 *
 * From NFS: nfs_subr.c:check_rtable
 *
 * Debugging changes here relative to NFS.
 * Relatively harmless, so left 'em in.
 */
int
smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp;
	int busycnt = 0;

	mi = VFTOSMI(vfsp);
	rw_enter(&mi->smi_hash_lk, RW_READER);
	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {

		if (np == rtnp)
			continue; /* skip the root */
		vp = SMBTOV(np);

		/* Now the 'busy' checks: */
		/* Not on the free list? */
		if (np->r_freef == NULL) {
			SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		/* Has dirty pages? */
		if (vn_has_cached_data(vp) &&
		    (np->r_flags & RDIRTY)) {
			SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		/* Other refs? (not reflected in v_count) */
		if (np->r_count > 0) {
			SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		if (busycnt && !smbfs_check_table_debug)
			break;

	}
	rw_exit(&mi->smi_hash_lk);

	return (busycnt);
}

/*
 * Destroy inactive vnodes from the AVL tree which belong to this
 * vfs. It is essential that we destroy all inactive vnodes during a
 * forced unmount as well as during a normal unmount.
 *
 * Based on NFS: nfs_subr.c:destroy_rtable
 *
 * In here, we're normally destroying all or most of the AVL tree,
 * so the natural choice is to use avl_destroy_nodes. However,
 * there may be a few busy nodes that should remain in the AVL
 * tree when we're done. The solution: use a temporary tree to
 * hold the busy nodes until we're done destroying the old tree,
 * then copy the temporary tree over the (now empty) real tree.
 */
void
smbfs_destroy_table(struct vfs *vfsp)
{
	avl_tree_t tmp_avl;
	smbmntinfo_t *mi;
	smbnode_t *np;
	smbnode_t *rlist;
	void *v;

	mi = VFTOSMI(vfsp);
	rlist = NULL;
	smbfs_init_hash_avl(&tmp_avl);

	rw_enter(&mi->smi_hash_lk, RW_WRITER);
	v = NULL;
	while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {

		mutex_enter(&smbfreelist_lock);
		if (np->r_freef == NULL) {
			/*
			 * Busy node (not on the free list).
			 * Will keep in the final AVL tree.
			 */
			mutex_exit(&smbfreelist_lock);
			avl_add(&tmp_avl, np);
		} else {
			/*
			 * It's on the free list. Remove and
			 * arrange for it to be destroyed.
			 */
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);

			/*
			 * Last part of sn_rmhash_locked().
			 * NB: avl_destroy_nodes has already
			 * removed this from the "hash" AVL.
			 */
			mutex_enter(&np->r_statelock);
			np->r_flags &= ~RHASHED;
			mutex_exit(&np->r_statelock);

			/*
			 * Add to the list of nodes to destroy.
			 * Borrowing avl_child[0] for this list.
			 */
			np->r_avl_node.avl_child[0] =
			    (struct avl_node *)rlist;
			rlist = np;
		}
	}
	avl_destroy(&mi->smi_hash_avl);

	/*
	 * Replace the (now destroyed) "hash" AVL with the
	 * temporary AVL, which restores the busy nodes.
	 */
	mi->smi_hash_avl = tmp_avl;
	rw_exit(&mi->smi_hash_lk);

	/*
	 * Now destroy the nodes on our temporary list (rlist).
	 * This call to smbfs_addfree will end up destroying the
	 * smbnode, but in a safe way with the appropriate set
	 * of checks done.
	 */
	while ((np = rlist) != NULL) {
		rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
		smbfs_addfree(np);
	}
}

/*
 * This routine destroys all the resources associated with the smbnode
 * and then the smbnode itself. Note: sn_inactive has been called.
 *
 * From NFS: nfs_subr.c:destroy_rnode
 */
static void
sn_destroy_node(smbnode_t *np)
{
	vnode_t *vp;
	vfs_t *vfsp;

	vp = SMBTOV(np);
	vfsp = vp->v_vfsp;

	ASSERT(vp->v_count == 1);
	ASSERT(np->r_count == 0);
	ASSERT(np->r_mapcnt == 0);
	ASSERT(np->r_secattr.vsa_aclentp == NULL);
	ASSERT(np->r_cred == NULL);
	ASSERT(np->n_rpath == NULL);
	ASSERT(!(np->r_flags & RHASHED));
	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
	atomic_dec_ulong((ulong_t *)&smbnodenew);
	vn_invalid(vp);
	vn_free(vp);
	kmem_cache_free(smbnode_cache, np);
	VFS_RELE(vfsp);
}

/*
 * From NFS rflush()
 * Flush all vnodes in this (or every) vfs.
 * Used by smbfs_sync and by smbfs_unmount.
 */
/*ARGSUSED*/
void
smbfs_rflush(struct vfs *vfsp, cred_t *cr)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp, **vplist;
	long num, cnt;

	mi = VFTOSMI(vfsp);

	/*
	 * Check to see whether there is anything to do.
	 */
	num = avl_numnodes(&mi->smi_hash_avl);
	if (num == 0)
		return;

	/*
	 * Allocate a slot for all currently active rnodes on the
	 * supposition that they all may need flushing.
	 */
	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
	cnt = 0;

	/*
	 * Walk the AVL tree looking for rnodes with page
	 * lists associated with them. Make a list of these
	 * files.
	 */
	rw_enter(&mi->smi_hash_lk, RW_READER);
	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
		vp = SMBTOV(np);
		/*
		 * Don't bother sync'ing a vp if it
		 * is part of virtual swap device or
		 * if VFS is read-only
		 */
		if (IS_SWAPVP(vp) || vn_is_readonly(vp))
			continue;
		/*
		 * If the vnode has pages and is marked as either
		 * dirty or mmap'd, hold and add this vnode to the
		 * list of vnodes to flush.
		 */
		if (vn_has_cached_data(vp) &&
		    ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
			VN_HOLD(vp);
			vplist[cnt++] = vp;
			if (cnt == num)
				break;
		}
	}
	rw_exit(&mi->smi_hash_lk);

	/*
	 * Flush and release all of the files on the list.
	 */
	while (cnt-- > 0) {
		vp = vplist[cnt];
		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
		VN_RELE(vp);
	}

	kmem_free(vplist, num * sizeof (vnode_t *));
}

/* Here NFS has access cache stuff (nfs_subr.c) not used here */

/*
 * Set or Clear direct I/O flag
 * VOP_RWLOCK() is held for write access to prevent a race condition
 * which would occur if a process is in the middle of a write when
 * directio flag gets set. It is possible that all pages may not get flushed.
 * From nfs_common.c
 */

/* ARGSUSED */
int
smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
{
	int error = 0;
	smbnode_t *np;

	np = VTOSMB(vp);

	if (cmd == DIRECTIO_ON) {

		if (np->r_flags & RDIRECTIO)
			return (0);

		/*
		 * Flush the page cache.
		 */

		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

		if (np->r_flags & RDIRECTIO) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
			return (0);
		}

		/* Here NFS also checks ->r_awcount */
		if (vn_has_cached_data(vp) &&
		    (np->r_flags & RDIRTY) != 0) {
			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
			    B_INVAL, cr, NULL);
			if (error) {
				if (error == ENOSPC || error == EDQUOT) {
					mutex_enter(&np->r_statelock);
					if (!np->r_error)
						np->r_error = error;
					mutex_exit(&np->r_statelock);
				}
				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
				return (error);
			}
		}

		mutex_enter(&np->r_statelock);
		np->r_flags |= RDIRECTIO;
		mutex_exit(&np->r_statelock);
		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
		return (0);
	}

	if (cmd == DIRECTIO_OFF) {
		mutex_enter(&np->r_statelock);
		np->r_flags &= ~RDIRECTIO;	/* disable direct mode */
		mutex_exit(&np->r_statelock);
		return (0);
	}

	return (EINVAL);
}

static kmutex_t smbfs_newnum_lock;
static uint32_t smbfs_newnum_val = 0;

/*
 * Return a number 0..0xffffffff that's different from the last
 * 0xffffffff numbers this returned. Used for unlinked files.
 * From NFS nfs_subr.c newnum
 */
uint32_t
smbfs_newnum(void)
{
	uint32_t id;

	mutex_enter(&smbfs_newnum_lock);
	if (smbfs_newnum_val == 0)
		smbfs_newnum_val = (uint32_t)gethrestime_sec();
	id = smbfs_newnum_val++;
	mutex_exit(&smbfs_newnum_lock);
	return (id);
}

/*
 * Fill in a temporary name at buf
 */
int
smbfs_newname(char *buf, size_t buflen)
{
	uint_t id;
	int n;

	id = smbfs_newnum();
	n = snprintf(buf, buflen, "~$smbfs%08X", id);
	return (n);
}
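
/*
 * For example (illustrative only), a hypothetical id of 0x001A2B3C
 * would yield the name "~$smbfs001A2B3C" in buf. Per the comment on
 * smbfs_newnum above, these temporary names are used for files that
 * are unlinked while still in use.
 */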


/*
 * Initialize resources that are used by smbfs_subr.c.
 * This is called from the _init() routine (by way of smbfs_clntinit()).
 *
 * From NFS: nfs_subr.c:nfs_subrinit
 */
int
smbfs_subrinit(void)
{
	ulong_t nsmbnode_max;

	/*
	 * Allocate and initialize the smbnode cache
	 */
	if (nsmbnode <= 0)
		nsmbnode = ncsize; /* dnlc.h */
	nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
	    sizeof (struct smbnode));
	if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
		cmn_err(CE_NOTE,
		    "setting nsmbnode to max value of %ld", nsmbnode_max);
		nsmbnode = nsmbnode_max;
	}

	smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
	    0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);

	/*
	 * Initialize the various mutexes and reader/writer locks
	 */
	mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Assign unique major number for all smbfs mounts
	 */
	if ((smbfs_major = getudev()) == -1) {
		cmn_err(CE_WARN,
		    "smbfs: init: can't get unique device number");
		smbfs_major = 0;
	}
	smbfs_minor = 0;

	return (0);
}

/*
 * free smbfs hash table, etc.
 * From NFS: nfs_subr.c:nfs_subrfini
 */
void
smbfs_subrfini(void)
{

	/*
	 * Destroy the smbnode cache
	 */
	kmem_cache_destroy(smbnode_cache);

	/*
	 * Destroy the various mutexes and reader/writer locks
	 */
	mutex_destroy(&smbfreelist_lock);
	mutex_destroy(&smbfs_minor_lock);
}

/* rddir_cache ? */

/*
 * Support functions for smbfs_kmem_reclaim
 */

static void
smbfs_node_reclaim(void)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp;

	mutex_enter(&smbfreelist_lock);
	while ((np = smbfreelist) != NULL) {
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);
		if (np->r_flags & RHASHED) {
			vp = SMBTOV(np);
			mi = np->n_mount;
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				mutex_enter(&smbfreelist_lock);
				continue;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}
		/*
		 * This call to smbfs_addfree will end up destroying the
		 * smbnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		smbfs_addfree(np);
		mutex_enter(&smbfreelist_lock);
	}
	mutex_exit(&smbfreelist_lock);
}

/*
 * Called by the kmem subsystem when memory is low, to ask us:
 * "Please give back some memory!"
 *
 * Todo: dump nodes from the free list?
 */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
	smbfs_node_reclaim();
}

/*
 * Here NFS has failover stuff and
 * nfs_rw_xxx - see smbfs_rwlock.c
 */