/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
 * All rights reserved.
 */
/*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 * Copyright 2018 Nexenta Systems, Inc.  All rights reserved.
 */

/*
 * Node hash implementation initially borrowed from NFS (nfs_subr.c)
 * but then heavily modified. It's no longer an array of hash lists,
 * but an AVL tree per mount point. More on this below.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/buf.h>
#include <sys/dnlc.h>
#include <sys/kmem.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/fcntl.h>

#include <netsmb/smb_osdep.h>

#include <netsmb/smb.h>
#include <netsmb/smb_conn.h>
#include <netsmb/smb_subr.h>
#include <netsmb/smb_rq.h>

#include <smbfs/smbfs.h>
#include <smbfs/smbfs_node.h>
#include <smbfs/smbfs_subr.h>
/*
 * The AVL trees (now per-mount) allow finding an smbfs node by its
 * full remote path name. It also allows easy traversal of all nodes
 * below (path wise) any given node. A reader/writer lock for each
 * (per mount) AVL tree is used to control access and to synchronize
 * lookups, additions, and deletions from that AVL tree.
 *
 * Previously, this code used a global array of hash chains, each with
 * its own rwlock. A few struct members, functions, and comments may
 * still refer to a "hash", and those should all now be considered to
 * refer to the per-mount AVL tree that replaced the old hash chains.
 * (e.g. member smi_hash_lk, function sn_hashfind, etc.)
 *
 * The smbnode freelist is organized as a doubly linked list with
 * a head pointer. Additions and deletions are synchronized via
 * a single mutex.
 *
 * In order to add an smbnode to the free list, it must be linked into
 * the mount's AVL tree and the exclusive lock for the AVL must be held.
 * If an smbnode is not linked into the AVL tree, then it is destroyed
 * because it represents no valuable information that can be reused
 * about the file. The exclusive lock for the AVL tree must be held
 * in order to prevent a lookup in the AVL tree from finding the
 * smbnode, using it, and assuming that the smbnode is not on the
 * freelist. The lookup in the AVL tree will have the AVL tree lock
 * held, either exclusive or shared.
 *
 * The vnode reference count for each smbnode is not allowed to drop
 * below 1. This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released. This means that when an
 * smbnode is looked up in the AVL tree, then either the smbnode
 * is removed from the freelist and that reference is transferred to
 * the new reference, or the vnode reference count must be incremented
 * accordingly. The mutex for the freelist must be held in order to
 * accurately test to see if the smbnode is on the freelist or not.
 * The AVL tree lock might be held shared and it is possible that
 * two different threads may race to remove the smbnode from the
 * freelist. This race can be resolved by holding the mutex for the
 * freelist. Please note that the mutex for the freelist does not
 * need to be held if the smbnode is not on the freelist. It cannot be
 * placed on the freelist due to the requirement that the thread
 * putting the smbnode on the freelist must hold the exclusive lock
 * for the AVL tree, and the thread doing the lookup in the AVL tree
 * is holding either a shared or exclusive lock for the AVL tree.
 *
 * The lock ordering is:
 *
 *	AVL tree lock -> vnode lock
 *	AVL tree lock -> freelist lock
 */
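
/*
 * To illustrate those ordering rules, here is a sketch (not real
 * code; the actual logic lives in smbfs_addfree below) of the
 * sequence a thread follows when moving a node onto the freelist:
 *
 *	rw_enter(&mi->smi_hash_lk, RW_WRITER);	// AVL tree lock first
 *	mutex_enter(&vp->v_lock);		// then the vnode lock
 *	... recheck vp->v_count ...
 *	mutex_exit(&vp->v_lock);
 *	mutex_enter(&smbfreelist_lock);		// then the freelist lock
 *	... link np onto smbfreelist ...
 *	mutex_exit(&smbfreelist_lock);
 *	rw_exit(&mi->smi_hash_lk);
 *
 * Taking these locks in any other order risks deadlock.
 */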

static kmutex_t smbfreelist_lock;
static smbnode_t *smbfreelist = NULL;
static ulong_t smbnodenew = 0;
long nsmbnode = 0;

static struct kmem_cache *smbnode_cache;

static const vsecattr_t smbfs_vsa0 = { 0 };

/*
 * Mutex to protect the following variables:
 *	smbfs_major
 *	smbfs_minor
 */
kmutex_t smbfs_minor_lock;
int smbfs_major;
int smbfs_minor;

/* See smbfs_node_findcreate() */
struct smbfattr smbfs_fattr0;

/*
 * Local functions.
 * SN for Smb Node
 */
static void sn_rmfree(smbnode_t *);
static void sn_inactive(smbnode_t *);
static void sn_addhash_locked(smbnode_t *, avl_index_t);
static void sn_rmhash_locked(smbnode_t *);
static void sn_destroy_node(smbnode_t *);
void smbfs_kmem_reclaim(void *cdrarg);

static smbnode_t *
sn_hashfind(smbmntinfo_t *, const char *, int, avl_index_t *);

static smbnode_t *
make_smbnode(smbmntinfo_t *, const char *, int, int *);

/*
 * Free the resources associated with an smbnode.
 * Note: This is different from smbfs_inactive
 *
 * From NFS: nfs_subr.c:rinactive
 */
static void
sn_inactive(smbnode_t *np)
{
	vsecattr_t	ovsa;
	cred_t		*oldcr;
	char		*orpath;
	int		orplen;
	vnode_t		*vp;

	/*
	 * Here NFS has:
	 * Flush and invalidate all pages (done by caller)
	 * Free any held credentials and caches...
	 * etc. (See NFS code)
	 */
	mutex_enter(&np->r_statelock);

	ovsa = np->r_secattr;
	np->r_secattr = smbfs_vsa0;
	np->r_sectime = 0;

	oldcr = np->r_cred;
	np->r_cred = NULL;

	orpath = np->n_rpath;
	orplen = np->n_rplen;
	np->n_rpath = NULL;
	np->n_rplen = 0;

	mutex_exit(&np->r_statelock);

	/*
	 * Paranoid check: all cached pages should have been
	 * flushed and invalidated before we get here, so trip
	 * an assertion on DEBUG kernels if any remain.
	 */
	vp = SMBTOV(np);
	if (vn_has_cached_data(vp)) {
		ASSERT3P(vp, ==, NULL);
	}

	if (ovsa.vsa_aclentp != NULL)
		kmem_free(ovsa.vsa_aclentp, ovsa.vsa_aclentsz);

	if (oldcr != NULL)
		crfree(oldcr);

	if (orpath != NULL)
		kmem_free(orpath, orplen + 1);
}

/*
 * Find and optionally create an smbnode for the passed
 * mountinfo, directory, separator, and name. If the
 * desired smbnode already exists, return a reference.
 * If the file attributes pointer is non-null, the node
 * is created if necessary and linked into the AVL tree.
 *
 * Callers that need a node created but don't have the
 * real attributes pass smbfs_fattr0 to force creation.
 *
 * Note: make_smbnode() may upgrade the "hash" lock to exclusive.
 *
 * Based on NFS: nfs_subr.c:makenfsnode
 */
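/*
 * A sketch of the three ways this is typically called (illustrative
 * only; see the real callers elsewhere in the smbfs module):
 *
 *	// Lookup only; may return NULL:
 *	np = smbfs_node_findcreate(mi, rpath, rplen, NULL, 0, '\0', NULL);
 *
 *	// Find or create, no attributes known yet:
 *	np = smbfs_node_findcreate(mi, dir, dlen, nm, nmlen, '\\',
 *	    &smbfs_fattr0);
 *
 *	// Find or create, applying attributes from the server:
 *	np = smbfs_node_findcreate(mi, dir, dlen, nm, nmlen, '\\',
 *	    &fattr);
 */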
smbnode_t *
smbfs_node_findcreate(
	smbmntinfo_t *mi,
	const char *dirnm,
	int dirlen,
	const char *name,
	int nmlen,
	char sep,
	struct smbfattr *fap)
{
	char tmpbuf[256];
	size_t rpalloc;
	char *p, *rpath;
	int rplen;
	smbnode_t *np;
	vnode_t *vp;
	int newnode;

	/*
	 * Build the search string, either in tmpbuf or
	 * in allocated memory if larger than tmpbuf.
	 */
	rplen = dirlen;
	if (sep != '\0')
		rplen++;
	rplen += nmlen;
	if (rplen < sizeof (tmpbuf)) {
		/* use tmpbuf */
		rpalloc = 0;
		rpath = tmpbuf;
	} else {
		rpalloc = rplen + 1;
		rpath = kmem_alloc(rpalloc, KM_SLEEP);
	}
	p = rpath;
	bcopy(dirnm, p, dirlen);
	p += dirlen;
	if (sep != '\0')
		*p++ = sep;
	if (name != NULL) {
		bcopy(name, p, nmlen);
		p += nmlen;
	}
	ASSERT(p == rpath + rplen);

	/*
	 * Find or create a node with this path.
	 */
	rw_enter(&mi->smi_hash_lk, RW_READER);
	if (fap == NULL)
		np = sn_hashfind(mi, rpath, rplen, NULL);
	else
		np = make_smbnode(mi, rpath, rplen, &newnode);
	rw_exit(&mi->smi_hash_lk);

	if (rpalloc)
		kmem_free(rpath, rpalloc);

	if (fap == NULL) {
		/*
		 * Caller is "just looking" (no create)
		 * so np may or may not be NULL here.
		 * Either way, we're done.
		 */
		return (np);
	}

	/*
	 * We should have a node, possibly created.
	 * Do we have (real) attributes to apply?
	 */
	ASSERT(np != NULL);
	if (fap == &smbfs_fattr0)
		return (np);

	/*
	 * Apply the given attributes to this node,
	 * dealing with any cache impact, etc.
	 */
	vp = SMBTOV(np);
	smbfs_attrcache_fa(vp, fap);

	/*
	 * Note NFS sets vp->v_type here, assuming it
	 * can never change for the life of a node.
	 * We allow v_type to change, and set it in
	 * smbfs_attrcache(). Also: mode, uid, gid
	 */
	return (np);
}

/*
 * Here NFS has: nfs_subr.c:rtablehash
 * We use smbfs_hash().
 */

/*
 * Find or create an smbnode.
 * From NFS: nfs_subr.c:make_rnode
 */
static smbnode_t *
make_smbnode(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	int *newnode)
{
	smbnode_t *np;
	smbnode_t *tnp;
	vnode_t *vp;
	vfs_t *vfsp;
	avl_index_t where;
	char *new_rpath = NULL;

	ASSERT(RW_READ_HELD(&mi->smi_hash_lk));
	vfsp = mi->smi_vfsp;

start:
	np = sn_hashfind(mi, rpath, rplen, NULL);
	if (np != NULL) {
		*newnode = 0;
		return (np);
	}

	/* Note: will retake this lock below. */
	rw_exit(&mi->smi_hash_lk);

	/*
	 * See if we can find something on the freelist.
	 * Note we only recycle a freelist node once the number of
	 * allocated smbnodes has reached the nsmbnode limit;
	 * otherwise a fresh node is allocated below.
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist != NULL && smbnodenew >= nsmbnode) {
		np = smbfreelist;
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);

		vp = SMBTOV(np);

		if (np->r_flags & RHASHED) {
			smbmntinfo_t *tmp_mi = np->n_mount;
			ASSERT(tmp_mi != NULL);
			rw_enter(&tmp_mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&tmp_mi->smi_hash_lk);
				/* start over */
				rw_enter(&mi->smi_hash_lk, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&tmp_mi->smi_hash_lk);
		}

		sn_inactive(np);

		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			rw_enter(&mi->smi_hash_lk, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		smbfs_rw_destroy(&np->r_rwlock);
		smbfs_rw_destroy(&np->r_lkserlock);
		mutex_destroy(&np->r_statelock);
		cv_destroy(&np->r_cv);
		/*
		 * Make sure that if smbnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/*
		 * allocate and initialize a new smbnode
		 */
		vnode_t *new_vp;

		mutex_exit(&smbfreelist_lock);

		np = kmem_cache_alloc(smbnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_inc_ulong((ulong_t *)&smbnodenew);
		vp = new_vp;
	}

	/*
	 * Allocate and copy the rpath we'll need below.
	 */
	new_rpath = kmem_alloc(rplen + 1, KM_SLEEP);
	bcopy(rpath, new_rpath, rplen);
	new_rpath[rplen] = '\0';

	/* Initialize smbnode_t */
	bzero(np, sizeof (*np));

	smbfs_rw_init(&np->r_rwlock, NULL, RW_DEFAULT, NULL);
	smbfs_rw_init(&np->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&np->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&np->r_cv, NULL, CV_DEFAULT, NULL);
	/* cv_init(&np->r_commit.c_cv, NULL, CV_DEFAULT, NULL); */

	np->r_vnode = vp;
	np->n_mount = mi;

	np->n_fid = NULL;
	np->n_uid = mi->smi_uid;
	np->n_gid = mi->smi_gid;
	/* Leave attributes "stale." */

	/*
	 * Here NFS has avl_create(&np->r_dir, ...)
	 * for the readdir cache (not used here).
	 */

	/* Now fill in the vnode. */
	vn_setops(vp, smbfs_vnodeops);
	vp->v_data = (caddr_t)np;
	VFS_HOLD(vfsp);
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;

	/*
	 * We entered with mi->smi_hash_lk held (reader).
	 * Retake it now, (as the writer).
	 * Will return with it held.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	/*
	 * There is a race condition where someone else
	 * may alloc the smbnode while no locks are held,
	 * so check again and recover if found.
	 */
	tnp = sn_hashfind(mi, rpath, rplen, &where);
	if (tnp != NULL) {
		/*
		 * Lost the race. Put the node we were building
		 * on the free list and return the one we found.
		 */
		rw_exit(&mi->smi_hash_lk);
		kmem_free(new_rpath, rplen + 1);
		smbfs_addfree(np);
		rw_enter(&mi->smi_hash_lk, RW_READER);
		*newnode = 0;
		return (tnp);
	}

	/*
	 * Hash search identifies nodes by the remote path
	 * (n_rpath) so fill that in now, before linking
	 * this node into the node cache (AVL tree).
	 */
	np->n_rpath = new_rpath;
	np->n_rplen = rplen;
	np->n_ino = smbfs_gethash(new_rpath, rplen);

	sn_addhash_locked(np, where);
	*newnode = 1;
	return (np);
}

/*
 * smbfs_addfree
 * Put an smbnode on the free list, or destroy it immediately
 * if it offers no value were it to be reclaimed later. Also
 * destroy immediately when we have too many smbnodes, etc.
 *
 * Normally called by smbfs_inactive, but also
 * called in here during cleanup operations.
 *
 * From NFS: nfs_subr.c:rp_addfree
 */
void
smbfs_addfree(smbnode_t *np)
{
	vnode_t *vp;
	struct vfs *vfsp;
	smbmntinfo_t *mi;

	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);

	vp = SMBTOV(np);
	ASSERT(vp->v_count >= 1);

	vfsp = vp->v_vfsp;
	mi = VFTOSMI(vfsp);

	/*
	 * If there are no more references to this smbnode and:
	 * we have too many smbnodes allocated, or if the node
	 * is no longer accessible via the AVL tree (!RHASHED),
	 * or an i/o error occurred while writing to the file,
	 * or it's part of an unmounted FS, then try to destroy
	 * it instead of putting it on the smbnode freelist.
	 */
	if (np->r_count == 0 && (
	    (np->r_flags & RHASHED) == 0 ||
	    (np->r_error != 0) ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED) ||
	    (smbnodenew > nsmbnode))) {

		/* Try to destroy this node. */

		if (np->r_flags & RHASHED) {
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				return;
				/*
				 * Will get another call later,
				 * via smbfs_inactive.
				 */
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}

		sn_inactive(np);

		/*
		 * Recheck the vnode reference count. We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock. The
		 * smbnode is not in the smbnode "hash" AVL tree, so
		 * the only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the smbnode was marked
		 * with RDIRTY or for a modified page. This vnode
		 * reference may have been acquired before our call
		 * to sn_inactive. The i/o may have been completed,
		 * thus allowing sn_inactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet. In any case, the smbnode cannot be destroyed
		 * until the other references to this vnode have been
		 * released. The other references will take care of
		 * either destroying the smbnode or placing it on the
		 * smbnode freelist. If there are no other references,
		 * then the smbnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		sn_destroy_node(np);
		return;
	}

	/*
	 * Lock the AVL tree and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the smbnode should not be placed on the
	 * freelist. If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this smbnode to the freelist.
	 */
	rw_enter(&mi->smi_hash_lk, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		rw_exit(&mi->smi_hash_lk);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * Put this node on the free list.
	 */
	mutex_enter(&smbfreelist_lock);
	if (smbfreelist == NULL) {
		np->r_freef = np;
		np->r_freeb = np;
		smbfreelist = np;
	} else {
		np->r_freef = smbfreelist;
		np->r_freeb = smbfreelist->r_freeb;
		smbfreelist->r_freeb->r_freef = np;
		smbfreelist->r_freeb = np;
	}
	mutex_exit(&smbfreelist_lock);

	rw_exit(&mi->smi_hash_lk);
}

/*
 * Remove an smbnode from the free list.
 *
 * The caller must be holding smbfreelist_lock and the smbnode
 * must be on the freelist.
 *
 * From NFS: nfs_subr.c:rp_rmfree
 */
static void
sn_rmfree(smbnode_t *np)
{

	ASSERT(MUTEX_HELD(&smbfreelist_lock));
	ASSERT(np->r_freef != NULL && np->r_freeb != NULL);

	if (np == smbfreelist) {
		smbfreelist = np->r_freef;
		if (np == smbfreelist)
			smbfreelist = NULL;
	}

	np->r_freeb->r_freef = np->r_freef;
	np->r_freef->r_freeb = np->r_freeb;

	np->r_freef = np->r_freeb = NULL;
}

/*
 * Put an smbnode in the "hash" AVL tree.
 *
 * The caller must hold the rwlock as writer.
 *
 * From NFS: nfs_subr.c:rp_addhash
 */
static void
sn_addhash_locked(smbnode_t *np, avl_index_t where)
{
	smbmntinfo_t *mi = np->n_mount;

	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));

	mutex_enter(&np->r_statelock);
	if ((np->r_flags & RHASHED) == 0) {
		avl_insert(&mi->smi_hash_avl, np, where);
		np->r_flags |= RHASHED;
	}
	mutex_exit(&np->r_statelock);
}

/*
 * Remove an smbnode from the "hash" AVL tree.
 *
 * The caller must hold the rwlock as writer.
 *
 * From NFS: nfs_subr.c:rp_rmhash_locked
 */
static void
sn_rmhash_locked(smbnode_t *np)
{
	smbmntinfo_t *mi = np->n_mount;

	ASSERT(RW_WRITE_HELD(&mi->smi_hash_lk));

	mutex_enter(&np->r_statelock);
	if ((np->r_flags & RHASHED) != 0) {
		np->r_flags &= ~RHASHED;
		avl_remove(&mi->smi_hash_avl, np);
	}
	mutex_exit(&np->r_statelock);
}

/*
 * Remove an smbnode from the "hash" AVL tree.
 *
 * The caller must not be holding the rwlock.
 */
void
smbfs_rmhash(smbnode_t *np)
{
	smbmntinfo_t *mi = np->n_mount;

	rw_enter(&mi->smi_hash_lk, RW_WRITER);
	sn_rmhash_locked(np);
	rw_exit(&mi->smi_hash_lk);
}

/*
 * Lookup an smbnode by remote pathname
 *
 * The caller must be holding the AVL rwlock, either shared or exclusive.
 *
 * From NFS: nfs_subr.c:rfind
 */
static smbnode_t *
sn_hashfind(
	smbmntinfo_t *mi,
	const char *rpath,
	int rplen,
	avl_index_t *pwhere)	/* optional */
{
	smbfs_node_hdr_t nhdr;
	smbnode_t *np;
	vnode_t *vp;

	ASSERT(RW_LOCK_HELD(&mi->smi_hash_lk));

	bzero(&nhdr, sizeof (nhdr));
	nhdr.hdr_n_rpath = (char *)rpath;
	nhdr.hdr_n_rplen = rplen;

	/* See smbfs_node_cmp below. */
	np = avl_find(&mi->smi_hash_avl, &nhdr, pwhere);

	if (np == NULL)
		return (NULL);

	/*
	 * Found it in the "hash" AVL tree.
	 * Remove from free list, if necessary.
	 */
	vp = SMBTOV(np);
	if (np->r_freef != NULL) {
		mutex_enter(&smbfreelist_lock);
		/*
		 * If the smbnode is on the freelist,
		 * then remove it and use that reference
		 * as the new reference. Otherwise,
		 * need to increment the reference count.
		 */
		if (np->r_freef != NULL) {
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);
		} else {
			mutex_exit(&smbfreelist_lock);
			VN_HOLD(vp);
		}
	} else
		VN_HOLD(vp);

	return (np);
}

static int
smbfs_node_cmp(const void *va, const void *vb)
{
	const smbfs_node_hdr_t *a = va;
	const smbfs_node_hdr_t *b = vb;
	int clen, diff;

	/*
	 * Same semantics as strcmp, but does not
	 * assume the strings are null terminated.
	 */
	clen = (a->hdr_n_rplen < b->hdr_n_rplen) ?
	    a->hdr_n_rplen : b->hdr_n_rplen;
	diff = strncmp(a->hdr_n_rpath, b->hdr_n_rpath, clen);
	if (diff < 0)
		return (-1);
	if (diff > 0)
		return (1);
	/* they match through clen */
	if (b->hdr_n_rplen > clen)
		return (-1);
	if (a->hdr_n_rplen > clen)
		return (1);
	return (0);
}
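
/*
 * Note: the ordering this comparator induces is plain byte-wise
 * order over the whole remote path, not per path component, e.g.
 *	"foo" < "foo bar" < "foo\bar"
 * because ' ' (0x20) sorts before '\\' (0x5c). The subtree walk
 * in smbfs_attrcache_prune below must allow for exactly this.
 */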

/*
 * Setup the "hash" AVL tree used for our node cache.
 * See: smbfs_mount, smbfs_destroy_table.
 */
void
smbfs_init_hash_avl(avl_tree_t *avl)
{
	avl_create(avl, smbfs_node_cmp, sizeof (smbnode_t),
	    offsetof(smbnode_t, r_avl_node));
}

/*
 * Invalidate the cached attributes for all nodes "under" the
 * passed-in node. Note: the passed-in node is NOT affected by
 * this call. This is used both for files under some directory
 * after the directory is deleted or renamed, and for extended
 * attribute files (named streams) under a plain file after that
 * file is renamed or deleted.
 *
 * Do this by walking the AVL tree starting at the passed in node,
 * and continuing while the visited nodes have a path prefix matching
 * the entire path of the passed-in node, and a separator just after
 * that matching path prefix. Watch out for cases where the AVL tree
 * order may not exactly match the order of an FS walk, i.e.
 * consider this sequence:
 *	"foo"		(directory)
 *	"foo bar"	(name containing a space)
 *	"foo/bar"
 * The walk needs to skip "foo bar" and keep going until it finds
 * something that doesn't match the "foo" name prefix.
 */
void
smbfs_attrcache_prune(smbnode_t *top_np)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	char *rpath;
	int rplen;

	mi = top_np->n_mount;
	rw_enter(&mi->smi_hash_lk, RW_READER);

	np = top_np;
	rpath = top_np->n_rpath;
	rplen = top_np->n_rplen;
	for (;;) {
		np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER);
		if (np == NULL)
			break;
		if (np->n_rplen < rplen)
			break;
		if (0 != strncmp(np->n_rpath, rpath, rplen))
			break;
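		/*
		 * Same path prefix. The node is "under" top_np
		 * only if the next byte is a separator: ':'
		 * starts a named stream (extended attribute)
		 * name, and '\\' starts a child path component.
		 * Anything else (the "foo bar" case above) is
		 * simply skipped.
		 */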
		if (np->n_rplen > rplen && (
		    np->n_rpath[rplen] == ':' ||
		    np->n_rpath[rplen] == '\\'))
			smbfs_attrcache_remove(np);
	}

	rw_exit(&mi->smi_hash_lk);
}

#ifdef SMB_VNODE_DEBUG
int smbfs_check_table_debug = 1;
#else /* SMB_VNODE_DEBUG */
int smbfs_check_table_debug = 0;
#endif /* SMB_VNODE_DEBUG */


/*
 * Return nonzero (a count of busy nodes) if there is an active
 * vnode belonging to this vfs in the smbnode cache.
 *
 * Several of these checks are done without holding the usual
 * locks. This is safe because destroy_smbtable(), smbfs_addfree(),
 * etc. will redo the necessary checks before actually destroying
 * any smbnodes.
 *
 * From NFS: nfs_subr.c:check_rtable
 *
 * Debugging changes here relative to NFS.
 * Relatively harmless, so left 'em in.
 */
int
smbfs_check_table(struct vfs *vfsp, smbnode_t *rtnp)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp;
	int busycnt = 0;

	mi = VFTOSMI(vfsp);
	rw_enter(&mi->smi_hash_lk, RW_READER);
	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {

		if (np == rtnp)
			continue; /* skip the root */
		vp = SMBTOV(np);

		/* Now the 'busy' checks: */
		/* Not on the free list? */
		if (np->r_freef == NULL) {
			SMBVDEBUG("!r_freef: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		/* Has dirty pages? */
		if (vn_has_cached_data(vp) &&
		    (np->r_flags & RDIRTY)) {
			SMBVDEBUG("is dirty: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		/* Other refs? (not reflected in v_count) */
		if (np->r_count > 0) {
			SMBVDEBUG("+r_count: node=0x%p, rpath=%s\n",
			    (void *)np, np->n_rpath);
			busycnt++;
		}

		if (busycnt && !smbfs_check_table_debug)
			break;
	}
	rw_exit(&mi->smi_hash_lk);

	return (busycnt);
}

/*
 * Destroy inactive vnodes from the AVL tree which belong to this
 * vfs. It is essential that we destroy all inactive vnodes during a
 * forced unmount as well as during a normal unmount.
 *
 * Based on NFS: nfs_subr.c:destroy_rtable
 *
 * In here, we're normally destroying all or most of the AVL tree,
 * so the natural choice is to use avl_destroy_nodes. However,
 * there may be a few busy nodes that should remain in the AVL
 * tree when we're done. The solution: use a temporary tree to
 * hold the busy nodes until we're done destroying the old tree,
 * then copy the temporary tree over the (now empty) real tree.
 */
void
smbfs_destroy_table(struct vfs *vfsp)
{
	avl_tree_t tmp_avl;
	smbmntinfo_t *mi;
	smbnode_t *np;
	smbnode_t *rlist;
	void *v;

	mi = VFTOSMI(vfsp);
	rlist = NULL;
	smbfs_init_hash_avl(&tmp_avl);

	rw_enter(&mi->smi_hash_lk, RW_WRITER);
	v = NULL;
	while ((np = avl_destroy_nodes(&mi->smi_hash_avl, &v)) != NULL) {

		mutex_enter(&smbfreelist_lock);
		if (np->r_freef == NULL) {
			/*
			 * Busy node (not on the free list).
			 * Will keep in the final AVL tree.
			 */
			mutex_exit(&smbfreelist_lock);
			avl_add(&tmp_avl, np);
		} else {
			/*
			 * It's on the free list. Remove and
			 * arrange for it to be destroyed.
			 */
			sn_rmfree(np);
			mutex_exit(&smbfreelist_lock);

			/*
			 * Last part of sn_rmhash_locked().
			 * NB: avl_destroy_nodes has already
			 * removed this from the "hash" AVL.
			 */
			mutex_enter(&np->r_statelock);
			np->r_flags &= ~RHASHED;
			mutex_exit(&np->r_statelock);

			/*
			 * Add to the list of nodes to destroy.
			 * Borrowing avl_child[0] for this list.
			 */
			np->r_avl_node.avl_child[0] =
			    (struct avl_node *)rlist;
			rlist = np;
		}
	}
	avl_destroy(&mi->smi_hash_avl);

	/*
	 * Replace the (now destroyed) "hash" AVL with the
	 * temporary AVL, which restores the busy nodes.
	 */
	mi->smi_hash_avl = tmp_avl;
	rw_exit(&mi->smi_hash_lk);

	/*
	 * Now destroy the nodes on our temporary list (rlist).
	 * This call to smbfs_addfree will end up destroying the
	 * smbnode, but in a safe way with the appropriate set
	 * of checks done.
	 */
	while ((np = rlist) != NULL) {
		rlist = (smbnode_t *)np->r_avl_node.avl_child[0];
		smbfs_addfree(np);
	}
}

/*
 * This routine destroys all the resources associated with the smbnode
 * and then the smbnode itself. Note: sn_inactive has been called.
 *
 * From NFS: nfs_subr.c:destroy_rnode
 */
static void
sn_destroy_node(smbnode_t *np)
{
	vnode_t *vp;
	vfs_t *vfsp;

	vp = SMBTOV(np);
	vfsp = vp->v_vfsp;

	ASSERT(vp->v_count == 1);
	ASSERT(np->r_count == 0);
	ASSERT(np->r_mapcnt == 0);
	ASSERT(np->r_secattr.vsa_aclentp == NULL);
	ASSERT(np->r_cred == NULL);
	ASSERT(np->n_rpath == NULL);
	ASSERT(!(np->r_flags & RHASHED));
	ASSERT(np->r_freef == NULL && np->r_freeb == NULL);
	atomic_dec_ulong((ulong_t *)&smbnodenew);
	vn_invalid(vp);
	vn_free(vp);
	kmem_cache_free(smbnode_cache, np);
	VFS_RELE(vfsp);
}

/*
 * From NFS rflush()
 * Flush all vnodes in this (or every) vfs.
 * Used by smbfs_sync and by smbfs_unmount.
 */
/*ARGSUSED*/
void
smbfs_rflush(struct vfs *vfsp, cred_t *cr)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp, **vplist;
	long num, cnt;

	mi = VFTOSMI(vfsp);

	/*
	 * Check to see whether there is anything to do.
	 */
	num = avl_numnodes(&mi->smi_hash_avl);
	if (num == 0)
		return;

	/*
	 * Allocate a slot for all currently active smbnodes on the
	 * supposition that they all may need flushing.
	 */
	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
	cnt = 0;

	/*
	 * Walk the AVL tree looking for smbnodes with page
	 * lists associated with them. Make a list of these
	 * files.
	 */
	rw_enter(&mi->smi_hash_lk, RW_READER);
	for (np = avl_first(&mi->smi_hash_avl); np != NULL;
	    np = avl_walk(&mi->smi_hash_avl, np, AVL_AFTER)) {
		vp = SMBTOV(np);
		/*
		 * Don't bother sync'ing a vp if it
		 * is part of virtual swap device or
		 * if VFS is read-only
		 */
		if (IS_SWAPVP(vp) || vn_is_readonly(vp))
			continue;
		/*
		 * If the vnode has pages and is marked as either
		 * dirty or mmap'd, hold and add this vnode to the
		 * list of vnodes to flush.
		 */
		if (vn_has_cached_data(vp) &&
		    ((np->r_flags & RDIRTY) || np->r_mapcnt > 0)) {
			VN_HOLD(vp);
			vplist[cnt++] = vp;
			if (cnt == num)
				break;
		}
	}
	rw_exit(&mi->smi_hash_lk);

	/*
	 * Flush and release all of the files on the list.
	 */
	while (cnt-- > 0) {
		vp = vplist[cnt];
		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
		VN_RELE(vp);
	}

	kmem_free(vplist, num * sizeof (vnode_t *));
}

/* Here NFS has access cache stuff (nfs_subr.c) not used here */

/*
 * Set or Clear direct I/O flag
 * VOP_RWLOCK() is held for write access to prevent a race condition
 * which would occur if a process is in the middle of a write when
 * directio flag gets set. It is possible that all pages may not get flushed.
 * From nfs_common.c
 */

/* ARGSUSED */
int
smbfs_directio(vnode_t *vp, int cmd, cred_t *cr)
{
	int error = 0;
	smbnode_t *np;

	np = VTOSMB(vp);

	if (cmd == DIRECTIO_ON) {

		if (np->r_flags & RDIRECTIO)
			return (0);

		/*
		 * Flush the page cache.
		 */

		(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

		if (np->r_flags & RDIRECTIO) {
			VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
			return (0);
		}

		/* Here NFS also checks ->r_awcount */
		if (vn_has_cached_data(vp) &&
		    (np->r_flags & RDIRTY) != 0) {
			error = VOP_PUTPAGE(vp, (offset_t)0, (uint_t)0,
			    B_INVAL, cr, NULL);
			if (error) {
				if (error == ENOSPC || error == EDQUOT) {
					mutex_enter(&np->r_statelock);
					if (!np->r_error)
						np->r_error = error;
					mutex_exit(&np->r_statelock);
				}
				VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
				return (error);
			}
		}

		mutex_enter(&np->r_statelock);
		np->r_flags |= RDIRECTIO;
		mutex_exit(&np->r_statelock);
		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
		return (0);
	}

	if (cmd == DIRECTIO_OFF) {
		mutex_enter(&np->r_statelock);
		np->r_flags &= ~RDIRECTIO;	/* disable direct mode */
		mutex_exit(&np->r_statelock);
		return (0);
	}

	return (EINVAL);
}

static kmutex_t smbfs_newnum_lock;
static uint32_t smbfs_newnum_val = 0;

/*
 * Return a number 0..0xffffffff that's different from the last
 * 0xffffffff numbers this returned. Used for unlinked files.
 * From NFS nfs_subr.c newnum
 */
uint32_t
smbfs_newnum(void)
{
	uint32_t id;

	mutex_enter(&smbfs_newnum_lock);
	if (smbfs_newnum_val == 0)
		smbfs_newnum_val = (uint32_t)gethrestime_sec();
	id = smbfs_newnum_val++;
	mutex_exit(&smbfs_newnum_lock);
	return (id);
}

/*
 * Fill in a temporary name at buf
 */
int
smbfs_newname(char *buf, size_t buflen)
{
	uint_t id;
	int n;

	id = smbfs_newnum();
	n = snprintf(buf, buflen, "~$smbfs%08X", id);
	return (n);
}
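
/*
 * The name filled in looks like "~$smbfs4F2A9C01". Such names are
 * used, for example, when a file being removed is still open and
 * must be renamed out of the way first (compare the ".nfsXXXX"
 * silly-rename trick in NFS).
 */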

/*
 * initialize resources that are used by smbfs_subr.c
 * this is called from the _init() routine (by the way of smbfs_clntinit())
 *
 * From NFS: nfs_subr.c:nfs_subrinit
 */
int
smbfs_subrinit(void)
{
	ulong_t nsmbnode_max;

	/*
	 * Allocate and initialize the smbnode cache
	 */
	if (nsmbnode <= 0)
		nsmbnode = ncsize; /* dnlc.h */
	nsmbnode_max = (ulong_t)((kmem_maxavail() >> 2) /
	    sizeof (struct smbnode));
	if (nsmbnode > nsmbnode_max || (nsmbnode == 0 && ncsize == 0)) {
		cmn_err(CE_NOTE,
		    "setting nsmbnode to max value of %ld", nsmbnode_max);
		nsmbnode = nsmbnode_max;
	}

	smbnode_cache = kmem_cache_create("smbnode_cache", sizeof (smbnode_t),
	    0, NULL, NULL, smbfs_kmem_reclaim, NULL, NULL, 0);

	/*
	 * Initialize the various mutexes and reader/writer locks
	 */
	mutex_init(&smbfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&smbfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Assign unique major number for all smbfs mounts
	 */
	if ((smbfs_major = getudev()) == -1) {
		cmn_err(CE_WARN,
		    "smbfs: init: can't get unique device number");
		smbfs_major = 0;
	}
	smbfs_minor = 0;

	return (0);
}

/*
 * free smbfs hash table, etc.
 * From NFS: nfs_subr.c:nfs_subrfini
 */
void
smbfs_subrfini(void)
{

	/*
	 * Destroy the smbnode cache
	 */
	kmem_cache_destroy(smbnode_cache);

	/*
	 * Destroy the various mutexes and reader/writer locks
	 */
	mutex_destroy(&smbfreelist_lock);
	mutex_destroy(&smbfs_minor_lock);
}

/* rddir_cache ? */

/*
 * Support functions for smbfs_kmem_reclaim
 */

static void
smbfs_node_reclaim(void)
{
	smbmntinfo_t *mi;
	smbnode_t *np;
	vnode_t *vp;

	mutex_enter(&smbfreelist_lock);
	while ((np = smbfreelist) != NULL) {
		sn_rmfree(np);
		mutex_exit(&smbfreelist_lock);
		if (np->r_flags & RHASHED) {
			vp = SMBTOV(np);
			mi = np->n_mount;
			rw_enter(&mi->smi_hash_lk, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&mi->smi_hash_lk);
				mutex_enter(&smbfreelist_lock);
				continue;
			}
			mutex_exit(&vp->v_lock);
			sn_rmhash_locked(np);
			rw_exit(&mi->smi_hash_lk);
		}
		/*
		 * This call to smbfs_addfree will end up destroying the
		 * smbnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		smbfs_addfree(np);
		mutex_enter(&smbfreelist_lock);
	}
	mutex_exit(&smbfreelist_lock);
}

/*
 * The kmem_cache reclaim callback (registered via kmem_cache_create
 * in smbfs_subrinit), called when the system is low on memory to
 * ask us: "Please give back some memory!"
 *
 * Todo: dump nodes from the free list?
 */
/*ARGSUSED*/
void
smbfs_kmem_reclaim(void *cdrarg)
{
	smbfs_node_reclaim();
}

/*
 * Here NFS has failover stuff and
 * nfs_rw_xxx - see smbfs_rwlock.c
 */